Spaces:
Running
on
Zero
Running
on
Zero
Initial implementation of MarkItDown. Implemented:
Browse files
- Basic Python usage
- Image integration using OpenAI
- Plugins disabled by default
- .env.example +3 -0
- README.md +65 -0
- app.py +22 -2
- requirements.txt +5 -1
- setup.sh +6 -0
- src/main.py +1 -2
- src/parsers/__init__.py +7 -0
- src/parsers/markitdown_parser.py +111 -0
- src/ui/ui.py +49 -3
.env.example
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# API keys for various services
|
2 |
+
GOOGLE_API_KEY=your_google_api_key_here
|
3 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
README.md
CHANGED
@@ -11,6 +11,71 @@ startup_script: setup.sh
|
|
11 |
pinned: false
|
12 |
---
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Markit: Document to Markdown Converter
|
15 |
|
16 |
[Markit on Hugging Face Spaces](https://huggingface.co/spaces/Ansemin101/Markit)
|
|
|
11 |
pinned: false
|
12 |
---
|
13 |
|
14 |
+
# Document to Markdown Converter
|
15 |
+
|
16 |
+
A Hugging Face Space that converts various document formats to Markdown, now with MarkItDown integration!
|
17 |
+
|
18 |
+
## Features
|
19 |
+
|
20 |
+
- Convert PDFs, Office documents, images, and more to Markdown
|
21 |
+
- Multiple parser options:
|
22 |
+
- MarkItDown: For comprehensive document conversion
|
23 |
+
- GOT-OCR: For image-based OCR with LaTeX support
|
24 |
+
- Gemini Flash: For AI-powered text extraction
|
25 |
+
- Download converted documents as Markdown files
|
26 |
+
- Clean, responsive UI
|
27 |
+
|
28 |
+
## Using MarkItDown
|
29 |
+
|
30 |
+
This app integrates [Microsoft's MarkItDown](https://github.com/microsoft/markitdown) library, which supports a wide range of file formats:
|
31 |
+
|
32 |
+
- PDF
|
33 |
+
- PowerPoint (PPTX)
|
34 |
+
- Word (DOCX)
|
35 |
+
- Excel (XLSX)
|
36 |
+
- Images (JPG, PNG)
|
37 |
+
- Audio files (with transcription)
|
38 |
+
- HTML
|
39 |
+
- Text-based formats (CSV, JSON, XML)
|
40 |
+
- ZIP files
|
41 |
+
- YouTube URLs
|
42 |
+
- EPubs
|
43 |
+
- And more!
|
44 |
+
|
45 |
+
## Environment Variables
|
46 |
+
|
47 |
+
You can enhance the functionality by setting these environment variables:
|
48 |
+
|
49 |
+
- `OPENAI_API_KEY`: Enables AI-based image descriptions in MarkItDown
|
50 |
+
- `GOOGLE_API_KEY`: Used for Gemini Flash parser and LaTeX to Markdown conversion
|
51 |
+
|
52 |
+
## Usage
|
53 |
+
|
54 |
+
1. Select a file to upload
|
55 |
+
2. Choose "MarkItDown" as the parser
|
56 |
+
3. Select "Standard Conversion"
|
57 |
+
4. Click "Convert"
|
58 |
+
5. View the Markdown output and download the converted file
|
59 |
+
|
60 |
+
## Local Development
|
61 |
+
|
62 |
+
1. Clone the repository
|
63 |
+
2. Create a `.env` file based on `.env.example`
|
64 |
+
3. Install dependencies:
|
65 |
+
```
|
66 |
+
pip install -r requirements.txt
|
67 |
+
```
|
68 |
+
4. Run the application:
|
69 |
+
```
|
70 |
+
python app.py
|
71 |
+
```
|
72 |
+
|
73 |
+
## Credits
|
74 |
+
|
75 |
+
- [MarkItDown](https://github.com/microsoft/markitdown) by Microsoft
|
76 |
+
- [GOT-OCR](https://github.com/stepfun-ai/GOT-OCR-2.0) for image-based OCR
|
77 |
+
- [Gradio](https://gradio.app/) for the UI framework
|
78 |
+
|
79 |
# Markit: Document to Markdown Converter
|
80 |
|
81 |
[Markit on Hugging Face Spaces](https://huggingface.co/spaces/Ansemin101/Markit)
|
app.py
CHANGED
@@ -64,6 +64,19 @@ except ImportError:
|
|
64 |
print("WARNING: NumPy not installed. Installing NumPy 1.26.3...")
|
65 |
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "numpy==1.26.3"], check=False)
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
# Try to load environment variables from .env file
|
68 |
try:
|
69 |
from dotenv import load_dotenv
|
@@ -72,16 +85,23 @@ try:
|
|
72 |
except ImportError:
|
73 |
print("python-dotenv not installed, skipping .env file loading")
|
74 |
|
75 |
-
# Load
|
76 |
gemini_api_key = os.getenv("GOOGLE_API_KEY")
|
|
|
77 |
|
78 |
-
# Check if API
|
79 |
if not gemini_api_key:
|
80 |
print("Warning: GOOGLE_API_KEY environment variable not found. Gemini Flash parser and LaTeX to Markdown conversion may not work.")
|
81 |
else:
|
82 |
print(f"Found Gemini API key: {gemini_api_key[:5]}...{gemini_api_key[-5:] if len(gemini_api_key) > 10 else ''}")
|
83 |
print("Gemini API will be used for LaTeX to Markdown conversion when using GOT-OCR with Formatted Text mode")
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
# Add the current directory to the Python path
|
86 |
sys.path.append(current_dir)
|
87 |
|
|
|
64 |
print("WARNING: NumPy not installed. Installing NumPy 1.26.3...")
|
65 |
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "numpy==1.26.3"], check=False)
|
66 |
|
67 |
+
# Check if markitdown is installed; when it is missing, try to install it on
# the fly so the app can still start.
try:
    from markitdown import MarkItDown
    print("MarkItDown is installed")
except ImportError:
    import importlib

    print("WARNING: MarkItDown not installed. Installing...")
    # check=False: a failed install must not abort start-up; the outcome is
    # verified by the retry import below instead.
    install_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "markitdown[all]"],
        check=False,
    )
    # Modules installed while the interpreter is running may not be noticed by
    # the import system until its finder caches are invalidated.
    importlib.invalidate_caches()
    try:
        from markitdown import MarkItDown
        print("MarkItDown installed successfully")
    except ImportError:
        print("ERROR: Failed to install MarkItDown")
        if install_result.returncode != 0:
            # Surface pip's exit status to make the failure diagnosable.
            print(f"pip exited with status {install_result.returncode}")
|
79 |
+
|
80 |
# Try to load environment variables from .env file
|
81 |
try:
|
82 |
from dotenv import load_dotenv
|
|
|
85 |
except ImportError:
|
86 |
print("python-dotenv not installed, skipping .env file loading")
|
87 |
|
88 |
+
def mask_secret(secret, visible=4):
    """Return a log-safe rendering of *secret*.

    Only the last ``visible`` characters are revealed, and only when the
    secret is more than twice that long; otherwise nothing is revealed.

    Args:
        secret: The sensitive string (e.g. an API key).
        visible: Number of trailing characters that may be shown.

    Returns:
        str: ``"****"`` optionally followed by the trailing characters.
    """
    # visible <= 0 must be handled explicitly: secret[-0:] is the whole string.
    if visible <= 0 or len(secret) <= visible * 2:
        return "****"
    return f"****{secret[-visible:]}"


# Load API keys from environment variables
gemini_api_key = os.getenv("GOOGLE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Check if API keys are available and print messages.
# Keys are masked before printing so start-up logs do not leak key material.
if not gemini_api_key:
    print("Warning: GOOGLE_API_KEY environment variable not found. Gemini Flash parser and LaTeX to Markdown conversion may not work.")
else:
    print(f"Found Gemini API key: {mask_secret(gemini_api_key)}")
    print("Gemini API will be used for LaTeX to Markdown conversion when using GOT-OCR with Formatted Text mode")

if not openai_api_key:
    print("Warning: OPENAI_API_KEY environment variable not found. LLM-based image description in MarkItDown may not work.")
else:
    print(f"Found OpenAI API key: {mask_secret(openai_api_key)}")
    print("OpenAI API will be available for LLM-based image descriptions in MarkItDown")
|
104 |
+
|
105 |
# Add the current directory to the Python path
|
106 |
sys.path.append(current_dir)
|
107 |
|
requirements.txt
CHANGED
@@ -23,4 +23,8 @@ torchvision
|
|
23 |
git+https://github.com/huggingface/transformers.git@main
|
24 |
accelerate
|
25 |
verovio # Added missing dependency
|
26 |
-
huggingface_hub[cli]>=0.19.0
|
|
|
|
|
|
|
|
|
|
23 |
git+https://github.com/huggingface/transformers.git@main
|
24 |
accelerate
|
25 |
verovio # Added missing dependency
|
26 |
+
huggingface_hub[cli]>=0.19.0
|
27 |
+
|
28 |
+
# MarkItDown and its dependencies
|
29 |
+
markitdown[all]
|
30 |
+
openai>=1.1.0 # For LLM image description support
|
setup.sh
CHANGED
@@ -29,6 +29,7 @@ echo "NumPy installed successfully"
|
|
29 |
echo "Installing Python dependencies..."
|
30 |
pip install -q -U pillow opencv-python
|
31 |
pip install -q -U google-genai
|
|
|
32 |
# pip install -q -U latex2markdown - removed, now using Gemini API for LaTeX conversion
|
33 |
echo "Python dependencies installed successfully"
|
34 |
|
@@ -45,6 +46,11 @@ echo "Installing spaces module for ZeroGPU support..."
|
|
45 |
pip install -q -U spaces
|
46 |
echo "Spaces module installed successfully"
|
47 |
|
|
|
|
|
|
|
|
|
|
|
48 |
# Install the project in development mode only if setup.py or pyproject.toml exists
|
49 |
if [ -f "setup.py" ] || [ -f "pyproject.toml" ]; then
|
50 |
echo "Installing project in development mode..."
|
|
|
29 |
echo "Installing Python dependencies..."
|
30 |
pip install -q -U pillow opencv-python
|
31 |
pip install -q -U google-genai
|
32 |
+
# Quote the requirement: unquoted, the shell parses ">=1.1.0" as an output
# redirection to a file named "=1.1.0" and pip receives plain "openai".
pip install -q -U 'openai>=1.1.0'  # For LLM image description support
|
33 |
# pip install -q -U latex2markdown - removed, now using Gemini API for LaTeX conversion
|
34 |
echo "Python dependencies installed successfully"
|
35 |
|
|
|
46 |
pip install -q -U spaces
|
47 |
echo "Spaces module installed successfully"
|
48 |
|
49 |
+
# Install markitdown with all optional dependencies
|
50 |
+
echo "Installing MarkItDown with all dependencies..."
|
51 |
+
pip install -q -U 'markitdown[all]'
|
52 |
+
echo "MarkItDown installed successfully"
|
53 |
+
|
54 |
# Install the project in development mode only if setup.py or pyproject.toml exists
|
55 |
if [ -f "setup.py" ] || [ -f "pyproject.toml" ]; then
|
56 |
echo "Installing project in development mode..."
|
src/main.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
import parsers # Import all parsers to ensure they're registered
|
2 |
-
|
3 |
from src.ui.ui import launch_ui
|
4 |
|
5 |
-
|
6 |
def main():
|
|
|
7 |
launch_ui(
|
8 |
server_name="0.0.0.0",
|
9 |
server_port=7860,
|
|
|
1 |
import parsers # Import all parsers to ensure they're registered
|
|
|
2 |
from src.ui.ui import launch_ui
|
3 |
|
|
|
4 |
def main():
|
5 |
+
# Launch the UI
|
6 |
launch_ui(
|
7 |
server_name="0.0.0.0",
|
8 |
server_port=7860,
|
src/parsers/__init__.py
CHANGED
@@ -4,6 +4,13 @@
|
|
4 |
from src.parsers.gemini_flash_parser import GeminiFlashParser
|
5 |
from src.parsers.got_ocr_parser import GotOcrParser
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
# You can add new parsers here in the future
|
8 |
|
9 |
# This file makes the parsers directory a Python package
|
|
|
4 |
from src.parsers.gemini_flash_parser import GeminiFlashParser
|
5 |
from src.parsers.got_ocr_parser import GotOcrParser
|
6 |
|
7 |
+
# Optional MarkItDown parser. It is imported after the other parsers on
# purpose: per the original note, it needs to be imported last so it's default.
try:
    from src.parsers.markitdown_parser import MarkItDownParser
except ImportError as import_error:
    print(f"Error importing MarkItDown parser: {import_error}")
else:
    print("MarkItDown parser imported successfully")
|
13 |
+
|
14 |
# You can add new parsers here in the future
|
15 |
|
16 |
# This file makes the parsers directory a Python package
|
src/parsers/markitdown_parser.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import os
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Dict, List, Optional, Any, Union
|
5 |
+
import io
|
6 |
+
|
7 |
+
# Import the parser interface and registry
|
8 |
+
from src.parsers.parser_interface import DocumentParser
|
9 |
+
from src.parsers.parser_registry import ParserRegistry
|
10 |
+
|
11 |
+
# Configure logging
# The module logger is created before the availability check so the warning
# below is emitted through it. Calling logging.warning() on the root logger
# would implicitly run logging.basicConfig() when no handlers exist, which
# turns any later explicit basicConfig(...) call in the application into a
# no-op.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Check for MarkItDown availability
# HAS_MARKITDOWN is True only when both imports below succeed.
try:
    from markitdown import MarkItDown
    from openai import OpenAI
    HAS_MARKITDOWN = True
except ImportError as import_error:
    HAS_MARKITDOWN = False
    # Include the underlying error so it is clear which import failed.
    logger.warning(
        "MarkItDown package not installed. Please install with 'pip install markitdown[all]' "
        "(import error: %s)",
        import_error,
    )
|
23 |
+
|
24 |
+
class MarkItDownParser(DocumentParser):
    """
    Parser implementation using MarkItDown for converting various file formats to Markdown.

    When the OPENAI_API_KEY environment variable is set, the MarkItDown
    instance is created with an OpenAI client (model "gpt-4o") for LLM-based
    image descriptions; otherwise a plain instance is used. Plugins are
    disabled in both cases.
    """

    def __init__(self):
        """Create the underlying MarkItDown instance, if the package is available."""
        # Remains None when MarkItDown is missing or could not be constructed;
        # parse() checks this before attempting a conversion.
        self.markdown_instance = None
        if not HAS_MARKITDOWN:
            return

        markitdown_options = {"enable_plugins": False}

        # Check for OpenAI API key for LLM-based image descriptions
        if os.getenv("OPENAI_API_KEY"):
            try:
                markitdown_options["llm_client"] = OpenAI()
                markitdown_options["llm_model"] = "gpt-4o"
            except Exception:
                # Image descriptions are optional: a broken OpenAI client must
                # not disable document conversion altogether.
                logger.exception(
                    "Error creating OpenAI client; continuing without image descriptions"
                )
                markitdown_options.pop("llm_client", None)
                markitdown_options.pop("llm_model", None)

        try:
            self.markdown_instance = MarkItDown(**markitdown_options)
        except Exception as e:
            logger.exception(f"Error initializing MarkItDown: {str(e)}")
            self.markdown_instance = None
            return

        if "llm_client" in markitdown_options:
            logger.info("MarkItDown initialized with OpenAI support for image descriptions")
        else:
            logger.info("MarkItDown initialized without OpenAI support")

    def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
        """
        Parse a document and return its content as Markdown.

        Args:
            file_path: Path to the document
            ocr_method: OCR method to use (not used in this parser)
            **kwargs: Additional options. ``check_cancellation`` may be a
                zero-argument callable returning True when the conversion
                should be abandoned; it defaults to "never cancelled".

        Returns:
            str: Markdown representation of the document, or a human-readable
            message when MarkItDown is unavailable, the conversion was
            cancelled, or the conversion raised an error (errors are returned
            as text, not raised).
        """
        # Check if MarkItDown is available
        if not HAS_MARKITDOWN or self.markdown_instance is None:
            return "Error: MarkItDown is not available. Please install with 'pip install markitdown[all]'"

        # Get cancellation check function from kwargs
        check_cancellation = kwargs.get('check_cancellation', lambda: False)

        # Check for cancellation before starting
        if check_cancellation():
            return "Conversion cancelled."

        try:
            # Convert the file using the standard instance
            result = self.markdown_instance.convert(file_path)

            # The conversion itself is not interruptible here, so cancellation
            # is re-checked once it has finished and the result is discarded.
            if check_cancellation():
                return "Conversion cancelled."

            return result.text_content
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Error converting file with MarkItDown: {str(e)}")
            return f"Error: {str(e)}"

    @classmethod
    def get_name(cls) -> str:
        """Return the display name of this parser."""
        return "MarkItDown"

    @classmethod
    def get_supported_ocr_methods(cls) -> List[Dict[str, Any]]:
        """Return the conversion modes offered by this parser (a single "standard" mode)."""
        return [
            {
                "id": "standard",
                "name": "Standard Conversion",
                "default_params": {}
            }
        ]

    @classmethod
    def get_description(cls) -> str:
        """Return a short human-readable description of this parser."""
        return "MarkItDown parser for converting various file formats to Markdown"
|
104 |
+
|
105 |
+
|
106 |
+
# Registration is skipped when the MarkItDown import check failed.
if not HAS_MARKITDOWN:
    logger.warning("Could not register MarkItDown parser: Package not installed")
else:
    ParserRegistry.register(MarkItDownParser)
    logger.info("MarkItDown parser registered successfully")
|
src/ui/ui.py
CHANGED
@@ -7,6 +7,15 @@ from pathlib import Path
|
|
7 |
from src.core.converter import convert_file, set_cancellation_flag, is_conversion_in_progress
|
8 |
from src.parsers.parser_registry import ParserRegistry
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
# Configure logging
|
11 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
12 |
logger = logging.getLogger(__name__)
|
@@ -158,8 +167,43 @@ def create_ui():
|
|
158 |
margin-top: 15px;
|
159 |
margin-bottom: 15px;
|
160 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
""") as demo:
|
162 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
# State to track if cancellation is requested
|
164 |
cancel_requested = gr.State(False)
|
165 |
# State to store the conversion thread
|
@@ -168,13 +212,15 @@ def create_ui():
|
|
168 |
output_format_state = gr.State("Markdown")
|
169 |
|
170 |
# File input first
|
171 |
-
file_input = gr.File(label="Upload
|
172 |
|
173 |
# Provider and OCR options below the file input
|
174 |
with gr.Row(elem_classes=["provider-options-row"]):
|
175 |
with gr.Column(scale=1):
|
176 |
parser_names = ParserRegistry.get_parser_names()
|
177 |
-
|
|
|
|
|
178 |
|
179 |
provider_dropdown = gr.Dropdown(
|
180 |
label="Provider",
|
|
|
7 |
from src.core.converter import convert_file, set_cancellation_flag, is_conversion_in_progress
|
8 |
from src.parsers.parser_registry import ParserRegistry
|
9 |
|
10 |
+
# Configure logging
# This must happen before the first log call in this module: the module-level
# logging.info()/logging.warning() helpers implicitly call basicConfig() when
# the root logger has no handlers, after which this explicit configuration
# (INFO level, custom format) would be silently ignored.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Import MarkItDown to check if it's available
try:
    from markitdown import MarkItDown
    HAS_MARKITDOWN = True
    logger.info("MarkItDown is available for use")
except ImportError:
    HAS_MARKITDOWN = False
    logger.warning("MarkItDown is not available")
|
|
|
167 |
margin-top: 15px;
|
168 |
margin-bottom: 15px;
|
169 |
}
|
170 |
+
|
171 |
+
/* Style the app title */
|
172 |
+
.app-title {
|
173 |
+
text-align: center;
|
174 |
+
margin-bottom: 20px;
|
175 |
+
}
|
176 |
+
|
177 |
+
/* Info section */
|
178 |
+
.info-section {
|
179 |
+
background-color: #f8f9fa;
|
180 |
+
padding: 10px;
|
181 |
+
border-radius: 5px;
|
182 |
+
margin-bottom: 15px;
|
183 |
+
font-size: 14px;
|
184 |
+
}
|
185 |
""") as demo:
|
186 |
+
# Add title and description
|
187 |
+
gr.HTML(
|
188 |
+
"""
|
189 |
+
<div class="app-title">
|
190 |
+
<h1>Document to Markdown Converter</h1>
|
191 |
+
<p>Convert documents to markdown format using various parsers including MarkItDown</p>
|
192 |
+
</div>
|
193 |
+
"""
|
194 |
+
)
|
195 |
+
|
196 |
+
# Add MarkItDown info block if it's available
|
197 |
+
if HAS_MARKITDOWN:
|
198 |
+
gr.HTML(
|
199 |
+
"""
|
200 |
+
<div class="info-section">
|
201 |
+
<strong>MarkItDown is available!</strong> Use it to convert various file formats
|
202 |
+
including PDF, Office documents, images, and more to Markdown format.
|
203 |
+
</div>
|
204 |
+
"""
|
205 |
+
)
|
206 |
+
|
207 |
# State to track if cancellation is requested
|
208 |
cancel_requested = gr.State(False)
|
209 |
# State to store the conversion thread
|
|
|
212 |
output_format_state = gr.State("Markdown")
|
213 |
|
214 |
# File input first
|
215 |
+
file_input = gr.File(label="Upload Document", type="filepath")
|
216 |
|
217 |
# Provider and OCR options below the file input
|
218 |
with gr.Row(elem_classes=["provider-options-row"]):
|
219 |
with gr.Column(scale=1):
|
220 |
parser_names = ParserRegistry.get_parser_names()
|
221 |
+
|
222 |
+
# Make MarkItDown the default parser if available
|
223 |
+
default_parser = next((p for p in parser_names if p == "MarkItDown"), parser_names[0] if parser_names else "PyPdfium")
|
224 |
|
225 |
provider_dropdown = gr.Dropdown(
|
226 |
label="Provider",
|