File size: 10,057 Bytes
6a7a825 0f462f7 6a7a825 195dd9b 6a7a825 0f462f7 6a7a825 195dd9b 6a7a825 0f462f7 6a7a825 0f462f7 6a7a825 0f462f7 6a7a825 0f462f7 6a7a825 0f462f7 6a7a825 0f462f7 6a7a825 0f462f7 195dd9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 |
import streamlit as st
from docling.document_converter import DocumentConverter
import tempfile
import os
import logging
import time
from PIL import Image
import zipfile
import io
# Optional dependencies used only by the batch image tab. If any of these
# imports fails, the app still loads and VLLM_AVAILABLE tells the rest of the
# script that the batch feature cannot be offered.
try:
    from vllm import LLM, SamplingParams
    from docling_core.types.doc import DoclingDocument
    from docling_core.types.doc.document import DocTagsDocument
    from pathlib import Path
except ImportError:
    VLLM_AVAILABLE = False
else:
    VLLM_AVAILABLE = True
# Make sure the "img" and "out" working directories exist; an already
# existing directory is not an error.
for _required_dir in ("img", "out"):
    os.makedirs(_required_dir, exist_ok=True)

# Module-level logger; the root logger is configured to emit everything from
# DEBUG upwards.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
# Stylesheet overrides for the file uploader, the buttons and the upload drop
# zone. Kept in a constant so the markup is separate from the call that
# injects it into the page as raw HTML.
_CUSTOM_CSS = """
<style>
.stFileUploader {
padding: 1rem;
}
button[data-testid="stFileUploaderButtonPrimary"] {
background-color: #000660 !important;
border: none !important;
color: white !important;
}
.stButton button {
background-color: #006666;
border: none !important;
color: white;
padding: 0.5rem 2rem !important;
}
.stButton button:hover {
background-color: #008080 !important;
color: white !important;
border-color: #008080 !important;
}
.upload-text {
font-size: 1.2rem;
margin-bottom: 1rem;
}
div[data-testid="stFileUploadDropzone"]:hover {
border-color: #006666 !important;
background-color: rgba(0, 102, 102, 0.05) !important;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def _markdown_filename_from_url(url):
    """Derive a ``.md`` download filename from a document URL.

    Only the path component of the URL is used, so query strings and
    fragments never end up in the filename. Percent-escapes are decoded and
    a trailing ``.pdf`` (any case) is removed; other dots are kept, so
    ``https://arxiv.org/pdf/2408.09869`` yields ``2408.09869.md`` rather than
    ``2408.md``. When the URL has no usable final path segment the name
    falls back to ``document.md``.
    """
    from urllib.parse import unquote, urlparse

    basename = os.path.basename(unquote(urlparse(url).path).rstrip("/"))
    if basename.lower().endswith(".pdf"):
        basename = basename[:-len(".pdf")]
    return (basename or "document") + ".md"


def _convert_uploaded_pdf(converter, uploaded_file):
    """Convert an uploaded PDF to Markdown text via a temporary file.

    ``converter`` must provide ``convert(path)`` returning a result whose
    ``document.export_to_markdown()`` gives the Markdown text.
    ``uploaded_file`` must provide ``getvalue()`` returning the PDF bytes.

    The temporary file is removed in all cases, including when writing the
    bytes or converting fails.
    """
    tmp_path = None
    try:
        # delete=False: the file is closed before the converter is handed its
        # path, and removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            # Record the path before writing so a failed write is cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(uploaded_file.getvalue())
        logger.debug("Temporary file created at: %s", tmp_path)
        result = converter.convert(tmp_path)
        return result.document.export_to_markdown()
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
            logger.debug("Temporary file deleted")


def _offer_markdown_download(markdown_text, output_filename):
    """Show the success notice and a download button for the Markdown text."""
    st.success("Conversion completed!")
    st.download_button(
        label="Download Markdown file",
        data=markdown_text,
        file_name=output_filename,
        mime="text/markdown"
    )


# Create tabs for different functionalities
tab1, tab2 = st.tabs(["PDF to Markdown", "Batch Image Processing"])

with tab1:
    st.title("PDF to Markdown Converter")

    # Build the converter once per browser session and reuse it on reruns.
    if 'converter' not in st.session_state:
        try:
            st.session_state.converter = DocumentConverter()
            logger.debug("Converter successfully created")
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception("Error creating converter: %s", e)
            st.error(f"Error creating converter: {str(e)}")
            st.stop()

    # Main upload area
    uploaded_file = st.file_uploader(
        "Upload your PDF file",
        type=['pdf'],
        key='pdf_uploader',
        help="Drag and drop or click to select a PDF file (max 200MB)"
    )

    # URL input area with spacing
    st.markdown("<br>", unsafe_allow_html=True)
    # Strip so that whitespace-only input counts as "no URL entered".
    url = st.text_input("Or enter a PDF URL").strip()

    # Unified convert button
    convert_clicked = st.button("Convert to Markdown", type="primary")

    # An uploaded file takes precedence over a URL when both are supplied.
    if convert_clicked:
        if uploaded_file is not None:
            try:
                with st.spinner('Converting file...'):
                    markdown_text = _convert_uploaded_pdf(
                        st.session_state.converter, uploaded_file
                    )
                _offer_markdown_download(
                    markdown_text,
                    os.path.splitext(uploaded_file.name)[0] + '.md'
                )
            except Exception as e:
                logger.exception("Error converting file: %s", e)
                st.error(f"Error converting file: {str(e)}")
        elif url:
            try:
                with st.spinner('Converting from URL...'):
                    logger.debug("Converting from URL: %s", url)
                    result = st.session_state.converter.convert(url)
                    markdown_text = result.document.export_to_markdown()
                _offer_markdown_download(
                    markdown_text, _markdown_filename_from_url(url)
                )
            except Exception as e:
                logger.exception("Error converting from URL: %s", e)
                st.error(f"Error converting from URL: {str(e)}")
        else:
            st.warning("Please upload a file or enter a URL first")
def _unique_stem(stem, used):
    """Return ``stem``, made unique against the set ``used``.

    If ``stem`` is already in ``used``, ``_2``, ``_3``, ... is appended until
    the name is free. The returned name is added to ``used``. This keeps
    uploads such as ``page.png`` and ``page.jpg`` from producing duplicate
    entries in the output archive.
    """
    candidate = stem
    suffix = 2
    while candidate in used:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    used.add(candidate)
    return candidate


def _get_llm(model_path):
    """Return a vLLM engine for ``model_path``, cached in the session state.

    The engine is stored together with the model path it was built for and
    is rebuilt only when a different path is requested, so repeated clicks
    on the process button do not reload the model.
    """
    cache_key = 'batch_llm'
    if cache_key in st.session_state:
        cached_path, cached_llm = st.session_state[cache_key]
        if cached_path == model_path:
            return cached_llm
        # Drop the reference to the old engine before building its
        # replacement so both are not kept alive by the session at once.
        del cached_llm
        del st.session_state[cache_key]
    llm = LLM(model=model_path, limit_mm_per_prompt={"image": 1})
    st.session_state[cache_key] = (model_path, llm)
    return llm


# Batch processing tab
with tab2:
    st.title("Batch Image Processing with vLLM")

    if not VLLM_AVAILABLE:
        st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
    else:
        st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")

        # Ensure directories exist
        img_dir = "img"
        out_dir = "out"
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): the code below reads images from the uploader and
        # writes results into an in-memory ZIP; nothing visible here reads
        # from img_dir or writes to out_dir. Confirm whether this message
        # (and the directories) are still intended.
        st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")

        # Model configuration
        MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
        PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")

        # File uploader for multiple images
        uploaded_images = st.file_uploader(
            "Upload image files",
            type=['png', 'jpg', 'jpeg'],
            accept_multiple_files=True,
            key='image_uploader',
            help="Drag and drop or click to select image files"
        )

        # Process button
        process_clicked = st.button("Process Images", type="primary", key="process_button")

        if process_clicked and uploaded_images:
            try:
                with st.spinner('Processing images...'):
                    # Reuse the engine across reruns instead of reloading it.
                    llm = _get_llm(MODEL_PATH)

                    sampling_params = SamplingParams(
                        temperature=0.0,
                        max_tokens=8192
                    )
                    chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"

                    start_time = time.time()
                    total_images = len(uploaded_images)
                    used_stems = set()

                    # Collect every output in a single in-memory ZIP archive.
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
                        progress_bar = st.progress(0)
                        status_text = st.empty()

                        for position, img_file in enumerate(uploaded_images, start=1):
                            img_name = img_file.name
                            status_text.text(f"Processing {img_name} ({position}/{total_images})")

                            # convert() returns an independent image, so the
                            # file-backed original can be closed right away.
                            with Image.open(img_file) as raw_image:
                                image = raw_image.convert("RGB")

                            # Process with vLLM
                            llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
                            output = llm.generate([llm_input], sampling_params=sampling_params)[0]
                            doctags = output.outputs[0].text

                            img_fn = _unique_stem(os.path.splitext(img_name)[0], used_stems)

                            # Add doctags to zip
                            zip_file.writestr(f"{img_fn}.dt", doctags)

                            # Convert to Docling Document
                            doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
                            doc = DoclingDocument(name=img_fn)
                            doc.load_from_doctags(doctags_doc)

                            # Export as markdown and add to zip
                            md_content = doc.export_to_markdown()
                            zip_file.writestr(f"{img_fn}.md", md_content)

                            # Update progress
                            progress_bar.progress(position / total_images)

                    total_time = time.time() - start_time

                    # Offer the ZIP file for download
                    st.success(f"Processing completed in {total_time:.2f} seconds!")
                    zip_buffer.seek(0)
                    st.download_button(
                        label="Download All Results",
                        data=zip_buffer,
                        file_name="processed_images.zip",
                        mime="application/zip"
                    )
            except Exception as e:
                # logger.exception keeps the traceback alongside the message.
                logger.exception("Error in batch processing: %s", e)
                st.error(f"Error in batch processing: {str(e)}")
        elif process_clicked:
            st.warning("Please upload at least one image file")