# feras-vbrl's picture
# Upload 4 files
# 195dd9b verified
import io
import logging
import os
import tempfile
import time
import zipfile
from urllib.parse import unquote, urlparse

import streamlit as st
from docling.document_converter import DocumentConverter
from PIL import Image
# Optional dependencies: vLLM and docling_core are only used by the batch
# image-processing tab, so a failed import is recorded in a flag instead of
# aborting the app.
try:
    from vllm import LLM, SamplingParams
    from docling_core.types.doc import DoclingDocument
    from docling_core.types.doc.document import DocTagsDocument
    from pathlib import Path
except ImportError:
    VLLM_AVAILABLE = False
else:
    VLLM_AVAILABLE = True

# Make sure the working directories are present (no-op when they already exist).
for _required_dir in ("img", "out"):
    os.makedirs(_required_dir, exist_ok=True)

# Root logging at DEBUG verbosity, plus a logger named after this module.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
# Page-wide style overrides: uploader padding and button colours, the shared
# look of regular buttons (including hover), and the drop-zone hover highlight.
_CUSTOM_CSS = """
<style>
.stFileUploader {
padding: 1rem;
}
button[data-testid="stFileUploaderButtonPrimary"] {
background-color: #000660 !important;
border: none !important;
color: white !important;
}
.stButton button {
background-color: #006666;
border: none !important;
color: white;
padding: 0.5rem 2rem !important;
}
.stButton button:hover {
background-color: #008080 !important;
color: white !important;
border-color: #008080 !important;
}
.upload-text {
font-size: 1.2rem;
margin-bottom: 1rem;
}
div[data-testid="stFileUploadDropzone"]:hover {
border-color: #006666 !important;
background-color: rgba(0, 102, 102, 0.05) !important;
}
</style>
"""

# The markup is injected as raw HTML so the <style> element takes effect.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def _markdown_name_from_url(source_url):
    """Derive the ``.md`` download filename from the path component of a URL.

    Only the URL path is considered, so query strings and fragments
    (``.../paper.pdf?download=1``) never leak into the filename. Only the final
    extension is removed, so ``my.report.pdf`` becomes ``my.report.md``. URLs
    whose path has no usable last segment fall back to ``document.md``.
    """
    url_path = unquote(urlparse(source_url).path)
    stem = os.path.splitext(os.path.basename(url_path.rstrip('/')))[0]
    return (stem or "document") + '.md'


def _offer_markdown_download(markdown_text, output_filename):
    """Show the success banner and a download button for converted Markdown."""
    st.success("Conversion completed!")
    st.download_button(
        label="Download Markdown file",
        data=markdown_text,
        file_name=output_filename,
        mime="text/markdown",
    )


# Create tabs for different functionalities
tab1, tab2 = st.tabs(["PDF to Markdown", "Batch Image Processing"])

with tab1:
    st.title("PDF to Markdown Converter")

    # Build the converter once and keep it in session state so later reruns of
    # the script reuse the same instance.
    if 'converter' not in st.session_state:
        try:
            st.session_state.converter = DocumentConverter()
            logger.debug("Converter successfully created")
        except Exception as e:
            logger.exception("Error creating converter: %s", e)
            st.error(f"Error creating converter: {str(e)}")
            st.stop()

    # Main upload area
    uploaded_file = st.file_uploader(
        "Upload your PDF file",
        type=['pdf'],
        key='pdf_uploader',
        help="Drag and drop or click to select a PDF file (max 200MB)"
    )

    # URL input area with spacing
    st.markdown("<br>", unsafe_allow_html=True)
    url = st.text_input("Or enter a PDF URL")

    # Unified convert button
    convert_clicked = st.button("Convert to Markdown", type="primary")

    # Process either the uploaded file or the URL; an upload takes precedence.
    if convert_clicked:
        # Whitespace-only input counts as "no URL".
        source_url = url.strip()

        if uploaded_file is not None:
            # Stays None until the temp file exists, so the cleanup below is
            # safe even when creating or writing the file fails.
            tmp_path = None
            try:
                with st.spinner('Converting file...'):
                    # The converter is given a filesystem path, so the upload
                    # is written to disk and closed before conversion starts.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                        tmp_path = tmp_file.name
                        tmp_file.write(uploaded_file.getvalue())
                    logger.debug(f"Temporary file created at: {tmp_path}")

                    try:
                        result = st.session_state.converter.convert(tmp_path)
                        markdown_text = result.document.export_to_markdown()
                        output_filename = os.path.splitext(uploaded_file.name)[0] + '.md'
                        _offer_markdown_download(markdown_text, output_filename)
                    except Exception as e:
                        logger.exception("Error converting file: %s", e)
                        st.error(f"Error converting file: {str(e)}")
            except Exception as e:
                logger.exception("Error processing file: %s", e)
                st.error(f"Error processing file: {str(e)}")
            finally:
                # delete=False means removing the temp file is our job.
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.unlink(tmp_path)
                    logger.debug("Temporary file deleted")

        elif source_url:
            try:
                with st.spinner('Converting from URL...'):
                    logger.debug(f"Converting from URL: {source_url}")
                    result = st.session_state.converter.convert(source_url)
                    markdown_text = result.document.export_to_markdown()
                    _offer_markdown_download(
                        markdown_text,
                        _markdown_name_from_url(source_url),
                    )
            except Exception as e:
                logger.exception("Error converting from URL: %s", e)
                st.error(f"Error converting from URL: {str(e)}")

        else:
            st.warning("Please upload a file or enter a URL first")
# Batch processing tab


@st.cache_resource(show_spinner=False, max_entries=1)
def _load_llm(model_path):
    """Return a vLLM engine for ``model_path``, reusing one already created.

    Streamlit re-executes this script on every interaction, so without caching
    each click on "Process Images" would construct a brand-new engine.
    ``max_entries=1`` keeps at most one engine cached, so switching the model
    path replaces the previous entry instead of accumulating engines.
    """
    return LLM(model=model_path, limit_mm_per_prompt={"image": 1})


def _unique_stem(stem, used_stems):
    """Return ``stem``, suffixed with ``_2``, ``_3``, ... if it is already taken.

    The returned name is added to ``used_stems``. This keeps uploads such as
    ``scan.png`` and ``scan.jpg`` from producing duplicate ZIP entry names.
    """
    candidate = stem
    counter = 2
    while candidate in used_stems:
        candidate = f"{stem}_{counter}"
        counter += 1
    used_stems.add(candidate)
    return candidate


with tab2:
    st.title("Batch Image Processing with vLLM")

    if not VLLM_AVAILABLE:
        st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
    else:
        st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")

        # Ensure directories exist
        img_dir = "img"
        out_dir = "out"
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): the flow below reads uploads from memory and writes
        # results into an in-memory ZIP; it does not read or write these
        # directories, so this message may be stale - confirm intent.
        st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")

        # Model configuration
        MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
        PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")

        # File uploader for multiple images
        uploaded_images = st.file_uploader(
            "Upload image files",
            type=['png', 'jpg', 'jpeg'],
            accept_multiple_files=True,
            key='image_uploader',
            help="Drag and drop or click to select image files"
        )

        # Process button
        process_clicked = st.button("Process Images", type="primary", key="process_button")

        if process_clicked and uploaded_images:
            try:
                with st.spinner('Processing images...'):
                    # Cached engine: created on first use, reused afterwards.
                    llm = _load_llm(MODEL_PATH)
                    sampling_params = SamplingParams(
                        temperature=0.0,
                        max_tokens=8192
                    )
                    chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"

                    # Timing starts after the engine is available, so it
                    # measures per-image processing only.
                    start_time = time.time()
                    total_images = len(uploaded_images)
                    used_stems = set()

                    # Collect every output in an in-memory ZIP archive.
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                        progress_bar = st.progress(0)
                        status_text = st.empty()

                        for idx, img_file in enumerate(uploaded_images):
                            img_name = img_file.name
                            status_text.text(f"Processing {img_name} ({idx+1}/{total_images})")

                            # Open image
                            image = Image.open(img_file).convert("RGB")

                            # Process with vLLM
                            llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
                            output = llm.generate([llm_input], sampling_params=sampling_params)[0]
                            doctags = output.outputs[0].text

                            # Output name: the upload's stem, made unique
                            # within this batch.
                            img_fn = _unique_stem(os.path.splitext(img_name)[0], used_stems)

                            # Add doctags to zip
                            zip_file.writestr(f"{img_fn}.dt", doctags)

                            # Convert to Docling Document
                            doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
                            doc = DoclingDocument(name=img_fn)
                            doc.load_from_doctags(doctags_doc)

                            # Export as markdown and add to zip
                            md_content = doc.export_to_markdown()
                            zip_file.writestr(f"{img_fn}.md", md_content)

                            # Update progress
                            progress_bar.progress((idx + 1) / total_images)

                    total_time = time.time() - start_time

                    # The archive is closed at this point, so the buffer holds
                    # a complete ZIP; hand its bytes to the download button.
                    st.success(f"Processing completed in {total_time:.2f} seconds!")
                    st.download_button(
                        label="Download All Results",
                        data=zip_buffer.getvalue(),
                        file_name="processed_images.zip",
                        mime="application/zip"
                    )
            except Exception as e:
                logger.exception("Error in batch processing: %s", e)
                st.error(f"Error in batch processing: {str(e)}")
        elif process_clicked:
            st.warning("Please upload at least one image file")