File size: 10,057 Bytes
6a7a825
0f462f7
6a7a825
 
 
195dd9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a7a825
 
0f462f7
6a7a825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195dd9b
 
 
 
 
6a7a825
0f462f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a7a825
0f462f7
 
 
 
 
6a7a825
0f462f7
 
 
6a7a825
0f462f7
 
 
 
 
 
 
 
 
 
 
 
 
6a7a825
0f462f7
 
 
 
6a7a825
0f462f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a7a825
0f462f7
 
 
 
195dd9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import streamlit as st
from docling.document_converter import DocumentConverter
import tempfile
import os
import logging
import time
from PIL import Image
import zipfile
import io

# Optional dependencies for the batch image tab (vLLM + docling_core).
# VLLM_AVAILABLE records whether every one of them could be imported, so the
# rest of the script can degrade gracefully when they are missing.
try:
    from vllm import LLM, SamplingParams
    from docling_core.types.doc import DoclingDocument
    from docling_core.types.doc.document import DocTagsDocument
    from pathlib import Path
except ImportError:
    VLLM_AVAILABLE = False
else:
    VLLM_AVAILABLE = True

# Make sure the working directories exist before anything else runs
for _required_dir in ("img", "out"):
    os.makedirs(_required_dir, exist_ok=True)

# Debug-level logging for the whole process, plus this module's own logger
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Page styling: colours and padding for the file uploader and buttons.
# Kept in a named constant so the markup is separate from the call that
# injects it into the page.
_CUSTOM_CSS = """
    <style>    
        .stFileUploader {
            padding: 1rem;
        }
        
        button[data-testid="stFileUploaderButtonPrimary"] {
            background-color: #000660 !important;
            border: none !important;
            color: white !important;
        }

        .stButton button {
            background-color: #006666;
            border: none !important;
            color: white;
            padding: 0.5rem 2rem !important;
        }
        .stButton button:hover {
            background-color: #008080 !important;
            color: white !important;
            border-color: #008080 !important;
        }
        .upload-text {
            font-size: 1.2rem;
            margin-bottom: 1rem;
        }
        div[data-testid="stFileUploadDropzone"]:hover {
            border-color: #006666 !important;
            background-color: rgba(0, 102, 102, 0.05) !important;
        }
    </style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# One tab per feature: single-PDF conversion and batch image processing
_tab_labels = ["PDF to Markdown", "Batch Image Processing"]
tab1, tab2 = st.tabs(_tab_labels)

# --- Tab 1: convert a single PDF (uploaded file or URL) to Markdown ---------
# Every widget of this feature is created inside `with tab1:` so that it is
# rendered within the "PDF to Markdown" tab rather than in the page body.
with tab1:
    st.title("PDF to Markdown Converter")

    # Initialize session state if it doesn't exist: the converter is stored in
    # st.session_state so it is built once and reused on later script reruns.
    if 'converter' not in st.session_state:
        try:
            st.session_state.converter = DocumentConverter()
            logger.debug("Converter successfully created")
        except Exception as e:
            logger.exception(f"Error creating converter: {str(e)}")
            st.error(f"Error creating converter: {str(e)}")
            # Nothing below can work without a converter.
            st.stop()

    # Main upload area
    uploaded_file = st.file_uploader(
        "Upload your PDF file",
        type=['pdf'],
        key='pdf_uploader',
        help="Drag and drop or click to select a PDF file (max 200MB)"
    )

    # URL input area with spacing
    st.markdown("<br>", unsafe_allow_html=True)
    url = st.text_input("Or enter a PDF URL")
    # Ignore surrounding whitespace so a blank-looking field counts as empty.
    source_url = url.strip()

    # Unified convert button
    convert_clicked = st.button("Convert to Markdown", type="primary")

    # Process either uploaded file or URL (an uploaded file takes precedence)
    if convert_clicked:
        if uploaded_file is not None:
            try:
                with st.spinner('Converting file...'):
                    tmp_path = None
                    try:
                        # Write the upload to disk and CLOSE the handle before
                        # converting: closing flushes buffered bytes, and an
                        # open handle can block reopening/removing the file
                        # by name on some platforms.
                        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                            # Record the path first so cleanup still happens
                            # if the write itself fails.
                            tmp_path = tmp_file.name
                            tmp_file.write(uploaded_file.getvalue())
                        logger.debug(f"Temporary file created at: {tmp_path}")

                        try:
                            result = st.session_state.converter.convert(tmp_path)
                            markdown_text = result.document.export_to_markdown()

                            output_filename = os.path.splitext(uploaded_file.name)[0] + '.md'

                            st.success("Conversion completed!")
                            st.download_button(
                                label="Download Markdown file",
                                data=markdown_text,
                                file_name=output_filename,
                                mime="text/markdown"
                            )

                        except Exception as e:
                            logger.exception(f"Error converting file: {str(e)}")
                            st.error(f"Error converting file: {str(e)}")

                    finally:
                        # delete=False means the file is ours to remove.
                        if tmp_path is not None and os.path.exists(tmp_path):
                            os.unlink(tmp_path)
                            logger.debug("Temporary file deleted")

            except Exception as e:
                logger.exception(f"Error processing file: {str(e)}")
                st.error(f"Error processing file: {str(e)}")

        elif source_url:
            try:
                with st.spinner('Converting from URL...'):
                    logger.debug(f"Converting from URL: {source_url}")
                    result = st.session_state.converter.convert(source_url)
                    markdown_text = result.document.export_to_markdown()

                    # Derive the download name from the last path segment of
                    # the URL: drop any fragment/query string and trailing
                    # slash, strip only the final extension (same rule as the
                    # upload branch), and fall back to a fixed name when
                    # nothing usable is left.
                    url_path = source_url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
                    url_stem = os.path.splitext(url_path.rsplit('/', 1)[-1])[0]
                    output_filename = (url_stem or "document") + '.md'

                    st.success("Conversion completed!")
                    st.download_button(
                        label="Download Markdown file",
                        data=markdown_text,
                        file_name=output_filename,
                        mime="text/markdown"
                    )

            except Exception as e:
                logger.exception(f"Error converting from URL: {str(e)}")
                st.error(f"Error converting from URL: {str(e)}")
        else:
            st.warning("Please upload a file or enter a URL first")

# --- Tab 2: batch-convert uploaded images to DocTags + Markdown with vLLM ---
# Each uploaded image is sent to the model on its own; the generated DocTags
# (.dt) and the Markdown export (.md) are collected in an in-memory ZIP that
# is offered for download.
with tab2:
    st.title("Batch Image Processing with vLLM")

    if not VLLM_AVAILABLE:
        st.warning("vLLM and docling_core are required for batch processing. Please install them with: pip install vllm docling_core")
    else:
        st.write("This feature uses vLLM to process multiple images and convert them to Markdown.")

        # Ensure directories exist
        img_dir = "img"
        out_dir = "out"
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): the code below reads images from the uploader and
        # writes results only into the downloadable ZIP; nothing here reads
        # from img_dir or writes to out_dir. Confirm whether this message
        # (or the behavior) should change.
        st.info(f"Images will be processed from the '{img_dir}' directory and results will be saved to the '{out_dir}' directory.")

        # Model configuration
        MODEL_PATH = st.text_input("Model Path", value="ds4sd/SmolDocling-256M-preview")
        PROMPT_TEXT = st.text_area("Prompt Text", value="Convert page to Docling.")

        # File uploader for multiple images
        uploaded_images = st.file_uploader(
            "Upload image files",
            type=['png', 'jpg', 'jpeg'],
            accept_multiple_files=True,
            key='image_uploader',
            help="Drag and drop or click to select image files"
        )

        # Process button
        process_clicked = st.button("Process Images", type="primary", key="process_button")

        if process_clicked and uploaded_images:
            try:
                with st.spinner('Processing images...'):
                    # Keep the model in session state (same pattern as the
                    # PDF converter) so repeated clicks reuse it instead of
                    # constructing a new LLM on every rerun. It is rebuilt
                    # only when the requested model path changes.
                    model_is_cached = (
                        'vllm_model' in st.session_state
                        and 'vllm_model_path' in st.session_state
                        and st.session_state['vllm_model_path'] == MODEL_PATH
                    )
                    if not model_is_cached:
                        # Drop the reference to any previous model before
                        # creating the next one.
                        for stale_key in ('vllm_model', 'vllm_model_path'):
                            if stale_key in st.session_state:
                                del st.session_state[stale_key]
                        st.session_state['vllm_model'] = LLM(model=MODEL_PATH, limit_mm_per_prompt={"image": 1})
                        st.session_state['vllm_model_path'] = MODEL_PATH
                    llm = st.session_state['vllm_model']

                    sampling_params = SamplingParams(
                        temperature=0.0,
                        max_tokens=8192
                    )

                    chat_template = f"<|im_start|>User:<image>{PROMPT_TEXT}<end_of_utterance>\nAssistant:"

                    start_time = time.time()

                    total_images = len(uploaded_images)
                    used_stems = set()   # archive base names already taken
                    failures = []        # (file name, error text) per failed image
                    processed_count = 0

                    # Create a ZIP file in memory to store all outputs
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:

                        progress_bar = st.progress(0)
                        status_text = st.empty()

                        for idx, img_file in enumerate(uploaded_images):
                            img_name = img_file.name
                            status_text.text(f"Processing {img_name} ({idx+1}/{total_images})")

                            # Archive entry name: base name only (no path
                            # components), made unique so that e.g. a.png and
                            # a.jpg do not end up as two "a.md" entries.
                            base_stem = os.path.splitext(os.path.basename(img_name))[0] or "image"
                            img_fn = base_stem
                            duplicate_no = 2
                            while img_fn in used_stems:
                                img_fn = f"{base_stem}_{duplicate_no}"
                                duplicate_no += 1
                            used_stems.add(img_fn)

                            try:
                                # Open image
                                image = Image.open(img_file).convert("RGB")

                                # Process with vLLM
                                llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
                                output = llm.generate([llm_input], sampling_params=sampling_params)[0]
                                doctags = output.outputs[0].text

                                # Convert to Docling Document
                                doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
                                doc = DoclingDocument(name=img_fn)
                                doc.load_from_doctags(doctags_doc)

                                # Export as markdown
                                md_content = doc.export_to_markdown()
                            except Exception as e:
                                # One unreadable image must not discard the
                                # results of the rest of the batch.
                                logger.exception(f"Error processing image {img_name}: {str(e)}")
                                failures.append((img_name, str(e)))
                            else:
                                # Add doctags and markdown to zip
                                zip_file.writestr(f"{img_fn}.dt", doctags)
                                zip_file.writestr(f"{img_fn}.md", md_content)
                                processed_count += 1

                            # Update progress
                            progress_bar.progress((idx + 1) / total_images)

                    total_time = time.time() - start_time

                    for failed_name, reason in failures:
                        st.warning(f"Skipped {failed_name}: {reason}")

                    if processed_count:
                        # Offer the ZIP file for download
                        st.success(f"Processing completed in {total_time:.2f} seconds!")

                        zip_buffer.seek(0)
                        st.download_button(
                            label="Download All Results",
                            data=zip_buffer,
                            file_name="processed_images.zip",
                            mime="application/zip"
                        )
                    else:
                        st.error("None of the uploaded images could be processed")

            except Exception as e:
                logger.exception(f"Error in batch processing: {str(e)}")
                st.error(f"Error in batch processing: {str(e)}")

        elif process_clicked:
            st.warning("Please upload at least one image file")