File size: 2,427 Bytes
8ba2238 17c6e9f 8ba2238 17c6e9f 8ba2238 17c6e9f 8ba2238 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
"""
File: module_ocr2.py
Description: module to interact with OCR deep learning models.
Author: Didier Guillevic
Date: 2025-04-07
"""
import gradio as gr
import os
import magic
import ocr2 # OCR with software 2.0 models
#
# Get file type: PDF or Image or something else
#
def get_file_type(file_path):
    """Classify a file as 'PDF', 'Image', 'PowerPoint' or 'Other'.

    Two signals are consulted: the lower-cased file extension and the MIME
    type reported by ``magic`` for the file. A match on either signal is
    enough to select a category. Categories are tested in the order PDF,
    Image, PowerPoint, so a file matching several gets the first one.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif')
    pptx_mime = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'

    # Signal 1: extension taken from the path.
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # Signal 2: MIME type reported by magic for the file at this path.
    detected = magic.Magic(mime=True).from_file(file_path)

    if suffix == '.pdf' or detected == 'application/pdf':
        return 'PDF'
    if suffix in image_suffixes or detected.startswith('image/'):
        return 'Image'
    if suffix == '.pptx' or detected == pptx_mime:
        return 'PowerPoint'
    return 'Other'
#
# Process one file
#
def process(input_file: str):
    """Run OCR on one file and return the result.

    The file is classified with ``get_file_type``. PDFs are handed to
    ``ocr2.process_pdf`` and images to ``ocr2.process_image``; whatever
    that call returns is returned unchanged. Any other file type yields an
    explanatory message instead of OCR output.

    Args:
        input_file: Path of the file to process. May be ``None`` or empty
            when no file has been selected.

    Returns:
        The value returned by the matching ``ocr2`` function, or a message
        string when there is no file or the file type is not supported.
    """
    # The Clear button resets the file input to None; answer with a message
    # rather than letting the path handling fail on a missing value.
    if not input_file:
        return "No file provided. Please upload a PDF, or an image file."

    file_type = get_file_type(input_file)
    if file_type == 'PDF':
        return ocr2.process_pdf(input_file)
    if file_type == 'Image':
        return ocr2.process_image(input_file)
    return "Unsupported file type. Please upload a PDF, or an image file."
#
# User interface
#
with gr.Blocks() as demo:
    # Upload file to process
    with gr.Row():
        # The handler accepts both PDFs and images, so the label names both.
        input_file = gr.File(label="Upload a PDF or image file", scale=1)
        output_text = gr.Textbox(label="OCR output", scale=2)

    # Buttons
    with gr.Row():
        ocr_btn = gr.Button(value="OCR", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Examples
    with gr.Accordion("Examples", open=False):
        examples = gr.Examples(
            [
                ['./scanned_doc.pdf'],
                ['./passport_jp.png'],
            ],
            inputs=[input_file],
            outputs=[output_text],
            fn=process,
            cache_examples=False,
            label="Examples",
        )

    # Functions
    # OCR button: run the uploaded file through process() and show the text.
    ocr_btn.click(
        fn=process,
        inputs=[input_file],
        outputs=[output_text],
    )
    # Clear button: empty both the file input and the output box.
    clear_btn.click(
        fn=lambda: (None, ''),
        inputs=[],
        outputs=[input_file, output_text],
    )

if __name__ == '__main__':
    demo.launch()
|