# Spaces: Didier / Optical_character_recognition (Running)
#
# File size: 2,427 Bytes

"""
File: module_ocr2.py

Description: module to interact with OCR deep learning models.

Author: Didier Guillevic
Date: 2025-04-07
"""

import gradio as gr
import os
import magic

import ocr2 # OCR with software 2.0 models

#
# Get file type: PDF, Image, PowerPoint, or something else
#
def get_file_type(file_path):
    """Classify a file as 'PDF', 'Image', 'PowerPoint' or 'Other'.

    A file belongs to a category when either its lower-cased extension or the
    MIME type returned by ``magic.Magic(mime=True).from_file`` matches.
    Categories are tested in the order PDF, Image, PowerPoint and the first
    match wins.

    Args:
        file_path: path of the file to inspect.

    Returns:
        One of the strings 'PDF', 'Image', 'PowerPoint' or 'Other'.
    """
    pptx_mime = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif')

    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # The MIME type is looked up unconditionally, even when the extension
    # alone would already decide the category.
    detected = magic.Magic(mime=True).from_file(file_path)

    if suffix == '.pdf' or detected == 'application/pdf':
        return 'PDF'
    if suffix in image_suffixes or detected.startswith('image/'):
        return 'Image'
    if suffix == '.pptx' or detected == pptx_mime:
        return 'PowerPoint'
    return 'Other'

#
# Process one file
#
def process(input_file: str):
    """Run OCR on the given file and return the result.

    Args:
        input_file: path of the file to process. May be None or empty when
            no file has been provided (presumably what the UI passes when
            the OCR button is clicked before anything is uploaded).

    Returns:
        The value returned by ``ocr2.process_pdf`` for PDF files or by
        ``ocr2.process_image`` for image files; otherwise a message string
        explaining why nothing was processed.
    """
    # Guard against a missing file: passing None on to get_file_type would
    # raise inside os.path.splitext instead of giving the user a message.
    if not input_file:
        return "No file provided. Please upload a PDF, or an image file."

    file_type = get_file_type(input_file)
    if file_type == 'PDF':
        return ocr2.process_pdf(input_file)
    if file_type == 'Image':
        return ocr2.process_image(input_file)
    return "Unsupported file type. Please upload a PDF, or an image file."

    
#
# User interface
#
with gr.Blocks() as demo:

    # Upload file to process (left) and OCR result (right, twice as wide)
    with gr.Row():
        # Label mentions images too: process() handles both PDFs and images.
        input_file = gr.File(label="Upload a PDF or image file", scale=1)
        output_text = gr.Textbox(label="OCR output", scale=2)

    # Buttons
    with gr.Row():
        ocr_btn = gr.Button(value="OCR", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")
    
    # Examples (collapsed by default); results are not cached
    with gr.Accordion("Examples", open=False):
        examples = gr.Examples(
            [
                ['./scanned_doc.pdf',],
                ['./passport_jp.png']
            ],
            inputs=[input_file,],
            outputs=[output_text,],
            fn=process,
            cache_examples=False,
            label="Examples"
        )
    
    # Event wiring
    # OCR button: run process() on the uploaded file and show its result.
    ocr_btn.click(
        fn=process,
        inputs=[input_file,],
        outputs=[output_text,]
    )
    # Clear button: reset the file input to None and the output text to ''.
    clear_btn.click(
        fn=lambda : (None, ''),
        inputs=[],
        outputs=[input_file, output_text]
    )

# Launch the app only when this module is run as a script.
if __name__ == '__main__':
    demo.launch()