# Spaces: Sleeping
import gradio as gr | |
from transformers import AutoProcessor, AutoModelForImageTextToText | |
import torch | |
import os | |
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Ask for SDPA to be disabled *before* the model is instantiated, so the
# setting is already in place if it is consulted while the model loads
# (previously it was only set after loading had finished).
# NOTE(review): it is not confirmed that PyTorch/Transformers honour this
# variable. The documented switch is `attn_implementation="eager"` on
# `from_pretrained` -- confirm and migrate if SDPA really must be off.
os.environ["PYTORCH_SDP_ATTENTION"] = "0"  # Disable SDPA

# Load the GOT-OCR 2.0 checkpoint and its matching processor once, at import
# time, so every request handled by the app reuses the same objects.
model = AutoModelForImageTextToText.from_pretrained(
    "stepfun-ai/GOT-OCR-2.0-hf", device_map=device
)
processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
def extract_text_from_image(image):
    """Run the GOT-OCR model on one image and return the recognized text.

    Args:
        image: The image to read, in whatever form the processor accepts
            (the UI in this file supplies a PIL image), or ``None`` when
            the form was submitted without an image.

    Returns:
        The decoded model output as a string with special tokens removed,
        or an empty string when no image was supplied.
    """
    # Nothing to recognize: return an empty result instead of letting the
    # processor fail on a missing image.
    if image is None:
        return ""

    inputs = processor(image, return_tensors="pt").to(device)
    generate_ids = model.generate(
        **inputs,
        do_sample=False,  # no sampling, so output is repeatable
        tokenizer=processor.tokenizer,  # passed alongside stop_strings
        stop_strings="<|im_end|>",
        max_new_tokens=4096,
    )

    # Skip the first `prompt_length` positions so that only the tokens
    # following the prompt are decoded.
    prompt_length = inputs["input_ids"].shape[1]
    return processor.decode(
        generate_ids[0, prompt_length:], skip_special_tokens=True
    )
# UI components: a single image upload in, a single text box out.
image_input = gr.Image(type="pil")
text_output = gr.Textbox()

# Sample images offered in the UI; paths are relative to the working directory.
example_images = [
    ["images/250406_01.jpg"],
    ["images/250409_01.jpg"],
]

# Wire the OCR function to the components and describe the app.
interface = gr.Interface(
    fn=extract_text_from_image,
    inputs=image_input,
    outputs=text_output,
    title="OCR on Receipts",
    description="Upload an image to extract text using the GOT-OCR 2.0 model.",
    examples=example_images,
)

# Start the web server without requesting a public share link.
interface.launch(share=False)