Spaces:
Runtime error
Runtime error
File size: 2,509 Bytes
de612dc 11ef473 de612dc 11ef473 de612dc 11ef473 de612dc 11ef473 de612dc 11ef473 de612dc 11ef473 de612dc 11ef473 de612dc 11ef473 de612dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# import torch
# from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
# import gradio as gr
# from PIL import Image
# # Use a publicly available high-capacity model.
# # For instance, we use "google/pix2struct-docvqa-large".
# # (If you need a different model or a private one, adjust accordingly and add authentication if necessary.)
# # NOTE(review): these lines are disabled. Both from_pretrained calls sit at
# # module level, so if this code is re-enabled the checkpoint is loaded at
# # import time, before any interface is launched.
# model_name = "google/pix2struct-docvqa-large"
# model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
# processor = Pix2StructProcessor.from_pretrained(model_name)
# # NOTE(review): this function is disabled (every line is commented out).
# # The indentation inside the comments has been restored so the code reads
# # correctly and can be uncommented as-is.
# def solve_problem(image):
#     """Generate a text answer for a problem shown in an image.
#
#     The image is converted to RGB, handed to ``processor`` together with a
#     fixed text prompt, and ``model.generate`` produces the output tokens,
#     which are decoded back to text.
#
#     Args:
#         image: Object exposing ``convert("RGB")`` (presumably a PIL image
#             supplied by the UI -- confirm against the caller).
#
#     Returns:
#         A string with a "Problem: ..." line followed by a "Solution: ..."
#         line on success, or "Error processing image: ..." when any
#         exception is raised. Errors are reported in the returned string
#         rather than propagated.
#     """
#     try:
#         # Ensure the image is in RGB.
#         image = image.convert("RGB")
#         # Preprocess image and text prompt.
#         inputs = processor(
#             images=[image],
#             text="Solve the following problem:",
#             return_tensors="pt",
#             max_patches=2048
#         )
#         # Generate prediction.
#         # NOTE(review): temperature is passed together with beam search and
#         # without do_sample=True; it presumably has no effect on the output
#         # in that configuration -- confirm against the transformers
#         # generation documentation.
#         predictions = model.generate(
#             **inputs,
#             max_new_tokens=200,
#             early_stopping=True,
#             num_beams=4,
#             temperature=0.2
#         )
#         # Decode the prompt (input IDs) and the generated output.
#         # NOTE(review): verify that the processor output actually contains
#         # an "input_ids" key. For Pix2Struct the prompt appears to be
#         # rendered onto the image rather than tokenized, in which case this
#         # lookup raises KeyError and every call falls into the except
#         # branch below -- TODO confirm.
#         problem_text = processor.decode(
#             inputs["input_ids"][0],
#             skip_special_tokens=True,
#             clean_up_tokenization_spaces=True
#         )
#         solution = processor.decode(
#             predictions[0],
#             skip_special_tokens=True,
#             clean_up_tokenization_spaces=True
#         )
#         return f"Problem: {problem_text}\nSolution: {solution}"
#     except Exception as e:
#         # Any failure is turned into a message shown to the user instead
#         # of being re-raised.
#         return f"Error processing image: {str(e)}"
# # Set up the Gradio interface.
# # NOTE(review): this section is disabled (every line is commented out); the
# # indentation inside the comments has been restored for readability.
# # The interface takes one uploaded image (delivered as a PIL image in RGB
# # mode) and shows the string returned by solve_problem in a textbox.
# iface = gr.Interface(
#     fn=solve_problem,
#     inputs=gr.Image(type="pil", label="Upload Your Problem Image", image_mode="RGB"),
#     outputs=gr.Textbox(label="Solution", show_copy_button=True),
#     title="Problem Solver with Pix2Struct",
#     description=(
#         "Upload an image (for example, a handwritten math or logic problem) "
#         "and get a solution generated by a high-capacity Pix2Struct model.\n\n"
#         "Note: For best results on domain-specific tasks, consider fine-tuning on your own dataset."
#     ),
#     # NOTE(review): these example files are referenced by relative path and
#     # are not visible here; presumably they must exist next to this script
#     # for the interface to build -- confirm they are in the repository.
#     examples=[
#         ["example_problem1.png"],
#         ["example_problem2.jpg"]
#     ],
#     theme="soft",
#     # NOTE(review): check that the installed Gradio version still accepts
#     # allow_flagging; newer releases may expect a different keyword.
#     allow_flagging="never"
# )
# # Launch the UI only when the file is run as a script, not when imported.
# if __name__ == "__main__":
#     iface.launch()
|