"""Gradio demo: ask the InternVL2 vision-language model about an image."""

import gradio as gr
from transformers import pipeline

MODEL_ID = "OpenGVLab/InternVL2-1B"

# Upper bound on the answer length; set explicitly so replies are not cut
# short by a small library default.
MAX_NEW_TOKENS = 256

# Load the Hugging Face model once at import time so every request reuses it.
# NOTE(review): trust_remote_code=True executes Python shipped in the model
# repository -- keep it only for repositories you trust (consider pinning a
# revision).
pipe = pipeline("image-text-to-text", model=MODEL_ID, trust_remote_code=True)


def _extract_text(outputs) -> str:
    """Return the generated answer from the pipeline's raw output.

    The pipeline returns a list with one dict per input; the answer is
    stored under ``"generated_text"``. Depending on the library version and
    the ``return_full_text`` flag that value is either a plain string or the
    whole chat transcript (a list of messages), so both shapes are handled.
    """
    if not outputs:
        return ""
    first = outputs[0] if isinstance(outputs, list) else outputs
    generated = first.get("generated_text", "") if isinstance(first, dict) else first
    if isinstance(generated, list) and generated:
        # Chat transcript: the assistant's reply is the last message.
        last = generated[-1]
        generated = last.get("content", "") if isinstance(last, dict) else last
    return generated if isinstance(generated, str) else str(generated)


def run_internvl(image, prompt):
    """Run the vision-language pipeline on one image and one prompt.

    Args:
        image: The uploaded picture as a PIL image (the Gradio input uses
            ``type="pil"``), or ``None`` when nothing was uploaded.
        prompt: The user's question or instruction about the image.

    Returns:
        The model's answer as plain text.

    Raises:
        gr.Error: If no image was uploaded or the prompt is blank; Gradio
            shows the message to the user instead of a stack trace.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a question or prompt.")

    # The image-text-to-text pipeline expects chat messages via ``text=`` with
    # the image embedded as a typed content entry next to the text.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt.strip()},
            ],
        }
    ]
    outputs = pipe(
        text=messages,
        max_new_tokens=MAX_NEW_TOKENS,
        return_full_text=False,  # only the answer, not the echoed prompt
    )
    return _extract_text(outputs)


# Create the Gradio interface
interface = gr.Interface(
    fn=run_internvl,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=2, placeholder="Ask something about the image..."),
    ],
    outputs="text",
    title="InternVL2 Image + Text Assistant",
    description="Upload an image and ask a question or give a prompt.",
)

# Launch the app only when executed as a script, so importing this module
# (e.g. from tests or another app) does not start a web server.
if __name__ == "__main__":
    interface.launch()