# Spaces: Running  (page-header residue kept as a comment; it is not part of the program)
"""import gradio as gr | |
from transformers import AutoProcessor, AutoModelForCausalLM | |
from PIL import Image | |
import torch | |
from fastapi import FastAPI | |
from fastapi.responses import RedirectResponse | |
# Initialize FastAPI | |
app = FastAPI() | |
# Load models - Using microsoft/git-large-coco | |
try: | |
# Load the better model | |
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco") | |
git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco") | |
print("Successfully loaded microsoft/git-large-coco model") | |
USE_GIT = True | |
except Exception as e: | |
print(f"Failed to load GIT model: {e}. Falling back to smaller model") | |
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning") | |
USE_GIT = False | |
def generate_caption(image_path): | |
"Generate caption using the best available model"" | |
try: | |
if USE_GIT: | |
image = Image.open(image_path) | |
inputs = processor(images=image, return_tensors="pt") | |
outputs = git_model.generate(**inputs, max_length=50) | |
return processor.batch_decode(outputs, skip_special_tokens=True)[0] | |
else: | |
result = captioner(image_path) | |
return result[0]['generated_text'] | |
except Exception as e: | |
print(f"Caption generation error: {e}") | |
return "Could not generate caption" | |
def process_image(file_path: str): | |
"Handle image processing for Gradio interface" | |
if not file_path: | |
return "Please upload an image first" | |
try: | |
caption = generate_caption(file_path) | |
return f"π· Image Caption:\n{caption}" | |
except Exception as e: | |
return f"Error processing image: {str(e)}" | |
# Gradio Interface | |
with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo: | |
gr.Markdown("# πΌοΈ Image Captioning Service") | |
gr.Markdown("Upload an image to get automatic captioning") | |
with gr.Row(): | |
with gr.Column(): | |
image_input = gr.Image(label="Upload Image", type="filepath") | |
analyze_btn = gr.Button("Generate Caption", variant="primary") | |
with gr.Column(): | |
output = gr.Textbox(label="Caption Result", lines=5) | |
analyze_btn.click( | |
fn=process_image, | |
inputs=[image_input], | |
outputs=[output] | |
) | |
# Mount Gradio app to FastAPI | |
app = gr.mount_gradio_app(app, demo, path="/") | |
@app.get("/") | |
def redirect_to_interface(): | |
return RedirectResponse(url="/") | |
""" | |
import gradio as gr | |
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline | |
from PIL import Image | |
import torch | |
from fastapi import FastAPI, UploadFile, Form | |
from fastapi.responses import RedirectResponse, JSONResponse, FileResponse | |
from fastapi.middleware.cors import CORSMiddleware | |
import os | |
import tempfile | |
# Initialize FastAPI
app = FastAPI()

# Enable CORS so that browser front-ends served from other origins can call
# this backend.
# Credentials are deliberately NOT allowed: combining a wildcard origin with
# credentialed requests would let any website issue authenticated calls on
# behalf of a visitor, and the FastAPI CORS documentation requires origins to
# be listed explicitly whenever allow_credentials is True.
# If cookie/Authorization-based auth is ever added, replace the wildcard with
# the trusted origins and only then re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Load caption model
# Prefer the GIT captioning model; if loading it fails for any reason, fall
# back to the ViT-GPT2 image-to-text pipeline. USE_GIT records which backend
# is active: `processor`/`git_model` are only guaranteed to exist when it is
# True, and `captioner` only exists when it is False.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    git_model.eval()  # put the model in evaluation mode for inference
except Exception as load_error:
    USE_GIT = False
    print(f"[INFO] Falling back to ViT: {load_error}")
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
else:
    USE_GIT = True
# Image captioning logic
def generate_caption(image_path: str) -> str:
    """Return a caption for the image stored at ``image_path``.

    Uses the GIT model (``processor`` + ``git_model``) when ``USE_GIT`` is
    true, otherwise the fallback ``captioner`` pipeline.

    Args:
        image_path: Filesystem path of the image to caption.

    Returns:
        The generated caption. This function never raises for ordinary
        errors: on any ``Exception`` it returns a string of the form
        ``"Error: <message>"``, so callers that need to distinguish failures
        must check for that prefix.
    """
    try:
        if USE_GIT:
            # Open through a context manager so the underlying file handle is
            # released deterministically. convert() returns an independent,
            # fully loaded image that stays usable after the file is closed.
            with Image.open(image_path) as source:
                image = source.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]

        result = captioner(image_path)
        return result[0]["generated_text"]
    except Exception as e:
        return f"Error: {str(e)}"
# For Gradio demo
def process_image(file_path: str):
    """Gradio callback: caption the uploaded image and format the result.

    Args:
        file_path: Path of the uploaded image as provided by the Gradio image
            component; empty or ``None`` when nothing has been uploaded.

    Returns:
        A prompt asking for an upload when ``file_path`` is empty, otherwise
        the text returned by ``generate_caption`` prefixed with a label line.
    """
    if not file_path:
        return "Please upload an image."
    # The camera emoji is written as an escape so the label cannot be turned
    # into mojibake again if the file is re-saved with the wrong encoding.
    return f"\U0001F4F7 Image Caption:\n{generate_caption(file_path)}"
# FastAPI endpoint for frontend POSTs
async def caption_from_frontend(file: UploadFile, question: str = Form("")):
    """Caption an uploaded image and synthesize the caption as speech.

    Args:
        file: The uploaded image.
        question: Optional form field; accepted from the front-end but not
            used by this handler.

    Returns:
        On success, a dict with ``"answer"`` (the text returned by
        ``generate_caption``, which reports its own failures as an
        ``"Error: ..."`` string) and ``"audio"`` (a ``/files/<name>`` URL for
        the generated MP3, written to the system temp directory). On any
        exception, a ``JSONResponse`` with status 500 and an ``"error"``
        message.

    NOTE(review): no route decorator is visible on this function in this
    file; confirm that it is registered with the app.
    """
    image_path = None
    try:
        contents = await file.read()

        # The client-supplied filename is untrusted: it may be missing, be an
        # absolute path, or contain "../" segments. Keep only its extension
        # and let tempfile choose a unique name inside the temp directory,
        # which also stops concurrent uploads from overwriting each other.
        extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
        image_fd, image_path = tempfile.mkstemp(prefix="upload_", suffix=extension)
        with os.fdopen(image_fd, "wb") as f:
            f.write(contents)

        caption = generate_caption(image_path)

        # Optionally generate audio
        from gtts import gTTS

        # Reserve a unique MP3 name in the temp directory; gTTS writes to it.
        audio_fd, audio_path = tempfile.mkstemp(prefix="caption_", suffix=".mp3")
        os.close(audio_fd)
        tts = gTTS(text=caption)
        tts.save(audio_path)

        return {
            "answer": caption,
            "audio": f"/files/{os.path.basename(audio_path)}",
        }
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # The uploaded image is only needed while captioning; remove it so
        # uploads do not accumulate in the temp directory.
        if image_path is not None:
            try:
                os.remove(image_path)
            except OSError:
                pass  # best-effort cleanup must never mask the real response
# Serve generated files from the temp directory
async def serve_file(file_name: str):
    """Serve a file that lives directly inside the system temp directory.

    Args:
        file_name: Name of the requested file, relative to
            ``tempfile.gettempdir()``.

    Returns:
        A ``FileResponse`` for the file, or a ``JSONResponse`` with status 404
        when the name does not resolve to a regular file directly inside the
        temp directory.

    NOTE(review): no route decorator is visible on this function in this
    file; confirm that it is registered with the app (the audio URLs built by
    the caption endpoint use the ``/files/<name>`` form).
    """
    temp_dir = os.path.realpath(tempfile.gettempdir())
    try:
        # file_name comes from the request. Resolve it fully and accept it
        # only if it is a regular file whose parent is the temp directory, so
        # absolute paths, ".." segments, symlinks pointing elsewhere and
        # directories are all rejected.
        path = os.path.realpath(os.path.join(temp_dir, file_name))
        is_servable = os.path.dirname(path) == temp_dir and os.path.isfile(path)
    except ValueError:
        # Raised for names the OS cannot represent, e.g. an embedded NUL byte.
        is_servable = False

    if is_servable:
        return FileResponse(path)
    return JSONResponse({"error": "File not found"}, status_code=404)
# Build the Gradio demo (for testing) and mount it on the FastAPI app
# The framed-picture emoji (U+1F5BC U+FE0F) is written with escapes so the
# title and heading cannot be turned into mojibake again if the file is
# re-saved with the wrong encoding.
with gr.Blocks(title="\U0001F5BC\uFE0F Image Captioning") as demo:
    gr.Markdown("# \U0001F5BC\uFE0F Image Captioning Demo")
    image_input = gr.Image(type="filepath", label="Upload Image")
    result_box = gr.Textbox(label="Caption")
    btn = gr.Button("Generate Caption")
    # Event listeners have to be attached inside the Blocks context.
    btn.click(fn=process_image, inputs=[image_input], outputs=[result_box])

# Mount the demo at the site root; mount_gradio_app returns the app to use
# from here on.
app = gr.mount_gradio_app(app, demo, path="/")
# Optional root redirect to frontend
def redirect_to_frontend():
    """Return a redirect response pointing at ``/templates/home.html``.

    NOTE(review): no route decorator is visible on this function in this
    file, and the Gradio demo is mounted at "/" before this definition;
    confirm how (and whether) this handler is reached.
    """
    frontend_url = "/templates/home.html"
    return RedirectResponse(url=frontend_url)