Spaces:

ikraamkb
/

Summarization

Running

App Files Files Community

Summarization / appImage.py

ikraamkb

Update appImage.py

1795a1a verified 7 days ago

raw

history blame

5.88 kB

	"""import gradio as gr
	from transformers import AutoProcessor, AutoModelForCausalLM
	from PIL import Image
	import torch
	from fastapi import FastAPI
	from fastapi.responses import RedirectResponse

	# Initialize FastAPI
	app = FastAPI()

	# Load models - Using microsoft/git-large-coco
	try:
	# Load the better model
	processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
	git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
	print("Successfully loaded microsoft/git-large-coco model")
	USE_GIT = True
	except Exception as e:
	print(f"Failed to load GIT model: {e}. Falling back to smaller model")
	captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
	USE_GIT = False

	def generate_caption(image_path):
	"Generate caption using the best available model"
	try:
	if USE_GIT:
	image = Image.open(image_path)
	inputs = processor(images=image, return_tensors="pt")
	outputs = git_model.generate(**inputs, max_length=50)
	return processor.batch_decode(outputs, skip_special_tokens=True)[0]
	else:
	result = captioner(image_path)
	return result[0]['generated_text']
	except Exception as e:
	print(f"Caption generation error: {e}")
	return "Could not generate caption"

	def process_image(file_path: str):
	"Handle image processing for Gradio interface"
	if not file_path:
	return "Please upload an image first"

	try:
	caption = generate_caption(file_path)
	return f"📷 Image Caption:\n{caption}"
	except Exception as e:
	return f"Error processing image: {str(e)}"

	# Gradio Interface
	with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo:
	gr.Markdown("# 🖼️ Image Captioning Service")
	gr.Markdown("Upload an image to get automatic captioning")

	with gr.Row():
	with gr.Column():
	image_input = gr.Image(label="Upload Image", type="filepath")
	analyze_btn = gr.Button("Generate Caption", variant="primary")

	with gr.Column():
	output = gr.Textbox(label="Caption Result", lines=5)

	analyze_btn.click(
	fn=process_image,
	inputs=[image_input],
	outputs=[output]
	)

	# Mount Gradio app to FastAPI
	app = gr.mount_gradio_app(app, demo, path="/")

	@app.get("/")
	def redirect_to_interface():
	return RedirectResponse(url="/")
	"""
	import gradio as gr
	from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
	from PIL import Image
	import torch
	from fastapi import FastAPI, UploadFile, Form
	from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
	from fastapi.middleware.cors import CORSMiddleware
	import os
	import tempfile

	# Create the FastAPI application that hosts the JSON API and the Gradio demo.
	app = FastAPI()

	# CORS settings so that browser-side JavaScript can call this backend.
	# NOTE(review): wildcard origins together with allow_credentials=True is very
	# permissive — confirm this is intended before exposing the service publicly.
	_CORS_SETTINGS = {
	    "allow_origins": ["*"],
	    "allow_credentials": True,
	    "allow_methods": ["*"],
	    "allow_headers": ["*"],
	}
	app.add_middleware(CORSMiddleware, **_CORS_SETTINGS)

	# Choose the captioning backend once, at import time. The GIT checkpoint
	# (microsoft/git-large-coco) is preferred; if loading it fails for any reason,
	# the ViT-GPT2 image-to-text pipeline is created instead and USE_GIT stays False.
	USE_GIT = False
	try:
	    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
	    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
	    git_model.eval()  # put the model into evaluation mode
	except Exception as load_error:
	    # Deliberately broad: any loading problem selects the fallback backend.
	    print(f"[INFO] Falling back to ViT: {load_error}")
	    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
	else:
	    # Only reached when processor and model were both loaded successfully.
	    USE_GIT = True

	# Image captioning logic
	def generate_caption(image_path: str) -> str:
	    """Return a text caption for the image stored at *image_path*.

	    The GIT model is used when ``USE_GIT`` is true; otherwise the fallback
	    ``captioner`` pipeline is called with the path directly.

	    Args:
	        image_path: Filesystem path of the image to describe.

	    Returns:
	        The generated caption. This function does not raise for ordinary
	        failures: any ``Exception`` is converted into a string of the form
	        ``"Error: <message>"``, which callers in this file pass on as-is.
	    """
	    try:
	        if USE_GIT:
	            # Open inside a context manager so the underlying file handle is
	            # always released; convert() yields an independent in-memory image
	            # that stays valid after the file is closed.
	            with Image.open(image_path) as source:
	                image = source.convert("RGB")
	            inputs = processor(images=image, return_tensors="pt")
	            outputs = git_model.generate(**inputs, max_length=50)
	            return processor.batch_decode(outputs, skip_special_tokens=True)[0]

	        result = captioner(image_path)
	        return result[0]["generated_text"]
	    except Exception as exc:
	        # Best-effort by design: report the problem as text instead of raising.
	        return f"Error: {exc}"

	# Callback used by the Gradio demo button.
	def process_image(file_path: str):
	    """Build the text shown in the demo's caption box.

	    Args:
	        file_path: Path of the uploaded image; falsy when nothing was uploaded.

	    Returns:
	        A prompt to upload an image when *file_path* is falsy, otherwise the
	        result of ``generate_caption`` prefixed with a heading line.
	    """
	    if file_path:
	        caption = generate_caption(file_path)
	        return f"📷 Image Caption:\n{caption}"
	    return "Please upload an image."

	# FastAPI endpoint for frontend POSTs
	def _upload_suffix(filename):
	    """Return a safe extension such as ``".png"`` taken from *filename*, else ``""``."""
	    extension = os.path.splitext(os.path.basename(filename or ""))[1]
	    # Keep only short, ASCII-alphanumeric extensions so nothing from the
	    # client-supplied name can influence where the upload is written.
	    tail = extension[1:]
	    if 0 < len(tail) <= 10 and tail.isascii() and tail.isalnum():
	        return extension
	    return ""


	@app.post("/imagecaption/")
	async def caption_from_frontend(file: UploadFile, question: str = Form("")):
	    """Caption an uploaded image and synthesise the caption as speech.

	    The upload is written to a uniquely named temporary file (the client's
	    file name is never used as a path, only its sanitised extension), passed
	    to ``generate_caption``, and the caption is converted to an MP3 with gTTS.

	    Args:
	        file: The uploaded image.
	        question: Optional form field; accepted for interface compatibility
	            but not used by this handler.

	    Returns:
	        ``{"answer": <caption>, "audio": "/files/<name>.mp3"}`` on success, or
	        a JSON ``{"error": ...}`` response with status 500 on any failure.
	    """
	    tmp_path = None
	    try:
	        contents = await file.read()

	        # mkstemp picks a unique name inside the temp directory, so uploads
	        # cannot escape it or overwrite each other.
	        fd, tmp_path = tempfile.mkstemp(suffix=_upload_suffix(file.filename))
	        with os.fdopen(fd, "wb") as image_file:
	            image_file.write(contents)

	        caption = generate_caption(tmp_path)

	        # Generate audio for the caption; imported lazily as in the original.
	        from gtts import gTTS
	        audio_path = tmp_path + ".mp3"
	        gTTS(text=caption).save(audio_path)

	        return {
	            "answer": caption,
	            "audio": f"/files/{os.path.basename(audio_path)}",
	        }
	    except Exception as exc:
	        return JSONResponse({"error": str(exc)}, status_code=500)
	    finally:
	        # The image is only needed for captioning; remove it so uploads do
	        # not accumulate in the temp directory.
	        if tmp_path is not None:
	            try:
	                os.remove(tmp_path)
	            except OSError:
	                pass

	# Serve generated files from the temp directory
	@app.get("/files/{file_name}")
	async def serve_file(file_name: str):
	    """Return a file from the system temp directory by its plain file name.

	    Args:
	        file_name: Name of the file to serve. It must be a bare file name;
	            values containing path separators, or equal to ``.`` / ``..``,
	            are treated as not found.

	    Returns:
	        A ``FileResponse`` for an existing regular file, otherwise a JSON
	        ``{"error": "File not found"}`` response with status 404.
	    """
	    # NOTE(review): any regular file in the temp directory can be fetched by
	    # name — consider restricting this to the generated audio files.
	    is_plain_name = (
	        file_name not in ("", ".", "..")
	        and os.path.basename(file_name) == file_name
	    )
	    if is_plain_name:
	        path = os.path.join(tempfile.gettempdir(), file_name)
	        # isfile (not exists) so that directories are never handed to FileResponse.
	        if os.path.isfile(path):
	            return FileResponse(path)
	    return JSONResponse({"error": "File not found"}, status_code=404)

	# Small Gradio UI for trying the captioner by hand. Components are created
	# in the same order as before: heading, image upload, caption box, button.
	with gr.Blocks(title="🖼️ Image Captioning") as demo:
	    gr.Markdown("# 🖼️ Image Captioning Demo")
	    image_input = gr.Image(label="Upload Image", type="filepath")
	    result_box = gr.Textbox(label="Caption")
	    btn = gr.Button("Generate Caption")
	    # Clicking the button sends the uploaded file path to process_image and
	    # writes the returned text into the caption box.
	    btn.click(
	        fn=process_image,
	        inputs=[image_input],
	        outputs=[result_box],
	    )

	# Attach the demo to the FastAPI application at the root path.
	app = gr.mount_gradio_app(app, demo, path="/")

	# Optional root redirect to the frontend page.
	# NOTE(review): this route is registered after the Gradio app has been mounted
	# at "/" earlier in this file; routes are normally matched in registration
	# order, so confirm this handler is actually reachable.
	@app.get("/")
	def redirect_to_frontend():
	    """Redirect the root URL to ``/templates/home.html``."""
	    frontend_url = "/templates/home.html"
	    return RedirectResponse(url=frontend_url)