"""Image-captioning and text-to-speech helpers for the FastAPI app.

Captions an uploaded image with microsoft/git-large-coco (falling back to
the lighter nlpconnect/vit-gpt2-image-captioning pipeline if GIT cannot be
loaded), then synthesizes the caption to an MP3 with gTTS.
"""

import os
import tempfile
import uuid

import torch
from fastapi import UploadFile
from gtts import gTTS
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline

# Upload extensions we accept (lower-case, with leading dot).
_ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif"}

# Load the captioning model once at import time.  Prefer GIT-large-COCO;
# if it cannot be downloaded/loaded, fall back to the ViT-GPT2 pipeline.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    USE_GIT = True
except Exception:
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False


def generate_caption(image_path):
    """Return a caption string for the image at *image_path*.

    Never raises: on any failure this returns a string beginning with
    "Error generating caption:" so callers can detect errors with
    ``caption.startswith("Error")``.
    """
    try:
        if USE_GIT:
            # Convert to RGB: uploaded PNGs/GIFs may be RGBA or
            # palette-based, and the GIT processor expects 3 channels.
            image = Image.open(image_path).convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            # Inference only — skip autograd bookkeeping.
            with torch.no_grad():
                outputs = model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
        result = captioner(image_path)
        return result[0]['generated_text']
    except Exception as e:
        return f"Error generating caption: {str(e)}"


async def caption_image(file: UploadFile):
    """Caption an uploaded image and synthesize the caption as speech.

    Returns a dict with either:
      * ``{"caption": <text>, "audio": "/files/<uuid>.mp3"}`` on success, or
      * ``{"error": <message>}`` on any failure.

    The MP3 is written to the system temp directory; a ``/files`` route
    elsewhere is presumably expected to serve it by filename — confirm.
    """
    try:
        # Validate the upload by extension before reading any bytes.
        # ``or ""`` guards against a missing filename (splitext(None) raises).
        _, ext = os.path.splitext(file.filename or "")
        if ext.lower() not in _ALLOWED_EXTENSIONS:
            return {"error": "Unsupported file type"}

        # Persist the upload with its original extension so downstream
        # loaders can sniff the format from the filename.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            contents = await file.read()
            tmp.write(contents)
            tmp_path = tmp.name

        try:
            caption = generate_caption(tmp_path)
        finally:
            # Always remove the temp file, even if captioning blows up
            # (previously it leaked on an unexpected exception).
            os.remove(tmp_path)

        # generate_caption reports failures as "Error ..." strings.
        if caption.startswith("Error"):
            return {"error": caption}

        # Synthesize the caption to an MP3 with a collision-free name.
        tts = gTTS(text=caption, lang="en")
        audio_filename = f"{uuid.uuid4()}.mp3"
        audio_path = os.path.join(tempfile.gettempdir(), audio_filename)
        tts.save(audio_path)

        return {
            "caption": caption,
            "audio": f"/files/{audio_filename}"
        }
    except Exception as e:
        return {"error": f"Failed to generate caption: {str(e)}"}