Spaces:
Running
Running
File size: 3,761 Bytes
85abd3d 32dd4d2 7cab805 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 7cab805 d5d3aa6 7cab805 d5d3aa6 32dd4d2 7cab805 d5d3aa6 7cab805 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 32dd4d2 d5d3aa6 7cab805 32dd4d2 85abd3d f018781 85abd3d 3ba8f3d f018781 6852c86 dc2fb2f 3ba8f3d 6852c86 85abd3d 3ba8f3d f018781 3ba8f3d 8d67b19 f018781 8d67b19 f018781 3ba8f3d dc2fb2f f018781 3ba8f3d dc2fb2f f018781 dc2fb2f f018781 3ba8f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
"""from fastapi import FastAPI, UploadFile, File
from fastapi.responses import RedirectResponse, JSONResponse
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import tempfile
import torch
app = FastAPI()
# Load model
try:
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
USE_GIT = True
except Exception:
from transformers import pipeline
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
USE_GIT = False
def generate_caption(image_path):
try:
if USE_GIT:
image = Image.open(image_path)
inputs = processor(images=image, return_tensors="pt")
outputs = model.generate(**inputs, max_length=50)
return processor.batch_decode(outputs, skip_special_tokens=True)[0]
else:
result = captioner(image_path)
return result[0]['generated_text']
except Exception as e:
return f"Error generating caption: {str(e)}"
@app.post("/imagecaption/")
async def caption_from_frontend(file: UploadFile = File(...)):
contents = await file.read()
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
tmp.write(contents)
image_path = tmp.name
caption = generate_caption(image_path)
return JSONResponse({"caption": caption})
@app.get("/")
def home():
return RedirectResponse(url="/")"""
import logging
import os
import tempfile
import uuid

import torch
from fastapi import UploadFile
from gtts import gTTS
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
# Load the captioning backend once, at import time.
#
# The primary backend is the GIT processor/model pair. If loading it raises,
# the module falls back to the ViT-GPT2 "image-to-text" pipeline instead.
# USE_GIT records which backend ended up active; generate_caption() branches
# on it.
GIT_MODEL_ID = "microsoft/git-large-coco"
FALLBACK_MODEL_ID = "nlpconnect/vit-gpt2-image-captioning"

# Pre-bind every backend name so all three exist at module level no matter
# which branch below runs (the unused ones stay None).
processor = None
model = None
captioner = None

try:
    processor = AutoProcessor.from_pretrained(GIT_MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(GIT_MODEL_ID)
    USE_GIT = True
except Exception:
    # The broad catch is deliberate (any load failure should trigger the
    # fallback), but the reason must not be thrown away: log it with the
    # traceback so the switch of backend is visible to whoever runs the app.
    logging.getLogger(__name__).warning(
        "Could not load %s; falling back to %s",
        GIT_MODEL_ID,
        FALLBACK_MODEL_ID,
        exc_info=True,
    )
    # Drop a half-loaded primary backend (processor loaded, model failed).
    processor = None
    model = None
    captioner = pipeline("image-to-text", model=FALLBACK_MODEL_ID)
    USE_GIT = False
def generate_caption(image_path):
    """Return a caption for the image stored at *image_path*.

    When the module-level ``USE_GIT`` flag is true the image is run through
    the GIT ``processor``/``model`` pair; otherwise the path is handed to the
    fallback ``captioner`` pipeline.

    Args:
        image_path: Filesystem path of the image to describe.

    Returns:
        The generated caption. Failures are reported in-band, never raised:
        on any exception the return value is a string beginning with
        ``"Error generating caption: "`` followed by the exception text.
    """
    try:
        if USE_GIT:
            # Close the source file deterministically. Image.open() is lazy,
            # and for multi-frame formats such as GIF the handle stays open
            # after convert(), which leaks a descriptor and can stop the
            # caller from deleting the file. convert() returns an independent
            # in-memory image, so it is safe to use after the file is closed.
            with Image.open(image_path) as source:
                image = source.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]

        result = captioner(image_path)
        return result[0]['generated_text']
    except Exception as e:
        # Boundary handler: callers detect failure via this exact prefix.
        return f"Error generating caption: {str(e)}"
async def caption_image(file: UploadFile):
    """Caption an uploaded image and synthesise the caption as speech.

    The upload is written to a temporary file, captioned with
    ``generate_caption``, and the caption is converted to an mp3 with gTTS.
    The mp3 is saved under ``tempfile.gettempdir()`` with a random UUID name.

    Args:
        file: The uploaded image. Its filename extension must be one of
            .jpg, .jpeg, .png, .bmp or .gif (case-insensitive).

    Returns:
        ``{"caption": ..., "audio": "/files/<uuid>.mp3"}`` on success, or
        ``{"error": ...}`` on any failure. This function does not raise.
    """
    try:
        # The filename may be absent on an upload; treat that the same as an
        # unsupported extension instead of crashing inside splitext().
        _, ext = os.path.splitext(file.filename or "")
        if ext.lower() not in (".jpg", ".jpeg", ".png", ".bmp", ".gif"):
            return {"error": "Unsupported file type"}

        # Read the upload before creating the temp file so a failed read
        # leaves nothing on disk.
        contents = await file.read()

        # Save the uploaded image with its original extension. delete=False
        # is needed because the file is reopened by path, so removal is
        # guaranteed by the finally clause instead.
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
                tmp_path = tmp.name
                tmp.write(contents)
            # NOTE(review): this call is synchronous inside an async function
            # and will occupy the event loop while it runs.
            caption = generate_caption(tmp_path)
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.remove(tmp_path)

        # generate_caption() reports failures in-band. Match its exact prefix
        # so a genuine caption that merely starts with "Error" is not
        # mistaken for a failure.
        if caption.startswith("Error generating caption:"):
            return {"error": caption}

        # Generate TTS audio for the caption.
        tts = gTTS(text=caption, lang="en")
        audio_filename = f"{uuid.uuid4()}.mp3"
        audio_path = os.path.join(tempfile.gettempdir(), audio_filename)
        try:
            tts.save(audio_path)
        except Exception:
            # Do not leave a partial mp3 behind when synthesis fails.
            if os.path.exists(audio_path):
                os.remove(audio_path)
            raise

        # NOTE(review): presumably the application serves the temp directory
        # under /files elsewhere; that mount is not visible in this module.
        # The mp3 is not removed here.
        return {
            "caption": caption,
            "audio": f"/files/{audio_filename}",
        }
    except Exception as e:
        # Boundary handler: every failure is reported to the client as JSON.
        return {"error": f"Failed to generate caption: {str(e)}"}
|