Spaces:
Running
Running
Update appImage.py
Browse files- appImage.py +27 -31
appImage.py
CHANGED
@@ -44,12 +44,14 @@ async def caption_from_frontend(file: UploadFile = File(...)):
|
|
44 |
def home():
|
45 |
return RedirectResponse(url="/")"""
|
46 |
# appImage.py
|
47 |
-
from
|
48 |
-
import
|
|
|
49 |
from PIL import Image
|
50 |
-
|
51 |
import torch
|
52 |
|
|
|
53 |
try:
|
54 |
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
|
55 |
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
|
@@ -58,33 +60,27 @@ except Exception:
|
|
58 |
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
59 |
USE_GIT = False
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
captions = captioner(image_path)
|
74 |
-
caption = captions[0]['generated_text'] if captions else "No caption generated."
|
75 |
-
|
76 |
-
audio_path = text_to_speech(caption)
|
77 |
-
|
78 |
-
result = {"caption": caption}
|
79 |
-
if audio_path:
|
80 |
-
result["audio"] = f"/files/{os.path.basename(audio_path)}"
|
81 |
-
return result
|
82 |
|
83 |
-
def
|
84 |
try:
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
44 |
def home():
|
45 |
return RedirectResponse(url="/")"""
|
46 |
# appImage.py
|
47 |
+
from fastapi import UploadFile, File
|
48 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
49 |
+
from transformers import pipeline
|
50 |
from PIL import Image
|
51 |
+
import tempfile
|
52 |
import torch
|
53 |
|
54 |
+
# Load model
|
55 |
try:
|
56 |
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
|
57 |
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
|
|
|
60 |
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
61 |
USE_GIT = False
|
62 |
|
63 |
+
def generate_caption(image_path):
    """Generate a text caption for the image stored at ``image_path``.

    When the module-level ``USE_GIT`` flag is true the image is run through
    the ``processor``/``model`` pair; otherwise the fallback ``captioner``
    pipeline is called with the path directly.

    Args:
        image_path: Filesystem path of the image to caption.

    Returns:
        The caption string. If anything fails, a string of the form
        ``"Error generating caption: <exception text>"`` is returned instead
        of raising.
    """
    try:
        if USE_GIT:
            # Image.open is lazy and keeps the file handle open; the context
            # manager closes it, and convert() returns a fully loaded copy
            # that stays valid afterwards. Converting to RGB also normalises
            # grayscale / RGBA / palette uploads before preprocessing.
            with Image.open(image_path) as source:
                image = source.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]

        result = captioner(image_path)
        return result[0]['generated_text']
    except Exception as e:
        # Callers expect a string back, so failures are reported in-band.
        return f"Error generating caption: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
+
async def caption_image(file: UploadFile = File(...)):
    """Caption an uploaded image.

    The upload is written to a temporary ``.png``-suffixed file, passed to
    ``generate_caption``, and the temporary file is removed afterwards.

    Args:
        file: The uploaded image.

    Returns:
        Whatever ``generate_caption`` returns on success, or a dict
        ``{"error": "Failed to generate caption: <exception text>"}`` if
        reading or storing the upload fails.
    """
    import os  # local import so this block does not depend on a file-level one

    tmp_path = None
    try:
        contents = await file.read()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            # Record the path before writing so a failed write is still
            # cleaned up in the finally clause below.
            tmp_path = tmp.name
            tmp.write(contents)

        caption = generate_caption(tmp_path)
        return caption
    except Exception as e:
        return {"error": f"Failed to generate caption: {str(e)}"}
    finally:
        # delete=False leaves the file on disk; remove it ourselves so each
        # request does not leak a temporary file.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: a failed removal must not mask the
                # response that is already being returned.
                pass
|