Spaces:
Running
Running
Update appImage.py
Browse files- appImage.py +29 -19
appImage.py
CHANGED
@@ -43,25 +43,35 @@ async def caption_from_frontend(file: UploadFile = File(...)):
|
|
43 |
@app.get("/")
|
44 |
def home():
|
45 |
return RedirectResponse(url="/")"""
|
46 |
-
#
|
47 |
-
from transformers import
|
|
|
48 |
from PIL import Image
|
49 |
-
import
|
50 |
|
51 |
-
|
52 |
-
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
|
53 |
-
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
|
54 |
-
USE_GIT = True
|
55 |
-
except:
|
56 |
-
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
57 |
-
USE_GIT = False
|
58 |
|
59 |
-
def
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
@app.get("/")
|
44 |
def home():
|
45 |
return RedirectResponse(url="/")"""
|
46 |
+
# appImage.py
|
47 |
+
from transformers import pipeline
|
48 |
+
import tempfile, os
|
49 |
from PIL import Image
|
50 |
+
from gtts import gTTS
|
51 |
|
52 |
+
# Shared image-to-text pipeline, built once when this module is imported and
# reused by every caption request (model: nlpconnect/vit-gpt2-image-captioning).
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
+
async def caption_image(file):
    """Caption an uploaded image and, when possible, attach a spoken version.

    Args:
        file: Upload object exposing an awaitable ``read()`` that yields the
            raw image bytes (presumably a FastAPI ``UploadFile`` -- confirm
            against the caller).

    Returns:
        A dict with the key ``"caption"`` and, only when speech synthesis
        produced a file, ``"audioUrl"`` of the form ``/files/<basename>``.
    """
    contents = await file.read()

    # delete=False: the captioner opens the image by path after our handle
    # is closed, so the file must outlive the ``with`` block.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    image_path = tmp.name
    try:
        with tmp:
            tmp.write(contents)
        captions = captioner(image_path)
    finally:
        # The image is only needed for inference; remove it so uploads do
        # not pile up in the temp directory on every request.
        try:
            os.remove(image_path)
        except OSError:
            pass

    caption = captions[0]['generated_text'] if captions else "No caption generated."

    audio_path = text_to_speech(caption)

    result = {"caption": caption}
    # text_to_speech returns "" on failure; omit the URL in that case.
    if audio_path:
        result["audioUrl"] = f"/files/{os.path.basename(audio_path)}"
    return result
|
69 |
+
|
70 |
+
def text_to_speech(text: str):
    """Synthesize ``text`` to an MP3 file and return the file's path.

    This is a best-effort helper: any failure is reported as an empty
    string rather than an exception, so callers can simply test the result
    for truthiness.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.mp3`` temp file, or ``""`` when ``text`` is
        empty/blank or synthesis failed.
    """
    # Nothing to speak: short-circuit instead of relying on the TTS library
    # raising and being caught below.
    if not isinstance(text, str) or not text.strip():
        return ""

    audio_path = ""
    try:
        tts = gTTS(text)
        # Reserve a unique file name, then close the handle before saving so
        # no descriptor is leaked and the path can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            audio_path = temp_audio.name
        tts.save(audio_path)
        return audio_path
    except Exception:
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate while keeping the best-effort contract.
        import logging

        logging.getLogger(__name__).exception("Text-to-speech synthesis failed")
        # Do not leave a partially written or empty audio file behind.
        if audio_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return ""
|