ikraamkb commited on
Commit
3ba8f3d
·
verified ·
1 Parent(s): e6909d8

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +27 -31
appImage.py CHANGED
@@ -44,12 +44,14 @@ async def caption_from_frontend(file: UploadFile = File(...)):
44
  def home():
45
  return RedirectResponse(url="/")"""
46
  # appImage.py
47
- from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
48
- import tempfile, os
 
49
  from PIL import Image
50
- from gtts import gTTS
51
  import torch
52
 
 
53
  try:
54
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
55
  model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
@@ -58,33 +60,27 @@ except Exception:
58
  captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
59
  USE_GIT = False
60
 
61
- async def caption_image(file):
62
- contents = await file.read()
63
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
64
- tmp.write(contents)
65
- image_path = tmp.name
66
-
67
- if USE_GIT:
68
- image = Image.open(image_path).convert('RGB')
69
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
70
- generated_ids = model.generate(pixel_values, max_length=500)
71
- caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
72
- else:
73
- captions = captioner(image_path)
74
- caption = captions[0]['generated_text'] if captions else "No caption generated."
75
-
76
- audio_path = text_to_speech(caption)
77
-
78
- result = {"caption": caption}
79
- if audio_path:
80
- result["audio"] = f"/files/{os.path.basename(audio_path)}"
81
- return result
82
 
83
- def text_to_speech(text: str):
84
  try:
85
- tts = gTTS(text)
86
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
87
- tts.save(temp_audio.name)
88
- return temp_audio.name
89
- except:
90
- return ""
 
 
 
 
44
  def home():
45
  return RedirectResponse(url="/")"""
46
  # appImage.py
47
+ from fastapi import UploadFile, File
48
+ from transformers import AutoProcessor, AutoModelForCausalLM
49
+ from transformers import pipeline
50
  from PIL import Image
51
+ import tempfile
52
  import torch
53
 
54
+ # Load model
55
  try:
56
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
57
  model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
 
60
  captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
61
  USE_GIT = False
62
 
63
def generate_caption(image_path):
    """Generate a caption for the image stored at *image_path*.

    Uses the GIT model (``processor``/``model``) when the module-level
    ``USE_GIT`` flag is True, otherwise falls back to the ViT-GPT2
    ``captioner`` pipeline.

    Returns:
        str: the generated caption, or an ``"Error generating caption: ..."``
        message string on failure (callers rely on always getting a str).
    """
    try:
        if USE_GIT:
            # Convert to RGB: palette/RGBA images (e.g. many PNGs) would
            # otherwise feed the processor an unexpected channel layout.
            image = Image.open(image_path).convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
        else:
            result = captioner(image_path)
            # Guard the empty-result case so it doesn't surface as a
            # confusing IndexError-derived error message.
            if not result:
                return "No caption generated."
            return result[0]['generated_text']
    except Exception as e:
        return f"Error generating caption: {str(e)}"
 
 
 
 
 
 
 
 
 
75
 
76
async def caption_image(file: UploadFile = File(...)):
    """Read an uploaded image, caption it, and return the caption.

    The upload is spooled to a temporary ``.png`` file because the
    captioning backends take a filesystem path. The temp file is now
    always deleted — previously ``delete=False`` files accumulated on
    disk for every request.

    Returns:
        str: the caption on success, or
        dict: ``{"error": ...}`` on failure (existing caller contract).
    """
    import os  # local import: used only for temp-file cleanup

    try:
        contents = await file.read()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            tmp.write(contents)
            tmp_path = tmp.name
        try:
            caption = generate_caption(tmp_path)
        finally:
            # Fix resource leak: delete=False files were never removed.
            os.unlink(tmp_path)
        return caption
    except Exception as e:
        return {"error": f"Failed to generate caption: {str(e)}"}