ikraamkb committed on
Commit
dc2fb2f
·
verified ·
1 Parent(s): 587a2e1

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +16 -3
appImage.py CHANGED
@@ -49,7 +49,8 @@ from PIL import Image
49
  import tempfile
50
  import os
51
  import torch
52
-
 
53
  # Load model
54
  try:
55
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
@@ -84,14 +85,26 @@ async def caption_image(file: UploadFile):
84
  contents = await file.read()
85
  tmp.write(contents)
86
  tmp_path = tmp.name
87
-
88
  # Generate caption
89
  caption = generate_caption(tmp_path)
 
90
 
91
  # Handle errors inside generate_caption
92
  if caption.startswith("Error"):
93
  return {"error": caption}
94
- return {"caption": caption}
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  except Exception as e:
97
  return {"error": f"Failed to generate caption: {str(e)}"}
 
49
  import tempfile
50
  import os
51
  import torch
52
+ from gtts import gTTS
53
+ import uuid
54
  # Load model
55
  try:
56
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
 
85
  contents = await file.read()
86
  tmp.write(contents)
87
  tmp_path = tmp.name
88
+
89
  # Generate caption
90
  caption = generate_caption(tmp_path)
91
+ os.remove(tmp_path)
92
 
93
  # Handle errors inside generate_caption
94
  if caption.startswith("Error"):
95
  return {"error": caption}
96
+
97
+ # Now generate TTS audio for the caption
98
+ tts = gTTS(text=caption, lang="en")
99
+ audio_filename = f"{uuid.uuid4()}.mp3"
100
+ audio_path = os.path.join(tempfile.gettempdir(), audio_filename)
101
+ tts.save(audio_path)
102
+
103
+ # Return both caption and audio URL
104
+ return {
105
+ "caption": caption,
106
+ "audio": f"/files/{audio_filename}"
107
+ }
108
 
109
  except Exception as e:
110
  return {"error": f"Failed to generate caption: {str(e)}"}