ikraamkb committed on
Commit
8d67b19
·
verified ·
1 Parent(s): b5f584b

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +29 -19
appImage.py CHANGED
@@ -43,25 +43,35 @@ async def caption_from_frontend(file: UploadFile = File(...)):
43
  @app.get("/")
44
  def home():
45
  return RedirectResponse(url="/")"""
46
- # app_image_logic.py
47
- from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
 
48
  from PIL import Image
49
- import torch
50
 
51
- try:
52
- processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
53
- model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
54
- USE_GIT = True
55
- except:
56
- captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
57
- USE_GIT = False
58
 
59
- def generate_caption(image_path):
60
- if USE_GIT:
61
- image = Image.open(image_path)
62
- inputs = processor(images=image, return_tensors="pt")
63
- outputs = model.generate(**inputs, max_length=50)
64
- return processor.batch_decode(outputs, skip_special_tokens=True)[0]
65
- else:
66
- result = captioner(image_path)
67
- return result[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  @app.get("/")
44
  def home():
45
  return RedirectResponse(url="/")"""
46
+ # appImage.py
47
+ from transformers import pipeline
48
+ import tempfile, os
49
  from PIL import Image
50
+ from gtts import gTTS
51
 
52
+ captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 
 
 
 
 
 
53
 
54
+ async def caption_image(file):
55
+ contents = await file.read()
56
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
57
+ tmp.write(contents)
58
+ image_path = tmp.name
59
+
60
+ captions = captioner(image_path)
61
+ caption = captions[0]['generated_text'] if captions else "No caption generated."
62
+
63
+ audio_path = text_to_speech(caption)
64
+
65
+ result = {"caption": caption}
66
+ if audio_path:
67
+ result["audioUrl"] = f"/files/{os.path.basename(audio_path)}"
68
+ return result
69
+
70
+ def text_to_speech(text: str):
71
+ try:
72
+ tts = gTTS(text)
73
+ temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
74
+ tts.save(temp_audio.name)
75
+ return temp_audio.name
76
+ except:
77
+ return ""