ikraamkb commited on
Commit
3ba8f3d
·
verified ·
1 Parent(s): e6909d8

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +27 -31
appImage.py CHANGED
@@ -44,12 +44,14 @@ async def caption_from_frontend(file: UploadFile = File(...)):
44
  def home():
45
  return RedirectResponse(url="/")"""
46
  # appImage.py
47
- from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
48
- import tempfile, os
 
49
  from PIL import Image
50
- from gtts import gTTS
51
  import torch
52
 
 
53
  try:
54
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
55
  model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
@@ -58,33 +60,27 @@ except Exception:
58
  captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
59
  USE_GIT = False
60
 
61
- async def caption_image(file):
62
- contents = await file.read()
63
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
64
- tmp.write(contents)
65
- image_path = tmp.name
66
-
67
- if USE_GIT:
68
- image = Image.open(image_path).convert('RGB')
69
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
70
- generated_ids = model.generate(pixel_values, max_length=500)
71
- caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
72
- else:
73
- captions = captioner(image_path)
74
- caption = captions[0]['generated_text'] if captions else "No caption generated."
75
-
76
- audio_path = text_to_speech(caption)
77
-
78
- result = {"caption": caption}
79
- if audio_path:
80
- result["audio"] = f"/files/{os.path.basename(audio_path)}"
81
- return result
82
 
83
- def text_to_speech(text: str):
84
  try:
85
- tts = gTTS(text)
86
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
87
- tts.save(temp_audio.name)
88
- return temp_audio.name
89
- except:
90
- return ""
 
 
 
 
44
  def home():
45
  return RedirectResponse(url="/")"""
46
  # appImage.py
47
+ from fastapi import UploadFile, File
48
+ from transformers import AutoProcessor, AutoModelForCausalLM
49
+ from transformers import pipeline
50
  from PIL import Image
51
+ import tempfile
52
  import torch
53
 
54
+ # Load model
55
  try:
56
  processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
57
  model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
 
60
  captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
61
  USE_GIT = False
62
 
63
def generate_caption(image_path):
    """Generate a caption for the image stored at *image_path*.

    Uses the GIT model (``processor``/``model``) when the module-level
    ``USE_GIT`` flag is True, otherwise falls back to the ViT-GPT2
    ``captioner`` pipeline.

    Returns:
        str: the generated caption, or an ``"Error generating caption: ..."``
        message string on failure (callers rely on always getting a str).
    """
    try:
        if USE_GIT:
            # Convert to RGB: palette/RGBA images (e.g. many PNGs) would
            # otherwise feed the processor an unexpected channel layout.
            image = Image.open(image_path).convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = model.generate(**inputs, max_length=50)
            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
        else:
            result = captioner(image_path)
            # Guard the empty-result case so it doesn't surface as a
            # confusing IndexError-derived error message.
            if not result:
                return "No caption generated."
            return result[0]['generated_text']
    except Exception as e:
        return f"Error generating caption: {str(e)}"
 
 
 
 
 
 
 
 
 
75
 
76
async def caption_image(file: UploadFile = File(...)):
    """Read an uploaded image, caption it, and return the caption.

    The upload is spooled to a temporary ``.png`` file because the
    captioning backends take a filesystem path. The temp file is now
    always deleted — previously ``delete=False`` files accumulated on
    disk for every request.

    Returns:
        str: the caption on success, or
        dict: ``{"error": ...}`` on failure (existing caller contract).
    """
    import os  # local import: used only for temp-file cleanup

    try:
        contents = await file.read()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            tmp.write(contents)
            tmp_path = tmp.name
        try:
            caption = generate_caption(tmp_path)
        finally:
            # Fix resource leak: delete=False files were never removed.
            os.unlink(tmp_path)
        return caption
    except Exception as e:
        return {"error": f"Failed to generate caption: {str(e)}"}