ikraamkb committed on
Commit
1795a1a
·
verified ·
1 Parent(s): 0d83986

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +106 -3
appImage.py CHANGED
@@ -1,4 +1,4 @@
1
- import gradio as gr
2
  from transformers import AutoProcessor, AutoModelForCausalLM
3
  from PIL import Image
4
  import torch
@@ -21,7 +21,7 @@ except Exception as e:
21
  USE_GIT = False
22
 
23
  def generate_caption(image_path):
24
- """Generate caption using the best available model"""
25
  try:
26
  if USE_GIT:
27
  image = Image.open(image_path)
@@ -36,7 +36,7 @@ def generate_caption(image_path):
36
  return "Could not generate caption"
37
 
38
  def process_image(file_path: str):
39
- """Handle image processing for Gradio interface"""
40
  if not file_path:
41
  return "Please upload an image first"
42
 
@@ -71,3 +71,106 @@ app = gr.mount_gradio_app(app, demo, path="/")
71
  @app.get("/")
72
  def redirect_to_interface():
73
  return RedirectResponse(url="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """import gradio as gr
2
  from transformers import AutoProcessor, AutoModelForCausalLM
3
  from PIL import Image
4
  import torch
 
21
  USE_GIT = False
22
 
23
  def generate_caption(image_path):
24
+ "Generate caption using the best available model""
25
  try:
26
  if USE_GIT:
27
  image = Image.open(image_path)
 
36
  return "Could not generate caption"
37
 
38
  def process_image(file_path: str):
39
+ "Handle image processing for Gradio interface"
40
  if not file_path:
41
  return "Please upload an image first"
42
 
 
71
  @app.get("/")
72
  def redirect_to_interface():
73
  return RedirectResponse(url="/")
74
+ """
75
+ import gradio as gr
76
+ from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
77
+ from PIL import Image
78
+ import torch
79
+ from fastapi import FastAPI, UploadFile, Form
80
+ from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
81
+ from fastapi.middleware.cors import CORSMiddleware
82
+ import os
83
+ import tempfile
84
+
85
# ✅ Initialize FastAPI
app = FastAPI()

# ✅ Enable CORS (so frontend JS can call backend)
# Wildcard origins/methods/headers must not be combined with
# allow_credentials=True (see the FastAPI CORS documentation). Nothing in
# this file reads cookies or auth headers, so credentials are disabled
# rather than narrowing the origin list.
# NOTE(review): if the frontend does send credentialed requests, list its
# exact origin(s) in allow_origins and set allow_credentials back to True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
96
+
97
# ✅ Load caption model
# Try the GIT captioning model first. USE_GIT only becomes True when the
# processor and the model both loaded and the model was switched to eval
# mode; on any failure the ViT-GPT2 pipeline is loaded instead.
USE_GIT = False
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    git_model.eval()
except Exception as e:
    print(f"[INFO] Falling back to ViT: {e}")
    captioner = pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",
    )
else:
    USE_GIT = True
107
+
108
# ✅ Image captioning logic
def generate_caption(image_path: str) -> str:
    """Return a caption for the image stored at *image_path*.

    Uses the GIT processor/model when ``USE_GIT`` is true, otherwise the
    ``captioner`` pipeline.

    Args:
        image_path: Filesystem path of the image to describe.

    Returns:
        The generated caption. This function does not raise: on any
        failure it returns a string of the form ``"Error: <message>"``.
    """
    try:
        if USE_GIT:
            # Open via a context manager so the file handle is released
            # deterministically; convert() returns an independent image
            # that stays valid after the source file is closed.
            with Image.open(image_path) as source_image:
                image = source_image.convert("RGB")
            inputs = processor(images=image, return_tensors="pt")
            outputs = git_model.generate(**inputs, max_length=50)
            caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        else:
            result = captioner(image_path)
            caption = result[0]['generated_text']
        return caption
    except Exception as e:
        return f"Error: {str(e)}"
122
+
123
# ✅ For Gradio demo
def process_image(file_path: str):
    """Build the text shown in the Gradio result box.

    Args:
        file_path: Path of the uploaded image; may be empty or ``None``
            when nothing has been uploaded.

    Returns:
        A prompt to upload an image when *file_path* is falsy, otherwise
        the caption from ``generate_caption`` under a heading line.
    """
    if not file_path:
        return "Please upload an image."
    # The heading emoji was mis-encoded (UTF-8 bytes decoded with the wrong
    # codec); the intended camera character is restored here.
    return f"📷 Image Caption:\n{generate_caption(file_path)}"
128
+
129
# ✅ FastAPI endpoint for frontend POSTs
@app.post("/imagecaption/")
async def caption_from_frontend(file: UploadFile, question: str = Form("")):
    """Caption an uploaded image and synthesize the caption as speech.

    Args:
        file: The uploaded image.
        question: Form field accepted from the frontend; it is not used
            by this handler.

    Returns:
        A dict with ``"answer"`` (the caption text) and ``"audio"`` (a
        ``/files/<name>`` URL of the generated MP3), or a JSON error body
        with status 500 if anything in the handler raises.

    NOTE(review): ``generate_caption`` reports failures by returning an
    ``"Error: ..."`` string instead of raising, so such a message is
    spoken and returned with a 200 status here.
    """
    tmp_path = None
    try:
        # Save temp image
        contents = await file.read()
        # Never build a path from the client-supplied filename: it may be
        # None, contain "../" segments, or collide with another upload.
        # Only its extension is kept; mkstemp picks a unique name inside
        # the system temp directory.
        suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
        fd, tmp_path = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "wb") as f:
            f.write(contents)

        caption = generate_caption(tmp_path)

        # Optionally generate audio
        from gtts import gTTS
        # The audio file also gets a unique, server-chosen name; it lives in
        # the temp directory so the /files/ route can serve it.
        audio_fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(audio_fd)
        tts = gTTS(text=caption)
        tts.save(audio_path)

        return {
            "answer": caption,
            "audio": f"/files/{os.path.basename(audio_path)}"
        }

    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # The uploaded image is only needed while captioning; remove it so
        # uploads do not accumulate in the temp directory.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
154
+
155
# ✅ Serve static files
@app.get("/files/{file_name}")
async def serve_file(file_name: str):
    """Serve a file from the system temp directory by its bare name.

    Args:
        file_name: Name of a file directly inside the temp directory.

    Returns:
        A ``FileResponse`` for an existing regular file, otherwise a JSON
        ``{"error": "File not found"}`` body with status 404.
    """
    # Accept only a bare file name: anything with a directory component, or
    # the special names "." and "..", could resolve outside the temp
    # directory and is treated as not found.
    if file_name in ("", ".", "..") or os.path.basename(file_name) != file_name:
        return JSONResponse({"error": "File not found"}, status_code=404)

    path = os.path.join(tempfile.gettempdir(), file_name)
    # isfile (not exists) so a directory is never handed to FileResponse.
    if os.path.isfile(path):
        return FileResponse(path)
    return JSONResponse({"error": "File not found"}, status_code=404)
162
+
163
# ✅ Mount Gradio demo for test
# The title and heading emoji were mis-encoded (UTF-8 bytes decoded with the
# wrong codec); the intended framed-picture character is restored.
with gr.Blocks(title="🖼️ Image Captioning") as demo:
    gr.Markdown("# 🖼️ Image Captioning Demo")
    image_input = gr.Image(type="filepath", label="Upload Image")
    result_box = gr.Textbox(label="Caption")
    btn = gr.Button("Generate Caption")
    # Clicking the button sends the uploaded file path to process_image and
    # shows the returned text in the caption box.
    btn.click(fn=process_image, inputs=[image_input], outputs=[result_box])

# NOTE(review): the demo is mounted at "/" before the root route below is
# declared; presumably the mount then handles "/" and the later route is
# never reached. Confirm the intended precedence.
app = gr.mount_gradio_app(app, demo, path="/")
172
+
173
# ✅ Optional root redirect to frontend
# NOTE(review): this route is registered after the Gradio app is mounted at
# "/", and no handler for /templates/ is visible in this file. Verify that
# the route is reachable and that the redirect target is actually served.
@app.get("/")
def redirect_to_frontend():
    """Redirect a request for the site root to the frontend home page."""
    frontend_home = "/templates/home.html"
    return RedirectResponse(url=frontend_home)