ikraamkb committed on
Commit
e88e7c9
·
verified ·
1 Parent(s): 094c949

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +5 -158
appImage.py CHANGED
@@ -1,134 +1,5 @@
1
- """import gradio as gr
2
- from transformers import AutoProcessor, AutoModelForCausalLM
3
- from PIL import Image
4
- import torch
5
- from fastapi import FastAPI
6
- from fastapi.responses import RedirectResponse
7
-
8
- # Initialize FastAPI
9
- app = FastAPI()
10
-
11
- # Load models - Using microsoft/git-large-coco
12
- try:
13
- # Load the better model
14
- processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
15
- git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
16
- print("Successfully loaded microsoft/git-large-coco model")
17
- USE_GIT = True
18
- except Exception as e:
19
- print(f"Failed to load GIT model: {e}. Falling back to smaller model")
20
- captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
21
- USE_GIT = False
22
-
23
- def generate_caption(image_path):
24
- "Generate caption using the best available model""
25
- try:
26
- if USE_GIT:
27
- image = Image.open(image_path)
28
- inputs = processor(images=image, return_tensors="pt")
29
- outputs = git_model.generate(**inputs, max_length=50)
30
- return processor.batch_decode(outputs, skip_special_tokens=True)[0]
31
- else:
32
- result = captioner(image_path)
33
- return result[0]['generated_text']
34
- except Exception as e:
35
- print(f"Caption generation error: {e}")
36
- return "Could not generate caption"
37
-
38
- def process_image(file_path: str):
39
- "Handle image processing for Gradio interface"
40
- if not file_path:
41
- return "Please upload an image first"
42
-
43
- try:
44
- caption = generate_caption(file_path)
45
- return f"📷 Image Caption:\n{caption}"
46
- except Exception as e:
47
- return f"Error processing image: {str(e)}"
48
-
49
- # Gradio Interface
50
- with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo:
51
- gr.Markdown("# 🖼️ Image Captioning Service")
52
- gr.Markdown("Upload an image to get automatic captioning")
53
-
54
- with gr.Row():
55
- with gr.Column():
56
- image_input = gr.Image(label="Upload Image", type="filepath")
57
- analyze_btn = gr.Button("Generate Caption", variant="primary")
58
-
59
- with gr.Column():
60
- output = gr.Textbox(label="Caption Result", lines=5)
61
-
62
- analyze_btn.click(
63
- fn=process_image,
64
- inputs=[image_input],
65
- outputs=[output]
66
- )
67
-
68
- # Mount Gradio app to FastAPI
69
- app = gr.mount_gradio_app(app, demo, path="/")
70
-
71
- @app.get("/")
72
- def redirect_to_interface():
73
- return RedirectResponse(url="/")
74
- """
75
- import gradio as gr
76
- from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
77
- from PIL import Image
78
- import torch
79
- from fastapi import FastAPI, UploadFile, Form
80
- from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
81
- from fastapi.middleware.cors import CORSMiddleware
82
- import os
83
- import tempfile
84
-
85
- # ✅ Initialize FastAPI
86
- app = FastAPI()
87
-
88
- # ✅ Enable CORS (so frontend JS can call backend)
89
- app.add_middleware(
90
- CORSMiddleware,
91
- allow_origins=["*"],
92
- allow_credentials=True,
93
- allow_methods=["*"],
94
- allow_headers=["*"],
95
- )
96
-
97
- # ✅ Load caption model
98
- USE_GIT = False
99
- try:
100
- processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
101
- git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
102
- git_model.eval()
103
- USE_GIT = True
104
- except Exception as e:
105
- print(f"[INFO] Falling back to ViT: {e}")
106
- captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
107
-
108
- # ✅ Image captioning logic
109
- def generate_caption(image_path: str) -> str:
110
- try:
111
- if USE_GIT:
112
- image = Image.open(image_path).convert("RGB")
113
- inputs = processor(images=image, return_tensors="pt")
114
- outputs = git_model.generate(**inputs, max_length=50)
115
- caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
116
- else:
117
- result = captioner(image_path)
118
- caption = result[0]['generated_text']
119
- return caption
120
- except Exception as e:
121
- return f"Error: {str(e)}"
122
-
123
- # ✅ For Gradio demo
124
- def process_image(file_path: str):
125
- if not file_path:
126
- return "Please upload an image."
127
- return f"📷 Image Caption:\n{generate_caption(file_path)}"
128
-
129
- # ✅ FastAPI endpoint for frontend POSTs
130
  @app.post("/imagecaption/")
131
- async def caption_from_frontend(file: UploadFile, question: str = Form("")):
132
  try:
133
  # Save temp image
134
  contents = await file.read()
@@ -138,39 +9,15 @@ async def caption_from_frontend(file: UploadFile, question: str = Form("")):
138
 
139
  caption = generate_caption(tmp_path)
140
 
141
- # Optionally generate audio
142
- from gtts import gTTS
143
  audio_path = os.path.join(tempfile.gettempdir(), file.filename + ".mp3")
144
  tts = gTTS(text=caption)
145
  tts.save(audio_path)
146
 
147
- return {
148
  "answer": caption,
149
  "audio": f"/files/{os.path.basename(audio_path)}"
150
- }
151
 
152
  except Exception as e:
153
- return JSONResponse({"error": str(e)}, status_code=500)
154
-
155
- # ✅ Serve static files
156
- @app.get("/files/{file_name}")
157
- async def serve_file(file_name: str):
158
- path = os.path.join(tempfile.gettempdir(), file_name)
159
- if os.path.exists(path):
160
- return FileResponse(path)
161
- return JSONResponse({"error": "File not found"}, status_code=404)
162
-
163
- # ✅ Mount Gradio demo for test
164
- with gr.Blocks(title="🖼️ Image Captioning") as demo:
165
- gr.Markdown("# 🖼️ Image Captioning Demo")
166
- image_input = gr.Image(type="filepath", label="Upload Image")
167
- result_box = gr.Textbox(label="Caption")
168
- btn = gr.Button("Generate Caption")
169
- btn.click(fn=process_image, inputs=[image_input], outputs=[result_box])
170
-
171
- app = gr.mount_gradio_app(app, demo, path="/")
172
-
173
- # ✅ Optional root redirect to frontend
174
- @app.get("/")
175
- def redirect_to_frontend():
176
- return RedirectResponse(url="/templates/home.html")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  @app.post("/imagecaption/")
2
+ async def caption_from_frontend(file: UploadFile):
3
  try:
4
  # Save temp image
5
  contents = await file.read()
 
9
 
10
  caption = generate_caption(tmp_path)
11
 
12
+ # Generate audio
 
13
  audio_path = os.path.join(tempfile.gettempdir(), file.filename + ".mp3")
14
  tts = gTTS(text=caption)
15
  tts.save(audio_path)
16
 
17
+ return JSONResponse({
18
  "answer": caption,
19
  "audio": f"/files/{os.path.basename(audio_path)}"
20
+ })
21
 
22
  except Exception as e:
23
+ return JSONResponse({"error": str(e)}, status_code=500)