ikraamkb committed on
Commit 26435ba · verified · 1 Parent(s): 9af5fdb

Update appImage.py

Files changed (1)
  1. appImage.py +18 -142
appImage.py CHANGED
@@ -1,22 +1,16 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
-import easyocr
-from fastapi import FastAPI
-from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
-import tempfile
-import os
-from gtts import gTTS
-from fpdf import FPDF
-import datetime
+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import torch
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
 
-# Initialize components
+# Initialize FastAPI
 app = FastAPI()
 
 # Load models - Using microsoft/git-large-coco
 try:
-    # Try loading the better model first
+    # Load the better model
     processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
     git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
     print("Successfully loaded microsoft/git-large-coco model")
@@ -26,9 +20,6 @@ except Exception as e:
     captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     USE_GIT = False
 
-# Initialize EasyOCR
-reader = easyocr.Reader(['en', 'fr'])  # English and French OCR
-
 def generate_caption(image_path):
     """Generate caption using the best available model"""
     try:
@@ -44,152 +35,37 @@ def generate_caption(image_path):
         print(f"Caption generation error: {e}")
         return "Could not generate caption"
 
-def analyze_image(image_path):
-    """Process image with both captioning and OCR"""
-    try:
-        # Generate image caption
-        caption = generate_caption(image_path)
-
-        # Extract text with EasyOCR
-        ocr_result = reader.readtext(image_path, detail=0)
-        extracted_text = "\n".join(ocr_result) if ocr_result else "No text detected"
-
-        return {
-            "caption": caption,
-            "extracted_text": extracted_text
-        }
-    except Exception as e:
-        return {"error": str(e)}
-
-def text_to_speech(text: str) -> str:
-    """Convert text to speech"""
-    try:
-        tts = gTTS(text)
-        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-        tts.save(temp_audio.name)
-        return temp_audio.name
-    except Exception as e:
-        print(f"Text-to-speech error: {e}")
-        return ""
-
-def create_pdf(content: dict, original_filename: str) -> str:
-    """Create PDF report"""
-    try:
-        pdf = FPDF()
-        pdf.add_page()
-        pdf.set_font("Arial", size=12)
-
-        # Title
-        pdf.set_font("Arial", 'B', 16)
-        pdf.cell(200, 10, txt="Image Analysis Report", ln=1, align='C')
-        pdf.set_font("Arial", size=12)
-
-        # Metadata
-        pdf.cell(200, 10, txt=f"Original file: {original_filename}", ln=1)
-        pdf.cell(200, 10, txt=f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=1)
-        pdf.ln(10)
-
-        # Caption
-        pdf.set_font("", 'B')
-        pdf.cell(200, 10, txt="Image Caption:", ln=1)
-        pdf.set_font("")
-        pdf.multi_cell(0, 10, txt=content['caption'])
-        pdf.ln(5)
-
-        # Extracted Text
-        pdf.set_font("", 'B')
-        pdf.cell(200, 10, txt="Extracted Text:", ln=1)
-        pdf.set_font("")
-        pdf.multi_cell(0, 10, txt=content['extracted_text'])
-
-        temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
-        pdf.output(temp_pdf.name)
-        return temp_pdf.name
-    except Exception as e:
-        print(f"PDF creation error: {e}")
-        return ""
-
-def process_image(file_path: str, enable_tts: bool):
+def process_image(file_path: str):
     """Handle image processing for Gradio interface"""
     if not file_path:
-        return "Please upload an image first", "Ready", None, None
+        return "Please upload an image first"
 
     try:
-        original_filename = os.path.basename(file_path)
-
-        # Analyze image
-        result = analyze_image(file_path)
-        if "error" in result:
-            return result["error"], "Error", None, None
-
-        # Format output
-        output_text = f"📷 Image Caption:\n{result['caption']}\n\n✍️ Extracted Text:\n{result['extracted_text']}"
-
-        # Generate audio
-        audio_path = text_to_speech(f"Image caption: {result['caption']}. Extracted text: {result['extracted_text']}") if enable_tts else None
-
-        # Generate PDF
-        pdf_path = create_pdf(result, original_filename)
-
-        return output_text, "Analysis complete", audio_path, pdf_path
+        caption = generate_caption(file_path)
+        return f"📷 Image Caption:\n{caption}"
     except Exception as e:
-        return f"Analysis error: {str(e)}", "Error", None, None
+        return f"Error processing image: {str(e)}"
 
 # Gradio Interface
-with gr.Blocks(title="Image Analysis Service", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🖼️ Image Analysis Service")
-    gr.Markdown("Upload an image to get automatic captioning and text extraction")
+with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🖼️ Image Captioning Service")
+    gr.Markdown("Upload an image to get automatic captioning")
 
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Upload Image", type="filepath")
-            tts_checkbox = gr.Checkbox(
-                label="Enable Text-to-Speech",
-                value=False
-            )
-            analyze_btn = gr.Button("Analyze Image", variant="primary")
+            analyze_btn = gr.Button("Generate Caption", variant="primary")
 
         with gr.Column():
-            output = gr.Textbox(label="Analysis Results", lines=10)
-            status = gr.Textbox(label="Status", interactive=False)
-            audio_output = gr.Audio(label="Audio Summary", visible=False)
-            pdf_download = gr.File(label="Download Report", visible=False)
-
-    def toggle_audio_visibility(enable_tts):
-        return gr.Audio(visible=enable_tts)
-
-    def update_ui(result, status, audio_path, pdf_path):
-        return (
-            result,
-            status,
-            gr.Audio(visible=audio_path is not None, value=audio_path),
-            gr.File(visible=pdf_path is not None, value=pdf_path)
-        )
-
-    tts_checkbox.change(
-        fn=toggle_audio_visibility,
-        inputs=tts_checkbox,
-        outputs=audio_output
-    )
+            output = gr.Textbox(label="Caption Result", lines=5)
 
     analyze_btn.click(
         fn=process_image,
-        inputs=[image_input, tts_checkbox],
-        outputs=[output, status, audio_output, pdf_download]
-    ).then(
-        fn=update_ui,
-        inputs=[output, status, audio_output, pdf_download],
-        outputs=[output, status, audio_output, pdf_download]
+        inputs=[image_input],
+        outputs=[output]
     )
 
-# FastAPI setup
-@app.get("/files/{file_name}")
-async def get_file(file_name: str):
-    file_path = os.path.join(tempfile.gettempdir(), file_name)
-    if os.path.exists(file_path):
-        return FileResponse(file_path)
-    return JSONResponse({"error": "File not found"}, status_code=404)
-
+# Mount Gradio app to FastAPI
 app = gr.mount_gradio_app(app, demo, path="/")
 
 @app.get("/")
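
Note: the body of generate_caption sits outside the changed hunks, so the diff only shows its error handling. Below is a minimal sketch of how captioning with microsoft/git-large-coco is typically wired up with the processor/git_model pair loaded above; the helper name caption_image and the max_length=50 decoding setting are illustrative assumptions, not lines taken from appImage.py.

# Illustrative sketch, not the exact appImage.py code.
# Uses the same microsoft/git-large-coco checkpoint this commit keeps; max_length is an assumption.
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")

def caption_image(image_path: str) -> str:
    """Return a single caption for the image at image_path."""
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, return_tensors="pt")  # preprocess to pixel_values
    generated_ids = git_model.generate(pixel_values=inputs.pixel_values, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Usage: print(caption_image("photo.jpg"))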
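
Because the Gradio demo is mounted on the FastAPI instance with gr.mount_gradio_app (and RedirectResponse stays in the imports, presumably for the @app.get("/") route the hunk truncates), the combined app is served as a regular ASGI application. A hedged sketch follows, assuming the module is importable as appImage and the usual Spaces port 7860; the commit itself does not show a launch command.

# Sketch of serving the mounted FastAPI + Gradio app; module path and port are assumptions.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("appImage:app", host="0.0.0.0", port=7860)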