mknolan committed
Commit ce32a95 · verified · Parent: da1f9eb

Upload InternVL2 implementation

Files changed (1)
  1. app_internvl2.py +118 -216
app_internvl2.py CHANGED
@@ -8,11 +8,6 @@ import warnings
 import stat
 import subprocess
 import sys
-import asyncio
-import nest_asyncio
-
-# Apply nest_asyncio to allow nested event loops
-nest_asyncio.apply()
 
 # Set environment variables
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
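For context on the deleted import pair: nest_asyncio monkey-patches asyncio so that an already-running event loop can be re-entered, which is the workaround the old code leaned on for running lmdeploy's async internals inside Gradio callbacks. A minimal illustration of what the patch enables (a standalone sketch, not code from this repo):

```python
import asyncio

import nest_asyncio

nest_asyncio.apply()  # patch the current loop so run_until_complete() can nest

async def inner():
    return "ok"

async def outer():
    # Re-entering the running loop; without nest_asyncio.apply() this raises
    # "RuntimeError: This event loop is already running".
    return asyncio.get_event_loop().run_until_complete(inner())

loop = asyncio.get_event_loop()
print(loop.run_until_complete(outer()))  # -> ok
```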
@@ -102,8 +97,7 @@ def check_gpu_availability():
     return False
 
 # Global variables
-internvl2_pipeline = None
-MODEL_LOADED = False
+internvl2_model = None
 USE_GPU = check_gpu_availability()
 
 if USE_GPU:
@@ -111,209 +105,119 @@ if USE_GPU:
 else:
     print("WARNING: GPU is not available or not working properly. This application requires GPU acceleration.")
 
-# Check if lmdeploy is available and try to import
+# ALTERNATIVE MODEL: Let's try a simpler vision model as backup
+try:
+    from transformers import BlipProcessor, BlipForConditionalGeneration
+    HAS_BLIP = True
+    blip_processor = None
+    blip_model = None
+    print("Successfully imported BLIP model")
+except ImportError:
+    HAS_BLIP = False
+    print("BLIP model not available, will try InternVL2")
+
+# Try importing lmdeploy for InternVL2
 try:
     from lmdeploy import pipeline, TurbomindEngineConfig
-    LMDEPLOY_AVAILABLE = True
+    HAS_LMDEPLOY = True
     print("Successfully imported lmdeploy")
 except ImportError as e:
-    LMDEPLOY_AVAILABLE = False
-    print(f"lmdeploy import failed: {str(e)}. Will use a placeholder for demos.")
+    HAS_LMDEPLOY = False
+    print(f"lmdeploy import failed: {str(e)}. Will try backup model.")
 
-# Model configuration
-MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ"  # 4-bit quantized model
-
-def load_internvl2_model():
-    """Load the InternVL2 model using lmdeploy"""
-    global internvl2_pipeline, MODEL_LOADED
-
-    # If already loaded, return
-    if internvl2_pipeline is not None:
-        return True
+# Try to load the appropriate model
+def load_model():
+    global internvl2_model, blip_processor, blip_model
 
-    # If lmdeploy is not available, we'll use a demo placeholder
-    if not LMDEPLOY_AVAILABLE:
-        print("lmdeploy not available. Using demo placeholder.")
-        MODEL_LOADED = False
-        return False
-
-    # Check if GPU is available
     if not USE_GPU:
-        print("Cannot load InternVL2 model without GPU acceleration.")
-        MODEL_LOADED = False
-        return False
-
-    print("Loading InternVL2 model...")
-    try:
-        # Force synchronous execution for everything
-        import os
-        # Set environment variables to force synchronous behavior
-        os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-        # Disable asyncio in lmdeploy
-        os.environ["LMDEPLOY_DISABLE_ASYNC"] = "1"
-
-        # Configure for AWQ quantized model
-        backend_config = TurbomindEngineConfig(
-            model_format='awq',
-            session_len=2048  # Explicitly set session length
-        )
-
-        # Create a synchronous pipeline to avoid asyncio issues
-        # Explicitly set all parameters that might default to async behavior
-        internvl2_pipeline = pipeline(
-            MODEL_ID,
-            backend_config=backend_config,
-            log_level='INFO',
-            model_name_or_path=None,
-            backend_name="turbomind",
-            stream=False,  # Important: disable streaming
-            tensor_parallel=1,  # Use single GPU to avoid distributed processing
-        )
-
-        print("InternVL2 model loaded successfully!")
-        MODEL_LOADED = True
-        return True
-    except Exception as e:
-        print(f"Error loading InternVL2 model: {str(e)}")
-        if "CUDA out of memory" in str(e):
-            print("Not enough GPU memory for the model")
-        elif "Found no NVIDIA driver" in str(e):
-            print("NVIDIA GPU driver not found or not properly configured")
-        MODEL_LOADED = False
+        print("Cannot load models without GPU acceleration.")
         return False
+
+    # First try to load InternVL2 if lmdeploy is available
+    if HAS_LMDEPLOY:
+        try:
+            print("Attempting to load InternVL2 model...")
+            # Configure for AWQ quantized model
+            backend_config = TurbomindEngineConfig(
+                model_format='awq',
+                session_len=2048  # Explicitly set session length
+            )
+
+            # Set to non-streaming mode
+            internvl2_model = pipeline(
+                "OpenGVLab/InternVL2-40B-AWQ",
+                backend_config=backend_config,
+                model_name_or_path=None,
+                backend_name="turbomind",
+                stream=False,  # Disable streaming
+            )
+
+            print("InternVL2 model loaded successfully!")
+            return True
+        except Exception as e:
+            print(f"Failed to load InternVL2: {str(e)}")
+            internvl2_model = None
+
+    # If InternVL2 failed or lmdeploy not available, try BLIP
+    if HAS_BLIP:
+        try:
+            print("Falling back to BLIP model...")
+            blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+            blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
+            print("BLIP model loaded successfully!")
+            return True
+        except Exception as e:
+            print(f"Failed to load BLIP: {str(e)}")
+            blip_processor = None
+            blip_model = None
+
+    print("Could not load any model")
+    return False
+
+# Try to load a model at startup
+MODEL_LOADED = load_model()
+WHICH_MODEL = "InternVL2" if internvl2_model is not None else "BLIP" if blip_model is not None else "None"
 
 def analyze_image(image, prompt):
-    """Analyze the image using InternVL2 model"""
+    """Analyze the image using available model"""
+    if not MODEL_LOADED:
+        return "No model could be loaded. Please check the logs for details."
+
+    if not USE_GPU:
+        return "ERROR: This application requires GPU acceleration. No GPU detected."
+
     try:
-        start_time = time.time()
-
-        # Skip model loading if lmdeploy is not available
-        if not LMDEPLOY_AVAILABLE:
-            return ("This is a demo placeholder. The actual model couldn't be loaded because lmdeploy "
-                    "is not properly installed. Check your installation and dependencies.")
-
-        # Check for GPU
-        if not USE_GPU:
-            return ("ERROR: This application requires a GPU to run InternVL2. "
-                    "The NVIDIA driver was not detected on this system. "
-                    "Please make sure this Space is using a GPU-enabled instance and that the GPU is correctly initialized.")
-
-        # Make sure the model is loaded
-        if not load_internvl2_model():
-            return "Couldn't load InternVL2 model. See logs for details."
-
-        # Convert numpy array to PIL Image
+        # Convert image to right format if needed
         if isinstance(image, np.ndarray):
-            image_pil = Image.fromarray(image).convert('RGB')
+            pil_image = Image.fromarray(image).convert('RGB')
         else:
-            # If somehow it's already a PIL Image
-            image_pil = image.convert('RGB')
-
-        # We'll use a completely different approach - multiprocessing
-        # This runs the model in a separate process, avoiding any event loop conflicts
-        import multiprocessing as mp
-
-        # Define a function to run in a separate process
-        def run_in_process(prompt, image_path, result_queue):
+            pil_image = image.convert('RGB')
+
+        # If we have InternVL2 loaded, use it
+        if internvl2_model is not None:
             try:
-                # Set environment variables in the subprocess
-                import os
-                os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-                os.environ["LMDEPLOY_DISABLE_ASYNC"] = "1"
-
-                # Import libraries inside the process
-                from lmdeploy import pipeline, TurbomindEngineConfig
-
-                # Save the image to a temporary file to pass between processes
-                import tempfile
-                import torch
-
-                # Check GPU in subprocess
-                print(f"Subprocess GPU available: {torch.cuda.is_available()}")
-
-                # Configure for AWQ quantized model
-                backend_config = TurbomindEngineConfig(
-                    model_format='awq',
-                    session_len=2048
-                )
-
-                # Create new pipeline in the subprocess
-                model_pipeline = pipeline(
-                    MODEL_ID,
-                    backend_config=backend_config,
-                    log_level='INFO',
-                    model_name_or_path=None,
-                    backend_name="turbomind",
-                    stream=False,
-                    tensor_parallel=1,
-                )
-
-                # Load the image in the subprocess
-                from PIL import Image
-                image = Image.open(image_path).convert('RGB')
-
-                # Run inference
-                response = model_pipeline((prompt, image))
+                print("Running inference with InternVL2...")
+                response = internvl2_model((prompt, pil_image))
                 result = response.text if hasattr(response, "text") else str(response)
-
-                # Put the result in the queue
-                result_queue.put(("success", result))
-
+                return f"[InternVL2] {result}"
+            except Exception as e:
+                print(f"Error with InternVL2: {str(e)}")
+                # If InternVL2 fails, fall back to BLIP if available
+
+        # If we have BLIP loaded, use it
+        if blip_model is not None and blip_processor is not None:
+            try:
+                print("Running inference with BLIP...")
+                # BLIP doesn't use prompts the same way, simplify
+                inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
+                out = blip_model.generate(**inputs, max_new_tokens=100)
+                result = blip_processor.decode(out[0], skip_special_tokens=True)
+                return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
             except Exception as e:
-                import traceback
-                error_msg = f"Error in subprocess: {str(e)}\n{traceback.format_exc()}"
-                result_queue.put(("error", error_msg))
+                print(f"Error with BLIP: {str(e)}")
 
-        # Create a temporary file for the image
-        import tempfile
-        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
-            temp_path = temp_file.name
-            image_pil.save(temp_path)
+        return "No model was able to analyze the image. See logs for details."
 
-        try:
-            # Create a process-safe queue
-            result_queue = mp.Queue()
-
-            # Start the process
-            print("Starting model inference in a separate process")
-            process = mp.Process(
-                target=run_in_process,
-                args=(prompt, temp_path, result_queue)
-            )
-
-            # Make it a daemon so it terminates when the main process ends
-            process.daemon = True
-            process.start()
-
-            # Wait for the process to complete (with timeout)
-            process.join(timeout=180)  # 3 minute timeout
-
-            # Delete the temporary file
-            try:
-                os.unlink(temp_path)
-            except:
-                pass
-
-            if process.is_alive():
-                # Terminate the process if it's still running after timeout
-                process.terminate()
-                return "Model inference timed out after 180 seconds. The model might be too slow on this hardware."
-
-            # Get the result from the queue (non-blocking to avoid hanging)
-            if not result_queue.empty():
-                status, result = result_queue.get(block=False)
-                if status == "error":
-                    return f"Error in model inference: {result}"
-                else:
-                    elapsed_time = time.time() - start_time
-                    return result
-            else:
-                return "Unknown error: Model inference process completed but did not produce a result"
-
-        except Exception as e:
-            print(f"Error in multiprocessing: {str(e)}")
-            return f"Error setting up multiprocessing: {str(e)}"
-
     except Exception as e:
         print(f"Error in image analysis: {str(e)}")
         # Try to clean up memory in case of error
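One hazard in the new fallback wiring: if the `transformers` import fails, `blip_processor` and `blip_model` are never bound, yet `WHICH_MODEL` and `analyze_image` later evaluate `blip_model is not None`, so the module would die with a `NameError` instead of quietly skipping the fallback. A minimal guard under the diff's own names (a suggested placement, not part of the commit):

```python
# Sketch: bind the fallback globals before attempting the import, so the
# later "blip_model is not None" checks degrade gracefully on ImportError.
blip_processor = None
blip_model = None
try:
    from transformers import BlipProcessor, BlipForConditionalGeneration
    HAS_BLIP = True
except ImportError:
    HAS_BLIP = False  # blip_processor/blip_model simply stay None
```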
@@ -351,11 +255,13 @@ def process_image(image, analysis_type="general"):
 # Define the Gradio interface
 def create_interface():
     with gr.Blocks(title="Image Analysis with InternVL2") as demo:
-        gr.Markdown("# Image Analysis with InternVL2-40B")
+        gr.Markdown(f"# Image Analysis with {WHICH_MODEL}")
 
         # System diagnostics
         system_info = f"""
         ## System Diagnostics:
+        - Model Used: {WHICH_MODEL}
+        - Model Loaded: {MODEL_LOADED}
         - PyTorch Version: {torch.__version__}
         - CUDA Available: {torch.cuda.is_available()}
         - GPU Working: {USE_GPU}
@@ -363,14 +269,14 @@ def create_interface():
         """
 
         gr.Markdown(system_info)
-        gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
+        gr.Markdown(f"Upload an image to analyze it using the {WHICH_MODEL} model.")
 
         # Show warnings based on system status
-        if not LMDEPLOY_AVAILABLE:
-            gr.Markdown("⚠️ **WARNING**: lmdeploy is not properly installed. This demo will not function correctly.", elem_classes=["warning-message"])
+        if not MODEL_LOADED:
+            gr.Markdown("⚠️ **WARNING**: No model could be loaded. This demo will not function correctly.", elem_classes=["warning-message"])
 
         if not USE_GPU:
-            gr.Markdown("🚫 **ERROR**: NVIDIA GPU not detected. This application requires GPU acceleration to run InternVL2 model.", elem_classes=["error-message"])
+            gr.Markdown("🚫 **ERROR**: NVIDIA GPU not detected. This application requires GPU acceleration.", elem_classes=["error-message"])
 
         with gr.Row():
             with gr.Column(scale=1):
@@ -382,22 +288,34 @@
             )
             submit_btn = gr.Button("Analyze Image")
 
-            # Disable button if GPU is not available
-            if not USE_GPU:
+            # Disable button if GPU is not available or no model loaded
+            if not USE_GPU or not MODEL_LOADED:
                 submit_btn.interactive = False
 
         with gr.Column(scale=2):
             output_text = gr.Textbox(label="Analysis Result", lines=20)
             if not USE_GPU:
-                output_text.value = f"""ERROR: NVIDIA GPU driver not detected. This application requires GPU acceleration to run the InternVL2 model.
+                output_text.value = f"""ERROR: NVIDIA GPU driver not detected. This application requires GPU acceleration.
 
 Diagnostics:
+- Model Used: {WHICH_MODEL}
 - PyTorch Version: {torch.__version__}
 - CUDA Available via PyTorch: {torch.cuda.is_available()}
 - nvidia-smi Available: {nvidia_smi_available}
 - GPU Working: {USE_GPU}
 
 Please ensure this Space is using a GPU-enabled instance and that the GPU is correctly initialized."""
+            elif not MODEL_LOADED:
+                output_text.value = f"""ERROR: No model could be loaded.
+
+Diagnostics:
+- Model Used: {WHICH_MODEL}
+- PyTorch Version: {torch.__version__}
+- CUDA Available via PyTorch: {torch.cuda.is_available()}
+- nvidia-smi Available: {nvidia_smi_available}
+- GPU Working: {USE_GPU}
+
+Please check the logs for more details."""
 
         submit_btn.click(
             fn=process_image,
@@ -424,22 +342,6 @@ Please ensure this Space is using a GPU-enabled instance and that the GPU is cor
 
     If you're running this on Hugging Face Spaces, make sure to select a GPU-enabled hardware type.
     """)
-
-        # Examples
-        try:
-            gr.Examples(
-                examples=[
-                    ["data_temp/page_2.png", "general"],
-                    ["data_temp/page_2.png", "text"],
-                    ["data_temp/page_2.png", "chart"]
-                ],
-                inputs=[input_image, analysis_type],
-                outputs=output_text,
-                fn=process_image,
-                cache_examples=True
-            )
-        except Exception as e:
-            print(f"Warning: Could not load examples: {str(e)}")
 
     return demo
 
@@ -448,5 +350,5 @@ if __name__ == "__main__":
     # Create the Gradio interface
     demo = create_interface()
 
-    # Launch the interface (removed incompatible parameters)
+    # Launch the interface
     demo.launch(share=False, server_name="0.0.0.0")
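A closing note on the InternVL2 path: lmdeploy's documented vision-language usage is to build the pipeline from the model id plus a `TurbomindEngineConfig` and call it with a `(prompt, image)` tuple; the extra keywords in the diff (`model_name_or_path`, `backend_name`, `stream`) do not appear in the documented `pipeline()` signature of recent releases, so depending on the installed version they may be ignored or rejected. A minimal sketch of the documented call path, assuming a CUDA GPU and the AWQ checkpoint named in the diff:

```python
# Standalone InternVL2 inference sketch via lmdeploy (assumes a CUDA-capable
# GPU with enough memory for the AWQ-quantized 40B checkpoint).
from lmdeploy import pipeline, TurbomindEngineConfig
from PIL import Image

pipe = pipeline(
    "OpenGVLab/InternVL2-40B-AWQ",
    backend_config=TurbomindEngineConfig(model_format="awq", session_len=2048),
)

image = Image.open("data_temp/page_2.png").convert("RGB")  # example image from the repo
response = pipe(("Describe this image in detail.", image))
print(response.text)
```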
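On the `(Note: Custom prompts not supported with BLIP fallback model)` caveat: the same BLIP checkpoint does support a lightweight form of prompting, conditional captioning, where text passed to the processor becomes a prefix that the generated caption continues (per the `Salesforce/blip-image-captioning-base` model card). A sketch of that variant, should the fallback ever want to use the user's prompt as a prefix:

```python
# Conditional captioning with the same BLIP checkpoint the fallback loads:
# the text argument is a prefix the caption continues, not a free-form prompt.
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to("cuda")

image = Image.open("data_temp/page_2.png").convert("RGB")
inputs = processor(image, "a document page showing", return_tensors="pt").to("cuda")
out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))
```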