mitch committed on
Commit 4ab8943 · unverified · 1 Parent(s): 6e569c4

Updated app.py

Files changed (1)
  1. app.py +172 -301
app.py CHANGED
@@ -1,41 +1,48 @@
 import gradio as gr
+import os
 from llama_cpp import Llama
 from qdrant_client import QdrantClient
 from datasets import load_dataset
 from sentence_transformers import SentenceTransformer
-import cv2
-import os
 import tempfile
 import uuid
 import re
 import subprocess
-import time
+import traceback
+
+QDRANT_COLLECTION_NAME = "video_frames"
+VIDEO_SEGMENT_DURATION = 40 # Extract 40 seconds around the timestamp
+
+# Load Secrets from Environment Variables
+QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
+
+# Check for qdrant key
+if not QDRANT_API_KEY:
+    print("Error: QDRANT_API_KEY environment variable not found.")
+    print("Please add your Qdrant API key as a secret named 'QDRANT_API_KEY' in your Hugging Face Space settings.")
+    raise ValueError("QDRANT_API_KEY environment variable not set.")
 
 print("Initializing LLM...")
-# Ensure the model file exists or download will be attempted
 try:
     llm = Llama.from_pretrained(
         repo_id="m1tch/gemma-finetune-ai_class_gguf",
         filename="gemma-3_ai_class.Q8_0.gguf",
-        n_gpu_layers=-1, # Use -1 to offload all possible layers to GPU
+        n_gpu_layers=-1,
         n_ctx=2048,
-        verbose=False # Set to True for more detailed llama.cpp output
+        verbose=False
     )
     print("LLM initialized successfully.")
 except Exception as e:
     print(f"Error initializing LLM: {e}")
-    # Optionally raise the exception or handle it gracefully
     raise
 
 print("Connecting to Qdrant...")
 try:
     qdrant_client = QdrantClient(
         url="https://2c18d413-cbb5-441c-b060-4c8c2302dcde.us-east4-0.gcp.cloud.qdrant.io:6333/",
-        # It's generally safer to load API keys from environment variables or a config file
-        api_key=os.environ.get("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.b86GHyWqFDw63UkrR98LlY2GU4XdVyOAlv_qpm9KKTw"),
-        timeout=60 # Increase timeout if experiencing connection issues
+        api_key=QDRANT_API_KEY,
+        timeout=60
     )
-    # Test connection
     qdrant_client.get_collections()
     print("Qdrant connection successful.")
 except Exception as e:
@@ -44,11 +51,9 @@ except Exception as e:
 
 print("Loading dataset stream...")
 try:
-    # Load video dataset - ensure you have internet access
-    # streaming=True avoids downloading the entire dataset at once
+    # Load video dataset
     dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
-    # Peek at the first item to ensure the stream works
-    print(f"Dataset loaded. First item example: {next(iter(dataset))['__key__']}")
+    print(f"Dataset loaded.")
 except Exception as e:
     print(f"Error loading dataset: {e}")
     raise
@@ -60,13 +65,13 @@ except Exception as e:
     print(f"Error loading Sentence Transformer model: {e}")
     raise
 
+
 def rag_query(client, collection_name, query_text, top_k=5, filter_condition=None):
     """
     Test RAG by querying the vector database with text. Returns a dictionary with search results and metadata.
     Uses the pre-loaded embedding_model.
     """
     try:
-        # Use the pre-loaded model
         query_vector = embedding_model.encode(query_text).tolist()
 
         search_params = {
@@ -100,150 +105,82 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
         }
     except Exception as e:
         print(f"Error during RAG query: {e}")
-        # Return a structure indicating error, but don't crash the app
+        traceback.print_exc()
         return {"error": str(e), "query": query_text, "results": []}
 
 
 def extract_video_segment(video_id, start_time, duration, dataset):
     """
-    Generator function that extracts and yields a single video segment file path.
-    Modified to return a single path suitable for Gradio.
+    Extracts a single video segment file path from the dataset stream.
+    Returns a single path suitable for Gradio or None on failure.
     """
-    target_id = str(video_id) # Ensure it's a string
-    target_key = f"videos/{target_id}/{target_id}"
-    start_time = float(start_time) # Ensure it's a float
+    target_id = str(video_id)
+    target_key_pattern = re.compile(r"videos/" + re.escape(target_id) + r"/" + re.escape(target_id))
+
+    start_time = float(start_time)
     duration = float(duration)
 
     unique_id = str(uuid.uuid4())
-    temp_dir = os.path.join(tempfile.gettempdir(), f"gradio_video_{unique_id}")
+    temp_dir = os.path.join(tempfile.gettempdir(), f"gradio_video_seg_{unique_id}")
     os.makedirs(temp_dir, exist_ok=True)
-    temp_video_path = os.path.join(temp_dir, f"{target_id}_full_{unique_id}.mp4")
-    output_path_opencv = os.path.join(temp_dir, f"output_opencv_{unique_id}.mp4")
+    temp_video_path_full = os.path.join(temp_dir, f"{target_id}_full_{unique_id}.mp4")
     output_path_ffmpeg = os.path.join(temp_dir, f"output_ffmpeg_{unique_id}.mp4")
 
-    print(f"Attempting to extract segment for video_id={target_id}, start={start_time}, duration={duration}")
-    print(f"Looking for dataset key: {target_key}")
+    print(f"Attempting to extract segment for video_id={target_id}, start={start_time:.2f}, duration={duration:.2f}")
+    print(f"Looking for dataset key matching pattern: {target_key_pattern.pattern}")
     print(f"Temporary directory: {temp_dir}")
 
+    found_sample = None
+    max_search_attempts = 1000 # Limit
+    print(f"Searching dataset stream for key matching pattern: {target_key_pattern.pattern}")
+
+    dataset_iterator = iter(dataset)
+
-    try:
-        # --- Find and save the full video ---
-        found = False
-        retries = 3 # Retry finding the video in the stream
-        dataset_iterator = iter(dataset) # Get an iterator
-
-        for _ in range(retries * 5000): # Limit search iterations to avoid infinite loops in case of issues
-            try:
-                sample = next(dataset_iterator)
-                if '__key__' in sample and sample['__key__'] == target_key:
-                    found = True
-                    print(f"Found video key {target_key}. Saving to {temp_video_path}...")
-                    with open(temp_video_path, 'wb') as f:
-                        f.write(sample['mp4'])
-                    print(f"Video saved successfully ({os.path.getsize(temp_video_path)} bytes).")
-                    break
-            except StopIteration:
-                print("Reached end of dataset stream without finding the video.")
-                break
-            except Exception as e:
-                print(f"Error iterating dataset: {e}")
-                time.sleep(1) # Wait a bit before retrying iteration
-
-        if not found:
-            print(f"Could not find video with ID {target_id} (key: {target_key}) in the dataset stream after {_ + 1} attempts.")
-            # Attempt to reset the stream IF the dataset library supports it easily (often not simple with streaming)
-            # For now, we just report failure for this request.
-            # yield None # Don't yield here, let the outer function handle no video path
-            return None # Return None instead of yielding
-
-        # --- Process the saved video ---
-        if not os.path.exists(temp_video_path) or os.path.getsize(temp_video_path) == 0:
-            print(f"Temporary video file {temp_video_path} is missing or empty.")
-            return None
-
-        cap = cv2.VideoCapture(temp_video_path)
-        if not cap.isOpened():
-            print(f"Error opening video file with OpenCV: {temp_video_path}")
-            return None
-
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        # Handle cases where FPS might be 0 or invalid
-        if fps <= 0:
-            print(f"Warning: Invalid FPS ({fps}) detected for {temp_video_path}. Assuming 30 FPS.")
-            fps = 30 # Assume a default FPS
-
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        total_vid_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        vid_duration = total_vid_frames / fps if fps > 0 else 0
-
-        print(f"Video properties: {width}x{height} @ {fps:.2f}fps, Total Duration: {vid_duration:.2f}s")
-
-        start_frame = int(start_time * fps)
-        end_frame = int((start_time + duration) * fps)
-
-        # Clamp frame numbers to valid range
-        start_frame = max(0, start_frame)
-        end_frame = min(total_vid_frames, end_frame)
-
-        if start_frame >= total_vid_frames or start_frame >= end_frame:
-            print(f"Calculated start frame ({start_frame}) is beyond video length ({total_vid_frames}) or segment is invalid.")
-            cap.release()
-            return None
-
-        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
-        frames_to_write = end_frame - start_frame
-
-        print(f"Extracting frames from {start_frame} to {end_frame} ({frames_to_write} frames)")
-
-        # --- Try OpenCV writing first (fallback) ---
-        fourcc_opencv = cv2.VideoWriter_fourcc(*'mp4v') # mp4v is often more compatible than avc1 with base OpenCV
-        out_opencv = cv2.VideoWriter(output_path_opencv, fourcc_opencv, fps, (width, height))
-
-        if not out_opencv.isOpened():
-            print("Error opening OpenCV VideoWriter with mp4v.")
-            cap.release()
-            return None
-
-        frames_written_opencv = 0
-        while frames_written_opencv < frames_to_write:
-            ret, frame = cap.read()
-            if not ret:
-                print("Warning: Ran out of frames before reaching target end frame.")
+    try:
+        # Find and save the full video from the stream
+        for i in range(max_search_attempts):
+            try:
+                sample = next(dataset_iterator)
+                if '__key__' in sample and 'mp4' in sample and target_key_pattern.match(sample['__key__']):
+                    print(f"Found video key {sample['__key__']} after {i+1} iterations. Saving to {temp_video_path_full}...")
+                    with open(temp_video_path_full, 'wb') as f:
+                        f.write(sample['mp4'])
+                    print(f"Video saved successfully ({os.path.getsize(temp_video_path_full)} bytes).")
+                    found_sample = sample
+                    break # Found the video
+            except StopIteration:
+                print("Reached end of dataset stream without finding the video within search limit.")
                 break
-            out_opencv.write(frame)
-            frames_written_opencv += 1
+            except Exception as e:
+                print(f"Warning: Error iterating dataset sample {i+1}: {e}")
 
-        out_opencv.release()
-        print(f"OpenCV finished writing {frames_written_opencv} frames to {output_path_opencv}")
-
-        # --- Release OpenCV capture ---
-        cap.release() # Release the capture object before trying ffmpeg
+        if not found_sample or not os.path.exists(temp_video_path_full) or os.path.getsize(temp_video_path_full) == 0:
+            print(f"Could not find or save video with ID {target_id} from dataset stream.")
+            return None
 
-        # --- Try converting/extracting with FFmpeg (preferred for compatibility) ---
+        # Process the saved video with FFmpeg
         final_output_path = None
         try:
-            # Use ffmpeg to directly cut the segment and ensure web-compatible encoding
-            # This is generally more reliable than OpenCV for specific timings and codecs
             cmd = [
                 'ffmpeg',
-                '-ss', str(start_time), # Start time
-                '-i', temp_video_path, # Input file (original downloaded)
-                '-t', str(duration), # Duration of the segment
-                '-c:v', 'libx264', # Video codec H.264
-                '-profile:v', 'baseline', # Baseline profile for broad compatibility
-                '-level', '3.0', # Level 3.0
-                '-preset', 'fast', # Encoding speed/quality trade-off
-                '-pix_fmt', 'yuv420p', # Pixel format for compatibility
-                '-movflags', '+faststart', # Optimize for web streaming
-                '-c:a', 'aac', # Audio codec AAC (common)
-                '-b:a', '128k', # Audio bitrate
-                '-y', # Overwrite output file if exists
+                '-y',
+                '-ss', str(start_time),
+                '-i', temp_video_path_full,
+                '-t', str(duration),
+                '-c:v', 'libx264',
+                '-profile:v', 'baseline',
+                '-level', '3.0',
+                '-preset', 'fast',
+                '-pix_fmt', 'yuv420p',
+                '-movflags', '+faststart',
+                '-c:a', 'aac',
+                '-b:a', '128k',
+                '-vf', f'select=gte(t,{start_time})',
+                '-vsync', 'vfr',
                 output_path_ffmpeg
             ]
             print(f"Running FFmpeg command: {' '.join(cmd)}")
-            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) # Add timeout
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
 
             if result.returncode == 0 and os.path.exists(output_path_ffmpeg) and os.path.getsize(output_path_ffmpeg) > 0:
                 print(f"FFmpeg processing successful. Output: {output_path_ffmpeg}")
@@ -252,174 +189,125 @@ def extract_video_segment(video_id, start_time, duration, dataset):
                 print(f"FFmpeg error (Return Code: {result.returncode}):")
                 print(f"FFmpeg stdout:\n{result.stdout}")
                 print(f"FFmpeg stderr:\n{result.stderr}")
-                print("Falling back to OpenCV output.")
-                # Check if OpenCV output is valid before using it
-                if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
-                    final_output_path = output_path_opencv
-                else:
-                    print("OpenCV output is also invalid or empty.")
-                    final_output_path = None # Neither worked
+                print("FFmpeg failed.")
+                final_output_path = None
 
         except subprocess.TimeoutExpired:
-            print("FFmpeg command timed out.")
-            print("Falling back to OpenCV output.")
-            if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
-                final_output_path = output_path_opencv
-            else:
-                print("OpenCV output is also invalid or empty.")
-                final_output_path = None
+            print("FFmpeg command timed out.")
+            final_output_path = None
         except FileNotFoundError:
-            print("Error: ffmpeg command not found. Make sure FFmpeg is installed and in your system's PATH.")
-            print("Falling back to OpenCV output.")
-            if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
-                final_output_path = output_path_opencv
-            else:
-                print("OpenCV output is also invalid or empty.")
-                final_output_path = None
+            print("Error: ffmpeg command not found. Make sure FFmpeg is installed.")
+            final_output_path = None
         except Exception as e:
             print(f"An unexpected error occurred during FFmpeg processing: {e}")
-            print("Falling back to OpenCV output.")
-            if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
-                final_output_path = output_path_opencv
-            else:
-                print("OpenCV output is also invalid or empty.")
-                final_output_path = None
-
-        # Clean up the large temporary full video file *after* processing
-        if os.path.exists(temp_video_path):
-            try:
-                os.remove(temp_video_path)
-                print(f"Cleaned up temporary full video: {temp_video_path}")
-            except Exception as e:
-                print(f"Warning: Could not remove temporary file {temp_video_path}: {e}")
-
-        # If FFmpeg failed, potentially clean up its failed output
+            traceback.print_exc()
+            final_output_path = None
+
+    finally:
+        # Clean up temporary files
+        print(f"Cleaning up temporary directory: {temp_dir}")
+        if os.path.exists(temp_video_path_full):
+            try:
+                os.remove(temp_video_path_full)
+                print(f"Cleaned up temporary full video: {temp_video_path_full}")
+            except Exception as e:
+                print(f"Warning: Could not remove temporary file {temp_video_path_full}: {e}")
+
+        # Clean up failed FFmpeg output if it exists and wasn't the final path
         if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
-            try:
-                os.remove(output_path_ffmpeg)
-            except Exception as e:
-                print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
-
+            try:
+                os.remove(output_path_ffmpeg)
+            except Exception as e:
+                print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
 
-        # Return the path of the successfully created segment
+    # Return the path of the successfully created segment or None
+    if final_output_path and os.path.exists(final_output_path):
         print(f"Returning video segment path: {final_output_path}")
-        return final_output_path # Return the path string directly
-
-    except Exception as e:
-        print(f"Error processing video segment for {video_id}: {e}")
-        import traceback
-        traceback.print_exc() # Print detailed traceback for debugging
-        # Clean up potentially partially created files in case of error
-        if 'cap' in locals() and cap.isOpened(): cap.release()
-        if 'out_opencv' in locals() and out_opencv.isOpened(): out_opencv.release()
-        # Attempt cleanup of temp files on error
-        if os.path.exists(temp_video_path): os.remove(temp_video_path)
-        if os.path.exists(output_path_opencv): os.remove(output_path_opencv)
-        if os.path.exists(output_path_ffmpeg): os.remove(output_path_ffmpeg)
-        return None # Return None on error
-
-QDRANT_COLLECTION_NAME = "video_frames"
-VIDEO_SEGMENT_DURATION = 30 # Extract 30 seconds around the timestamp
+        return final_output_path
+    else:
+        print("Video segment extraction failed.")
+        return None
+
 
 def parse_llm_output(text):
     """
-    Parses the LLM's structured output using a mix of regex for simple
-    fields (video_id, timestamp) and string manipulation for reasoning
-    as a workaround for regex matching issues.
+    Parses the LLM's structured output using string manipulation.
     """
-    # Optional: Print repr for debugging if needed
-    # print(f"\nDEBUG: Raw text input to parse_llm_output:\n{repr(text)}\n")
     data = {}
+    print(f"\nDEBUG: Raw text input to parse_llm_output:\n---\n{text}\n---")
 
-    # --- Parse video_id and timestamp with regex (as they worked) ---
-    simple_patterns = {
-        'video_id': r"\{Best Result:\s*\[?([^\]\}]+)\]?\s*\}",
-        'timestamp': r"\{Timestamp:\s*\[?([^\]\}]+)\]?\s*\}",
-    }
-    for key, pattern in simple_patterns.items():
-        match = re.search(pattern, text, re.IGNORECASE)
-        if match:
-            value = match.group(1).strip()
-            # Strip potential quotes (single, double, curly)
-            value = value.strip('\'"“”')
-            data[key] = value
-        else:
-            print(f"Warning: Could not parse '{key}' using regex pattern: {pattern}")
-            data[key] = None
-
-    # --- Parse reasoning using string manipulation ---
-    reasoning_value = None
-    try:
-        # Define markers, converting search key to lowercase for case-insensitive find
-        key_marker_lower = "{reasoning:"
-        # Find the start index based on the lowercase marker
-        start_index = text.lower().find(key_marker_lower)
+    def extract_field(text, field_name):
+        start_marker_lower = "{" + field_name.lower() + ":"
+        start_index = text.lower().find(start_marker_lower)
 
         if start_index != -1:
-            # Find the closing brace '}' starting the search *after* the marker
-            # Add length of the marker to ensure we find the correct closing brace
-            search_start_for_brace = start_index + len(key_marker_lower)
-            end_index = text.find('}', search_start_for_brace)
+            actual_marker_end = start_index + len(start_marker_lower)
+            end_index = text.find('}', actual_marker_end)
 
             if end_index != -1:
-                # Extract content using original casing from text, between actual marker end and brace
-                # Calculate the actual end of the marker in the original string
-                actual_marker_end = start_index + len(key_marker_lower)
                 value = text[actual_marker_end : end_index]
-
-                # Perform cleanup on the extracted value
-                value = value.strip() # Strip outer whitespace first
+                value = value.strip()
                 if value.startswith('[') and value.endswith(']'):
-                    value = value[1:-1] # Slice off brackets
-                value = value.strip('\'"“”') # Strip quotes
-                value = value.strip() # Strip whitespace again
-                reasoning_value = value
+                    value = value[1:-1].strip()
+                value = value.strip('\'"“”')
+                return value.strip()
             else:
-                print("Warning: Found '{reasoning:' marker but no closing '}' found afterwards.")
+                print(f"Warning: Found '{{{field_name}:' marker but no closing '}}' found afterwards.")
         else:
-            print("Warning: Marker '{reasoning:' not found in text.")
+            print(f"Warning: Marker '{{{field_name}:' not found in text.")
+        return None
 
-    except Exception as e:
-        # Catch potential errors during slicing or finding
-        print(f"Error during string manipulation parsing for reasoning: {e}")
-
-    data['reasoning'] = reasoning_value # Assign found value or None
+    # Extract fields
+    data['video_id'] = extract_field(text, 'Best Result')
+    data['timestamp'] = extract_field(text, 'Timestamp')
+    data['content'] = extract_field(text, 'Content')
+    data['reasoning'] = extract_field(text, 'Reasoning')
 
-    # --- Validation ---
     if data.get('timestamp'):
        try:
            float(data['timestamp'])
        except ValueError:
            print(f"Warning: Parsed timestamp '{data['timestamp']}' is not a valid number.")
+           data['timestamp'] = None
 
-    print(f"Parsed LLM output (Using String Manipulation for Reasoning): {data}")
+    print(f"Parsed LLM output: {data}")
    return data


 def process_query_and_get_video(query_text):
     """
     Orchestrates RAG, LLM query, parsing, and video extraction.
+    Returns only the video path or None.
     """
     print(f"\n--- Processing query: '{query_text}' ---")
 
-    # 1. RAG Query
+    # RAG Query
     print("Step 1: Performing RAG query...")
     rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
 
     if "error" in rag_results or not rag_results.get("results"):
         error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
         print(f"RAG Error/No Results: {error_msg}")
-        return f"Error during RAG search: {error_msg}", None # Return error message and no video
+        # Return None for video output on RAG failure
+        return None
 
     print(f"RAG query successful. Found {len(rag_results['results'])} results.")
-    # print(f"Top RAG result: {rag_results['results'][0]}") # For debugging
 
-    # 2. Format LLM Prompt
+    # Format LLM Prompt
     print("Step 2: Formatting prompt for LLM...")
-    # Use the exact prompt structure from your example
+    results_for_llm = "\n".join([
+        f"Rank: {r['rank']}, Score: {r['score']:.4f}, Video ID: {r['video_id']}, Timestamp: {r['timestamp']}, Subtitle: {r['subtitle']}"
+        for r in rag_results['results']
+    ])
+
     prompt = f"""You are tasked with selecting the most relevant information from a set of video subtitle segments to answer a query.
 
-QUERY (also seen below): "{query_text}"
+QUERY: "{query_text}"
+
+Here are the relevant video segments found:
+---
+{results_for_llm}
+---
 
 For each result provided, evaluate how well it directly addresses the definition or explanation related to the query. Pay attention to:
 1. Clarity of explanation
@@ -431,12 +319,11 @@ From the provided results, select the SINGLE BEST match that most directly answe
 Format your response STRICTLY as follows, with each field on a new line:
 {{Best Result: [video_id]}}
 {{Timestamp: [timestamp]}}
-{{Content: [subtitle text]}}
+{{Content: [subtitle text from the selected result]}}
 {{Reasoning: [Brief explanation of why this result best answers the query]}}
+"""
 
-{rag_results}""" # Pass the whole RAG results dictionary as string representation
-
-    # 3. Call LLM
+    # Call LLM
     print("Step 3: Querying the LLM...")
     try:
         output = llm.create_chat_completion(
@@ -444,60 +331,51 @@ Format your response STRICTLY as follows, with each field on a new line:
                 {"role": "system", "content": "You are a helpful assistant designed to select the best video segment based on relevance to a query, following a specific output format."},
                 {"role": "user", "content": prompt},
             ],
-            temperature=0.1, # Lower temperature for more deterministic selection
-            max_tokens=250 # Adjust as needed, ensure enough space for reasoning
+            temperature=0.1,
+            max_tokens=300
         )
-        llm_response_text = output['choices'][0]['message']['content']
-        print(f"LLM Response:\n{llm_response_text}")
+        llm_response_text = output['choices'][0]['message']['content'].strip()
+        print(f"LLM Response:\n---\n{llm_response_text}\n---")
     except Exception as e:
         print(f"Error during LLM call: {e}")
-        return f"Error calling LLM: {e}", None
+        traceback.print_exc()
+        return None
 
-    # 4. Parse LLM Response
+    # Parse LLM Response
     print("Step 4: Parsing LLM response...")
     parsed_data = parse_llm_output(llm_response_text)
 
     video_id = parsed_data.get('video_id')
     timestamp_str = parsed_data.get('timestamp')
-    reasoning = parsed_data.get('reasoning')
 
     if not video_id or not timestamp_str:
         print("Error: Could not parse required video_id or timestamp from LLM response.")
-        fallback_reasoning = reasoning if reasoning else "Could not determine the best segment."
-        # Include raw LLM response in the error message for debugging
-        error_msg = f"Failed to parse LLM response. LLM said:\n---\n{llm_response_text}\n---\nReasoning (if found): {fallback_reasoning}"
-        return error_msg, None
+        print(f"Raw LLM response that failed parsing:\n---\n{llm_response_text}\n---") # Print raw output for debugging
+        # Return None for video output on parsing failure
+        return None
 
     try:
         timestamp = float(timestamp_str)
-        # Adjust timestamp slightly - start a bit earlier if possible
-        start_time = max(0.0, timestamp - (VIDEO_SEGMENT_DURATION / 4))
+        start_time = max(0.0, timestamp - (VIDEO_SEGMENT_DURATION / 4.0))
+        actual_duration = VIDEO_SEGMENT_DURATION
+        print(f"Calculated segment start time: {start_time:.2f}s")
+
     except ValueError:
         print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
-        error_msg = f"Invalid timestamp format from LLM ('{timestamp_str}'). LLM reasoning (if found): {reasoning}"
-        return error_msg, None
+        # Return None for video output on invalid timestamp
+        return None
 
-    final_reasoning = reasoning if reasoning else "No reasoning provided by LLM."
-
-    # 5. Extract Video Segment
-    print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {VIDEO_SEGMENT_DURATION}s)...")
-    # Reset the dataset iterator for each new request IF POSSIBLE.
-    # NOTE: Resetting a Hugging Face streaming dataset is tricky.
-    # It might re-start from the beginning. For heavy use, downloading might be better.
-    # Or, implement caching of downloaded videos if the same ones are accessed often.
-    # For this example, we'll rely on the stream potentially starting over or finding the item.
-    global dataset # Make sure we use the global dataset object
-    # dataset = iter(load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)) # Attempt re-init (might be slow)
-
-    video_path = extract_video_segment(video_id, start_time, VIDEO_SEGMENT_DURATION, dataset)
+    # Extract Video Segment
+    print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {actual_duration:.2f}s)...")
+    video_path = extract_video_segment(video_id, start_time, actual_duration, dataset)
 
     if video_path and os.path.exists(video_path):
         print(f"Video segment extracted successfully: {video_path}")
-        return final_reasoning, video_path
+        return video_path
     else:
         print("Failed to extract video segment.")
-        error_msg = f"{final_reasoning}\n\n(However, failed to extract the corresponding video segment for ID {video_id} at timestamp {timestamp_str}.)"
-        return error_msg, None
+        return None
+
 
 with gr.Blocks() as iface:
     gr.Markdown(
@@ -511,32 +389,25 @@ with gr.Blocks() as iface:
     query_input = gr.Textbox(label="Your Question", placeholder="e.g., What is a convolutional neural network?")
     submit_button = gr.Button("Ask & Find Video")
     with gr.Row():
-        reasoning_output = gr.Markdown(label="LLM Reasoning")
-    with gr.Row():
-        video_output = gr.Video(label="Relevant Video Segment")
+        video_output = gr.Video(label="Relevant Video Segment", format="mp4")
 
     submit_button.click(
         fn=process_query_and_get_video,
         inputs=query_input,
-        outputs=[reasoning_output, video_output]
+        outputs=video_output
     )
 
     gr.Examples(
         examples=[
-            "What are activation functions?",
-            "Explain backpropagation.",
-            "What is transfer learning?",
-            "Show me an example of data augmentation.",
-            "What is the difference between classification and regression?",
+            "Using only the videos, explain how ResNets work.",
+            "Using only the videos, explain the advantages of CNNs over fully connected networks.",
+            "Using only the videos, explain the binary cross entropy loss function.",
         ],
         inputs=query_input,
-        outputs=[reasoning_output, video_output], # Outputs needed for examples too
-        fn=process_query_and_get_video, # The function to run for examples
-        cache_examples=False, # Disable caching if streaming/LLM state changes
+        outputs=video_output,
+        fn=process_query_and_get_video,
+        cache_examples=False,
     )
 
-# --- Launch the Interface ---
-# share=True creates a public link, requires internet. Set to False for local use.
-# debug=True provides more detailed error outputs in the console.
 print("Launching Gradio interface...")
-iface.launch(debug=True, share=False) # Run locally in the notebook
+iface.launch(debug=True, share=False)
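
Note: the marker format this commit standardizes on ({Best Result: [...]}, {Timestamp: [...]}, etc.) can be sanity-checked offline. Below is a minimal, self-contained sketch of the same string-manipulation approach; the sample response is fabricated for illustration and is not real model output.

# Standalone sketch of the marker parsing used by parse_llm_output.
# The sample response below is made up for illustration only.

def extract_field(text, field_name):
    # Case-insensitive search for "{Field:", then take everything up to the
    # next "}" and strip brackets, quotes, and whitespace.
    marker = "{" + field_name.lower() + ":"
    start = text.lower().find(marker)
    if start == -1:
        return None
    end = text.find("}", start + len(marker))
    if end == -1:
        return None
    value = text[start + len(marker):end].strip()
    if value.startswith("[") and value.endswith("]"):
        value = value[1:-1].strip()
    return value.strip("'\"").strip()

sample = """{Best Result: [0007]}
{Timestamp: [1234.5]}
{Content: [A residual block adds a skip connection...]}
{Reasoning: [Directly defines residual connections.]}"""

for field in ("Best Result", "Timestamp", "Content", "Reasoning"):
    print(field, "->", extract_field(sample, field))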
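
For reference, the FFmpeg invocation assembled in extract_video_segment amounts to roughly the following; input.mp4 and output.mp4 are placeholder paths, start/duration are example values, and the select/vsync filter pair added by the commit is omitted here for brevity.

# Rough standalone equivalent of the segment cut performed in extract_video_segment.
import subprocess

start, duration = 120.0, 40.0
cmd = [
    "ffmpeg", "-y",
    "-ss", str(start),            # seek to the segment start
    "-i", "input.mp4",            # placeholder input path
    "-t", str(duration),          # segment length in seconds
    "-c:v", "libx264", "-profile:v", "baseline", "-level", "3.0",
    "-preset", "fast", "-pix_fmt", "yuv420p",
    "-movflags", "+faststart",    # web-friendly MP4 for the Gradio player
    "-c:a", "aac", "-b:a", "128k",
    "output.mp4",                 # placeholder output path
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
print(result.returncode)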