mitch committed on
Commit
d6e91f6
·
unverified ·
1 Parent(s): 5147cde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -154
app.py CHANGED
@@ -1,16 +1,15 @@
1
  import gradio as gr
2
- import os
3
  from llama_cpp import Llama
4
  from qdrant_client import QdrantClient
5
  from datasets import load_dataset
6
  from sentence_transformers import SentenceTransformer
7
  import cv2
 
8
  import tempfile
9
  import uuid
10
  import re
11
  import subprocess
12
  import time
13
- import traceback
14
 
15
  # Configuration
16
  QDRANT_COLLECTION_NAME = "video_frames"
@@ -53,8 +52,9 @@ except Exception as e:
53
 
54
  print("Loading dataset stream...")
55
  try:
 
56
  dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
57
- print(f"Dataset loaded.")
58
  except Exception as e:
59
  print(f"Error loading dataset: {e}")
60
  raise
@@ -85,7 +85,7 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
85
  if filter_condition:
86
  search_params["filter"] = filter_condition
87
 
88
- search_results = client.query_points(query_points=query_vector, **search_params)
89
 
90
  formatted_results = []
91
  for idx, result in enumerate(search_results):
@@ -105,69 +105,128 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
105
  }
106
  except Exception as e:
107
  print(f"Error during RAG query: {e}")
108
- traceback.print_exc()
109
  return {"error": str(e), "query": query_text, "results": []}
110
 
111
 
112
  def extract_video_segment(video_id, start_time, duration, dataset):
113
  """
114
- Extracts a single video segment file path from the dataset stream.
115
- Saves it to a temporary file and returns the path or None on failure.
116
- Uses FFmpeg with -ss before -i and -t.
117
  """
118
  target_id = str(video_id)
119
- target_key_pattern = re.compile(r"videos/" + re.escape(target_id) + r"/" + re.escape(target_id))
120
-
121
  start_time = float(start_time)
122
  duration = float(duration)
123
 
124
  unique_id = str(uuid.uuid4())
125
- temp_dir = os.path.join(tempfile.gettempdir(), f"gradio_video_seg_{unique_id}")
126
  os.makedirs(temp_dir, exist_ok=True)
127
- temp_video_path_full = os.path.join(temp_dir, f"{target_id}_full_{unique_id}.mp4")
 
128
  output_path_ffmpeg = os.path.join(temp_dir, f"output_ffmpeg_{unique_id}.mp4")
129
 
130
- print(f"Attempting to extract segment for video_id={target_id}, start={start_time:.2f}, duration={duration:.2f}")
131
- print(f"Looking for dataset key matching pattern: {target_key_pattern.pattern}")
132
  print(f"Temporary directory: {temp_dir}")
133
 
134
- found_sample = None
135
- max_search_attempts = 1000 # Limit
136
- print(f"Searching dataset stream for key matching pattern: {target_key_pattern.pattern}")
137
-
138
- dataset_iterator = iter(dataset)
139
 
140
  try:
141
- # Find and save the full video from the stream
142
- for i in range(max_search_attempts):
143
- try:
144
- sample = next(dataset_iterator)
145
- if '__key__' in sample and 'mp4' in sample and target_key_pattern.match(sample['__key__']):
146
- print(f"Found video key {sample['__key__']} after {i+1} iterations. Saving to {temp_video_path_full}...")
147
- with open(temp_video_path_full, 'wb') as f:
148
- f.write(sample['mp4'])
149
- print(f"Video saved successfully ({os.path.getsize(temp_video_path_full)} bytes).")
150
- found_sample = sample
151
- break # Found the video, exit loop
152
- except StopIteration:
153
- print("Reached end of dataset stream without finding the video within search limit.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  break
155
- except Exception as e:
156
- print(f"Warning: Error iterating dataset sample {i+1}: {e}")
157
 
158
- if not found_sample or not os.path.exists(temp_video_path_full) or os.path.getsize(temp_video_path_full) == 0:
159
- print(f"Could not find or save video with ID {target_id} from dataset stream.")
160
- return None
161
 
162
- # Process the saved video with FFmpeg
 
 
163
  final_output_path = None
164
  try:
165
  cmd = [
166
  'ffmpeg',
167
- '-y', # Overwrite output file if exists
168
- '-ss', str(start_time), # Start time
169
- '-i', temp_video_path_full, # Input file
170
- '-t', str(duration), # Duration of the segment
171
  '-c:v', 'libx264',
172
  '-profile:v', 'baseline',
173
  '-level', '3.0',
@@ -176,10 +235,11 @@ def extract_video_segment(video_id, start_time, duration, dataset):
176
  '-movflags', '+faststart',
177
  '-c:a', 'aac',
178
  '-b:a', '128k',
 
179
  output_path_ffmpeg
180
  ]
181
  print(f"Running FFmpeg command: {' '.join(cmd)}")
182
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
183
 
184
  if result.returncode == 0 and os.path.exists(output_path_ffmpeg) and os.path.getsize(output_path_ffmpeg) > 0:
185
  print(f"FFmpeg processing successful. Output: {output_path_ffmpeg}")
@@ -188,155 +248,167 @@ def extract_video_segment(video_id, start_time, duration, dataset):
188
  print(f"FFmpeg error (Return Code: {result.returncode}):")
189
  print(f"FFmpeg stdout:\n{result.stdout}")
190
  print(f"FFmpeg stderr:\n{result.stderr}")
191
- print("FFmpeg failed.")
192
- final_output_path = None
 
 
 
 
193
 
194
  except subprocess.TimeoutExpired:
195
- print("FFmpeg command timed out.")
196
- final_output_path = None
 
 
 
 
 
197
  except FileNotFoundError:
198
- print("Error: ffmpeg command not found. Make sure FFmpeg is installed in the environment.")
199
- final_output_path = None
 
 
 
 
 
200
  except Exception as e:
201
  print(f"An unexpected error occurred during FFmpeg processing: {e}")
202
- traceback.print_exc()
203
- final_output_path = None
204
-
205
- finally:
206
- # Clean up temporary files
207
- print(f"Cleaning up temporary directory: {temp_dir}")
208
- if os.path.exists(temp_video_path_full):
209
- try:
210
- os.remove(temp_video_path_full)
211
- print(f"Cleaned up temporary full video: {temp_video_path_full}")
212
- except Exception as e:
213
- print(f"Warning: Could not remove temporary file {temp_video_path_full}: {e}")
214
 
 
 
 
 
 
 
 
 
215
  if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
216
- try:
217
- os.remove(output_path_ffmpeg)
218
- except Exception as e:
219
- print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
220
 
221
- if final_output_path and os.path.exists(final_output_path):
222
  print(f"Returning video segment path: {final_output_path}")
223
  return final_output_path
224
- else:
225
- print("Video segment extraction failed.")
 
 
 
 
 
 
 
 
226
  return None
227
 
 
 
 
228
 
def parse_llm_output(text):
    """
    Pull the structured fields out of the LLM reply with plain string searches.

    Each field is expected as ``{Name: value}`` (the value may additionally be
    wrapped in square brackets and/or quotes). Returns a dict with the keys
    'video_id', 'timestamp', 'content' and 'reasoning'; a field that cannot be
    located is None, and a timestamp that is not numeric is reset to None.
    """
    print(f"\nDEBUG: Raw text input to parse_llm_output:\n---\n{text}\n---")

    # Markers are matched case-insensitively, so search a lowered copy once.
    lowered_text = text.lower()

    def extract_field(label):
        marker = "{" + label.lower() + ":"
        marker_pos = lowered_text.find(marker)
        if marker_pos == -1:
            print(f"Warning: Marker '{{{label}:' not found in text.")
            return None

        value_start = marker_pos + len(marker)
        closing_pos = text.find('}', value_start)
        if closing_pos == -1:
            print(f"Warning: Found '{{{label}:' marker but no closing '}}' found afterwards.")
            return None

        raw = text[value_start:closing_pos].strip()
        # Drop one layer of surrounding [ ] if the model echoed the template.
        if raw.startswith('[') and raw.endswith(']'):
            raw = raw[1:-1].strip()
        return raw.strip('\'"“”').strip()

    # Order matters only for the sequence of warning messages printed.
    field_labels = (
        ('video_id', 'Best Result'),
        ('timestamp', 'Timestamp'),
        ('content', 'Content'),
        ('reasoning', 'Reasoning'),
    )
    data = {}
    for key, label in field_labels:
        data[key] = extract_field(label)

    # A timestamp that cannot be read as a number is treated as missing.
    timestamp_value = data.get('timestamp')
    if timestamp_value:
        try:
            float(timestamp_value)
        except ValueError:
            print(f"Warning: Parsed timestamp '{data['timestamp']}' is not a valid number.")
            data['timestamp'] = None

    print(f"Parsed LLM output: {data}")
    return data
274
 
275
 
276
  def process_query_and_get_video(query_text):
277
  """
278
  Orchestrates RAG, LLM query, parsing, and video extraction.
279
- Returns the path to the extracted video segment or None on failure.
280
- Prints status and errors directly.
281
  """
282
  print(f"\n--- Processing query: '{query_text}' ---")
283
 
284
- # Check if necessary components are initialized
285
- if 'qdrant_client' not in globals() or qdrant_client is None:
286
- print("Setup Error: Qdrant client is not initialized. Cannot proceed.")
287
- return None
288
- if 'llm' not in globals() or llm is None:
289
- print("Setup Error: LLM is not initialized. Cannot proceed.")
290
- return None
291
- if 'embedding_model' not in globals() or embedding_model is None:
292
- print("Setup Error: Embedding model is not initialized. Cannot proceed.")
293
- return None
294
- if 'dataset' not in globals() or dataset is None:
295
- print("Setup Error: Dataset is not loaded. Cannot proceed.")
296
- return None
297
-
298
- # RAG Query
299
  print("Step 1: Performing RAG query...")
300
  rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
301
 
302
  if "error" in rag_results or not rag_results.get("results"):
303
  error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
304
  print(f"RAG Error/No Results: {error_msg}")
305
- return None
306
 
307
  print(f"RAG query successful. Found {len(rag_results['results'])} results.")
308
 
309
  # Format LLM Prompt
310
  print("Step 2: Formatting prompt for LLM...")
311
- results_for_llm = "\n".join([
312
- f"Rank: {r['rank']}, Score: {r['score']:.4f}, Video ID: {r['video_id']}, Timestamp: {r['timestamp']}, Subtitle: {r['subtitle']}"
313
- for r in rag_results['results']
314
- ])
315
-
316
  prompt = f"""You are tasked with selecting the most relevant information from a set of video subtitle segments to answer a query.
317
-
318
- QUERY: "{query_text}"
319
-
320
- Here are the relevant video segments found:
321
- ---
322
- {results_for_llm}
323
- ---
324
-
325
  For each result provided, evaluate how well it directly addresses the definition or explanation related to the query. Pay attention to:
326
  1. Clarity of explanation
327
  2. Relevance to the query
328
  3. Completeness of information
329
-
330
  From the provided results, select the SINGLE BEST match that most directly answers the query.
331
-
332
  Format your response STRICTLY as follows, with each field on a new line:
333
  {{Best Result: [video_id]}}
334
  {{Timestamp: [timestamp]}}
335
- {{Content: [subtitle text from the selected result]}}
336
  {{Reasoning: [Brief explanation of why this result best answers the query]}}
337
- """
338
 
339
- # Call LLM
340
  print("Step 3: Querying the LLM...")
341
  try:
342
  output = llm.create_chat_completion(
@@ -347,56 +419,49 @@ Format your response STRICTLY as follows, with each field on a new line:
347
  temperature=0.1,
348
  max_tokens=300
349
  )
350
- llm_response_text = output['choices'][0]['message']['content'].strip()
351
- print(f"LLM Response:\n---\n{llm_response_text}\n---")
352
  except Exception as e:
353
  print(f"Error during LLM call: {e}")
354
- traceback.print_exc()
355
- return None
356
 
357
- # Parse LLM Response
358
  print("Step 4: Parsing LLM response...")
359
  parsed_data = parse_llm_output(llm_response_text)
360
 
361
  video_id = parsed_data.get('video_id')
362
  timestamp_str = parsed_data.get('timestamp')
363
- # Get reasoning/content
364
  reasoning = parsed_data.get('reasoning')
365
- content = parsed_data.get('content')
366
-
367
- if reasoning:
368
- print(f"LLM Reasoning: {reasoning}")
369
-
370
- if content:
371
- print(f"LLM Selected Content: {content}")
372
-
373
 
374
  if not video_id or not timestamp_str:
375
  print("Error: Could not parse required video_id or timestamp from LLM response.")
376
- print("Raw LLM response that failed parsing:\n---\n{llm_response_text}\n---")
377
- return None
 
378
 
379
  try:
380
  timestamp = float(timestamp_str)
381
- start_time = max(0.0, timestamp - (VIDEO_SEGMENT_DURATION / 4.0))
382
- actual_duration = VIDEO_SEGMENT_DURATION
383
- print(f"Calculated segment start time: {start_time:.2f}s")
384
-
385
  except ValueError:
386
  print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
387
- return None
 
388
 
389
- # 5. Extract Video Segment
390
- print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {actual_duration:.2f}s)...")
391
- video_path = extract_video_segment(video_id, start_time, actual_duration, dataset)
 
 
 
392
 
393
  if video_path and os.path.exists(video_path):
394
  print(f"Video segment extracted successfully: {video_path}")
395
- return video_path
396
  else:
397
  print("Failed to extract video segment.")
398
- return None
399
-
400
 
401
  with gr.Blocks() as iface:
402
  gr.Markdown(
@@ -410,7 +475,7 @@ with gr.Blocks() as iface:
410
  query_input = gr.Textbox(label="Your Question", placeholder="e.g., What is a convolutional neural network?")
411
  submit_button = gr.Button("Ask & Find Video")
412
  with gr.Row():
413
- video_output = gr.Video(label="Relevant Video Segment", format="mp4")
414
 
415
  submit_button.click(
416
  fn=process_query_and_get_video,
@@ -425,7 +490,7 @@ with gr.Blocks() as iface:
425
  "Using only the videos, explain the the binary cross entropy loss function.",
426
  ],
427
  inputs=query_input,
428
- outputs=video_output,
429
  fn=process_query_and_get_video,
430
  cache_examples=False,
431
  )
 
1
  import gradio as gr
 
2
  from llama_cpp import Llama
3
  from qdrant_client import QdrantClient
4
  from datasets import load_dataset
5
  from sentence_transformers import SentenceTransformer
6
  import cv2
7
+ import os
8
  import tempfile
9
  import uuid
10
  import re
11
  import subprocess
12
  import time
 
13
 
14
  # Configuration
15
  QDRANT_COLLECTION_NAME = "video_frames"
 
52
 
53
  print("Loading dataset stream...")
54
  try:
55
+ # Load video dataset
56
  dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
57
+ print(f"Dataset loaded. First item example: {next(iter(dataset))['__key__']}")
58
  except Exception as e:
59
  print(f"Error loading dataset: {e}")
60
  raise
 
85
  if filter_condition:
86
  search_params["filter"] = filter_condition
87
 
88
+ search_results = client.search(**search_params)
89
 
90
  formatted_results = []
91
  for idx, result in enumerate(search_results):
 
105
  }
106
  except Exception as e:
107
  print(f"Error during RAG query: {e}")
 
108
  return {"error": str(e), "query": query_text, "results": []}
109
 
110
 
111
  def extract_video_segment(video_id, start_time, duration, dataset):
112
  """
113
+ Generator function that extracts and yields a single video segment file path.
114
+ Modified to return a single path suitable for Gradio.
 
115
  """
116
  target_id = str(video_id)
117
+ target_key = f"videos/{target_id}/{target_id}"
 
118
  start_time = float(start_time)
119
  duration = float(duration)
120
 
121
  unique_id = str(uuid.uuid4())
122
+ temp_dir = os.path.join(tempfile.gettempdir(), f"gradio_video_{unique_id}")
123
  os.makedirs(temp_dir, exist_ok=True)
124
+ temp_video_path = os.path.join(temp_dir, f"{target_id}_full_{unique_id}.mp4")
125
+ output_path_opencv = os.path.join(temp_dir, f"output_opencv_{unique_id}.mp4")
126
  output_path_ffmpeg = os.path.join(temp_dir, f"output_ffmpeg_{unique_id}.mp4")
127
 
128
+ print(f"Attempting to extract segment for video_id={target_id}, start={start_time}, duration={duration}")
129
+ print(f"Looking for dataset key: {target_key}")
130
  print(f"Temporary directory: {temp_dir}")
131
 
 
 
 
 
 
132
 
133
  try:
134
+ found = False
135
+ retries = 3
136
+ dataset_iterator = iter(dataset)
137
+
138
+ for _ in range(retries * 100):
139
+ try:
140
+ sample = next(dataset_iterator)
141
+ if '__key__' in sample and sample['__key__'] == target_key:
142
+ found = True
143
+ print(f"Found video key {target_key}. Saving to {temp_video_path}...")
144
+ with open(temp_video_path, 'wb') as f:
145
+ f.write(sample['mp4'])
146
+ print(f"Video saved successfully ({os.path.getsize(temp_video_path)} bytes).")
147
+ break
148
+ except StopIteration:
149
+ print("Reached end of dataset stream without finding the video.")
150
+ break
151
+ except Exception as e:
152
+ print(f"Error iterating dataset: {e}")
153
+ time.sleep(1)
154
+
155
+
156
+ if not found:
157
+ print(f"Could not find video with ID {target_id} (key: {target_key}) in the dataset stream after {_ + 1} attempts.")
158
+ return None
159
+
160
+ # Process the saved video
161
+ if not os.path.exists(temp_video_path) or os.path.getsize(temp_video_path) == 0:
162
+ print(f"Temporary video file {temp_video_path} is missing or empty.")
163
+ return None
164
+
165
+ cap = cv2.VideoCapture(temp_video_path)
166
+ if not cap.isOpened():
167
+ print(f"Error opening video file with OpenCV: {temp_video_path}")
168
+ return None
169
+
170
+ fps = cap.get(cv2.CAP_PROP_FPS)
171
+ if fps <= 0:
172
+ print(f"Warning: Invalid FPS ({fps}) detected for {temp_video_path}. Assuming 30 FPS.")
173
+ fps = 30
174
+
175
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
176
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
177
+ total_vid_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
178
+ vid_duration = total_vid_frames / fps if fps > 0 else 0
179
+
180
+ print(f"Video properties: {width}x{height} @ {fps:.2f}fps, Total Duration: {vid_duration:.2f}s")
181
+
182
+ start_frame = int(start_time * fps)
183
+ end_frame = int((start_time + duration) * fps)
184
+
185
+ # Clamp frame numbers to valid range
186
+ start_frame = max(0, start_frame)
187
+ end_frame = min(total_vid_frames, end_frame)
188
+
189
+ if start_frame >= total_vid_frames or start_frame >= end_frame:
190
+ print(f"Calculated start frame ({start_frame}) is beyond video length ({total_vid_frames}) or segment is invalid.")
191
+ cap.release()
192
+ return None
193
+
194
+ cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
195
+ frames_to_write = end_frame - start_frame
196
+
197
+ print(f"Extracting frames from {start_frame} to {end_frame} ({frames_to_write} frames)")
198
+
199
+ # Try OpenCV first
200
+ fourcc_opencv = cv2.VideoWriter_fourcc(*'mp4v') # mp4v is often more compatible than avc1 with base OpenCV
201
+ out_opencv = cv2.VideoWriter(output_path_opencv, fourcc_opencv, fps, (width, height))
202
+
203
+ if not out_opencv.isOpened():
204
+ print("Error opening OpenCV VideoWriter with mp4v.")
205
+ cap.release()
206
+ return None
207
+
208
+ frames_written_opencv = 0
209
+ while frames_written_opencv < frames_to_write:
210
+ ret, frame = cap.read()
211
+ if not ret:
212
+ print("Warning: Ran out of frames before reaching target end frame.")
213
  break
214
+ out_opencv.write(frame)
215
+ frames_written_opencv += 1
216
 
217
+ out_opencv.release()
218
+ print(f"OpenCV finished writing {frames_written_opencv} frames to {output_path_opencv}")
 
219
 
220
+ cap.release()
221
+
222
+ # FFmpeg
223
  final_output_path = None
224
  try:
225
  cmd = [
226
  'ffmpeg',
227
+ '-ss', str(start_time), # Start time
228
+ '-i', temp_video_path, # Input file (original downloaded)
229
+ '-t', str(duration), # Duration of the segment
 
230
  '-c:v', 'libx264',
231
  '-profile:v', 'baseline',
232
  '-level', '3.0',
 
235
  '-movflags', '+faststart',
236
  '-c:a', 'aac',
237
  '-b:a', '128k',
238
+ '-y',
239
  output_path_ffmpeg
240
  ]
241
  print(f"Running FFmpeg command: {' '.join(cmd)}")
242
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) # Add timeout
243
 
244
  if result.returncode == 0 and os.path.exists(output_path_ffmpeg) and os.path.getsize(output_path_ffmpeg) > 0:
245
  print(f"FFmpeg processing successful. Output: {output_path_ffmpeg}")
 
248
  print(f"FFmpeg error (Return Code: {result.returncode}):")
249
  print(f"FFmpeg stdout:\n{result.stdout}")
250
  print(f"FFmpeg stderr:\n{result.stderr}")
251
+ print("Falling back to OpenCV output.")
252
+ if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
253
+ final_output_path = output_path_opencv
254
+ else:
255
+ print("OpenCV output is also invalid or empty.")
256
+ final_output_path = None
257
 
258
  except subprocess.TimeoutExpired:
259
+ print("FFmpeg command timed out.")
260
+ print("Falling back to OpenCV output.")
261
+ if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
262
+ final_output_path = output_path_opencv
263
+ else:
264
+ print("OpenCV output is also invalid or empty.")
265
+ final_output_path = None
266
  except FileNotFoundError:
267
+ print("Error: ffmpeg command not found. Make sure FFmpeg is installed and in your system's PATH.")
268
+ print("Falling back to OpenCV output.")
269
+ if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
270
+ final_output_path = output_path_opencv
271
+ else:
272
+ print("OpenCV output is also invalid or empty.")
273
+ final_output_path = None
274
  except Exception as e:
275
  print(f"An unexpected error occurred during FFmpeg processing: {e}")
276
+ print("Falling back to OpenCV output.")
277
+ if os.path.exists(output_path_opencv) and os.path.getsize(output_path_opencv) > 0:
278
+ final_output_path = output_path_opencv
279
+ else:
280
+ print("OpenCV output is also invalid or empty.")
281
+ final_output_path = None
 
 
 
 
 
 
282
 
283
+ if os.path.exists(temp_video_path):
284
+ try:
285
+ os.remove(temp_video_path)
286
+ print(f"Cleaned up temporary full video: {temp_video_path}")
287
+ except Exception as e:
288
+ print(f"Warning: Could not remove temporary file {temp_video_path}: {e}")
289
+
290
+ # If FFmpeg failed
291
  if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
292
+ try:
293
+ os.remove(output_path_ffmpeg)
294
+ except Exception as e:
295
+ print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
296
 
 
297
  print(f"Returning video segment path: {final_output_path}")
298
  return final_output_path
299
+
300
+ except Exception as e:
301
+ print(f"Error processing video segment for {video_id}: {e}")
302
+ import traceback
303
+ traceback.print_exc()
304
+ if 'cap' in locals() and cap.isOpened(): cap.release()
305
+ if 'out_opencv' in locals() and out_opencv.isOpened(): out_opencv.release()
306
+ if os.path.exists(temp_video_path): os.remove(temp_video_path)
307
+ if os.path.exists(output_path_opencv): os.remove(output_path_opencv)
308
+ if os.path.exists(output_path_ffmpeg): os.remove(output_path_ffmpeg)
309
  return None
310
 
311
+ QDRANT_COLLECTION_NAME = "video_frames"
312
+ VIDEO_SEGMENT_DURATION = 40 # Extract 40 seconds around the timestamp
313
+
314
 
def parse_llm_output(text):
    """
    Parse the LLM's structured reply into a dict of fields.

    The reply is expected to contain ``{Best Result: ...}``, ``{Timestamp: ...}``
    and ``{Reasoning: ...}`` markers (matched case-insensitively); each value
    may additionally be wrapped in square brackets and/or quotes.

    video_id and timestamp are extracted with regular expressions. Reasoning is
    extracted by locating its marker and the next closing brace, because the
    free-form text does not fit the simple bracket-excluding pattern.

    Args:
        text: Raw LLM reply. None or an empty string is treated as a reply
            with no fields.

    Returns:
        dict with keys 'video_id', 'timestamp' and 'reasoning'. A field that
        cannot be found is None. A non-numeric timestamp is reported with a
        warning but left unchanged in the result.
    """
    # A missing reply should yield "nothing parsed", not a TypeError in re.
    text = text or ""
    quote_chars = '\'"“”'
    data = {}

    # Parse video_id and timestamp with regex
    simple_patterns = {
        'video_id': r"\{Best Result:\s*\[?([^\]\}]+)\]?\s*\}",
        'timestamp': r"\{Timestamp:\s*\[?([^\]\}]+)\]?\s*\}",
    }
    for key, pattern in simple_patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            # Trim again after removing quotes so padding inside them is dropped.
            data[key] = match.group(1).strip().strip(quote_chars).strip()
        else:
            print(f"Warning: Could not parse '{key}' using regex pattern: {pattern}")
            data[key] = None

    # Parse reasoning. The marker is located in the original text (not in a
    # lowered copy) so the offset is always valid for slicing `text`.
    reasoning_value = None
    marker = re.search(r"\{reasoning:", text, re.IGNORECASE)
    if marker is None:
        print("Warning: Marker '{reasoning:' not found in text.")
    else:
        value_start = marker.end()
        end_index = text.find('}', value_start)
        if end_index == -1:
            print("Warning: Found '{reasoning:' marker but no closing '}' found afterwards.")
        else:
            value = text[value_start:end_index].strip()
            if value.startswith('[') and value.endswith(']'):
                # Trim the padding left inside the brackets before removing
                # quotes; otherwise '[ "text" ]' would keep its quotes.
                value = value[1:-1].strip()
            reasoning_value = value.strip(quote_chars).strip()
    data['reasoning'] = reasoning_value

    # Only warn here: the value is kept so the caller can report it.
    if data.get('timestamp'):
        try:
            float(data['timestamp'])
        except ValueError:
            print(f"Warning: Parsed timestamp '{data['timestamp']}' is not a valid number.")

    print(f"Parsed LLM output (Using String Manipulation for Reasoning): {data}")
    return data
376
 
377
 
378
  def process_query_and_get_video(query_text):
379
  """
380
  Orchestrates RAG, LLM query, parsing, and video extraction.
 
 
381
  """
382
  print(f"\n--- Processing query: '{query_text}' ---")
383
 
384
+ # 1. RAG Query
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  print("Step 1: Performing RAG query...")
386
  rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
387
 
388
  if "error" in rag_results or not rag_results.get("results"):
389
  error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
390
  print(f"RAG Error/No Results: {error_msg}")
391
+ return f"Error during RAG search: {error_msg}", None
392
 
393
  print(f"RAG query successful. Found {len(rag_results['results'])} results.")
394
 
395
  # Format LLM Prompt
396
  print("Step 2: Formatting prompt for LLM...")
 
 
 
 
 
397
  prompt = f"""You are tasked with selecting the most relevant information from a set of video subtitle segments to answer a query.
398
+ QUERY (also seen below): "{query_text}"
 
 
 
 
 
 
 
399
  For each result provided, evaluate how well it directly addresses the definition or explanation related to the query. Pay attention to:
400
  1. Clarity of explanation
401
  2. Relevance to the query
402
  3. Completeness of information
 
403
  From the provided results, select the SINGLE BEST match that most directly answers the query.
 
404
  Format your response STRICTLY as follows, with each field on a new line:
405
  {{Best Result: [video_id]}}
406
  {{Timestamp: [timestamp]}}
407
+ {{Content: [subtitle text]}}
408
  {{Reasoning: [Brief explanation of why this result best answers the query]}}
409
+ {rag_results}"""
410
 
411
+ # 3. Call LLM
412
  print("Step 3: Querying the LLM...")
413
  try:
414
  output = llm.create_chat_completion(
 
419
  temperature=0.1,
420
  max_tokens=300
421
  )
422
+ llm_response_text = output['choices'][0]['message']['content']
423
+ print(f"LLM Response:\n{llm_response_text}")
424
  except Exception as e:
425
  print(f"Error during LLM call: {e}")
426
+ return f"Error calling LLM: {e}", None
 
427
 
428
+ # 4. Parse LLM Response
429
  print("Step 4: Parsing LLM response...")
430
  parsed_data = parse_llm_output(llm_response_text)
431
 
432
  video_id = parsed_data.get('video_id')
433
  timestamp_str = parsed_data.get('timestamp')
 
434
  reasoning = parsed_data.get('reasoning')
 
 
 
 
 
 
 
 
435
 
436
  if not video_id or not timestamp_str:
437
  print("Error: Could not parse required video_id or timestamp from LLM response.")
438
+ fallback_reasoning = reasoning if reasoning else "Could not determine the best segment."
439
+ error_msg = f"Failed to parse LLM response. LLM said:\n---\n{llm_response_text}\n---\nReasoning (if found): {fallback_reasoning}"
440
+ return error_msg, None
441
 
442
  try:
443
  timestamp = float(timestamp_str)
444
+ # Adjust timestamp slightly - start a bit earlier if possible
445
+ start_time = max(0.0, timestamp - (VIDEO_SEGMENT_DURATION / 4))
 
 
446
  except ValueError:
447
  print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
448
+ error_msg = f"Invalid timestamp format from LLM ('{timestamp_str}'). LLM reasoning (if found): {reasoning}"
449
+ return error_msg, None
450
 
451
+ final_reasoning = reasoning if reasoning else "No reasoning provided by LLM."
452
+
453
+ # Extract Video Segment
454
+ print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {VIDEO_SEGMENT_DURATION}s)...")
455
+ global dataset
456
+ video_path = extract_video_segment(video_id, start_time, VIDEO_SEGMENT_DURATION, dataset)
457
 
458
  if video_path and os.path.exists(video_path):
459
  print(f"Video segment extracted successfully: {video_path}")
460
+ return final_reasoning, video_path
461
  else:
462
  print("Failed to extract video segment.")
463
+ error_msg = f"{final_reasoning}\n\n(However, failed to extract the corresponding video segment for ID {video_id} at timestamp {timestamp_str}.)"
464
+ return error_msg, None
465
 
466
  with gr.Blocks() as iface:
467
  gr.Markdown(
 
475
  query_input = gr.Textbox(label="Your Question", placeholder="e.g., What is a convolutional neural network?")
476
  submit_button = gr.Button("Ask & Find Video")
477
  with gr.Row():
478
+ video_output = gr.Video(label="Relevant Video Segment")
479
 
480
  submit_button.click(
481
  fn=process_query_and_get_video,
 
490
  "Using only the videos, explain the the binary cross entropy loss function.",
491
  ],
492
  inputs=query_input,
493
+ outputs= video_output,
494
  fn=process_query_and_get_video,
495
  cache_examples=False,
496
  )