mitch committed on
Commit
a591a02
·
unverified ·
1 Parent(s): 4ab8943

Update ffmpeg

Browse files
Files changed (1) hide show
  1. app.py +48 -25
app.py CHANGED
@@ -4,19 +4,21 @@ from llama_cpp import Llama
4
  from qdrant_client import QdrantClient
5
  from datasets import load_dataset
6
  from sentence_transformers import SentenceTransformer
 
7
  import tempfile
8
  import uuid
9
  import re
10
  import subprocess
 
11
  import traceback
12
 
 
13
  QDRANT_COLLECTION_NAME = "video_frames"
14
  VIDEO_SEGMENT_DURATION = 40 # Extract 40 seconds around the timestamp
15
 
16
- # Load Secrets from Environment Variables
17
  QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
18
 
19
- # Check for qdrant key
20
  if not QDRANT_API_KEY:
21
  print("Error: QDRANT_API_KEY environment variable not found.")
22
  print("Please add your Qdrant API key as a secret named 'QDRANT_API_KEY' in your Hugging Face Space settings.")
@@ -51,7 +53,6 @@ except Exception as e:
51
 
52
  print("Loading dataset stream...")
53
  try:
54
- # Load video dataset
55
  dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
56
  print(f"Dataset loaded.")
57
  except Exception as e:
@@ -65,7 +66,6 @@ except Exception as e:
65
  print(f"Error loading Sentence Transformer model: {e}")
66
  raise
67
 
68
-
69
  def rag_query(client, collection_name, query_text, top_k=5, filter_condition=None):
70
  """
71
  Test RAG by querying the vector database with text. Returns a dictionary with search results and metadata.
@@ -85,7 +85,7 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
85
  if filter_condition:
86
  search_params["filter"] = filter_condition
87
 
88
- search_results = client.search(**search_params)
89
 
90
  formatted_results = []
91
  for idx, result in enumerate(search_results):
@@ -112,7 +112,8 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
112
  def extract_video_segment(video_id, start_time, duration, dataset):
113
  """
114
  Extracts a single video segment file path from the dataset stream.
115
- Returns a single path suitable for Gradio or None on failure.
 
116
  """
117
  target_id = str(video_id)
118
  target_key_pattern = re.compile(r"videos/" + re.escape(target_id) + r"/" + re.escape(target_id))
@@ -147,7 +148,7 @@ def extract_video_segment(video_id, start_time, duration, dataset):
147
  f.write(sample['mp4'])
148
  print(f"Video saved successfully ({os.path.getsize(temp_video_path_full)} bytes).")
149
  found_sample = sample
150
- break # Found the video
151
  except StopIteration:
152
  print("Reached end of dataset stream without finding the video within search limit.")
153
  break
@@ -163,10 +164,10 @@ def extract_video_segment(video_id, start_time, duration, dataset):
163
  try:
164
  cmd = [
165
  'ffmpeg',
166
- '-y',
167
- '-ss', str(start_time),
168
- '-i', temp_video_path_full,
169
- '-t', str(duration),
170
  '-c:v', 'libx264',
171
  '-profile:v', 'baseline',
172
  '-level', '3.0',
@@ -175,8 +176,6 @@ def extract_video_segment(video_id, start_time, duration, dataset):
175
  '-movflags', '+faststart',
176
  '-c:a', 'aac',
177
  '-b:a', '128k',
178
- '-vf', f'select=gte(t,{start_time})',
179
- '-vsync', 'vfr',
180
  output_path_ffmpeg
181
  ]
182
  print(f"Running FFmpeg command: {' '.join(cmd)}")
@@ -196,7 +195,7 @@ def extract_video_segment(video_id, start_time, duration, dataset):
196
  print("FFmpeg command timed out.")
197
  final_output_path = None
198
  except FileNotFoundError:
199
- print("Error: ffmpeg command not found. Make sure FFmpeg is installed.")
200
  final_output_path = None
201
  except Exception as e:
202
  print(f"An unexpected error occurred during FFmpeg processing: {e}")
@@ -213,14 +212,12 @@ def extract_video_segment(video_id, start_time, duration, dataset):
213
  except Exception as e:
214
  print(f"Warning: Could not remove temporary file {temp_video_path_full}: {e}")
215
 
216
- # Clean up failed FFmpeg output if it exists and wasn't the final path
217
  if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
218
  try:
219
  os.remove(output_path_ffmpeg)
220
  except Exception as e:
221
  print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
222
 
223
- # Return the path of the successfully created segment or None
224
  if final_output_path and os.path.exists(final_output_path):
225
  print(f"Returning video segment path: {final_output_path}")
226
  return final_output_path
@@ -232,6 +229,7 @@ def extract_video_segment(video_id, start_time, duration, dataset):
232
  def parse_llm_output(text):
233
  """
234
  Parses the LLM's structured output using string manipulation.
 
235
  """
236
  data = {}
237
  print(f"\nDEBUG: Raw text input to parse_llm_output:\n---\n{text}\n---")
@@ -263,6 +261,7 @@ def parse_llm_output(text):
263
  data['content'] = extract_field(text, 'Content')
264
  data['reasoning'] = extract_field(text, 'Reasoning')
265
 
 
266
  if data.get('timestamp'):
267
  try:
268
  float(data['timestamp'])
@@ -277,10 +276,25 @@ def parse_llm_output(text):
277
  def process_query_and_get_video(query_text):
278
  """
279
  Orchestrates RAG, LLM query, parsing, and video extraction.
280
- Returns only the video path or None.
 
281
  """
282
  print(f"\n--- Processing query: '{query_text}' ---")
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  # RAG Query
285
  print("Step 1: Performing RAG query...")
286
  rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
@@ -288,7 +302,6 @@ def process_query_and_get_video(query_text):
288
  if "error" in rag_results or not rag_results.get("results"):
289
  error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
290
  print(f"RAG Error/No Results: {error_msg}")
291
- # Return None for video output on RAG failure
292
  return None
293
 
294
  print(f"RAG query successful. Found {len(rag_results['results'])} results.")
@@ -347,11 +360,20 @@ Format your response STRICTLY as follows, with each field on a new line:
347
 
348
  video_id = parsed_data.get('video_id')
349
  timestamp_str = parsed_data.get('timestamp')
 
 
 
 
 
 
 
 
 
 
350
 
351
  if not video_id or not timestamp_str:
352
  print("Error: Could not parse required video_id or timestamp from LLM response.")
353
- print("Raw LLM response that failed parsing:\n---\n{llm_response_text}\n---") # Print raw output for debugging
354
- # Return None for video output on parsing failure
355
  return None
356
 
357
  try:
@@ -362,10 +384,9 @@ Format your response STRICTLY as follows, with each field on a new line:
362
 
363
  except ValueError:
364
  print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
365
- # Return None for video output on invalid timestamp
366
  return None
367
 
368
- # Extract Video Segment
369
  print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {actual_duration:.2f}s)...")
370
  video_path = extract_video_segment(video_id, start_time, actual_duration, dataset)
371
 
@@ -399,9 +420,11 @@ with gr.Blocks() as iface:
399
 
400
  gr.Examples(
401
  examples=[
402
- "Using only the videos, explain how ResNets work.",
403
- "Using only the videos, explain the advantages of CNNs over fully connected networks.",
404
- "Using only the videos, explain the the binary cross entropy loss function.",
 
 
405
  ],
406
  inputs=query_input,
407
  outputs=video_output,
 
4
  from qdrant_client import QdrantClient
5
  from datasets import load_dataset
6
  from sentence_transformers import SentenceTransformer
7
+ import cv2
8
  import tempfile
9
  import uuid
10
  import re
11
  import subprocess
12
+ import time
13
  import traceback
14
 
15
+ # Configuration
16
  QDRANT_COLLECTION_NAME = "video_frames"
17
  VIDEO_SEGMENT_DURATION = 40 # Extract 40 seconds around the timestamp
18
 
19
+ # Load Qdrant key
20
  QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
21
 
 
22
  if not QDRANT_API_KEY:
23
  print("Error: QDRANT_API_KEY environment variable not found.")
24
  print("Please add your Qdrant API key as a secret named 'QDRANT_API_KEY' in your Hugging Face Space settings.")
 
53
 
54
  print("Loading dataset stream...")
55
  try:
 
56
  dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
57
  print(f"Dataset loaded.")
58
  except Exception as e:
 
66
  print(f"Error loading Sentence Transformer model: {e}")
67
  raise
68
 
 
69
  def rag_query(client, collection_name, query_text, top_k=5, filter_condition=None):
70
  """
71
  Test RAG by querying the vector database with text. Returns a dictionary with search results and metadata.
 
85
  if filter_condition:
86
  search_params["filter"] = filter_condition
87
 
88
+ search_results = client.query_points(query_points=query_vector, **search_params)
89
 
90
  formatted_results = []
91
  for idx, result in enumerate(search_results):
 
112
  def extract_video_segment(video_id, start_time, duration, dataset):
113
  """
114
  Extracts a single video segment file path from the dataset stream.
115
+ Saves it to a temporary file and returns the path or None on failure.
116
+ Uses FFmpeg with -ss before -i and -t.
117
  """
118
  target_id = str(video_id)
119
  target_key_pattern = re.compile(r"videos/" + re.escape(target_id) + r"/" + re.escape(target_id))
 
148
  f.write(sample['mp4'])
149
  print(f"Video saved successfully ({os.path.getsize(temp_video_path_full)} bytes).")
150
  found_sample = sample
151
+ break # Found the video, exit loop
152
  except StopIteration:
153
  print("Reached end of dataset stream without finding the video within search limit.")
154
  break
 
164
  try:
165
  cmd = [
166
  'ffmpeg',
167
+ '-y', # Overwrite output file if exists
168
+ '-ss', str(start_time), # Start time
169
+ '-i', temp_video_path_full, # Input file
170
+ '-t', str(duration), # Duration of the segment
171
  '-c:v', 'libx264',
172
  '-profile:v', 'baseline',
173
  '-level', '3.0',
 
176
  '-movflags', '+faststart',
177
  '-c:a', 'aac',
178
  '-b:a', '128k',
 
 
179
  output_path_ffmpeg
180
  ]
181
  print(f"Running FFmpeg command: {' '.join(cmd)}")
 
195
  print("FFmpeg command timed out.")
196
  final_output_path = None
197
  except FileNotFoundError:
198
+ print("Error: ffmpeg command not found. Make sure FFmpeg is installed in the environment.")
199
  final_output_path = None
200
  except Exception as e:
201
  print(f"An unexpected error occurred during FFmpeg processing: {e}")
 
212
  except Exception as e:
213
  print(f"Warning: Could not remove temporary file {temp_video_path_full}: {e}")
214
 
 
215
  if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
216
  try:
217
  os.remove(output_path_ffmpeg)
218
  except Exception as e:
219
  print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
220
 
 
221
  if final_output_path and os.path.exists(final_output_path):
222
  print(f"Returning video segment path: {final_output_path}")
223
  return final_output_path
 
229
  def parse_llm_output(text):
230
  """
231
  Parses the LLM's structured output using string manipulation.
232
+ Returns parsed data dictionary.
233
  """
234
  data = {}
235
  print(f"\nDEBUG: Raw text input to parse_llm_output:\n---\n{text}\n---")
 
261
  data['content'] = extract_field(text, 'Content')
262
  data['reasoning'] = extract_field(text, 'Reasoning')
263
 
264
+ # Validation
265
  if data.get('timestamp'):
266
  try:
267
  float(data['timestamp'])
 
276
  def process_query_and_get_video(query_text):
277
  """
278
  Orchestrates RAG, LLM query, parsing, and video extraction.
279
+ Returns the path to the extracted video segment or None on failure.
280
+ Prints status and errors directly.
281
  """
282
  print(f"\n--- Processing query: '{query_text}' ---")
283
 
284
+ # Check if necessary components are initialized
285
+ if 'qdrant_client' not in globals() or qdrant_client is None:
286
+ print("Setup Error: Qdrant client is not initialized. Cannot proceed.")
287
+ return None
288
+ if 'llm' not in globals() or llm is None:
289
+ print("Setup Error: LLM is not initialized. Cannot proceed.")
290
+ return None
291
+ if 'embedding_model' not in globals() or embedding_model is None:
292
+ print("Setup Error: Embedding model is not initialized. Cannot proceed.")
293
+ return None
294
+ if 'dataset' not in globals() or dataset is None:
295
+ print("Setup Error: Dataset is not loaded. Cannot proceed.")
296
+ return None
297
+
298
  # RAG Query
299
  print("Step 1: Performing RAG query...")
300
  rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
 
302
  if "error" in rag_results or not rag_results.get("results"):
303
  error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
304
  print(f"RAG Error/No Results: {error_msg}")
 
305
  return None
306
 
307
  print(f"RAG query successful. Found {len(rag_results['results'])} results.")
 
360
 
361
  video_id = parsed_data.get('video_id')
362
  timestamp_str = parsed_data.get('timestamp')
363
+ # Get reasoning/content
364
+ reasoning = parsed_data.get('reasoning')
365
+ content = parsed_data.get('content')
366
+
367
+ if reasoning:
368
+ print(f"LLM Reasoning: {reasoning}")
369
+
370
+ if content:
371
+ print(f"LLM Selected Content: {content}")
372
+
373
 
374
  if not video_id or not timestamp_str:
375
  print("Error: Could not parse required video_id or timestamp from LLM response.")
376
+ print("Raw LLM response that failed parsing:\n---\n{llm_response_text}\n---")
 
377
  return None
378
 
379
  try:
 
384
 
385
  except ValueError:
386
  print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
 
387
  return None
388
 
389
+ # 5. Extract Video Segment
390
  print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {actual_duration:.2f}s)...")
391
  video_path = extract_video_segment(video_id, start_time, actual_duration, dataset)
392
 
 
420
 
421
  gr.Examples(
422
  examples=[
423
+ "What are activation functions?",
424
+ "Explain backpropagation.",
425
+ "What is transfer learning?",
426
+ "Show me an example of data augmentation.",
427
+ "What is the difference between classification and regression?",
428
  ],
429
  inputs=query_input,
430
  outputs=video_output,