Spaces:

m1tch
/

ai_class_app

Running

App Files Files Community

mitch committed 7 days ago

Commit

a591a02

unverified ·

1 Parent(s): 4ab8943

Update ffmpeg

Browse files

Files changed (1) hide show

app.py +48 -25

app.py CHANGED Viewed

@@ -4,19 +4,21 @@ from llama_cpp import Llama
 from qdrant_client import QdrantClient
 from datasets import load_dataset
 from sentence_transformers import SentenceTransformer
 import tempfile
 import uuid
 import re
 import subprocess
 import traceback
 QDRANT_COLLECTION_NAME = "video_frames"
 VIDEO_SEGMENT_DURATION = 40 # Extract 40 seconds around the timestamp
-# Load Secrets from Environment Variables
 QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
-# Check for qdrant key
 if not QDRANT_API_KEY:
     print("Error: QDRANT_API_KEY environment variable not found.")
     print("Please add your Qdrant API key as a secret named 'QDRANT_API_KEY' in your Hugging Face Space settings.")
@@ -51,7 +53,6 @@ except Exception as e:
 print("Loading dataset stream...")
 try:
-    # Load video dataset
     dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
     print(f"Dataset loaded.")
 except Exception as e:
@@ -65,7 +66,6 @@ except Exception as e:
     print(f"Error loading Sentence Transformer model: {e}")
     raise
 def rag_query(client, collection_name, query_text, top_k=5, filter_condition=None):
     """
     Test RAG by querying the vector database with text. Returns a dictionary with search results and metadata.
@@ -85,7 +85,7 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
         if filter_condition:
             search_params["filter"] = filter_condition
-        search_results = client.search(**search_params)
         formatted_results = []
         for idx, result in enumerate(search_results):
@@ -112,7 +112,8 @@ def rag_query(client, collection_name, query_text, top_k=5, filter_condition=Non
 def extract_video_segment(video_id, start_time, duration, dataset):
     """
     Extracts a single video segment file path from the dataset stream.
-    Returns a single path suitable for Gradio or None on failure.
     """
     target_id = str(video_id)
     target_key_pattern = re.compile(r"videos/" + re.escape(target_id) + r"/" + re.escape(target_id))
@@ -147,7 +148,7 @@ def extract_video_segment(video_id, start_time, duration, dataset):
                         f.write(sample['mp4'])
                     print(f"Video saved successfully ({os.path.getsize(temp_video_path_full)} bytes).")
                     found_sample = sample
-                    break # Found the video
             except StopIteration:
                 print("Reached end of dataset stream without finding the video within search limit.")
                 break
@@ -163,10 +164,10 @@ def extract_video_segment(video_id, start_time, duration, dataset):
         try:
             cmd = [
                 'ffmpeg',
-                '-y',
-                '-ss', str(start_time),
-                '-i', temp_video_path_full,
-                '-t', str(duration),
                 '-c:v', 'libx264',
                 '-profile:v', 'baseline',
                 '-level', '3.0',
@@ -175,8 +176,6 @@ def extract_video_segment(video_id, start_time, duration, dataset):
                 '-movflags', '+faststart',
                 '-c:a', 'aac',
                 '-b:a', '128k',
-                '-vf', f'select=gte(t,{start_time})',
-                '-vsync', 'vfr',
                 output_path_ffmpeg
             ]
             print(f"Running FFmpeg command: {' '.join(cmd)}")
@@ -196,7 +195,7 @@ def extract_video_segment(video_id, start_time, duration, dataset):
             print("FFmpeg command timed out.")
             final_output_path = None
         except FileNotFoundError:
-            print("Error: ffmpeg command not found. Make sure FFmpeg is installed.")
             final_output_path = None
         except Exception as e:
             print(f"An unexpected error occurred during FFmpeg processing: {e}")
@@ -213,14 +212,12 @@ def extract_video_segment(video_id, start_time, duration, dataset):
             except Exception as e:
                 print(f"Warning: Could not remove temporary file {temp_video_path_full}: {e}")
-        # Clean up failed FFmpeg output if it exists and wasn't the final path
         if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
             try:
                 os.remove(output_path_ffmpeg)
             except Exception as e:
                 print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
-    # Return the path of the successfully created segment or None
     if final_output_path and os.path.exists(final_output_path):
         print(f"Returning video segment path: {final_output_path}")
         return final_output_path
@@ -232,6 +229,7 @@ def extract_video_segment(video_id, start_time, duration, dataset):
 def parse_llm_output(text):
     """
     Parses the LLM's structured output using string manipulation.
     """
     data = {}
     print(f"\nDEBUG: Raw text input to parse_llm_output:\n---\n{text}\n---")
@@ -263,6 +261,7 @@ def parse_llm_output(text):
     data['content'] = extract_field(text, 'Content')
     data['reasoning'] = extract_field(text, 'Reasoning')
     if data.get('timestamp'):
         try:
             float(data['timestamp'])
@@ -277,10 +276,25 @@ def parse_llm_output(text):
 def process_query_and_get_video(query_text):
     """
     Orchestrates RAG, LLM query, parsing, and video extraction.
-    Returns only the video path or None.
     """
     print(f"\n--- Processing query: '{query_text}' ---")
     # RAG Query
     print("Step 1: Performing RAG query...")
     rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
@@ -288,7 +302,6 @@ def process_query_and_get_video(query_text):
     if "error" in rag_results or not rag_results.get("results"):
         error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
         print(f"RAG Error/No Results: {error_msg}")
-        # Return None for video output on RAG failure
         return None
     print(f"RAG query successful. Found {len(rag_results['results'])} results.")
@@ -347,11 +360,20 @@ Format your response STRICTLY as follows, with each field on a new line:
     video_id = parsed_data.get('video_id')
     timestamp_str = parsed_data.get('timestamp')
     if not video_id or not timestamp_str:
         print("Error: Could not parse required video_id or timestamp from LLM response.")
-        print("Raw LLM response that failed parsing:\n---\n{llm_response_text}\n---") # Print raw output for debugging
-        # Return None for video output on parsing failure
         return None
     try:
@@ -362,10 +384,9 @@ Format your response STRICTLY as follows, with each field on a new line:
     except ValueError:
         print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
-        # Return None for video output on invalid timestamp
         return None
-    # Extract Video Segment
     print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {actual_duration:.2f}s)...")
     video_path = extract_video_segment(video_id, start_time, actual_duration, dataset)
@@ -399,9 +420,11 @@ with gr.Blocks() as iface:
     gr.Examples(
         examples=[
-            "Using only the videos, explain how ResNets work.",
-            "Using only the videos, explain the advantages of CNNs over fully connected networks.",
-            "Using only the videos, explain the the binary cross entropy loss function.",
         ],
         inputs=query_input,
         outputs=video_output,

 from qdrant_client import QdrantClient
 from datasets import load_dataset
 from sentence_transformers import SentenceTransformer
+import cv2
 import tempfile
 import uuid
 import re
 import subprocess
+import time
 import traceback
+# Configuration
 QDRANT_COLLECTION_NAME = "video_frames"
 VIDEO_SEGMENT_DURATION = 40 # Extract 40 seconds around the timestamp
+# Load Qdrant key
 QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
 if not QDRANT_API_KEY:
     print("Error: QDRANT_API_KEY environment variable not found.")
     print("Please add your Qdrant API key as a secret named 'QDRANT_API_KEY' in your Hugging Face Space settings.")
 print("Loading dataset stream...")
 try:
     dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
     print(f"Dataset loaded.")
 except Exception as e:
     print(f"Error loading Sentence Transformer model: {e}")
     raise
 def rag_query(client, collection_name, query_text, top_k=5, filter_condition=None):
     """
     Test RAG by querying the vector database with text. Returns a dictionary with search results and metadata.
         if filter_condition:
             search_params["filter"] = filter_condition
+        search_results = client.query_points(query_points=query_vector, **search_params)
         formatted_results = []
         for idx, result in enumerate(search_results):
 def extract_video_segment(video_id, start_time, duration, dataset):
     """
     Extracts a single video segment file path from the dataset stream.
+    Saves it to a temporary file and returns the path or None on failure.
+    Uses FFmpeg with -ss before -i and -t.
     """
     target_id = str(video_id)
     target_key_pattern = re.compile(r"videos/" + re.escape(target_id) + r"/" + re.escape(target_id))
                         f.write(sample['mp4'])
                     print(f"Video saved successfully ({os.path.getsize(temp_video_path_full)} bytes).")
                     found_sample = sample
+                    break # Found the video, exit loop
             except StopIteration:
                 print("Reached end of dataset stream without finding the video within search limit.")
                 break
         try:
             cmd = [
                 'ffmpeg',
+                '-y', # Overwrite output file if exists
+                '-ss', str(start_time), # Start time
+                '-i', temp_video_path_full, # Input file
+                '-t', str(duration), # Duration of the segment
                 '-c:v', 'libx264',
                 '-profile:v', 'baseline',
                 '-level', '3.0',
                 '-movflags', '+faststart',
                 '-c:a', 'aac',
                 '-b:a', '128k',
                 output_path_ffmpeg
             ]
             print(f"Running FFmpeg command: {' '.join(cmd)}")
             print("FFmpeg command timed out.")
             final_output_path = None
         except FileNotFoundError:
+            print("Error: ffmpeg command not found. Make sure FFmpeg is installed in the environment.")
             final_output_path = None
         except Exception as e:
             print(f"An unexpected error occurred during FFmpeg processing: {e}")
             except Exception as e:
                 print(f"Warning: Could not remove temporary file {temp_video_path_full}: {e}")
         if final_output_path != output_path_ffmpeg and os.path.exists(output_path_ffmpeg):
             try:
                 os.remove(output_path_ffmpeg)
             except Exception as e:
                 print(f"Warning: Could not remove failed ffmpeg output {output_path_ffmpeg}: {e}")
     if final_output_path and os.path.exists(final_output_path):
         print(f"Returning video segment path: {final_output_path}")
         return final_output_path
 def parse_llm_output(text):
     """
     Parses the LLM's structured output using string manipulation.
+    Returns parsed data dictionary.
     """
     data = {}
     print(f"\nDEBUG: Raw text input to parse_llm_output:\n---\n{text}\n---")
     data['content'] = extract_field(text, 'Content')
     data['reasoning'] = extract_field(text, 'Reasoning')
+    # Validation
     if data.get('timestamp'):
         try:
             float(data['timestamp'])
 def process_query_and_get_video(query_text):
     """
     Orchestrates RAG, LLM query, parsing, and video extraction.
+    Returns the path to the extracted video segment or None on failure.
+    Prints status and errors directly.
     """
     print(f"\n--- Processing query: '{query_text}' ---")
+    # Check if necessary components are initialized
+    if 'qdrant_client' not in globals() or qdrant_client is None:
+        print("Setup Error: Qdrant client is not initialized. Cannot proceed.")
+        return None
+    if 'llm' not in globals() or llm is None:
+         print("Setup Error: LLM is not initialized. Cannot proceed.")
+         return None
+    if 'embedding_model' not in globals() or embedding_model is None:
+         print("Setup Error: Embedding model is not initialized. Cannot proceed.")
+         return None
+    if 'dataset' not in globals() or dataset is None:
+         print("Setup Error: Dataset is not loaded. Cannot proceed.")
+         return None
     # RAG Query
     print("Step 1: Performing RAG query...")
     rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)
     if "error" in rag_results or not rag_results.get("results"):
         error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
         print(f"RAG Error/No Results: {error_msg}")
         return None
     print(f"RAG query successful. Found {len(rag_results['results'])} results.")
     video_id = parsed_data.get('video_id')
     timestamp_str = parsed_data.get('timestamp')
+    # Get reasoning/content
+    reasoning = parsed_data.get('reasoning')
+    content = parsed_data.get('content')
+    if reasoning:
+        print(f"LLM Reasoning: {reasoning}")
+    if content:
+        print(f"LLM Selected Content: {content}")
     if not video_id or not timestamp_str:
         print("Error: Could not parse required video_id or timestamp from LLM response.")
+        print("Raw LLM response that failed parsing:\n---\n{llm_response_text}\n---")
         return None
     try:
     except ValueError:
         print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
         return None
+    # 5. Extract Video Segment
     print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {actual_duration:.2f}s)...")
     video_path = extract_video_segment(video_id, start_time, actual_duration, dataset)
     gr.Examples(
         examples=[
+            "What are activation functions?",
+            "Explain backpropagation.",
+            "What is transfer learning?",
+            "Show me an example of data augmentation.",
+            "What is the difference between classification and regression?",
         ],
         inputs=query_input,
         outputs=video_output,