mitch committed on
Commit
112cf0c
·
unverified ·
1 Parent(s): 8573fe7

Added app.py

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/ai_class_app.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="31">
8
+ <item index="0" class="java.lang.String" itemvalue="pydot" />
9
+ <item index="1" class="java.lang.String" itemvalue="joblib" />
10
+ <item index="2" class="java.lang.String" itemvalue="scikit-learn" />
11
+ <item index="3" class="java.lang.String" itemvalue="nltk" />
12
+ <item index="4" class="java.lang.String" itemvalue="urlextract" />
13
+ <item index="5" class="java.lang.String" itemvalue="google-cloud-storage" />
14
+ <item index="6" class="java.lang.String" itemvalue="numpy" />
15
+ <item index="7" class="java.lang.String" itemvalue="requests" />
16
+ <item index="8" class="java.lang.String" itemvalue="tensorflow" />
17
+ <item index="9" class="java.lang.String" itemvalue="tensorflow-serving-api" />
18
+ <item index="10" class="java.lang.String" itemvalue="tensorflow-hub" />
19
+ <item index="11" class="java.lang.String" itemvalue="tensorflow-datasets" />
20
+ <item index="12" class="java.lang.String" itemvalue="nbdime" />
21
+ <item index="13" class="java.lang.String" itemvalue="graphviz" />
22
+ <item index="14" class="java.lang.String" itemvalue="jupyterlab" />
23
+ <item index="15" class="java.lang.String" itemvalue="xgboost" />
24
+ <item index="16" class="java.lang.String" itemvalue="keras-tuner" />
25
+ <item index="17" class="java.lang.String" itemvalue="ipywidgets" />
26
+ <item index="18" class="java.lang.String" itemvalue="scipy" />
27
+ <item index="19" class="java.lang.String" itemvalue="tensorboard-plugin-profile" />
28
+ <item index="20" class="java.lang.String" itemvalue="transformers" />
29
+ <item index="21" class="java.lang.String" itemvalue="gymnasium" />
30
+ <item index="22" class="java.lang.String" itemvalue="google-cloud-aiplatform" />
31
+ <item index="23" class="java.lang.String" itemvalue="pandas" />
32
+ <item index="24" class="java.lang.String" itemvalue="tqdm" />
33
+ <item index="25" class="java.lang.String" itemvalue="tensorboard" />
34
+ <item index="26" class="java.lang.String" itemvalue="swig" />
35
+ <item index="27" class="java.lang.String" itemvalue="matplotlib" />
36
+ <item index="28" class="java.lang.String" itemvalue="statsmodels" />
37
+ <item index="29" class="java.lang.String" itemvalue="Pillow" />
38
+ <item index="30" class="java.lang.String" itemvalue="numexpr" />
39
+ </list>
40
+ </value>
41
+ </option>
42
+ </inspection_tool>
43
+ </profile>
44
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.13" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/ai_class_app.iml" filepath="$PROJECT_DIR$/.idea/ai_class_app.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,542 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+ from qdrant_client import QdrantClient
4
+ from datasets import load_dataset
5
+ from sentence_transformers import SentenceTransformer
6
+ import cv2
7
+ import os
8
+ import tempfile
9
+ import uuid
10
+ import re
11
+ import subprocess
12
+ import time
13
+
14
# --- Import-time initialisation of the LLM, vector store, dataset and encoder ---
# Each step prints its progress and re-raises on failure so the app never
# starts half-configured.

print("Initializing LLM...")
# Ensure the model file exists or download will be attempted
try:
    llm = Llama.from_pretrained(
        repo_id="m1tch/gemma-finetune-ai_class_gguf",
        filename="gemma-3_ai_class.Q8_0.gguf",
        n_gpu_layers=-1,  # Use -1 to offload all possible layers to GPU
        n_ctx=2048,
        verbose=False,  # Set to True for more detailed llama.cpp output
    )
    print("LLM initialized successfully.")
except Exception as e:
    print(f"Error initializing LLM: {e}")
    raise

print("Connecting to Qdrant...")
try:
    # SECURITY: the API key is read from the environment only. A secret must
    # never be committed as a fallback default.
    # NOTE(review): any key that was previously committed to this file should
    # be treated as compromised and rotated.
    qdrant_api_key = os.environ.get("QDRANT_API_KEY")
    if not qdrant_api_key:
        raise RuntimeError(
            "QDRANT_API_KEY environment variable is not set; "
            "export it before starting the app."
        )
    qdrant_client = QdrantClient(
        # The endpoint can be overridden with QDRANT_URL; the default is the
        # cluster this app was originally pointed at.
        url=os.environ.get(
            "QDRANT_URL",
            "https://2c18d413-cbb5-441c-b060-4c8c2302dcde.us-east4-0.gcp.cloud.qdrant.io:6333/",
        ),
        api_key=qdrant_api_key,
        timeout=60,  # Increase timeout if experiencing connection issues
    )
    # Test connection
    qdrant_client.get_collections()
    print("Qdrant connection successful.")
except Exception as e:
    print(f"Error connecting to Qdrant: {e}")
    raise

print("Loading dataset stream...")
try:
    # Load video dataset - ensure you have internet access
    # streaming=True avoids downloading the entire dataset at once
    dataset = load_dataset("aegean-ai/ai-lectures-spring-24", split="train", streaming=True)
    # Peek at the first item to ensure the stream works
    print(f"Dataset loaded. First item example: {next(iter(dataset))['__key__']}")
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

try:
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    print("Sentence Transformer model loaded.")
except Exception as e:
    print(f"Error loading Sentence Transformer model: {e}")
    raise
62
+
63
def rag_query(client, collection_name, query_text, top_k=5, filter_condition=None):
    """
    Embed ``query_text`` and search a Qdrant collection for the closest entries.

    The query is encoded with the module-level, pre-loaded ``embedding_model``.

    Args:
        client: Qdrant client object exposing ``search(...)``.
        collection_name: Name of the collection to search.
        query_text: Natural-language query to embed.
        top_k: Maximum number of hits to request.
        filter_condition: Optional Qdrant filter restricting the search.

    Returns:
        On success, a dict with ``"query"``, ``"results"`` (a list of dicts with
        ``rank``, ``score``, ``video_id``, ``timestamp``, ``subtitle`` and
        ``frame_number``) and ``"avg_score"`` (0 when there are no hits).
        On failure, a dict with ``"error"``, ``"query"`` and an empty
        ``"results"`` list; the exception is logged, not raised.
    """
    try:
        # Use the pre-loaded model
        query_vector = embedding_model.encode(query_text).tolist()

        search_params = {
            "collection_name": collection_name,
            "query_vector": query_vector,
            "limit": top_k,
            "with_payload": True,
            "with_vectors": False,
        }

        if filter_condition:
            # QdrantClient.search names this keyword ``query_filter``; passing
            # it as ``filter`` is rejected as an unknown argument.
            search_params["query_filter"] = filter_condition

        search_results = client.search(**search_params)

        formatted_results = []
        for rank, result in enumerate(search_results, start=1):
            # Tolerate hits that come back without a payload.
            payload = result.payload or {}
            formatted_results.append({
                "rank": rank,
                "score": result.score,
                "video_id": payload.get("video_id"),
                "timestamp": payload.get("timestamp"),
                "subtitle": payload.get("subtitle"),
                "frame_number": payload.get("frame_number"),
            })

        scores = [entry["score"] for entry in formatted_results]
        return {
            "query": query_text,
            "results": formatted_results,
            "avg_score": sum(scores) / len(scores) if scores else 0,
        }
    except Exception as e:
        print(f"Error during RAG query: {e}")
        # Return a structure indicating error, but don't crash the app
        return {"error": str(e), "query": query_text, "results": []}
105
+
106
+
107
def _remove_quietly(path):
    """Delete ``path`` if it exists; log instead of raising when that fails."""
    try:
        if os.path.exists(path):
            os.remove(path)
    except OSError as e:
        print(f"Warning: Could not remove temporary file {path}: {e}")


def _save_video_from_stream(dataset, target_key, dest_path, retries=3):
    """Scan the dataset stream for ``target_key`` and write its 'mp4' bytes to ``dest_path``.

    Returns True when the sample was found and written. The scan stops after
    ``retries * 5000`` samples, at the end of the stream, or after ``retries``
    consecutive iteration errors.
    """
    max_samples = retries * 5000  # Hard cap so a broken stream cannot loop forever
    dataset_iterator = iter(dataset)
    samples_seen = 0
    consecutive_errors = 0

    while samples_seen < max_samples:
        try:
            sample = next(dataset_iterator)
        except StopIteration:
            print("Reached end of dataset stream without finding the video.")
            break
        except Exception as e:
            consecutive_errors += 1
            print(f"Error iterating dataset: {e}")
            if consecutive_errors >= retries:
                print(f"Giving up after {consecutive_errors} consecutive stream errors.")
                break
            time.sleep(1)  # Wait a bit before retrying iteration
            continue

        consecutive_errors = 0
        samples_seen += 1
        if '__key__' in sample and sample['__key__'] == target_key:
            print(f"Found video key {target_key}. Saving to {dest_path}...")
            # Written outside the iteration try-block so an I/O failure is
            # reported as a failure rather than swallowed as a stream error.
            with open(dest_path, 'wb') as f:
                f.write(sample['mp4'])
            print(f"Video saved successfully ({os.path.getsize(dest_path)} bytes).")
            return True

    print(f"Could not find key {target_key} in the dataset stream after inspecting {samples_seen} samples.")
    return False


def _probe_segment(video_path, start_time, duration):
    """Read video properties with OpenCV and compute the clamped frame range.

    Returns ``(fps, width, height, start_frame, end_frame)``, or None when the
    file cannot be opened or the requested segment lies outside the video.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file with OpenCV: {video_path}")
            return None

        fps = cap.get(cv2.CAP_PROP_FPS)
        # Handle cases where FPS might be 0 or invalid
        if fps <= 0:
            print(f"Warning: Invalid FPS ({fps}) detected for {video_path}. Assuming 30 FPS.")
            fps = 30  # Assume a default FPS

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_vid_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        vid_duration = total_vid_frames / fps

        print(f"Video properties: {width}x{height} @ {fps:.2f}fps, Total Duration: {vid_duration:.2f}s")

        # Clamp frame numbers to valid range
        start_frame = max(0, int(start_time * fps))
        end_frame = min(total_vid_frames, int((start_time + duration) * fps))

        if start_frame >= total_vid_frames or start_frame >= end_frame:
            print(f"Calculated start frame ({start_frame}) is beyond video length ({total_vid_frames}) or segment is invalid.")
            return None

        return fps, width, height, start_frame, end_frame
    finally:
        cap.release()


def _cut_with_ffmpeg(source_path, output_path, start_time, duration, timeout=120):
    """Cut and re-encode the segment with FFmpeg; return True when a non-empty file was produced."""
    cmd = [
        'ffmpeg',
        '-ss', str(start_time),       # Start time
        '-i', source_path,            # Input file (original downloaded)
        '-t', str(duration),          # Duration of the segment
        '-c:v', 'libx264',            # Video codec H.264
        '-profile:v', 'baseline',     # Baseline profile for broad compatibility
        '-level', '3.0',              # Level 3.0
        '-preset', 'fast',            # Encoding speed/quality trade-off
        '-pix_fmt', 'yuv420p',        # Pixel format for compatibility
        '-movflags', '+faststart',    # Optimize for web streaming
        '-c:a', 'aac',                # Audio codec AAC (common)
        '-b:a', '128k',               # Audio bitrate
        '-y',                         # Overwrite output file if exists
        output_path,
    ]
    print(f"Running FFmpeg command: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        print("FFmpeg command timed out.")
        return False
    except FileNotFoundError:
        print("Error: ffmpeg command not found. Make sure FFmpeg is installed and in your system's PATH.")
        return False
    except Exception as e:
        print(f"An unexpected error occurred during FFmpeg processing: {e}")
        return False

    if result.returncode == 0 and os.path.exists(output_path) and os.path.getsize(output_path) > 0:
        print(f"FFmpeg processing successful. Output: {output_path}")
        return True

    print(f"FFmpeg error (Return Code: {result.returncode}):")
    print(f"FFmpeg stdout:\n{result.stdout}")
    print(f"FFmpeg stderr:\n{result.stderr}")
    return False


def _cut_with_opencv(source_path, output_path, segment):
    """Copy the frame range described by ``segment`` with OpenCV; return True when a non-empty file was produced."""
    fps, width, height, start_frame, end_frame = segment
    frames_to_write = end_frame - start_frame
    cap = cv2.VideoCapture(source_path)
    writer = None
    frames_written = 0
    try:
        if not cap.isOpened():
            print(f"Error opening video file with OpenCV: {source_path}")
            return False

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        print(f"Extracting frames from {start_frame} to {end_frame} ({frames_to_write} frames)")

        # mp4v is often more compatible than avc1 with base OpenCV
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            print("Error opening OpenCV VideoWriter with mp4v.")
            return False

        while frames_written < frames_to_write:
            ret, frame = cap.read()
            if not ret:
                print("Warning: Ran out of frames before reaching target end frame.")
                break
            writer.write(frame)
            frames_written += 1
    finally:
        # Release before the size check below so the container is finalised.
        cap.release()
        if writer is not None:
            writer.release()

    print(f"OpenCV finished writing {frames_written} frames to {output_path}")
    return os.path.exists(output_path) and os.path.getsize(output_path) > 0


def extract_video_segment(video_id, start_time, duration, dataset):
    """
    Extract one clip from a dataset video and return the path of the clip file.

    The full video is located in the dataset stream under the key
    ``videos/<video_id>/<video_id>``, saved to a per-call temporary directory,
    and cut with FFmpeg (H.264/AAC). If FFmpeg is unavailable or fails, the clip
    is written with OpenCV instead. The full video is always deleted afterwards;
    the temporary directory is removed too when no clip was produced.

    Args:
        video_id: Identifier of the video (converted to ``str``).
        start_time: Clip start in seconds (converted to ``float``).
        duration: Clip length in seconds (converted to ``float``).
        dataset: Iterable of samples, each a mapping with ``'__key__'`` and
            ``'mp4'`` (raw bytes) entries.

    Returns:
        Path of the extracted clip as a string, or None when the video could
        not be found or no clip could be produced.
    """
    import shutil
    import traceback

    target_id = str(video_id)  # Ensure it's a string
    target_key = f"videos/{target_id}/{target_id}"
    start_time = float(start_time)  # Ensure it's a float
    duration = float(duration)

    unique_id = str(uuid.uuid4())
    temp_dir = os.path.join(tempfile.gettempdir(), f"gradio_video_{unique_id}")
    os.makedirs(temp_dir, exist_ok=True)
    temp_video_path = os.path.join(temp_dir, f"{target_id}_full_{unique_id}.mp4")
    output_path_opencv = os.path.join(temp_dir, f"output_opencv_{unique_id}.mp4")
    output_path_ffmpeg = os.path.join(temp_dir, f"output_ffmpeg_{unique_id}.mp4")

    print(f"Attempting to extract segment for video_id={target_id}, start={start_time}, duration={duration}")
    print(f"Looking for dataset key: {target_key}")
    print(f"Temporary directory: {temp_dir}")

    final_output_path = None
    try:
        # --- Find and save the full video ---
        if not _save_video_from_stream(dataset, target_key, temp_video_path):
            print(f"Could not find video with ID {target_id} (key: {target_key}) in the dataset stream.")
            return None

        if not os.path.exists(temp_video_path) or os.path.getsize(temp_video_path) == 0:
            print(f"Temporary video file {temp_video_path} is missing or empty.")
            return None

        # --- Validate the requested range before spending time encoding ---
        segment = _probe_segment(temp_video_path, start_time, duration)
        if segment is None:
            return None

        # --- FFmpeg first (web-compatible encoding); OpenCV only as fallback ---
        if _cut_with_ffmpeg(temp_video_path, output_path_ffmpeg, start_time, duration):
            final_output_path = output_path_ffmpeg
        else:
            print("Falling back to OpenCV output.")
            _remove_quietly(output_path_ffmpeg)  # Drop any partial FFmpeg file
            if _cut_with_opencv(temp_video_path, output_path_opencv, segment):
                final_output_path = output_path_opencv
            else:
                print("OpenCV output is also invalid or empty.")

        # Return the path of the successfully created segment
        print(f"Returning video segment path: {final_output_path}")
        return final_output_path

    except Exception as e:
        print(f"Error processing video segment for {video_id}: {e}")
        traceback.print_exc()  # Print detailed traceback for debugging
        final_output_path = None
        return None
    finally:
        # The large full video is never needed once processing is over.
        _remove_quietly(temp_video_path)
        if final_output_path is None:
            # Nothing usable was produced: do not leave the directory behind.
            shutil.rmtree(temp_dir, ignore_errors=True)
320
+
321
# Name of the Qdrant collection queried by the RAG step.
QDRANT_COLLECTION_NAME: str = "video_frames"

# Length, in seconds, of the clip extracted around the selected timestamp.
VIDEO_SEGMENT_DURATION: int = 30
323
+
324
def parse_llm_output(text):
    """
    Extract ``video_id``, ``timestamp`` and ``reasoning`` from the LLM reply.

    The reply is expected to contain fields shaped like
    ``{Best Result: [id]}``, ``{Timestamp: [value]}`` and
    ``{Reasoning: [text]}`` (matched case-insensitively; the square brackets
    and surrounding quotes are optional and stripped).

    Args:
        text: Raw completion text. ``None`` is treated as an empty reply and
            any other non-string value is converted with ``str()``.

    Returns:
        dict with the keys ``'video_id'``, ``'timestamp'`` and ``'reasoning'``.
        Each value is a string, or None when the field was not found. The
        timestamp is returned as text; a non-numeric value only triggers a
        warning, so callers must still convert it themselves.
    """
    # Optional: Print repr for debugging if needed
    # print(f"\nDEBUG: Raw text input to parse_llm_output:\n{repr(text)}\n")

    # A missing completion must not crash the regex calls below.
    if text is None:
        text = ""
    elif not isinstance(text, str):
        text = str(text)

    quote_chars = '\'"“”'  # single, double and curly quotes
    data = {}

    # --- Parse video_id and timestamp with regex ---
    simple_patterns = {
        'video_id': r"\{Best Result:\s*\[?([^\]\}]+)\]?\s*\}",
        'timestamp': r"\{Timestamp:\s*\[?([^\]\}]+)\]?\s*\}",
    }
    for key, pattern in simple_patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            data[key] = match.group(1).strip().strip(quote_chars)
        else:
            print(f"Warning: Could not parse '{key}' using regex pattern: {pattern}")
            data[key] = None

    # --- Parse reasoning ---
    reasoning_value = None
    # Locate the marker on the ORIGINAL string. Searching a lower-cased copy
    # and reusing its index is wrong whenever lower-casing changes the string
    # length (some Unicode characters expand when lower-cased).
    marker_match = re.search(r"\{reasoning:", text, re.IGNORECASE)
    if marker_match is None:
        print("Warning: Marker '{reasoning:' not found in text.")
    else:
        body_start = marker_match.end()
        body_end = text.find('}', body_start)
        truncated = body_end == -1
        if truncated:
            # Typically the completion hit its token limit mid-sentence; keep
            # what was generated instead of discarding the whole explanation.
            print("Warning: Found '{reasoning:' marker but no closing '}' found afterwards.")
            body_end = len(text)

        value = text[body_start:body_end].strip()  # Strip outer whitespace first
        if value.startswith('[') and value.endswith(']'):
            value = value[1:-1]  # Slice off brackets
        elif truncated and value.startswith('['):
            value = value[1:]  # Closing bracket was never generated
        value = value.strip(quote_chars).strip()

        if truncated:
            reasoning_value = value or None
        else:
            reasoning_value = value

    data['reasoning'] = reasoning_value  # Assign found value or None

    # --- Validation ---
    if data.get('timestamp'):
        try:
            float(data['timestamp'])
        except ValueError:
            print(f"Warning: Parsed timestamp '{data['timestamp']}' is not a valid number.")

    print(f"Parsed LLM output (Using String Manipulation for Reasoning): {data}")
    return data
397
+
398
+
399
def process_query_and_get_video(query_text):
    """
    Orchestrates RAG, LLM query, parsing, and video extraction.

    Args:
        query_text: The user's question. ``None`` or blank input is rejected
            up front without touching the vector store, the LLM or the dataset.

    Returns:
        A ``(message, video_path)`` tuple. ``message`` is the LLM's reasoning
        on success or a description of what went wrong; ``video_path`` is the
        path of the extracted clip, or None when no clip is available.
    """
    # Reject empty input before doing any expensive work.
    query_text = (query_text or "").strip()
    if not query_text:
        return "Please enter a question before searching.", None

    print(f"\n--- Processing query: '{query_text}' ---")

    # 1. RAG Query
    print("Step 1: Performing RAG query...")
    rag_results = rag_query(qdrant_client, QDRANT_COLLECTION_NAME, query_text)

    if "error" in rag_results or not rag_results.get("results"):
        error_msg = rag_results.get('error', 'No relevant segments found by RAG.')
        print(f"RAG Error/No Results: {error_msg}")
        return f"Error during RAG search: {error_msg}", None  # Return error message and no video

    print(f"RAG query successful. Found {len(rag_results['results'])} results.")

    # 2. Format LLM Prompt
    print("Step 2: Formatting prompt for LLM...")
    # The whole RAG results dictionary is appended as its string representation.
    prompt = f"""You are tasked with selecting the most relevant information from a set of video subtitle segments to answer a query.

QUERY (also seen below): "{query_text}"

For each result provided, evaluate how well it directly addresses the definition or explanation related to the query. Pay attention to:
1. Clarity of explanation
2. Relevance to the query
3. Completeness of information

From the provided results, select the SINGLE BEST match that most directly answers the query.

Format your response STRICTLY as follows, with each field on a new line:
{{Best Result: [video_id]}}
{{Timestamp: [timestamp]}}
{{Content: [subtitle text]}}
{{Reasoning: [Brief explanation of why this result best answers the query]}}

{rag_results}"""

    # 3. Call LLM
    print("Step 3: Querying the LLM...")
    try:
        output = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to select the best video segment based on relevance to a query, following a specific output format."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,  # Lower temperature for more deterministic selection
            max_tokens=250,  # Adjust as needed, ensure enough space for reasoning
        )
        llm_response_text = output['choices'][0]['message']['content']
        print(f"LLM Response:\n{llm_response_text}")
    except Exception as e:
        print(f"Error during LLM call: {e}")
        return f"Error calling LLM: {e}", None

    # 4. Parse LLM Response
    print("Step 4: Parsing LLM response...")
    parsed_data = parse_llm_output(llm_response_text)

    video_id = parsed_data.get('video_id')
    timestamp_str = parsed_data.get('timestamp')
    reasoning = parsed_data.get('reasoning')

    if not video_id or not timestamp_str:
        print("Error: Could not parse required video_id or timestamp from LLM response.")
        fallback_reasoning = reasoning if reasoning else "Could not determine the best segment."
        # Include raw LLM response in the error message for debugging
        error_msg = f"Failed to parse LLM response. LLM said:\n---\n{llm_response_text}\n---\nReasoning (if found): {fallback_reasoning}"
        return error_msg, None

    try:
        timestamp = float(timestamp_str)
    except ValueError:
        print(f"Error: Could not convert parsed timestamp '{timestamp_str}' to float.")
        error_msg = f"Invalid timestamp format from LLM ('{timestamp_str}'). LLM reasoning (if found): {reasoning}"
        return error_msg, None

    # Start a quarter of the clip length before the timestamp, never below 0.
    start_time = max(0.0, timestamp - (VIDEO_SEGMENT_DURATION / 4))

    final_reasoning = reasoning if reasoning else "No reasoning provided by LLM."

    # 5. Extract Video Segment
    print(f"Step 5: Extracting video segment (ID: {video_id}, Start: {start_time:.2f}s, Duration: {VIDEO_SEGMENT_DURATION}s)...")
    # The module-level streaming dataset is only read here, so no ``global``
    # statement is needed. For heavy use, downloading the dataset or caching
    # extracted videos may be preferable to re-scanning the stream per request.
    video_path = extract_video_segment(video_id, start_time, VIDEO_SEGMENT_DURATION, dataset)

    if video_path and os.path.exists(video_path):
        print(f"Video segment extracted successfully: {video_path}")
        return final_reasoning, video_path

    print("Failed to extract video segment.")
    error_msg = f"{final_reasoning}\n\n(However, failed to extract the corresponding video segment for ID {video_id} at timestamp {timestamp_str}.)"
    return error_msg, None
501
+
502
# Example questions offered as one-click presets under the input box.
_EXAMPLE_QUESTIONS = [
    "What are activation functions?",
    "Explain backpropagation.",
    "What is transfer learning?",
    "Show me an example of data augmentation.",
    "What is the difference between classification and regression?",
]

# --- Build the Interface ---
# Layout: header text, a row with the question box and submit button, then the
# LLM reasoning and the extracted video clip in their own rows.
with gr.Blocks() as iface:
    gr.Markdown(
        """
        # AI Lecture Video Q&A
        Ask a question about the AI lectures. The system will find relevant segments using RAG,
        ask a fine-tuned LLM to select the best one, and display the corresponding video clip.
        """
    )

    with gr.Row():
        query_input = gr.Textbox(
            label="Your Question",
            placeholder="e.g., What is a convolutional neural network?",
        )
        submit_button = gr.Button("Ask & Find Video")

    with gr.Row():
        reasoning_output = gr.Markdown(label="LLM Reasoning")

    with gr.Row():
        video_output = gr.Video(label="Relevant Video Segment")

    # Both the button and the examples feed the same pipeline and outputs.
    result_components = [reasoning_output, video_output]

    submit_button.click(
        fn=process_query_and_get_video,
        inputs=query_input,
        outputs=result_components,
    )

    gr.Examples(
        examples=_EXAMPLE_QUESTIONS,
        inputs=query_input,
        outputs=result_components,  # Outputs needed for examples too
        fn=process_query_and_get_video,  # The function to run for examples
        cache_examples=False,  # Disable caching if streaming/LLM state changes
    )

# --- Launch the Interface ---
# share=True creates a public link, requires internet. Set to False for local use.
# debug=True provides more detailed error outputs in the console.
print("Launching Gradio interface...")
iface.launch(debug=True, share=False)