# Spaces: Running on Zero
from src.data.chapters import Chapters, sec_to_hms | |
class Prompt:
    """Base class that assembles chaptering prompts and training dialogs.

    The prompt is the concatenation of several short instruction fragments,
    each produced by its own method so subclasses can override them
    individually. Subclasses must implement ``__contains__``,
    ``get_task_prompt`` and ``get_transcript``.
    """

    def __init__(
        self,
        chapters: "Chapters",
    ):
        """Keep a reference to the chapter source.

        Args:
            chapters: Object queried through ``get_duration(vid_id, hms=True)``
                and ``get_chapters(vid_id)``.
        """
        self.chapters = chapters

    def __contains__(self, vid_id) -> bool:
        """Tell whether ``vid_id`` is available; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "Subclasses must implement the '__contains__' method."
        )

    def get_duration_prompt(self, vid_id: str) -> str:
        """Return the opening fragment that states the video duration.

        The duration is requested with ``hms=True`` and interpolated as-is.
        """
        duration = self.chapters.get_duration(vid_id, hms=True)
        return f"Given the complete transcript of a video of duration {duration}, "

    def get_task_prompt(self) -> str:
        """Return the task description fragment; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "Subclasses must implement the 'get_task_prompt' method."
        )

    def get_format_instruction(self) -> str:
        """Return the fragment describing the expected line format."""
        return "Identify the approximate start time of each chapter in the format 'hh:mm:ss - Title'. "

    def get_new_line_instruction(self) -> str:
        """Return the fragment asking for one chapter per line."""
        return "Ensure each chapter entry is on a new line. "

    def get_focus_instruction(self) -> str:
        """Return the fragment asking to focus on significant topic changes."""
        return "Focus on significant topic changes that would merit a new chapter in a video, "

    def get_no_summaries_instruction(self) -> str:
        """Return the fragment forbidding chapter summaries (ends the sentence)."""
        return "but do not provide summaries of the chapters.\n"

    def get_transcript_introduction(self) -> str:
        """Return the line that introduces the transcript."""
        return "Here is the transcript to analyze:\n"

    def get_transcript(self, vid_id: str) -> str:
        """Return the transcript text for ``vid_id``; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "Subclasses must implement the 'get_transcript' method."
        )

    def get_transcript_train(self, vid_id: str) -> str:
        """Return the training transcript.

        By default, the transcript is the same for train and test.
        """
        return self.get_transcript(vid_id)

    def get_transcript_test(self, vid_id: str) -> str:
        """Return the test transcript.

        By default, the transcript is the same for train and test.
        """
        return self.get_transcript(vid_id)

    def get_base_prompt(self, vid_id: str) -> str:
        """Concatenate all instruction fragments, in order, with no separator.

        Each fragment carries its own trailing space or newline, so the
        pieces are joined with the empty string.
        """
        prompt_parts = [
            self.get_duration_prompt(vid_id),
            self.get_task_prompt(),
            self.get_format_instruction(),
            self.get_new_line_instruction(),
            self.get_focus_instruction(),
            self.get_no_summaries_instruction(),
            self.get_transcript_introduction(),
        ]
        return "".join(prompt_parts)

    def get_prompt_train(self, vid_id: str) -> str:
        """Return the training prompt (the base prompt unless overridden)."""
        return self.get_base_prompt(vid_id)

    def get_prompt_test(self, vid_id: str) -> str:
        """Return the test prompt (the base prompt unless overridden)."""
        return self.get_base_prompt(vid_id)

    def get_output(self, vid_id: str) -> str:
        """Return the reference chapter list, one ``<time> - <title>`` per line.

        Iterates ``chapters.get_chapters(vid_id).items()`` in its own order
        and formats every start time with ``sec_to_hms``. An empty chapter
        mapping yields an empty string.
        """
        vid_chapters = self.chapters.get_chapters(vid_id)
        return "\n".join(
            f"{sec_to_hms(start)} - {title}"
            for start, title in vid_chapters.items()
        )

    def get_dialog(self, vid_id: str) -> list[dict[str, str]]:
        """Build the two-turn chat sample for ``vid_id``.

        Returns:
            A list of two messages: the ``user`` turn, whose content is the
            training prompt immediately followed by the training transcript,
            and the ``assistant`` turn holding the reference chapter list.
        """
        prompt = self.get_prompt_train(vid_id)
        transcript = self.get_transcript_train(vid_id)
        output = self.get_output(vid_id)
        return [
            {"role": "user", "content": prompt + transcript},
            {"role": "assistant", "content": output},
        ]