# -*- coding: utf-8 -*- conda install -c conda-forge ffmpeg -y import os import contextlib @contextlib.contextmanager def new_cd(x): d = os.getcwd() # This could raise an exception, but it's probably # best to let it propagate and let the caller # deal with it, since they requested x os.chdir(x) try: yield finally: # This could also raise an exception, but you *really* # aren't equipped to figure out what went wrong if the # old working directory can't be restored. os.chdir(d) from ipex_llm.transformers import AutoModelForCausalLM from transformers import LlamaTokenizer llm = AutoModelForCausalLM.from_pretrained("checkpoints\\Llama-2-7b-chat-hf",load_in_low_bit="sym_int4") llm.save_low_bit("checkpoints\\Llama-2-7b-chat-hf-INT4") tokenizer = LlamaTokenizer.from_pretrained("checkpoints\\Llama-2-7b-chat-hf\\") tokenizer.save_pretrained("checkpoints\\Llama-2-7b-chat-hf-INT4") from huggingface_hub import snapshot_download # Clip snapshot_download(repo_id='openai/clip-vit-base-patch32', local_dir="./checkpoints/clip-vit-base-patch32") # LLM snapshot_download(repo_id='meta-llama/Llama-2-7b-chat-hf', local_dir="./checkpoints/Llama-2-7b-chat-hf", token=hf_token) # Translation snapshot_download(repo_id='Helsinki-NLP/opus-mt-en-zh', local_dir="./checkpoints/Helsinki-NLP-opus-mt-en-zh") snapshot_download(repo_id='Helsinki-NLP/opus-mt-zh-en', local_dir="./checkpoints/Helsinki-NLP-opus-mt-zh-en") # Embeddings snapshot_download(repo_id='sentence-transformers/all-MiniLM-L12-v2', local_dir="./checkpoints/all-MiniLM-L12-v2") import argparse import gradio as gr import os from models.helperbot_bigdl import Chat from models.sum_model import Sum from models.whisper_model import AudioTranslator from models.llm_model import LlmReasoner import os from langchain.chains import ConversationalRetrievalChain, StuffDocumentsChain from langchain.prompts import PromptTemplate from ipex_llm.langchain.llms import TransformersLLM from langchain.vectorstores import FAISS from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter from ipex_llm.langchain.embeddings import TransformersEmbeddings from langchain import LLMChain from utils.utils import new_cd from ipex_llm.langchain.llms import TransformersLLM from langchain import LLMChain from langchain.chains.summarize import load_summarize_chain from langchain.docstore.document import Document from langchain.prompts import PromptTemplate from langchain.chains.combine_documents.stuff import StuffDocumentsChain from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter import whisper from ipex_llm import optimize_model def has_intersection(t1, t2): if t1[1] < t2[0] or t2[1] < t1[0]: return False else: return True class AudioTranslator(): def __init__(self, args): self.model = whisper.load_model(args.whisper_version, download_root='checkpoints') self.model = optimize_model(self.model) def __call__(self, video_path): """ input: video_path (str) output: audio_results (list) """ print("Extract the audio results.") audio_results = self.model.transcribe(video_path, task = 'translate')["segments"] print("Finished.") return audio_results def match(self, audio_results): transcript = '' for res in audio_results: transcript += res['text'] + ' ' # if has_intersection((start, end), (res["start"], res["end"])): # transcript += res['text'] + ' ' return transcript class Sum(): def __init__(self, args): self.llm_version = args.llm_version # self.max_tokens = args.qa_max_new_tokens def summarize_refine(self, script): text_splitter = CharacterTextSplitter(chunk_size=1024, separator="\n", chunk_overlap=0) texts = text_splitter.split_text(script) docs = [Document(page_content=t) for t in texts] llm = TransformersLLM.from_model_id_low_bit(f"checkpoint\\{self.llm_version}") prompt_template = """Write a concise summary of the following: {text} CONCISE SUMMARY:""" prompt = PromptTemplate.from_template(prompt_template) refine_template = ( "Your job is to produce a final summary\n" "We have provided an existing summary up to a certain point: {existing_answer}\n" "We have the opportunity to refine the existing summary" "(only if needed) with some more context below.\n" "------------\n" "{text}\n" "------------\n" "If the context isn't useful, return the original summary." ) refine_prompt = PromptTemplate.from_template(refine_template) chain = load_summarize_chain( llm=llm, chain_type="refine", question_prompt=prompt, refine_prompt=refine_prompt, return_intermediate_steps=True, input_key="input_documents", output_key="output_text", ) result = chain({"input_documents": docs}, return_only_outputs=True) return result def summarize_mapreduce(self, script): text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0) texts = text_splitter.split_text(script) text = [Document(page_content=t) for t in texts] llm = TransformersLLM.from_model_id_low_bit(f"checkpoint\\{self.llm_version}") # Map map_template = """The following is a meeting recording ========= {texts} ========= Based on this list of recordings, please summary the main idea briefly Helpful Answer:""" map_prompt = PromptTemplate.from_template(map_template) map_chain = LLMChain(llm=llm, prompt=map_prompt, llm_kwargs={"max_new_tokens": 512}) # Reduce reduce_template = """The following is set of summaries: ========= {texts} ========= Take these and distill it into a final, consolidated summary of the meeting. Helpful Answer:""" reduce_prompt = PromptTemplate.from_template(reduce_template) reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt, llm_kwargs={"max_new_tokens": 4096}) # Takes a list of documents, combines them into a single string, and passes this to an LLMChain combine_documents_chain = StuffDocumentsChain( llm_chain=reduce_chain, document_variable_name="texts" ) # Combines and iteratively reduces the mapped documents reduce_documents_chain = ReduceDocumentsChain( combine_documents_chain=combine_documents_chain, collapse_documents_chain=combine_documents_chain, token_max=4000, ) # Combining documents by mapping a chain over them, then combining results map_reduce_chain = MapReduceDocumentsChain( llm_chain=map_chain, reduce_documents_chain=reduce_documents_chain, document_variable_name="texts", return_intermediate_steps=False, ) result = map_reduce_chain({"input_documents": text}, return_only_outputs=True) # print("-." * 40) # print(result) result = result['output_text'].split("Helpful Answer:").strip()[-1] return result def summarize(self, script): text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0) texts = text_splitter.split_text(script) prompt_template = """The following is a piece of meeting recording: <<<{text}>>> Based on recording, summary the main idea fluently. JUST SUMMARY!NO OTHER WORDS! SUMMARY:""" reduce_template = """The following is a meeting recording pieces: <<<{text}>>> Take these and distill it into a final, consolidated summary of the meeting. JUST SUMMARY!NO OTHER WORDS! SUMMARY:""" print(len(texts)) for text in texts: print(text) print("\n") llm = TransformersLLM.from_model_id_low_bit( f"checkpoint\\{self.llm_version}") sum_split = [] for text in texts: response = llm(prompt=prompt_template.format(text=text), max_new_tokens=1024) print(response) response_answer = response.split("SUMMARY:") sum_split.append(response_answer[1]) sum_all = "\n".join(sum_split) result = llm(prompt=reduce_template.format(text=sum_all), max_new_tokens=4000) result_split = result.split("SUMMARY:") return result_split[1] # # for test # import argparse # # parser = argparse.ArgumentParser() # parser.add_argument("--llm_version", default="Llama-2-7b-chat-hf-INT4", help="LLM model version") # args = parser.parse_args() # file_path = "../test.txt" # with open(file_path, "r", encoding="utf-8") as file: # content = file.read() # Sumbot = Sum(args) # result = Sumbot.summarize_map(content) # print("-." * 20) # print(result) parent_dir = os.path.dirname(__file__) condense_template = """ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You can assume the discussion is about the video content. REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \ not sure." Don't try to make up an answer. \ Chat History: {chat_history} Follow Up Question: {question} Standalone question: """ qa_template = """ You are an AI assistant designed for answering questions about a meeting. You are given a word records of this meeting. Try to comprehend the dialogs and provide a answer based on it. ========= {context} ========= Question: {question} Answer: """ # CONDENSE_QUESTION_PROMPT 用于将聊天历史记录和下一个问题压缩为一个独立的问题 CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(condense_template) # QA_PROMPT为机器人设定基调和目的 QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["question", "context"]) # DOC_PROMPT = PromptTemplate.from_template("Video Clip {video_clip}: {page_content}") DOC_PROMPT = PromptTemplate.from_template("{page_content}") class LlmReasoner(): def __init__(self, args): self.history = [] self.llm_version = args.llm_version self.embed_version = args.embed_version self.qa_chain = None self.vectorstore = None self.top_k = args.top_k self.qa_max_new_tokens = args.qa_max_new_tokens self.init_model() def init_model(self): with new_cd(parent_dir): self.llm = TransformersLLM.from_model_id_low_bit( f"..\\checkpoints\\{self.llm_version}") self.llm.streaming = False self.embeddings = TransformersEmbeddings.from_model_id( model_id=f"..\\checkpoints\\{self.embed_version}") def create_qa_chain(self, args, input_log): self.top_k = args.top_k self.qa_max_new_tokens = args.qa_max_new_tokens self.question_generator = LLMChain(llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT) self.answer_generator = LLMChain(llm=self.llm, prompt=QA_PROMPT, llm_kwargs={"max_new_tokens": self.qa_max_new_tokens}) self.doc_chain = StuffDocumentsChain(llm_chain=self.answer_generator, document_prompt=DOC_PROMPT, document_variable_name='context') # 拆分查看字符的文本, 创建一个新的文本分割器 # self.text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0, keep_separator=True) self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=0) texts = self.text_splitter.split_text(input_log) self.vectorstore = FAISS.from_texts(texts, self.embeddings, metadatas=[{"video_clip": str(i)} for i in range(len(texts))]) retriever = self.vectorstore.as_retriever(search_kwargs={"k": self.top_k}) self.qa_chain = ConversationalRetrievalChain(retriever=retriever, question_generator=self.question_generator, combine_docs_chain=self.doc_chain, return_generated_question=True, return_source_documents=True, rephrase_question=False) def __call__(self, question): response = self.qa_chain({"question": question, "chat_history": self.history}) answer = response["answer"] generated_question = response["generated_question"] source_documents = response["source_documents"] self.history.append([question, answer]) return self.history, generated_question, source_documents def clean_history(self): self.history = [] class Chat: def __init__(self, args) -> None: self.args = args def init_model(self): print('\033[1;33m' + "Initializing models...".center(50, '-') + '\033[0m') self.audio_translator = AudioTranslator(self.args) self.llm_reasoner = LlmReasoner(self.args) print('\033[1;32m' + "Model initialization finished!".center(50, '-') + '\033[0m') def video2log(self, video_path): audio_results = self.audio_translator(video_path) en_log_result = [] en_log_result_tmp = "" audio_transcript = self.audio_translator.match(audio_results) en_log_result_tmp += f"\n{audio_transcript}" en_log_result.append(en_log_result_tmp) en_log_result = "\n\n".join(en_log_result) print(f"\033[1;34mLog: \033[0m\n{en_log_result}\n") return en_log_result def chat2video(self, args, user_input, en_log_result): self.llm_reasoner.create_qa_chain(args, en_log_result) en_user_input = user_input print("\n\033[1;32mGnerating response...\033[0m") answer, generated_question, source_documents = self.llm_reasoner(en_user_input) print(f"\033[1;32mQuestion: \033[0m{user_input}") print(f"\033[1;32mAnswer: \033[0m{answer[0][1]}") self.clean_history() return answer, generated_question, source_documents def clean_history(self): self.llm_reasoner.clean_history() return os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" parser = argparse.ArgumentParser() # whisper model arguments parser.add_argument("--whisper_version", default="small", help="Whisper model version for video asr") # llm model arguments parser.add_argument("--llm_version", default="Llama-2-7b-chat-hf-INT4", help="LLM model version") parser.add_argument("--embed_version", default="all-MiniLM-L12-v2", help="Embedding model version") parser.add_argument("--top_k", default=3, type=int, help="Return top k relevant contexts to llm") parser.add_argument("--qa_max_new_tokens", default=128, type=int, help="Number of max new tokens for llm") # general arguments parser.add_argument("--port", type=int, default=7860, help="Gradio server port") args = parser.parse_args() chat = Chat(args) sumbot = Sum(args) chat.init_model() global_chat_history = [] global_result = "" global_summary = "" def clean_conversation(): global global_chat_history chat.clean_history() global_chat_history = [] return '', gr.update(value=None, interactive=True), None, gr.update(value=None, visible=True), gr.update(value=None, visible=True) def clean_chat_history(): global global_chat_history chat.clean_history() global_chat_history = [] return '', None def submit_message(message, max_tokens, top_p): args.qa_max_new_tokens = max_tokens args.top_k = top_p print(args) chat_history, generated_question, source_documents = chat.chat2video(args, message, global_result) global_chat_history.append((message, chat_history[0][1])) return '', global_chat_history def gen_script(vid_path): print(vid_path) global global_result if vid_path is None: log_text = "===== Please upload video! =====" gr.update(value=log_text, visible=True) else: global_result = chat.video2log(vid_path) # script_pth = download_script_file() return gr.update(value=global_result, visible=True), download_script_file() def download_script_file(): try: with open("script_result.txt", "w") as file: file.write(global_result) return "script_result.txt" except Exception as e: return f"Error preparing file for download: {str(e)}" def download_sum_file(): try: with open("sum_result.txt", "w") as file: file.write(global_summary) return "sum_result.txt" except Exception as e: return f"Error preparing file for download: {str(e)}" def upload_file(files): global global_result file_paths = [file.name for file in files][0] try: with open(file_paths, "r", encoding="utf-8") as file: file_content = file.read() global_result = file_content except FileNotFoundError: print("File not found") except IOError: print("Error occurred while reading the file") return file_content, download_script_file() def summary(): global global_summary global_summary = sumbot.summarize(global_result) return gr.update(value=global_summary, visible=True), download_sum_file() css = """ #col-container {max-width: 80%; margin-left: auto; margin-right: auto;} #video_inp {min-height: 100px} #chatbox {min-height: 100px;} #header {text-align: center;} #hint {font-size: 1.0em; padding: 0.5em; margin: 0;} .message { font-size: 1.2em; } """ with gr.Blocks(css=css) as demo: with gr.Column(elem_id="col-container"): gr.Markdown(""" ## Meeting Helper Bot Upload meeting recording in mp3/mp4/txt format and you can get the summary and chat based on content (You can adjust parameters based on your needs) Powered by BigDL, Llama, Whisper, and LangChain""", elem_id="header") with gr.Column() as advanced_column: max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=1024, step=1, value=128) top_k = gr.Slider(label="Top-k", minimum=1, maximum=50, step=1, value=3) with gr.Row(): with gr.Column(): video_inp = gr.Video(label="1.Upload MP3/MP4 File") # file_inp = gr.File(label="file/doc_input") upload_button = gr.UploadButton("1. Or Click to Upload a txt File", file_types=["doc", "txt"], file_count="multiple") gen_btn = gr.Button("2. Generate Script") sum_outp = gr.Textbox(label="Summerization output", lines=15) # save_sum_btn = gr.Button("Save Summarization to txt file") save_sum_dl = gr.outputs.File(label="Download Summary") # save_sum_btn.click(download_sum_file, [], outputs=[gr.outputs.File(label="Download Summary")]) with gr.Column(): script_outp = gr.Textbox(label="Script output", lines=30) with gr.Row(): script_summarization_btn = gr.Button("3.Script Summarization ") # save_script_btn = gr.Button("Save Script to txt file") save_script_dl = gr.outputs.File(label="Download Script") # save_script_btn.click(download_script_file, [], outputs=[gr.outputs.File(label="Download Script")]) with gr.Column(): chatbot = gr.Chatbot(elem_id="chatbox") input_message = gr.Textbox(show_label=False, placeholder="Enter text and press enter", visible=True) btn_submit = gr.Button("Submit") with gr.Row(): btn_clean_chat_history = gr.Button("Clean Chat History") btn_clean_conversation = gr.Button("Start New Conversation") upload_button.upload(upload_file, upload_button, [script_outp, save_script_dl]) gen_btn.click(gen_script, [video_inp], [script_outp, save_script_dl]) script_summarization_btn.click(summary, [], [sum_outp, save_sum_dl]) btn_submit.click(submit_message, [input_message, max_new_tokens, top_k], [input_message, chatbot]) input_message.submit(submit_message, [input_message, max_new_tokens, top_k], [input_message, chatbot]) btn_clean_conversation.click(clean_conversation, [], [input_message, video_inp, chatbot, sum_outp, script_outp]) btn_clean_chat_history.click(clean_chat_history, [], [input_message, chatbot]) demo.load(queur=False) demo.queue(concurrency_count=1) demo.launch(height='800px', server_port=args.port, debug=True, share=False)