joey1101 committed on
Commit
d46516b
·
verified ·
1 Parent(s): 6f0ca63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py CHANGED
@@ -18,6 +18,160 @@ from ipex_llm.langchain.embeddings import TransformersEmbeddings
18
  from langchain import LLMChain
19
  from utils.utils import new_cd
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  parent_dir = os.path.dirname(__file__)
22
 
23
  condense_template = """
 
18
  from langchain import LLMChain
19
  from utils.utils import new_cd
20
 
21
+ from ipex_llm.langchain.llms import TransformersLLM
22
+ from langchain import LLMChain
23
+ from langchain.chains.summarize import load_summarize_chain
24
+ from langchain.docstore.document import Document
25
+ from langchain.prompts import PromptTemplate
26
+ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
27
+ from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
28
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
29
+
30
+
31
class Sum():
    """Summarize a long transcript with a low-bit LLM loaded from disk.

    Three strategies are available:

    * ``summarize_refine``    -- LangChain "refine" summarize chain.
    * ``summarize_mapreduce`` -- LangChain map-reduce documents chain.
    * ``summarize``           -- hand-written map step followed by one
      reduce step, calling the LLM directly.

    Every method loads the model again from ``checkpoint\\<llm_version>``.
    NOTE(review): the backslash separator looks Windows-specific -- confirm
    the deployment platform before changing it.
    """

    def __init__(self, args):
        """Store the model version taken from ``args.llm_version``."""
        self.llm_version = args.llm_version
        # self.max_tokens = args.qa_max_new_tokens

    @staticmethod
    def _segment_after(text, marker, index):
        """Return piece ``index`` of ``text`` split on ``marker``.

        When ``marker`` does not occur in ``text`` the whole text is
        returned unchanged instead of raising ``IndexError``.
        """
        pieces = text.split(marker)
        if len(pieces) < 2:
            return text
        return pieces[index]

    def summarize_refine(self, script):
        """Summarize ``script`` with LangChain's "refine" chain.

        The script is split on newlines into chunks of about 1024
        characters; each chunk is wrapped in a ``Document`` and fed to the
        chain.

        Returns the chain's output mapping (the final summary is stored
        under the ``output_text`` key; intermediate steps are requested
        as well).
        """
        text_splitter = CharacterTextSplitter(chunk_size=1024, separator="\n", chunk_overlap=0)
        texts = text_splitter.split_text(script)
        docs = [Document(page_content=t) for t in texts]
        llm = TransformersLLM.from_model_id_low_bit(f"checkpoint\\{self.llm_version}")

        prompt_template = (
            "Write a concise summary of the following:\n"
            "{text}\n"
            "CONCISE SUMMARY:"
        )
        prompt = PromptTemplate.from_template(prompt_template)
        refine_template = (
            "Your job is to produce a final summary\n"
            "We have provided an existing summary up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing summary"
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "If the context isn't useful, return the original summary."
        )
        refine_prompt = PromptTemplate.from_template(refine_template)
        chain = load_summarize_chain(
            llm=llm,
            chain_type="refine",
            question_prompt=prompt,
            refine_prompt=refine_prompt,
            return_intermediate_steps=True,
            input_key="input_documents",
            output_key="output_text",
        )
        result = chain({"input_documents": docs}, return_only_outputs=True)

        return result

    def summarize_mapreduce(self, script):
        """Summarize ``script`` with a LangChain map-reduce chain.

        Each chunk of about 2000 characters is summarized on its own (map),
        then the partial summaries are combined into one (reduce).

        Returns the text that follows the last ``Helpful Answer:`` marker
        in the chain output, stripped of surrounding whitespace. If the
        marker is absent the whole output is returned (stripped).
        """
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
        texts = text_splitter.split_text(script)
        text = [Document(page_content=t) for t in texts]

        llm = TransformersLLM.from_model_id_low_bit(f"checkpoint\\{self.llm_version}")

        # Map
        map_template = (
            "The following is a meeting recording\n"
            "=========\n"
            "{texts}\n"
            "=========\n"
            "Based on this list of recordings, please summary the main idea briefly\n"
            "Helpful Answer:"
        )
        map_prompt = PromptTemplate.from_template(map_template)
        map_chain = LLMChain(llm=llm, prompt=map_prompt, llm_kwargs={"max_new_tokens": 512})

        # Reduce
        reduce_template = (
            "The following is set of summaries:\n"
            "=========\n"
            "{texts}\n"
            "=========\n"
            "Take these and distill it into a final, consolidated summary of the meeting.\n"
            "Helpful Answer:"
        )
        reduce_prompt = PromptTemplate.from_template(reduce_template)
        reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt, llm_kwargs={"max_new_tokens": 4096})

        # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
        combine_documents_chain = StuffDocumentsChain(
            llm_chain=reduce_chain, document_variable_name="texts"
        )

        # Combines and iteratively reduces the mapped documents
        reduce_documents_chain = ReduceDocumentsChain(
            combine_documents_chain=combine_documents_chain,
            collapse_documents_chain=combine_documents_chain,
            token_max=4000,
        )

        # Combining documents by mapping a chain over them, then combining results
        map_reduce_chain = MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_documents_chain,
            document_variable_name="texts",
            return_intermediate_steps=False,
        )

        result = map_reduce_chain({"input_documents": text}, return_only_outputs=True)
        # print("-." * 40)
        # print(result)
        # Index the split list first, then strip: str.split returns a list,
        # which has no .strip() method.
        result = self._segment_after(result['output_text'], "Helpful Answer:", -1).strip()
        return result

    def summarize(self, script):
        """Summarize ``script`` with a manual map step and one reduce step.

        Each chunk of about 1024 characters is summarized by a direct LLM
        call; the partial summaries are joined with newlines and summarized
        once more.

        From every LLM response only the segment following the first
        ``SUMMARY:`` marker is kept (presumably because the model output
        includes the prompt -- confirm). A response without the marker is
        used as is.

        The chunk count, each chunk and each raw response are printed to
        stdout.
        """
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
        texts = text_splitter.split_text(script)

        prompt_template = (
            "The following is a piece of meeting recording:\n"
            "<<<{text}>>>\n"
            "Based on recording, summary the main idea fluently.\n"
            "JUST SUMMARY!NO OTHER WORDS!\n"
            "SUMMARY:"
        )

        reduce_template = (
            "The following is a meeting recording pieces:\n"
            "<<<{text}>>>\n"
            "Take these and distill it into a final, consolidated summary of the meeting.\n"
            "JUST SUMMARY!NO OTHER WORDS!\n"
            "SUMMARY:"
        )

        print(len(texts))
        for text in texts:
            print(text)
            print("\n")

        llm = TransformersLLM.from_model_id_low_bit(
            f"checkpoint\\{self.llm_version}")
        sum_split = []

        for text in texts:
            response = llm(prompt=prompt_template.format(text=text), max_new_tokens=1024)
            print(response)
            sum_split.append(self._segment_after(response, "SUMMARY:", 1))

        sum_all = "\n".join(sum_split)

        result = llm(prompt=reduce_template.format(text=sum_all), max_new_tokens=4000)
        return self._segment_after(result, "SUMMARY:", 1)
160
+
161
+ # # for test
162
+ # import argparse
163
+ #
164
+ # parser = argparse.ArgumentParser()
165
+ # parser.add_argument("--llm_version", default="Llama-2-7b-chat-hf-INT4", help="LLM model version")
166
+ # args = parser.parse_args()
167
+ # file_path = "../test.txt"
168
+ # with open(file_path, "r", encoding="utf-8") as file:
169
+ # content = file.read()
170
+ # Sumbot = Sum(args)
171
+ # result = Sumbot.summarize_mapreduce(content)
172
+ # print("-." * 20)
173
+ # print(result)
174
+
175
  parent_dir = os.path.dirname(__file__)
176
 
177
  condense_template = """