awacke1 committed on
Commit
8bb28e6
·
1 Parent(s): d900de3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -43
app.py CHANGED
@@ -152,50 +152,52 @@ def readitaloud(result):
152
  #return result
153
 
154
  def chat_with_model(prompt, document_section, model_choice='Llama-2-7b-chat-hf'):
155
- endpoint_url = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud' # Dr Llama
156
- hf_token = os.getenv('HF_KEY')
157
- client = InferenceClient(endpoint_url, token=hf_token)
158
- gen_kwargs = dict(
159
- max_new_tokens=512,
160
- top_k=30,
161
- top_p=0.9,
162
- temperature=0.2,
163
- repetition_penalty=1.02,
164
- stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
165
- )
166
-
167
- stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
168
- report=[]
169
- res_box = st.empty()
170
- collected_chunks=[]
171
- collected_messages=[]
172
- allresults=''
173
-
174
- for r in stream:
175
- if r.token.special:
176
- continue
177
- if r.token.text in gen_kwargs["stop_sequences"]:
178
- break
179
- collected_chunks.append(r.token.text)
180
- chunk_message = r.token.text
181
- collected_messages.append(chunk_message)
182
- try:
183
- report.append(r.token.text)
184
- if len(r.token.text) > 0:
185
- result="".join(report).strip()
186
- res_box.markdown(f'*{result}*')
187
-
188
- except:
189
- st.write('.')
190
 
191
- full_reply_content = result
192
- st.write("Elapsed time:")
193
- st.write(time.time() - start_time)
194
-
195
- filename = generate_filename(full_reply_content, prompt)
196
- create_file(filename, prompt, full_reply_content, should_save)
197
- readitaloud(full_reply_content)
198
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  # Chat and Chat with files
201
  def chat_with_model2(prompt, document_section, model_choice='gpt-3.5-turbo'):
 
152
  #return result
153
 
154
  def chat_with_model(prompt, document_section, model_choice='Llama-2-7b-chat-hf'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
+ start_time = time.time()
157
+ endpoint_url = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud' # Dr Llama
158
+ hf_token = os.getenv('HF_KEY')
159
+ client = InferenceClient(endpoint_url, token=hf_token)
160
+ gen_kwargs = dict(
161
+ max_new_tokens=512,
162
+ top_k=30,
163
+ top_p=0.9,
164
+ temperature=0.2,
165
+ repetition_penalty=1.02,
166
+ stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
167
+ )
168
+
169
+ stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
170
+ report=[]
171
+ res_box = st.empty()
172
+ collected_chunks=[]
173
+ collected_messages=[]
174
+ allresults=''
175
+
176
+ for r in stream:
177
+ if r.token.special:
178
+ continue
179
+ if r.token.text in gen_kwargs["stop_sequences"]:
180
+ break
181
+ collected_chunks.append(r.token.text)
182
+ chunk_message = r.token.text
183
+ collected_messages.append(chunk_message)
184
+ try:
185
+ report.append(r.token.text)
186
+ if len(r.token.text) > 0:
187
+ result="".join(report).strip()
188
+ res_box.markdown(f'*{result}*')
189
+
190
+ except:
191
+ st.write('.')
192
+
193
+ full_reply_content = result
194
+ st.write("Elapsed time:")
195
+ st.write(time.time() - start_time)
196
+
197
+ filename = generate_filename(full_reply_content, prompt)
198
+ create_file(filename, prompt, full_reply_content, should_save)
199
+ readitaloud(full_reply_content)
200
+ return result
201
 
202
  # Chat and Chat with files
203
  def chat_with_model2(prompt, document_section, model_choice='gpt-3.5-turbo'):