WizardCoder-Python-34b-v1.0

Runtime error

App Files Files Community

bleysg committed on Sep 7, 2023

Commit

3928243

1 Parent(s): d84665e

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -27

app.py CHANGED Viewed

@@ -11,10 +11,10 @@ openai.api_key = os.environ.get("OPENAI_API_KEY")
 BASE_SYSTEM_MESSAGE = """"""
 def make_prediction(chat, prompt, max_tokens=None, temperature=None, top_p=None, top_k=None, repetition_penalty=None):
-    if chat="Chatbot1":
         openai.api_base = os.environ.get("OPENAI_API_BASE")
         completion = openai.Completion.create(model="wizardcoder-python-34b-v1.0.Q5_K_M.gguf", prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, stream=True, stop=["</s>", "<|im_end|>"])
-    elif chat="Chatbot2":
         openai.api_base = os.environ.get("OPENAI_API_BASE2")
         completion = openai.Completion.create(model="wizardcoder-python-34b-v1.0.Q5_K_M.gguf", prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, stream=True, stop=["</s>", "<|im_end|>"])
@@ -35,7 +35,7 @@ def user(message, history):
     return "", history
-def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
     history = history or []
     messages = BASE_SYSTEM_MESSAGE + system_message.strip() + "\n" + \
@@ -46,8 +46,8 @@ def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetit
     # remove last space from assistant, some models output a ZWSP if you leave a space
     messages = messages.rstrip()
-    prediction1 = make_prediction(
-        chat="Chatbot1",
         messages,
         max_tokens=max_tokens,
         temperature=temperature,
@@ -55,27 +55,7 @@ def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetit
         top_k=top_k,
         repetition_penalty=repetition_penalty,
     )
-    prediction2 = = make_prediction(
-        chat="Chatbot1",
-        messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty,
-    )
-    for tokens in prediction1:
-        tokens = re.findall(r'(.*?)(\s|$)', tokens)
-        for subtoken in tokens:
-            subtoken = "".join(subtoken)
-            # Remove "Response\n" if it's at the beginning of the assistant's output
-            if subtoken.startswith("Response"):
-                subtoken = subtoken[len("Response"):]
-            answer = subtoken
-            history[-1][1] += answer
-            # stream the response
-            yield history, history, ""
-    for tokens in prediction2:
         tokens = re.findall(r'(.*?)(\s|$)', tokens)
         for subtoken in tokens:
             subtoken = "".join(subtoken)
@@ -143,7 +123,8 @@ with gr.Blocks(css=CSS) as demo:
     submit_click_event = submit.click(
         fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
     ).then(
-        fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot1, chat_history_state, message], queue=True
     )
     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event], queue=False)

 BASE_SYSTEM_MESSAGE = """"""
 def make_prediction(chat, prompt, max_tokens=None, temperature=None, top_p=None, top_k=None, repetition_penalty=None):
+    if chat == "Chatbot1":
         openai.api_base = os.environ.get("OPENAI_API_BASE")
         completion = openai.Completion.create(model="wizardcoder-python-34b-v1.0.Q5_K_M.gguf", prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, stream=True, stop=["</s>", "<|im_end|>"])
+    elif chat == "Chatbot2":
         openai.api_base = os.environ.get("OPENAI_API_BASE2")
         completion = openai.Completion.create(model="wizardcoder-python-34b-v1.0.Q5_K_M.gguf", prompt=prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, stream=True, stop=["</s>", "<|im_end|>"])
     return "", history
+def chat(chatbot, history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
     history = history or []
     messages = BASE_SYSTEM_MESSAGE + system_message.strip() + "\n" + \
     # remove last space from assistant, some models output a ZWSP if you leave a space
     messages = messages.rstrip()
+    prediction = make_prediction(
+        chatbot,
         messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_k=top_k,
         repetition_penalty=repetition_penalty,
     )
+    for tokens in prediction:
         tokens = re.findall(r'(.*?)(\s|$)', tokens)
         for subtoken in tokens:
             subtoken = "".join(subtoken)
     submit_click_event = submit.click(
         fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
     ).then(
+        fn=chat, inputs=[chatbot="Chatbot1", chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot1, chat_history_state, message], queue=True
+        fn=chat, inputs=[chatbot="Chatbot2", chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot2, chat_history_state, message], queue=True
     )
     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event], queue=False)