Spaces:
Runtime error
Runtime error
File size: 7,442 Bytes
c67caa3 23e4a4d afaaf98 c67caa3 0f2c752 c67caa3 23e4a4d d84665e c67caa3 23e4a4d c67caa3 380e40f c67caa3 23e4a4d d84665e 3928243 c67caa3 ccca515 e562990 c67caa3 380e40f c67caa3 380e40f c67caa3 e562990 c67caa3 23e4a4d c67caa3 23e4a4d c67caa3 23e4a4d c67caa3 e562990 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import os
import re
import logging
import gradio as gr
import openai
# Backend configuration is read from the environment (Space secrets).
# NOTE: never print os.environ here -- it contains OPENAI_API_KEY and any other
# secret, and everything written to stdout ends up in the Space logs.
# Only the *names* of missing settings are reported.
_missing_settings = [
    name
    for name in ("OPENAI_API_BASE", "OPENAI_API_BASE2", "OPENAI_API_KEY")
    if not os.environ.get(name)
]
if _missing_settings:
    logging.warning("Missing environment variables: %s", ", ".join(_missing_settings))

# Base URLs of the two completion servers. They are stored as ad-hoc attributes
# on the openai module and read back by make_prediction1 / make_prediction2.
openai.api_base1 = os.environ.get("OPENAI_API_BASE")
openai.api_base2 = os.environ.get("OPENAI_API_BASE2")
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Text prepended to every prompt before the user-supplied system message.
BASE_SYSTEM_MESSAGE = ""
def make_prediction1(
    prompt,
    max_tokens=None,
    temperature=None,
    top_p=None,
    top_k=None,
    repetition_penalty=None,
):
    """Stream completion text for ``prompt`` from the first backend.

    Issues a streaming ``openai.Completion.create`` request against the
    endpoint stored in ``openai.api_base1`` and yields the ``text`` field of
    the first choice of every chunk, in arrival order. The sampling
    parameters are forwarded to the server unchanged.
    """
    request = dict(
        api_base=openai.api_base1,
        model="wizardcoder-python-34b-v1.0.Q5_K_M.gguf",
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        stream=True,
        stop=["</s>", "<|im_end|>"],
    )
    stream = openai.Completion.create(**request)
    yield from (piece["choices"][0]["text"] for piece in stream)
def make_prediction2(
    prompt,
    max_tokens=None,
    temperature=None,
    top_p=None,
    top_k=None,
    repetition_penalty=None,
):
    """Stream completion text for ``prompt`` from the second backend.

    Issues a streaming ``openai.Completion.create`` request against the
    endpoint stored in ``openai.api_base2`` and yields the ``text`` field of
    the first choice of every chunk, in arrival order. The sampling
    parameters are forwarded to the server unchanged.
    """
    request = dict(
        api_base=openai.api_base2,
        model="wizardcoder-python-34b-v1.0.Q5_K_M.gguf",
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        stream=True,
        stop=["</s>", "<|im_end|>"],
    )
    stream = openai.Completion.create(**request)
    yield from (piece["choices"][0]["text"] for piece in stream)
def clear_chat(chat_history_state, chat_message):
    """Reset the conversation.

    Both arguments are ignored; the function always returns a fresh empty
    history list and an empty message string, in that order.
    """
    return [], ''
def user(message, history):
    """Record a new user turn.

    Appends ``[message, ""]`` (the empty string is the placeholder for the
    assistant's reply) to ``history`` and returns ``("", history)``; the empty
    string is the new content of the input box. A falsy ``history`` is
    replaced by a new list; a non-empty one is extended in place.
    """
    if not history:
        history = []
    pending_turn = [message, ""]
    history.append(pending_turn)
    return "", history
def chat1(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
    """Stream the first backend's answer to the latest turn in ``history``.

    ``history`` is a list of ``[user_message, assistant_reply]`` pairs whose
    last entry is the turn to answer. The prompt is
    ``BASE_SYSTEM_MESSAGE`` + the stripped ``system_message`` followed by one
    ``###Instruction`` / ``###Response`` section per turn. The reply is
    appended to ``history[-1][1]`` piece by piece.

    Yields:
        ``(history, history, "")`` after every appended piece, so the UI can
        refresh the chat window, the stored state and the input box.
        Nothing is yielded when ``history`` is empty.
    """
    history = history or []
    if not history:
        # No pending user turn: there is nothing to answer, and indexing
        # history[-1] below would raise IndexError.
        return

    turns = "\n".join(
        "\n".join(["###Instruction\n" + item[0] + "\n\n", "###Response\n" + item[1] + "\n\n"])
        for item in history
    )
    messages = BASE_SYSTEM_MESSAGE + system_message.strip() + "\n" + turns
    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages = messages.rstrip()

    prediction = make_prediction1(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )
    for tokens in prediction:
        # Split each chunk into (text, trailing-whitespace) pieces so the
        # reply grows one word at a time; joining the pieces restores the chunk.
        for subtoken in re.findall(r'(.*?)(\s|$)', tokens):
            answer = "".join(subtoken)
            # Drop an echoed "Response" marker only at the very beginning of
            # the reply. Stripping it everywhere would corrupt legitimate
            # words and identifiers (e.g. "Response(...)" in generated code).
            if answer.startswith("Response") and not history[-1][1].strip():
                answer = answer[len("Response"):]
            history[-1][1] += answer
            # stream the response
            yield history, history, ""
def chat2(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
    """Stream the second backend's answer to the latest turn in ``history``.

    ``history`` is a list of ``[user_message, assistant_reply]`` pairs whose
    last entry is the turn to answer. The prompt is
    ``BASE_SYSTEM_MESSAGE`` + the stripped ``system_message`` followed by one
    ``###Instruction`` / ``###Response`` section per turn. The reply is
    appended to ``history[-1][1]`` piece by piece.

    Yields:
        ``(history, history, "")`` after every appended piece, so the UI can
        refresh the chat window, the stored state and the input box.
        Nothing is yielded when ``history`` is empty.
    """
    history = history or []
    if not history:
        # No pending user turn: there is nothing to answer, and indexing
        # history[-1] below would raise IndexError.
        return

    turns = "\n".join(
        "\n".join(["###Instruction\n" + item[0] + "\n\n", "###Response\n" + item[1] + "\n\n"])
        for item in history
    )
    messages = BASE_SYSTEM_MESSAGE + system_message.strip() + "\n" + turns
    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages = messages.rstrip()

    prediction = make_prediction2(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )
    for tokens in prediction:
        # Split each chunk into (text, trailing-whitespace) pieces so the
        # reply grows one word at a time; joining the pieces restores the chunk.
        for subtoken in re.findall(r'(.*?)(\s|$)', tokens):
            answer = "".join(subtoken)
            # Drop an echoed "Response" marker only at the very beginning of
            # the reply. Stripping it everywhere would corrupt legitimate
            # words and identifiers (e.g. "Response(...)" in generated code).
            if answer.startswith("Response") and not history[-1][1].strip():
                answer = answer[len("Response"):]
            history[-1][1] += answer
            # stream the response
            yield history, history, ""
# Initial content of the "System Message" textbox (empty by default).
start_message = ""
# Stylesheet handed to gr.Blocks(css=CSS). It turns the page into a full-height
# flex column and lets the chat panels grow and be resized vertically.
# "#chatbot1" / "#chatbot2" match the elem_id values given to the two
# gr.Chatbot components; no component in this file uses the id "chatbot".
CSS ="""
.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto; resize: vertical; }
#chatbot1 { flex-grow: 1; overflow: auto; resize: vertical; }
#chatbot2 { flex-grow: 1; overflow: auto; resize: vertical; }
"""
def _add_user_message(message, history1, history2):
    """Append the submitted message to both chat histories and clear the input box."""
    _, history1 = user(message, history1)
    _, history2 = user(message, history2)
    return "", history1, history2


# Side-by-side playground: one prompt is sent to two backends and each answer
# is streamed into its own chat panel.
with gr.Blocks(css=CSS) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("""
## This demo is an unquantized GPU chatbot of [WizardCoder-Python-34B-V1.0-GGUF](https://huggingface.co./TheBloke/WizardCoder-Python-34B-V1.0-GGUF)
""")
    with gr.Row():
        gr.Markdown("# 🔍 WizardCoder-Python-34B-V1.0-GGUF Playground Space! 🔎")
    with gr.Row():
        with gr.Column():
            chatbot1 = gr.Chatbot(label="Chatbot1", elem_id="chatbot1")
        with gr.Column():
            chatbot2 = gr.Chatbot(label="Chatbot2", elem_id="chatbot2")
    with gr.Row():
        message = gr.Textbox(
            label="What do you want to chat about?",
            placeholder="Ask me anything.",
            lines=3,
        )
    with gr.Row():
        submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
        clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
        stop = gr.Button(value="Stop", variant="secondary").style(full_width=False)
    with gr.Accordion("Show Model Parameters", open=False):
        with gr.Row():
            with gr.Column():
                max_tokens = gr.Slider(20, 4000, label="Max Tokens", step=20, value=2000)
                temperature = gr.Slider(0.0, 2.0, label="Temperature", step=0.1, value=0.8)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.02, value=0.95)
                top_k = gr.Slider(-1, 100, label="Top K", step=1, value=40)
                repetition_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.05, value=1.1)
        system_msg = gr.Textbox(
            start_message, label="System Message", interactive=True, visible=True, placeholder="System prompt. Provide instructions which you want the model to remember.", lines=5)

    # Each panel keeps its own history. Sharing one state made both models
    # append their tokens to the same history entry, interleaving the answers.
    chat_history_state = gr.State()
    chat_history_state2 = gr.State()

    # "New topic": reset both histories, the input box and both chat windows.
    clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message], queue=False)
    clear.click(clear_chat, inputs=[chat_history_state2, message], outputs=[chat_history_state2, message], queue=False)
    clear.click(lambda: None, None, chatbot1, queue=False)
    clear.click(lambda: None, None, chatbot2, queue=False)

    # First record the user turn in both histories, then let both models
    # answer in parallel. Both chat events are chained after the recording
    # step so neither can run before the message is part of its history.
    user_message_event = submit.click(
        fn=_add_user_message,
        inputs=[message, chat_history_state, chat_history_state2],
        outputs=[message, chat_history_state, chat_history_state2],
        queue=True,
    )
    submit_click_event1 = user_message_event.then(
        fn=chat1, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot1, chat_history_state, message], queue=True
    )
    # The second model streams into chatbot2 (it previously overwrote chatbot1).
    submit_click_event2 = user_message_event.then(
        fn=chat2, inputs=[chat_history_state2, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot2, chat_history_state2, message], queue=True
    )

    # "Stop" cancels both streaming generations.
    stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event1, submit_click_event2], queue=False)

demo.queue(max_size=48, concurrency_count=8).launch(debug=True, server_name="0.0.0.0", server_port=7860)
|