|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
import os |
|
|
|
|
|
# Serverless Inference API client for the OpenHermes model.
# NOTE(review): this repo hosts GGUF (llama.cpp) weights, which the hosted
# text-generation backend may not serve — confirm the endpoint responds, or
# point at the original fp16 repo ("teknium/OpenHermes-2.5-Mistral-7B").
client = InferenceClient(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    # Authenticate when HF_TOKEN is set (higher rate limits, gated models);
    # None (variable absent) falls back to anonymous access, as before.
    token=os.environ.get("HF_TOKEN"),
)
|
|
|
def generate_text(prompt, system_prompt="", max_new_tokens=512, temperature=0.7, top_p=0.95):
    """Generate a completion from the hosted model using the ChatML template.

    Args:
        prompt: The user's message.
        system_prompt: Optional system instruction; the system turn is omitted
            from the template when this is empty.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated text, with any trailing "<|im_end|>" stop marker removed.
    """
    # OpenHermes-2.5 was fine-tuned on the ChatML prompt format.
    if system_prompt:
        formatted_prompt = (
            f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
            f"<|im_start|>user\n{prompt}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )
    else:
        formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

    # BUG FIX: InferenceClient.text_generation has no `stopping_words` keyword;
    # the correct parameter is `stop_sequences`. The original call raised a
    # TypeError on every request.
    response = client.text_generation(
        formatted_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        stop_sequences=["<|im_end|>"],
    )

    # The stop sequence can be echoed at the end of the output; trim it so the
    # UI shows only the assistant's text.
    stop_marker = "<|im_end|>"
    if response.endswith(stop_marker):
        response = response[: -len(stop_marker)]
    return response
|
|
|
|
|
# --- Gradio UI ---------------------------------------------------------------
# Two-column layout: generation controls on the left, model output on the
# right. Slider defaults mirror generate_text's keyword defaults.
with gr.Blocks() as demo:
    gr.Markdown("# OpenHermes-2.5-Mistral-7B API")

    with gr.Row():
        with gr.Column():
            system_box = gr.Textbox(label="System Prompt (optional)", lines=2)
            user_box = gr.Textbox(label="User Prompt", lines=4)

            with gr.Row():
                tokens_slider = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max New Tokens")
                temp_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature")
                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")

            generate_button = gr.Button("Generate")

        with gr.Column():
            result_box = gr.Textbox(label="Generated Output", lines=10)

    # Wiring: the inputs list order must match generate_text's positional
    # signature (prompt, system_prompt, max_new_tokens, temperature, top_p).
    generate_button.click(
        generate_text,
        inputs=[user_box, system_box, tokens_slider, temp_slider, top_p_slider],
        outputs=result_box,
    )

# queue() serializes concurrent requests; share=True publishes a temporary
# public gradio.live URL in addition to the local server.
demo.queue().launch(share=True)