File size: 1,965 Bytes
afcda5c ab1bdf3 afcda5c ab1bdf3 afcda5c 03cc6aa 8cfb246 afcda5c 3a1c398 8cfb246 afcda5c 372b193 afcda5c 8cfb246 ab1bdf3 6d6790a ab1bdf3 afcda5c 8cfb246 ab1bdf3 03cc6aa 372b193 afcda5c ab1bdf3 afcda5c ab1bdf3 8cfb246 03cc6aa 3a1c398 ab1bdf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import os
import gradio as gr
from openai import OpenAI
# UI strings shown in the Gradio page header.
title = "ERNIE 4.5 Turbo: BAIDU's LLM"
description = """
- Official Website: <https://yiyan.baidu.com/> (UI in Chinese)
- API services: [Qianfan Large Model Platform](https://cloud.baidu.com/product-s/qianfan_home) (cloud platform providing LLM services, UI in Chinese)
- [ERNIE 4.5 Turbo Demo](https://huggingface.co./spaces/PaddlePaddle/ernie_4.5_turbo_demo) | [ERNIE X1 Turbo Demo](https://huggingface.co./spaces/PaddlePaddle/ernie_x1_turbo_demo)
"""
# Credentials/model come from the environment; QIANFAN_TOKEN must be set in the
# deployment (os.getenv returns None otherwise and the API call will fail).
qianfan_api_key = os.getenv("QIANFAN_TOKEN")
qianfan_model = "ernie-4.5-turbo-32k"
# Qianfan exposes an OpenAI-compatible endpoint, so the stock OpenAI client is
# pointed at Baidu's base URL instead of api.openai.com.
client = OpenAI(base_url="https://qianfan.baidubce.com/v2", api_key=qianfan_api_key)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from the Qianfan ERNIE endpoint.

    Yields the accumulated assistant reply after each streamed token so the
    Gradio ChatInterface can render it incrementally.

    Args:
        message: The new user message (str).
        history: Prior turns as openai-style dicts with "role"/"content" keys
            (the ChatInterface below is built with type='messages').
        system_message: System prompt prepended to the conversation.
        max_tokens: Cap on generated tokens (passed as max_completion_tokens).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        str: The assistant reply accumulated so far.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat.completions.create(
        model=qianfan_model,
        messages=messages,
        max_completion_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    output_message = ""
    for chunk in response:
        # Some stream chunks carry no choices (e.g. a trailing usage chunk) —
        # indexing [0] on those would raise IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # delta.content is None on role-only/terminal deltas; the original
        # code crashed here with TypeError on `str + None`.
        if token is None:
            continue
        output_message += token
        yield output_message
# Chat UI: `respond` streams replies; the extra inputs map positionally to
# respond's (system_message, max_tokens, temperature, top_p) parameters.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=2, maximum=12288, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title=title,
    description=description,
    # 'messages' mode: history is passed to `respond` as openai-style dicts,
    # not (user, bot) tuples.
    type='messages',
    # Allow up to 50 concurrent generations before queueing.
    concurrency_limit=50
)
if __name__ == "__main__":
    demo.launch()
|