File size: 1,431 Bytes
43ce954 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import time
import gradio as gr
import requests
from client import respond
from huggingface_hub.errors import HfHubHTTPError
"""
The Hugging Face API sometimes returns a 503 error, so we need to retry multiple times.
"""
def robust_respond(*args, **kwargs):
    """Stream a reply from ``respond``, retrying when the API answers 503.

    All positional and keyword arguments are forwarded unchanged to
    ``respond``, and every value it yields is re-yielded to the caller.

    When ``respond`` raises ``HfHubHTTPError``:

    * a 503 error is retried, for up to 10 attempts in total, sleeping
      between attempts with a delay that starts at 2 seconds and doubles
      after each failure;
    * any other error ends the stream with a single ``"Error: ..."`` message.

    If every attempt fails with a 503, ``"Server busy right now !"`` is
    yielded instead.

    Yields:
        Whatever ``respond`` yields, or one of the fallback strings above.
    """
    max_retries = 10
    wait_time = 2  # seconds; doubled after every failed attempt

    for attempt in range(1, max_retries + 1):
        try:
            yield from respond(*args, **kwargs)
            return
        except HfHubHTTPError as e:
            # Prefer the real HTTP status: the error text can contain "503"
            # by coincidence (for example inside a URL or request id). Fall
            # back to the text match only when no response is attached.
            response = getattr(e, "response", None)
            status_code = getattr(response, "status_code", None)
            if status_code is None:
                service_unavailable = "503" in str(e)
            else:
                service_unavailable = status_code == 503

            if not service_unavailable:
                yield f"Error: {str(e)}"
                return

            if attempt == max_retries:
                # Out of attempts: report immediately instead of sleeping
                # through one more (and the longest) back-off period.
                break

            print(
                f"Attempt {attempt}: Hugging Face API is down. Retrying in {wait_time}s..."
            )
            time.sleep(wait_time)
            wait_time *= 2

    yield "Server busy right now !"
# Chat display component, created up front so its height can be set explicitly.
chatbot = gr.Chatbot(height=600)

# Extra controls offered alongside the chat box. Their values are handed to
# robust_respond, which forwards all of its arguments to respond.
# NOTE(review): the order presumably has to match respond's parameters --
# confirm against client.py before reordering.
_generation_controls = [
    gr.Textbox(label="System message", value=""),
    gr.Slider(
        label="Max new tokens",
        minimum=1,
        maximum=2048,
        step=1,
        value=512,
    ),
    gr.Slider(
        label="Temperature",
        minimum=0.1,
        maximum=4.0,
        step=0.1,
        value=0.7,
    ),
    gr.Slider(label="Top-P", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
]

# The chat UI itself, wired to the retrying wrapper rather than respond directly.
demo = gr.ChatInterface(
    robust_respond,
    chatbot=chatbot,
    additional_inputs=_generation_controls,
    fill_height=True,
    theme="Nymbo/Nymbo_Theme",
)
|