|
import spaces |
|
import gradio as gr |
|
import transformers |
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
|
import torch |
|
|
|
# Quantize model weights to 8-bit via bitsandbytes to reduce GPU memory use.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)



# Hugging Face Hub id of the Cohere Command-R chat model.
model_id = "CohereForAI/c4ai-command-r-v01"

# Tokenizer and quantized model are loaded once at import time and shared
# by every request handled by generate_response below. NOTE(review): this
# downloads weights on first run — expect a long startup.
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config)
|
|
|
@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
    """Generate a chat-model reply to a single user message.

    Args:
        user_input: The user's message text.
        max_new_tokens: Maximum number of tokens to sample for the reply.
        temperature: Sampling temperature (higher = more random).

    Returns:
        The decoded model reply as a string (prompt and special tokens
        stripped).
    """
    messages = [{"role": "user", "content": user_input}]

    # BUG FIX: with tokenize=True and return_tensors="pt" (and no
    # return_dict=True), apply_chat_template returns a token-id tensor
    # directly, NOT a dict — the old ['input_ids'] indexing raised at
    # runtime. Also move the ids onto the model's device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    gen_tokens = model.generate(
        input_ids,
        # Use max_new_tokens directly instead of reconstructing max_length
        # from the prompt length; cast because the Gradio slider may hand
        # us a float.
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=temperature,
    )

    # Decode only the newly generated tokens so the user doesn't see the
    # chat-template prompt echoed back, and drop special tokens.
    gen_text = tokenizer.decode(
        gen_tokens[0][input_ids.shape[1]:],
        skip_special_tokens=True,
    )
    return gen_text
|
|
|
|
|
# Build the Gradio UI. BUG FIX: the gr.inputs/gr.outputs namespaces and the
# `default=` kwarg were removed in Gradio 4.x — use top-level components
# with `value=` instead.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=2, label="Your Message"),
        # step=1 keeps the token-count slider on whole numbers.
        gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Text Generation Model Interface",
    description="This is a Gradio interface for a text generation model. Enter your message and adjust the parameters to generate a response.",
)




iface.launch()