import os

import gradio as gr
import torch
import huggingface_hub
from transformers import pipeline

# Authenticate with the Hugging Face Hub; HF_TOKEN must be set in the
# environment. This is required for gated checkpoints such as the
# alternative model below.
hf_token = os.getenv("HF_TOKEN")
huggingface_hub.login(token=hf_token)

# Alternative gated model (needs the HF_TOKEN login above):
# model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Build a chat pipeline, using the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "text-generation",
    model="deepseek-ai/DeepSeek-R1",
    trust_remote_code=True,
    device=device,
)


def generate(user_message):
    # Pair the fixed system prompt with the user's message from the textbox.
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": user_message},
    ]
    outputs = pipe(messages, max_new_tokens=256)
    # With chat-style input the pipeline returns the whole conversation;
    # the last entry is the assistant's reply as a {"role", "content"} dict.
    reply = outputs[0]["generated_text"][-1]
    print(reply)
    return reply["content"]


app = gr.Interface(fn=generate, inputs="textbox", outputs="textbox")
app.launch()