# Spaces: Running
import gradio as gr
import torch
from transformers import pipeline
# Load the TinyLlama text-generation pipeline once at import time so every
# request reuses the same model instance.
# Pipeline device convention: 0 selects 'cuda:0', -1 selects the CPU. Fall back
# to the CPU when CUDA is unavailable instead of failing at startup on
# CPU-only hosts.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama_v1.1",
    device=0 if torch.cuda.is_available() else -1,
)  # No .to("cuda") needed: the device is passed to pipeline() directly.
# Define the prediction function
def generate_text(
    prompt: str,
    max_length: int = 128,
    temperature: float = 1.0,
    top_p: float = 0.95,
) -> str:
    """Generate one sampled completion for *prompt* using the module-level ``pipe``.

    Args:
        prompt: Text handed to the text-generation pipeline.
        max_length: Upper bound on the number of newly generated tokens. It is
            forwarded as ``max_new_tokens`` so that a long prompt cannot use up
            (or exceed) the whole generation budget.
        temperature: Sampling temperature forwarded to the pipeline.
        top_p: Nucleus-sampling probability mass forwarded to the pipeline.

    Returns:
        The ``generated_text`` field of the first (and only) returned sequence.
    """
    # UI sliders may deliver numbers as floats; the token budget must be an
    # integer, so coerce the numeric inputs before forwarding them.
    completions = pipe(
        prompt,
        max_new_tokens=int(max_length),
        temperature=float(temperature),
        top_p=float(top_p),
        num_return_sequences=1,
        do_sample=True,
    )
    return completions[0]['generated_text']
# Build the Gradio UI: one prompt box plus three sampling controls, passed to
# generate_text in the same order as its parameters.
_input_components = [
    gr.Textbox(lines=4, label="Input Prompt"),
    gr.Slider(minimum=32, maximum=512, value=128, step=8, label="Max Length"),
    gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.05, label="Temperature"),
    gr.Slider(
        minimum=0.5,
        maximum=1.0,
        value=0.95,
        step=0.01,
        label="Top-p (nucleus sampling)",
    ),
]
_output_component = gr.Textbox(lines=8, label="Generated Text")

demo = gr.Interface(
    fn=generate_text,
    inputs=_input_components,
    outputs=_output_component,
    title="TinyLlama Text Generation",
    description="Enter a prompt and generate text using TinyLlama/TinyLlama_v1.1.",
)

# Start the web app
demo.launch()