Spaces:
Running
Running
File size: 1,171 Bytes
5492d7f 009e955 5492d7f 71b64d2 5492d7f 71b64d2 5492d7f 71b64d2 5492d7f 71b64d2 5492d7f 71b64d2 5492d7f 71b64d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
import torch
from transformers import pipeline
# Load the TinyLlama text-generation pipeline once, at import time.
# Use the first CUDA GPU when one is visible and fall back to the CPU
# otherwise, so the app still starts on hardware without a GPU instead of
# failing while the model is placed on a device that does not exist.
# The pipeline places the model itself; no explicit .to("cuda") is needed.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama_v1.1",
    device=0 if torch.cuda.is_available() else -1,  # 0 = 'cuda:0', -1 = CPU
)
# Prediction callback wired into the Gradio interface
def generate_text(prompt, max_length=128, temperature=1.0, top_p=0.95):
    """Sample one continuation of ``prompt`` from the module-level pipeline.

    Args:
        prompt: Text handed to the pipeline as the generation prompt.
        max_length: Forwarded unchanged as the pipeline's ``max_length``.
        temperature: Forwarded unchanged as the sampling temperature.
        top_p: Forwarded unchanged as the nucleus-sampling threshold.

    Returns:
        The ``'generated_text'`` field of the first (and only requested)
        sequence returned by the pipeline.
    """
    # NOTE(review): the transformers docs describe ``max_length`` as prompt
    # tokens plus newly generated tokens -- confirm that is what is wanted
    # here rather than ``max_new_tokens``.
    sampling_options = {
        "max_length": max_length,
        "temperature": temperature,
        "top_p": top_p,
        "num_return_sequences": 1,
        "do_sample": True,
    }
    outputs = pipe(prompt, **sampling_options)
    first_sequence = outputs[0]
    return first_sequence['generated_text']
# Build the Gradio UI: a prompt box plus one slider per sampling parameter.
# The input components are listed in the same order as the parameters of
# generate_text, because Gradio passes their values positionally.
input_components = [
    gr.Textbox(lines=4, label="Input Prompt"),
    gr.Slider(32, 512, value=128, step=8, label="Max Length"),
    gr.Slider(0.1, 2.0, value=1.0, step=0.05, label="Temperature"),
    gr.Slider(0.5, 1.0, value=0.95, step=0.01, label="Top-p (nucleus sampling)"),
]
output_component = gr.Textbox(lines=8, label="Generated Text")

demo = gr.Interface(
    fn=generate_text,
    inputs=input_components,
    outputs=output_component,
    title="TinyLlama Text Generation",
    description="Enter a prompt and generate text using TinyLlama/TinyLlama_v1.1.",
)

# Start serving the app
demo.launch()
|