File size: 1,171 Bytes
5492d7f
 
 
009e955
 
 
 
 
 
5492d7f
71b64d2
 
 
 
5492d7f
 
71b64d2
5492d7f
71b64d2
5492d7f
71b64d2
5492d7f
71b64d2
 
 
 
 
 
 
 
 
 
 
 
 
5492d7f
71b64d2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
import torch
from transformers import pipeline

# Load the TinyLlama text-generation pipeline once, at import time.
# Use the first CUDA GPU when one is available and fall back to the CPU
# otherwise, so the app still starts on CPU-only hosts instead of failing
# on a hard-coded GPU index.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama_v1.1",
    device=0 if torch.cuda.is_available() else -1,  # 0 -> 'cuda:0', -1 -> CPU
)  # The `device` argument handles placement; no .to("cuda") call is needed.

# Define the prediction function
def generate_text(prompt, max_length=128, temperature=1.0, top_p=0.95):
    """Sample a single continuation of ``prompt`` from the module-level pipeline.

    Args:
        prompt: Input text handed to the pipeline.
        max_length: Forwarded to the pipeline as ``max_length``.
        temperature: Forwarded to the pipeline as the sampling ``temperature``.
        top_p: Forwarded to the pipeline as the nucleus-sampling ``top_p``.

    Returns:
        The ``'generated_text'`` entry of the first sequence the pipeline
        returns (exactly one sequence is requested).
    """
    # NOTE(review): in transformers, `max_length` presumably counts the prompt
    # tokens as well as the generated ones -- confirm this is the intent.
    sampling_options = {
        "max_length": max_length,
        "temperature": temperature,
        "top_p": top_p,
        "num_return_sequences": 1,
        "do_sample": True,
    }
    sequences = pipe(prompt, **sampling_options)
    return sequences[0]["generated_text"]

# Build the input widgets in the same order as the parameters of
# `generate_text`: prompt, max_length, temperature, top_p.
_input_widgets = [
    gr.Textbox(lines=4, label="Input Prompt"),
    gr.Slider(minimum=32, maximum=512, value=128, step=8, label="Max Length"),
    gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.05, label="Temperature"),
    gr.Slider(
        minimum=0.5,
        maximum=1.0,
        value=0.95,
        step=0.01,
        label="Top-p (nucleus sampling)",
    ),
]

# Single text area that shows the string returned by `generate_text`.
_output_widget = gr.Textbox(lines=8, label="Generated Text")

# Wire the widgets to the prediction function.
demo = gr.Interface(
    fn=generate_text,
    inputs=_input_widgets,
    outputs=_output_widget,
    title="TinyLlama Text Generation",
    description="Enter a prompt and generate text using TinyLlama/TinyLlama_v1.1.",
)

# Start the web app.
demo.launch()