"""Gradio web UI for sampling text from the TinyLlama/TinyLlama_v1.1 model."""

import gradio as gr
import torch
from transformers import pipeline

# Load the TinyLlama text-generation pipeline once, at import time.
# The pipeline places the model on the requested device itself, so no
# explicit .to("cuda") call is needed. Use the first CUDA device when one is
# available and fall back to CPU otherwise, so the app still starts on hosts
# without a GPU.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama_v1.1",
    device=0 if torch.cuda.is_available() else -1,  # 0 -> 'cuda:0', -1 -> CPU
)


def generate_text(
    prompt: str,
    max_length: int = 128,
    temperature: float = 1.0,
    top_p: float = 0.95,
) -> str:
    """Generate one sampled continuation of *prompt* with the module pipeline.

    Args:
        prompt: Text passed to the pipeline as the generation prompt.
        max_length: Forwarded to the pipeline as ``max_length``. Coerced to
            ``int`` because the value arrives from a UI slider.
        temperature: Forwarded to the pipeline as ``temperature``.
        top_p: Forwarded to the pipeline as ``top_p``.

    Returns:
        The ``generated_text`` field of the first (and only requested)
        result returned by the pipeline.
    """
    # NOTE(review): per the transformers generation docs, ``max_length``
    # bounds prompt tokens plus generated tokens; if the slider is meant to
    # control only the amount of new text, ``max_new_tokens`` is the
    # parameter to use instead -- confirm the intended meaning.
    result = pipe(
        prompt,
        max_length=int(max_length),
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        do_sample=True,
    )
    return result[0]["generated_text"]


# Build the Gradio interface: the inputs map positionally onto the
# parameters of generate_text (prompt, max_length, temperature, top_p).
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=4, label="Input Prompt"),
        gr.Slider(32, 512, value=128, step=8, label="Max Length"),
        gr.Slider(0.1, 2.0, value=1.0, step=0.05, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.95, step=0.01, label="Top-p (nucleus sampling)"),
    ],
    outputs=gr.Textbox(lines=8, label="Generated Text"),
    title="TinyLlama Text Generation",
    description="Enter a prompt and generate text using TinyLlama/TinyLlama_v1.1.",
)

# Launch the app.
demo.launch()