import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model from a local checkpoint directory.
model_path = "./Path-to-llm-folder"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Run on GPU when available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def generate_text(prompt, max_new_tokens=2000):
    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # cap generation length (previously defined but never passed)
        do_sample=True,
        temperature=0.7,
    )
    # Decode the full output sequence, dropping special tokens like EOS.
    return tokenizer.decode(output[0], skip_special_tokens=True)

prompt = "Write a code in react for calling api to server at https://example.com/test"
generated_text = generate_text(prompt)
print(generated_text)