"""Stream a chat completion from a local generation endpoint and print it.

The script POSTs a fixed conversation to ``url`` and prints the generated
text as the server streams it back in lines of the form ``data: <json>``.
"""

import json
from typing import Optional, Union

# Conversation sent to the model, oldest message first.
messages = [
    {"role": "user", "content": "helo"},
    {"role": "assistant", "content": "Hello! How can I assist you today?"},
    {
        "role": "user",
        "content": "who are you and give me a breif description of who you are",
    },
]
model = "meta-llama/llama-4-scout-17b-16e-instruct"
url = "http://127.0.0.1:8000/v1/generate"
payload = {"messages": messages, "model": model}

# Prefix that marks a streamed line as carrying a JSON event.
DATA_PREFIX = "data: "

# When true, every raw line received is echoed (as bytes) before its text is
# printed. Kept on to match the script's existing output; set to False to see
# only the generated text.
ECHO_RAW_LINES = True

# (connect, read) timeouts in seconds. The read timeout bounds the gap between
# two streamed chunks, not the whole response, so it is kept generous.
REQUEST_TIMEOUT = (10, 300)


def extract_text(line: Union[bytes, str]) -> Optional[str]:
    """Return the generated text carried by one streamed line, if any.

    Args:
        line: One line of the response body, as bytes or already-decoded text.

    Returns:
        The ``text`` value of the first entry in the event's ``choices`` list,
        or ``None`` when the line is not a ``data: `` line, is not valid JSON,
        or does not contain a usable ``choices[0]["text"]`` value.
    """
    if isinstance(line, bytes):
        # Replace undecodable bytes rather than aborting the whole stream.
        line = line.decode("utf-8", errors="replace")
    if not line.startswith(DATA_PREFIX):
        return None

    try:
        event = json.loads(line[len(DATA_PREFIX):])
    except json.JSONDecodeError:
        # Non-JSON payloads are skipped, not treated as errors.
        return None

    # Events without a non-empty ``choices`` list (for example an error
    # object) are skipped instead of raising KeyError/TypeError.
    if not isinstance(event, dict):
        return None
    choices = event.get("choices")
    if not isinstance(choices, list) or not choices:
        return None
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return None

    text = first_choice.get("text")
    return None if text is None else str(text)


def main() -> int:
    """Send the request and print the streamed text.

    Returns:
        ``0`` when the response had status 200 and the stream was consumed,
        ``1`` when the request failed or returned another status code.
    """
    # Imported here so that extract_text can be imported and tested without
    # the HTTP dependency being installed.
    import requests

    try:
        # The context manager releases the connection even if the loop exits
        # early or raises.
        with requests.post(
            url, json=payload, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            if response.status_code != 200:
                print(f"Request failed with status code {response.status_code}")
                return 1

            for line in response.iter_lines():
                if not line:
                    continue
                if ECHO_RAW_LINES:
                    print(line)
                text = extract_text(line)
                if text is not None:
                    # Flush so each fragment shows up as soon as it arrives.
                    print(text, end="", flush=True)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())