# Spaces:
# Running
# Running
import json
import sys

import requests
# Conversation history sent to the server, oldest message first.
messages = [
    {"role": "user", "content": "helo"},
    {"role": "assistant", "content": "Hello! How can I assist you today?"},
    {"role": "user", "content": "who are you and give me a breif description of who you are"},
]
model = "meta-llama/llama-4-scout-17b-16e-instruct"
url = "http://127.0.0.1:8000/v1/generate"
payload = {
    "messages": messages,
    "model": model,
}

# Only lines carrying this exact prefix are treated as data events.
_SSE_DATA_PREFIX = "data: "

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a server that accepts the connection but never answers.
REQUEST_TIMEOUT = (5, 120)

# Set to True to echo every raw response line to stderr while debugging.
# Off by default so the raw bytes do not interleave with the generated text.
SHOW_RAW_LINES = False


def extract_text(raw_line):
    """Return the generated text carried by one streamed response line.

    Args:
        raw_line: One non-empty line of the response body, as bytes.

    Returns:
        The string stored under ``choices[0]["text"]`` of the JSON object
        that follows the ``data: `` prefix, or ``""`` when the line does not
        start with that prefix, is not valid JSON, or does not have that
        shape (for example an error object or a non-JSON end marker).
    """
    # Undecodable bytes are replaced rather than raised on, so one bad
    # chunk cannot abort the whole stream.
    decoded = raw_line.decode("utf-8", errors="replace")
    if not decoded.startswith(_SSE_DATA_PREFIX):
        return ""

    try:
        event = json.loads(decoded[len(_SSE_DATA_PREFIX):])
    except json.JSONDecodeError:
        return ""

    # Validate each level before indexing: a payload without "choices" used
    # to raise KeyError, and non-dict entries raised TypeError.
    if not isinstance(event, dict):
        return ""
    choices = event.get("choices")
    if not isinstance(choices, list) or not choices:
        return ""
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return ""

    text = first_choice.get("text")
    # A JSON null (or any non-string) must not be printed as "None".
    return text if isinstance(text, str) else ""


def main():
    """Post the chat payload and print the streamed completion text.

    Returns:
        0 when the stream was read to the end, 1 when the request failed
        (connection error, timeout, or a non-200 status code).
    """
    try:
        # The context manager releases the connection even if the stream
        # is abandoned part-way through.
        with requests.post(
            url, json=payload, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            if response.status_code != 200:
                print(
                    f"Request failed with status code {response.status_code}",
                    file=sys.stderr,
                )
                return 1

            for line in response.iter_lines():
                if not line:
                    continue  # blank lines carry no data
                if SHOW_RAW_LINES:
                    print(line, file=sys.stderr)
                text = extract_text(line)
                if text:
                    # Flush so each fragment shows up as soon as it arrives.
                    print(text, end="", flush=True)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}", file=sys.stderr)
        return 1

    print()  # terminate the line built up from the streamed fragments
    return 0


if __name__ == "__main__":
    sys.exit(main())