Update app.py
app.py
CHANGED
@@ -1,6 +1,6 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, StreamingResponse
 import os
 from dotenv import load_dotenv
 import requests
@@ -8,6 +8,7 @@ from typing import Dict, Any, List
 from pydantic import BaseModel
 import time
 import json
+import asyncio
 
 load_dotenv()
 
@@ -32,6 +33,7 @@ class ChatCompletionRequest(BaseModel):
     model: str
     messages: List[ChatMessage]
     temperature: float = 0.7
+    stream: bool = False
 
 def count_tokens(text: str) -> int:
     # Use the same algorithm as the direct API
@@ -47,6 +49,37 @@ def clean_assistant_response(text: str) -> str:
         text = text[:-3].strip()
     return text
 
+async def stream_response(response_text: str):
+    # Split the text into chunks for streaming
+    words = response_text.split()
+    for i in range(0, len(words), 2):
+        chunk = " ".join(words[i:i+2]) + " "
+        yield f"data: {json.dumps({
+            'id': f'chatcmpl-{os.urandom(12).hex()}',
+            'object': 'chat.completion.chunk',
+            'created': int(time.time()),
+            'model': 'phi4-r1',
+            'choices': [{
+                'index': 0,
+                'delta': {'content': chunk},
+                'finish_reason': None
+            }]
+        }, ensure_ascii=False)}\n\n"
+        await asyncio.sleep(0.1)  # Small delay between chunks
+
+    # Send the final message
+    yield f"data: {json.dumps({
+        'id': f'chatcmpl-{os.urandom(12).hex()}',
+        'object': 'chat.completion.chunk',
+        'created': int(time.time()),
+        'model': 'phi4-r1',
+        'choices': [{
+            'index': 0,
+            'delta': {},
+            'finish_reason': 'stop'
+        }]
+    }, ensure_ascii=False)}\n\n"
+
 @app.get("/")
 async def root():
     response = JSONResponse({"status": "FastFlowWrapper is running"})
@@ -109,6 +142,17 @@ async def create_chat_completion(request: ChatCompletionRequest):
         flowise_response = response.json()
         assistant_response = clean_assistant_response(flowise_response.get("text", ""))
 
+        # If streaming was requested
+        if request.stream:
+            return StreamingResponse(
+                stream_response(assistant_response),
+                media_type="text/event-stream"
+            )
+
+        # Count the tokens
+        prompt_tokens = count_tokens(last_message.content)
+        completion_tokens = count_tokens(assistant_response)
+
         response = JSONResponse({
             "id": "chatcmpl-" + os.urandom(12).hex(),
             "object": "chat.completion",
@@ -126,8 +170,9 @@ async def create_chat_completion(request: ChatCompletionRequest):
                 }
             ],
             "usage": {
-                "
-                "
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
             },
             "stats": {},
             "system_fingerprint": "phi4-r1"
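
Review note: the two yield f"data: {json.dumps({ ... })}\n\n" expressions place a multi-line dict literal inside a single-quoted f-string, which only parses on Python 3.12+ (PEP 701); on earlier interpreters it is a SyntaxError. Below is a minimal, version-independent sketch of the same generator that builds the payload as a plain dict first; the sse_chunk helper is illustrative and not part of this commit.

import asyncio
import json
import os
import time
from typing import Optional


def sse_chunk(delta: dict, finish_reason: Optional[str]) -> str:
    # Serialize one OpenAI-style chat.completion.chunk as an SSE "data:" event.
    payload = {
        "id": f"chatcmpl-{os.urandom(12).hex()}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": "phi4-r1",
        "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
    }
    return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"


async def stream_response(response_text: str):
    # Emit the text two words at a time, then a terminating "stop" chunk.
    words = response_text.split()
    for i in range(0, len(words), 2):
        yield sse_chunk({"content": " ".join(words[i:i + 2]) + " "}, None)
        await asyncio.sleep(0.1)  # small delay between chunks
    yield sse_chunk({}, "stop")

Note also that the stream ends after the finish_reason "stop" chunk without an OpenAI-style "data: [DONE]" sentinel; strict OpenAI SDK clients may expect that final line.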
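
To exercise the new stream flag end to end, a client along these lines should work; the base URL and the /v1/chat/completions path are assumptions (the route decorator is outside this diff), so adjust them to the deployed Space.

import json

import requests

BASE_URL = "http://localhost:8000"  # assumed address of the wrapper

with requests.post(
    f"{BASE_URL}/v1/chat/completions",  # assumed route for create_chat_completion
    json={
        "model": "phi4-r1",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,
    },
    stream=True,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # SSE events arrive as "data: {...}" lines separated by blank lines.
        if not line.startswith("data: "):
            continue
        chunk = json.loads(line[len("data: "):])
        delta = chunk["choices"][0]["delta"]
        print(delta.get("content", ""), end="", flush=True)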