nitrox committed on
Commit
8e69f13
·
verified ·
1 Parent(s): 610d6eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -3
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.responses import JSONResponse
4
  import os
5
  from dotenv import load_dotenv
6
  import requests
@@ -8,6 +8,7 @@ from typing import Dict, Any, List
8
  from pydantic import BaseModel
9
  import time
10
  import json
 
11
 
12
  load_dotenv()
13
 
@@ -32,6 +33,7 @@ class ChatCompletionRequest(BaseModel):
32
  model: str
33
  messages: List[ChatMessage]
34
  temperature: float = 0.7
 
35
 
36
  def count_tokens(text: str) -> int:
37
  # Используем тот же алгоритм, что и в прямом API
@@ -47,6 +49,37 @@ def clean_assistant_response(text: str) -> str:
47
  text = text[:-3].strip()
48
  return text
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  @app.get("/")
51
  async def root():
52
  response = JSONResponse({"status": "FastFlowWrapper is running"})
@@ -109,6 +142,17 @@ async def create_chat_completion(request: ChatCompletionRequest):
109
  flowise_response = response.json()
110
  assistant_response = clean_assistant_response(flowise_response.get("text", ""))
111
 
 
 
 
 
 
 
 
 
 
 
 
112
  response = JSONResponse({
113
  "id": "chatcmpl-" + os.urandom(12).hex(),
114
  "object": "chat.completion",
@@ -126,8 +170,9 @@ async def create_chat_completion(request: ChatCompletionRequest):
126
  }
127
  ],
128
  "usage": {
129
- "completion_tokens": 0, # Устанавливаем 0, так как это не важно
130
- "total_tokens": 0 # Устанавливаем 0, так как это не важно
 
131
  },
132
  "stats": {},
133
  "system_fingerprint": "phi4-r1"
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import JSONResponse, StreamingResponse
4
  import os
5
  from dotenv import load_dotenv
6
  import requests
 
8
  from pydantic import BaseModel
9
  import time
10
  import json
11
+ import asyncio
12
 
13
  load_dotenv()
14
 
 
33
  model: str
34
  messages: List[ChatMessage]
35
  temperature: float = 0.7
36
+ stream: bool = False
37
 
38
  def count_tokens(text: str) -> int:
39
  # Используем тот же алгоритм, что и в прямом API
 
49
  text = text[:-3].strip()
50
  return text
51
 
52
async def stream_response(response_text: str):
    """Yield *response_text* as OpenAI-style SSE ``chat.completion.chunk`` events.

    The text is split on whitespace and emitted two words at a time, each as a
    ``data: {...}\n\n`` server-sent event, followed by one final event with an
    empty delta and ``finish_reason: "stop"`` so clients know the stream ended.

    Parameters:
        response_text: the assistant reply to stream back to the client.

    Yields:
        str: one SSE-framed JSON chunk per iteration.
    """

    def _sse_event(delta: dict, finish_reason) -> str:
        # Build the payload as a plain dict and serialize it separately.
        # The original nested a multi-line json.dumps({...}) with same-kind
        # quotes directly inside a single-quoted f-string, which is a syntax
        # error on every Python before 3.12 (PEP 701) and hard to read.
        payload = {
            "id": f"chatcmpl-{os.urandom(12).hex()}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": "phi4-r1",
            "choices": [{
                "index": 0,
                "delta": delta,
                "finish_reason": finish_reason,
            }],
        }
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    # Split the text into chunks of two words for streaming.
    words = response_text.split()
    for i in range(0, len(words), 2):
        chunk = " ".join(words[i:i + 2]) + " "
        yield _sse_event({"content": chunk}, None)
        await asyncio.sleep(0.1)  # small delay between chunks to simulate streaming

    # Send the final message terminating the stream.
    yield _sse_event({}, "stop")
82
+
83
  @app.get("/")
84
  async def root():
85
  response = JSONResponse({"status": "FastFlowWrapper is running"})
 
142
  flowise_response = response.json()
143
  assistant_response = clean_assistant_response(flowise_response.get("text", ""))
144
 
145
+ # Если запрошен стриминг
146
+ if request.stream:
147
+ return StreamingResponse(
148
+ stream_response(assistant_response),
149
+ media_type="text/event-stream"
150
+ )
151
+
152
+ # Подсчитываем токены
153
+ prompt_tokens = count_tokens(last_message.content)
154
+ completion_tokens = count_tokens(assistant_response)
155
+
156
  response = JSONResponse({
157
  "id": "chatcmpl-" + os.urandom(12).hex(),
158
  "object": "chat.completion",
 
170
  }
171
  ],
172
  "usage": {
173
+ "prompt_tokens": prompt_tokens,
174
+ "completion_tokens": completion_tokens,
175
+ "total_tokens": prompt_tokens + completion_tokens
176
  },
177
  "stats": {},
178
  "system_fingerprint": "phi4-r1"