Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,12 +34,7 @@ class ChatCompletionRequest(BaseModel):
|
|
34 |
messages: List[ChatMessage]
|
35 |
temperature: float = 0.7
|
36 |
stream: bool = False
|
37 |
-
frequency_penalty: float = 0.0
|
38 |
-
presence_penalty: float = 0.0
|
39 |
-
max_tokens: int = 512
|
40 |
seed: int = None
|
41 |
-
top_p: float = 1.0
|
42 |
-
tools: List[Any] = None
|
43 |
|
44 |
def count_tokens(text: str) -> int:
|
45 |
# Используем тот же алгоритм, что и в прямом API
|
@@ -122,24 +117,6 @@ async def get_models():
|
|
122 |
except requests.RequestException as e:
|
123 |
raise HTTPException(status_code=500, detail=str(e))
|
124 |
|
125 |
-
def extract_system_prompt(messages: List[ChatMessage]) -> str:
|
126 |
-
"""Извлекает system prompt из сообщений"""
|
127 |
-
for msg in messages:
|
128 |
-
if msg.role == "system":
|
129 |
-
return msg.content
|
130 |
-
return ""
|
131 |
-
|
132 |
-
def get_conversation_history(messages: List[ChatMessage]) -> List[Dict[str, str]]:
|
133 |
-
"""Преобразует сообщения в формат для Flowise"""
|
134 |
-
history = []
|
135 |
-
for msg in messages:
|
136 |
-
if msg.role in ["user", "assistant"]:
|
137 |
-
history.append({
|
138 |
-
"role": msg.role,
|
139 |
-
"content": msg.content
|
140 |
-
})
|
141 |
-
return history
|
142 |
-
|
143 |
@app.post("/v1/chat/completions")
|
144 |
async def create_chat_completion(request: ChatCompletionRequest):
|
145 |
try:
|
@@ -147,20 +124,30 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
147 |
last_message = request.messages[-1]
|
148 |
if last_message.role != "user":
|
149 |
raise HTTPException(status_code=400, detail="Last message must be from user")
|
|
|
|
|
|
|
|
|
150 |
|
151 |
-
#
|
152 |
-
|
153 |
-
"
|
154 |
-
|
155 |
-
|
|
|
156 |
"role": msg.role,
|
157 |
"content": msg.content
|
158 |
-
}
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
162 |
}
|
163 |
|
|
|
|
|
|
|
|
|
164 |
# Засекаем время начала запроса
|
165 |
start_time = time.time()
|
166 |
|
@@ -187,8 +174,11 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
187 |
prompt_tokens = count_tokens(last_message.content)
|
188 |
completion_tokens = count_tokens(assistant_response)
|
189 |
|
|
|
|
|
|
|
190 |
response = JSONResponse({
|
191 |
-
"id":
|
192 |
"object": "chat.completion",
|
193 |
"created": int(start_time),
|
194 |
"model": "phi4-r1",
|
|
|
34 |
messages: List[ChatMessage]
|
35 |
temperature: float = 0.7
|
36 |
stream: bool = False
|
|
|
|
|
|
|
37 |
seed: int = None
|
|
|
|
|
38 |
|
39 |
def count_tokens(text: str) -> int:
|
40 |
# Используем тот же алгоритм, что и в прямом API
|
|
|
117 |
except requests.RequestException as e:
|
118 |
raise HTTPException(status_code=500, detail=str(e))
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
@app.post("/v1/chat/completions")
|
121 |
async def create_chat_completion(request: ChatCompletionRequest):
|
122 |
try:
|
|
|
124 |
last_message = request.messages[-1]
|
125 |
if last_message.role != "user":
|
126 |
raise HTTPException(status_code=400, detail="Last message must be from user")
|
127 |
+
|
128 |
+
# Формируем историю диалога
|
129 |
+
history = []
|
130 |
+
system_prompt = ""
|
131 |
|
132 |
+
# Ищем system prompt и создаем историю диалога
|
133 |
+
for msg in request.messages[:-1]: # исключаем последнее сообщение
|
134 |
+
if msg.role == "system":
|
135 |
+
system_prompt = msg.content
|
136 |
+
elif msg.role in ["user", "assistant"]:
|
137 |
+
history.append({
|
138 |
"role": msg.role,
|
139 |
"content": msg.content
|
140 |
+
})
|
141 |
+
|
142 |
+
# Формируем запрос к Flowise
|
143 |
+
flowise_request = {
|
144 |
+
"question": last_message.content
|
145 |
}
|
146 |
|
147 |
+
# Добавляем историю, если она есть
|
148 |
+
if history:
|
149 |
+
flowise_request["history"] = history
|
150 |
+
|
151 |
# Засекаем время начала запроса
|
152 |
start_time = time.time()
|
153 |
|
|
|
174 |
prompt_tokens = count_tokens(last_message.content)
|
175 |
completion_tokens = count_tokens(assistant_response)
|
176 |
|
177 |
+
# Создаем ID сессии, используя seed из запроса или генерируем новый
|
178 |
+
session_id = f"chatcmpl-{request.seed or os.urandom(12).hex()}"
|
179 |
+
|
180 |
response = JSONResponse({
|
181 |
+
"id": session_id,
|
182 |
"object": "chat.completion",
|
183 |
"created": int(start_time),
|
184 |
"model": "phi4-r1",
|