Spaces:
Sleeping
Sleeping
File size: 7,791 Bytes
cf7a28a cfda68d 8e69f13 cfda68d dc0278a aa16976 0e91dea 8e69f13 cfda68d 319adbb cfda68d dc0278a 8e69f13 6e5e6f5 dc0278a aa16976 610d6eb 893627d 610d6eb aa16976 8e69f13 bc948df 8e69f13 bc948df 8e69f13 bc948df 8e69f13 bc948df 8e69f13 cfda68d cf7a28a dc0278a 0e91dea dc0278a ce08ada 0e91dea dc0278a cf7a28a dc0278a 393b09b d73fd0f 05469cf d73fd0f 05469cf d73fd0f 05469cf fc53287 05469cf d73fd0f fc53287 dc0278a d73fd0f 05469cf d73fd0f aa16976 0e91dea dc0278a 0e91dea 6e5e6f5 dc0278a aa16976 dc0278a aa16976 8e69f13 fc53287 8e69f13 d73fd0f cf7a28a d73fd0f dc0278a 0e91dea dc0278a 0e91dea ce08ada dc0278a aa16976 ce08ada dc0278a 8e69f13 ce08ada 0e91dea cf7a28a dc0278a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
import asyncio
import json
import os
import re
import time
from typing import Any, Dict, List, Optional

import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel
# Load environment variables through python-dotenv before they are read below.
load_dotenv()

app = FastAPI()

# CORS is left fully open: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Flowise connection settings taken from the environment.
# Either value is None when the corresponding variable is not set.
FLOWISE_API_BASE_URL = os.environ.get("FLOWISE_API_BASE_URL")
FLOWISE_CHATFLOW_ID = os.environ.get("FLOWISE_CHATFLOW_ID")
class ChatMessage(BaseModel):
    """A single message of a chat conversation."""

    # Author of the message; the chat handler in this file distinguishes
    # "system", "user" and "assistant".
    role: str
    # Text of the message.
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body accepted by ``POST /v1/chat/completions``."""

    # Model name supplied by the client.
    model: str
    # Conversation so far, oldest message first.
    messages: List[ChatMessage]
    # Sampling temperature supplied by the client.
    temperature: float = 0.7
    # When true, the answer is returned as a server-sent event stream.
    stream: bool = False
    # Declared Optional because the default is None. With a plain ``int``
    # annotation, pydantic v2 rejects an explicit ``"seed": null`` even though
    # omitting the field is accepted.
    seed: Optional[int] = None
def count_tokens(text: str) -> int:
    """Return a rough token count for *text*.

    The estimate is the number of whitespace-separated words plus the number
    of punctuation characters drawn from ``.,!?;:()[]{}``. According to the
    original author's note, this mirrors the counting used by the direct API.
    """
    word_total = len(text.split())
    # The marks are all distinct, so summing per-mark counts equals counting
    # every character of the text that belongs to the set.
    punctuation_total = sum(text.count(mark) for mark in ".,!?;:()[]{}")
    return word_total + punctuation_total
def clean_assistant_response(text: str) -> str:
    """Trim *text* and drop one trailing Markdown code fence, if present.

    Surrounding whitespace is removed first. If the result ends with three
    backticks, they are cut off and the remainder is trimmed again.
    """
    trimmed = text.strip()
    if not trimmed.endswith("```"):
        return trimmed
    return trimmed[:-3].strip()
async def stream_response(
    response_text: str,
    words_per_chunk: int = 2,
    delay: float = 0.1,
):
    """Yield *response_text* as OpenAI-style ``chat.completion.chunk`` events.

    The text is cut into pieces (a word plus the whitespace that follows it)
    and sent ``words_per_chunk`` pieces at a time as server-sent events. After
    the content comes an event with ``finish_reason`` set to ``'stop'``, then
    the ``data: [DONE]`` sentinel.

    Args:
        response_text: Complete answer to stream.
        words_per_chunk: Number of pieces per event; values below 1 are
            treated as 1.
        delay: Seconds to sleep after each content event.

    Yields:
        SSE-formatted strings, each terminated by a blank line.
    """
    # All chunks of one completion share the same id and creation timestamp.
    completion_id = f'chatcmpl-{os.urandom(12).hex()}'
    created = int(time.time())

    def event(delta, finish_reason):
        payload = {
            'id': completion_id,
            'object': 'chat.completion.chunk',
            'created': created,
            'model': 'phi4-r1',
            'choices': [{
                'index': 0,
                'delta': delta,
                'finish_reason': finish_reason
            }]
        }
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    # Keep the original whitespace attached to each word. Concatenating the
    # streamed deltas then reproduces the text exactly, newlines included,
    # instead of collapsing everything to single spaces.
    pieces = re.findall(r"\S+\s*|\s+", response_text)
    step = max(1, words_per_chunk)
    for start in range(0, len(pieces), step):
        yield event({'content': "".join(pieces[start:start + step])}, None)
        await asyncio.sleep(delay)  # small pause between chunks

    # Final chunk: empty delta with the stop reason.
    yield event({}, 'stop')
    # Sentinel that OpenAI-compatible clients use to detect end of stream.
    yield "data: [DONE]\n\n"
@app.get("/")
async def root():
    """Return a small JSON document stating that the wrapper is running."""
    status_reply = JSONResponse(content={"status": "FastFlowWrapper is running"})
    # Spell out the charset explicitly in the content type.
    status_reply.headers["Content-Type"] = "application/json; charset=utf-8"
    return status_reply
@app.get("/v1/models")
async def get_models():
    """List the Flowise chatflows in the shape of an OpenAI model list.

    Returns:
        A JSON response ``{"object": "list", "data": [...]}`` with one entry
        per chatflow; each entry's ``id`` is the chatflow's ``id``.

    Raises:
        HTTPException: Status 500 when the request to Flowise fails, times
            out, or returns an error status.
    """
    url = f"{FLOWISE_API_BASE_URL}/chatflows"
    try:
        # requests is blocking, so run it on a worker thread to keep the
        # event loop free. The timeout matches the prediction call so a
        # stalled Flowise cannot hang the request forever.
        loop = asyncio.get_running_loop()
        upstream = await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=10)
        )
        upstream.raise_for_status()
        chatflows = upstream.json()
    except requests.RequestException as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Convert to the OpenAI API format.
    created = int(time.time())
    models = [
        {
            "id": chatflow.get("id"),
            "object": "model",
            "created": created,
            "owned_by": "flowise",
            "permission": [],
            "root": "flowise",
            "parent": None,
            "system_fingerprint": "phi4-r1"
        }
        for chatflow in chatflows
    ]

    response = JSONResponse({"object": "list", "data": models})
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response
def _build_flowise_payload(messages):
    """Turn OpenAI-style messages into ``(question, payload)`` for Flowise.

    ``messages`` must be non-empty. ``question`` is the last message's
    content, prefixed by the first system message (if any) and a blank line.
    ``payload`` is the JSON body for the prediction call; it carries a
    ``history`` key only when earlier user/assistant messages exist.
    """
    # Only the first system message is used as the system prompt.
    system_prompt = next(
        (msg.content for msg in messages if msg.role == "system"), ""
    )
    user_message = messages[-1].content
    if system_prompt:
        question = f"{system_prompt}\n\n{user_message}"
    else:
        question = user_message

    # Map earlier turns to Flowise role names. The last message is excluded
    # because it is the question itself; other roles are skipped.
    flowise_roles = {"user": "userMessage", "assistant": "apiMessage"}
    history = [
        {"role": flowise_roles[msg.role], "content": msg.content}
        for msg in messages[:-1]
        if msg.role in flowise_roles
    ]

    payload = {"question": question}
    if history:
        payload["history"] = history
    return question, payload


@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest):
    """Answer an OpenAI-style chat completion request through Flowise.

    The last message must come from the user. It is sent to the configured
    chatflow together with the first system message and the earlier
    user/assistant turns as history.

    Returns:
        A ``StreamingResponse`` of server-sent events when ``request.stream``
        is true, otherwise a JSON ``chat.completion`` document with estimated
        token usage.

    Raises:
        HTTPException: Status 400 when ``messages`` is empty or the last
            message is not from the user; status 500 when the request to
            Flowise fails, times out, or returns an error status.
    """
    # An empty list would otherwise surface as an IndexError (HTTP 500).
    if not request.messages:
        raise HTTPException(status_code=400, detail="messages must not be empty")
    if request.messages[-1].role != "user":
        raise HTTPException(status_code=400, detail="Last message must be from user")

    combined_message, flowise_request = _build_flowise_payload(request.messages)

    # Record when the upstream request started; reported as "created".
    start_time = time.time()

    try:
        # requests is blocking, so run it on a worker thread to keep the
        # event loop free while Flowise is working.
        loop = asyncio.get_running_loop()
        upstream = await loop.run_in_executor(
            None,
            lambda: requests.post(
                f"{FLOWISE_API_BASE_URL}/prediction/{FLOWISE_CHATFLOW_ID}",
                json=flowise_request,
                timeout=10
            ),
        )
        upstream.raise_for_status()
        flowise_response = upstream.json()
    except requests.RequestException as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # ``or ""`` also covers an explicit null "text" from upstream.
    assistant_response = clean_assistant_response(flowise_response.get("text") or "")

    if request.stream:
        return StreamingResponse(
            stream_response(assistant_response),
            media_type="text/event-stream"
        )

    prompt_tokens = count_tokens(combined_message)
    completion_tokens = count_tokens(assistant_response)

    # Use the client's seed for the id when given. The check is against None
    # so that seed=0 is honoured rather than treated as missing.
    id_suffix = request.seed if request.seed is not None else os.urandom(12).hex()
    session_id = f"chatcmpl-{id_suffix}"

    response = JSONResponse({
        "id": session_id,
        "object": "chat.completion",
        "created": int(start_time),
        "model": "phi4-r1",
        "choices": [
            {
                "index": 0,
                "logprobs": None,
                "finish_reason": "stop",
                "message": {
                    "role": "assistant",
                    "content": assistant_response
                }
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        },
        "stats": {},
        "system_fingerprint": "phi4-r1"
    })
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response