Maouu committed
Commit 7a02485 · 1 Parent(s): 1c58916

revert app.py

Files changed (3)
  1. app.py +7 -92
  2. app2.py +524 -0
  3. test.py +4 -2
app.py CHANGED
@@ -163,7 +163,7 @@ async def groqgenerate(json_data: Dict[str, Any]):
         # Create streaming response
         stream = client.chat.completions.create(
             messages=messages,
-            model=json_data.get("model", "meta-llama/llama-4-scout-17b-16e-instruct"),
+            model="meta-llama/llama-4-scout-17b-16e-instruct",
             temperature=json_data.get("temperature", 0.7),
             max_completion_tokens=json_data.get("max_tokens", 1024),
             top_p=json_data.get("top_p", 1),
@@ -181,7 +181,7 @@ async def groqgenerate(json_data: Dict[str, Any]):
                     "id": chunk_id,
                     "object": "chat.completion.chunk",
                     "created": created,
-                    "model": json_data.get("model", "meta-llama/llama-4-scout-17b-16e-instruct"),
+                    "model": json_data.get("model", "llama-3.3-70b-versatile"),
                     "choices": [{
                         "index": 0,
                         "text": content,
@@ -197,7 +197,7 @@ async def groqgenerate(json_data: Dict[str, Any]):
             "id": chunk_id,
             "object": "chat.completion.chunk",
             "created": created,
-            "model": json_data.get("model", "meta-llama/llama-4-scout-17b-16e-instruct"),
+            "model": json_data.get("model", "llama-3.3-70b-versatile"),
             "choices": [],
             "usage": {
                 "prompt_tokens": len(messages),
@@ -230,11 +230,9 @@ async def vercelXaigenerate(json_data: Dict[str, Any]):
     request_data = {
         "id": "".join(random.choices("0123456789abcdef", k=16)),
         "messages": messages,
-        "selectedModel": json_data.get("model", "grok-2-1212"),
+        "selectedModel": "grok-2-1212"
     }
 
-    print(request_data)
-
     chunk_id = "xai-" + "".join(random.choices("0123456789abcdef", k=32))
     created = int(asyncio.get_event_loop().time())
     total_tokens = 0
@@ -316,7 +314,7 @@ async def vercelGroqgenerate(json_data: Dict[str, Any]):
     request_data = {
         "id": "".join(random.choices("0123456789abcdef", k=16)),
         "messages": messages,
-        "selectedModel": json_data.get("model", "deepseek-r1-distill-llama-70b"),
+        "selectedModel": "deepseek-r1-distill-llama-70b"
     }
 
     chunk_id = "vercel-groq-" + "".join(random.choices("0123456789abcdef", k=32))
@@ -346,7 +344,7 @@ async def vercelGroqgenerate(json_data: Dict[str, Any]):
                                     "id": chunk_id,
                                     "object": "chat.completion.chunk",
                                     "created": created,
-                                    "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
+                                    "model": json_data.get("model", "grok-2-1212"),
                                     "choices": [{
                                         "index": 0,
                                         "text": text,
@@ -362,7 +360,7 @@ async def vercelGroqgenerate(json_data: Dict[str, Any]):
                                     "id": chunk_id,
                                     "object": "chat.completion.chunk",
                                     "created": created,
-                                    "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
+                                    "model": json_data.get("model", "llama-8b"),
                                     "choices": [],
                                     "usage": {
                                         "prompt_tokens": len(messages),
@@ -439,86 +437,3 @@ async def scrape_md(request: Request):
     data = scrape_to_markdown(url)
 
     return {"markdown": data}
-
-@app.post("/v1/generate")
-async def api_generate(request: Request):
-    data = await request.json()
-    messages = data["messages"]
-    model = data["model"]
-    if not messages:
-        return {"error": "messages is required"}
-    elif not model:
-        return {"error": "Model is required"}
-
-    try:
-        json_data = {
-            'model': model,
-            'max_tokens': None,
-            'temperature': 0.7,
-            'top_p': 0.7,
-            'top_k': 50,
-            'repetition_penalty': 1,
-            'stream_tokens': True,
-            'stop': ['<|eot_id|>', '<|eom_id|>'],
-            'messages': messages,
-            'stream': True,
-        }
-
-        xaimodels = ["grok-3-mini", "grok-2-1212", "grok-3", "grok-3-fast", "grok-3-mini-fast"]
-
-        if model in xaimodels:
-            return StreamingResponse(vercelXaigenerate(json_data), media_type='text/event-stream')
-        else:
-            try:
-                return StreamingResponse(vercelGroqgenerate(json_data), media_type='text/event-stream')
-            except Exception as e:
-                try:
-                    return StreamingResponse(generate(json_data), media_type='text/event-stream')
-                except Exception as e:
-                    return StreamingResponse(groqgenerate(json_data), media_type='text/event-stream')
-    except Exception as e:
-        return {"error": str(e)}
-
-@app.post("/v1/generate-images")
-async def generate_images(request: Request):
-    data = await request.json()
-    prompt = data.get("prompt")
-    provider = data.get("provider")
-    modelId = data.get("modelId")
-
-    if not prompt:
-        return {"error": "Prompt is required"}
-    if not provider:
-        return {"error": "Provider is required"}
-    if not modelId:
-        return {"error": "Model ID is required"}
-
-    headers = {
-        'accept': '*/*',
-        'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
-        'content-type': 'application/json',
-        'origin': 'https://fal-image-generator.vercel.app',
-        'priority': 'u=1, i',
-        'referer': 'https://fal-image-generator.vercel.app/',
-        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"macOS"',
-        'sec-fetch-dest': 'empty',
-        'sec-fetch-mode': 'cors',
-        'sec-fetch-site': 'same-origin',
-        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
-    }
-
-    json_data = {
-        'prompt': prompt,
-        'provider': 'fal',
-        'modelId': 'fal-ai/fast-sdxl',
-    }
-
-    async with httpx.AsyncClient() as client:
-        response = await client.post(
-            'https://fal-image-generator.vercel.app/api/generate-images',
-            headers=headers,
-            json=json_data
-        )
-        return response.json()
app2.py ADDED
@@ -0,0 +1,524 @@
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel
+import asyncio
+import httpx
+import random
+from config import cookies, headers, groqapi
+from prompts import ChiplingPrompts
+from groq import Groq
+import json
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from pathlib import Path
+from collections import Counter, defaultdict
+from utils.logger import log_request
+from chipsearch.main import search
+from scrape.main import scrape_to_markdown
+
+app = FastAPI()
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["http://localhost:8080", "https://www.chipling.xyz"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+templates = Jinja2Templates(directory="templates")
+LOG_FILE = Path("logs.json")
+
+@app.get("/dashboard", response_class=HTMLResponse)
+async def dashboard(request: Request, endpoint: str = None):
+    try:
+        with open("logs.json") as f:
+            logs = json.load(f)
+    except FileNotFoundError:
+        logs = []
+
+    # Filter logs
+    if endpoint:
+        logs = [log for log in logs if log["endpoint"] == endpoint]
+
+    # Summary stats
+    total_requests = len(logs)
+    endpoint_counts = Counter(log["endpoint"] for log in logs)
+    query_counts = Counter(log["query"] for log in logs)
+
+    # Requests per date
+    date_counts = defaultdict(int)
+    for log in logs:
+        date = log["timestamp"].split("T")[0]
+        date_counts[date] += 1
+
+    # Sort logs by timestamp (desc)
+    logs_sorted = sorted(logs, key=lambda x: x["timestamp"], reverse=True)
+
+    return templates.TemplateResponse("dashboard.html", {
+        "request": request,
+        "logs": logs_sorted[:100], # show top 100
+        "total_requests": total_requests,
+        "endpoint_counts": dict(endpoint_counts),
+        "query_counts": query_counts.most_common(5),
+        "date_counts": dict(date_counts),
+        "filter_endpoint": endpoint or "",
+    })
+
+# Define request model
+class ChatRequest(BaseModel):
+    message: str
+    messages: List[Dict[Any, Any]]
+    model: Optional[str] = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+
+client = Groq(api_key=groqapi)
+
+async def generate(json_data: Dict[str, Any]):
+    max_retries = 5
+    for attempt in range(max_retries):
+        async with httpx.AsyncClient(timeout=None) as client:
+            try:
+                request_ctx = client.stream(
+                    "POST",
+                    "https://api.together.ai/inference",
+                    cookies=cookies,
+                    headers=headers,
+                    json=json_data
+                )
+
+                async with request_ctx as response:
+                    if response.status_code == 200:
+                        async for line in response.aiter_lines():
+                            if line:
+                                yield f"{line}\n"
+                        return
+                    elif response.status_code == 429:
+                        if attempt < max_retries - 1:
+                            await asyncio.sleep(0.5)
+                            continue
+                        yield "data: [Rate limited, max retries]\n\n"
+                        return
+                    else:
+                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
+                        return
+            except Exception as e:
+                yield f"data: [Connection error: {str(e)}]\n\n"
+                return
+
+    yield "data: [Max retries reached]\n\n"
+
+def convert_to_groq_schema(messages: List[Dict[str, Any]]) -> List[Dict[str, str]]:
+    converted = []
+    for message in messages:
+        role = message.get("role", "user")
+        content = message.get("content")
+
+        if isinstance(content, list):
+            flattened = []
+            for item in content:
+                if isinstance(item, dict) and item.get("type") == "text":
+                    flattened.append(item.get("text", ""))
+            content = "\n".join(flattened)
+        elif not isinstance(content, str):
+            content = str(content)
+
+        converted.append({"role": role, "content": content})
+    return converted
+
+
+def conver_to_xai_schema(messages: List[Dict[str, Any]]) -> List[Dict[str, str]]:
+    converted = []
+    for message in messages:
+        role = message.get("role", "user")
+        content = message.get("content", "")
+
+        if isinstance(content, list):
+            # Handle content that's already in parts format
+            parts = content
+            text_content = "\n".join([p.get("text", "") for p in content if p.get("type") == "text"])
+        else:
+            # Create parts format for text content
+            text_content = str(content)
+            parts = [{"type": "text", "text": text_content}]
+            if role == "assistant":
+                parts.insert(0, {"type": "step-start"})
+
+        converted.append({
+            "role": role,
+            "content": text_content,
+            "parts": parts
+        })
+    return converted
+
+
+async def groqgenerate(json_data: Dict[str, Any]):
+    try:
+        messages = convert_to_groq_schema(json_data["messages"])
+        chunk_id = "groq-" + "".join(random.choices("0123456789abcdef", k=32))
+        created = int(asyncio.get_event_loop().time())
+
+        # Create streaming response
+        stream = client.chat.completions.create(
+            messages=messages,
+            model=json_data.get("model", "meta-llama/llama-4-scout-17b-16e-instruct"),
+            temperature=json_data.get("temperature", 0.7),
+            max_completion_tokens=json_data.get("max_tokens", 1024),
+            top_p=json_data.get("top_p", 1),
+            stop=json_data.get("stop", None),
+            stream=True,
+        )
+
+        total_tokens = 0
+
+        # Use normal for-loop since stream is not async
+        for chunk in stream:
+            content = chunk.choices[0].delta.content
+            if content:
+                response = {
+                    "id": chunk_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": json_data.get("model", "meta-llama/llama-4-scout-17b-16e-instruct"),
+                    "choices": [{
+                        "index": 0,
+                        "text": content,
+                        "logprobs": None,
+                        "finish_reason": None
+                    }],
+                    "usage": None
+                }
+                yield f"data: {json.dumps(response)}\n\n"
+                total_tokens += 1
+
+        final = {
+            "id": chunk_id,
+            "object": "chat.completion.chunk",
+            "created": created,
+            "model": json_data.get("model", "meta-llama/llama-4-scout-17b-16e-instruct"),
+            "choices": [],
+            "usage": {
+                "prompt_tokens": len(messages),
+                "completion_tokens": total_tokens,
+                "total_tokens": len(messages) + total_tokens,
+            }
+        }
+        yield f"data: {json.dumps(final)}\n\n"
+        yield "data: [DONE]\n\n"
+
+    except Exception as e:
+        generate(json_data)
+
+
+async def vercelXaigenerate(json_data: Dict[str, Any]):
+    headers = {
+        'accept': '*/*',
+        'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
+        'content-type': 'application/json',
+        'origin': 'https://ai-sdk-starter-xai.vercel.app',
+        'referer': 'https://ai-sdk-starter-xai.vercel.app/',
+        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"macOS"',
+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36'
+    }
+
+    messages = conver_to_xai_schema(json_data["messages"])
+
+    request_data = {
+        "id": "".join(random.choices("0123456789abcdef", k=16)),
+        "messages": messages,
+        "selectedModel": json_data.get("model", "grok-2-1212"),
+    }
+
+    print(request_data)
+
+    chunk_id = "xai-" + "".join(random.choices("0123456789abcdef", k=32))
+    created = int(asyncio.get_event_loop().time())
+    total_tokens = 0
+
+    try:
+        async with httpx.AsyncClient(timeout=None) as client:
+            async with client.stream(
+                "POST",
+                "https://ai-sdk-starter-xai.vercel.app/api/chat",
+                headers=headers,
+                json=request_data
+            ) as request_ctx:
+                if request_ctx.status_code == 200:
+                    async for line in request_ctx.aiter_lines():
+                        if line:
+                            if line.startswith('0:'):
+                                # Clean up the text and properly escape JSON characters
+                                text = line[2:].strip()
+                                if text.startswith('"') and text.endswith('"'):
+                                    text = text[1:-1]
+                                text = text.replace('\\n', '\n').replace('\\', '')
+
+                                response = {
+                                    "id": chunk_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": json_data.get("model", "grok-2-1212"),
+                                    "choices": [{
+                                        "index": 0,
+                                        "text": text,
+                                        "logprobs": None,
+                                        "finish_reason": None
+                                    }],
+                                    "usage": None
+                                }
+                                yield f"data: {json.dumps(response)}\n\n"
+                                total_tokens += 1
+                            elif line.startswith('d:'):
+                                final = {
+                                    "id": chunk_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": json_data.get("model", "grok-2-1212"),
+                                    "choices": [],
+                                    "usage": {
+                                        "prompt_tokens": len(messages),
+                                        "completion_tokens": total_tokens,
+                                        "total_tokens": len(messages) + total_tokens
+                                    }
+                                }
+                                yield f"data: {json.dumps(final)}\n\n"
+                                yield "data: [DONE]\n\n"
+                                return
+                else:
+                    yield f"data: [Unexpected status code: {request_ctx.status_code}]\n\n"
+    except Exception as e:
+        yield f"data: [Connection error: {str(e)}]\n\n"
+
+
+async def vercelGroqgenerate(json_data: Dict[str, Any]):
+    headers = {
+        'accept': '*/*',
+        'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
+        'content-type': 'application/json',
+        'origin': 'https://ai-sdk-starter-groq.vercel.app',
+        'priority': 'u=1, i',
+        'referer': 'https://ai-sdk-starter-groq.vercel.app/',
+        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"macOS"',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'same-origin',
+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
+    }
+
+    messages = conver_to_xai_schema(json_data["messages"])
+
+    request_data = {
+        "id": "".join(random.choices("0123456789abcdef", k=16)),
+        "messages": messages,
+        "selectedModel": json_data.get("model", "deepseek-r1-distill-llama-70b"),
+    }
+
+    chunk_id = "vercel-groq-" + "".join(random.choices("0123456789abcdef", k=32))
+    created = int(asyncio.get_event_loop().time())
+    total_tokens = 0
+
+    try:
+        async with httpx.AsyncClient(timeout=None) as client:
+            async with client.stream(
+                "POST",
+                "https://ai-sdk-starter-groq.vercel.app/api/chat",
+                headers=headers,
+                json=request_data
+            ) as request_ctx:
+                print(request_ctx.status_code)
+                if request_ctx.status_code == 200:
+                    async for line in request_ctx.aiter_lines():
+                        if line:
+                            if line.startswith('0:'):
+                                # Clean up the text and properly escape JSON characters
+                                text = line[2:].strip()
+                                if text.startswith('"') and text.endswith('"'):
+                                    text = text[1:-1]
+                                text = text.replace('\\n', '\n').replace('\\', '')
+
+                                response = {
+                                    "id": chunk_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
+                                    "choices": [{
+                                        "index": 0,
+                                        "text": text,
+                                        "logprobs": None,
+                                        "finish_reason": None
+                                    }],
+                                    "usage": None
+                                }
+                                yield f"data: {json.dumps(response)}\n\n"
+                                total_tokens += 1
+                            elif line.startswith('d:'):
+                                final = {
+                                    "id": chunk_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
+                                    "choices": [],
+                                    "usage": {
+                                        "prompt_tokens": len(messages),
+                                        "completion_tokens": total_tokens,
+                                        "total_tokens": len(messages) + total_tokens
+                                    }
+                                }
+                                yield f"data: {json.dumps(final)}\n\n"
+                                yield "data: [DONE]\n\n"
+                                return
+                else:
+                    yield f"data: [Unexpected status code: {request_ctx.status_code}]\n\n"
+    except Exception as e:
+        yield f"data: [Connection error: {str(e)}]\n\n"
+
+
+@app.get("/")
+async def index():
+    return {"status": "ok", "message": "Welcome to the Chipling API!", "version": "1.0", "routes": ["/chat", "/generate-modules", "/generate-topics"]}
+
+@app.post("/chat")
+async def chat(request: ChatRequest):
+    current_messages = request.messages.copy()
+
+    # Handle both single text or list content
+    if request.messages and isinstance(request.messages[-1].get('content'), list):
+        current_messages = request.messages
+    else:
+        current_messages.append({
+            'content': [{
+                'type': 'text',
+                'text': request.message
+            }],
+            'role': 'user'
+        })
+
+    json_data = {
+        'model': request.model,
+        'max_tokens': None,
+        'temperature': 0.7,
+        'top_p': 0.7,
+        'top_k': 50,
+        'repetition_penalty': 1,
+        'stream_tokens': True,
+        'stop': ['<|eot_id|>', '<|eom_id|>'],
+        'messages': current_messages,
+        'stream': True,
+    }
+
+    selected_generator = random.choice([generate, groqgenerate, vercelGroqgenerate, vercelXaigenerate])
+    log_request("/chat", selected_generator.__name__)
+    return StreamingResponse(selected_generator(json_data), media_type='text/event-stream')
+
+
+
+@app.post("/chipsearch")
+async def chipsearch(request: Request):
+    data = search(
+        term=request.query_params.get("term"),
+        num_results=int(request.query_params.get("num_results", 10)),
+        advanced=bool(request.query_params.get("advanced", False)),
+        unique=bool(request.query_params.get("unique", False))
+    )
+    return data
+
+
+@app.post("/scrape-md")
+async def scrape_md(request: Request):
+    data = await request.json()
+    url = data.get("url")
+    if not url:
+        return {"error": "URL is required"}
+
+    data = scrape_to_markdown(url)
+
+    return {"markdown": data}
+
+@app.post("/v1/generate")
+async def api_generate(request: Request):
+    data = await request.json()
+    messages = data["messages"]
+    model = data["model"]
+    if not messages:
+        return {"error": "messages is required"}
+    elif not model:
+        return {"error": "Model is required"}
+
+    try:
+        json_data = {
+            'model': model,
+            'max_tokens': None,
+            'temperature': 0.7,
+            'top_p': 0.7,
+            'top_k': 50,
+            'repetition_penalty': 1,
+            'stream_tokens': True,
+            'stop': ['<|eot_id|>', '<|eom_id|>'],
+            'messages': messages,
+            'stream': True,
+        }
+
+        xaimodels = ["grok-3-mini", "grok-2-1212", "grok-3", "grok-3-fast", "grok-3-mini-fast"]
+
+        if model in xaimodels:
+            return StreamingResponse(vercelXaigenerate(json_data), media_type='text/event-stream')
+        else:
+            try:
+                return StreamingResponse(vercelGroqgenerate(json_data), media_type='text/event-stream')
+            except Exception as e:
+                try:
+                    return StreamingResponse(generate(json_data), media_type='text/event-stream')
+                except Exception as e:
+                    return StreamingResponse(groqgenerate(json_data), media_type='text/event-stream')
    except Exception as e:
+        return {"error": str(e)}
+
+@app.post("/v1/generate-images")
+async def generate_images(request: Request):
+    data = await request.json()
+    prompt = data.get("prompt")
+    provider = data.get("provider")
+    modelId = data.get("modelId")
+
+    if not prompt:
+        return {"error": "Prompt is required"}
+    if not provider:
+        return {"error": "Provider is required"}
+    if not modelId:
+        return {"error": "Model ID is required"}
+
+    headers = {
+        'accept': '*/*',
+        'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
+        'content-type': 'application/json',
+        'origin': 'https://fal-image-generator.vercel.app',
+        'priority': 'u=1, i',
+        'referer': 'https://fal-image-generator.vercel.app/',
+        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"macOS"',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'same-origin',
+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
+    }
+
+    json_data = {
+        'prompt': prompt,
+        'provider': 'fal',
+        'modelId': 'fal-ai/fast-sdxl',
+    }
+
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            'https://fal-image-generator.vercel.app/api/generate-images',
+            headers=headers,
+            json=json_data
+        )
+        return response.json()
test.py CHANGED
@@ -7,8 +7,9 @@ messages = [
     {"role": "user", "content": "who are you and give me a breif description of who you are"}
 ]
 
-model = "grok-3"
-url = "http://127.0.0.1:8000/v1/generate"
+model = "Llama-4-Maverick-17B-128E-Instruct-FP8"
+
+url = "https://maouu-chipling-api.hf.space/v1/generate"
 payload = {
     "messages": messages,
     "model": model
@@ -18,6 +19,7 @@ response = requests.post(url, json=payload, stream=True)
 
 if response.status_code == 200:
     for line in response.iter_lines():
+        print(line)
         if line:
            decoded_line = line.decode('utf-8')
            if decoded_line.startswith('data: '):
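
For reference, a minimal sketch of a client that consumes the stream served at the URL test.py now targets. It is an illustration only: the chunk shape ("choices"[0]["text"], a closing "data: [DONE]" line) is taken from the generator code in the app2.py listing above, and the JSON handling after the 'data: ' prefix is an assumption, since test.py's own handling of each chunk is not shown in this commit.

import json
import requests

# Sketch: stream the /v1/generate endpoint used by test.py and print the text deltas.
# The chunk schema comes from groqgenerate/vercelGroqgenerate above; parsing each
# chunk as JSON is assumed and not part of the committed test.py.
url = "https://maouu-chipling-api.hf.space/v1/generate"
payload = {
    "messages": [{"role": "user", "content": "who are you"}],
    "model": "Llama-4-Maverick-17B-128E-Instruct-FP8",
}

with requests.post(url, json=payload, stream=True) as response:
    for line in response.iter_lines():
        if not line:
            continue
        decoded_line = line.decode("utf-8")
        if not decoded_line.startswith("data: "):
            continue
        chunk = decoded_line[len("data: "):].strip()
        if chunk == "[DONE]":
            break
        try:
            event = json.loads(chunk)
        except json.JSONDecodeError:
            continue  # error markers such as "[Connection error: ...]" are not JSON
        for choice in event.get("choices", []):
            print(choice.get("text", ""), end="", flush=True)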