OraCatQAQ committed on
Commit
5e92546
·
1 Parent(s): f48a33c

识别提交验证码

Browse files
Files changed (4) hide show
  1. .env +3 -0
  2. README.md +93 -0
  3. app.py +259 -25
  4. requirements.txt +6 -4
.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
2
+ DEEPSIDER_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJkYXRhIjp7ImVtYWlsIjoib3J6Y2F0QGNhbnRlcmxvdC5lZHUucGwiLCJ1aWQiOiI2N2UyMWY5ZjJiNjhlYTlmYzJiMjhiNzgiLCJ2ZXJzaW9uIjowLCJjaGFubmVsVGFnIjoiIn0sImV4cGlyZSI6MTc1MTQyNTU5MTAxNywiaWF0IjoxNzQzNjQ5NTkxLCJleHAiOjE3NTE0MjU1OTF9.gMrecWLlsfLrTGf0VeciJeIhR7edZ2BBJ6a_-wWut24
3
+ ADMIN_KEY=sk-123456
README.md CHANGED
@@ -9,3 +9,96 @@ license: mit
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+ # DeepSider API代理
14
+
15
+ 这是一个将OpenAI API请求代理到DeepSider API的服务,支持验证码自动识别和处理功能。
16
+
17
+ ## 功能特点
18
+
19
+ - 支持OpenAI API的主要格式
20
+ - 自动映射模型名称
21
+ - 流式响应支持
22
+ - 多Token轮询支持
23
+ - 自动验证码识别与提交功能
24
+ - 思维链(reasoning_content)支持
25
+
26
+ ## 依赖要求
27
+
28
+ - Python 3.8+
29
+ - FastAPI
30
+ - uvicorn
31
+ - Pillow
32
+ - ddddocr
33
+ - requests
34
+
35
+ ## 安装步骤
36
+
37
+ 1. 克隆仓库
38
+
39
+ ```bash
40
+ git clone https://github.com/yourusername/deepsider-api-proxy.git
41
+ cd deepsider-api-proxy
42
+ ```
43
+
44
+ 2. 安装Python依赖
45
+
46
+ ```bash
47
+ pip install -r requirements.txt
48
+ ```
49
+
50
+ 3. 创建.env文件并设置环境变量
51
+
52
+ ```
53
+ ADMIN_KEY=你的管理员密钥
54
+ DEEPSIDER_TOKEN=你的DeepSider API令牌
55
+ PORT=7860 # 可选,默认为7860
56
+ ```
57
+
58
+ 4. 启动服务
59
+
60
+ ```bash
61
+ python app.py
62
+ ```
63
+
64
+ ## API使用
65
+
66
+ 服务启动后,可以通过以下URL访问:
67
+
68
+ ```
69
+ http://localhost:7860
70
+ ```
71
+
72
+ ### 请求示例
73
+
74
+ ```bash
75
+ curl http://localhost:7860/v1/chat/completions \
76
+ -H "Content-Type: application/json" \
77
+ -H "Authorization: Bearer YOUR_ADMIN_KEY" \
78
+ -d '{
79
+ "model": "gpt-4",
80
+ "messages": [{"role": "user", "content": "Hello, who are you?"}],
81
+ "stream": true
82
+ }'
83
+ ```
84
+
85
+ ## 验证码自动处理
86
+
87
+ 当DeepSider API返回验证码挑战时,系统会自动:
88
+
89
+ 1. 检测到验证码图片
90
+ 2. 使用ddddocr识别验证码
91
+ 3. 自动提交验证码
92
+ 4. 继续处理请求
93
+
94
+ ### 关于验证码识别
95
+
96
+ 本项目使用[ddddocr](https://github.com/sml2h3/ddddocr)库进行验证码识别,它是一个基于深度学习的通用验证码识别库:
97
+
98
+ - 无需配置复杂的环境和依赖
99
+ - 识别速度快,准确率高
100
+ - 支持多种类型的验证码
101
+
102
+ ## 许可证
103
+
104
+ MIT
app.py CHANGED
@@ -11,7 +11,13 @@ import requests
11
  from datetime import datetime
12
  import logging
13
  import os
 
 
 
 
 
14
  from dotenv import load_dotenv
 
15
 
16
  # 加载环境变量
17
  load_dotenv()
@@ -43,6 +49,9 @@ app.add_middleware(
43
  DEEPSIDER_API_BASE = "https://api.chargpt.ai/api/v2"
44
  TOKEN_INDEX = 0
45
 
 
 
 
46
  # 模型映射表
47
  MODEL_MAPPING = {
48
  "gpt-3.5-turbo": "anthropic/claude-3.5-sonnet",
@@ -249,11 +258,70 @@ async def generate_openai_response(full_response: str, request_id: str, model: s
249
 
250
  return response_data
251
 
252
- async def stream_openai_response(response, request_id: str, model: str, api_key, token_index):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  """流式返回OpenAI API格式的响应"""
254
  timestamp = int(time.time())
255
  full_response = ""
256
  full_reasoning = "" # 添加思维链内容累积变量
 
 
 
257
 
258
  try:
259
  # 使用iter_content替代iter_lines
@@ -271,13 +339,23 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
271
  data = json.loads(line[6:])
272
  logger.debug(f"Received data: {data}")
273
 
 
 
 
 
 
274
  if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
275
  content = data.get('data', {}).get('content', '')
276
  reasoning_content = data.get('data', {}).get('reasoning_content', '')
277
 
278
- if content:
279
- full_response += content
280
- chunk = {
 
 
 
 
 
281
  "id": f"chatcmpl-{request_id}",
282
  "object": "chat.completion.chunk",
283
  "created": timestamp,
@@ -286,15 +364,36 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
286
  {
287
  "index": 0,
288
  "delta": {
289
- "content": content
290
  },
291
  "finish_reason": None
292
  }
293
  ]
294
  }
295
- yield f"data: {json.dumps(chunk)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
- # 处理思维链内容
298
  if reasoning_content:
299
  full_reasoning += reasoning_content
300
  reasoning_chunk = {
@@ -315,22 +414,156 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
315
  yield f"data: {json.dumps(reasoning_chunk)}\n\n"
316
 
317
  elif data.get('code') == 203:
318
- # 生成完成信号
319
- chunk = {
320
- "id": f"chatcmpl-{request_id}",
321
- "object": "chat.completion.chunk",
322
- "created": timestamp,
323
- "model": model,
324
- "choices": [
325
- {
326
- "index": 0,
327
- "delta": {},
328
- "finish_reason": "stop"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  }
330
- ]
331
- }
332
- yield f"data: {json.dumps(chunk)}\n\n"
333
- yield "data: [DONE]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
  except json.JSONDecodeError as e:
336
  logger.warning(f"JSON解析失败: {line}, 错误: {str(e)}")
@@ -345,6 +578,7 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
345
  logger.error(f"流式响应处理出错: {str(e)}")
346
 
347
  # 返回错误信息
 
348
  error_chunk = {
349
  "id": f"chatcmpl-{request_id}",
350
  "object": "chat.completion.chunk",
@@ -354,7 +588,7 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
354
  {
355
  "index": 0,
356
  "delta": {
357
- "content": f"\n\n[处理响应时出错: {str(e)}]"
358
  },
359
  "finish_reason": "stop"
360
  }
@@ -454,9 +688,9 @@ async def create_chat_completion(
454
 
455
  # 处理流式或非流式响应
456
  if chat_request.stream:
457
- # 返回流式响应
458
  return StreamingResponse(
459
- stream_openai_response(response, request_id, chat_request.model, api_key, TOKEN_INDEX),
460
  media_type="text/event-stream"
461
  )
462
  else:
 
11
  from datetime import datetime
12
  import logging
13
  import os
14
+ import re
15
+ import base64
16
+ import io
17
+ from PIL import Image
18
+ import ddddocr
19
  from dotenv import load_dotenv
20
+ from PIL import ImageFilter
21
 
22
  # 加载环境变量
23
  load_dotenv()
 
49
  DEEPSIDER_API_BASE = "https://api.chargpt.ai/api/v2"
50
  TOKEN_INDEX = 0
51
 
52
+ # 验证码识别器实例
53
+ ocr = ddddocr.DdddOcr()
54
+
55
  # 模型映射表
56
  MODEL_MAPPING = {
57
  "gpt-3.5-turbo": "anthropic/claude-3.5-sonnet",
 
258
 
259
  return response_data
260
 
261
+ # 验证码处理函数
262
def extract_captcha_image(content: str) -> Optional[str]:
    """Extract the Base64 payload of an inline captcha image from markdown text.

    Looks for the first markdown image whose target is a ``data:image/...``
    URI with ``base64`` encoding, e.g. ``![](data:image/png;base64,AAAA)``.

    Args:
        content: Text chunk that may embed a captcha image.

    Returns:
        The Base64-encoded image data (without the ``data:`` prefix), or
        ``None`` when ``content`` is empty or contains no such image.
    """
    # Guard against empty / missing content so re.search never sees None.
    if not content:
        return None

    # Accept optional alt text (``![captcha](...)``) and optional data-URI
    # parameters (``data:image/png;charset=utf-8;base64,...``); the original
    # pattern only matched an empty alt text and a bare media type.
    pattern = (
        r'!\[[^\]]*\]'                # ![alt]
        r'\(data:image/[^;,)]+'       # (data:image/<subtype>
        r'(?:;[^;,)]+)*'              # optional ;param=value segments
        r';base64,([^)]+)\)'          # ;base64,<payload>)
    )
    match = re.search(pattern, content)
    return match.group(1) if match else None
270
+
271
def recognize_captcha(base64_img: str) -> str:
    """Recognize the text of a captcha image using the module-level ddddocr instance.

    Args:
        base64_img: Base64-encoded image bytes (no ``data:`` URI prefix).

    Returns:
        The text returned by ``ocr.classification``, or an empty string if
        decoding or recognition raises.
    """
    recognized = ""
    try:
        # Decode the Base64 payload into raw image bytes, then hand it to the OCR engine.
        raw_bytes = base64.b64decode(base64_img)
        ocr_output = ocr.classification(raw_bytes)
        logger.info(f"识别到的验证码: {ocr_output}")
        recognized = ocr_output
    except Exception as err:
        # Best effort: any failure is logged and reported to the caller as "".
        logger.error(f"验证码识别出错: {str(err)}")
    return recognized
285
+
286
async def submit_captcha(api_key: str, conversation_id: str, captcha: str, model: str) -> Optional[requests.Response]:
    """Submit a recognized captcha to the DeepSider conversation endpoint.

    The captcha text is sent as the ``prompt`` of a follow-up message on the
    existing conversation.

    Args:
        api_key: Token passed to ``get_headers`` to build the request headers.
        conversation_id: Conversation id, sent as ``clId``.
        captcha: Recognized captcha text.
        model: Model name to put in the request body.

    Returns:
        The ``requests.Response`` opened with ``stream=True``, or ``None`` if
        sending the request raised.
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    import functools

    logger.info(f"提交验证码: {captcha}, 会话ID: {conversation_id}, 模型: {model}")

    headers = get_headers(api_key)

    # Request body for the captcha submission.
    payload = {
        "clId": conversation_id,
        "model": model,  # model from the original request
        "prompt": captcha,  # the captcha is sent as the prompt
        "webAccess": "close",
        "timezone": "Asia/Shanghai"
    }

    send_request = functools.partial(
        requests.post,
        f"{DEEPSIDER_API_BASE}/chat/conversation",
        headers=headers,
        json=payload,
        stream=True,  # the reply to a captcha submission is streamed as well
        timeout=30
    )

    try:
        # requests.post is blocking; calling it directly inside a coroutine
        # stalls the event loop until the response headers arrive. Run it in
        # the default executor so other requests keep being served.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, send_request)
    except Exception as e:
        # Callers treat None as "request could not be sent".
        logger.error(f"提交验证码时出错: {str(e)}")
        return None
315
+
316
+ # 修改流式响应处理
317
+ async def stream_openai_response(response, request_id: str, model: str, api_key, token_index, deepsider_model: str, is_post_captcha: bool = False):
318
  """流式返回OpenAI API格式的响应"""
319
  timestamp = int(time.time())
320
  full_response = ""
321
  full_reasoning = "" # 添加思维链内容累积变量
322
+ conversation_id = None # 会话ID
323
+ captcha_base64 = None # 验证码图片
324
+ captcha_detected_in_stream = False # 新增标志位,仅用于当前流
325
 
326
  try:
327
  # 使用iter_content替代iter_lines
 
339
  data = json.loads(line[6:])
340
  logger.debug(f"Received data: {data}")
341
 
342
+ # 获取会话ID (所有流都可能包含)
343
+ if data.get('code') == 201:
344
+ conversation_id = data.get('data', {}).get('clId')
345
+ logger.info(f"会话ID: {conversation_id}")
346
+
347
  if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
348
  content = data.get('data', {}).get('content', '')
349
  reasoning_content = data.get('data', {}).get('reasoning_content', '')
350
 
351
+ # 检测是否含有验证码 (仅在非递归调用时检测)
352
+ if not is_post_captcha and "验证码提示" in content and "![](data:image" in content:
353
+ captcha_detected_in_stream = True # 标记在本流中检测到验证码
354
+ logger.info("检测到验证码响应")
355
+ captcha_base64 = extract_captcha_image(content)
356
+
357
+ # 向客户端发送验证码提示
358
+ captcha_message = {
359
  "id": f"chatcmpl-{request_id}",
360
  "object": "chat.completion.chunk",
361
  "created": timestamp,
 
364
  {
365
  "index": 0,
366
  "delta": {
367
+ "content": "[系统检测到验证码,正在自动识别...]"
368
  },
369
  "finish_reason": None
370
  }
371
  ]
372
  }
373
+ yield f"data: {json.dumps(captcha_message)}\n\n"
374
+
375
+ if content:
376
+ full_response += content
377
+ # 如果在本流中检测到了验证码,则不发送实际内容,直到验证码处理完毕
378
+ if not captcha_detected_in_stream:
379
+ chunk_content = {
380
+ "id": f"chatcmpl-{request_id}",
381
+ "object": "chat.completion.chunk",
382
+ "created": timestamp,
383
+ "model": model,
384
+ "choices": [
385
+ {
386
+ "index": 0,
387
+ "delta": {
388
+ "content": content
389
+ },
390
+ "finish_reason": None
391
+ }
392
+ ]
393
+ }
394
+ yield f"data: {json.dumps(chunk_content)}\n\n"
395
 
396
+ # 处理思维链内容 (始终发送)
397
  if reasoning_content:
398
  full_reasoning += reasoning_content
399
  reasoning_chunk = {
 
414
  yield f"data: {json.dumps(reasoning_chunk)}\n\n"
415
 
416
  elif data.get('code') == 203:
417
+ # 如果在本流检测到验证码且会话结束,处理验证码
418
+ # 确保只在原始流(非递归调用)中处理
419
+ if not is_post_captcha and captcha_detected_in_stream and captcha_base64 and conversation_id:
420
+ captcha_text = recognize_captcha(captcha_base64)
421
+
422
+ if captcha_text:
423
+ # 发送验证码识别结果通知
424
+ captcha_result = {
425
+ "id": f"chatcmpl-{request_id}",
426
+ "object": "chat.completion.chunk",
427
+ "created": timestamp,
428
+ "model": model,
429
+ "choices": [
430
+ {
431
+ "index": 0,
432
+ "delta": {
433
+ "content": f"\n[系统已自动识别验证码: {captcha_text},正在提交...]"
434
+ },
435
+ "finish_reason": None
436
+ }
437
+ ]
438
+ }
439
+ yield f"data: {json.dumps(captcha_result)}\n\n"
440
+
441
+ # 提交验证码
442
+ captcha_response = await submit_captcha(api_key, conversation_id, captcha_text, deepsider_model)
443
+
444
+ if captcha_response is None:
445
+ # 请求本身失败 (网络错误等)
446
+ error_msg = "\n[验证码提交请求失败,请检查网络或服务日志]"
447
+ error_chunk = {
448
+ "id": f"chatcmpl-{request_id}",
449
+ "object": "chat.completion.chunk",
450
+ "created": timestamp,
451
+ "model": model,
452
+ "choices": [
453
+ {
454
+ "index": 0,
455
+ "delta": {
456
+ "content": error_msg
457
+ },
458
+ "finish_reason": "stop"
459
+ }
460
+ ]
461
+ }
462
+ yield f"data: {json.dumps(error_chunk)}\n\n"
463
+ yield "data: [DONE]\n\n"
464
+ return
465
+ elif not captcha_response.ok:
466
+ # API返回了错误状态码 (4xx, 5xx)
467
+ status_code = captcha_response.status_code
468
+ logger.error(f"提交验证码后API返回错误: {status_code}")
469
+ error_body_text = ""
470
+ error_message = f"HTTP Status {status_code}"
471
+ try:
472
+ # 尝试读取错误响应体
473
+ error_body_text = captcha_response.text
474
+ logger.error(f"错误响应体: {error_body_text}")
475
+ # 尝试解析JSON错误信息
476
+ error_data = captcha_response.json()
477
+ error_message = error_data.get('message', str(error_data))
478
+ except Exception as parse_err:
479
+ logger.warning(f"解析错误响应体失败: {parse_err}")
480
+ if error_body_text:
481
+ error_message = error_body_text[:100] # 截断以防过长
482
+
483
+ error_msg = f"\n[验证码提交后出错: {error_message}]"
484
+ error_chunk = {
485
+ "id": f"chatcmpl-{request_id}",
486
+ "object": "chat.completion.chunk",
487
+ "created": timestamp,
488
+ "model": model,
489
+ "choices": [
490
+ {
491
+ "index": 0,
492
+ "delta": {
493
+ "content": error_msg
494
+ },
495
+ "finish_reason": "stop"
496
+ }
497
+ ]
498
+ }
499
+ yield f"data: {json.dumps(error_chunk)}\n\n"
500
+ yield "data: [DONE]\n\n"
501
+ return
502
+ else:
503
+ # 验证码提交成功 (2xx),继续处理响应流
504
+ # 发送验证码提交成功通知
505
+ captcha_submitted_message = {
506
+ "id": f"chatcmpl-{request_id}",
507
+ "object": "chat.completion.chunk",
508
+ "created": timestamp,
509
+ "model": model,
510
+ "choices": [
511
+ {
512
+ "index": 0,
513
+ "delta": {
514
+ "content": "\n[验证码已提交,正在获取响应...]"
515
+ },
516
+ "finish_reason": None
517
+ }
518
+ ]
519
+ }
520
+ yield f"data: {json.dumps(captcha_submitted_message)}\n\n"
521
+
522
+ # 启动递归调用处理成功的响应流
523
+ async for chunk_after_captcha in stream_openai_response(
524
+ captcha_response, request_id, model, api_key, token_index, deepsider_model, is_post_captcha=True
525
+ ):
526
+ yield chunk_after_captcha
527
+ return # 正常结束验证码处理
528
+ else:
529
+ # 验证码识别失败的处理
530
+ error_msg = "\n[验证码识别失败,请重试]"
531
+ error_chunk = {
532
+ "id": f"chatcmpl-{request_id}",
533
+ "object": "chat.completion.chunk",
534
+ "created": timestamp,
535
+ "model": model,
536
+ "choices": [
537
+ {
538
+ "index": 0,
539
+ "delta": {
540
+ "content": error_msg
541
+ },
542
+ "finish_reason": "stop"
543
+ }
544
+ ]
545
  }
546
+ yield f"data: {json.dumps(error_chunk)}\n\n"
547
+ yield "data: [DONE]\n\n"
548
+ return
549
+
550
+ # 普通完成信号 (如果没有在本流检测到验证码,或者是在递归调用中)
551
+ if not captcha_detected_in_stream or is_post_captcha:
552
+ final_chunk = {
553
+ "id": f"chatcmpl-{request_id}",
554
+ "object": "chat.completion.chunk",
555
+ "created": timestamp,
556
+ "model": model,
557
+ "choices": [
558
+ {
559
+ "index": 0,
560
+ "delta": {},
561
+ "finish_reason": "stop"
562
+ }
563
+ ]
564
+ }
565
+ yield f"data: {json.dumps(final_chunk)}\n\n"
566
+ yield "data: [DONE]\n\n"
567
 
568
  except json.JSONDecodeError as e:
569
  logger.warning(f"JSON解析失败: {line}, 错误: {str(e)}")
 
578
  logger.error(f"流式响应处理出错: {str(e)}")
579
 
580
  # 返回错误信息
581
+ error_msg = "\n\n[处理响应时出错: {str(e)}]"
582
  error_chunk = {
583
  "id": f"chatcmpl-{request_id}",
584
  "object": "chat.completion.chunk",
 
588
  {
589
  "index": 0,
590
  "delta": {
591
+ "content": error_msg
592
  },
593
  "finish_reason": "stop"
594
  }
 
688
 
689
  # 处理流式或非流式响应
690
  if chat_request.stream:
691
+ # 返回流式响应 - 初始调用 is_post_captcha 默认为 False
692
  return StreamingResponse(
693
+ stream_openai_response(response, request_id, chat_request.model, api_key, TOKEN_INDEX, deepsider_model),
694
  media_type="text/event-stream"
695
  )
696
  else:
requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
- fastapi==0.103.1
2
- uvicorn==0.23.2
 
 
3
  requests==2.31.0
4
- python-dotenv==1.0.0
5
- pydantic==2.3.0
 
1
+ fastapi==0.110.0
2
+ uvicorn==0.27.1
3
+ pydantic==2.6.1
4
+ python-dotenv==1.0.1
5
  requests==2.31.0
6
+ Pillow==10.4.0
7
+ ddddocr==1.4.8