deepseek_done
Browse files- config.py +113 -181
- direct_deepseek.py +61 -10
config.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
"""
|
2 |
λ²‘ν° μ€ν μ΄, μλ² λ© λͺ¨λΈ, LLM λ± κ΅¬μ± μμ μ€μ
|
3 |
-
νκ²½ λ³μ λ° .env νμΌ νμ© κ°μ λ²μ -
|
4 |
"""
|
5 |
import os
|
6 |
import logging
|
@@ -21,49 +21,43 @@ logger.info(f"μ€ν¬λ¦½νΈ λλ ν 리: {script_dir}")
|
|
21 |
logger.info(f"νμ¬ μμ
λλ ν 리: {os.getcwd()}")
|
22 |
logger.info(f"μ΄μ 체μ : {os.name}")
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
# .env νμΌ μμΉ νλ³΄λ€ (.env νμΌμ μΌλ°μ μΌλ‘ νλ‘μ νΈ λ£¨νΈμ μμ)
|
33 |
env_paths = [
|
34 |
".env", # νμ¬ λλ ν 리
|
35 |
os.path.join(script_dir, ".env"), # μ€ν¬λ¦½νΈ λλ ν 리
|
|
|
|
|
36 |
]
|
37 |
|
38 |
-
#
|
|
|
39 |
for env_path in env_paths:
|
40 |
if os.path.isfile(env_path):
|
41 |
logger.info(f".env νμΌ λ°κ²¬: {env_path}")
|
42 |
-
|
43 |
-
if
|
44 |
logger.info(f".env νμΌ λ‘λ μ±κ³΅: {env_path}")
|
45 |
-
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
return False
|
50 |
|
51 |
-
|
52 |
-
env_loaded = fast_env_load()
|
53 |
|
54 |
-
# νκ²½ κ°μ§
|
55 |
-
IS_HUGGINGFACE = os.getenv('SPACE_ID') is not None
|
56 |
IS_WINDOWS = os.name == 'nt'
|
57 |
|
58 |
-
|
59 |
-
logger.info("HuggingFace Spaces νκ²½μ΄ κ°μ§λμμ΅λλ€.")
|
60 |
-
else:
|
61 |
-
logger.info(f"λ‘컬 νκ²½μμ μ€ν μ€μ
λλ€. (OS: {'Windows' if IS_WINDOWS else 'Unix/Linux/MacOS'})")
|
62 |
-
|
63 |
-
# μ νΈλ¦¬ν° ν¨μ: νκ²½ λ³μ κ°μ Έμ€κΈ° (κΈ°λ³Έκ° μ 곡)
|
64 |
def get_env(key: str, default: Any = None, required: bool = False) -> Any:
|
65 |
"""
|
66 |
-
νκ²½ λ³μλ₯Ό κ°μ Έμ€λ μ νΈλ¦¬ν° ν¨μ
|
67 |
|
68 |
Args:
|
69 |
key: νκ²½ λ³μ ν€
|
@@ -73,11 +67,29 @@ def get_env(key: str, default: Any = None, required: bool = False) -> Any:
|
|
73 |
Returns:
|
74 |
νκ²½ λ³μ κ° λλ κΈ°λ³Έκ°
|
75 |
"""
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
return value
|
83 |
|
@@ -152,8 +164,8 @@ CHUNK_OVERLAP = int(get_env("CHUNK_OVERLAP", "200"))
|
|
152 |
|
153 |
# API ν€ λ° νκ²½ μ€μ
|
154 |
OPENAI_API_KEY = get_env("OPENAI_API_KEY", "")
|
155 |
-
LANGFUSE_PUBLIC_KEY = get_env("LANGFUSE_PUBLIC_KEY", "
|
156 |
-
LANGFUSE_SECRET_KEY = get_env("LANGFUSE_SECRET_KEY", "
|
157 |
LANGFUSE_HOST = get_env("LANGFUSE_HOST", "https://cloud.langfuse.com")
|
158 |
|
159 |
# DeepSeek κ΄λ ¨ μ€μ μΆκ°
|
@@ -161,6 +173,17 @@ DEEPSEEK_API_KEY = get_env("DEEPSEEK_API_KEY", "")
|
|
161 |
DEEPSEEK_ENDPOINT = get_env("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
|
162 |
DEEPSEEK_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")
|
163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
# Milvus λ²‘ν° DB μ€μ
|
165 |
MILVUS_HOST = get_env("MILVUS_HOST", "localhost")
|
166 |
MILVUS_PORT = get_env("MILVUS_PORT", "19530")
|
@@ -174,8 +197,46 @@ RERANKER_MODEL = get_env("RERANKER_MODEL", "Alibaba-NLP/gte-multilingual-reranke
|
|
174 |
USE_OPENAI = get_env("USE_OPENAI", "False").lower() == "true"
|
175 |
USE_DEEPSEEK = get_env("USE_DEEPSEEK", "False").lower() == "true"
|
176 |
|
177 |
-
#
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
# DeepSeek API ν
μ€νΈ ν¨μ
|
181 |
def test_deepseek_connection():
|
@@ -216,7 +277,7 @@ def test_deepseek_connection():
|
|
216 |
response = requests.post(
|
217 |
DEEPSEEK_ENDPOINT,
|
218 |
headers=headers,
|
219 |
-
|
220 |
timeout=10 # 10μ΄ νμμμ
|
221 |
)
|
222 |
|
@@ -265,70 +326,6 @@ def test_deepseek_connection():
|
|
265 |
"status_code": None
|
266 |
}
|
267 |
|
268 |
-
if IS_HUGGINGFACE:
|
269 |
-
# HuggingFace νκ²½μμλ DeepSeek μ¬μ©
|
270 |
-
if get_env("DEEPSEEK_API_KEY", ""):
|
271 |
-
USE_DEEPSEEK = True
|
272 |
-
USE_OPENAI = False
|
273 |
-
LLM_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")
|
274 |
-
logger.info("HuggingFace Spaces νκ²½ κ°μ§: DeepSeek λͺ¨λΈ μ¬μ©")
|
275 |
-
|
276 |
-
# DeepSeek API μ°κ²° ν
μ€νΈ
|
277 |
-
deepseek_test_result = test_deepseek_connection()
|
278 |
-
if deepseek_test_result["success"]:
|
279 |
-
logger.info("DeepSeek API μ°κ²° ν
μ€νΈ μ±κ³΅")
|
280 |
-
else:
|
281 |
-
logger.warning(f"DeepSeek API μ°κ²° ν
μ€νΈ μ€ν¨: {deepseek_test_result['message']}")
|
282 |
-
logger.info("OpenAI λͺ¨λΈλ‘ ν΄λ°±ν©λλ€")
|
283 |
-
USE_DEEPSEEK = False
|
284 |
-
USE_OPENAI = True
|
285 |
-
LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
|
286 |
-
else:
|
287 |
-
# DeepSeek API ν€κ° μμΌλ©΄ OpenAI μ¬μ©
|
288 |
-
USE_OPENAI = True
|
289 |
-
USE_DEEPSEEK = False
|
290 |
-
LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
|
291 |
-
logger.info("HuggingFace Spaces νκ²½ κ°μ§: OpenAI λͺ¨λΈ μ¬μ©")
|
292 |
-
else:
|
293 |
-
# λ‘컬 νκ²½μμλ μ€μ μ λ°λΌ μ ν
|
294 |
-
if USE_DEEPSEEK:
|
295 |
-
LLM_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")
|
296 |
-
logger.info(f"DeepSeek λͺ¨λΈ μ¬μ©")
|
297 |
-
|
298 |
-
# DeepSeek API μ°κ²° ν
μ€νΈ
|
299 |
-
deepseek_test_result = test_deepseek_connection()
|
300 |
-
if deepseek_test_result["success"]:
|
301 |
-
logger.info("DeepSeek API μ°κ²° ν
μ€νΈ μ±κ³΅")
|
302 |
-
else:
|
303 |
-
logger.warning(f"DeepSeek API μ°κ²° ν
μ€νΈ μ€ν¨: {deepseek_test_result['message']}")
|
304 |
-
if not USE_OPENAI:
|
305 |
-
logger.info("Ollamaλ‘ ν΄λ°±ν©λλ€")
|
306 |
-
USE_DEEPSEEK = False
|
307 |
-
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
308 |
-
else:
|
309 |
-
logger.info("OpenAI λͺ¨λΈλ‘ ν΄λ°±ν©λλ€")
|
310 |
-
USE_DEEPSEEK = False
|
311 |
-
elif USE_OPENAI:
|
312 |
-
LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
|
313 |
-
logger.info(f"OpenAI λͺ¨λΈ μ¬μ©")
|
314 |
-
else:
|
315 |
-
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
316 |
-
logger.info(f"Ollama λͺ¨λΈ μ¬μ©")
|
317 |
-
|
318 |
-
# API ν€ κ²μ¦
|
319 |
-
if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
|
320 |
-
logger.warning("DeepSeek λͺ¨λΈμ΄ μ νλμμ§λ§ API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
|
321 |
-
USE_DEEPSEEK = False
|
322 |
-
USE_OPENAI = False
|
323 |
-
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
324 |
-
logger.info("DeepSeek API ν€κ° μμ΄ Ollamaλ‘ ν΄λ°±ν©λλ€.")
|
325 |
-
elif USE_OPENAI and not OPENAI_API_KEY:
|
326 |
-
logger.warning("OpenAI λͺ¨λΈμ΄ μ νλμμ§λ§ API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
|
327 |
-
if not IS_HUGGINGFACE: # HuggingFace νκ²½μμλ μ체 API ν€λ₯Ό μ¬μ©ν μ μμ
|
328 |
-
logger.warning("OpenAI API ν€κ° μμ΄ Ollamaλ‘ ν΄λ°±ν©λλ€.")
|
329 |
-
USE_OPENAI = False
|
330 |
-
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
331 |
-
|
332 |
# λ²‘ν° κ²μ μ€μ
|
333 |
TOP_K_RETRIEVAL = int(get_env("TOP_K_RETRIEVAL", "5")) # λ²‘ν° κ²μ κ²°κ³Ό μ
|
334 |
TOP_K_RERANK = int(get_env("TOP_K_RERANK", "3")) # 리λνΉ ν μ νν κ²°κ³Ό μ
|
@@ -341,13 +338,15 @@ LOG_FILE = get_env("LOG_FILE", "autorag.log")
|
|
341 |
def print_config():
|
342 |
"""νμ¬ μ€μ μ 보λ₯Ό λ‘κ·Έμ μΆλ ₯"""
|
343 |
logger.info("===== νμ¬ μ€μ μ 보 =====")
|
|
|
344 |
logger.info(f"λ¬Έμ λλ ν 리: {PDF_DIRECTORY}")
|
345 |
logger.info(f"μΊμ λλ ν 리: {CACHE_DIRECTORY}")
|
346 |
logger.info(f"μ²ν¬ ν¬κΈ°: {CHUNK_SIZE}, μ€λ²λ©: {CHUNK_OVERLAP}")
|
347 |
logger.info(f"OpenAI μ¬μ©: {USE_OPENAI}")
|
348 |
logger.info(f"DeepSeek μ¬μ©: {USE_DEEPSEEK}")
|
349 |
logger.info(f"LLM λͺ¨λΈ: {LLM_MODEL}")
|
350 |
-
|
|
|
351 |
logger.info(f"μλ² λ© λͺ¨λΈ: {EMBEDDING_MODEL}")
|
352 |
logger.info(f"리λ컀 λͺ¨λΈ: {RERANKER_MODEL}")
|
353 |
logger.info(f"TOP_K κ²μ: {TOP_K_RETRIEVAL}, 리λνΉ: {TOP_K_RERANK}")
|
@@ -367,12 +366,16 @@ def validate_config() -> Dict[str, Any]:
|
|
367 |
if not os.path.exists(PDF_DIRECTORY):
|
368 |
warnings.append(f"PDF λλ ν 리({PDF_DIRECTORY})κ° μ‘΄μ¬νμ§ μμ΅λλ€.")
|
369 |
|
370 |
-
# API ν€ νμΈ
|
371 |
-
if
|
372 |
-
|
|
|
|
|
|
|
|
|
373 |
|
374 |
-
|
375 |
-
|
376 |
|
377 |
# λͺ¨λΈ λ° μ€μ κ° νμΈ
|
378 |
if CHUNK_SIZE <= CHUNK_OVERLAP:
|
@@ -394,77 +397,6 @@ def validate_config() -> Dict[str, Any]:
|
|
394 |
"warnings": warnings
|
395 |
}
|
396 |
|
397 |
-
#
|
398 |
-
def list_directory_contents():
|
399 |
-
"""
|
400 |
-
νμ¬ μμ
λλ ν 리μ PDF_DIRECTORYμ λ΄μ©μ λ‘κΉ
|
401 |
-
"""
|
402 |
-
# νμ¬ μμ
λλ ν 리 λ΄μ©
|
403 |
-
try:
|
404 |
-
cwd_contents = os.listdir(os.getcwd())
|
405 |
-
logger.info(f"νμ¬ μμ
λλ ν 리 λ΄μ©: {cwd_contents}")
|
406 |
-
except Exception as e:
|
407 |
-
logger.error(f"νμ¬ μμ
λλ ν 리 λ΄μ© νμΈ μ€ν¨: {e}")
|
408 |
-
|
409 |
-
# PDF λλ ν 리 λ΄μ©
|
410 |
-
try:
|
411 |
-
if os.path.exists(PDF_DIRECTORY):
|
412 |
-
pdf_dir_contents = os.listdir(PDF_DIRECTORY)
|
413 |
-
logger.info(f"PDF λλ ν 리 λ΄μ©: {pdf_dir_contents}")
|
414 |
-
|
415 |
-
# PDF νμΌλ§ νν°λ§
|
416 |
-
pdf_files = [f for f in pdf_dir_contents if f.lower().endswith('.pdf')]
|
417 |
-
logger.info(f"PDF λλ ν 리 λ΄ PDF νμΌ: {pdf_files}")
|
418 |
-
else:
|
419 |
-
logger.warning(f"PDF λλ ν λ¦¬κ° μ‘΄μ¬νμ§ μμ: {PDF_DIRECTORY}")
|
420 |
-
except Exception as e:
|
421 |
-
logger.error(f"PDF λλ ν 리 λ΄μ© νμΈ μ€ν¨: {e}")
|
422 |
-
|
423 |
-
# μ§μ μ£Όμ΄μ§ κ²½λ‘μμ PDF μ°ΎκΈ° (λλ²κΉ
μ©)
|
424 |
-
def find_pdf_files_in_path(path: str) -> list:
|
425 |
-
"""
|
426 |
-
νΉμ κ²½λ‘μμ PDF νμΌ μ°ΎκΈ°
|
427 |
-
|
428 |
-
Args:
|
429 |
-
path: κ²μν κ²½λ‘
|
430 |
-
|
431 |
-
Returns:
|
432 |
-
λ°κ²¬λ PDF νμΌ λͺ©λ‘
|
433 |
-
"""
|
434 |
-
try:
|
435 |
-
if os.path.exists(path) and os.path.isdir(path):
|
436 |
-
pdf_files = [f for f in os.listdir(path) if f.lower().endswith('.pdf')]
|
437 |
-
logger.info(f"κ²½λ‘ '{path}'μμ {len(pdf_files)}κ°μ PDF νμΌ λ°κ²¬: {pdf_files}")
|
438 |
-
return pdf_files
|
439 |
-
else:
|
440 |
-
logger.warning(f"κ²½λ‘κ° μ‘΄μ¬νμ§ μκ±°λ λλ ν λ¦¬κ° μλ: {path}")
|
441 |
-
return []
|
442 |
-
except Exception as e:
|
443 |
-
logger.error(f"κ²½λ‘ '{path}'μμ PDF νμΌ κ²μ μ€ μ€λ₯: {e}")
|
444 |
-
return []
|
445 |
-
|
446 |
-
# μλμ°μ¦ μ£Όμ κ²½λ‘μμ PDF νμΌ κ²μ (λλ²κΉ
μ©)
|
447 |
-
def find_pdfs_in_windows_paths():
|
448 |
-
"""μλμ°μ¦μμ μ£Όμ κ²½λ‘μ PDF νμΌμ΄ μλμ§ νμΈ"""
|
449 |
-
if not IS_WINDOWS:
|
450 |
-
return
|
451 |
-
|
452 |
-
# μΌλ°μ μΈ μλμ°μ¦ κ²½λ‘λ€
|
453 |
-
common_paths = [
|
454 |
-
"C:\\Users\\USER\\RAG3\\documents",
|
455 |
-
"C:\\Users\\USER\\Documents",
|
456 |
-
os.path.join(os.environ.get('USERPROFILE', ''), 'Documents'),
|
457 |
-
os.path.join(os.environ.get('USERPROFILE', ''), 'Downloads'),
|
458 |
-
"documents",
|
459 |
-
"."
|
460 |
-
]
|
461 |
-
|
462 |
-
for path in common_paths:
|
463 |
-
find_pdf_files_in_path(path)
|
464 |
-
|
465 |
-
# μ€μ μ 보 μΆλ ₯ λ° κ²μ¦ (λͺ¨λ μν¬νΈ μ μ€ν)
|
466 |
print_config()
|
467 |
-
config_status = validate_config()
|
468 |
-
list_directory_contents()
|
469 |
-
if IS_WINDOWS:
|
470 |
-
find_pdfs_in_windows_paths()
|
|
|
1 |
"""
|
2 |
λ²‘ν° μ€ν μ΄, μλ² λ© λͺ¨λΈ, LLM λ± κ΅¬μ± μμ μ€μ
|
3 |
+
νκ²½ λ³μ λ° .env νμΌ νμ© κ°μ λ²μ - HuggingFace νκ²½ μ§μ μΆκ°
|
4 |
"""
|
5 |
import os
|
6 |
import logging
|
|
|
21 |
logger.info(f"νμ¬ μμ
λλ ν 리: {os.getcwd()}")
|
22 |
logger.info(f"μ΄μ 체μ : {os.name}")
|
23 |
|
24 |
+
# νκ²½ κ°μ§ - HuggingFace Space νκ²½μΈμ§ νμΈ
|
25 |
+
IS_HUGGINGFACE = False
|
26 |
+
if os.getenv('SPACE_ID') is not None or os.getenv('SYSTEM') == 'spaces':
|
27 |
+
IS_HUGGINGFACE = True
|
28 |
+
logger.info("HuggingFace Spaces νκ²½μ΄ κ°μ§λμμ΅λλ€.")
|
29 |
+
else:
|
30 |
+
# λ‘컬 νκ²½μΈ κ²½μ° .env νμΌ λ‘λ
|
31 |
+
# .env νμΌ μμΉ ν보λ€
|
|
|
32 |
env_paths = [
|
33 |
".env", # νμ¬ λλ ν 리
|
34 |
os.path.join(script_dir, ".env"), # μ€ν¬λ¦½νΈ λλ ν 리
|
35 |
+
os.path.join(script_dir, "config", ".env"), # config νμ λλ ν 리
|
36 |
+
os.path.join(os.path.dirname(script_dir), ".env"), # μμ λλ ν 리
|
37 |
]
|
38 |
|
39 |
+
# .env νμΌ μ°Ύμμ λ‘λ
|
40 |
+
env_loaded = False
|
41 |
for env_path in env_paths:
|
42 |
if os.path.isfile(env_path):
|
43 |
logger.info(f".env νμΌ λ°κ²¬: {env_path}")
|
44 |
+
env_loaded = load_dotenv(env_path, verbose=True)
|
45 |
+
if env_loaded:
|
46 |
logger.info(f".env νμΌ λ‘λ μ±κ³΅: {env_path}")
|
47 |
+
break
|
48 |
|
49 |
+
if not env_loaded:
|
50 |
+
logger.warning(".env νμΌμ μ°Ύμ μ μμ΅λλ€. κΈ°λ³Έκ° λλ μμ€ν
νκ²½ λ³μλ₯Ό μ¬μ©ν©λλ€.")
|
|
|
51 |
|
52 |
+
logger.info(f"λ‘컬 νκ²½μμ μ€ν μ€μ
λλ€. (OS: {'Windows' if os.name == 'nt' else 'Unix/Linux/MacOS'})")
|
|
|
53 |
|
54 |
+
# Windows νκ²½ κ°μ§
|
|
|
55 |
IS_WINDOWS = os.name == 'nt'
|
56 |
|
57 |
+
# μ νΈλ¦¬ν° ν¨μ: νκ²½ λ³μ κ°μ Έμ€κΈ° (HuggingFace νκ²½κ³Ό λ‘컬 νκ²½ ꡬλΆ)
|
|
|
|
|
|
|
|
|
|
|
58 |
def get_env(key: str, default: Any = None, required: bool = False) -> Any:
|
59 |
"""
|
60 |
+
νκ²½ λ³μλ₯Ό κ°μ Έμ€λ μ νΈλ¦¬ν° ν¨μ (HuggingFace νκ²½ μ§μ)
|
61 |
|
62 |
Args:
|
63 |
key: νκ²½ λ³μ ν€
|
|
|
67 |
Returns:
|
68 |
νκ²½ λ³μ κ° λλ κΈ°λ³Έκ°
|
69 |
"""
|
70 |
+
# HuggingFace Spaces νκ²½μμλ λ΄λΆ νκ²½λ³μ νμ©
|
71 |
+
if IS_HUGGINGFACE:
|
72 |
+
# HuggingFace Spacesμμλ μν¬λ¦Ώ κ°μ μ§μ μ¬μ©
|
73 |
+
# HF_SECRET_<KEY> νμμΌλ‘ μ μ₯λ μν¬λ¦Ώ νμΈ
|
74 |
+
hf_secret_key = f"HF_SECRET_{key.upper()}"
|
75 |
+
value = os.getenv(hf_secret_key)
|
76 |
+
|
77 |
+
# μν¬λ¦Ώμ΄ μμΌλ©΄ μΌλ° νκ²½λ³μ νμΈ
|
78 |
+
if value is None:
|
79 |
+
value = os.getenv(key, default)
|
80 |
+
else:
|
81 |
+
# λ‘컬 νκ²½μμλ μΌλ°μ μΈ λ°©μμΌλ‘ νκ²½λ³μ κ°μ Έμ€κΈ°
|
82 |
+
value = os.getenv(key, default)
|
83 |
+
|
84 |
+
if required and value is None:
|
85 |
+
if IS_HUGGINGFACE:
|
86 |
+
error_msg = f"νμ νκ²½ λ³μ {key}κ° μ€μ λμ§ μμμ΅λλ€. HuggingFace Spaceμμ μν¬λ¦Ώμ μ€μ ν΄μ£ΌμΈμ."
|
87 |
+
logger.error(error_msg)
|
88 |
+
raise ValueError(error_msg)
|
89 |
+
else:
|
90 |
+
error_msg = f"νμ νκ²½ λ³μ {key}κ° μ€μ λμ§ μμμ΅λλ€. .env νμΌμ μΆκ°ν΄μ£ΌμΈμ."
|
91 |
+
logger.error(error_msg)
|
92 |
+
raise ValueError(error_msg)
|
93 |
|
94 |
return value
|
95 |
|
|
|
164 |
|
165 |
# API ν€ λ° νκ²½ μ€μ
|
166 |
OPENAI_API_KEY = get_env("OPENAI_API_KEY", "")
|
167 |
+
LANGFUSE_PUBLIC_KEY = get_env("LANGFUSE_PUBLIC_KEY", "")
|
168 |
+
LANGFUSE_SECRET_KEY = get_env("LANGFUSE_SECRET_KEY", "")
|
169 |
LANGFUSE_HOST = get_env("LANGFUSE_HOST", "https://cloud.langfuse.com")
|
170 |
|
171 |
# DeepSeek κ΄λ ¨ μ€μ μΆκ°
|
|
|
173 |
DEEPSEEK_ENDPOINT = get_env("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
|
174 |
DEEPSEEK_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")
|
175 |
|
176 |
+
# νκΉ
νμ΄μ€ νκ²½μμ API ν€ νμΈ λ° λ‘κ·Έ μΆλ ₯
|
177 |
+
if IS_HUGGINGFACE:
|
178 |
+
logger.info(f"νκΉ
νμ΄μ€ νκ²½μμ DeepSeek API ν€ μ‘΄μ¬ μ¬λΆ: {bool(DEEPSEEK_API_KEY)}")
|
179 |
+
# 보μμ μν΄ API ν€ μ²« 4μ리μ λ§μ§λ§ 4μλ¦¬λ§ νμ (ν€κ° μ‘΄μ¬νλ κ²½μ°)
|
180 |
+
if DEEPSEEK_API_KEY:
|
181 |
+
masked_key = DEEPSEEK_API_KEY[:4] + "****" + DEEPSEEK_API_KEY[-4:] if len(DEEPSEEK_API_KEY) > 8 else "****"
|
182 |
+
logger.info(f"DeepSeek API ν€: {masked_key}")
|
183 |
+
|
184 |
+
logger.info(f"DeepSeek λͺ¨λΈ: {DEEPSEEK_MODEL}")
|
185 |
+
logger.info(f"DeepSeek μλν¬μΈνΈ: {DEEPSEEK_ENDPOINT}")
|
186 |
+
|
187 |
# Milvus λ²‘ν° DB μ€μ
|
188 |
MILVUS_HOST = get_env("MILVUS_HOST", "localhost")
|
189 |
MILVUS_PORT = get_env("MILVUS_PORT", "19530")
|
|
|
197 |
USE_OPENAI = get_env("USE_OPENAI", "False").lower() == "true"
|
198 |
USE_DEEPSEEK = get_env("USE_DEEPSEEK", "False").lower() == "true"
|
199 |
|
200 |
+
# νκΉ
νμ΄μ€ νκ²½μμλ DeepSeek μ°μ μ¬μ©
|
201 |
+
if IS_HUGGINGFACE:
|
202 |
+
# νκΉ
νμ΄μ€ νκ²½μμ DeepSeek API ν€κ° μλμ§ νμΈ
|
203 |
+
if DEEPSEEK_API_KEY:
|
204 |
+
USE_DEEPSEEK = True
|
205 |
+
USE_OPENAI = False
|
206 |
+
LLM_MODEL = DEEPSEEK_MODEL
|
207 |
+
logger.info("HuggingFace Spaces νκ²½: DeepSeek λͺ¨λΈ μ¬μ©")
|
208 |
+
else:
|
209 |
+
logger.warning("HuggingFace Spaces νκ²½μμ DeepSeek API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
|
210 |
+
USE_DEEPSEEK = False
|
211 |
+
USE_OPENAI = False # κΈ°λ³Έμ μΌλ‘ API ν€κ° μμΌλ©΄ λΉνμ±ν
|
212 |
+
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest") # λ체 λͺ¨λΈ μ€μ
|
213 |
+
logger.info(f"HuggingFace Spaces νκ²½: DeepSeek API ν€ μμ, LLM λͺ¨λΈ: {LLM_MODEL}")
|
214 |
+
else:
|
215 |
+
# λ‘컬 νκ²½μμλ μ€μ μ λ°λΌ LLM μ ν
|
216 |
+
if USE_DEEPSEEK:
|
217 |
+
LLM_MODEL = DEEPSEEK_MODEL
|
218 |
+
logger.info(f"λ‘컬 νκ²½: DeepSeek λͺ¨λΈ μ¬μ© ({DEEPSEEK_MODEL})")
|
219 |
+
elif USE_OPENAI:
|
220 |
+
LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
|
221 |
+
logger.info(f"λ‘컬 νκ²½: OpenAI λͺ¨λΈ μ¬μ© ({LLM_MODEL})")
|
222 |
+
else:
|
223 |
+
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
224 |
+
OLLAMA_HOST = get_env("OLLAMA_HOST", "http://localhost:11434")
|
225 |
+
logger.info(f"λ‘컬 νκ²½: Ollama λͺ¨λΈ μ¬μ© ({LLM_MODEL})")
|
226 |
+
|
227 |
+
# API ν€ κ²μ¦ (λ‘컬 νκ²½λ§)
|
228 |
+
if not IS_HUGGINGFACE:
|
229 |
+
if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
|
230 |
+
logger.warning("DeepSeek λͺ¨λΈμ΄ μ νλμμ§λ§ API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
|
231 |
+
USE_DEEPSEEK = False
|
232 |
+
USE_OPENAI = False
|
233 |
+
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
234 |
+
logger.info("DeepSeek API ν€κ° μμ΄ Ollamaλ‘ ν΄λ°±ν©λλ€.")
|
235 |
+
elif USE_OPENAI and not OPENAI_API_KEY:
|
236 |
+
logger.warning("OpenAI λͺ¨λΈμ΄ μ νλμμ§λ§ API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
|
237 |
+
logger.warning("OpenAI API ν€κ° μμ΄ Ollamaλ‘ ν΄λ°±ν©λλ€.")
|
238 |
+
USE_OPENAI = False
|
239 |
+
LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
|
240 |
|
241 |
# DeepSeek API ν
μ€νΈ ν¨μ
|
242 |
def test_deepseek_connection():
|
|
|
277 |
response = requests.post(
|
278 |
DEEPSEEK_ENDPOINT,
|
279 |
headers=headers,
|
280 |
+
json=payload,
|
281 |
timeout=10 # 10μ΄ νμμμ
|
282 |
)
|
283 |
|
|
|
326 |
"status_code": None
|
327 |
}
|
328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
# λ²‘ν° κ²μ μ€μ
|
330 |
TOP_K_RETRIEVAL = int(get_env("TOP_K_RETRIEVAL", "5")) # λ²‘ν° κ²μ κ²°κ³Ό μ
|
331 |
TOP_K_RERANK = int(get_env("TOP_K_RERANK", "3")) # 리λνΉ ν μ νν κ²°κ³Ό μ
|
|
|
338 |
def print_config():
|
339 |
"""νμ¬ μ€μ μ 보λ₯Ό λ‘κ·Έμ μΆλ ₯"""
|
340 |
logger.info("===== νμ¬ μ€μ μ 보 =====")
|
341 |
+
logger.info(f"μ€ν νκ²½: {'HuggingFace Spaces' if IS_HUGGINGFACE else 'λ‘컬'}")
|
342 |
logger.info(f"λ¬Έμ λλ ν 리: {PDF_DIRECTORY}")
|
343 |
logger.info(f"μΊμ λλ ν 리: {CACHE_DIRECTORY}")
|
344 |
logger.info(f"μ²ν¬ ν¬κΈ°: {CHUNK_SIZE}, μ€λ²λ©: {CHUNK_OVERLAP}")
|
345 |
logger.info(f"OpenAI μ¬μ©: {USE_OPENAI}")
|
346 |
logger.info(f"DeepSeek μ¬μ©: {USE_DEEPSEEK}")
|
347 |
logger.info(f"LLM λͺ¨λΈ: {LLM_MODEL}")
|
348 |
+
if not USE_OPENAI and not USE_DEEPSEEK and not IS_HUGGINGFACE:
|
349 |
+
logger.info(f"Ollama νΈμ€νΈ: {OLLAMA_HOST}")
|
350 |
logger.info(f"μλ² λ© λͺ¨λΈ: {EMBEDDING_MODEL}")
|
351 |
logger.info(f"리λ컀 λͺ¨λΈ: {RERANKER_MODEL}")
|
352 |
logger.info(f"TOP_K κ²μ: {TOP_K_RETRIEVAL}, 리λνΉ: {TOP_K_RERANK}")
|
|
|
366 |
if not os.path.exists(PDF_DIRECTORY):
|
367 |
warnings.append(f"PDF λλ ν 리({PDF_DIRECTORY})κ° μ‘΄μ¬νμ§ μμ΅λλ€.")
|
368 |
|
369 |
+
# API ν€ νμΈ (νκΉ
νμ΄μ€μ λ‘컬 νκ²½ ꡬλΆ)
|
370 |
+
if IS_HUGGINGFACE:
|
371 |
+
if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
|
372 |
+
warnings.append("νκΉ
νμ΄μ€ νκ²½μμ DeepSeek μ¬μ©μ΄ μ€μ λμμ§λ§ API ν€κ° μ 곡λμ§ μμμ΅λλ€.")
|
373 |
+
else:
|
374 |
+
if USE_OPENAI and not OPENAI_API_KEY:
|
375 |
+
warnings.append("OpenAI μ¬μ©μ΄ μ€μ λμμ§λ§ API ν€κ° μ 곡λμ§ μμμ΅λλ€.")
|
376 |
|
377 |
+
if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
|
378 |
+
warnings.append("DeepSeek μ¬μ©μ΄ μ€μ λμμ§λ§ API ν€κ° μ 곡λμ§ μμμ΅λλ€.")
|
379 |
|
380 |
# λͺ¨λΈ λ° μ€μ κ° νμΈ
|
381 |
if CHUNK_SIZE <= CHUNK_OVERLAP:
|
|
|
397 |
"warnings": warnings
|
398 |
}
|
399 |
|
400 |
+
# μ€μ λ‘λ μ μ€ν
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
401 |
print_config()
|
402 |
+
config_status = validate_config()
|
|
|
|
|
|
direct_deepseek.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
"""
|
2 |
-
μ§μ DeepSeek API νΈμΆμ μν ν΄λΌμ΄μΈνΈ ꡬν
|
3 |
"""
|
4 |
import os
|
5 |
import time
|
@@ -11,24 +11,58 @@ from typing import Dict, Any, Optional, List
|
|
11 |
# λ‘κΉ
μ€μ
|
12 |
logger = logging.getLogger("DirectDeepSeek")
|
13 |
|
|
|
|
|
|
|
14 |
class DirectDeepSeekClient:
|
15 |
"""
|
16 |
DeepSeek APIλ₯Ό μ§μ νΈμΆνλ ν΄λΌμ΄μΈνΈ
|
17 |
OpenAI ν΄λΌμ΄μΈνΈλ₯Ό μ°ννκ³ μ§μ HTTP μμ² μ¬μ©
|
|
|
18 |
"""
|
19 |
-
def __init__(self, api_key: str, model_name: str = "deepseek-chat"):
|
20 |
"""
|
21 |
ν΄λΌμ΄μΈνΈ μ΄κΈ°ν
|
22 |
|
23 |
Args:
|
24 |
-
api_key: DeepSeek API ν€
|
25 |
model_name: μ¬μ©ν λͺ¨λΈ μ΄λ¦ (κΈ°λ³Έκ°: "deepseek-chat")
|
26 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
self.api_key = api_key
|
28 |
self.model_name = model_name
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
logger.info(f"DirectDeepSeekClient μ΄κΈ°ν: λͺ¨λΈ={model_name}, μλν¬μΈνΈ={self.endpoint}")
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
def generate(self,
|
33 |
prompt: str,
|
34 |
temperature: float = 0.3,
|
@@ -71,6 +105,16 @@ class DirectDeepSeekClient:
|
|
71 |
Returns:
|
72 |
μμ± κ²°κ³Ό λμ
λ리 (success, response, message λ±)
|
73 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
# API μμ² ν€λ λ° λ°μ΄ν°
|
75 |
headers = {
|
76 |
"Content-Type": "application/json",
|
@@ -235,14 +279,21 @@ if __name__ == "__main__":
|
|
235 |
# λ‘κΉ
μ€μ
|
236 |
logging.basicConfig(level=logging.INFO)
|
237 |
|
238 |
-
#
|
239 |
-
|
240 |
-
|
241 |
-
print("
|
242 |
-
|
|
|
|
|
243 |
|
244 |
# ν΄λΌμ΄μΈνΈ μμ±
|
245 |
-
client = DirectDeepSeekClient(
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
# κ°λ¨ν ν
μ€νΈ
|
248 |
response = client.generate("Hello, what can you do?")
|
|
|
1 |
"""
|
2 |
+
μ§μ DeepSeek API νΈμΆμ μν ν΄λΌμ΄μΈνΈ ꡬν - νκΉ
νμ΄μ€ νκ²½ μ§μ
|
3 |
"""
|
4 |
import os
|
5 |
import time
|
|
|
11 |
# λ‘κΉ
μ€μ
|
12 |
logger = logging.getLogger("DirectDeepSeek")
|
13 |
|
14 |
+
# νκ²½ κ°μ§
|
15 |
+
IS_HUGGINGFACE = os.getenv('SPACE_ID') is not None or os.getenv('SYSTEM') == 'spaces'
|
16 |
+
|
17 |
class DirectDeepSeekClient:
|
18 |
"""
|
19 |
DeepSeek APIλ₯Ό μ§μ νΈμΆνλ ν΄λΌμ΄μΈνΈ
|
20 |
OpenAI ν΄λΌμ΄μΈνΈλ₯Ό μ°ννκ³ μ§μ HTTP μμ² μ¬μ©
|
21 |
+
νκΉ
νμ΄μ€ νκ²½ μ§μ
|
22 |
"""
|
23 |
+
def __init__(self, api_key: Optional[str] = None, model_name: str = "deepseek-chat"):
|
24 |
"""
|
25 |
ν΄λΌμ΄μΈνΈ μ΄κΈ°ν
|
26 |
|
27 |
Args:
|
28 |
+
api_key: DeepSeek API ν€ (NoneμΈ κ²½μ° νκ²½λ³μμμ κ°μ Έμ΄)
|
29 |
model_name: μ¬μ©ν λͺ¨λΈ μ΄λ¦ (κΈ°λ³Έκ°: "deepseek-chat")
|
30 |
"""
|
31 |
+
# API ν€ μ€μ (νκΉ
νμ΄μ€ νκ²½ νμΈ)
|
32 |
+
if api_key is None:
|
33 |
+
if IS_HUGGINGFACE:
|
34 |
+
# νκΉ
νμ΄μ€ νκ²½μμλ μν¬λ¦Ώμμ κ°μ Έμ€κΈ° μλ
|
35 |
+
api_key = os.getenv('HF_SECRET_DEEPSEEK_API_KEY')
|
36 |
+
if not api_key:
|
37 |
+
# μν¬λ¦Ώμ΄ μμΌλ©΄ μΌλ° νκ²½λ³μ νμΈ
|
38 |
+
api_key = os.getenv("DEEPSEEK_API_KEY", "")
|
39 |
+
else:
|
40 |
+
# λ‘컬 νκ²½μμλ νκ²½λ³μ μ¬μ©
|
41 |
+
api_key = os.getenv("DEEPSEEK_API_KEY", "")
|
42 |
+
|
43 |
self.api_key = api_key
|
44 |
self.model_name = model_name
|
45 |
+
|
46 |
+
# μλν¬μΈνΈ μ€μ (νκΉ
νμ΄μ€ νκ²½ νμΈ)
|
47 |
+
if IS_HUGGINGFACE:
|
48 |
+
# νκΉ
νμ΄μ€ νκ²½μμλ μν¬λ¦Ώμμ κ°μ Έμ€κΈ° μλ
|
49 |
+
self.endpoint = os.getenv('HF_SECRET_DEEPSEEK_ENDPOINT')
|
50 |
+
if not self.endpoint:
|
51 |
+
# μν¬λ¦Ώμ΄ μμΌλ©΄ μΌλ° νκ²½λ³μ νμΈ
|
52 |
+
self.endpoint = os.getenv("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
|
53 |
+
else:
|
54 |
+
# λ‘컬 νκ²½μμλ νκ²½λ³μ μ¬μ©
|
55 |
+
self.endpoint = os.getenv("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
|
56 |
+
|
57 |
logger.info(f"DirectDeepSeekClient μ΄κΈ°ν: λͺ¨λΈ={model_name}, μλν¬μΈνΈ={self.endpoint}")
|
58 |
|
59 |
+
# API ν€ νμΈ
|
60 |
+
if not self.api_key:
|
61 |
+
if IS_HUGGINGFACE:
|
62 |
+
logger.warning("νκΉ
νμ΄μ€ νκ²½μμ DeepSeek API ν€κ° μ€μ λμ§ μμμ΅λλ€. Space μν¬λ¦Ώμ νμΈνμΈμ.")
|
63 |
+
else:
|
64 |
+
logger.warning("DeepSeek API ν€κ° μ€μ λμ§ μμμ΅λλ€. .env νμΌμ΄λ νκ²½λ³μλ₯Ό νμΈνμΈμ.")
|
65 |
+
|
66 |
def generate(self,
|
67 |
prompt: str,
|
68 |
temperature: float = 0.3,
|
|
|
105 |
Returns:
|
106 |
μμ± κ²°κ³Ό λμ
λ리 (success, response, message λ±)
|
107 |
"""
|
108 |
+
# API ν€ νμΈ
|
109 |
+
if not self.api_key:
|
110 |
+
error_msg = "DeepSeek API ν€κ° μ€μ λμ§ μμμ΅λλ€."
|
111 |
+
logger.error(error_msg)
|
112 |
+
return {
|
113 |
+
"success": False,
|
114 |
+
"message": error_msg,
|
115 |
+
"status_code": None
|
116 |
+
}
|
117 |
+
|
118 |
# API μμ² ν€λ λ° λ°μ΄ν°
|
119 |
headers = {
|
120 |
"Content-Type": "application/json",
|
|
|
279 |
# λ‘κΉ
μ€μ
|
280 |
logging.basicConfig(level=logging.INFO)
|
281 |
|
282 |
+
# νκΉ
νμ΄μ€ νκ²½ νμΈ
|
283 |
+
if IS_HUGGINGFACE:
|
284 |
+
print("νκΉ
νμ΄μ€ νκ²½μμ μ€ν μ€μ
λλ€.")
|
285 |
+
print("HF_SECRET_DEEPSEEK_API_KEY μν¬λ¦Ώ μ€μ μ΄ νμν©λλ€.")
|
286 |
+
else:
|
287 |
+
print("λ‘컬 νκ²½μμ μ€ν μ€μ
λλ€.")
|
288 |
+
print("DEEPSEEK_API_KEY νκ²½λ³μ μ€μ μ΄ νμν©λλ€.")
|
289 |
|
290 |
# ν΄λΌμ΄μΈνΈ μμ±
|
291 |
+
client = DirectDeepSeekClient()
|
292 |
+
|
293 |
+
# API ν€ νμΈ
|
294 |
+
if not client.api_key:
|
295 |
+
print("DeepSeek API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
|
296 |
+
exit(1)
|
297 |
|
298 |
# κ°λ¨ν ν
μ€νΈ
|
299 |
response = client.generate("Hello, what can you do?")
|