File size: 4,642 Bytes
3500b1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
"""
Synonym handling module.
"""
import os
import sys
import re
from typing import Dict, List, Optional, Set
# Default synonym dictionary (used when no MP_synonyms.py file is available).
# Each entry maps a part-name keyword to its standardized part name.
# NOTE(review): the string literals below look mis-decoded (UTF-8 Korean read
# with a Greek single-byte codec); they are left untouched here because they
# are runtime data — confirm against the original UTF-8 file.
DEFAULT_SYNONYMS = {
"μμΈλ μ΄ν°": "μ‘μΈμμ΄ν°",
"μ‘μΈμμ΄ν°": "μ‘μΈμμ΄ν°",
"λͺ¨ν°": "μ‘μΈμμ΄ν°",
"컨λ°": "컨νΈλ‘€λ°μ€"
}
class SynonymsHandler:
    """Map alternative part names (synonyms) to standardized part names.

    The synonym table is a ``{keyword: standard_name}`` dictionary. It is read
    from a file containing a ``SYNONYMS = {...}`` literal, which is parsed
    with regular expressions and never executed. If no file is found, or the
    file cannot be read or has no ``SYNONYMS`` literal, a copy of
    ``DEFAULT_SYNONYMS`` is used instead.

    Attributes:
        synonyms: Keyword -> standardized part name mapping currently in use.
        loaded: True once a synonym table (file-based or default) is set.
    """

    # NOTE(review): the user-facing Korean messages in this class were
    # reconstructed from mis-decoded text (one f-string was even split across
    # two lines); confirm the wording against the original UTF-8 file.

    # Locations probed, in this order, after the explicitly supplied path.
    _FALLBACK_PATHS = (".venv/SYNONYMS/MP_synonyms.py", "MP_synonyms.py")

    # Body of the first ``SYNONYMS = { ... }`` literal (up to the first "}").
    _SYNONYMS_BLOCK_RE = re.compile(r'SYNONYMS\s*=\s*\{(.*?)\}', re.DOTALL)

    # One ``"key": "value"`` entry; only double-quoted strings are recognized.
    _ENTRY_RE = re.compile(r'"([^"]*)"\s*:\s*"([^"]*)"')

    def __init__(self, synonyms_file: Optional[str] = None):
        """Initialize the handler and load a synonym table.

        The first existing path among ``synonyms_file`` (when given),
        ``.venv/SYNONYMS/MP_synonyms.py`` and ``MP_synonyms.py`` is loaded.
        If none of them exists, the default synonym table is used.

        Args:
            synonyms_file: Optional path to a synonyms file.
        """
        self.synonyms: Dict[str, str] = {}
        self.loaded = False

        candidates = list(self._FALLBACK_PATHS)
        if synonyms_file:
            candidates.insert(0, synonyms_file)

        for path in candidates:
            if os.path.exists(path):
                self._load_from_file(path)
                return

        print("동의어 파일을 찾을 수 없어 기본 동의어 사전을 사용합니다.")
        self._use_defaults()

    def _use_defaults(self) -> None:
        """Switch to a private copy of the default synonym table."""
        # Copy so that mutating one handler's table can never alter the
        # module-level defaults shared by every other instance.
        self.synonyms = dict(DEFAULT_SYNONYMS)
        self.loaded = True

    def _load_from_file(self, file_path: str) -> None:
        """Load the synonym table from a file without executing it.

        Falls back to the default table when the file cannot be read or does
        not contain a ``SYNONYMS = {...}`` literal.

        Args:
            file_path: Path of the synonyms file.
        """
        # Only the read itself can fail; the regex parsing below cannot.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"동의어 사전 로드 중 오류: {e}")
            self._use_defaults()
            return

        block = self._SYNONYMS_BLOCK_RE.search(content)
        if block is None:
            print(f"파일에서 SYNONYMS 딕셔너리를 찾을 수 없습니다: {file_path}")
            self._use_defaults()
            return

        # Later duplicates of a key override earlier ones, as in a dict literal.
        self.synonyms = dict(self._ENTRY_RE.findall(block.group(1)))
        self.loaded = True
        print(f"동의어 사전 로드 완료: {file_path}, {len(self.synonyms)}개 항목")

    def find_in_text(self, text: str) -> List[str]:
        """Find the standardized part names whose keywords occur in a text.

        Matching is a case-insensitive substring test.

        Args:
            text: Text to search.

        Returns:
            Standardized part names without duplicates, ordered by the first
            matching keyword in the synonym table. Empty when ``text`` is
            empty or no table is loaded.
        """
        if not text or not self.loaded:
            return []

        haystack = text.lower()
        found: Dict[str, None] = {}  # insertion-ordered, de-duplicating
        for keyword, standard_name in self.synonyms.items():
            # An empty keyword is a substring of every text; never match it.
            if keyword and keyword.lower() in haystack:
                found[standard_name] = None
        return list(found)

    def standardize(self, part_name: str) -> str:
        """Return the standardized name for a single part name.

        The comparison ignores case and surrounding whitespace of
        ``part_name``.

        Args:
            part_name: Part name to standardize.

        Returns:
            The standardized part name, or ``part_name`` unchanged when it is
            empty, no table is loaded, or no keyword matches.
        """
        if not part_name or not self.loaded:
            return part_name

        needle = part_name.lower().strip()
        if not needle:
            # Whitespace-only input must not match an empty keyword.
            return part_name

        for keyword, standard_name in self.synonyms.items():
            if needle == keyword.lower():
                return standard_name
        return part_name

    def standardize_parts_list(self, parts: List[str]) -> List[str]:
        """Standardize a list of part names and drop duplicates.

        Args:
            parts: Part names to standardize; falsy entries are skipped.

        Returns:
            Standardized part names without duplicates, in order of first
            appearance. ``parts`` itself is returned when it is empty or no
            table is loaded.
        """
        if not parts or not self.loaded:
            return parts

        return list(dict.fromkeys(self.standardize(part) for part in parts if part))