Spaces:
Running
Running
Commit
·
de89832
1
Parent(s):
01d0236
3.34
Browse files
app.py
CHANGED
@@ -16,135 +16,172 @@ import contextlib
|
|
16 |
from langchain_openai import ChatOpenAI # Updated import
|
17 |
import pdfkit
|
18 |
from jinja2 import Template
|
19 |
-
from googletrans import Translator as GoogleTranslator
|
20 |
import time
|
|
|
|
|
|
|
|
|
21 |
|
22 |
class TranslationSystem:
|
23 |
-
def __init__(self, method='
|
24 |
"""
|
25 |
-
Initialize translation system with
|
26 |
|
27 |
Args:
|
28 |
-
method (str): '
|
29 |
llm: LangChain LLM instance (required if method is 'llm')
|
|
|
30 |
"""
|
31 |
self.method = method
|
32 |
self.llm = llm
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
self.google_translator.translate('test', src='en', dest='ru')
|
38 |
-
except Exception as e:
|
39 |
-
st.warning(f"Error initializing Google Translator: {str(e)}. Falling back to LLM translation.")
|
40 |
-
self.method = 'llm'
|
41 |
-
else:
|
42 |
-
self.google_translator = None
|
43 |
|
44 |
-
def
|
45 |
"""
|
46 |
-
|
47 |
-
|
48 |
-
Args:
|
49 |
-
text (str): Text to translate
|
50 |
-
src (str): Source language code
|
51 |
-
dest (str): Destination language code
|
52 |
-
|
53 |
-
Returns:
|
54 |
-
str: Translated text
|
55 |
"""
|
56 |
-
if
|
57 |
-
|
|
|
|
|
58 |
|
59 |
try:
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
67 |
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
"""
|
70 |
-
Translate
|
71 |
"""
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
return text
|
77 |
-
|
78 |
-
# Add delay to avoid rate limits
|
79 |
-
time.sleep(0.5)
|
80 |
|
81 |
-
|
82 |
-
max_retries = 3
|
83 |
-
for attempt in range(max_retries):
|
84 |
try:
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
89 |
except Exception as e:
|
90 |
-
|
91 |
-
|
92 |
-
time.sleep(1) # Wait before retry
|
93 |
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
st.warning(f"Googletrans error: {str(e)}. Falling back to LLM translation.")
|
100 |
-
return self._translate_with_llm(text, src, dest)
|
101 |
-
raise Exception(f"Googletrans error: {str(e)}")
|
102 |
-
|
103 |
-
def _translate_with_llm(self, text, src='ru', dest='en'):
|
104 |
"""
|
105 |
-
Translate
|
106 |
"""
|
107 |
-
if not
|
108 |
-
|
109 |
-
|
110 |
-
try:
|
111 |
-
# Clean input text
|
112 |
-
text = text.strip()
|
113 |
-
if not text:
|
114 |
-
return text
|
115 |
-
|
116 |
-
# Prepare system message based on language direction
|
117 |
-
if src == 'ru' and dest == 'en':
|
118 |
-
system_msg = "You are a translator. Translate the given Russian text to English accurately and concisely."
|
119 |
-
user_msg = f"Translate this Russian text to English: {text}"
|
120 |
-
elif src == 'en' and dest == 'ru':
|
121 |
-
system_msg = "You are a translator. Translate the given English text to Russian accurately and concisely."
|
122 |
-
user_msg = f"Translate this English text to Russian: {text}"
|
123 |
-
else:
|
124 |
-
raise Exception(f"Unsupported language pair: {src} to {dest}")
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
130 |
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
-
#
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
else:
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
143 |
|
144 |
-
return translation
|
145 |
-
|
146 |
except Exception as e:
|
147 |
-
raise Exception(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
def process_file(uploaded_file, model_choice, translation_method='googletrans'):
|
150 |
df = None
|
@@ -618,7 +655,7 @@ def create_output_file(df, uploaded_file, llm):
|
|
618 |
|
619 |
def main():
|
620 |
with st.sidebar:
|
621 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
622 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
623 |
|
624 |
model_choice = st.radio(
|
|
|
16 |
from langchain_openai import ChatOpenAI # Updated import
|
17 |
import pdfkit
|
18 |
from jinja2 import Template
|
|
|
19 |
import time
|
20 |
+
from tenacity import retry, stop_after_attempt, wait_exponential
|
21 |
+
from typing import Optional
|
22 |
+
from deep_translator import GoogleTranslator as DeepGoogleTranslator
|
23 |
+
from googletrans import Translator as LegacyTranslator
|
24 |
|
25 |
class TranslationSystem:
    """
    Translate texts with deep-translator's Google backend or a LangChain LLM.

    The effective backend is stored in ``self.method`` and may change at
    runtime: when the Google backend fails and an LLM is available, the
    instance switches to ``'llm'`` and keeps using it for later texts.
    """

    # Largest chunk sent to deep-translator in one request.
    # NOTE(review): deep-translator appears to require len(text) < 5000, so a
    # chunk of exactly 5000 characters would be rejected - confirm against the
    # installed version.
    _MAX_CHARS_PER_REQUEST = 4999

    def __init__(self, method='auto', llm=None, batch_size=10):
        """
        Initialize translation system with multiple fallback options.

        Args:
            method (str): 'auto', 'deep-google', or 'llm'
            llm: LangChain LLM instance (required if method is 'llm')
            batch_size (int): Number of texts to process in each batch

        Raises:
            ValueError: if batch_size is not a positive integer.
            Exception: if no translation backend can be initialized.
        """
        # A zero step would make range() fail later and a negative step would
        # silently translate nothing, so reject both up front.
        if not isinstance(batch_size, int) or batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

        self.method = method
        self.llm = llm
        self.batch_size = batch_size
        self.rate_limiter = RateLimitHandler()
        self.translator = None
        self._initialize_translator()

    def _initialize_translator(self):
        """
        Initialize translator with fallback options.

        For method 'llm' only the presence of the LLM is checked. Otherwise a
        deep-translator instance is created and probed with a test request;
        if that fails the system falls back to the LLM when one was supplied.

        Raises:
            Exception: if 'llm' was requested without an LLM, or if the
                deep-translator probe fails and no LLM is available.
        """
        if self.method == 'llm':
            if not self.llm:
                raise Exception("LLM must be provided when using 'llm' method")
            return

        try:
            # Try deep-translator first (more stable). The language pair is
            # configured on the constructor; translate() only takes the text.
            self.translator = DeepGoogleTranslator(source='en', target='ru')
            self.method = 'deep-google'
            # Test translation
            test_result = self.translator.translate('test')
            if not test_result:
                raise Exception("Deep translator test failed")

        except Exception as deep_e:
            st.warning(f"Deep-translator initialization failed: {str(deep_e)}")

            # self.method can never be 'llm' here (that case returned above),
            # so the only question is whether an LLM is available.
            if self.llm:
                st.info("Falling back to LLM translation")
                self.method = 'llm'
            else:
                raise Exception("No translation method available") from deep_e

    def translate_batch(self, texts, src='ru', dest='en'):
        """
        Translate a batch of texts with fallback options.

        Args:
            texts: Sliceable sequence of texts; processed ``batch_size`` at a time.
            src (str): Source language code.
            dest (str): Destination language code.

        Returns:
            list: One entry per input text, in order. A text that could not
            be translated is returned unchanged.
        """
        translations = []
        total = len(texts)

        for start in range(0, total, self.batch_size):
            batch = texts[start:start + self.batch_size]
            for text in batch:
                translations.append(self._translate_with_fallback(text, src, dest))

            # Small delay between batches; pointless after the last one.
            if start + self.batch_size < total:
                time.sleep(1)

        return translations

    def _translate_with_fallback(self, text, src, dest):
        """Translate one text, falling back to the LLM and finally to the original text."""
        try:
            # RateLimitHandler is defined outside this class; presumably it
            # retries the call on rate-limit errors - verify against its code.
            return self.rate_limiter.execute_with_retry(
                self._translate_single_text,
                text,
                src,
                dest
            )
        except Exception as e:
            st.warning(f"Translation error: {str(e)}. Using original text.")

            # If deep-google fails, try falling back to LLM
            if self.method == 'deep-google' and self.llm:
                try:
                    st.info("Attempting LLM translation fallback...")
                    # The switch is permanent: later texts also use the LLM.
                    self.method = 'llm'
                    return self._translate_single_text(text, src, dest)
                except Exception as llm_e:
                    st.warning(f"LLM fallback failed: {str(llm_e)}")

            return text

    def _translate_single_text(self, text, src='ru', dest='en'):
        """
        Translate a single text with appropriate method.

        Non-string values (None, NaN, numbers, containers) and blank strings
        are returned unchanged.

        Raises:
            Exception: if ``self.method`` is neither 'llm' nor 'deep-google',
                or if the selected backend fails.
        """
        # Checking the type first covers None/NaN as well and avoids calling
        # an element-wise NA test on containers.
        if not isinstance(text, str) or not text.strip():
            return text

        text = text.strip()

        if self.method == 'llm':
            return self._translate_with_llm(text, src, dest)
        elif self.method == 'deep-google':
            return self._translate_with_deep_google(text, src, dest)
        else:
            raise Exception(f"Unsupported translation method: {self.method}")

    @staticmethod
    def _split_text(text, max_length):
        """Split text into chunks of at most max_length characters, preferring to break at spaces."""
        chunks = []
        remaining = text
        while len(remaining) > max_length:
            # Last space that still keeps the chunk within max_length.
            cut = remaining.rfind(' ', 0, max_length + 1)
            if cut <= 0:
                # No usable space: fall back to a hard split.
                cut = max_length
            chunks.append(remaining[:cut])
            remaining = remaining[cut:].lstrip(' ')
        if remaining:
            chunks.append(remaining)
        return chunks

    def _translate_with_deep_google(self, text, src='ru', dest='en'):
        """
        Translate using deep-translator's Google Translate.

        Long texts are split into chunks, translated separately and joined
        with single spaces.

        Raises:
            Exception: wrapping any error raised by deep-translator.
        """
        try:
            src = 'auto' if src == 'auto' else src.lower()
            dest = dest.lower()

            # deep-translator fixes the language pair at construction time, so
            # build a translator for the requested pair instead of passing
            # source/target to translate().
            translator = DeepGoogleTranslator(source=src, target=dest)

            # Split long texts (deep-translator has a character limit)
            chunks = self._split_text(text, self._MAX_CHARS_PER_REQUEST)
            translated_chunks = [translator.translate(chunk) for chunk in chunks]
            return ' '.join(translated_chunks)

        except Exception as e:
            raise Exception(f"Deep-translator error: {str(e)}") from e

    def _translate_with_llm(self, text, src='ru', dest='en'):
        """
        Translate using LangChain LLM.

        The language codes are interpolated into the prompt as given.

        Returns:
            str: The stripped ``content`` of the response when it has that
            attribute, otherwise the stripped string form of the response.

        Raises:
            Exception: if no LLM was supplied.
        """
        if not self.llm:
            raise Exception("LLM not initialized for translation")

        messages = [
            {"role": "system", "content": "You are a translator. Translate the given text accurately and concisely."},
            {"role": "user", "content": f"Translate this text from {src} to {dest}: {text}"}
        ]

        response = self.llm.invoke(messages)
        return response.content.strip() if hasattr(response, 'content') else str(response).strip()
|
168 |
+
|
169 |
+
def init_translation_system(model_choice, translation_method='auto'):
    """
    Build a TranslationSystem for the chosen model and translation method.

    An LLM is created via init_langchain_llm(model_choice) unless the method
    is 'deep-google', in which case no LLM is passed. Any initialization
    failure is reported through st.error and then re-raised unchanged.
    """
    if translation_method == 'deep-google':
        llm = None
    else:
        llm = init_langchain_llm(model_choice)

    try:
        return TranslationSystem(
            method=translation_method,
            llm=llm,
            batch_size=5,
        )
    except Exception as exc:
        st.error(f"Failed to initialize translation system: {str(exc)}")
        raise
|
185 |
|
186 |
def process_file(uploaded_file, model_choice, translation_method='googletrans'):
|
187 |
df = None
|
|
|
655 |
|
656 |
def main():
|
657 |
with st.sidebar:
|
658 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.34 ):::")
|
659 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
660 |
|
661 |
model_choice = st.radio(
|