pentarosarium committed on
Commit
de89832
·
1 Parent(s): 01d0236
Files changed (1) hide show
  1. app.py +136 -99
app.py CHANGED
@@ -16,135 +16,172 @@ import contextlib
16
  from langchain_openai import ChatOpenAI # Updated import
17
  import pdfkit
18
  from jinja2 import Template
19
- from googletrans import Translator as GoogleTranslator
20
  import time
 
 
 
 
21
 
22
  class TranslationSystem:
23
- def __init__(self, method='googletrans', llm=None):
24
  """
25
- Initialize translation system with specified method.
26
 
27
  Args:
28
- method (str): 'googletrans' or 'llm'
29
  llm: LangChain LLM instance (required if method is 'llm')
 
30
  """
31
  self.method = method
32
  self.llm = llm
33
- if method == 'googletrans':
34
- try:
35
- self.google_translator = GoogleTranslator()
36
- # Test the translator with a simple string
37
- self.google_translator.translate('test', src='en', dest='ru')
38
- except Exception as e:
39
- st.warning(f"Error initializing Google Translator: {str(e)}. Falling back to LLM translation.")
40
- self.method = 'llm'
41
- else:
42
- self.google_translator = None
43
 
44
- def translate_text(self, text, src='ru', dest='en'):
45
  """
46
- Translate text using the selected translation method.
47
-
48
- Args:
49
- text (str): Text to translate
50
- src (str): Source language code
51
- dest (str): Destination language code
52
-
53
- Returns:
54
- str: Translated text
55
  """
56
- if pd.isna(text) or not isinstance(text, str) or not text.strip():
57
- return text
 
 
58
 
59
  try:
60
- if self.method == 'googletrans' and self.google_translator:
61
- return self._translate_with_googletrans(text, src, dest)
62
- else:
63
- return self._translate_with_llm(text, src, dest)
64
- except Exception as e:
65
- st.warning(f"Translation error: {str(e)}. Returning original text.")
66
- return text
 
 
 
67
 
68
- def _translate_with_googletrans(self, text, src='ru', dest='en'):
 
 
 
 
 
 
69
  """
70
- Translate using googletrans library with improved error handling.
71
  """
72
- try:
73
- # Clean and validate input text
74
- text = text.strip()
75
- if not text:
76
- return text
77
-
78
- # Add delay to avoid rate limits
79
- time.sleep(0.5)
80
 
81
- # Attempt translation with retry logic
82
- max_retries = 3
83
- for attempt in range(max_retries):
84
  try:
85
- result = self.google_translator.translate(text, src=src, dest=dest)
86
- if result and result.text:
87
- return result.text
88
- raise Exception("Empty translation result")
 
 
 
89
  except Exception as e:
90
- if attempt == max_retries - 1:
91
- raise
92
- time.sleep(1) # Wait before retry
93
 
94
- raise Exception("All translation attempts failed")
 
 
 
 
 
 
 
 
 
 
 
95
 
96
- except Exception as e:
97
- # If googletrans fails, fall back to LLM translation
98
- if self.llm:
99
- st.warning(f"Googletrans error: {str(e)}. Falling back to LLM translation.")
100
- return self._translate_with_llm(text, src, dest)
101
- raise Exception(f"Googletrans error: {str(e)}")
102
-
103
- def _translate_with_llm(self, text, src='ru', dest='en'):
104
  """
105
- Translate using LangChain LLM with improved error handling.
106
  """
107
- if not self.llm:
108
- raise Exception("LLM not initialized for translation")
109
-
110
- try:
111
- # Clean input text
112
- text = text.strip()
113
- if not text:
114
- return text
115
-
116
- # Prepare system message based on language direction
117
- if src == 'ru' and dest == 'en':
118
- system_msg = "You are a translator. Translate the given Russian text to English accurately and concisely."
119
- user_msg = f"Translate this Russian text to English: {text}"
120
- elif src == 'en' and dest == 'ru':
121
- system_msg = "You are a translator. Translate the given English text to Russian accurately and concisely."
122
- user_msg = f"Translate this English text to Russian: {text}"
123
- else:
124
- raise Exception(f"Unsupported language pair: {src} to {dest}")
125
 
126
- messages = [
127
- {"role": "system", "content": system_msg},
128
- {"role": "user", "content": user_msg}
129
- ]
 
 
 
 
130
 
131
- response = self.llm.invoke(messages)
 
 
 
 
 
 
 
132
 
133
- # Handle different response types
134
- if hasattr(response, 'content'):
135
- translation = response.content.strip()
136
- elif isinstance(response, str):
137
- translation = response.strip()
 
 
 
 
 
 
 
 
138
  else:
139
- translation = str(response).strip()
140
-
141
- if not translation:
142
- raise Exception("Empty translation result")
 
143
 
144
- return translation
145
-
146
  except Exception as e:
147
- raise Exception(f"LLM translation error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  def process_file(uploaded_file, model_choice, translation_method='googletrans'):
150
  df = None
@@ -618,7 +655,7 @@ def create_output_file(df, uploaded_file, llm):
618
 
619
  def main():
620
  with st.sidebar:
621
- st.title("::: AI-анализ мониторинга новостей (v.3.33 ):::")
622
  st.subheader("по материалам СКАН-ИНТЕРФАКС ")
623
 
624
  model_choice = st.radio(
 
16
  from langchain_openai import ChatOpenAI # Updated import
17
  import pdfkit
18
  from jinja2 import Template
 
19
  import time
20
+ from tenacity import retry, stop_after_attempt, wait_exponential
21
+ from typing import Optional
22
+ from deep_translator import GoogleTranslator as DeepGoogleTranslator
23
+ from googletrans import Translator as LegacyTranslator
24
 
25
  class TranslationSystem:
26
+ def __init__(self, method='auto', llm=None, batch_size=10):
27
  """
28
+ Initialize translation system with multiple fallback options.
29
 
30
  Args:
31
+ method (str): 'auto', 'deep-google', or 'llm'
32
  llm: LangChain LLM instance (required if method is 'llm')
33
+ batch_size (int): Number of texts to process in each batch
34
  """
35
  self.method = method
36
  self.llm = llm
37
+ self.batch_size = batch_size
38
+ self.rate_limiter = RateLimitHandler()
39
+ self.translator = None
40
+ self._initialize_translator()
 
 
 
 
 
 
41
 
42
+ def _initialize_translator(self):
43
  """
44
+ Initialize translator with fallback options.
 
 
 
 
 
 
 
 
45
  """
46
+ if self.method == 'llm':
47
+ if not self.llm:
48
+ raise Exception("LLM must be provided when using 'llm' method")
49
+ return
50
 
51
  try:
52
+ # Try deep-translator first (more stable)
53
+ self.translator = DeepGoogleTranslator()
54
+ self.method = 'deep-google'
55
+ # Test translation
56
+ test_result = self.translator.translate(text='test', source='en', target='ru')
57
+ if not test_result:
58
+ raise Exception("Deep translator test failed")
59
+
60
+ except Exception as deep_e:
61
+ st.warning(f"Deep-translator initialization failed: {str(deep_e)}")
62
 
63
+ if self.method != 'llm' and self.llm:
64
+ st.info("Falling back to LLM translation")
65
+ self.method = 'llm'
66
+ else:
67
+ raise Exception("No translation method available")
68
+
69
+ def translate_batch(self, texts, src='ru', dest='en'):
70
  """
71
+ Translate a batch of texts with fallback options.
72
  """
73
+ translations = []
74
+ for i in range(0, len(texts), self.batch_size):
75
+ batch = texts[i:i + self.batch_size]
76
+ batch_translations = []
 
 
 
 
77
 
78
+ for text in batch:
 
 
79
  try:
80
+ translation = self.rate_limiter.execute_with_retry(
81
+ self._translate_single_text,
82
+ text,
83
+ src,
84
+ dest
85
+ )
86
+ batch_translations.append(translation)
87
  except Exception as e:
88
+ st.warning(f"Translation error: {str(e)}. Using original text.")
89
+ batch_translations.append(text)
 
90
 
91
+ # If deep-google fails, try falling back to LLM
92
+ if self.method == 'deep-google' and self.llm:
93
+ try:
94
+ st.info("Attempting LLM translation fallback...")
95
+ self.method = 'llm'
96
+ translation = self._translate_single_text(text, src, dest)
97
+ batch_translations[-1] = translation # Replace original text with translation
98
+ except Exception as llm_e:
99
+ st.warning(f"LLM fallback failed: {str(llm_e)}")
100
+
101
+ translations.extend(batch_translations)
102
+ time.sleep(1) # Small delay between batches
103
 
104
+ return translations
105
+
106
+ def _translate_single_text(self, text, src='ru', dest='en'):
 
 
 
 
 
107
  """
108
+ Translate a single text with appropriate method.
109
  """
110
+ if pd.isna(text) or not isinstance(text, str) or not text.strip():
111
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
+ text = text.strip()
114
+
115
+ if self.method == 'llm':
116
+ return self._translate_with_llm(text, src, dest)
117
+ elif self.method == 'deep-google':
118
+ return self._translate_with_deep_google(text, src, dest)
119
+ else:
120
+ raise Exception(f"Unsupported translation method: {self.method}")
121
 
122
+ def _translate_with_deep_google(self, text, src='ru', dest='en'):
123
+ """
124
+ Translate using deep-translator's Google Translate.
125
+ """
126
+ try:
127
+ # deep-translator uses different language codes
128
+ src = 'auto' if src == 'auto' else src.lower()
129
+ dest = dest.lower()
130
 
131
+ # Split long texts (deep-translator has a character limit)
132
+ max_length = 5000
133
+ if len(text) > max_length:
134
+ chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
135
+ translated_chunks = []
136
+ for chunk in chunks:
137
+ translated_chunk = self.translator.translate(
138
+ text=chunk,
139
+ source=src,
140
+ target=dest
141
+ )
142
+ translated_chunks.append(translated_chunk)
143
+ return ' '.join(translated_chunks)
144
  else:
145
+ return self.translator.translate(
146
+ text=text,
147
+ source=src,
148
+ target=dest
149
+ )
150
 
 
 
151
  except Exception as e:
152
+ raise Exception(f"Deep-translator error: {str(e)}")
153
+
154
+ def _translate_with_llm(self, text, src='ru', dest='en'):
155
+ """
156
+ Translate using LangChain LLM.
157
+ """
158
+ if not self.llm:
159
+ raise Exception("LLM not initialized for translation")
160
+
161
+ messages = [
162
+ {"role": "system", "content": "You are a translator. Translate the given text accurately and concisely."},
163
+ {"role": "user", "content": f"Translate this text from {src} to {dest}: {text}"}
164
+ ]
165
+
166
+ response = self.llm.invoke(messages)
167
+ return response.content.strip() if hasattr(response, 'content') else str(response).strip()
168
+
169
def init_translation_system(model_choice, translation_method='auto'):
    """
    Build a TranslationSystem for the given model and translation method.

    An LLM is created with init_langchain_llm(model_choice) unless the method
    is 'deep-google', in which case no LLM is passed. The system is always
    created with a batch size of 5. Construction errors are shown with
    st.error and then re-raised.
    """
    if translation_method == 'deep-google':
        llm = None
    else:
        llm = init_langchain_llm(model_choice)

    try:
        return TranslationSystem(method=translation_method, llm=llm, batch_size=5)
    except Exception as init_error:
        st.error(f"Failed to initialize translation system: {str(init_error)}")
        raise
185
 
186
  def process_file(uploaded_file, model_choice, translation_method='googletrans'):
187
  df = None
 
655
 
656
  def main():
657
  with st.sidebar:
658
+ st.title("::: AI-анализ мониторинга новостей (v.3.34 ):::")
659
  st.subheader("по материалам СКАН-ИНТЕРФАКС ")
660
 
661
  model_choice = st.radio(