pentarosarium committed on
Commit
6177fd7
·
1 Parent(s): 5037fb3

progress more 41+

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -54,18 +54,19 @@ translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
54
 
55
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ru-en")
56
 
 
57
  def translate(text):
58
  # Tokenize the input text
59
  inputs = translation_tokenizer(text, return_tensors="pt", truncation=True)
60
 
61
- # Calculate max_length based on input length (you may need to adjust this ratio)
62
  input_length = inputs.input_ids.shape[1]
63
- max_length = min(512, int(input_length * 1.5))
64
 
65
  # Generate translation
66
  translated_tokens = translation_model.generate(
67
  **inputs,
68
- max_length=max_length,
69
  num_beams=5,
70
  no_repeat_ngram_size=2,
71
  early_stopping=True
@@ -75,7 +76,6 @@ def translate(text):
75
  translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
76
  return translated_text
77
 
78
-
79
  # Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
80
  def get_mapped_sentiment(result):
81
  label = result['label'].lower()
@@ -249,7 +249,7 @@ def create_output_file(df, uploaded_file, analysis_df):
249
  return output
250
 
251
  def main():
252
- st.title("... приступим к анализу... версия 40+")
253
 
254
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
255
 
 
54
 
55
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ru-en")
56
 
57
+
58
  def translate(text):
59
  # Tokenize the input text
60
  inputs = translation_tokenizer(text, return_tensors="pt", truncation=True)
61
 
62
+ # Calculate max_length based on input length
63
  input_length = inputs.input_ids.shape[1]
64
+ max_length = max(input_length + 10, int(input_length * 1.5)) # Ensure at least 10 new tokens
65
 
66
  # Generate translation
67
  translated_tokens = translation_model.generate(
68
  **inputs,
69
+ max_new_tokens=max_length, # Use max_new_tokens instead of max_length
70
  num_beams=5,
71
  no_repeat_ngram_size=2,
72
  early_stopping=True
 
76
  translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
77
  return translated_text
78
 
 
79
  # Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
80
  def get_mapped_sentiment(result):
81
  label = result['label'].lower()
 
249
  return output
250
 
251
  def main():
252
+ st.title("... приступим к анализу... версия 41+")
253
 
254
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
255