pentarosarium committed on
Commit
10f44f6
·
1 Parent(s): 9da9717

4.16 attempt at mend stop

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -992,7 +992,14 @@ def process_file(uploaded_file, model_choice, translation_method=None):
992
  # Create processed_rows_df with all columns from original df and required columns
993
  all_columns = list(set(list(df.columns) + list(required_columns.keys())))
994
  processed_rows_df = pd.DataFrame(columns=all_columns)
995
-
 
 
 
 
 
 
 
996
  # Process rows
997
  total_rows = len(df)
998
  processed_rows = 0
@@ -1017,7 +1024,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
1017
  result_df['Сводка'] = svodka_df.to_dict('records')
1018
  result_df['Публикации'] = processed_rows_df.to_dict('records')
1019
 
1020
- output = create_output_file(result_df, uploaded_file)
1021
  if output is not None:
1022
  st.download_button(
1023
  label=f"📊 Скачать результат ({processed_rows} из {total_rows} строк)",
@@ -1525,7 +1532,7 @@ def main():
1525
  st.set_page_config(layout="wide")
1526
 
1527
  with st.sidebar:
1528
- st.title("::: AI-анализ мониторинга новостей (v.4.15):::")
1529
  st.subheader("по материалам СКАН-ИНТЕРФАКС")
1530
 
1531
  model_choice = st.radio(
 
992
  # Create processed_rows_df with all columns from original df and required columns
993
  all_columns = list(set(list(df.columns) + list(required_columns.keys())))
994
  processed_rows_df = pd.DataFrame(columns=all_columns)
995
+
996
+ # Deduplication
997
+ original_count = len(df)
998
+ df = df.groupby('Объект', group_keys=False).apply(
999
+ lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 55)
1000
+ ).reset_index(drop=True)
1001
+ st.write(f"Из {original_count} сообщений удалено {original_count - len(df)} дубликатов.")
1002
+
1003
  # Process rows
1004
  total_rows = len(df)
1005
  processed_rows = 0
 
1024
  result_df['Сводка'] = svodka_df.to_dict('records')
1025
  result_df['Публикации'] = processed_rows_df.to_dict('records')
1026
 
1027
+ output = create_output_file(processed_rows_df, uploaded_file)
1028
  if output is not None:
1029
  st.download_button(
1030
  label=f"📊 Скачать результат ({processed_rows} из {total_rows} строк)",
 
1532
  st.set_page_config(layout="wide")
1533
 
1534
  with st.sidebar:
1535
+ st.title("::: AI-анализ мониторинга новостей (v.4.16):::")
1536
  st.subheader("по материалам СКАН-ИНТЕРФАКС")
1537
 
1538
  model_choice = st.radio(