Spaces:
Running
Running
Commit
·
10f44f6
1
Parent(s):
9da9717
4.16 attempt at mend stop
Browse files
app.py
CHANGED
@@ -992,7 +992,14 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
992 |
# Create processed_rows_df with all columns from original df and required columns
|
993 |
all_columns = list(set(list(df.columns) + list(required_columns.keys())))
|
994 |
processed_rows_df = pd.DataFrame(columns=all_columns)
|
995 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
996 |
# Process rows
|
997 |
total_rows = len(df)
|
998 |
processed_rows = 0
|
@@ -1017,7 +1024,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
1017 |
result_df['Сводка'] = svodka_df.to_dict('records')
|
1018 |
result_df['Публикации'] = processed_rows_df.to_dict('records')
|
1019 |
|
1020 |
-
output = create_output_file(
|
1021 |
if output is not None:
|
1022 |
st.download_button(
|
1023 |
label=f"📊 Скачать результат ({processed_rows} из {total_rows} строк)",
|
@@ -1525,7 +1532,7 @@ def main():
|
|
1525 |
st.set_page_config(layout="wide")
|
1526 |
|
1527 |
with st.sidebar:
|
1528 |
-
st.title("::: AI-анализ мониторинга новостей (v.4.
|
1529 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
1530 |
|
1531 |
model_choice = st.radio(
|
|
|
992 |
# Create processed_rows_df with all columns from original df and required columns
|
993 |
all_columns = list(set(list(df.columns) + list(required_columns.keys())))
|
994 |
processed_rows_df = pd.DataFrame(columns=all_columns)
|
995 |
+
|
996 |
+
# Deduplication
|
997 |
+
original_count = len(df)
|
998 |
+
df = df.groupby('Объект', group_keys=False).apply(
|
999 |
+
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 55)
|
1000 |
+
).reset_index(drop=True)
|
1001 |
+
st.write(f"Из {original_count} сообщений удалено {original_count - len(df)} дубликатов.")
|
1002 |
+
|
1003 |
# Process rows
|
1004 |
total_rows = len(df)
|
1005 |
processed_rows = 0
|
|
|
1024 |
result_df['Сводка'] = svodka_df.to_dict('records')
|
1025 |
result_df['Публикации'] = processed_rows_df.to_dict('records')
|
1026 |
|
1027 |
+
output = create_output_file(processed_rows_df, uploaded_file)
|
1028 |
if output is not None:
|
1029 |
st.download_button(
|
1030 |
label=f"📊 Скачать результат ({processed_rows} из {total_rows} строк)",
|
|
|
1532 |
st.set_page_config(layout="wide")
|
1533 |
|
1534 |
with st.sidebar:
|
1535 |
+
st.title("::: AI-анализ мониторинга новостей (v.4.16):::")
|
1536 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
1537 |
|
1538 |
model_choice = st.radio(
|