Commit ff8256a · back 2 async fix2
Parent(s): 446a37d
app.py
CHANGED
@@ -768,6 +768,133 @@ def create_output_file(df, uploaded_file):
         return None


+
+
+@spaces.GPU(duration=300)
+def process_and_download(file_bytes):
+    """Synchronous wrapper for async processing"""
+    if file_bytes is None:
+        gr.Warning("Пожалуйста, загрузите файл")
+        return pd.DataFrame(), None, None, None, "Ожидание файла...", ""
+
+    async def async_process():
+        detector = None
+        gpu_manager = GPUTaskManager(
+            max_retries=3,
+            retry_delay=30,
+            cleanup_callback=lambda: detector.cleanup() if detector else None
+        )
+
+        try:
+            file_obj = io.BytesIO(file_bytes)
+            logger.info("File loaded into BytesIO successfully")
+
+            detector = EventDetector()
+
+            # Read and deduplicate data with retry
+            async def read_and_dedupe():
+                df = pd.read_excel(file_obj, sheet_name='Публикации')
+                original_count = len(df)
+                df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
+                return df, original_count
+
+            df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
+
+            # Process in smaller batches with better error handling
+            processed_rows = []
+            batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
+
+            latest_result = (pd.DataFrame(), None, None, None, "Начало обработки...", "")
+
+            for batch in batches:
+                if control.should_stop():
+                    return latest_result
+
+                try:
+                    # Process batch with retry mechanism
+                    async def process_batch():
+                        batch_results = []
+                        for idx, row in batch:
+                            text = str(row.get('Выдержки из текста', '')).strip()
+                            entity = str(row.get('Объект', '')).strip()
+
+                            if text and entity:
+                                results = detector.process_text(text, entity)
+                                batch_results.append({
+                                    'Объект': entity,
+                                    'Заголовок': str(row.get('Заголовок', '')),
+                                    'Translated': results['translated_text'],
+                                    'Sentiment': results['sentiment'],
+                                    'Impact': results['impact'],
+                                    'Reasoning': results['reasoning'],
+                                    'Event_Type': results['event_type'],
+                                    'Event_Summary': results['event_summary'],
+                                    'Выдержки из текста': text
+                                })
+                        return batch_results
+
+                    batch_results = await gpu_manager.run_with_retry(process_batch)
+                    processed_rows.extend(batch_results)
+
+                    # Update latest result
+                    if processed_rows:
+                        result_df = pd.DataFrame(processed_rows)
+                        latest_result = (
+                            result_df,
+                            None, None, None,
+                            f"Обработано {len(processed_rows)}/{len(df)} строк",
+                            f"Удалено {original_count - len(df)} дубликатов"
+                        )
+
+                except Exception as e:
+                    if gpu_manager.is_gpu_error(e):
+                        logger.warning(f"GPU error in batch processing: {str(e)}")
+                        continue
+                    else:
+                        logger.error(f"Non-GPU error in batch processing: {str(e)}")
+
+                finally:
+                    torch.cuda.empty_cache()
+
+            # Create final results
+            if processed_rows:
+                result_df = pd.DataFrame(processed_rows)
+                output_bytes_io = create_output_file(result_df, file_obj)
+                fig_sentiment, fig_events = create_visualizations(result_df)
+
+                if output_bytes_io:
+                    temp_file = "results.xlsx"
+                    with open(temp_file, "wb") as f:
+                        f.write(output_bytes_io.getvalue())
+                    return (
+                        result_df,
+                        fig_sentiment,
+                        fig_events,
+                        temp_file,
+                        "Обработка завершена!",
+                        f"Удалено {original_count - len(df)} дубликатов"
+                    )
+
+            return (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
+
+        except Exception as e:
+            error_msg = f"Ошибка анализа: {str(e)}"
+            logger.error(error_msg)
+            return (pd.DataFrame(), None, None, None, error_msg, "")
+
+        finally:
+            if detector:
+                detector.cleanup()
+
+    # Run the async function in the event loop
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+    return loop.run_until_complete(async_process())
+
 def create_interface():
     control = ProcessControl()

@@ -775,7 +902,7 @@ def create_interface():
         # Create state for file data
         current_file = gr.State(None)

-        gr.Markdown("# AI-анализ мониторинга новостей v.2.
+        gr.Markdown("# AI-анализ мониторинга новостей v.2.1 + ext")

         with gr.Row():
             file_input = gr.File(

@@ -825,7 +952,6 @@ def create_interface():
             with gr.Column(scale=1):
                 events_plot = gr.Plot(label="Распределение событий")

-        # Create a download row with file component only
         with gr.Row():
            file_output = gr.File(
                label="Скачать результаты",

@@ -836,125 +962,10 @@ def create_interface():
         def stop_processing():
             control.request_stop()
             return "Остановка обработки..."
-
-
-        @spaces.GPU(duration=300)
-        async def process_and_download(file_bytes):
-            if file_bytes is None:
-                gr.Warning("Пожалуйста, загрузите файл")
-                yield (pd.DataFrame(), None, None, None, "Ожидание файла...", "")
-                return
-
-            detector = None
-            gpu_manager = GPUTaskManager(
-                max_retries=3,
-                retry_delay=30,
-                cleanup_callback=lambda: detector.cleanup() if detector else None
-            )
-
-            try:
-                file_obj = io.BytesIO(file_bytes)
-                logger.info("File loaded into BytesIO successfully")
-
-                detector = EventDetector()
-
-                # Read and deduplicate data with retry
-                async def read_and_dedupe():
-                    df = pd.read_excel(file_obj, sheet_name='Публикации')
-                    original_count = len(df)
-                    df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
-                    return df, original_count
-
-                df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
-
-                # Process in smaller batches with better error handling
-                processed_rows = []
-                batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
-
-                for batch in batches:
-                    if control.should_stop():
-                        break
-
-                    try:
-                        # Process batch with retry mechanism
-                        async def process_batch():
-                            batch_results = []
-                            for idx, row in batch:
-                                text = str(row.get('Выдержки из текста', '')).strip()
-                                entity = str(row.get('Объект', '')).strip()
-
-                                if text and entity:
-                                    results = detector.process_text(text, entity)
-                                    batch_results.append({
-                                        'Объект': entity,
-                                        'Заголовок': str(row.get('Заголовок', '')),
-                                        'Translated': results['translated_text'],
-                                        'Sentiment': results['sentiment'],
-                                        'Impact': results['impact'],
-                                        'Reasoning': results['reasoning'],
-                                        'Event_Type': results['event_type'],
-                                        'Event_Summary': results['event_summary'],
-                                        'Выдержки из текста': text
-                                    })
-                            return batch_results
-
-                        batch_results = await gpu_manager.run_with_retry(process_batch)
-                        processed_rows.extend(batch_results)
-
-                        # Create intermediate results
-                        if processed_rows:
-                            result_df = pd.DataFrame(processed_rows)
-                            yield (
-                                result_df,
-                                None, None, None,
-                                f"Обработано {len(processed_rows)}/{len(df)} строк",
-                                f"Удалено {original_count - len(df)} дубликатов"
-                            )
-
-                    except Exception as e:
-                        if gpu_manager.is_gpu_error(e):
-                            logger.warning(f"GPU error in batch processing: {str(e)}")
-                            continue
-                        else:
-                            logger.error(f"Non-GPU error in batch processing: {str(e)}")
-
-                    finally:
-                        torch.cuda.empty_cache()
-
-                # Create final results
-                if processed_rows:
-                    result_df = pd.DataFrame(processed_rows)
-                    output_bytes_io = create_output_file(result_df, file_obj)
-                    fig_sentiment, fig_events = create_visualizations(result_df)
-
-                    if output_bytes_io:
-                        temp_file = "results.xlsx"
-                        with open(temp_file, "wb") as f:
-                            f.write(output_bytes_io.getvalue())
-                        yield (
-                            result_df,
-                            fig_sentiment,
-                            fig_events,
-                            temp_file,
-                            "Обработка завершена!",
-                            f"Удалено {original_count - len(df)} дубликатов"
-                        )
-                        return
-
-                yield (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
-
-            except Exception as e:
-                error_msg = f"Ошибка анализа: {str(e)}"
-                logger.error(error_msg)
-                yield (pd.DataFrame(), None, None, None, error_msg, "")
-
-            finally:
-                if detector:
-                    detector.cleanup()

         stop_btn.click(fn=stop_processing, outputs=[progress])

-        # Main processing
+        # Main processing with synchronous function
         analyze_btn.click(
             fn=process_and_download,
             inputs=[file_input],

@@ -970,6 +981,7 @@ def create_interface():

     return app

+
 if __name__ == "__main__":
     app = create_interface()
     app.launch(share=True)
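For reference, the event-loop handoff at the end of the new process_and_download follows a common pattern: a synchronous function obtains (or creates) an event loop and drives a coroutine to completion with run_until_complete, so the decorated handler stays synchronous while the pipeline itself remains async. Below is a minimal, self-contained sketch of that pattern; fake_pipeline and run_pipeline are illustrative stand-ins, not functions from app.py.

import asyncio

async def fake_pipeline(payload):
    # Stand-in for async_process(): await whatever async work is needed.
    await asyncio.sleep(0.1)
    return f"processed {payload}"

def run_pipeline(payload):
    """Synchronous entry point, e.g. for a click handler."""
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No loop in this thread yet: create and register one.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(fake_pipeline(payload))

if __name__ == "__main__":
    print(run_pipeline("demo"))

Note that run_until_complete assumes no event loop is already running in the calling thread; it raises RuntimeError on a running loop.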