pentarosarium committed on
Commit
ff8256a
·
1 Parent(s): 446a37d

back 2 async fix2

Browse files
Files changed (1) hide show
  1. app.py +130 -118
app.py CHANGED
@@ -768,6 +768,133 @@ def create_output_file(df, uploaded_file):
768
  return None
769
 
770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  def create_interface():
772
  control = ProcessControl()
773
 
@@ -775,7 +902,7 @@ def create_interface():
775
  # Create state for file data
776
  current_file = gr.State(None)
777
 
778
- gr.Markdown("# AI-анализ мониторинга новостей v.2.0 + добавка")
779
 
780
  with gr.Row():
781
  file_input = gr.File(
@@ -825,7 +952,6 @@ def create_interface():
825
  with gr.Column(scale=1):
826
  events_plot = gr.Plot(label="Распределение событий")
827
 
828
- # Create a download row with file component only
829
  with gr.Row():
830
  file_output = gr.File(
831
  label="Скачать результаты",
@@ -836,125 +962,10 @@ def create_interface():
836
  def stop_processing():
837
  control.request_stop()
838
  return "Остановка обработки..."
839
-
840
-
841
- @spaces.GPU(duration=300)
842
- async def process_and_download(file_bytes):
843
- if file_bytes is None:
844
- gr.Warning("Пожалуйста, загрузите файл")
845
- yield (pd.DataFrame(), None, None, None, "Ожидание файла...", "")
846
- return
847
-
848
- detector = None
849
- gpu_manager = GPUTaskManager(
850
- max_retries=3,
851
- retry_delay=30,
852
- cleanup_callback=lambda: detector.cleanup() if detector else None
853
- )
854
-
855
- try:
856
- file_obj = io.BytesIO(file_bytes)
857
- logger.info("File loaded into BytesIO successfully")
858
-
859
- detector = EventDetector()
860
-
861
- # Read and deduplicate data with retry
862
- async def read_and_dedupe():
863
- df = pd.read_excel(file_obj, sheet_name='Публикации')
864
- original_count = len(df)
865
- df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
866
- return df, original_count
867
-
868
- df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
869
-
870
- # Process in smaller batches with better error handling
871
- processed_rows = []
872
- batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
873
-
874
- for batch in batches:
875
- if control.should_stop():
876
- break
877
-
878
- try:
879
- # Process batch with retry mechanism
880
- async def process_batch():
881
- batch_results = []
882
- for idx, row in batch:
883
- text = str(row.get('Выдержки из текста', '')).strip()
884
- entity = str(row.get('Объект', '')).strip()
885
-
886
- if text and entity:
887
- results = detector.process_text(text, entity)
888
- batch_results.append({
889
- 'Объект': entity,
890
- 'Заголовок': str(row.get('Заголовок', '')),
891
- 'Translated': results['translated_text'],
892
- 'Sentiment': results['sentiment'],
893
- 'Impact': results['impact'],
894
- 'Reasoning': results['reasoning'],
895
- 'Event_Type': results['event_type'],
896
- 'Event_Summary': results['event_summary'],
897
- 'Выдержки из текста': text
898
- })
899
- return batch_results
900
-
901
- batch_results = await gpu_manager.run_with_retry(process_batch)
902
- processed_rows.extend(batch_results)
903
-
904
- # Create intermediate results
905
- if processed_rows:
906
- result_df = pd.DataFrame(processed_rows)
907
- yield (
908
- result_df,
909
- None, None, None,
910
- f"Обработано {len(processed_rows)}/{len(df)} строк",
911
- f"Удалено {original_count - len(df)} дубликатов"
912
- )
913
-
914
- except Exception as e:
915
- if gpu_manager.is_gpu_error(e):
916
- logger.warning(f"GPU error in batch processing: {str(e)}")
917
- continue
918
- else:
919
- logger.error(f"Non-GPU error in batch processing: {str(e)}")
920
-
921
- finally:
922
- torch.cuda.empty_cache()
923
-
924
- # Create final results
925
- if processed_rows:
926
- result_df = pd.DataFrame(processed_rows)
927
- output_bytes_io = create_output_file(result_df, file_obj)
928
- fig_sentiment, fig_events = create_visualizations(result_df)
929
-
930
- if output_bytes_io:
931
- temp_file = "results.xlsx"
932
- with open(temp_file, "wb") as f:
933
- f.write(output_bytes_io.getvalue())
934
- yield (
935
- result_df,
936
- fig_sentiment,
937
- fig_events,
938
- temp_file,
939
- "Обработка завершена!",
940
- f"Удалено {original_count - len(df)} дубликатов"
941
- )
942
- return
943
-
944
- yield (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
945
-
946
- except Exception as e:
947
- error_msg = f"Ошибка анализа: {str(e)}"
948
- logger.error(error_msg)
949
- yield (pd.DataFrame(), None, None, None, error_msg, "")
950
-
951
- finally:
952
- if detector:
953
- detector.cleanup()
954
 
955
  stop_btn.click(fn=stop_processing, outputs=[progress])
956
 
957
- # Main processing - simplified outputs
958
  analyze_btn.click(
959
  fn=process_and_download,
960
  inputs=[file_input],
@@ -970,6 +981,7 @@ def create_interface():
970
 
971
  return app
972
 
 
973
  if __name__ == "__main__":
974
  app = create_interface()
975
  app.launch(share=True)
 
768
  return None
769
 
770
 
771
+
772
+
773
+ @spaces.GPU(duration=300)
774
+ def process_and_download(file_bytes):
775
+ """Synchronous wrapper for async processing"""
776
+ if file_bytes is None:
777
+ gr.Warning("Пожалуйста, загрузите файл")
778
+ return pd.DataFrame(), None, None, None, "Ожидание файла...", ""
779
+
780
+ async def async_process():
781
+ detector = None
782
+ gpu_manager = GPUTaskManager(
783
+ max_retries=3,
784
+ retry_delay=30,
785
+ cleanup_callback=lambda: detector.cleanup() if detector else None
786
+ )
787
+
788
+ try:
789
+ file_obj = io.BytesIO(file_bytes)
790
+ logger.info("File loaded into BytesIO successfully")
791
+
792
+ detector = EventDetector()
793
+
794
+ # Read and deduplicate data with retry
795
+ async def read_and_dedupe():
796
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
797
+ original_count = len(df)
798
+ df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
799
+ return df, original_count
800
+
801
+ df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
802
+
803
+ # Process in smaller batches with better error handling
804
+ processed_rows = []
805
+ batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
806
+
807
+ latest_result = (pd.DataFrame(), None, None, None, "Начало обработки...", "")
808
+
809
+ for batch in batches:
810
+ if control.should_stop():
811
+ return latest_result
812
+
813
+ try:
814
+ # Process batch with retry mechanism
815
+ async def process_batch():
816
+ batch_results = []
817
+ for idx, row in batch:
818
+ text = str(row.get('Выдержки из текста', '')).strip()
819
+ entity = str(row.get('Объект', '')).strip()
820
+
821
+ if text and entity:
822
+ results = detector.process_text(text, entity)
823
+ batch_results.append({
824
+ 'Объект': entity,
825
+ 'Заголовок': str(row.get('Заголовок', '')),
826
+ 'Translated': results['translated_text'],
827
+ 'Sentiment': results['sentiment'],
828
+ 'Impact': results['impact'],
829
+ 'Reasoning': results['reasoning'],
830
+ 'Event_Type': results['event_type'],
831
+ 'Event_Summary': results['event_summary'],
832
+ 'Выдержки из текста': text
833
+ })
834
+ return batch_results
835
+
836
+ batch_results = await gpu_manager.run_with_retry(process_batch)
837
+ processed_rows.extend(batch_results)
838
+
839
+ # Update latest result
840
+ if processed_rows:
841
+ result_df = pd.DataFrame(processed_rows)
842
+ latest_result = (
843
+ result_df,
844
+ None, None, None,
845
+ f"Обработано {len(processed_rows)}/{len(df)} строк",
846
+ f"Удалено {original_count - len(df)} дубликатов"
847
+ )
848
+
849
+ except Exception as e:
850
+ if gpu_manager.is_gpu_error(e):
851
+ logger.warning(f"GPU error in batch processing: {str(e)}")
852
+ continue
853
+ else:
854
+ logger.error(f"Non-GPU error in batch processing: {str(e)}")
855
+
856
+ finally:
857
+ torch.cuda.empty_cache()
858
+
859
+ # Create final results
860
+ if processed_rows:
861
+ result_df = pd.DataFrame(processed_rows)
862
+ output_bytes_io = create_output_file(result_df, file_obj)
863
+ fig_sentiment, fig_events = create_visualizations(result_df)
864
+
865
+ if output_bytes_io:
866
+ temp_file = "results.xlsx"
867
+ with open(temp_file, "wb") as f:
868
+ f.write(output_bytes_io.getvalue())
869
+ return (
870
+ result_df,
871
+ fig_sentiment,
872
+ fig_events,
873
+ temp_file,
874
+ "Обработка завершена!",
875
+ f"Удалено {original_count - len(df)} дубликатов"
876
+ )
877
+
878
+ return (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
879
+
880
+ except Exception as e:
881
+ error_msg = f"Ошибка анализа: {str(e)}"
882
+ logger.error(error_msg)
883
+ return (pd.DataFrame(), None, None, None, error_msg, "")
884
+
885
+ finally:
886
+ if detector:
887
+ detector.cleanup()
888
+
889
+ # Run the async function in the event loop
890
+ try:
891
+ loop = asyncio.get_event_loop()
892
+ except RuntimeError:
893
+ loop = asyncio.new_event_loop()
894
+ asyncio.set_event_loop(loop)
895
+
896
+ return loop.run_until_complete(async_process())
897
+
898
  def create_interface():
899
  control = ProcessControl()
900
 
 
902
  # Create state for file data
903
  current_file = gr.State(None)
904
 
905
+ gr.Markdown("# AI-анализ мониторинга новостей v.2.1 + ext")
906
 
907
  with gr.Row():
908
  file_input = gr.File(
 
952
  with gr.Column(scale=1):
953
  events_plot = gr.Plot(label="Распределение событий")
954
 
 
955
  with gr.Row():
956
  file_output = gr.File(
957
  label="Скачать результаты",
 
962
  def stop_processing():
963
  control.request_stop()
964
  return "Остановка обработки..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
965
 
966
  stop_btn.click(fn=stop_processing, outputs=[progress])
967
 
968
+ # Main processing with synchronous function
969
  analyze_btn.click(
970
  fn=process_and_download,
971
  inputs=[file_input],
 
981
 
982
  return app
983
 
984
+
985
  if __name__ == "__main__":
986
  app = create_interface()
987
  app.launch(share=True)