Spaces:

rote1
/

IAGO

Sleeping

callanwu committed on Mar 13

Commit

c581f7d

1 Parent(s): f580792

Fix Chunkr usage bug

Files changed (2) hide show

owl/utils/document_toolkit.py CHANGED Viewed

@@ -144,12 +144,11 @@ class DocumentProcessingToolkit(BaseToolkit):
                 return True, extracted_text
             try:
                 result = asyncio.run(self._extract_content_with_chunkr(document_path))
-                raise ValueError("Chunkr is not available.")
                 return True, result
             except Exception as e:
                 logger.warning(
-                    f"Error occurred while using chunkr to process document: {e}"
                 )
                 if document_path.endswith(".pdf"):
                     # try using pypdf to extract text from pdf
@@ -226,7 +225,7 @@ class DocumentProcessingToolkit(BaseToolkit):
         if result.status == "Failed":
             logger.error(
-                f"Error while processing document {document_path}: {result.message}"
             )
             return f"Error while processing document: {result.message}"

                 return True, extracted_text
             try:
                 result = asyncio.run(self._extract_content_with_chunkr(document_path))
                 return True, result
             except Exception as e:
                 logger.warning(
+                    f"Error occurred while using Chunkr to process document: {e}"
                 )
                 if document_path.endswith(".pdf"):
                     # try using pypdf to extract text from pdf
         if result.status == "Failed":
             logger.error(
+                f"Error while processing document {document_path}: {result.message} using Chunkr."
             )
             return f"Error while processing document: {result.message}"

owl/utils/gaia.py CHANGED Viewed

@@ -191,15 +191,10 @@ class GAIABenchmark(BaseBenchmark):
             except Exception as e:
                 logger.warning(e)
                 # raise FileNotFoundError(f"{self.save_to} does not exist.")
         # Process tasks
         for task in tqdm(datas, desc="Running"):
-            if self._check_task_completed(task["task_id"]):
-                logger.info(
-                    f"The following task is already completed:\n task id: {task['task_id']}, question: {task['Question']}"
-                )
-                continue
             if_prepared_task, info = self._prepare_task(task)
             if not if_prepared_task:
                 _result_info = {

             except Exception as e:
                 logger.warning(e)
                 # raise FileNotFoundError(f"{self.save_to} does not exist.")
+        datas = [data for data in datas if not self._check_task_completed(data["task_id"])]
+        logger.info(f"Number of tasks to be processed: {len(datas)}")
         # Process tasks
         for task in tqdm(datas, desc="Running"):
             if_prepared_task, info = self._prepare_task(task)
             if not if_prepared_task:
                 _result_info = {