callanwu committed on
Commit
c581f7d
·
1 Parent(s): f580792

Fix Chunkr usage bug

Browse files
owl/utils/document_toolkit.py CHANGED
@@ -144,12 +144,11 @@ class DocumentProcessingToolkit(BaseToolkit):
144
  return True, extracted_text
145
  try:
146
  result = asyncio.run(self._extract_content_with_chunkr(document_path))
147
- raise ValueError("Chunkr is not available.")
148
  return True, result
149
 
150
  except Exception as e:
151
  logger.warning(
152
- f"Error occurred while using chunkr to process document: {e}"
153
  )
154
  if document_path.endswith(".pdf"):
155
  # try using pypdf to extract text from pdf
@@ -226,7 +225,7 @@ class DocumentProcessingToolkit(BaseToolkit):
226
 
227
  if result.status == "Failed":
228
  logger.error(
229
- f"Error while processing document {document_path}: {result.message}"
230
  )
231
  return f"Error while processing document: {result.message}"
232
 
 
144
  return True, extracted_text
145
  try:
146
  result = asyncio.run(self._extract_content_with_chunkr(document_path))
 
147
  return True, result
148
 
149
  except Exception as e:
150
  logger.warning(
151
+ f"Error occurred while using Chunkr to process document: {e}"
152
  )
153
  if document_path.endswith(".pdf"):
154
  # try using pypdf to extract text from pdf
 
225
 
226
  if result.status == "Failed":
227
  logger.error(
228
+ f"Error while processing document {document_path}: {result.message} using Chunkr."
229
  )
230
  return f"Error while processing document: {result.message}"
231
 
owl/utils/gaia.py CHANGED
@@ -191,15 +191,10 @@ class GAIABenchmark(BaseBenchmark):
191
  except Exception as e:
192
  logger.warning(e)
193
  # raise FileNotFoundError(f"{self.save_to} does not exist.")
194
-
 
195
  # Process tasks
196
  for task in tqdm(datas, desc="Running"):
197
- if self._check_task_completed(task["task_id"]):
198
- logger.info(
199
- f"The following task is already completed:\n task id: {task['task_id']}, question: {task['Question']}"
200
- )
201
- continue
202
-
203
  if_prepared_task, info = self._prepare_task(task)
204
  if not if_prepared_task:
205
  _result_info = {
 
191
  except Exception as e:
192
  logger.warning(e)
193
  # raise FileNotFoundError(f"{self.save_to} does not exist.")
194
+ datas = [data for data in datas if not self._check_task_completed(data["task_id"])]
195
+ logger.info(f"Number of tasks to be processed: {len(datas)}")
196
  # Process tasks
197
  for task in tqdm(datas, desc="Running"):
 
 
 
 
 
 
198
  if_prepared_task, info = self._prepare_task(task)
199
  if not if_prepared_task:
200
  _result_info = {