Fix Chunkr usage bug
Browse files
- owl/utils/document_toolkit.py +2 -3
- owl/utils/gaia.py +2 -7
owl/utils/document_toolkit.py
CHANGED
@@ -144,12 +144,11 @@ class DocumentProcessingToolkit(BaseToolkit):
|
|
144 |
return True, extracted_text
|
145 |
try:
|
146 |
result = asyncio.run(self._extract_content_with_chunkr(document_path))
|
147 |
-
raise ValueError("Chunkr is not available.")
|
148 |
return True, result
|
149 |
|
150 |
except Exception as e:
|
151 |
logger.warning(
|
152 |
-
f"Error occurred while using
|
153 |
)
|
154 |
if document_path.endswith(".pdf"):
|
155 |
# try using pypdf to extract text from pdf
|
@@ -226,7 +225,7 @@ class DocumentProcessingToolkit(BaseToolkit):
|
|
226 |
|
227 |
if result.status == "Failed":
|
228 |
logger.error(
|
229 |
-
f"Error while processing document {document_path}: {result.message}"
|
230 |
)
|
231 |
return f"Error while processing document: {result.message}"
|
232 |
|
|
|
144 |
return True, extracted_text
|
145 |
try:
|
146 |
result = asyncio.run(self._extract_content_with_chunkr(document_path))
|
|
|
147 |
return True, result
|
148 |
|
149 |
except Exception as e:
|
150 |
logger.warning(
|
151 |
+
f"Error occurred while using Chunkr to process document: {e}"
|
152 |
)
|
153 |
if document_path.endswith(".pdf"):
|
154 |
# try using pypdf to extract text from pdf
|
|
|
225 |
|
226 |
if result.status == "Failed":
|
227 |
logger.error(
|
228 |
+
f"Error while processing document {document_path}: {result.message} using Chunkr."
|
229 |
)
|
230 |
return f"Error while processing document: {result.message}"
|
231 |
|
owl/utils/gaia.py
CHANGED
@@ -191,15 +191,10 @@ class GAIABenchmark(BaseBenchmark):
|
|
191 |
except Exception as e:
|
192 |
logger.warning(e)
|
193 |
# raise FileNotFoundError(f"{self.save_to} does not exist.")
|
194 |
-
|
|
|
195 |
# Process tasks
|
196 |
for task in tqdm(datas, desc="Running"):
|
197 |
-
if self._check_task_completed(task["task_id"]):
|
198 |
-
logger.info(
|
199 |
-
f"The following task is already completed:\n task id: {task['task_id']}, question: {task['Question']}"
|
200 |
-
)
|
201 |
-
continue
|
202 |
-
|
203 |
if_prepared_task, info = self._prepare_task(task)
|
204 |
if not if_prepared_task:
|
205 |
_result_info = {
|
|
|
191 |
except Exception as e:
|
192 |
logger.warning(e)
|
193 |
# raise FileNotFoundError(f"{self.save_to} does not exist.")
|
194 |
+
datas = [data for data in datas if not self._check_task_completed(data["task_id"])]
|
195 |
+
logger.info(f"Number of tasks to be processed: {len(datas)}")
|
196 |
# Process tasks
|
197 |
for task in tqdm(datas, desc="Running"):
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
if_prepared_task, info = self._prepare_task(task)
|
199 |
if not if_prepared_task:
|
200 |
_result_info = {
|