Update ocr.py
Browse files
ocr.py
CHANGED
@@ -10,15 +10,6 @@ Date: 2024-11-23
|
|
10 |
import os
|
11 |
os.system("bash setup.sh") # Ensure setup script runs before importing pytesseract
|
12 |
|
13 |
-
# Check Ghostscript installation
|
14 |
-
gs_path = "/usr/bin/gs" # Default Ghostscript location on Ubuntu
|
15 |
-
|
16 |
-
if not os.path.exists(gs_path):
|
17 |
-
raise FileNotFoundError(f"Ghostscript not found at {gs_path}")
|
18 |
-
|
19 |
-
# Set Ghostscript path explicitly
|
20 |
-
os.environ["OCRMYPDF_GS"] = gs_path
|
21 |
-
|
22 |
import pytesseract
|
23 |
from pdf2image import convert_from_path
|
24 |
from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
|
|
|
10 |
import os
|
11 |
os.system("bash setup.sh") # Ensure setup script runs before importing pytesseract
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
import pytesseract
|
14 |
from pdf2image import convert_from_path
|
15 |
from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
|