|
""" |
|
File: ocr2.py |
|
|
|
Description: Optical Character Recognition (OCR) using software 2.0 models |
|
|
|
Author: Didier Guillevic |
|
Date: 2025-04-07 |
|
""" |
|
|
|
import os |
|
import base64 |
|
from mistralai import Mistral |
|
|
|
|
|
|
|
|
|
# Module-level Mistral client shared by every helper in this file.
# The key is read once at import time; a missing MISTRAL_API_KEY
# environment variable raises KeyError right here.
api_key: str = os.environ["MISTRAL_API_KEY"]
client: Mistral = Mistral(api_key=api_key)
|
|
|
|
|
|
|
|
|
|
|
def process_pdf(pdf_path: str):
    """Run Mistral OCR on a local PDF file and return its text as Markdown.

    Args:
        pdf_path: Path to a local PDF file.

    Returns:
        str: The Markdown extracted from every page, in page order, with
            pages separated by a blank line. An empty string is returned
            when the OCR response contains no pages.

    Raises:
        OSError: If the PDF file cannot be opened for reading.

    Note:
        We follow the Mistral API documentation: the file is uploaded to
        the Mistral API, a signed URL is requested for the uploaded file,
        and OCR is performed on that URL.
        https://docs.mistral.ai/capabilities/document/
    """
    # Keep the handle open only for the duration of the upload so it is
    # always closed, even when the upload raises.
    with open(pdf_path, "rb") as pdf_file:
        uploaded_pdf = client.files.upload(
            file={
                # Send only the file name, not the local directory layout.
                "file_name": os.path.basename(pdf_path),
                "content": pdf_file,
            },
            purpose="ocr",
        )

    signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)

    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document={"type": "document_url", "document_url": signed_url.url},
    )

    # Concatenate all pages: returning only the first page would silently
    # drop the rest of a multi-page document.
    return "\n\n".join(page.markdown for page in ocr_response.pages)
|
|
|
|
|
|
|
|
|
|
|
def process_image(image_path: str):
    """Extract the information present in a local image file.

    Args:
        image_path: Path to a local image file.

    Returns:
        str: The text returned by the model for the image.

    Raises:
        ValueError: If the image file could not be read and encoded.

    Note:
        Although it should "work", when I process an image file with
        Mistral_OCR, I get an empty result. Everything appears fine, but no
        text is extracted. Hence, the image is sent to a chat model
        (Mistral_Small) to extract the text present in the image.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    # encode_image reports failures by returning None; stop here rather
    # than sending the literal text "None" to the API as image data.
    encoded_image = encode_image(image_path)
    if encoded_image is None:
        raise ValueError(f"Could not read image file: {image_path}")

    # Declare the real image type in the data URL (PNG, WEBP, ...), falling
    # back to JPEG when the extension is unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Could you extract the information present in the image. "
                        "No need to repeat the task description. Simply respond."
                    ),
                },
                {
                    "type": "image_url",
                    "image_url": f"data:{mime_type};base64,{encoded_image}",
                },
            ],
        }
    ]

    response = client.chat.complete(
        model='mistral-small-latest',
        messages=messages,
    )
    return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
def encode_image(image_path):
    """Return the content of an image file as a base64 text string.

    Args:
        image_path: Path to a local image file.

    Returns:
        The base64-encoded file content decoded as UTF-8 text, or None
        when the file is missing or any other error occurs. Errors are
        reported on standard output rather than raised.
    """
    encoded = None
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
            encoded = base64.b64encode(raw_bytes).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
    except Exception as err:
        print(f"Error: {err}")
    return encoded
|
|