# Didier's picture
# Upload ocr2.py
# 6717123 verified
"""
File: ocr2.py
Description: Optical Character Recognition (OCR) using software 2.0 models
Author: Didier Guillevic
Date: 2025-04-07
"""
import os
import base64
from mistralai import Mistral
#
# MistralAI client
#
# Read the API key from the environment. A missing MISTRAL_API_KEY raises
# KeyError here, i.e. when this module is imported.
api_key = os.environ["MISTRAL_API_KEY"]
# Module-level client shared by process_pdf() and process_image().
client = Mistral(api_key=api_key)
#
# Process PDF file
#
def process_pdf(pdf_path: str):
    """Process given file with Mistral_OCR.

    Args:
        pdf_path: Path to a local PDF file.

    Returns:
        str: The OCR result as a string: the markdown of every page in the
            OCR response, in page order, separated by blank lines. For a
            single-page document this is simply that page's markdown.

    Note:
        We follow the Mistral API documentation to upload the file and
        process it with OCR. The file is uploaded to the Mistral API,
        a signed URL is requested for the uploaded file, and the OCR is
        performed on that URL.
        https://docs.mistral.ai/capabilities/document/
    """
    # Use a context manager so the file handle is closed as soon as the
    # upload call returns, instead of leaking until garbage collection.
    with open(pdf_path, "rb") as pdf_file:
        uploaded_pdf = client.files.upload(
            file={
                # Send only the base name; the local directory layout is
                # irrelevant to the service.
                "file_name": os.path.basename(pdf_path),
                "content": pdf_file,
            },
            purpose="ocr",
        )

    signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)

    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document={"type": "document_url", "document_url": signed_url.url},
    )

    # Concatenate all pages; returning only pages[0] silently dropped the
    # rest of any multi-page document.
    return "\n\n".join(page.markdown for page in ocr_response.pages)
#
# Process image file
#
def process_image(image_path: str):
    """Process given image file: extract information present in image.

    Args:
        image_path: Path to a local image file.

    Returns:
        str: The OCR result as a string.

    Raises:
        ValueError: If the image file could not be read (encode_image
            returned None).

    Note:
        Although it should "work", when I process an image file with
        Mistral_OCR, I get an empty result. Everything appears fine, but no
        text is extracted. Hence, I will send the image to a model such as
        Mistral_Small (or Mistral_Large) to extract the text present in the
        image.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    import mimetypes

    encoded_image = encode_image(image_path)
    if encoded_image is None:
        # Without this guard the literal text "None" would be embedded in
        # the data URL and sent to the model as if it were image data.
        raise ValueError(f"Could not read image file: {image_path}")

    # Label the data URL with the actual image type (png, webp, ...) rather
    # than always claiming JPEG; fall back to JPEG when the extension is
    # unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Could you extract the information present in the image. "
                        "No need to repeat the task description. Simply respond."
                    ),
                },
                {
                    "type": "image_url",
                    "image_url": f"data:{mime_type};base64,{encoded_image}",
                },
            ],
        }
    ]

    response = client.chat.complete(
        model='mistral-small-latest',
        messages=messages,
    )
    return response.choices[0].message.content
#
# Encode images as base64
#
def encode_image(image_path: str):
    """Encode the image to base64.

    Args:
        image_path: Path to a local image file.

    Returns:
        str | None: The file's bytes base64-encoded and decoded to a UTF-8
            string, or None if the file could not be opened or read. On
            failure an error message is printed and no exception is raised,
            so callers must check for None.
    """
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
        return None
    except Exception as e:  # Best-effort: report any other failure, do not raise
        print(f"Error: {e}")
        return None