"""
File: ocr2.py
Description: Optical Character Recognition (OCR) using software 2.0 models
Author: Didier Guillevic
Date: 2025-04-07
"""

import os
import base64
import mimetypes
from typing import Optional

from mistralai import Mistral

#
# MistralAI client
#
# NOTE: the client is created at import time; importing this module raises
# KeyError when the MISTRAL_API_KEY environment variable is not set.
api_key = os.environ["MISTRAL_API_KEY"]
client = Mistral(api_key=api_key)


#
# Process PDF file
#
def process_pdf(pdf_path: str) -> str:
    """Process given file with Mistral_OCR

    Args:
        pdf_path: Path to a local PDF file.

    Returns:
        str: The OCR result as a string: the markdown of every page of the
            document, in page order, separated by a blank line. An empty
            string is returned when the OCR response contains no pages.

    Raises:
        OSError: If the PDF file cannot be opened.

    Note:
        We follow the Mistral API documentation to upload the file and
        process it with OCR. The file is uploaded to the Mistral API and
        the OCR is performed on the uploaded file. The result is returned
        as a string.
        https://docs.mistral.ai/capabilities/document/
    """
    # Keep the file open only for the duration of the upload so the handle
    # is always released, even when the upload fails.
    with open(pdf_path, "rb") as pdf_file:
        uploaded_pdf = client.files.upload(
            file={
                # Send only the base name: the remote side has no use for
                # (and should not see) the local directory layout.
                "file_name": os.path.basename(pdf_path),
                "content": pdf_file,
            },
            purpose="ocr",
        )

    signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)

    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document={
            "type": "document_url",
            "document_url": signed_url.url,
        },
    )

    # The response holds one entry per page; return all of them rather than
    # only the first page so multi-page documents are not truncated.
    return "\n\n".join(page.markdown for page in ocr_response.pages)


#
# Process image file
#
def process_image(image_path: str) -> str:
    """Process given image file: extract information present in image.

    Args:
        image_path: Path to a local image file.

    Returns:
        str: The OCR result as a string.

    Raises:
        ValueError: If the image file could not be read and encoded.

    Note:
        Although it should "work", when I process an image file with
        Mistral_OCR, I get an empty result. Everything appears fine, but no
        text is extracted. Hence, I will send the image to a model such as
        Mistral_Small (or Mistral_Large) to extract the text present in the
        image.
    """
    encoded_image = encode_image(image_path)
    if encoded_image is None:
        # encode_image() already reported the cause; fail here instead of
        # sending the literal text "None" to the model as image data.
        raise ValueError(f"Could not read image file: {image_path}")

    # Label the payload with the image's actual type (guessed from the file
    # extension); fall back to JPEG when the type cannot be determined.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Could you extract the information present in the image. "
                        "No need to repeat the task description. Simply respond."
                    ),
                },
                {
                    "type": "image_url",
                    "image_url": f"data:{mime_type};base64,{encoded_image}",
                },
            ],
        }
    ]

    response = client.chat.complete(
        model='mistral-small-latest',
        messages=messages,
    )
    return response.choices[0].message.content


#
# Encode images as base64
#
def encode_image(image_path) -> Optional[str]:
    """Encode the image to base64.

    Args:
        image_path: Path to a local image file.

    Returns:
        The file content encoded as a base64 string (UTF-8 decoded), or
        None when the file could not be read. In the failure case an error
        message is printed rather than an exception being raised.
    """
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
        return None
    except Exception as e:  # Added general exception handling
        # Deliberately broad: this helper is best-effort and signals any
        # failure to its caller through the None return value.
        print(f"Error: {e}")
        return None