Spaces:

Didier
/

Optical_character_recognition

Running

App Files Files Community

Optical_character_recognition / ocr2.py

Didier

Upload ocr2.py

6717123 verified 23 days ago

raw

history blame contribute delete

2.99 kB

	"""
	File: ocr2.py

	Description: Optical Character Recognition (OCR) using software 2.0 models

	Author: Didier Guillevic
	Date: 2025-04-07
	"""

	import os
	import base64
	from mistralai import Mistral

	#
	# MistralAI client
	#
	# The API key is read from the environment when this module is imported.
	# os.environ[...] raises KeyError if MISTRAL_API_KEY is not set, so a missing
	# key fails at import time rather than on the first request.
	# `client` is a single module-level instance used by the functions below.
	api_key = os.environ["MISTRAL_API_KEY"]
	client = Mistral(api_key=api_key)


	#
	# Process PDF file
	#
	def process_pdf(pdf_path: str) -> str:
	    """Run Mistral OCR on a local PDF file and return the extracted Markdown.

	    Args:
	        pdf_path: Path to a local PDF file.

	    Returns:
	        The Markdown of every page in the OCR response, in page order,
	        separated by blank lines. For a single-page document this is just
	        that page's Markdown; if the response contains no pages, an empty
	        string is returned.

	    Raises:
	        OSError: If ``pdf_path`` cannot be opened for reading.
	        Errors raised by the Mistral client are not caught here and
	        propagate to the caller.

	    Note:
	        We follow the Mistral API documentation: the file is uploaded to
	        the Mistral API, a signed URL is requested for the uploaded file,
	        and OCR is performed on that URL.
	        https://docs.mistral.ai/capabilities/document/
	    """
	    # Open the file in a context manager so the handle is closed as soon as
	    # the upload call returns, instead of being leaked.
	    with open(pdf_path, "rb") as pdf_file:
	        uploaded_pdf = client.files.upload(
	            file={"file_name": pdf_path, "content": pdf_file},
	            purpose="ocr",
	        )
	    signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)

	    ocr_response = client.ocr.process(
	        model="mistral-ocr-latest",
	        document={"type": "document_url", "document_url": signed_url.url},
	    )

	    # Join all pages: returning only pages[0] silently dropped the rest of a
	    # multi-page document and raised IndexError on an empty page list.
	    return "\n\n".join(page.markdown for page in ocr_response.pages)


	#
	# Process image file
	#
	def process_image(image_path: str):
	    """Extract the information present in an image using a Mistral chat model.

	    Args:
	        image_path: Path to a local image file.

	    Returns:
	        The content of the first choice of the chat completion, i.e. the
	        model's description of the information found in the image.

	    Raises:
	        ValueError: If the image file could not be read and encoded.
	        Errors raised by the Mistral client are not caught here and
	        propagate to the caller.

	    Note:
	        Although it should "work", when I process an image file with
	        Mistral_OCR, I get an empty result. Everything appears fine, but no
	        text is extracted. Hence, the image is sent to a model such as
	        Mistral_Small (or Mistral_Large) to extract the text present in the
	        image.
	    """
	    # Local import keeps this function self-contained; mimetypes is stdlib.
	    import mimetypes

	    encoded_image = encode_image(image_path)
	    if encoded_image is None:
	        # encode_image reports failures by returning None. Without this check
	        # the literal text "None" would be sent as the base64 payload.
	        raise ValueError(f"Could not read image file: {image_path}")

	    # Label the payload with the file's real type (PNG, WebP, ...) instead of
	    # always claiming JPEG; fall back to JPEG when the type cannot be guessed.
	    mime_type, _ = mimetypes.guess_type(image_path)
	    if mime_type is None or not mime_type.startswith("image/"):
	        mime_type = "image/jpeg"

	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {
	                    "type": "text",
	                    "text": (
	                        "Could you extract the information present in the image. "
	                        "No need to repeat the task description. Simply respond."
	                    ),
	                },
	                {
	                    "type": "image_url",
	                    "image_url": f"data:{mime_type};base64,{encoded_image}",
	                },
	            ],
	        }
	    ]

	    response = client.chat.complete(
	        model='mistral-small-latest',
	        messages=messages,
	    )
	    return response.choices[0].message.content


	#
	# Encode images as base64
	#
	def encode_image(image_path):
	    """Return the contents of ``image_path`` as a base64 text string.

	    The file is read in binary mode and its bytes are base64-encoded.
	    On any failure an error message is printed and ``None`` is returned
	    instead of raising, so callers must check for ``None``.
	    """
	    encoded = None  # stays None unless reading and encoding both succeed
	    try:
	        with open(image_path, "rb") as handle:
	            raw_bytes = handle.read()
	        encoded = base64.b64encode(raw_bytes).decode("utf-8")
	    except FileNotFoundError:
	        print(f"Error: The file {image_path} was not found.")
	    except Exception as err:  # any other failure is reported the same way
	        print(f"Error: {err}")
	    return encoded