import base64 import mimetypes import markdown from weasyprint import HTML import fitz # PyMuPDF import os def encode_image_v2(image_path: str): """Encodes an image to base64 and determines the correct MIME type.""" mime_type, _ = mimetypes.guess_type(image_path) if mime_type is None: raise ValueError(f"Cannot determine MIME type for {image_path}") with open(image_path, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode('utf-8') return f"data:{mime_type};base64,{encoded_string}" # Function to encode the image def encode_image(image_path): with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode("utf-8") # Function to convert Markdown to PDF def convert_markdown_to_pdf(md_content, output_pdf_path): # Convert Markdown to HTML html_content = markdown.markdown(md_content) # Save HTML to a PDF file HTML(string=html_content).write_pdf(output_pdf_path) print("Markdown has been successfully converted to PDF!") def pdf_to_images(pdf_path, output_folder, zoom_x=2.0, zoom_y=2.0): # Open the PDF file pdf_document = fitz.open(pdf_path) # Create output folder if it doesn't exist if not os.path.exists(output_folder): os.makedirs(output_folder) image_paths = [] for page_num in range(len(pdf_document)): page = pdf_document.load_page(page_num) matrix = fitz.Matrix(zoom_x, zoom_y) # Adjust the zoom factor to increase quality pix = page.get_pixmap(matrix=matrix) image_path = os.path.join(output_folder, f'page_{page_num + 1}.png') pix.save(image_path) image_paths.append(image_path) return image_paths