Spaces:
Running
Running
import base64 | |
import mimetypes | |
import markdown | |
from weasyprint import HTML | |
import fitz # PyMuPDF | |
import os | |
def encode_image_v2(image_path: str):
    """Build a base64 ``data:`` URI for the file at *image_path*.

    The MIME type is guessed from the path with ``mimetypes.guess_type``
    before the file is opened; the file's bytes are then base64-encoded
    and embedded in a ``data:<mime>;base64,<payload>`` string.

    Args:
        image_path: Path of the file to encode.

    Returns:
        The data URI as a string.

    Raises:
        ValueError: If no MIME type can be guessed for *image_path*.
    """
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type is None:
        raise ValueError(f"Cannot determine MIME type for {image_path}")

    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return "data:" + guessed_type + ";base64," + payload
# Base64-encode the raw bytes of an image file
def encode_image(image_path):
    """Return the contents of *image_path* as a base64 text string.

    The file is read in binary mode, base64-encoded, and the resulting
    bytes are decoded to ``str`` using UTF-8.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Render Markdown text into a PDF file
def convert_markdown_to_pdf(md_content, output_pdf_path):
    """Convert Markdown text to HTML and write that HTML out as a PDF.

    Args:
        md_content: Markdown source text, passed to ``markdown.markdown``.
        output_pdf_path: Destination handed to WeasyPrint's ``write_pdf``.

    Prints a confirmation message once the PDF has been written.
    """
    rendered_html = markdown.markdown(md_content)
    document = HTML(string=rendered_html)
    document.write_pdf(output_pdf_path)
    print("Markdown has been successfully converted to PDF!")
def pdf_to_images(pdf_path, output_folder, zoom_x=2.0, zoom_y=2.0):
    """Render every page of a PDF to an image file in *output_folder*.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        output_folder: Directory that receives the images; created
            (including parents) if it does not exist.
        zoom_x: Horizontal zoom factor for rendering; larger values
            increase output quality.
        zoom_y: Vertical zoom factor for rendering.

    Returns:
        A list of the written image paths in page order, named
        ``page_<n>.png`` with ``n`` starting at 1.
    """
    # Open the document in a ``with`` block so the file handle is released
    # even if rendering or saving a page raises.
    with fitz.open(pdf_path) as pdf_document:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(output_folder, exist_ok=True)

        # The zoom matrix is identical for every page, so build it once.
        matrix = fitz.Matrix(zoom_x, zoom_y)

        image_paths = []
        for page_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=matrix)
            image_path = os.path.join(output_folder, f'page_{page_number}.png')
            pix.save(image_path)
            image_paths.append(image_path)

    return image_paths