File size: 1,764 Bytes
c924c3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import base64
import mimetypes
import markdown
from weasyprint import HTML
import fitz  # PyMuPDF
import os

def encode_image_v2(image_path: str):
    """Build a base64 ``data:`` URI for the file at *image_path*.

    The MIME type is guessed from the file name with
    ``mimetypes.guess_type``. The file is read in binary mode and its
    base64 encoding is embedded in the returned string, which has the
    form ``data:<mime>;base64,<payload>``.

    Raises:
        ValueError: If no MIME type can be guessed from the file name.
            This check happens before the file is opened.
    """
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type is None:
        raise ValueError(f"Cannot determine MIME type for {image_path}")

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:{guessed_type};base64,{payload}"
    

# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode; the base64 bytes are decoded as
    UTF-8 so the result is a plain ``str`` with no ``data:`` prefix.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
    

# Function to convert Markdown to PDF
def convert_markdown_to_pdf(md_content, output_pdf_path):
    """Render Markdown text into a PDF file.

    ``md_content`` is converted to HTML with ``markdown.markdown`` and the
    resulting HTML string is written to ``output_pdf_path`` through
    WeasyPrint's ``HTML(...).write_pdf``. A confirmation message is
    printed once the PDF has been written.
    """
    rendered_html = markdown.markdown(md_content)

    # WeasyPrint takes the HTML as an in-memory string; no temp file needed.
    document = HTML(string=rendered_html)
    document.write_pdf(output_pdf_path)

    print("Markdown has been successfully converted to PDF!")




def pdf_to_images(pdf_path, output_folder, zoom_x=2.0, zoom_y=2.0):
    """Render every page of a PDF to a PNG file and return the file paths.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).
        output_folder: Directory that receives the images; it is created
            (including parents) when it does not exist yet.
        zoom_x: Horizontal scale factor passed to ``fitz.Matrix``.
        zoom_y: Vertical scale factor passed to ``fitz.Matrix``.

    Returns:
        A list of the written image paths in page order. Files are named
        ``page_<n>.png`` with ``n`` starting at 1.
    """
    # The context manager closes the document even when rendering or
    # saving a page raises, so the underlying file handle is not leaked.
    with fitz.open(pdf_path) as pdf_document:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_folder, exist_ok=True)

        # The zoom matrix is the same for every page; build it once.
        matrix = fitz.Matrix(zoom_x, zoom_y)

        image_paths = []
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap(matrix=matrix)
            image_path = os.path.join(output_folder, f'page_{page_num + 1}.png')
            pix.save(image_path)
            image_paths.append(image_path)

    return image_paths