pdf_to_single_image / pdf_processor.py
tsphan's picture
breaks away from single file
576a588
# pdf_processor.py
"""
Handles the core logic of converting a PDF document into a single image.
"""
import io
import math
from typing import Tuple, List, Union

import fitz # PyMuPDF
import streamlit as st # Imported for progress bar updates
from PIL import Image
# Constants
# Base resolution that page sizes are expressed in; the render zoom factor is
# computed as `dpi / DEFAULT_PDF_DPI`.
DEFAULT_PDF_DPI = 72
# Value passed as `quality=` to PIL when the result is saved as JPEG.
JPEG_QUALITY = 95
def calculate_image_dimensions(pdf_document: fitz.Document, dpi: int) -> Tuple[int, int, List[float]]:
    """
    Calculates the total dimensions required for the final image canvas.

    Iterates through the PDF pages to determine the maximum width and the
    total height needed when every page is rendered at the specified DPI and
    the pages are stacked vertically.

    Parameters
    ----------
    pdf_document : fitz.Document
        The opened PyMuPDF document object.
    dpi : int
        The target resolution in dots per inch.

    Returns
    -------
    Tuple[int, int, List[float]]
        A tuple containing:
        - max_width (int): The maximum pixel width among all pages.
        - total_height (int): The sum of the pixel heights of all pages.
        - zooms (List[float]): The zoom factor for each page (one entry per
          page; every entry is `dpi / DEFAULT_PDF_DPI`).
    """
    # The zoom depends only on the requested DPI, so compute it once instead
    # of once per page.
    zoom = dpi / DEFAULT_PDF_DPI
    num_pages = len(pdf_document)
    zooms = [zoom] * num_pages

    max_width = 0
    total_height = 0
    for page_num in range(num_pages):
        rect = pdf_document[page_num].rect
        # Round fractional pixel sizes UP. The rendered pixmap covers the whole
        # scaled page rectangle, so truncating here would make the canvas
        # smaller than the pages pasted into it and clip the last rows/columns.
        # At worst this over-estimates by one pixel, which only leaves a blank
        # (white) row or column on the canvas.
        page_width = math.ceil(rect.width * zoom)
        page_height = math.ceil(rect.height * zoom)

        max_width = max(max_width, page_width)
        total_height += page_height

    return max_width, total_height, zooms
def render_pages_to_image(
    pdf_document: fitz.Document,
    zooms: List[float],
    canvas_width: int,
    canvas_height: int
) -> Image.Image:
    """
    Renders each page of the PDF onto a single PIL Image canvas.

    Pages are rasterized one at a time and pasted top-to-bottom at x = 0 onto
    a white RGB canvas. Progress is reported through a Streamlit progress bar
    and status text.

    Parameters
    ----------
    pdf_document : fitz.Document
        The opened PyMuPDF document object.
    zooms : List[float]
        A list of zoom factors, one for each page.
    canvas_width : int
        The width of the final image canvas.
    canvas_height : int
        The height of the final image canvas.

    Returns
    -------
    Image.Image
        A PIL Image object containing all rendered PDF pages.

    Raises
    ------
    ValueError
        If a page's pixel data cannot be decoded into a PIL image. Pixmap
        details are reported via Streamlit before the error is re-raised.
    """
    num_pages = len(pdf_document)

    # Blank white canvas that the pages are pasted onto.
    result_image = Image.new("RGB", (canvas_width, canvas_height), (255, 255, 255))
    current_height = 0

    # Streamlit progress reporting widgets.
    progress_bar = st.progress(0)
    status_text = st.empty()

    for page_num in range(num_pages):
        status_text.text(f"Processing page {page_num + 1}/{num_pages}...")
        page = pdf_document[page_num]
        zoom = zooms[page_num]

        # Rasterize the page, scaled uniformly by the zoom factor.
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

        # Choose the decode mode from the pixmap itself instead of trying
        # "RGB" first and reacting to a failure: decoding with the wrong mode
        # is not guaranteed to raise, and the user should not see an error
        # banner for a page that converts fine.
        mode = "RGBA" if pix.alpha else "RGB"
        try:
            page_image = Image.frombytes(mode, (pix.width, pix.height), pix.samples)
        except ValueError as e:
            st.error(f"Error converting page {page_num+1} to Image: {e}")
            st.warning(f"Pixmap details: width={pix.width}, height={pix.height}, alpha={pix.alpha}, samples length={len(pix.samples)}")
            raise

        # The canvas is RGB, so drop the alpha channel before pasting.
        if mode == "RGBA":
            page_image = page_image.convert("RGB")

        # Paste below the previously rendered pages, then advance the cursor.
        result_image.paste(page_image, (0, current_height))
        current_height += pix.height

        progress_bar.progress((page_num + 1) / num_pages)

    status_text.text("Rendering complete!")
    return result_image
def pdf_to_single_image(pdf_path: str, output_format: str = "PNG", dpi: int = 300) -> io.BytesIO:
    """
    Converts all pages of a PDF file into a single vertical image.

    Opens the PDF, calculates the required dimensions, renders each page
    at the specified DPI, stitches them together vertically, and returns
    the result as an image in a BytesIO buffer.

    Parameters
    ----------
    pdf_path : str
        The file path to the input PDF document.
    output_format : str, optional
        The desired output image format ("PNG", "JPG" or "JPEG",
        case-insensitive), by default "PNG". Any other value triggers a
        Streamlit warning and falls back to PNG.
    dpi : int, optional
        The resolution (dots per inch) for rendering the PDF pages, by default 300.
        Higher DPI results in better quality but larger file size.

    Returns
    -------
    io.BytesIO
        A BytesIO buffer positioned at the start of the generated image data.
        The buffer is empty if the computed canvas width or height is 0.

    Raises
    ------
    Exception
        Any error raised while opening, rendering or saving is reported via
        `st.error` and re-raised unchanged.
    """
    pdf_document = None  # Defined up front so the finally block can test it
    try:
        pdf_document = fitz.open(pdf_path)

        canvas_width, canvas_height, zooms = calculate_image_dimensions(pdf_document, dpi)
        if canvas_width == 0 or canvas_height == 0:
            st.warning("Could not determine valid dimensions for the PDF. It might be empty or corrupted.")
            return io.BytesIO()  # Return empty buffer

        result_image = render_pages_to_image(pdf_document, zooms, canvas_width, canvas_height)

        img_buffer = io.BytesIO()
        requested_format = output_format.upper()
        if requested_format in ("JPG", "JPEG"):
            # JPEG has no alpha channel; convert defensively before saving.
            if result_image.mode == 'RGBA':
                result_image = result_image.convert('RGB')
            result_image.save(img_buffer, format="JPEG", quality=JPEG_QUALITY)
        else:
            if requested_format != "PNG":
                # Unknown format: tell the user and fall back to PNG.
                st.warning(f"Unsupported format '{output_format}'. Defaulting to PNG.")
            result_image.save(img_buffer, format="PNG")

        # Rewind so callers can read the image from the beginning.
        img_buffer.seek(0)
        return img_buffer
    except Exception as e:
        # NOTE(review): `fitz.FitzError` may not exist in the installed PyMuPDF
        # release - confirm. It is looked up with getattr because naming a
        # missing attribute in an `except` clause would raise AttributeError
        # while handling, hiding the real failure. With the empty-tuple
        # default, isinstance() is simply False.
        pdf_error_types = getattr(fitz, "FitzError", ())
        if isinstance(e, pdf_error_types):
            st.error(f"Error processing PDF: {e}")
        else:
            st.error(f"An unexpected error occurred during conversion: {e}")
        raise
    finally:
        # Compare against None rather than testing truthiness: the document
        # supports len(), so a zero-page document may evaluate as false and
        # would then never be closed.
        if pdf_document is not None:
            pdf_document.close()