Spaces:
Sleeping
Sleeping
# pdf_processor.py
"""
Handles the core logic of converting a PDF document into a single image:
every page is rendered and the pages are stacked vertically on one canvas.
"""
import fitz # PyMuPDF | |
from PIL import Image | |
import io | |
import streamlit as st # Imported for progress bar updates | |
from typing import Tuple, List, Union | |
# Constants
# Divisor applied to the requested DPI to obtain the render zoom factor
# (zoom = dpi / DEFAULT_PDF_DPI in calculate_image_dimensions).
DEFAULT_PDF_DPI = 72 # Standard PDF DPI used for scaling calculations
# Passed as the `quality` argument when the stitched image is saved as JPEG.
JPEG_QUALITY = 95 # Quality setting for JPEG output
def calculate_image_dimensions(pdf_document: fitz.Document, dpi: int) -> Tuple[int, int, List[float]]:
    """
    Calculates the total dimensions required for the final image canvas.

    Iterates through the PDF pages to determine the maximum width and the
    total height needed when every page is rendered at the specified DPI and
    the pages are stacked vertically.

    Page sizes are obtained by transforming each page rectangle with the same
    zoom matrix that is used for rendering and rounding it with PyMuPDF's own
    ``Rect.round()``. Truncating ``rect.height * zoom`` with ``int()`` can come
    out one pixel short per page (``dpi / 72`` is rarely exact in floating
    point), which would make the canvas too small and crop the last rows.
    NOTE(review): this is expected to match the pixmap size produced by
    ``Page.get_pixmap`` for the same matrix - confirm for unusual page boxes.

    Parameters
    ----------
    pdf_document : fitz.Document
        The opened PyMuPDF document object.
    dpi : int
        The target resolution in dots per inch.

    Returns
    -------
    Tuple[int, int, List[float]]
        A tuple containing:
        - max_width (int): The maximum width required among all pages.
        - total_height (int): The sum of heights of all pages.
        - zooms (List[float]): A list of zoom factors, one per page.
        For a non-positive ``dpi`` the width and height are both 0, which
        signals to the caller that no valid canvas can be created.
    """
    num_pages = len(pdf_document)

    # The zoom factor depends only on the DPI, so it is computed once and
    # repeated for every page.
    zoom = dpi / DEFAULT_PDF_DPI
    zooms = [zoom] * num_pages

    # A zero or negative zoom cannot yield a drawable canvas; report an empty
    # one instead of transforming rectangles with a degenerate matrix.
    if zoom <= 0:
        return 0, 0, zooms

    scale = fitz.Matrix(zoom, zoom)
    max_width = 0
    total_height = 0
    for page_num in range(num_pages):
        # Integer pixel rectangle of the page at the target resolution
        pixel_rect = (pdf_document[page_num].rect * scale).round()
        max_width = max(max_width, pixel_rect.width)
        total_height += pixel_rect.height

    return max_width, total_height, zooms
def render_pages_to_image(
    pdf_document: fitz.Document,
    zooms: List[float],
    canvas_width: int,
    canvas_height: int
) -> Image.Image:
    """
    Draws every PDF page, top to bottom, onto one white RGB canvas.

    Progress is reported through a Streamlit progress bar and a status
    placeholder while the pages are being rendered.

    Parameters
    ----------
    pdf_document : fitz.Document
        The opened PyMuPDF document object.
    zooms : List[float]
        Zoom factor to use for each page, indexed by page number.
    canvas_width : int
        Width in pixels of the canvas to create.
    canvas_height : int
        Height in pixels of the canvas to create.

    Returns
    -------
    Image.Image
        The canvas with all rendered pages pasted at the left edge, one
        below the other.

    Raises
    ------
    ValueError
        If a page's pixel data cannot be read as RGB and the pixmap has no
        alpha channel to fall back on.
    """
    page_count = len(pdf_document)

    # White background; pages narrower than the canvas leave white on the right
    canvas = Image.new("RGB", (canvas_width, canvas_height), (255, 255, 255))

    # Streamlit widgets used for progress feedback
    progress_bar = st.progress(0)
    status_text = st.empty()

    y_offset = 0
    for index in range(page_count):
        status_text.text(f"Processing page {index + 1}/{page_count}...")

        page = pdf_document[index]
        scale = zooms[index]

        # Rasterize the page with a uniform scaling matrix
        pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))

        # Interpret the raw samples as RGB first; only when that fails and the
        # pixmap carries an alpha channel is an RGBA interpretation attempted.
        try:
            tile = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        except ValueError as exc:
            st.error(f"Error converting page {index+1} to Image: {exc}")
            st.warning(f"Pixmap details: width={pix.width}, height={pix.height}, alpha={pix.alpha}, samples length={len(pix.samples)}")
            if not pix.alpha:
                # Not an alpha-channel problem: propagate the original error
                raise
            rgba_tile = Image.frombytes("RGBA", [pix.width, pix.height], pix.samples)
            tile = rgba_tile.convert("RGB")
            st.info("Retrying page conversion with RGBA mode.")

        # Place the page directly below the previous one
        canvas.paste(tile, (0, y_offset))
        y_offset += pix.height

        progress_bar.progress((index + 1) / page_count)

    status_text.text("Rendering complete!")
    return canvas
def pdf_to_single_image(pdf_path: str, output_format: str = "PNG", dpi: int = 300) -> io.BytesIO:
    """
    Converts all pages of a PDF file into a single vertical image.

    Opens the PDF, calculates the required dimensions, renders each page
    at the specified DPI, stitches them together vertically, and returns
    the result as an image in a BytesIO buffer.

    Parameters
    ----------
    pdf_path : str
        The file path to the input PDF document.
    output_format : str, optional
        The desired output image format ("PNG", "JPG" or "JPEG",
        case-insensitive), by default "PNG". Any other value produces a
        Streamlit warning and falls back to PNG.
    dpi : int, optional
        The resolution (dots per inch) for rendering the PDF pages, by default 300.
        Higher DPI results in better quality but larger file size.

    Returns
    -------
    io.BytesIO
        A BytesIO buffer positioned at the start and containing the generated
        image data. The buffer is empty when no valid canvas size could be
        determined (for example, a document without pages).

    Raises
    ------
    Exception
        Any error raised while opening, rendering or saving is reported via
        ``st.error`` and then re-raised unchanged. PyMuPDF-specific errors are
        reported with a PDF-specific message.
    """
    # Resolve the PyMuPDF exception classes available in the installed release.
    # An `except fitz.<Name>` clause is only evaluated while an exception is
    # propagating, so a missing attribute would replace the real error with an
    # AttributeError. NOTE(review): `FitzError` does not appear to be exported
    # by current PyMuPDF releases - confirm and drop it if so.
    pdf_error_types = tuple(
        candidate
        for candidate in (
            getattr(fitz, name, None)
            for name in ("FitzError", "FileDataError", "EmptyFileError", "FileNotFoundError")
        )
        if isinstance(candidate, type) and issubclass(candidate, BaseException)
    )

    pdf_document = None  # Initialize to ensure it's defined in finally block
    try:
        # Open the PDF document
        pdf_document = fitz.open(pdf_path)

        # Calculate the necessary dimensions for the final image
        canvas_width, canvas_height, zooms = calculate_image_dimensions(pdf_document, dpi)
        if canvas_width == 0 or canvas_height == 0:
            st.warning("Could not determine valid dimensions for the PDF. It might be empty or corrupted.")
            return io.BytesIO()  # Return empty buffer

        # Render pages onto the canvas
        result_image = render_pages_to_image(pdf_document, zooms, canvas_width, canvas_height)

        # Save the final image to an in-memory buffer in the requested format
        img_buffer = io.BytesIO()
        requested_format = output_format.upper()
        if requested_format == "PNG":
            result_image.save(img_buffer, format="PNG")
        elif requested_format in ("JPG", "JPEG"):
            # JPEG has no alpha channel, so drop it if present
            if result_image.mode == 'RGBA':
                result_image = result_image.convert('RGB')
            result_image.save(img_buffer, format="JPEG", quality=JPEG_QUALITY)
        else:
            # Default to PNG if format is unknown
            st.warning(f"Unsupported format '{output_format}'. Defaulting to PNG.")
            result_image.save(img_buffer, format="PNG")

        # Reset buffer position to the beginning for reading
        img_buffer.seek(0)
        return img_buffer
    except Exception as e:
        # isinstance() against an empty tuple is simply False, so this is safe
        # even when none of the PyMuPDF exception classes could be resolved.
        if isinstance(e, pdf_error_types):
            st.error(f"Error processing PDF: {e}")
        else:
            st.error(f"An unexpected error occurred during conversion: {e}")
        raise  # Re-raise the original exception for the caller
    finally:
        # Compare against None instead of relying on truthiness: the document
        # supports len(), so a document with zero pages is falsy and would
        # otherwise never be closed.
        if pdf_document is not None:
            pdf_document.close()