Spaces:
Sleeping
Sleeping
File size: 7,697 Bytes
576a588 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
# pdf_processor.py
"""
Handles the core logic of converting a PDF document into a single image.
"""
import fitz # PyMuPDF
from PIL import Image
import io
import streamlit as st # Imported for progress bar updates
from typing import Tuple, List, Union
# Constants
# Resolution that corresponds to a zoom factor of 1.0: page zoom factors in
# this module are computed as ``dpi / DEFAULT_PDF_DPI``.
DEFAULT_PDF_DPI = 72 # Standard PDF DPI used for scaling calculations
# Value passed as ``quality`` when the stitched image is saved as JPEG.
JPEG_QUALITY = 95 # Quality setting for JPEG output
def calculate_image_dimensions(pdf_document: fitz.Document, dpi: int) -> Tuple[int, int, List[float]]:
    """
    Calculates the total dimensions required for the final image canvas.

    Every page is scaled by the same zoom factor (``dpi / DEFAULT_PDF_DPI``).
    The scaled page sizes are rounded *up* to whole pixels instead of being
    truncated: a rendered pixmap covers the smallest whole-pixel rectangle
    that contains the scaled page, so truncating would make the canvas about
    one pixel too small per page and the overflow would be cropped silently
    when the pages are pasted.

    Parameters
    ----------
    pdf_document : fitz.Document
        The opened PyMuPDF document object.
    dpi : int
        The target resolution in dots per inch.

    Returns
    -------
    Tuple[int, int, List[float]]
        A tuple containing:
        - max_width (int): The maximum pixel width among all pages.
        - total_height (int): The sum of the pixel heights of all pages.
        - zooms (List[float]): The zoom factor for each page (one entry per
          page, all equal to ``dpi / DEFAULT_PDF_DPI``).
        For a document without pages this is ``(0, 0, [])``.
    """
    import math  # function-scope import so this block needs no new file-level import

    # The zoom factor depends only on the DPI, so it is computed once.
    zoom = dpi / DEFAULT_PDF_DPI

    # Subtracted before rounding up so that floating-point noise on a size
    # that is mathematically a whole number does not add an extra pixel.
    # NOTE(review): chosen to mirror the renderer's own rounding as documented
    # for PyMuPDF's Rect.round -- confirm pix.width/pix.height agree with these
    # values on the PyMuPDF version in use.
    rounding_tolerance = 0.001

    max_width = 0
    total_height = 0
    zooms: List[float] = []

    for page in pdf_document:
        zooms.append(zoom)

        # Page size in pixels at the requested zoom, rounded up.
        rect = page.rect
        page_width = math.ceil(rect.width * zoom - rounding_tolerance)
        page_height = math.ceil(rect.height * zoom - rounding_tolerance)

        # Pages are stacked vertically: widest page sets the canvas width,
        # heights accumulate.
        max_width = max(max_width, page_width)
        total_height += page_height

    return max_width, total_height, zooms
def _pixmap_to_rgb_image(pix, page_num: int) -> Image.Image:
    """
    Converts a rendered pixmap into an RGB PIL image.

    The samples are first interpreted as RGB. If Pillow rejects them with a
    ``ValueError``, the problem and the pixmap details are reported through
    Streamlit; a pixmap that carries an alpha channel is then re-read as RGBA
    and converted to RGB, any other pixmap re-raises the original error.

    ``page_num`` is the zero-based page index, used only for the message.
    """
    try:
        return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    except ValueError as e:
        st.error(f"Error converting page {page_num+1} to Image: {e}")
        st.warning(f"Pixmap details: width={pix.width}, height={pix.height}, alpha={pix.alpha}, samples length={len(pix.samples)}")
        if not pix.alpha:
            # Not an alpha-channel mismatch: nothing else to try.
            raise
        # Fallback for pixmaps with an alpha channel.
        converted = Image.frombytes("RGBA", [pix.width, pix.height], pix.samples).convert("RGB")
        st.info("Retrying page conversion with RGBA mode.")
        return converted


def render_pages_to_image(
    pdf_document: fitz.Document,
    zooms: List[float],
    canvas_width: int,
    canvas_height: int
) -> Image.Image:
    """
    Renders each page of the PDF onto a single PIL Image canvas.

    Pages are rasterised one at a time and pasted below each other, left
    aligned, on a white RGB canvas. Progress is reported through a Streamlit
    progress bar and status placeholder.

    Parameters
    ----------
    pdf_document : fitz.Document
        The opened PyMuPDF document object.
    zooms : List[float]
        A list of zoom factors, one for each page.
    canvas_width : int
        The width of the final image canvas.
    canvas_height : int
        The height of the final image canvas.

    Returns
    -------
    Image.Image
        A PIL Image object containing all rendered PDF pages.
    """
    num_pages = len(pdf_document)

    # White RGB canvas that receives every page.
    canvas = Image.new("RGB", (canvas_width, canvas_height), (255, 255, 255))
    paste_top = 0

    # Streamlit widgets used for progress reporting.
    progress_bar = st.progress(0)
    status_text = st.empty()

    for page_num in range(num_pages):
        status_text.text(f"Processing page {page_num + 1}/{num_pages}...")

        page = pdf_document[page_num]
        scale = zooms[page_num]

        # Rasterise the page with the same zoom on both axes.
        pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))
        page_image = _pixmap_to_rgb_image(pix, page_num)

        # Paste at the left edge, directly below the previous page.
        canvas.paste(page_image, (0, paste_top))
        paste_top += pix.height

        progress_bar.progress((page_num + 1) / num_pages)

    status_text.text("Rendering complete!")
    return canvas
def pdf_to_single_image(pdf_path: str, output_format: str = "PNG", dpi: int = 300) -> io.BytesIO:
    """
    Converts all pages of a PDF file into a single vertical image.

    Opens the PDF, calculates the required dimensions, renders each page
    at the specified DPI, stitches them together vertically, and returns
    the result as an image in a BytesIO buffer.

    Parameters
    ----------
    pdf_path : str
        The file path to the input PDF document.
    output_format : str, optional
        The desired output image format ("PNG", "JPG" or "JPEG", compared
        case-insensitively), by default "PNG". Any other value produces a
        Streamlit warning and falls back to PNG.
    dpi : int, optional
        The resolution (dots per inch) for rendering the PDF pages, by default 300.

    Returns
    -------
    io.BytesIO
        A BytesIO buffer positioned at the start and containing the encoded
        image. If the calculated canvas width or height is 0, a Streamlit
        warning is shown and an empty buffer is returned instead.

    Raises
    ------
    Exception
        Any error raised while opening, rendering or encoding is reported
        through ``st.error`` and then re-raised unchanged, so callers still
        see the original exception type.
    """
    # Resolve PyMuPDF's exception classes by name instead of writing
    # ``except fitz.FitzError``: an ``except`` expression is evaluated only
    # while an exception is already propagating, so a missing attribute would
    # raise AttributeError and mask the real error.
    # NOTE(review): the candidate names are assumptions about what the
    # installed PyMuPDF exposes -- names that do not exist are skipped.
    pdf_error_types = tuple(
        candidate
        for candidate in (
            getattr(fitz, name, None)
            for name in ("FitzError", "FileDataError", "FileNotFoundError", "EmptyFileError")
        )
        if isinstance(candidate, type) and issubclass(candidate, BaseException)
    )

    pdf_document = None  # Defined up front so the finally block can test it
    try:
        # Open the PDF document
        pdf_document = fitz.open(pdf_path)

        # Calculate the necessary dimensions for the final image
        canvas_width, canvas_height, zooms = calculate_image_dimensions(pdf_document, dpi)
        if canvas_width == 0 or canvas_height == 0:
            st.warning("Could not determine valid dimensions for the PDF. It might be empty or corrupted.")
            return io.BytesIO()  # Return empty buffer

        # Render pages onto the canvas
        result_image = render_pages_to_image(pdf_document, zooms, canvas_width, canvas_height)

        # Encode the stitched image into an in-memory buffer
        img_buffer = io.BytesIO()
        normalized_format = output_format.upper()
        if normalized_format == "PNG":
            result_image.save(img_buffer, format="PNG")
        elif normalized_format in ("JPG", "JPEG"):
            # JPEG has no alpha channel, so convert RGBA to RGB first
            if result_image.mode == 'RGBA':
                result_image = result_image.convert('RGB')
            result_image.save(img_buffer, format="JPEG", quality=JPEG_QUALITY)
        else:
            # Default to PNG if format is unknown
            st.warning(f"Unsupported format '{output_format}'. Defaulting to PNG.")
            result_image.save(img_buffer, format="PNG")

        # Reset buffer position to the beginning for reading
        img_buffer.seek(0)
        return img_buffer
    except Exception as e:
        # Report to the UI, then let the original exception propagate.
        if isinstance(e, pdf_error_types):
            st.error(f"Error processing PDF: {e}")
        else:
            st.error(f"An unexpected error occurred during conversion: {e}")
        raise
    finally:
        # Compare against None rather than relying on truthiness: an object
        # that defines __len__ is falsy when it has no pages, and such a
        # document must still be closed.
        if pdf_document is not None:
            pdf_document.close()
|