PyMuPDF icon indicating copy to clipboard operation
PyMuPDF copied to clipboard

ValueError: not enough values to unpack (expected 3, got 2) is thrown when calling insert_pdf

Open hgdhot opened this issue 6 months ago • 3 comments

Description of the bug

I want to split a PDF file into multiple PDF files of 5 pages each. When insert_pdf is called, a ValueError is thrown. Attachments: traceback, example.pdf

How to reproduce the bug

from io import BytesIO
from pathlib import Path
import fitz

def _clean_up(file_path):
    """Return an in-memory, cleaned copy of the PDF at ``file_path``.

    The file is opened, re-serialized with ``garbage=4`` and ``deflate=True``,
    and the resulting bytes are opened as a new document that no longer
    depends on the file on disk.

    Args:
        file_path: Location of the source PDF, passed straight to
            ``fitz.open``.

    Returns:
        A ``fitz.Document`` backed by the re-serialized bytes. The caller
        owns it and is responsible for closing it.
    """
    # The on-disk document is only needed long enough to serialize it, so it
    # is closed on leaving the ``with`` block instead of being left open.
    with fitz.open(file_path) as doc:
        # ``write`` already returns the serialized bytes; no intermediate
        # BytesIO buffer is required.
        cleaned = doc.write(garbage=4, deflate=True)

    return fitz.Document('pdf', cleaned)


def split_pdf_by_page(file_path, pages_per_split, result_dir, num_splits=None, paging_prefix=""):
    """Split a PDF into consecutive files of ``pages_per_split`` pages each.

    Output files are written to ``result_dir`` and named
    ``{paging_prefix}_{index}.pdf``, with ``index`` starting at 0.

    Args:
        file_path: Path of the source PDF.
        pages_per_split: Number of pages per output file. A falsy value
            (``0`` or ``None``) disables splitting and the source file is
            copied unchanged to ``{paging_prefix}_0.pdf``.
        result_dir: Existing directory that receives the output files; it is
            combined with the file name via the ``/`` operator.
        num_splits: Optional cap on the number of output files. The file
            with index ``num_splits - 1`` receives all remaining pages.
            Defaults to as many files as are needed.
        paging_prefix: Prefix used in the output file names.

    Returns:
        A list with the paths of the files that were written, in page order.
    """
    # Imported here because the module-level imports do not provide shutil;
    # without it the no-split branch fails with NameError.
    import shutil

    # Nothing to split: hand back a plain copy of the source file.
    if not pages_per_split:
        return [shutil.copy(file_path, result_dir/f"{paging_prefix}_0.pdf")]

    source_doc = _clean_up(file_path)
    try:
        # Page numbers are inclusive, so a chunk starting at ``start`` ends
        # at ``start + pages_per_split - 1``.
        page_range = pages_per_split - 1
        split_range = range(0, source_doc.page_count, pages_per_split)
        num_splits = num_splits or len(split_range)

        split_paths = []

        for i, start in enumerate(split_range):
            # The final file uses -1 so that it takes every page up to the
            # end of the source document.
            to_page = start + page_range if i < num_splits - 1 else -1

            path = result_dir/f"{paging_prefix}_{i}.pdf"
            # Close each output document as soon as it has been saved.
            with fitz.open() as output_doc:
                output_doc.insert_pdf(source_doc, from_page=start, to_page=to_page)
                output_doc.save(path, garbage=2)
            split_paths.append(path)

            # The last file has been written; remaining start offsets are
            # already covered by it.
            if to_page == -1:
                break

        return split_paths
    finally:
        # The cleaned copy is private to this call, so release it here even
        # when insert_pdf or save raises.
        source_doc.close()

if __name__ == "__main__":
    # Reproduction driver: split ./example.pdf into 5-page files in ./result.
    file_path = Path("./example.pdf")
    pages_per_file = 5
    result_dir = Path("./result")
    # exist_ok lets the script be re-run without failing on the directory
    # left behind by a previous run.
    result_dir.mkdir(exist_ok=True)
    split_pdf_by_page(file_path, pages_per_file, result_dir, paging_prefix="src")

PyMuPDF version

1.24.9

Operating system

Linux

Python version

3.8

hgdhot avatar Aug 19 '24 03:08 hgdhot