Skip to content

AxaFrance/axa-fr-splitter

Repository files navigation

@axa-fr/axa-fr-splitter

PyPI PyPI - License PyPI - Wheel

Tests python: 3.10 (shields.io) python: 3.11 (shields.io) python: 3.12 (shields.io)

About

The axa-fr-splitter package provides tools to convert several types of documents (PDF, TIFF, ...) into images using Python.

Quick Start

pip install axa-fr-splitter
from pathlib import Path
from splitter import FileHandler
from splitter.image.tiff_handler import TifHandler
from splitter.pdf.pdf_handler import FitzPdfHandler


def create_file_handler() -> FileHandler:
    """Build a FileHandler with the PDF and TIFF converters registered."""
    dispatcher = FileHandler()

    # One row per supported format: (converter, file extensions, MIME types).
    registrations = (
        (FitzPdfHandler(), ['.pdf'], ['application/pdf']),
        (TifHandler(), ['.tif', '.tiff'], ['image/tiff']),
    )

    for converter, suffixes, content_types in registrations:
        dispatcher.register_converter(
            converter,
            extensions=suffixes,
            mime_types=content_types,
        )

    return dispatcher


def main(filepath, output_path):
    """Split a document and write every resulting file under ``output_path``.

    Args:
        filepath: Path of the document to split.
        output_path: Directory that receives the exported files; it is
            created on demand, together with any sub-directories implied by
            each file's ``relative_path``.

    Raises:
        Exception: Whatever ``unwrap()`` raises when an item yielded by
            ``split_document`` is a failure rather than a file.
    """
    file_handler = create_file_handler()
    output_path = Path(output_path)

    for file_or_exception in file_handler.split_document(filepath):
        # unwrap() hands back the split file, or raises if this item failed.
        file = file_or_exception.unwrap()

        print(file.metadata)
        # {
        #     'original_filename': 'specimen.tiff',
        #     'page_number': 1,
        #     'total_pages': 4,
        #     'width': 1554,
        #     'height': 2200,
        #     'resized_ratio': 0.9405728943993159
        # }

        # Export the file bytes. write_bytes() does not create missing
        # directories, so make sure the destination folder exists first.
        export_path = output_path.joinpath(file.relative_path)
        export_path.parent.mkdir(parents=True, exist_ok=True)
        export_path.write_bytes(file.file_bytes)

# Script entry point: split the sample TIFF and export the resulting files.
# NOTE(review): MY_OUTPUT_PATH is not defined anywhere in this snippet; it is a
# placeholder to replace with the directory that should receive the output.
if __name__ == '__main__':
    main(r"tests/inputs/specimen.tiff", MY_OUTPUT_PATH)

Alternatively, you can use a match statement (Python 3.10+) to handle successes and failures separately instead of unwrapping each result:

from returns.result import Failure, Success

...

def main(filepath, output_path):
    file_handler = create_file_handler()
    output_path = Path(output_path)

    for file_or_exception in file_handler.split_document(filepath):
        match file_or_exception:
            case Success(file):
                print(file.metadata)
                export_path = output_path.joinpath(file.relative_path)
                export_path.write_bytes(file.file_bytes)
            case Failure(exception):
                # Handle Exception ...
                raise exception

Contribute