-
Notifications
You must be signed in to change notification settings - Fork 1
/
pdf_processing.py
35 lines (28 loc) · 1.17 KB
/
pdf_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import fitz
import os
import zipfile
from pathlib import Path
def pdf_to_images(pdf_path, output_folder):
    """Render every page of a PDF to a PNG image inside ``output_folder``.

    Pages are written as ``page_1.png``, ``page_2.png``, ... (1-based), using
    the default pixmap settings of ``page.get_pixmap()``.

    Args:
        pdf_path: Path of the PDF document to open with ``fitz.open``.
        output_folder: Directory that receives the PNG files. It is created
            (including missing parents) when it does not exist yet.

    Raises:
        FileExistsError: If ``output_folder`` exists but is not a directory.
        Exception: Whatever ``fitz`` raises while opening, rendering or
            saving is propagated unchanged.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several workers share the same folder.
    os.makedirs(output_folder, exist_ok=True)

    doc = fitz.open(pdf_path)
    try:
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)  # 0-based page index
            pix = page.get_pixmap()
            # File names are 1-based so they match human page numbering.
            output_image_path = os.path.join(
                output_folder, f'page_{page_num + 1}.png'
            )
            pix.save(output_image_path)
    finally:
        # Always release the document handle, even if a page fails to
        # render or save part-way through.
        doc.close()
def zip_folder(folder_path, output_zip_file):
    """Compress the files under ``folder_path`` into a deflated zip archive.

    Archive member names are relative to ``folder_path`` so the directory
    layout of the folder is preserved inside the zip. Only files are added;
    directories that contain no files produce no entry.

    Args:
        folder_path: Root directory whose files are archived recursively.
        output_zip_file: Destination passed to ``zipfile.ZipFile`` in write
            mode (an existing file is overwritten). It may be located inside
            ``folder_path``; in that case the archive itself is skipped.
    """
    # Resolve the archive location up front so it can be recognised during
    # the walk. Only path-like targets can collide with a walked file; a
    # file-like object handed to ZipFile is left alone.
    archive_path = None
    if isinstance(output_zip_file, (str, os.PathLike)):
        archive_path = os.path.realpath(os.fspath(output_zip_file))

    with zipfile.ZipFile(output_zip_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # The archive is created before the walk starts, so when it
                # lives inside folder_path it would otherwise be added to
                # itself while still being written.
                if archive_path is not None and os.path.realpath(file_path) == archive_path:
                    continue
                # Store the path relative to the root to keep the directory
                # structure inside the archive.
                relative_path = os.path.relpath(file_path, folder_path)
                zipf.write(file_path, arcname=relative_path)