-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdfToImages.py
58 lines (46 loc) · 1.36 KB
/
pdfToImages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/python3
import os
import glob
from pdf2image import convert_from_path
# 1. Walk through pdf_data, which is the input source
# 2. Convert each PDF into images
# 3. Put the images into a folder with the same name as the PDF
# Width passed to im.thumbnail() in thumb() as the bounding width of every
# page image (presumably pixels, as with PIL image sizes).
WIDTH = 790
# Directory that execute() scans for input PDF files.
IN_PATH = 'pdf_data/'
# Directory under which execute() creates one output folder per PDF.
OUT_PATH = 'pdf_out/'
def execute():
    """Convert every PDF found in ``IN_PATH`` into JPEG page images.

    For each ``<IN_PATH>/<name>.pdf`` a folder ``<OUT_PATH>/<name>/`` is
    created and every page is written there as ``<name>_NN.jpg``, where
    ``NN`` is the zero-based page index padded to two digits.  Each page is
    passed through ``convert_to_jpg`` and ``thumb`` before being saved.

    A PDF whose output folder already exists is skipped, so running the
    script again only processes files that have not been converted yet.
    """
    # This has to be a real tuple (note the trailing comma).  A bare
    # ``('*.pdf')`` is just a string, and looping over it yields the single
    # characters '*', '.', 'p', 'd', 'f' -- which made the glob below match
    # every file in IN_PATH instead of only PDFs.
    patterns = ('*.pdf',)
    for pattern in patterns:
        for path in glob.glob(os.path.join(IN_PATH, pattern)):
            # Derive the name from the path components rather than with
            # str.replace(), which would also rewrite '.pdf' or the input
            # directory name appearing elsewhere in the path and which
            # fails when glob returns OS-native separators.
            name = os.path.splitext(os.path.basename(path))[0]
            out_dir = os.path.join(OUT_PATH, name)
            if os.path.exists(out_dir):
                # Already converted on an earlier run.
                continue

            # Convert before creating the folder: if the conversion raises,
            # no empty folder is left behind that would cause this PDF to be
            # skipped forever on later runs.
            pages = convert_from_path(path)
            os.makedirs(out_dir)

            for index, page in enumerate(pages):
                page = thumb(convert_to_jpg(page))
                file_name = name + '_' + str(index).zfill(2) + '.jpg'
                page.save(os.path.join(out_dir, file_name), 'JPEG')
            print(name + ' is done!')
    print('all is ok!')
def convert_to_jpg(im):
    """Return the result of ``im.convert('RGB')``.

    Used on each page image before it is saved with the 'JPEG' format.
    """
    return im.convert('RGB')
def thumb(im):
    """Shrink ``im`` in place to a ``WIDTH``-wide bounding box and return it.

    The box height is the image's current height multiplied by
    ``WIDTH / current width``, i.e. the same scale factor is applied to both
    dimensions.  The resizing itself is delegated to ``im.thumbnail``.
    """
    src_width, src_height = im.size[0], im.size[1]
    scale = WIDTH / src_width
    target_size = (WIDTH, src_height * scale)
    im.thumbnail(target_size)
    return im
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    execute()