# nhentaidl.py -- forked from apachecn/doctool
# 120 lines (102 loc), 2.95 KB
import requests
from pyquery import PyQuery as pq
import os
import sys
from os import path
from imgyaso import grid
import shutil
import json
import subprocess as subp
import uuid
import tempfile
import numpy as np
import cv2
# External requirement: the `gen-epub` command-line tool must be installed
# and reachable on PATH, e.g. via:
#   npm install -g gen-epub

# HTTP request headers passed to `requests.get`; they carry a desktop
# Chrome User-Agent string.
hdrs = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
}
def safe_mkdir(dir):
    """Create the directory ``dir``, ignoring filesystem errors.

    This is a best-effort helper: if the directory already exists, or the
    operating system refuses to create it, the error is swallowed and the
    function returns normally.

    Args:
        dir: Path of the directory to create. Only the final path component
            is created (``os.mkdir`` semantics); missing parents are not.

    Returns:
        None.
    """
    try:
        os.mkdir(dir)
    except OSError:
        # Only OS-level failures are ignored. KeyboardInterrupt, SystemExit
        # and programming errors (e.g. a non-path argument) propagate
        # instead of being hidden by a bare ``except``.
        pass
def safe_rmdir(dir):
    """Recursively delete the directory tree at ``dir``, ignoring failures.

    This is a best-effort cleanup helper: a missing path or entries that
    cannot be removed do not raise.

    Args:
        dir: Path of the directory tree to remove.

    Returns:
        None.
    """
    # ``ignore_errors=True`` replaces the former bare ``except: pass``: removal
    # errors are still ignored, but KeyboardInterrupt/SystemExit are no longer
    # swallowed, and deletion continues past the first entry that fails.
    shutil.rmtree(dir, ignore_errors=True)
def get_info(html):
    """Extract the title and the image URL list from a gallery page.

    Args:
        html: Markup of the gallery page, as accepted by ``pq``.

    Returns:
        dict: ``{'title': str, 'imgs': list}`` where ``title`` is the
        stripped text of the first ``h2.title`` element and ``imgs`` holds
        one rewritten URL per ``.gallerythumb > img`` element, in document
        order.
    """
    doc = pq(html)
    heading = doc('h2.title').eq(0).text().strip()

    urls = []
    for node in doc('.gallerythumb > img'):
        src = pq(node).attr('data-src')
        # Presumably these rewrites turn a thumbnail URL into the URL of the
        # full-size image (drop the "t" suffix, switch the host prefix) --
        # TODO confirm against the site's current URL scheme.
        src = src.replace('t.jpg', '.jpg')
        src = src.replace('t.png', '.png')
        src = src.replace('t.nhentai', 'i.nhentai')
        urls.append(src)

    return {'title': heading, 'imgs': urls}
def process_img(img):
    """Convert encoded image bytes into a grayscale, width-capped PNG.

    The data is decoded as a single-channel grayscale image, scaled down
    proportionally when it is wider than 1000 pixels, passed through
    imgyaso's ``grid`` and re-encoded as PNG with compression level 9.

    Args:
        img: Encoded image file content (bytes-like), e.g. an HTTP response
            body.

    Returns:
        bytes: The PNG file content.

    Raises:
        ValueError: If ``img`` is empty or cannot be decoded as an image, or
            if PNG encoding fails.
    """
    data = np.frombuffer(img, np.uint8)
    if data.size == 0:
        raise ValueError('image data is empty')

    decoded = cv2.imdecode(data, cv2.IMREAD_GRAYSCALE)
    if decoded is None:
        # imdecode signals failure by returning None rather than raising;
        # without this check the failure surfaced as an AttributeError.
        raise ValueError('image data could not be decoded')

    h, w = decoded.shape
    if w > 1000:
        # Cap the width at 1000 px while keeping the aspect ratio.
        rate = 1000 / w
        nh = round(h * rate)
        decoded = cv2.resize(
            decoded, (1000, nh), interpolation=cv2.INTER_CUBIC
        )

    # NOTE(review): `grid` comes from imgyaso; it is assumed to take and
    # return an image array that cv2.imencode accepts -- confirm.
    decoded = grid(decoded)

    ok, encoded = cv2.imencode(
        '.png', decoded,
        [cv2.IMWRITE_PNG_COMPRESSION, 9]
    )
    if not ok:
        raise ValueError('image could not be encoded as PNG')
    return encoded.tobytes()
def gen_epub(articles, imgs=None, name=None, out_path=None):
    """Build an EPUB by running the external ``gen-epub`` command.

    The articles are written as ``articles.json`` and the images into an
    ``img`` sub-directory of a fresh temporary directory; ``gen-epub`` is
    then invoked as ``gen-epub <articles.json> -i <img dir> [-n name]
    [-p out_path]``. The temporary directory is removed afterwards, even if
    something fails.

    Args:
        articles: JSON-serialisable article data handed to ``gen-epub``
            (``main`` passes a list of ``{'title', 'content'}`` dicts).
        imgs: Optional mapping of file name -> image bytes to place in the
            image directory.
        name: Optional value for the ``-n`` option (presumably the book
            name -- confirm against gen-epub's help).
        out_path: Optional value for the ``-p`` option (presumably the
            output location -- confirm against gen-epub's help).

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If ``gen-epub`` exits with a non-zero
            status (its captured output is attached to the exception).
        OSError: If the working files cannot be written or, on POSIX, the
            ``gen-epub`` executable cannot be started.
    """
    imgs = imgs or {}
    work_dir = tempfile.mkdtemp()
    try:
        img_dir = path.join(work_dir, 'img')
        os.mkdir(img_dir)
        for fname, data in imgs.items():
            with open(path.join(img_dir, fname), 'wb') as f:
                f.write(data)

        json_path = path.join(work_dir, 'articles.json')
        with open(json_path, 'w') as f:
            f.write(json.dumps(articles))

        args = ['gen-epub', json_path, '-i', img_dir]
        if name:
            args += ['-n', name]
        if out_path:
            args += ['-p', out_path]

        # With shell=True and a list, POSIX passes only the first item to the
        # shell as the command, so gen-epub used to run with no arguments at
        # all. The shell is only needed on Windows (so the command can be
        # resolved the same way as before); elsewhere the list is executed
        # directly.
        subp.run(
            args,
            shell=(os.name == 'nt'),
            stdout=subp.PIPE,
            stderr=subp.PIPE,
            check=True,
        )
    finally:
        # Best-effort cleanup that also runs when a step above raised.
        shutil.rmtree(work_dir, ignore_errors=True)
def main():
    """Command-line entry point: download a gallery and pack it as an EPUB.

    Usage: ``python nhentaidl.py <gallery-id>``

    Fetches the gallery page, downloads and post-processes every image, and
    hands a single article containing one ``<img>`` paragraph per page to
    ``gen_epub``.

    Raises:
        SystemExit: If no gallery id was given on the command line.
        requests.HTTPError: If the gallery page or an image request returns
            an error status.
    """
    if len(sys.argv) < 2:
        print(f'usage: {sys.argv[0]} <gallery-id>', file=sys.stderr)
        sys.exit(1)

    gallery_id = sys.argv[1]
    url = f'https://nhentai.net/g/{gallery_id}/'
    # Send the same browser-like headers as the image requests below, and
    # fail fast on an error page instead of producing an empty book.
    resp = requests.get(url, headers=hdrs)
    resp.raise_for_status()
    info = get_info(resp.text)
    print(info['title'])

    # Zero-pad file names so they sort in page order.
    width = len(str(len(info['imgs'])))
    imgs = {}
    paragraphs = []
    for i, img_url in enumerate(info['imgs']):
        fname = str(i).zfill(width) + '.png'
        print(f'{img_url} => {fname}')
        resp = requests.get(img_url, headers=hdrs)
        resp.raise_for_status()
        imgs[fname] = process_img(resp.content)
        paragraphs.append(f'<p><img src="../Images/{fname}" /></p>')

    articles = [{'title': info['title'], 'content': '\n'.join(paragraphs)}]
    gen_epub(articles, imgs)


if __name__ == '__main__':
    main()