-
Notifications
You must be signed in to change notification settings - Fork 11
/
dead_link.py
58 lines (46 loc) · 1.01 KB
/
dead_link.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from requests_html import HTMLSession
import requests
from sys import argv
from urllib.parse import urlparse, urljoin
# Debug switch; it is not read anywhere in the visible part of the script.
DEBUG = True

# Help text printed when the script is not given exactly one argument.
USAGE = '''
USAGE:
python dead_link.py www.itest.info
'''

# The script takes exactly one positional argument: the site to scan.
if len(argv) != 2:
    print(USAGE)
    # `exit()` is injected by the `site` module and is unavailable under
    # `python -S`; raising SystemExit works in every interpreter setup.
    raise SystemExit(1)

script_name, url = argv

# Default to plain HTTP when no scheme was supplied.  The full
# "http://" / "https://" prefixes are checked (instead of only the first
# four characters) so that a bare host such as "httpbin.org" is not
# mistaken for a URL that already carries a scheme.
if not url.lower().startswith(('http://', 'https://')):
    url = 'http://' + url

res = urlparse(url)
if res.netloc == '':
    print('无法获取站点的domain信息')
    raise SystemExit(1)

# Network location (host, plus port if given) of the start page; the link
# loop below uses it to tell internal links from external ones.
domain = res.netloc
print(f"站点domain: {domain}")
# Seconds to wait on any single HTTP request.  Without a timeout, one
# unresponsive server would stall the whole scan indefinitely.
REQUEST_TIMEOUT = 10

# Fetch the start page and collect every anchor element found on it.
session = HTMLSession()
r = session.get(url, timeout=REQUEST_TIMEOUT)
links = r.html.find('a')

for link in links:
    # Anchors without an href (e.g. named anchors) have nothing to check.
    if 'href' not in link.attrs:
        continue

    # urljoin resolves relative and protocol-relative ("//host/path") links
    # against the start page and leaves absolute URLs unchanged, so every
    # href ends up as a full URL that can be requested.
    href = urljoin(url, link.attrs['href'])
    target = urlparse(href)

    # Only HTTP(S) targets can be probed; mailto:, javascript:, tel: and
    # similar links would merely raise inside requests.
    if target.scheme not in ('http', 'https'):
        continue

    # Classify by host rather than by substring: a substring test would
    # label an external URL that merely mentions the domain (for instance
    # in its query string) as internal.  Subdomains of the scanned domain
    # still count as internal.
    host = target.netloc.lower()
    site = domain.lower()
    if host == site or host.endswith('.' + site):
        url_type = '内链'
    else:
        url_type = '外链'

    try:
        response = requests.get(href, timeout=REQUEST_TIMEOUT)
    except (requests.RequestException, ValueError) as exc:
        # Network failures and malformed URLs are reported per link and the
        # scan continues.  KeyboardInterrupt is deliberately not caught, so
        # Ctrl-C still stops the script.
        print(f"{url_type} {href} 出现异常: {exc}")
        continue

    # Any 4xx/5xx answer marks the link as dead.
    if response.status_code >= 400:
        print(f"{url_type} {href} 失败")
    else:
        print(f"{url_type} {href} 成功")