-
Notifications
You must be signed in to change notification settings - Fork 46
/
linkchecker.js
127 lines (113 loc) · 3.74 KB
/
linkchecker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Check that every resource (images, stylesheets, scripts) is in the same folder as the spec document, and that it is reachable.
import puppeteer from 'puppeteer';
// Rule descriptor passed to sr.warning() by check(); its `name` is also
// re-exported as this module's `name`.
const self = {
name: 'links.linkchecker',
section: 'document-body',
rule: 'brokenLink',
};
// Rule descriptor passed to sr.error() by check() for both the
// "not in the same folder" and the "response status >= 400" reports.
const compound = {
name: 'links.linkchecker',
section: 'compound',
rule: 'compoundFilesLocation',
};
// Resources that check() accepts even though they are not located in the same
// folder as the document. Entries are either RegExp objects (tested against the
// URL) or plain strings (compared for exact equality); see includedByReg().
// Every literal dot in a host or file name is escaped so that `.` cannot match
// an arbitrary character (e.g. "wwwxw3.org" must not pass as "www.w3.org").
const allowList = [
    /^https:\/\/www\.w3\.org\/StyleSheets\//,
    /^https:\/\/www\.w3\.org\/scripts\//,
    'https://www.w3.org/TR/tr-outdated-spec',
    /^https:\/\/www\.w3\.org\/analytics\/piwik\//,
    /^https:\/\/test\.csswg\.org\/harness\//,
    /^https:\/\/www\.w3\.org\/assets\//,
    /^https:\/\/static\.cloudflareinsights\.com\/beacon\.min\.js/,
    /^https:\/\/www\.w3\.org\/Icons\/member_subm\.png/,
    /^data:/,
];
// Exact URLs (as returned by simplifyURL()) for which check() does not report
// a response status >= 400.
const noRespondAllowList = [
'https://www.w3.org/TR/tr-outdated-spec',
'https://www.w3.org/analytics/piwik/matomo.js',
];
// Named export of the rule name taken from the `self` descriptor.
export const { name } = self;
/**
 * Reduce a URL to its origin plus path, dropping the query string and the
 * fragment. URLs whose origin serializes to 'null' (e.g. `data:` URLs) fall
 * back to the protocol as prefix.
 *
 * @param {string} url - absolute URL to simplify
 * @returns {string} origin (or protocol) followed by the pathname
 * @throws {TypeError} when `url` cannot be parsed by the URL constructor
 */
function simplifyURL(url) {
    const { origin, protocol, pathname } = new URL(url);
    const prefix = origin === 'null' ? protocol : origin;
    return `${prefix}${pathname}`;
}
/**
 * Variant of Array.prototype.includes() whose list entries may be RegExp
 * objects as well as plain strings.
 *
 * @param {string} [url] - value to look up; a missing value (null/undefined,
 *   e.g. a request without a referer header) never matches
 * @param {Array<RegExp|string>} [regArray] - entries to match against;
 *   defaults to the module-level allowList
 * @returns {boolean} true when some RegExp entry matches `url` or some other
 *   entry is strictly equal to it
 */
function includedByReg(url, regArray = allowList) {
    // RegExp.prototype.test() would coerce a missing value to the string
    // "undefined"/"null"; treat it as "nothing to match" instead.
    if (url == null) {
        return false;
    }
    return regArray.some(item =>
        // instanceof (rather than typeof === 'object') keeps null or other
        // non-RegExp objects from being called as regular expressions.
        item instanceof RegExp ? item.test(url) : item === url
    );
}
/**
 * Load the document in a headless browser and inspect every response the page
 * receives. Two kinds of problems are reported through `sr.error()`:
 *  - `not-same-folder`: the resource is outside the document's folder, neither
 *    it nor its referer is allow-listed, and it is not the document itself;
 *  - `response-error` / `response-error-with-redirect`: the response status is
 *    400 or above and the URL is not in noRespondAllowList.
 * A `display` warning is emitted first in all cases, even when `sr.url` is
 * empty.
 *
 * @param {object} sr - checker instance; `sr.url` is read, `sr.warning()` and
 *   `sr.error()` are called
 * @param {Function} done - called without arguments once the check completed
 * @returns {Promise<void>} rejects when the browser cannot be launched or the
 *   page cannot be loaded; `done` is not called in that case, but the browser
 *   is still closed
 */
export async function check(sr, done) {
    // send out warning for /nu W3C link checker.
    sr.warning(self, 'display', { link: sr.url });
    if (!sr.url) {
        return done();
    }

    // sr.url is used as base url. Every other resource should live in the same
    // folder as the base, e.g.
    // - spec doc: https://www.w3.org/TR/2021/WD-pubrules-20210401/
    // - image (pass): https://www.w3.org/TR/2021/WD-pubrules-20210401/images/sample.png
    // - image (pass): https://www.w3.org/TR/2021/WD-pubrules-20210401/sample.png
    // - image (error): https://w3c.github.io/pubrules/sample.png
    // The scheme is stripped so that http and https are treated alike.
    // Computed before the browser is launched so that a failure here cannot
    // leave a browser process behind.
    const docPath = sr.url.replace(/\/[^/]+$/, '/').replace(/^https?:/, '');

    const browser = await puppeteer.launch({
        headless: true,
        args: ['--disable-gpu'],
    });

    try {
        const page = await browser.newPage();

        page.on('response', response => {
            const request = response.request();
            const url = simplifyURL(response.url());
            const { referer } = request.headers();
            const status = response.status();

            // check if resource is in same folder as base document
            if (
                !url.replace(/^https?:/, '').startsWith(docPath) &&
                !(includedByReg(url) || includedByReg(referer)) &&
                url !== sr.url
            ) {
                sr.error(compound, 'not-same-folder', { base: docPath, url });
            }

            // check if every resource's status code is ok, ignore 3xx
            if (status >= 400 && !noRespondAllowList.includes(url)) {
                const chain = request.redirectChain();
                // If an url is redirected from another, chain shall exist
                if (chain.length) {
                    sr.error(compound, 'response-error-with-redirect', {
                        url,
                        originUrl: chain[0].url(),
                        status,
                        text: response.statusText(),
                        referer,
                    });
                } else {
                    sr.error(compound, 'response-error', {
                        url,
                        status,
                        text: response.statusText(),
                        referer,
                    });
                }
            }
        });

        await page.goto(sr.url);
    } finally {
        // Always release the browser, even when opening or loading the page
        // fails; otherwise the headless browser process would be leaked.
        await browser.close();
    }

    done();
}