-
Notifications
You must be signed in to change notification settings - Fork 446
/
index.js
68 lines (57 loc) · 1.28 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/**
 * Site-specific extraction rules for pages served from `www.nytimes.com`.
 *
 * Every field holds a `selectors` list. An entry is either a plain selector
 * string or a `[selector, attributeName]` pair.
 *
 * NOTE(review): the engine that consumes this object is not part of this file.
 * Presumably selectors are tried in listed order and a pair means "read this
 * attribute from the matched node" — confirm against the consuming code.
 */
export const NYTimesExtractor = {
  domain: 'www.nytimes.com',

  title: {
    selectors: [
      'h1[data-testid="headline"]',
      'h1.g-headline',
      'h1[itemprop="headline"]',
      'h1.headline',
      'h1 .balancedHeadline',
    ],
  },

  author: {
    selectors: [
      ['meta[name="author"]', 'value'],
      '.g-byline',
      '.byline',
      ['meta[name="byl"]', 'value'],
    ],
  },

  content: {
    selectors: ['div.g-blocks', 'section[name="articleBody"]', 'article#story'],

    transforms: {
      /**
       * Replaces the first literal `{{size}}` in the image's `src` with a
       * fixed width of 640 and writes the result back to `src`.
       *
       * @param {object} $node - Wrapped node exposing `attr(name)` and
       *   `attr(name, value)`.
       * @returns {undefined} Nothing is returned on any path.
       */
      'img.g-lazy': $node => {
        const src = $node.attr('src');

        // A matched image may carry no `src` attribute at all; leave it
        // untouched instead of throwing on `undefined.replace(...)`.
        if (typeof src !== 'string') {
          return;
        }

        const width = 640;
        $node.attr('src', src.replace('{{size}}', String(width)));
      },
    },

    // Selectors listed for removal from the content.
    // NOTE(review): presumably stripped by the consuming engine — confirm.
    clean: [
      '.ad',
      'header#story-header',
      '.story-body-1 .lede.video',
      '.visually-hidden',
      '#newsletter-promo',
      '.promo',
      '.comments-button',
      '.hidden',
      '.comments',
      '.supplemental',
      '.nocontent',
      '.story-footer-links',
    ],
  },

  date_published: {
    selectors: [
      ['meta[name="article:published_time"]', 'value'],
      ['meta[name="article:published"]', 'value'],
    ],
  },

  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']],
  },

  // No site-specific rules are defined for these fields.
  dek: null,
  next_page_url: null,
  excerpt: null,
};