forked from syuilo/summaly
-
Notifications
You must be signed in to change notification settings - Fork 10
/
index.ts
152 lines (127 loc) · 3.51 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/**
* summaly
* https://github.com/misskey-dev/summaly
*/
import { URL } from 'node:url';
import tracer from 'trace-redirect';
import * as Got from 'got';
import { SummalyResult } from './summary.js';
import { SummalyPlugin } from './iplugin.js';
export * from './iplugin.js';
import general, { GeneralScrapingOptions } from './general.js';
import { setAgent } from './utils/got.js';
import { plugins as builtinPlugins } from './plugins/index.js';
import type { FastifyInstance } from 'fastify';
/**
 * Options accepted by `summaly` (and by the Fastify plugin exported from this module).
 * Every field is optional.
 */
export type SummalyOptions = {
/**
 * Accept-Language for the request.
 */
lang?: string | null;
/**
 * Whether to follow redirects.
 * When truthy, `summaly` first resolves the URL through `trace-redirect` and summarizes the resolved URL.
 */
followRedirects?: boolean;
/**
 * Custom plugins.
 * They are appended after the built-in plugins; the first plugin whose `test` matches the URL is used.
 */
plugins?: SummalyPlugin[];
/**
 * Custom HTTP agent.
 * When provided, `summaly` passes it to `setAgent` before doing anything else.
 */
agent?: Got.Agents;
/**
 * User-Agent for the request.
 */
userAgent?: string;
/**
 * Response timeout.
 * Sets the timeout for each phase, such as host name resolution and socket communication.
 * NOTE(review): the unit is not visible in this file (presumably milliseconds) — confirm against the scraping helper.
 */
responseTimeout?: number;
/**
 * Operation timeout.
 * Sets the timeout from the start to the end of the request.
 * NOTE(review): the unit is not visible in this file (presumably milliseconds) — confirm against the scraping helper.
 */
operationTimeout?: number;
/**
 * Maximum content length.
 * If set, an error will occur if the content-length value returned from the other server is larger than this value (or if the received body size exceeds this value).
 */
contentLengthLimit?: number;
/**
 * Content length required.
 * If set to true, it will be an error if the other server does not return content-length.
 */
contentLengthRequired?: boolean;
};
/**
 * Baseline option values that `summaly` starts from before applying caller-supplied options:
 * no explicit language, redirects followed, and no custom plugins.
 */
export const summalyDefaultOptions: SummalyOptions = {
	lang: null,
	followRedirects: true,
	plugins: [],
};
/**
 * Summarize a web page.
 *
 * @param url - URL of the page to summarize.
 * @param options - Optional settings; any field left out falls back to `summalyDefaultOptions`.
 * @returns The summary produced by the matching plugin (or by the general scraper when no
 * plugin matches), with its `url` field set to the URL that was actually summarized.
 * @throws Error with message `failed summarize` when the summarizer yields `null`/`undefined`.
 * Errors from `new URL(...)` (malformed URL) and from the summarizer itself propagate unchanged.
 */
export const summaly = async (url: string, options?: SummalyOptions): Promise<SummalyResult> => {
	if (options?.agent) setAgent(options.agent);

	// Merge into a fresh object: assigning onto `summalyDefaultOptions` itself would let
	// the options of one call leak into every later call.
	const opts: SummalyOptions = { ...summalyDefaultOptions, ...options };

	const plugins = builtinPlugins.concat(opts.plugins ?? []);

	let actualUrl = url;
	if (opts.followRedirects) {
		// `.catch(() => url)` would also work, but getting jest to consume trace-redirect
		// is a hassle, so a try-catch is used instead.
		try {
			actualUrl = await tracer(url);
		} catch {
			// Best effort: if redirect tracing fails, summarize the URL as given.
			actualUrl = url;
		}
	}

	const _url = new URL(actualUrl);

	// Find the first matching plugin (built-in plugins are checked before custom ones).
	const match = plugins.find(plugin => plugin.test(_url));

	// Get summary
	const scrapingOptions: GeneralScrapingOptions = {
		lang: opts.lang,
		userAgent: opts.userAgent,
		responseTimeout: opts.responseTimeout,
		operationTimeout: opts.operationTimeout,
		contentLengthLimit: opts.contentLengthLimit,
		contentLengthRequired: opts.contentLengthRequired,
	};

	// Fall back to the general scraper when no plugin claims the URL.
	const summarize = match ? match.summarize : general;
	const summary = await summarize(_url, scrapingOptions);

	if (summary == null) {
		throw new Error('failed summarize');
	}

	return Object.assign(summary, {
		url: actualUrl,
	});
};
/**
 * Fastify plugin that exposes `summaly` over HTTP.
 *
 * Registers `GET /`, which reads `url` (required) and `lang` (optional) from the query string.
 * - Missing `url`: responds with status 400 and `{ error: 'url is required' }`.
 * - `summaly` throws: responds with status 500 and `{ error: <thrown value> }`.
 * - Otherwise: responds with the summary.
 *
 * @param fastify - Fastify instance to register the route on.
 * @param options - Options forwarded to `summaly`; they are spread last, so they override
 * the per-request `lang` and the `followRedirects: false` default used by this route.
 * @param done - Callback invoked once the route has been registered.
 */
export default function (fastify: FastifyInstance, options: SummalyOptions, done: (err?: Error) => void) {
	type SummalyQuery = {
		url?: string;
		lang?: string;
	};

	fastify.get<{ Querystring: SummalyQuery }>('/', async (req, reply) => {
		const { url, lang } = req.query;

		// Guard clause: nothing to summarize without a target URL.
		if (url == null) {
			return reply.status(400).send({ error: 'url is required' });
		}

		try {
			return await summaly(url, {
				lang,
				followRedirects: false,
				...options,
			});
		} catch (err) {
			return reply.status(500).send({ error: err });
		}
	});

	done();
}