Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use range など #727

Merged
merged 1 commit into from
Mar 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion built/general.d.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import { SummalyEx } from './summaly';
declare const _default: (url: URL, lang?: string | null) => Promise<SummalyEx>;
declare const _default: (url: URL, lang?: string | null, useRange?: boolean) => Promise<SummalyEx>;
export default _default;
4 changes: 2 additions & 2 deletions built/general.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ const cleanup_title_1 = require("./utils/cleanup-title");
const decode_entities_1 = require("./utils/decode-entities");
const got_1 = require("./utils/got");
const cleanup_url_1 = require("./utils/cleanup-url");
exports.default = (url, lang = null) => __awaiter(void 0, void 0, void 0, function* () {
exports.default = (url, lang = null, useRange = false) => __awaiter(void 0, void 0, void 0, function* () {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1, _2, _3, _4, _5, _6;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/))
lang = null;
const res = yield (0, got_1.scpaping)(url.href, { lang: lang || undefined });
const res = yield (0, got_1.scpaping)(url.href, { lang: lang || undefined, useRange });
const $ = res.$;
const landingUrl = new URL(res.response.url);
const twitterCard = (_a = $('meta[name="twitter:card"]').attr('content')) !== null && _a !== void 0 ? _a : $('meta[property="twitter:card"]').attr('content');
Expand Down
4 changes: 2 additions & 2 deletions built/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ type RequestOptions = {
*/
lang?: string | null;
/**
* Whether follow redirects
* Use range for the request
*/
followRedirects?: boolean;
useRange?: boolean;
};
export declare class Summary {
private plugins;
Expand Down
3 changes: 1 addition & 2 deletions built/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class Summary {
return __awaiter(this, void 0, void 0, function* () {
const opts = Object.assign({
lang: null,
followRedirects: true,
}, requestOptions);
const _url = new URL(url);
// pre
Expand All @@ -50,7 +49,7 @@ class Summary {
return summary;
}
else {
let summary = yield (0, general_1.default)(_url, opts.lang);
let summary = yield (0, general_1.default)(_url, opts.lang, opts.useRange);
if (summary == null)
throw 'failed summarize';
const landingUrl = summary.url;
Expand Down
10 changes: 2 additions & 8 deletions built/server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ const load_config_1 = require("./load-config");
const h3 = require("h3");
const http_1 = require("http");
const h3_typebox_1 = require("h3-typebox");
const status_error_1 = require("../utils/status-error");
const config = (0, load_config_1.default)();
const summaryInstance = new __1.Summary({
allowedPlugins: config.allowedPlugins
Expand All @@ -30,19 +29,14 @@ router.get('/url', h3.eventHandler((event) => __awaiter(void 0, void 0, void 0,
try {
const summary = yield summaryInstance.summary(query.url, {
lang: query.lang,
followRedirects: false,
useRange: config.useRange,
});
h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=604800');
return summary;
}
catch (e) {
console.log(`summaly error: ${e} ${query.url}`);
if (e instanceof status_error_1.StatusError && e.isPermanentError) {
h3.setResponseStatus(event, 400);
}
else {
h3.setResponseStatus(event, 500);
}
h3.setResponseStatus(event, 422);
h3.setResponseHeader(event, 'Content-Type', 'text/plain');
h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=3600');
return 'error';
Expand Down
1 change: 1 addition & 0 deletions built/server/load-config.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
 * Shape of the server configuration object returned by this module's default export.
 */
type Config = {
/** Plugin names; the server forwards this value to the `Summary` constructor as `allowedPlugins`. */
allowedPlugins?: string[];
/** When true, the server passes `useRange` to `summary()`, which makes the scraping request carry a `Range` header. */
useRange?: boolean;
};
/**
 * Returns the server configuration described by `Config`.
 * NOTE(review): how the configuration is loaded is not visible in this declaration — confirm against the implementation.
 */
export default function (): Config;
export {};
1 change: 1 addition & 0 deletions built/utils/got.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as Got from 'got';
import * as cheerio from 'cheerio';
export declare function scpaping(url: string, opts?: {
lang?: string;
useRange?: boolean;
}): Promise<{
body: string;
$: cheerio.CheerioAPI;
Expand Down
33 changes: 27 additions & 6 deletions built/utils/got.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ function scpaping(url, opts) {
};
if (opts === null || opts === void 0 ? void 0 : opts.lang)
headers['accept-language'] = opts.lang;
if (opts === null || opts === void 0 ? void 0 : opts.useRange)
headers['range'] = `bytes=0-${MAX_RESPONSE_SIZE - 1}`;
const response = yield getResponse({
url,
method: 'GET',
Expand Down Expand Up @@ -100,7 +102,8 @@ function getResponse(args) {
});
req.on('redirect', (res, opts) => {
if (!(0, check_allowed_url_1.checkAllowedUrl)(opts.url)) {
req.cancel(`Invalid url: ${opts.url}`);
console.warn(`Invalid url: ${opts.url}`);
req.cancel();
}
});
return yield receiveResponce({ req, typeFilter: args.typeFilter });
Expand All @@ -111,25 +114,43 @@ function receiveResponce(args) {
const req = args.req;
const maxSize = MAX_RESPONSE_SIZE;
req.on('response', (res) => {
var _a;
var _a, _b;
if (res.statusCode === 206) {
const m = ((_a = res.headers['content-range']) !== null && _a !== void 0 ? _a : '').match(new RegExp(/^bytes\s+0-(\d+)\/(\d+)$/, 'i')); // bytes 0-47254/47255
if (m == null) {
console.warn(`Invalid content-range '${res.headers['content-range']}'`);
req.cancel();
return;
}
if (Number(m[1]) + 1 !== Number(m[2])) {
console.warn(`maxSize exceeded by content-range (${m[2]} > ${maxSize}) on response`);
req.cancel();
return;
}
}
// Check html
if (args.typeFilter && !((_a = res.headers['content-type']) === null || _a === void 0 ? void 0 : _a.match(args.typeFilter))) {
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
if (args.typeFilter && !((_b = res.headers['content-type']) === null || _b === void 0 ? void 0 : _b.match(args.typeFilter))) {
console.warn(`Rejected by type filter ${res.headers['content-type']}`);
req.cancel();
return;
}
// 応答ヘッダでサイズチェック
const contentLength = res.headers['content-length'];
if (contentLength != null) {
const size = Number(contentLength);
if (size > maxSize) {
req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
console.warn(`maxSize exceeded by content-length (${size} > ${maxSize}) on response`);
req.cancel();
return;
}
}
});
// 受信中のデータでサイズチェック
req.on('downloadProgress', (progress) => {
if (progress.transferred > maxSize && progress.percent !== 1) {
req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
console.warn(`maxSize exceeded in transfer (${progress.transferred} > ${maxSize}) on response`);
req.cancel();
return;
}
});
// 応答取得 with ステータスコードエラーの整形
Expand Down
3 changes: 3 additions & 0 deletions server_config.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ allowedPlugins:
# - iwara
# - komiflo
# - dlsite

# Range付きリクエストを出すか (whether to send requests with a Range header)
useRange: false
4 changes: 2 additions & 2 deletions src/general.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import { SummalyEx } from './summaly';
import { scpaping } from './utils/got';
import { cleanupUrl } from './utils/cleanup-url';

export default async (url: URL, lang: string | null = null): Promise<SummalyEx> => {
export default async (url: URL, lang: string | null = null, useRange = false): Promise<SummalyEx> => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;

const res = await scpaping(url.href, { lang: lang || undefined });
const res = await scpaping(url.href, { lang: lang || undefined, useRange });
const $ = res.$;
const landingUrl = new URL(res.response.url);

Expand Down
7 changes: 3 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ type RequestOptions = {
lang?: string | null;

/**
* Whether follow redirects
* Use range for the request
*/
followRedirects?: boolean;
useRange?: boolean;
};

export class Summary {
Expand All @@ -37,7 +37,6 @@ export class Summary {
public async summary(url: string, requestOptions?: RequestOptions): Promise<Summaly> {
const opts = Object.assign({
lang: null,
followRedirects: true,
}, requestOptions);

const _url = new URL(url);
Expand All @@ -56,7 +55,7 @@ export class Summary {

return summary;
} else {
let summary = await general(_url, opts.lang);
let summary = await general(_url, opts.lang, opts.useRange);
if (summary == null) throw 'failed summarize';
const landingUrl = summary.url;

Expand Down
8 changes: 2 additions & 6 deletions src/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,14 @@ router.get('/url', h3.eventHandler(async event => {
try {
const summary = await summaryInstance.summary(query.url, {
lang: query.lang,
followRedirects: false,
useRange: config.useRange,
});

h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=604800');
return summary;
} catch (e) {
console.log(`summaly error: ${e} ${query.url}`);
if (e instanceof StatusError && e.isPermanentError) {
h3.setResponseStatus(event, 400);
} else {
h3.setResponseStatus(event, 500);
}
h3.setResponseStatus(event, 422);
h3.setResponseHeader(event, 'Content-Type', 'text/plain');
h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=3600');
return 'error';
Expand Down
1 change: 1 addition & 0 deletions src/server/load-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as yaml from 'js-yaml';

/**
 * Shape of the server configuration object.
 */
type Config = {
/** Plugin names; the server forwards this value to the `Summary` constructor as `allowedPlugins`. */
allowedPlugins?: string[];
/** When true, the server passes `useRange` to `summary()`, which makes the scraping request carry a `Range` header. */
useRange?: boolean;
};

export default function () {
Expand Down
35 changes: 29 additions & 6 deletions src/utils/got.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,16 @@ const NOT_BOT_UA = [
'www.sankei.com',
];

export async function scpaping(url: string, opts?: { lang?: string; }) {
export async function scpaping(url: string, opts?: { lang?: string; useRange?: boolean }) {
const u = new URL(url);

const headers = {
'accept': 'text/html, application/xhtml+xml',
'user-agent': NOT_BOT_UA.includes(u.hostname) ? browserUA : BOT_UA,
};
} as Record<string, string>;

if (opts?.lang) headers['accept-language'] = opts.lang;
if (opts?.useRange) headers['range'] = `bytes=0-${MAX_RESPONSE_SIZE - 1}`;

const response = await getResponse({
url,
Expand Down Expand Up @@ -96,7 +97,8 @@ async function getResponse(args: { url: string, method: 'GET' | 'POST', body?: s

req.on('redirect', (res, opts) => {
if (!checkAllowedUrl(opts.url)) {
req.cancel(`Invalid url: ${opts.url}`);
console.warn(`Invalid url: ${opts.url}`);
req.cancel();
}
});

Expand All @@ -108,9 +110,26 @@ async function receiveResponce<T>(args: { req: Got.CancelableRequest<Got.Respons
const maxSize = MAX_RESPONSE_SIZE;

req.on('response', (res: Got.Response) => {
if (res.statusCode === 206) {
const m = (res.headers['content-range'] ?? '').match(new RegExp(/^bytes\s+0-(\d+)\/(\d+)$/, 'i')); // bytes 0-47254/47255

if (m == null) {
console.warn(`Invalid content-range '${res.headers['content-range']}'`);
req.cancel();
return;
}

if (Number(m[1]) + 1 !== Number(m[2])) {
console.warn(`maxSize exceeded by content-range (${m[2]} > ${maxSize}) on response`);
req.cancel();
return;
}
}

// Check html
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
console.warn(`Rejected by type filter ${res.headers['content-type']}`);
req.cancel();
return;
}

Expand All @@ -119,15 +138,19 @@ async function receiveResponce<T>(args: { req: Got.CancelableRequest<Got.Respons
if (contentLength != null) {
const size = Number(contentLength);
if (size > maxSize) {
req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
console.warn(`maxSize exceeded by content-length (${size} > ${maxSize}) on response`);
req.cancel();
return;
}
}
});

// 受信中のデータでサイズチェック
req.on('downloadProgress', (progress: Got.Progress) => {
if (progress.transferred > maxSize && progress.percent !== 1) {
req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
console.warn(`maxSize exceeded in transfer (${progress.transferred} > ${maxSize}) on response`);
req.cancel();
return;
}
});

Expand Down
Loading