Skip to content

Commit

Permalink
PDF Resolve #726
Browse files Browse the repository at this point in the history
  • Loading branch information
mei23 committed Mar 17, 2024
1 parent ca2d2c1 commit 748bf71
Show file tree
Hide file tree
Showing 15 changed files with 173 additions and 39 deletions.
44 changes: 33 additions & 11 deletions built/general.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,52 @@ const cleanup_title_1 = require("./utils/cleanup-title");
const decode_entities_1 = require("./utils/decode-entities");
const got_1 = require("./utils/got");
const cleanup_url_1 = require("./utils/cleanup-url");
exports.default = (url, lang = null, useRange = false) => __awaiter(void 0, void 0, void 0, function* () {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1, _2, _3, _4, _5, _6;
exports.default = (url_1, ...args_1) => __awaiter(void 0, [url_1, ...args_1], void 0, function* (url, lang = null, useRange = false) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1, _2, _3, _4, _5, _6, _7, _8;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/))
lang = null;
const res = yield (0, got_1.scpaping)(url.href, { lang: lang || undefined, useRange });
const $ = res.$;
const landingUrl = new URL(res.response.url);
const twitterCard = (_a = $('meta[name="twitter:card"]').attr('content')) !== null && _a !== void 0 ? _a : $('meta[property="twitter:card"]').attr('content');
let title = (_e = (_d = (_c = (_b = $('meta[name="twitter:title"]').attr('content')) !== null && _b !== void 0 ? _b : $('meta[property="twitter:title"]').attr('content')) !== null && _c !== void 0 ? _c : $('meta[property="og:title"]').attr('content')) !== null && _d !== void 0 ? _d : $('title').text()) !== null && _e !== void 0 ? _e : null;
if (res.pdf) {
console.log(res.pdf);
const result = {
title: (_a = res.pdf.title) !== null && _a !== void 0 ? _a : 'PDF Document',
icon: ``,
description: null,
thumbnail: null,
medias: undefined,
player: {
url: null,
width: null,
height: null
},
sitename: (_b = landingUrl.hostname) !== null && _b !== void 0 ? _b : null,
sensitive: false,
url: landingUrl.href,
$,
};
return result;
}
if (!$)
throw new Error('unex 1');
const twitterCard = (_c = $('meta[name="twitter:card"]').attr('content')) !== null && _c !== void 0 ? _c : $('meta[property="twitter:card"]').attr('content');
let title = (_g = (_f = (_e = (_d = $('meta[name="twitter:title"]').attr('content')) !== null && _d !== void 0 ? _d : $('meta[property="twitter:title"]').attr('content')) !== null && _e !== void 0 ? _e : $('meta[property="og:title"]').attr('content')) !== null && _f !== void 0 ? _f : $('title').text()) !== null && _g !== void 0 ? _g : null;
title = (0, decode_entities_1.decodeEntities)(title, 300);
let image = (_l = (_k = (_j = (_h = (_g = (_f = $('meta[name="twitter:image"]').attr('content')) !== null && _f !== void 0 ? _f : $('meta[property="twitter:image"]').attr('content')) !== null && _g !== void 0 ? _g : $('meta[property="og:image"]').attr('content')) !== null && _h !== void 0 ? _h : $('link[rel="image_src"]').attr('href')) !== null && _j !== void 0 ? _j : $('link[rel="apple-touch-icon"]').attr('href')) !== null && _k !== void 0 ? _k : $('link[rel="apple-touch-icon image_src"]').attr('href')) !== null && _l !== void 0 ? _l : null;
let image = (_o = (_m = (_l = (_k = (_j = (_h = $('meta[name="twitter:image"]').attr('content')) !== null && _h !== void 0 ? _h : $('meta[property="twitter:image"]').attr('content')) !== null && _j !== void 0 ? _j : $('meta[property="og:image"]').attr('content')) !== null && _k !== void 0 ? _k : $('link[rel="image_src"]').attr('href')) !== null && _l !== void 0 ? _l : $('link[rel="apple-touch-icon"]').attr('href')) !== null && _m !== void 0 ? _m : $('link[rel="apple-touch-icon image_src"]').attr('href')) !== null && _o !== void 0 ? _o : null;
image = (0, cleanup_url_1.cleanupUrl)(image, landingUrl.href);
let playerUrl = (_r = (_q = (_p = (_o = (_m = (twitterCard !== 'summary_large_image' ? $('meta[name="twitter:player"]').attr('content') : null)) !== null && _m !== void 0 ? _m : (twitterCard !== 'summary_large_image' ? $('meta[property="twitter:player"]').attr('content') : null)) !== null && _o !== void 0 ? _o : $('meta[property="og:video"]').attr('content')) !== null && _p !== void 0 ? _p : $('meta[property="og:video:secure_url"]').attr('content')) !== null && _q !== void 0 ? _q : $('meta[property="og:video:url"]').attr('content')) !== null && _r !== void 0 ? _r : null;
let playerUrl = (_t = (_s = (_r = (_q = (_p = (twitterCard !== 'summary_large_image' ? $('meta[name="twitter:player"]').attr('content') : null)) !== null && _p !== void 0 ? _p : (twitterCard !== 'summary_large_image' ? $('meta[property="twitter:player"]').attr('content') : null)) !== null && _q !== void 0 ? _q : $('meta[property="og:video"]').attr('content')) !== null && _r !== void 0 ? _r : $('meta[property="og:video:secure_url"]').attr('content')) !== null && _s !== void 0 ? _s : $('meta[property="og:video:url"]').attr('content')) !== null && _t !== void 0 ? _t : null;
playerUrl = (0, cleanup_url_1.cleanupUrl)(playerUrl, landingUrl.href);
const playerWidth = parseInt((_u = (_t = (_s = $('meta[name="twitter:player:width"]').attr('content')) !== null && _s !== void 0 ? _s : $('meta[property="twitter:player:width"]').attr('content')) !== null && _t !== void 0 ? _t : $('meta[property="og:video:width"]').attr('content')) !== null && _u !== void 0 ? _u : '');
const playerHeight = parseInt((_x = (_w = (_v = $('meta[name="twitter:player:height"]').attr('content')) !== null && _v !== void 0 ? _v : $('meta[property="twitter:player:height"]').attr('content')) !== null && _w !== void 0 ? _w : $('meta[property="og:video:height"]').attr('content')) !== null && _x !== void 0 ? _x : '');
let description = (_1 = (_0 = (_z = (_y = $('meta[name="twitter:description"]').attr('content')) !== null && _y !== void 0 ? _y : $('meta[property="twitter:description"]').attr('content')) !== null && _z !== void 0 ? _z : $('meta[property="og:description"]').attr('content')) !== null && _0 !== void 0 ? _0 : $('meta[name="description"]').attr('content')) !== null && _1 !== void 0 ? _1 : null;
const playerWidth = parseInt((_w = (_v = (_u = $('meta[name="twitter:player:width"]').attr('content')) !== null && _u !== void 0 ? _u : $('meta[property="twitter:player:width"]').attr('content')) !== null && _v !== void 0 ? _v : $('meta[property="og:video:width"]').attr('content')) !== null && _w !== void 0 ? _w : '');
const playerHeight = parseInt((_z = (_y = (_x = $('meta[name="twitter:player:height"]').attr('content')) !== null && _x !== void 0 ? _x : $('meta[property="twitter:player:height"]').attr('content')) !== null && _y !== void 0 ? _y : $('meta[property="og:video:height"]').attr('content')) !== null && _z !== void 0 ? _z : '');
let description = (_3 = (_2 = (_1 = (_0 = $('meta[name="twitter:description"]').attr('content')) !== null && _0 !== void 0 ? _0 : $('meta[property="twitter:description"]').attr('content')) !== null && _1 !== void 0 ? _1 : $('meta[property="og:description"]').attr('content')) !== null && _2 !== void 0 ? _2 : $('meta[name="description"]').attr('content')) !== null && _3 !== void 0 ? _3 : null;
description = (0, decode_entities_1.decodeEntities)(description, 300);
if (title === description) {
description = null;
}
let siteName = (_4 = (_3 = (_2 = $('meta[property="og:site_name"]').attr('content')) !== null && _2 !== void 0 ? _2 : $('meta[name="application-name"]').attr('content')) !== null && _3 !== void 0 ? _3 : landingUrl.hostname) !== null && _4 !== void 0 ? _4 : null;
let siteName = (_6 = (_5 = (_4 = $('meta[property="og:site_name"]').attr('content')) !== null && _4 !== void 0 ? _4 : $('meta[name="application-name"]').attr('content')) !== null && _5 !== void 0 ? _5 : landingUrl.hostname) !== null && _6 !== void 0 ? _6 : null;
siteName = (0, decode_entities_1.decodeEntities)(siteName, 300);
const favicon = (_6 = (_5 = $('link[rel="shortcut icon"]').attr('href')) !== null && _5 !== void 0 ? _5 : $('link[rel="icon"]').attr('href')) !== null && _6 !== void 0 ? _6 : null;
const favicon = (_8 = (_7 = $('link[rel="shortcut icon"]').attr('href')) !== null && _7 !== void 0 ? _7 : $('link[rel="icon"]').attr('href')) !== null && _8 !== void 0 ? _8 : null;
const icon = (0, cleanup_url_1.cleanupUrl)(favicon, landingUrl.href);
const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true';
// Clean up the title
Expand Down
2 changes: 1 addition & 1 deletion built/plugins/komiflo.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ function test(url) {
}
exports.test = test;
function postProcess(summaly) {
var _a, _b, _c, _d, _e, _f, _g, _h;
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c, _d, _e, _f, _g, _h;
const landingUrl = summaly.url;
// 作品ページ?
const m = landingUrl.match(/komiflo[.]com(?:[/]#!)?[/]comics[/](\d+)/);
Expand Down
2 changes: 1 addition & 1 deletion built/plugins/spotify.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ function test(url) {
}
exports.test = test;
function process(url) {
var _a, _b, _c;
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c;
// build oEmbed url
const u = new URL('https://open.spotify.com/oembed');
u.searchParams.append('url', url.href);
Expand Down
2 changes: 1 addition & 1 deletion built/plugins/twitter.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ function test(url) {
}
exports.test = test;
function process(url) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
const m = url.pathname.match(/^[/]\w+[/]status[/](\d+)/);
if (!m)
throw 'err';
Expand Down
2 changes: 1 addition & 1 deletion built/plugins/youtube.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ function test(url) {
}
exports.test = test;
function process(url) {
var _a, _b, _c;
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c;
// build oEmbed url
const u = new URL('https://www.youtube.com/oembed');
u.searchParams.append('url', url.href);
Expand Down
10 changes: 9 additions & 1 deletion built/utils/got.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,17 @@ export declare function scpaping(url: string, opts?: {
lang?: string;
useRange?: boolean;
}): Promise<{
pdf: {
title: string | undefined;
};
response: Got.Response<unknown>;
body?: undefined;
$?: undefined;
} | {
body: string;
$: cheerio.CheerioAPI;
response: Got.Response<string>;
response: Got.Response<unknown>;
pdf?: undefined;
}>;
export declare function getJson(url: string, referer: string): Promise<any>;
export declare function fetchUrl(url: string, path: string): Promise<void>;
31 changes: 22 additions & 9 deletions built/utils/got.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const cheerio = require("cheerio");
const client_1 = require("../client");
const agent_1 = require("./agent");
const check_allowed_url_1 = require("./check-allowed-url");
const pdf = require('pdf-parse');
const PrivateIp = require('private-ip');
const pipeline = util.promisify(stream.pipeline);
const RESPONSE_TIMEOUT = 20 * 1000;
Expand All @@ -33,6 +34,7 @@ const NOT_BOT_UA = [
const LOG_CONSOLE = !!process.env.SUMMALY_LOG_CONSOLE;
function scpaping(url, opts) {
return __awaiter(this, void 0, void 0, function* () {
var _a, _b;
const u = new URL(url);
const headers = {
'accept': 'text/html, application/xhtml+xml',
Expand All @@ -46,19 +48,30 @@ function scpaping(url, opts) {
url,
method: 'GET',
headers,
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
typeFilter: /^(text\/html|application\/xhtml\+xml|application\/pdf)/,
});
if (response.ip && PrivateIp(response.ip)) {
throw new status_error_1.StatusError(`Private IP rejected ${response.ip}`, 400, 'Private IP Rejected');
}
const encoding = (0, encoding_1.detectEncoding)(response.rawBody);
const body = (0, encoding_1.toUtf8)(response.rawBody, encoding);
const $ = cheerio.load(body);
return {
body,
$,
response,
};
if ((_a = response.headers['content-type']) === null || _a === void 0 ? void 0 : _a.match(/^application\/pdf/)) {
const data = yield pdf(response.rawBody);
return {
pdf: {
title: (_b = data === null || data === void 0 ? void 0 : data.info) === null || _b === void 0 ? void 0 : _b.Title,
},
response,
};
}
else {
const encoding = (0, encoding_1.detectEncoding)(response.rawBody);
const body = (0, encoding_1.toUtf8)(response.rawBody, encoding);
const $ = cheerio.load(body);
return {
body,
$,
response,
};
}
});
}
exports.scpaping = scpaping;
Expand Down
2 changes: 2 additions & 0 deletions built/utils/sanitize-url.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ function sanitizeUrl(str) {
return str;
if (u.protocol === 'http:')
return str;
if (u.protocol === 'data:')
return str;
}
catch (_a) {
return null;
Expand Down
1 change: 1 addition & 0 deletions built/utils/tmp-ope.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/**
 * Requests a temporary file and resolves with a two-element array:
 * index 0 is the file path, index 1 is the cleanup callback for that file.
 */
export declare function tmpOpe(): Promise<any[]>;
26 changes: 26 additions & 0 deletions built/utils/tmp-ope.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"use strict";
// Compiler-emitted helper that drives a generator function as an async function.
// Reuses `this.__awaiter` when one is already present; otherwise defines it here.
//   thisArg    - `this` value the generator is invoked with
//   _arguments - argument list for the generator (falls back to [] when falsy)
//   P          - promise constructor to use (falls back to the global Promise)
//   generator  - generator function whose yielded values are awaited
// Returns a P instance that settles with the generator's return value, or rejects
// with whatever the generator throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in P unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the fulfilled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-throw the rejection reason inside the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` binding with the iterator) and start it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.tmpOpe = void 0;
const tmp = require("tmp");
/**
 * Requests a temporary file from `tmp.file` and waits for its callback.
 *
 * @returns {Promise<Array>} Resolves with `[path, cleanup]`: the path and the
 *   cleanup callback handed to the `tmp.file` callback. Rejects with the error
 *   passed to that callback, if there is one.
 */
function tmpOpe() {
    return __awaiter(this, void 0, void 0, function* () {
        const created = yield new Promise((resolve, reject) => {
            // The file descriptor (third argument) is not used by this helper.
            tmp.file((err, filePath, _fd, removeCallback) => {
                if (err) {
                    return reject(err);
                }
                resolve([filePath, removeCallback]);
            });
        });
        const [filePath, removeCallback] = created;
        return [filePath, removeCallback];
    });
}
exports.tmpOpe = tmpOpe;
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"iconv-lite": "0.6.3",
"js-yaml": "4.1.0",
"jschardet": "3.0.0",
"pdf-parse": "1.1.1",
"private-ip": "2.3.4",
"typescript": "5.4.2"
}
Expand Down
Loading

0 comments on commit 748bf71

Please sign in to comment.