From aa48356d23549151f62668aa5dec2c92ea8f1c65 Mon Sep 17 00:00:00 2001 From: TimeForANinja Date: Wed, 24 Aug 2022 20:36:22 +0200 Subject: [PATCH 1/5] support regex in `utils#cutAfterJSON` --- lib/utils.js | 21 +++++++++++++++------ test/utils-test.js | 6 ++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index 2bd5ae2c..1d23844f 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -69,8 +69,11 @@ exports.cutAfterJSON = mixedJson => { throw new Error(`Can't cut unsupported JSON (need to begin with [ or { ) but got: ${mixedJson[0]}`); } - // States if the loop is currently in a string - let isString = false; + // States if the loop is currently inside a complex js object + // 0 = no + // 1 = string + // 2 = regex + let isComplexObject = 0; // States if the current character is treated as escaped or not let isEscaped = false; @@ -80,9 +83,15 @@ exports.cutAfterJSON = mixedJson => { let i; for (i = 0; i < mixedJson.length; i++) { - // Toggle the isString boolean when leaving/entering string - if (mixedJson[i] === '"' && !isEscaped) { - isString = !isString; + // Toggle the isComplexObject boolean when leaving/entering string + if (mixedJson[i] === '"' && !isEscaped && (isComplexObject === 0 || isComplexObject === 1)) { + isComplexObject = isComplexObject === 0 ? 1 : 0; + continue; + } + + // Toggle the isComplexObject boolean when leaving/entering regex + if (mixedJson[i] === '/' && !isEscaped && (isComplexObject === 0 || isComplexObject === 2)) { + isComplexObject = isComplexObject === 0 ? 
2 : 0; continue; } @@ -90,7 +99,7 @@ exports.cutAfterJSON = mixedJson => { // Reset for every regular character isEscaped = mixedJson[i] === '\\' && !isEscaped; - if (isString) continue; + if (isComplexObject > 0) continue; if (mixedJson[i] === open) { counter++; diff --git a/test/utils-test.js b/test/utils-test.js index f7d9b12b..f3cdfd43 100644 --- a/test/utils-test.js +++ b/test/utils-test.js @@ -45,6 +45,12 @@ describe('utils.cutAfterJSON()', () => { it('Tolerant to string with escaped quoting', () => { assert.strictEqual(utils.cutAfterJSON('{"a": "\\"}1", "b": 1}abcd'), '{"a": "\\"}1", "b": 1}'); }); + it('Tolerant to string with regexes', () => { + assert.strictEqual( + utils.cutAfterJSON('{"a": "\\"}1", "b": 1, "c": /[0-9]}}\\/}/}abcd'), + '{"a": "\\"}1", "b": 1, "c": /[0-9]}}\\/}/}', + ); + }); it('works with nested', () => { assert.strictEqual( utils.cutAfterJSON('{"a": "\\"1", "b": 1, "c": {"test": 1}}abcd'), From 97822395c2eadf2dfc24c544d2675b9d7750d8bb Mon Sep 17 00:00:00 2001 From: TimeForANinja Date: Wed, 24 Aug 2022 20:59:44 +0200 Subject: [PATCH 2/5] fix test and code --- lib/utils.js | 17 ++++++++++++++--- test/utils-test.js | 6 ++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index 1d23844f..349780d2 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -89,9 +89,20 @@ exports.cutAfterJSON = mixedJson => { continue; } - // Toggle the isComplexObject boolean when leaving/entering regex - if (mixedJson[i] === '/' && !isEscaped && (isComplexObject === 0 || isComplexObject === 2)) { - isComplexObject = isComplexObject === 0 ? 
2 : 0; + // Toggle the isComplexObject boolean when entering regex + if (mixedJson[i] === '/' && !isEscaped && isComplexObject === 0) { + let j = i - 1; + // Spool to last non-whitespace character + for (; mixedJson[j].match(/\s/); j--); + // Check if the preceding character signaled the start of a block + if (['[', '{', ',', ':'].includes(mixedJson[j])) { + isComplexObject = 2; + continue; + } + } + // Toggle the isComplexObject boolean when leaving regex + if (mixedJson[i] === '/' && !isEscaped && isComplexObject === 2) { + isComplexObject = 0; continue; } diff --git a/test/utils-test.js b/test/utils-test.js index f3cdfd43..ac403d6e 100644 --- a/test/utils-test.js +++ b/test/utils-test.js @@ -51,6 +51,12 @@ describe('utils.cutAfterJSON()', () => { '{"a": "\\"}1", "b": 1, "c": /[0-9]}}\\/}/}', ); }); + it('does not fail for division followed by a regex', () => { + assert.strictEqual( + utils.cutAfterJSON('{"a": "\\"}1", "b": 1, "c": [4/6, /[0-9]}}\\/}/]}abcd'), + '{"a": "\\"}1", "b": 1, "c": [4/6, /[0-9]}}\\/}/]}', + ); + }); it('works with nested', () => { assert.strictEqual( utils.cutAfterJSON('{"a": "\\"1", "b": 1, "c": {"test": 1}}abcd'), From f8f0680cb19e61a2a6e0a70b949236f3ea92e341 Mon Sep 17 00:00:00 2001 From: TimeForANinja Date: Fri, 26 Aug 2022 12:04:21 +0200 Subject: [PATCH 3/5] add support for single and backtick quoted strings to cutAfterJSON --- lib/info.js | 2 +- lib/sig.js | 6 ++--- lib/utils.js | 64 +++++++++++++++++++++++++++------------------- test/utils-test.js | 43 ++++++++++++++++++++----------- 4 files changed, 69 insertions(+), 46 deletions(-) diff --git a/lib/info.js b/lib/info.js index 5ddd8c23..34b88e93 100644 --- a/lib/info.js +++ b/lib/info.js @@ -265,7 +265,7 @@ const findJSON = (source, varName, body, left, right, prependJSON) => { if (!jsonStr) { throw Error(`Could not find ${varName} in ${source}`); } - return parseJSON(source, varName, utils.cutAfterJSON(`${prependJSON}${jsonStr}`)); + return parseJSON(source, varName, 
utils.cutAfterJS(`${prependJSON}${jsonStr}`)); }; diff --git a/lib/sig.js b/lib/sig.js index d7454268..1eb28b45 100644 --- a/lib/sig.js +++ b/lib/sig.js @@ -39,7 +39,7 @@ exports.extractFunctions = body => { const ndx = body.indexOf(functionStart); if (ndx < 0) return ''; const subBody = body.slice(ndx + functionStart.length - 1); - return `var ${functionName}=${utils.cutAfterJSON(subBody)}`; + return `var ${functionName}=${utils.cutAfterJS(subBody)}`; }; const extractDecipher = () => { const functionName = utils.between(body, `a.set("alr","yes");c&&(c=`, `(decodeURIC`); @@ -48,7 +48,7 @@ exports.extractFunctions = body => { const ndx = body.indexOf(functionStart); if (ndx >= 0) { const subBody = body.slice(ndx + functionStart.length); - let functionBody = `var ${functionStart}${utils.cutAfterJSON(subBody)}`; + let functionBody = `var ${functionStart}${utils.cutAfterJS(subBody)}`; functionBody = `${extractManipulations(functionBody)};${functionBody};${functionName}(sig);`; functions.push(functionBody); } @@ -62,7 +62,7 @@ exports.extractFunctions = body => { const ndx = body.indexOf(functionStart); if (ndx >= 0) { const subBody = body.slice(ndx + functionStart.length); - const functionBody = `var ${functionStart}${utils.cutAfterJSON(subBody)};${functionName}(ncode);`; + const functionBody = `var ${functionStart}${utils.cutAfterJS(subBody)};${functionName}(ncode);`; functions.push(functionBody); } } diff --git a/lib/utils.js b/lib/utils.js index 349780d2..d24d477c 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -48,14 +48,29 @@ exports.parseAbbreviatedNumber = string => { return null; }; +/** + * Escape sequences for cutAfterJS + * @param {string} start + * @param {string} end + * @param {undefined|Regex} startPrefix + */ +const ESCAPING_SEQUENZES = [ + // Strings + { start: '"', end: '"' }, + { start: "'", end: "'" }, + { start: '`', end: '`' }, + // RegeEx + { start: '/', end: '/', startPrefix: /(^|[[{:;,])\s+$/ }, +]; /** - * Match begin and end braces of input 
JSON, return only json + * Match begin and end braces of input JS, return only JS * * @param {string} mixedJson * @returns {string} */ -exports.cutAfterJSON = mixedJson => { +exports.cutAfterJS = mixedJson => { + // Define the general open and closing tag let open, close; if (mixedJson[0] === '[') { open = '['; @@ -69,11 +84,8 @@ exports.cutAfterJSON = mixedJson => { throw new Error(`Can't cut unsupported JSON (need to begin with [ or { ) but got: ${mixedJson[0]}`); } - // States if the loop is currently inside a complex js object - // 0 = no - // 1 = string - // 2 = regex - let isComplexObject = 0; + // States if the loop is currently inside an escaped js object + let isEscapedObject = null; // States if the current character is treated as escaped or not let isEscaped = false; @@ -82,35 +94,33 @@ exports.cutAfterJSON = mixedJson => { let counter = 0; let i; + // Go through all characters from the start for (i = 0; i < mixedJson.length; i++) { - // Toggle the isComplexObject boolean when leaving/entering string - if (mixedJson[i] === '"' && !isEscaped && (isComplexObject === 0 || isComplexObject === 1)) { - isComplexObject = isComplexObject === 0 ? 
1 : 0; + // End of current escaped object + if (!isEscaped && isEscapedObject !== null && mixedJson[i] === isEscapedObject.end) { + isEscapedObject = null; continue; - } - - // Toggle the isComplexObject boolean when entering regex - if (mixedJson[i] === '/' && !isEscaped && isComplexObject === 0) { - let j = i - 1; - // Spool to last non-whitespace character - for (; mixedJson[j].match(/\s/); j--); - // Check if the preceding character signaled the start of a block - if (['[', '{', ',', ':'].includes(mixedJson[j])) { - isComplexObject = 2; + // Might be the start of a new escaped object + } else if (!isEscaped && isEscapedObject === null) { + for (const escaped of ESCAPING_SEQUENZES) { + if (mixedJson[i] !== escaped.start) continue; + // Test startPrefix against last 10 characters + if (!escaped.startPrefix || mixedJson.substring(i - 10, i).match(escaped.startPrefix)) { + isEscapedObject = escaped; + break; + } + } + // Continue if we found a new escaped object + if (isEscapedObject !== null) { continue; } } - // Toggle the isComplexObject boolean when leaving regex - if (mixedJson[i] === '/' && !isEscaped && isComplexObject === 2) { - isComplexObject = 0; - continue; - } // Toggle the isEscaped boolean for every backslash // Reset for every regular character isEscaped = mixedJson[i] === '\\' && !isEscaped; - if (isComplexObject > 0) continue; + if (isEscapedObject !== null) continue; if (mixedJson[i] === open) { counter++; @@ -121,7 +131,7 @@ exports.cutAfterJSON = mixedJson => { // All brackets have been closed, thus end of JSON is reached if (counter === 0) { // Return the cut JSON - return mixedJson.substr(0, i + 1); + return mixedJson.substring(0, i + 1); } } diff --git a/test/utils-test.js b/test/utils-test.js index ac403d6e..1b488000 100644 --- a/test/utils-test.js +++ b/test/utils-test.js @@ -32,69 +32,82 @@ describe('utils.between()', () => { }); -describe('utils.cutAfterJSON()', () => { +describe('utils.cutAfterJS()', () => { it('Works with simple JSON', 
() => { - assert.strictEqual(utils.cutAfterJSON('{"a": 1, "b": 1}'), '{"a": 1, "b": 1}'); + assert.strictEqual(utils.cutAfterJS('{"a": 1, "b": 1}'), '{"a": 1, "b": 1}'); }); it('Cut extra characters after JSON', () => { - assert.strictEqual(utils.cutAfterJSON('{"a": 1, "b": 1}abcd'), '{"a": 1, "b": 1}'); + assert.strictEqual(utils.cutAfterJS('{"a": 1, "b": 1}abcd'), '{"a": 1, "b": 1}'); + }); + it('Tolerant to double-quoted string constants', () => { + assert.strictEqual(utils.cutAfterJS('{"a": "}1", "b": 1}abcd'), '{"a": "}1", "b": 1}'); + }); + it('Tolerant to single-quoted string constants', () => { + assert.strictEqual(utils.cutAfterJS(`{"a": '}1', "b": 1}abcd`), `{"a": '}1', "b": 1}`); + }); + it('Tolerant to back-tick-quoted string constants', () => { + assert.strictEqual(utils.cutAfterJS('{"a": `}1`, "b": 1}abcd'), '{"a": `}1`, "b": 1}'); }); it('Tolerant to string constants', () => { - assert.strictEqual(utils.cutAfterJSON('{"a": "}1", "b": 1}abcd'), '{"a": "}1", "b": 1}'); + assert.strictEqual(utils.cutAfterJS('{"a": "}1", "b": 1}abcd'), '{"a": "}1", "b": 1}'); }); it('Tolerant to string with escaped quoting', () => { - assert.strictEqual(utils.cutAfterJSON('{"a": "\\"}1", "b": 1}abcd'), '{"a": "\\"}1", "b": 1}'); + assert.strictEqual(utils.cutAfterJS('{"a": "\\"}1", "b": 1}abcd'), '{"a": "\\"}1", "b": 1}'); }); it('Tolerant to string with regexes', () => { assert.strictEqual( - utils.cutAfterJSON('{"a": "\\"}1", "b": 1, "c": /[0-9]}}\\/}/}abcd'), + utils.cutAfterJS('{"a": "\\"}1", "b": 1, "c": /[0-9]}}\\/}/}abcd'), '{"a": "\\"}1", "b": 1, "c": /[0-9]}}\\/}/}', ); }); it('does not fail for division followed by a regex', () => { assert.strictEqual( - utils.cutAfterJSON('{"a": "\\"}1", "b": 1, "c": [4/6, /[0-9]}}\\/}/]}abcd'), + utils.cutAfterJS('{"a": "\\"}1", "b": 1, "c": [4/6, /[0-9]}}\\/}/]}abcd', true), '{"a": "\\"}1", "b": 1, "c": [4/6, /[0-9]}}\\/}/]}', ); }); - it('works with nested', () => { + it('works with nested objects', () => { 
assert.strictEqual( - utils.cutAfterJSON('{"a": "\\"1", "b": 1, "c": {"test": 1}}abcd'), + utils.cutAfterJS('{"a": "\\"1", "b": 1, "c": {"test": 1}}abcd'), '{"a": "\\"1", "b": 1, "c": {"test": 1}}', ); }); + it('works with try/catch', () => { + let testStr = '{"a": "\\"1", "b": 1, "c": () => { try { /* do sth */ } catch (e) { a = [2+3] }; return 5}}'; + assert.strictEqual(utils.cutAfterJS(`${testStr}abcd`), testStr); + }); it('Works with utf', () => { assert.strictEqual( - utils.cutAfterJSON('{"a": "\\"фыва", "b": 1, "c": {"test": 1}}abcd'), + utils.cutAfterJS('{"a": "\\"фыва", "b": 1, "c": {"test": 1}}abcd'), '{"a": "\\"фыва", "b": 1, "c": {"test": 1}}', ); }); it('Works with \\\\ in string', () => { assert.strictEqual( - utils.cutAfterJSON('{"a": "\\\\фыва", "b": 1, "c": {"test": 1}}abcd'), + utils.cutAfterJS('{"a": "\\\\фыва", "b": 1, "c": {"test": 1}}abcd'), '{"a": "\\\\фыва", "b": 1, "c": {"test": 1}}', ); }); it('Works with \\\\ towards the end of a string', () => { assert.strictEqual( - utils.cutAfterJSON('{"text": "\\\\"};'), + utils.cutAfterJS('{"text": "\\\\"};'), '{"text": "\\\\"}', ); }); it('Works with [ as start', () => { assert.strictEqual( - utils.cutAfterJSON('[{"a": 1}, {"b": 2}]abcd'), + utils.cutAfterJS('[{"a": 1}, {"b": 2}]abcd'), '[{"a": 1}, {"b": 2}]', ); }); it('Returns an error when not beginning with [ or {', () => { assert.throws(() => { - utils.cutAfterJSON('abcd]}'); + utils.cutAfterJS('abcd]}'); }, /Can't cut unsupported JSON \(need to begin with \[ or { \) but got: ./); }); it('Returns an error when missing closing bracket', () => { assert.throws(() => { - utils.cutAfterJSON('{"a": 1,{ "b": 1}'); + utils.cutAfterJS('{"a": 1,{ "b": 1}'); }, /Can't cut unsupported JSON \(no matching closing bracket found\)/); }); }); From 5139e4565154178318bced89fc37f78b0bf77a50 Mon Sep 17 00:00:00 2001 From: TimeForANinja Date: Fri, 26 Aug 2022 12:08:54 +0200 Subject: [PATCH 4/5] yet another unit-test --- test/utils-test.js | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/test/utils-test.js b/test/utils-test.js index 1b488000..a00c4cfd 100644 --- a/test/utils-test.js +++ b/test/utils-test.js @@ -45,6 +45,10 @@ describe('utils.cutAfterJS()', () => { it('Tolerant to single-quoted string constants', () => { assert.strictEqual(utils.cutAfterJS(`{"a": '}1', "b": 1}abcd`), `{"a": '}1', "b": 1}`); }); + it('Tolerant to complex single-quoted string constants', () => { + const str = "[-1816574795, '\",;/[;', function asdf() { a = 2/3; return a;}]"; + assert.strictEqual(utils.cutAfterJS(`${str}abcd`), str); + }); it('Tolerant to back-tick-quoted string constants', () => { assert.strictEqual(utils.cutAfterJS('{"a": `}1`, "b": 1}abcd'), '{"a": `}1`, "b": 1}'); }); From 4defe8bd40364d213832891ee602af0509c25a08 Mon Sep 17 00:00:00 2001 From: TimeForANinja Date: Fri, 26 Aug 2022 12:13:42 +0200 Subject: [PATCH 5/5] update comments --- lib/utils.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index d24d477c..5ba61ae8 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -50,9 +50,9 @@ exports.parseAbbreviatedNumber = string => { /** * Escape sequences for cutAfterJS - * @param {string} start - * @param {string} end - * @param {undefined|Regex} startPrefix + * @param {string} start the character string that starts the escape sequence + * @param {string} end the character string that ends the escape sequence + * @param {undefined|Regex} startPrefix a regex to check against the preceding 10 characters */ const ESCAPING_SEQUENZES = [ // Strings