From 9d5a5dfe59aa6f46e4d5d844564b065bdd037154 Mon Sep 17 00:00:00 2001 From: Aditi Khare <106987683+aditi-khare-mongoDB@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:11:08 -0400 Subject: [PATCH] feat(NODE-5648): add Long.fromStringStrict() (#675) --- src/int_32.ts | 7 +- src/long.ts | 143 ++++++++++++++++++++++++--- src/utils/string_utils.ts | 44 +++++++++ test/node/int_32_tests.js | 4 +- test/node/long.test.ts | 116 ++++++++++++++++++++++ test/node/release.test.ts | 1 + test/node/utils/string_utils.test.ts | 55 +++++++++++ 7 files changed, 353 insertions(+), 17 deletions(-) create mode 100644 src/utils/string_utils.ts create mode 100644 test/node/utils/string_utils.test.ts diff --git a/src/int_32.ts b/src/int_32.ts index f394f7af6..7c95027ce 100644 --- a/src/int_32.ts +++ b/src/int_32.ts @@ -3,6 +3,7 @@ import { BSON_INT32_MAX, BSON_INT32_MIN } from './constants'; import { BSONError } from './error'; import type { EJSONOptions } from './extended_json'; import { type InspectFn, defaultInspect } from './parser/utils'; +import { removeLeadingZerosAndExplicitPlus } from './utils/string_utils'; /** @public */ export interface Int32Extended { @@ -48,11 +49,7 @@ export class Int32 extends BSONValue { * @param value - the string we want to represent as an int32. */ static fromString(value: string): Int32 { - const cleanedValue = !/[^0]+/.test(value) - ? value.replace(/^0+/, '0') // all zeros case - : value[0] === '-' - ? 
value.replace(/^-0+/, '-') // negative number with leading zeros - : value.replace(/^\+?0+/, ''); // positive number with leading zeros + const cleanedValue = removeLeadingZerosAndExplicitPlus(value); const coercedValue = Number(value); diff --git a/src/long.ts b/src/long.ts index f05f71e6b..fc3c69460 100644 --- a/src/long.ts +++ b/src/long.ts @@ -3,6 +3,7 @@ import { BSONError } from './error'; import type { EJSONOptions } from './extended_json'; import { type InspectFn, defaultInspect } from './parser/utils'; import type { Timestamp } from './timestamp'; +import * as StringUtils from './utils/string_utils'; interface LongWASMHelpers { /** Gets the high bits of the last operation performed */ @@ -246,29 +247,24 @@ export class Long extends BSONValue { } /** + * @internal * Returns a Long representation of the given string, written using the specified radix. + * Throws a BSONError if any of the following conditions are true: + * - the string is empty or contains a hyphen anywhere other than the first position + * - the radix is outside the range 2-36 * @param str - The textual representation of the Long * @param unsigned - Whether unsigned or not, defaults to signed * @param radix - The radix in which the text is written (2-36), defaults to 10 * @returns The corresponding Long value */ - static fromString(str: string, unsigned?: boolean, radix?: number): Long { + private static _fromString(str: string, unsigned: boolean, radix: number): Long { if (str.length === 0) throw new BSONError('empty string'); - if (str === 'NaN' || str === 'Infinity' || str === '+Infinity' || str === '-Infinity') - return Long.ZERO; - if (typeof unsigned === 'number') { - // For goog.math.long compatibility - (radix = unsigned), (unsigned = false); - } else { - unsigned = !!unsigned; - } - radix = radix || 10; if (radix < 2 || 36 < radix) throw new BSONError('radix'); let p; if ((p = str.indexOf('-')) > 0) throw new BSONError('interior hyphen'); else if (p === 0) { - return 
Long.fromString(str.substring(1), unsigned, radix).neg(); + return Long._fromString(str.substring(1), unsigned, radix).neg(); } // Do several (8) digits each time through the loop, so as to @@ -291,6 +287,131 @@ export class Long extends BSONValue { return result; } + /** + * Returns a signed Long representation of the given string, written using radix 10. + * Will throw an error if the given text is not exactly representable as a Long. + * Throws an error if any of the following conditions are true: + * - the string contains invalid characters for the radix 10 + * - the string contains whitespace + * - the value the string represents is too large or too small to be a Long + * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero + * @param str - The textual representation of the Long + * @returns The corresponding Long value + */ + static fromStringStrict(str: string): Long; + /** + * Returns a Long representation of the given string, written using the radix 10. + * Will throw an error if the given parameters are not exactly representable as a Long. + * Throws an error if any of the following conditions are true: + * - the string contains invalid characters for the given radix + * - the string contains whitespace + * - the value the string represents is too large or too small to be a Long + * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero + * @param str - The textual representation of the Long + * @param unsigned - Whether unsigned or not, defaults to signed + * @returns The corresponding Long value + */ + static fromStringStrict(str: string, unsigned?: boolean): Long; + /** + * Returns a signed Long representation of the given string, written using the specified radix. + * Will throw an error if the given parameters are not exactly representable as a Long. 
+ * Throws an error if any of the following conditions are true: + * - the string contains invalid characters for the given radix + * - the string contains whitespace + * - the value the string represents is too large or too small to be a Long + * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero + * @param str - The textual representation of the Long + * @param radix - The radix in which the text is written (2-36), defaults to 10 + * @returns The corresponding Long value + */ + static fromStringStrict(str: string, radix?: number): Long; + /** + * Returns a Long representation of the given string, written using the specified radix. + * Will throw an error if the given parameters are not exactly representable as a Long. + * Throws an error if any of the following conditions are true: + * - the string contains invalid characters for the given radix + * - the string contains whitespace + * - the value the string represents is too large or too small to be a Long + * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero + * @param str - The textual representation of the Long + * @param unsigned - Whether unsigned or not, defaults to signed + * @param radix - The radix in which the text is written (2-36), defaults to 10 + * @returns The corresponding Long value + */ + static fromStringStrict(str: string, unsigned?: boolean, radix?: number): Long; + static fromStringStrict(str: string, unsignedOrRadix?: boolean | number, radix?: number): Long { + let unsigned = false; + if (typeof unsignedOrRadix === 'number') { + // For goog.math.long compatibility + (radix = unsignedOrRadix), (unsignedOrRadix = false); + } else { + unsigned = !!unsignedOrRadix; + } + radix ??= 10; + + if (str.trim() !== str) { + throw new BSONError(`Input: '${str}' contains leading and/or trailing whitespace`); + } + if (!StringUtils.validateStringCharacters(str, radix)) { + throw new BSONError(`Input: '${str}' contains invalid 
characters for radix: ${radix}`); + } + + // remove leading zeros (for later string comparison and to make math faster) + const cleanedStr = StringUtils.removeLeadingZerosAndExplicitPlus(str); + + // check roundtrip result + const result = Long._fromString(cleanedStr, unsigned, radix); + if (result.toString(radix).toLowerCase() !== cleanedStr.toLowerCase()) { + throw new BSONError( + `Input: ${str} is not representable as ${result.unsigned ? 'an unsigned' : 'a signed'} 64-bit Long ${radix != null ? `with radix: ${radix}` : ''}` + ); + } + return result; + } + + /** + * Returns a signed Long representation of the given string, written using radix 10. + * @param str - The textual representation of the Long + * @returns The corresponding Long value + */ + static fromString(str: string): Long; + /** + * Returns a signed Long representation of the given string, written using the specified radix. + * @param str - The textual representation of the Long + * @param radix - The radix in which the text is written (2-36), defaults to 10 + * @returns The corresponding Long value + */ + static fromString(str: string, radix?: number): Long; + /** + * Returns a Long representation of the given string, written using radix 10. + * @param str - The textual representation of the Long + * @param unsigned - Whether unsigned or not, defaults to signed + * @returns The corresponding Long value + */ + static fromString(str: string, unsigned?: boolean): Long; + /** + * Returns a Long representation of the given string, written using the specified radix. 
+ * @param str - The textual representation of the Long + * @param unsigned - Whether unsigned or not, defaults to signed + * @param radix - The radix in which the text is written (2-36), defaults to 10 + * @returns The corresponding Long value + */ + static fromString(str: string, unsigned?: boolean, radix?: number): Long; + static fromString(str: string, unsignedOrRadix?: boolean | number, radix?: number): Long { + let unsigned = false; + if (typeof unsignedOrRadix === 'number') { + // For goog.math.long compatibility + (radix = unsignedOrRadix), (unsignedOrRadix = false); + } else { + unsigned = !!unsignedOrRadix; + } + radix ??= 10; + if (str === 'NaN' || str === 'Infinity' || str === '+Infinity' || str === '-Infinity') { + return Long.ZERO; + } + return Long._fromString(str, unsigned, radix); + } + /** * Creates a Long from its byte representation. * @param bytes - Byte representation diff --git a/src/utils/string_utils.ts b/src/utils/string_utils.ts new file mode 100644 index 000000000..1ffb118e9 --- /dev/null +++ b/src/utils/string_utils.ts @@ -0,0 +1,44 @@ +/** + * @internal + * Removes leading zeros and an explicit plus from the textual representation of a number. + */ +export function removeLeadingZerosAndExplicitPlus(str: string): string { + if (str === '') { + return str; + } + + let startIndex = 0; + + const isNegative = str[startIndex] === '-'; + const isExplicitlyPositive = str[startIndex] === '+'; + + if (isExplicitlyPositive || isNegative) { + startIndex += 1; + } + + let foundInsignificantZero = false; + + for (; startIndex < str.length && str[startIndex] === '0'; ++startIndex) { + foundInsignificantZero = true; + } + + if (!foundInsignificantZero) { + return isExplicitlyPositive ? str.slice(1) : str; + } + + return `${isNegative ? '-' : ''}${str.length === startIndex ? '0' : str.slice(startIndex)}`; +} + +/** + * @internal + * Returns false for a string that contains invalid characters for its radix, else returns the original string. 
+ * @param str - The textual representation of the Long + * @param radix - The radix in which the text is written (2-36), defaults to 10 + */ +export function validateStringCharacters(str: string, radix?: number): false | string { + radix = radix ?? 10; + const validCharacters = '0123456789abcdefghijklmnopqrstuvwxyz'.slice(0, radix); + // regex is case insensitive and accepts only an optional leading sign followed by validCharacters, so misplaced signs such as '+-5' are rejected + const regex = new RegExp(`^[-+]?[${validCharacters}]*$`, 'i'); + return regex.test(str) ? str : false; +} diff --git a/test/node/int_32_tests.js b/test/node/int_32_tests.js index 44f2b7440..8e3fab140 100644 --- a/test/node/int_32_tests.js +++ b/test/node/int_32_tests.js @@ -108,7 +108,9 @@ describe('Int32', function () { ['a string with zero with leading zeros', '000000', 0], ['a string with positive leading zeros', '000000867', 867], ['a string with explicity positive leading zeros', '+000000867', 867], - ['a string with negative leading zeros', '-00007', -7] + ['a string with negative leading zeros', '-00007', -7], + ['a string with explicit positive zeros', '+000000', 0], + ['a string explicit positive no leading zeros', '+32', 32] ]; const errorInputs = [ ['Int32.max + 1', '2147483648', 'larger than the maximum value for Int32'], diff --git a/test/node/long.test.ts b/test/node/long.test.ts index 75611a8a5..32bf7ebb9 100644 --- a/test/node/long.test.ts +++ b/test/node/long.test.ts @@ -163,4 +163,120 @@ describe('Long', function () { }); }); }); + + describe('static fromString()', function () { + const successInputs: [ + name: string, + input: string, + unsigned: boolean | undefined, + radix: number | undefined, + expectedStr?: string + ][] = [ + ['Infinity', 'Infinity', false, 34, '0'], + ['-Infinity', '-Infinity', false, 23, '0'], + ['+Infinity', '+Infinity', false, 12, '0'], + ['NaN', 'NaN', false, 16, '0'] + ]; + + for (const [testName, str, unsigned, radix, expectedStr] of successInputs) { + context(`when the input is 
${testName}`, () => { + it(`should return a Long representation of the input`, () => { + expect(Long.fromString(str, unsigned, radix).toString(radix)).to.equal( + expectedStr ?? str.toLowerCase() + ); + }); + }); + } + }); + + describe('static fromStringStrict()', function () { + const successInputs: [ + name: string, + input: string, + unsigned: boolean | undefined, + radix: number | undefined, + expectedStr?: string + ][] = [ + ['basic no alphabet low radix', '1236', true, 8], + ['negative basic no alphabet low radix', '-1236', false, 8], + ['valid upper and lower case letters in string with radix > 10', 'eEe', true, 15], + ['hexadecimal letters', '126073efbcdADEF', true, 16], + ['negative hexadecimal letters', '-1267efbcdDEF', false, 16], + ['negative leading zeros', '-00000032', false, 15, '-32'], + ['leading zeros', '00000032', false, 15, '32'], + ['explicit positive leading zeros', '+00000032', false, 15, '32'], + ['max unsigned binary input', Long.MAX_UNSIGNED_VALUE.toString(2), true, 2], + ['max unsigned decimal input', Long.MAX_UNSIGNED_VALUE.toString(10), true, 10], + ['max unsigned hex input', Long.MAX_UNSIGNED_VALUE.toString(16), true, 16], + ['max signed binary input', Long.MAX_VALUE.toString(2), false, 2], + ['max signed decimal input', Long.MAX_VALUE.toString(10), false, 10], + ['max signed hex input', Long.MAX_VALUE.toString(16), false, 16], + ['min signed binary input', Long.MIN_VALUE.toString(2), false, 2], + ['min signed decimal input', Long.MIN_VALUE.toString(10), false, 10], + ['min signed hex input', Long.MIN_VALUE.toString(16), false, 16], + ['signed zeros', '+000000', false, 10, '0'], + ['unsigned zero', '0', true, 10], + ['explicit positive no leading zeros', '+32', true, 10, '32'], + // the following inputs are valid radix 36 inputs, but will not represent NaN or +/- Infinity + ['radix 36 Infinity', 'Infinity', false, 36], + ['radix 36 -Infinity', '-Infinity', false, 36], + ['radix 36 +Infinity', '+Infinity', false, 36, 'infinity'], + 
['radix 36 NaN', 'NaN', false, 36], + ['overload no unsigned and no radix parameter', '-32', undefined, undefined], + ['overload no unsigned parameter', '-32', undefined, 12], + ['overload no radix parameter', '32', true, undefined] + ]; + + const failureInputs: [ + name: string, + input: string, + unsigned: boolean | undefined, + radix: number | undefined + ][] = [ + ['empty string', '', true, 2], + ['invalid numbers in binary string', '234', true, 2], + ['non a-z or numeric string', '~~', true, 36], + ['alphabet in radix < 10', 'a', true, 9], + ['radix does not allow all alphabet letters', 'eee', false, 14], + ['over max unsigned binary input', Long.MAX_UNSIGNED_VALUE.toString(2) + '1', true, 2], + ['over max unsigned decimal input', Long.MAX_UNSIGNED_VALUE.toString(10) + '1', true, 10], + ['over max unsigned hex input', Long.MAX_UNSIGNED_VALUE.toString(16) + '1', true, 16], + ['over max signed binary input', Long.MAX_VALUE.toString(2) + '1', false, 2], + ['over max signed decimal input', Long.MAX_VALUE.toString(10) + '1', false, 10], + ['over max signed hex input', Long.MAX_VALUE.toString(16) + '1', false, 16], + ['under min signed binary input', Long.MIN_VALUE.toString(2) + '1', false, 2], + ['under min signed decimal input', Long.MIN_VALUE.toString(10) + '1', false, 10], + ['under min signed hex input', Long.MIN_VALUE.toString(16) + '1', false, 16], + ['string with whitespace', ' 3503a ', false, 11], + ['negative zero unsigned', '-0', true, 9], + ['negative zero signed', '-0', false, 13], + ['radix 1', '12', false, 1], + ['negative radix', '12', false, -4], + ['radix over 36', '12', false, 37], + // the following inputs are invalid radix 10 inputs + // this is because of the characters, not because of the values they commonly represent + ['radix 10 Infinity', 'Infinity', false, 10], + ['radix 10 -Infinity', '-Infinity', false, 10], + ['radix 10 +Infinity', '+Infinity', false, 10], + ['radix 10 NaN', 'NaN', false, 10], + ['overload no radix parameter and 
invalid sign', '-32', true, undefined] + ]; + + for (const [testName, str, unsigned, radix, expectedStr] of successInputs) { + context(`when the input is ${testName}`, () => { + it(`should return a Long representation of the input`, () => { + expect(Long.fromStringStrict(str, unsigned, radix).toString(radix)).to.equal( + expectedStr ?? str.toLowerCase() + ); + }); + }); + } + for (const [testName, str, unsigned, radix] of failureInputs) { + context(`when the input is ${testName}`, () => { + it(`should throw BSONError`, () => { + expect(() => Long.fromStringStrict(str, unsigned, radix)).to.throw(BSONError); + }); + }); + } + }); }); diff --git a/test/node/release.test.ts b/test/node/release.test.ts index 756305b38..cc3aefad4 100644 --- a/test/node/release.test.ts +++ b/test/node/release.test.ts @@ -48,6 +48,7 @@ const REQUIRED_FILES = [ 'src/utils/byte_utils.ts', 'src/utils/node_byte_utils.ts', 'src/utils/number_utils.ts', + 'src/utils/string_utils.ts', 'src/utils/web_byte_utils.ts', 'src/utils/latin.ts', 'src/parse_utf8.ts', diff --git a/test/node/utils/string_utils.test.ts b/test/node/utils/string_utils.test.ts new file mode 100644 index 000000000..73b05c7aa --- /dev/null +++ b/test/node/utils/string_utils.test.ts @@ -0,0 +1,55 @@ +import { expect } from 'chai'; +import * as StringUtils from '../../../src/utils/string_utils'; + +describe('removeLeadingZerosAndExplicitPlus()', function () { + const inputs: [testName: string, str: string, expectedStr: string][] = [ + ['a string with zero with leading zeros', '000000', '0'], + ['a string with positive leading zeros', '000000867', '867'], + ['a string with explicity positive leading zeros', '+000000867', '867'], + ['a string with negative leading zeros', '-00007', '-7'], + ['a string with explicit positive zeros', '+000000', '0'], + ['a string explicit positive no leading zeros', '+32', '32'], + ['a string explicit positive no leading zeros and letters', '+ab00', 'ab00'] + ]; + + for (const [testName, str, 
expectedStr] of inputs) { + context(`when the input is ${testName}`, () => { + it(`should return a input string`, () => { + expect(StringUtils.removeLeadingZerosAndExplicitPlus(str)).to.equal(expectedStr); + }); + }); + } +}); + +describe('validateStringCharacters()', function () { + const successInputs: [testName: string, str: string, radix: number][] = [ + ['radix does allows given alphabet letter', 'eEe', 15], + ['empty string', '', 2], + ['all possible hexadecimal characters', '12efabc689873dADCDEF', 16], + ['leading zeros', '0000000004567e', 16], + ['explicit positive no leading zeros', '+32', 10] + ]; + + const failureInputs = [ + ['multiple decimal points', '..', 30], + ['non a-z or numeric string', '~~', 36], + ['alphabet in radix < 10', 'a', 4], + ['radix does not allow all alphabet letters', 'eee', 14] + ]; + + for (const [testName, str, radix] of successInputs) { + context(`when the input is ${testName}`, () => { + it(`should return a input string`, () => { + expect(StringUtils.validateStringCharacters(str, radix)).to.equal(str); + }); + }); + } + + for (const [testName, str, radix] of failureInputs) { + context(`when the input is ${testName}`, () => { + it(`should return false`, () => { + expect(StringUtils.validateStringCharacters(str, radix)).to.equal(false); + }); + }); + } +});