Skip to content

Commit

Permalink
feat(NODE-5909): optimize writing basic latin strings
Browse files Browse the repository at this point in the history
  • Loading branch information
nbbeeken committed Feb 7, 2024
1 parent 3242587 commit a5300ad
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 17 deletions.
39 changes: 39 additions & 0 deletions src/utils/latin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,42 @@ export function tryLatin(uint8array: Uint8Array, start: number, end: number): st

return String.fromCharCode(...latinBytes);
}

/**
 * This function is an optimization for writing small basic latin strings.
 * @internal
 * @remarks
 * ### Important characteristics:
 * - If the string length is 0 return 0, do not perform any work
 * - If a string is longer than 25 code units return null
 * - If the offset is negative or NaN return null
 * - If the destination does not have room for every code unit starting at offset return null
 * - If any code unit exceeds 127 this function returns null
 *
 * A null return means the caller has to fall back to a general purpose UTF-8 encoder.
 * When null is returned because of a code unit above 127, the code units that preceded it
 * have already been copied into destination; the fallback is expected to overwrite them.
 *
 * @param destination - The uint8array to serialize the string to
 * @param source - The string to turn into UTF-8 bytes if it fits in the basic latin range
 * @param offset - The position in the destination to begin writing bytes to
 * @returns the number of bytes written to destination if all code units are below 128, otherwise null
 */
export function tryWriteLatin(
  destination: Uint8Array,
  source: string,
  offset: number
): number | null {
  if (source.length === 0) return 0;

  if (source.length > 25) return null;

  // A negative offset would pass the space check below (the subtraction grows) and the
  // out-of-range typed array writes would be silently dropped while still reporting success.
  // Written as a negated `>=` so that NaN is rejected as well.
  if (!(offset >= 0)) return null;

  if (destination.length - offset < source.length) return null;

  for (
    let charOffset = 0, destinationOffset = offset;
    charOffset < source.length;
    charOffset++, destinationOffset++
  ) {
    const char = source.charCodeAt(charOffset);
    // Code units above 127 need more than one UTF-8 byte, so give up on the fast path.
    if (char > 127) return null;

    destination[destinationOffset] = char;
  }

  // Every code unit was a single byte, so bytes written equals the string length.
  return source.length;
}
7 changes: 6 additions & 1 deletion src/utils/node_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { BSONError } from '../error';
import { validateUtf8 } from '../validate_utf8';
import { tryLatin } from './latin';
import { tryLatin, tryWriteLatin } from './latin';

type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
type NodeJsBuffer = ArrayBufferView &
Expand Down Expand Up @@ -149,6 +149,11 @@ export const nodeJsByteUtils = {
},

encodeUTF8Into(buffer: Uint8Array, source: string, byteOffset: number): number {
  // Attempt the basic latin fast path first; it reports null when it declines the input.
  const fastPathByteCount = tryWriteLatin(buffer, source, byteOffset);

  if (fastPathByteCount == null) {
    // Fast path declined: let the Node.js Buffer implementation encode the string as UTF-8.
    const nodeBuffer = nodeJsByteUtils.toLocalBufferType(buffer);
    return nodeBuffer.write(source, byteOffset, undefined, 'utf8');
  }

  return fastPathByteCount;
},

Expand Down
32 changes: 17 additions & 15 deletions test/node/byte_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -365,33 +365,35 @@ const toISO88591Tests: ByteUtilTest<'toISO88591'>[] = [
}
}
];
const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [
{
name: 'should create buffer from utf8 input',
inputs: [Buffer.from('abc\u{1f913}', 'utf8').toString('utf8')],
name: 'should insert utf8 bytes into buffer',
inputs: [Buffer.alloc(7), 'abc\u{1f913}', 0],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8'));
expect(output).to.equal(7);
expect(this.inputs[0]).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8'));
}
},
{
name: 'should return empty buffer for empty string input',
inputs: [''],
name: 'should return 0 and not modify input buffer',
inputs: [Uint8Array.from([2, 2]), '', 0],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.have.property('byteLength', 0);
expect(output).to.equal(0);
expect(this.inputs[0]).to.deep.equal(Uint8Array.from([2, 2]));
}
},
{
name: 'should return bytes with replacement character if string is not encodable',
inputs: ['\u{1f913}'.slice(0, 1)],
name: 'should insert replacement character bytes if string is not encodable',
inputs: [Uint8Array.from({ length: 10 }, () => 2), '\u{1f913}'.slice(0, 1), 2],
expectation({ output, error }) {
expect(error).to.be.null;
expect(output).to.have.property('byteLength', 3);
expect(output).to.have.property('0', 0xef);
expect(output).to.have.property('1', 0xbf);
expect(output).to.have.property('2', 0xbd);
const backToString = Buffer.from(output!).toString('utf8');
expect(output).to.equal(3);
expect(this.inputs[0]).to.have.property('2', 0xef);
expect(this.inputs[0]).to.have.property('3', 0xbf);
expect(this.inputs[0]).to.have.property('4', 0xbd);
const backToString = Buffer.from(this.inputs[0].subarray(2, 5)).toString('utf8');
const replacementCharacter = '\u{fffd}';
expect(backToString).to.equal(replacementCharacter);
}
Expand Down Expand Up @@ -507,7 +509,7 @@ const table = new Map<keyof ByteUtils, ByteUtilTest<keyof ByteUtils>[]>([
['toHex', toHexTests],
['fromISO88591', fromISO88591Tests],
['toISO88591', toISO88591Tests],
['fromUTF8', fromUTF8Tests],
['encodeUTF8Into', fromUTF8Tests],
['toUTF8', toUTF8Tests],
['utf8ByteLength', utf8ByteLengthTests],
['randomBytes', randomBytesTests]
Expand Down
61 changes: 60 additions & 1 deletion test/node/utils/latin.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { expect } from 'chai';
import { tryLatin } from '../../../src/utils/latin';
import { tryLatin, tryWriteLatin } from '../../../src/utils/latin';
import * as sinon from 'sinon';

describe('tryLatin()', () => {
Expand Down Expand Up @@ -116,3 +116,62 @@ describe('tryLatin()', () => {
});
});
});

// Unit tests for tryWriteLatin(): empty input, a destination that is too small,
// code units outside the 0-127 range, and the 25 code unit length cap.
describe('tryWriteLatin()', () => {
context('when given a string of length 0', () => {
it('returns 0 and does not modify the destination', () => {
const input = Uint8Array.from({ length: 10 }, () => 1);
expect(tryWriteLatin(input, '', 2)).to.equal(0);
expect(input).to.deep.equal(Uint8Array.from({ length: 10 }, () => 1));
});
});

context('when given a string with a length larger than the buffer', () => {
it('returns null', () => {
const input = Uint8Array.from({ length: 10 }, () => 1);
// 11 code units at offset 0 do not fit in a 10 byte destination.
expect(tryWriteLatin(input, 'a'.repeat(11), 0)).to.be.null;
// 13 code units starting at offset 2 do not fit either.
expect(tryWriteLatin(input, 'a'.repeat(13), 2)).to.be.null;
});
});

// Holds the sinon spy installed on String.prototype.charCodeAt so tests can
// count how many code units tryWriteLatin reads.
let charCodeAtSpy;

// NOTE(review): these hooks are registered at the describe level, so they
// presumably wrap every test in this suite, including the ones declared above.
beforeEach(() => {
charCodeAtSpy = sinon.spy(String.prototype, 'charCodeAt');
});

afterEach(() => {
// Remove the spy so String.prototype.charCodeAt is back to normal between tests.
sinon.restore();
});

// Generate one group of tests for each length from 1 through 25 code units.
for (let stringLength = 1; stringLength <= 25; stringLength++) {
context(`when there is ${stringLength} bytes`, () => {
context('that exceed 127', () => {
it('returns null', () => {
// The final code unit is 0x80 (128); everything before it is 'a'.
expect(
tryWriteLatin(
new Uint8Array(stringLength * 3),
'a'.repeat(stringLength - 1) + '\x80',
0
)
).be.null;
});
});

it(`calls charCodeAt ${stringLength}`, () => {
// Every code unit is 127, and the destination is three times the string
// length with the write starting at offset stringLength, so nothing is rejected
// and charCodeAt should be called once per code unit.
tryWriteLatin(
new Uint8Array(stringLength * 3),
String.fromCharCode(127).repeat(stringLength),
stringLength
);
expect(charCodeAtSpy).to.have.callCount(stringLength);
});
});
}

context('when there is >25 characters', () => {
it('returns null', () => {
// 26 code units is over the length cap even though the 75 byte destination has room.
expect(tryWriteLatin(new Uint8Array(75), 'a'.repeat(26), 0)).be.null;
});
});
});

0 comments on commit a5300ad

Please sign in to comment.