Skip to content

Commit

Permalink
DevTools encoding supports multibyte characters (e.g. "🟩") (facebook#22424)
Browse files Browse the repository at this point in the history

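Changes our text encoding approach to properly support multibyte characters, following the algorithm linked from the original pull request (the link target was lost in this capture; the code comment on `utfEncodeString` credits a Stack Overflow answer by Tim Down for the approach). Based on benchmarking, this new approach is roughly equivalent in terms of performance (sometimes slightly faster, sometimes slightly slower).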

I also considered using TextEncoder/TextDecoder for this, but it was much slower (~85%).
  • Loading branch information
Brian Vaughn authored and zhengjitf committed Apr 15, 2022
1 parent fd72bed commit 891c7af
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 23 deletions.
13 changes: 13 additions & 0 deletions packages/react-devtools-shared/src/__tests__/store-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ describe('Store', () => {
`);
});

// Regression test: renders a component whose displayName is made up entirely
// of emoji and checks that the store snapshot shows that name unchanged.
it('should handle multibyte character strings', () => {
const Component = () => null;
Component.displayName = '🟩💜🔵';

const container = document.createElement('div');

act(() => legacyRender(<Component />, container));
expect(store).toMatchInlineSnapshot(`
[root]
<🟩💜🔵>
`);
});

describe('collapseNodesByDefault:false', () => {
beforeEach(() => {
store.collapseNodesByDefault = false;
Expand Down
61 changes: 41 additions & 20 deletions packages/react-devtools-shared/src/backend/renderer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1514,11 +1514,16 @@ export function attach(

type OperationsArray = Array<number>;

// Value stored in pendingStringTable for each distinct string.
type StringTableEntry = {|
// The string as returned by utfEncodeString (one number per encoded element).
// Its length, not the JS string length, is what gets written to the operations array.
encodedString: Array<number>,
// ID handed out by getStringID (pendingStringTable.size + 1 at insertion time).
id: number,
|};

const pendingOperations: OperationsArray = [];
const pendingRealUnmountedIDs: Array<number> = [];
const pendingSimulatedUnmountedIDs: Array<number> = [];
let pendingOperationsQueue: Array<OperationsArray> | null = [];
const pendingStringTable: Map<string, number> = new Map();
const pendingStringTable: Map<string, StringTableEntry> = new Map();
let pendingStringTableLength: number = 0;
let pendingUnmountedRootID: number | null = null;

Expand Down Expand Up @@ -1736,13 +1741,19 @@ export function attach(
// Now fill in the string table.
// [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
operations[i++] = pendingStringTableLength;
pendingStringTable.forEach((value, key) => {
operations[i++] = key.length;
const encodedKey = utfEncodeString(key);
for (let j = 0; j < encodedKey.length; j++) {
operations[i + j] = encodedKey[j];
pendingStringTable.forEach((entry, stringKey) => {
const encodedString = entry.encodedString;

// Don't use the string length.
// It won't work for multibyte characters (like emoji).
const length = encodedString.length;

operations[i++] = length;
for (let j = 0; j < length; j++) {
operations[i + j] = encodedString[j];
}
i += key.length;

i += length;
});

if (numUnmountIDs > 0) {
Expand Down Expand Up @@ -1789,21 +1800,31 @@ export function attach(
pendingStringTableLength = 0;
}

// Returns the pending string table ID for the given string.
// - null always maps to ID 0.
// - A string already present in pendingStringTable returns its existing ID.
// - Otherwise the next ID (pendingStringTable.size + 1) is assigned, the string
//   is added to the table, and pendingStringTableLength is increased to cover
//   the new entry plus one slot for its length prefix.
function getStringID(str: string | null): number {
if (str === null) {
function getStringID(string: string | null): number {
if (string === null) {
return 0;
}
const existingID = pendingStringTable.get(str);
if (existingID !== undefined) {
return existingID;
}
const stringID = pendingStringTable.size + 1;
pendingStringTable.set(str, stringID);
// The string table total length needs to account
// both for the string length, and for the array item
// that contains the length itself. Hence + 1.
pendingStringTableLength += str.length + 1;
return stringID;
const existingEntry = pendingStringTable.get(string);
if (existingEntry !== undefined) {
return existingEntry.id;
}

const id = pendingStringTable.size + 1;
const encodedString = utfEncodeString(string);

pendingStringTable.set(string, {
encodedString,
id,
});

// The string table total length needs to account both for the string length,
// and for the array item that contains the length itself.
//
// Don't use string length for this table.
// It won't work for multibyte characters (like emoji).
pendingStringTableLength += encodedString.length + 1;

return id;
}

function recordMount(fiber: Fiber, parentFiber: Fiber | null) {
Expand Down
26 changes: 23 additions & 3 deletions packages/react-devtools-shared/src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,37 @@ export function utfDecodeString(array: Array<number>): string {
return string;
}

function surrogatePairToCodePoint(
charCode1: number,
charCode2: number,
): number {
return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
}

// Credit for this encoding approach goes to Tim Down:
// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
export function utfEncodeString(string: string): Array<number> {
const cached = encodedStringCache.get(string);
if (cached !== undefined) {
return cached;
}

const encoded = new Array(string.length);
for (let i = 0; i < string.length; i++) {
encoded[i] = string.codePointAt(i);
const encoded = [];
let i = 0;
let charCode;
while (i < string.length) {
charCode = string.charCodeAt(i);
// Handle multibyte unicode characters (like emoji).
if ((charCode & 0xf800) === 0xd800) {
encoded.push(surrogatePairToCodePoint(charCode, string.charCodeAt(++i)));
} else {
encoded.push(charCode);
}
++i;
}

encodedStringCache.set(string, encoded);

return encoded;
}

Expand Down

0 comments on commit 891c7af

Please sign in to comment.