Skip to content

Commit

Permalink
Escaped the BOM in the Uint8Array serializer
Browse files Browse the repository at this point in the history
  • Loading branch information
genki committed Feb 22, 2024
1 parent 070039e commit 6a9af99
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
31 changes: 25 additions & 6 deletions packages/qwik/src/core/util/string.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
'./string.unit.ts';
// pack bytes into valid UTF-16 string
//
// strategy:
Expand Down Expand Up @@ -58,6 +59,12 @@ export const packUint8Array = (bytes: Uint8Array) => {
code += String.fromCharCode(0xfffd, hi, lo);
surrogate = false; // reset surrogate
}
// escape the BOM
if (c === 0xfeff) {
// BOM
code += String.fromCharCode(0xfffd, 0xd801, 0xdc02);
continue;
}
// double the escape character
if (c === 0xfffd) {
code += String.fromCharCode(0xfffd);
Expand Down Expand Up @@ -99,21 +106,33 @@ export const unpackUint8Array = (code: string) => {
dbytes[j++] = c;
break; // break with escaped being true to adjust the length
}
if (c >= 0xd800 && c <= 0xdbff && escaped) {
if (escaped && c >= 0xd800 && c <= 0xdbff) {
escaped = false;
// faked high surrogate
if (c === 0xd800) {
// escaped low surrogate
i++; // skip the fake high surrogate
dbytes[j++] = code.charCodeAt(i); // save the low surrogate
} else if (c === 0xd801 && code.charCodeAt(i + 1) === 0xdc01) {
i++; // skip the fake low surrogate
dbytes[j++] = 0xd800; // save the escaped 0xD800
} else {
if (c === 0xd801) {
switch (code.charCodeAt(i + 1)) {
case 0xdc00: // this is the fake low surrogate
break;
case 0xdc01:
i++; // skip the fake low surrogate
dbytes[j++] = 0xd800; // save the escaped 0xD800
continue;
case 0xdc02:
i++; // skip the fake low surrogate
dbytes[j++] = 0xfeff; // save the escaped BOM
continue;
default:
continue;
}
}
// escaped high surrogate
dbytes[j++] = code.charCodeAt(i); // save the high surrogate
i++; // skip the fake low surrogate
}
escaped = false;
continue;
}
// normal codepoint
Expand Down
8 changes: 0 additions & 8 deletions packages/qwik/src/core/util/string.unit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ test('random pack/unpack of even array', () => {
for (let i = 0; i < 1000; i++) {
crypto.getRandomValues(a);
const packed = packUint8Array(a);
if (packed.includes('\ufeff')) {
// skip if the BOM is included because the TextDecoder strips a leading BOM when decoding
continue;
}
const code = enc.encode(packed);
const decoded = dec.decode(code);
expect(unpackUint8Array(decoded)).toStrictEqual(a);
Expand All @@ -34,10 +30,6 @@ test('random pack/unpack of odd array', () => {
for (let i = 0; i < 1000; i++) {
crypto.getRandomValues(a);
const packed = packUint8Array(a);
if (packed.includes('\ufeff')) {
// skip if the BOM is included because the TextDecoder strips a leading BOM when decoding
continue;
}
const code = enc.encode(packed);
const decoded = dec.decode(code);
expect(unpackUint8Array(decoded)).toStrictEqual(a);
Expand Down

0 comments on commit 6a9af99

Please sign in to comment.