Skip to content

Commit

Permalink
+ better support for surrogate detection while dehushing, now support…
Browse files Browse the repository at this point in the history
…ing properly sanitizing text with unicode and flag emojis
  • Loading branch information
Livshitz committed Jan 17, 2024
1 parent 644fcf2 commit aa1b9bc
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 19 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "husher",
"version": "0.1.5",
"version": "0.2.0",
"main": "./build/Husher.js",
"license": "MIT",
"author": "feedox",
Expand Down
40 changes: 26 additions & 14 deletions src/Husher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,53 @@ export class Husher {
}

/**
 * Recomputes `this.separator` from the current options.
 *
 * `String.fromCharCode` keeps only the low 16 bits of its argument. With the
 * default `ModuleOptions` values the sum is 0xFF40 + 0xE0000 + 0xDC00 =
 * 0xFDB40, so the stored separator is the single UTF-16 code unit 0xDB40.
 */
public setSeparator(): void {
  const { sepCode, lowSMPA, highSurrogateOffset } = this.options;
  // Assigned once: an earlier duplicate assignment produced the same value
  // and was overwritten immediately.
  this.separator = String.fromCharCode(sepCode + lowSMPA + highSurrogateOffset);
}

/**
 * Encodes `clearTest` into its hushed form.
 *
 * Every UTF-16 code unit of the input is shifted by
 * `lowSMPA + highSurrogateOffset` and passed through `String.fromCharCode`,
 * which keeps only the low 16 bits. With the default options that leaves
 * `0xDC00 + unit`. Each encoded unit is preceded by `this.separator`.
 *
 * NOTE(review): assumes `this.separator` was produced by `setSeparator()`
 * (0xDB40 with default options), so each separator/unit pair forms the
 * surrogate pair of code point U+E0000 + unit for units below 0x400.
 *
 * @param clearTest Plain text to encode.
 * @returns The separator followed by the encoded units joined by the
 *          separator; just the separator for an empty input.
 */
public hush(clearTest: string): string {
  // Loop-invariant shift, computed once from the current option names.
  const shift = this.options.lowSMPA + this.options.highSurrogateOffset;
  const encodedUnits = clearTest
    .split('')
    .map(unit => String.fromCharCode(unit.charCodeAt(0) + shift));

  return this.separator + encodedUnits.join(this.separator);
}

/**
 * Decodes hushed characters in `encodedText` and leaves all other characters
 * untouched.
 *
 * @param encodedText Text that may contain hushed characters.
 * @param skipSeparator When true, decoded characters are emitted without the
 *        `sepCode` prefix and literal `sepCode` characters are dropped.
 * @returns One entry per Unicode code point of `encodedText`.
 */
public dehush(encodedText: string, skipSeparator = false): string[] {
  // Array.from iterates by code point, so surrogate pairs (emoji, flags,
  // hushed characters) arrive as one two-unit string each.
  const chars = Array.from(encodedText);

  return chars.map(char => {
    const part = char.charCodeAt(0);
    let decoded = char;

    // Only code points inside [lowSMPA, highSMPA] carry a hidden payload.
    // Testing the first code unit alone (`part >= lowSurrogateBase`) would
    // also match ordinary BMP characters at or above 0xDB40 (for example the
    // separator U+FF40 itself). Those have no second code unit, so the
    // payload would be NaN and the character would be turned into "\0".
    if (this.isSMPA(char)) {
      const payload = char.charCodeAt(1);
      const deciphered = payload - this.options.highSurrogateOffset;
      decoded = skipSeparator
        ? String.fromCharCode(deciphered)
        : String.fromCharCode(this.options.sepCode, deciphered);
    }

    if (skipSeparator && part === this.options.sepCode) return '';
    return decoded;
  });
}

/**
 * Returns `encodedText` as one plain string: runs `dehush` with
 * `skipSeparator` enabled and concatenates the resulting pieces.
 */
public sanitize(encodedText: string) {
  const pieces = this.dehush(encodedText, true);
  return pieces.join('');
}

/**
 * Reports whether the first code point of `char` lies in the inclusive range
 * [`lowSMPA`, `highSMPA`].
 *
 * @param char String whose first code point is inspected.
 * @returns `false` for an empty string, otherwise the result of the range check.
 */
private isSMPA(char: string): boolean {
  const codePoint = char.codePointAt(0);
  // codePointAt returns undefined when the string is empty.
  if (codePoint === undefined) return false;
  return codePoint >= this.options.lowSMPA && codePoint <= this.options.highSMPA;
}
}

/**
 * Numeric constants used by `Husher` for encoding and decoding.
 *
 * Two sets of names with identical values are kept so that every existing
 * reference keeps resolving. `sepCode` is declared exactly once.
 */
export class ModuleOptions {
  // Earlier names, same values as the fields below.
  // NOTE(review): these look superseded; confirm nothing reads them before removing.
  highSurrogateBase = 0xDB40; // 56128
  lowSurrogateOffset = 0xDC00; // 56320
  offset = 0xE0000; // 917504

  // NOTE(review): 0xDB40 lies in the UTF-16 high-surrogate range
  // (0xD800-0xDBFF) and 0xDC00 is the first low surrogate, so these two names
  // read inverted relative to UTF-16 terminology. Left as-is because callers
  // reference them.
  lowSurrogateBase = 0xDB40; // 56128
  highSurrogateOffset = 0xDC00; // 56320

  // Inclusive code-point range treated as hidden payload.
  lowSMPA = 0xE0000; // 917504
  highSMPA = 0xE0FFF; // 921599

  // Code of the visible separator character (U+FF40).
  sepCode = 0xff40; // 65344
}

// Ready-to-use module-level Husher instance, constructed with no arguments.
export const husher = new Husher();
24 changes: 20 additions & 4 deletions tests/Husher.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ test('husher.hush', async () => {

// dehush without skipSeparator: each array entry is the separator followed by
// one decoded character, so join() yields comma-separated two-character pieces.
test('husher.dehush', async () => {
  // The input consists solely of invisible hushed characters.
  const input = `󠁨󠁥󠁬󠁬󠁯󠀠󠁷󠁯󠁲󠁬󠁤`;
  const expected = '`h,`e,`l,`l,`o,` ,`w,`o,`r,`l,`d';
  const output = husher.dehush(input).join();
  // The length is derived from the expected string instead of a hard-coded number.
  expect(output.length).toEqual(expected.length);
  // Debug aid: output.split('').map(x=>x.charCodeAt(0))
  expect(output).toEqual(expected);
});
Expand All @@ -22,7 +22,7 @@ test('husher.dehush - no separator', async () => {
const input = `󠁨󠁥󠁬󠁬󠁯󠀠󠁷󠁯󠁲󠁬󠁤`;
const expected = 'hello world';
const output = husher.dehush(input, true).join('');
expect(output.length).toEqual(11);
expect(output.length).toEqual(expected.length);
// output.split('').map(x=>x.charCodeAt(0))
expect(output).toEqual(expected);
});
Expand All @@ -31,7 +31,23 @@ test('husher.sanitize', async () => {
const input = `yo, 󠁨󠁥󠁬󠁬󠁯󠀠󠁷󠁯󠁲󠁬󠁤`;
const expected = 'yo, hello world';
const output = husher.sanitize(input);
expect(output.length).toEqual(15);
expect(output.length).toEqual(expected.length);
// output.split('').map(x=>x.charCodeAt(0))
expect(output).toEqual(expected);
});

// A flag emoji followed by hushed characters must come out as the flag plus the decoded text.
test('husher.sanitize-flag', async () => {
  const poisoned = `🇮🇱󠁨󠁥󠁬󠁬󠁯`;
  const wanted = '🇮🇱hello';

  const sanitized = husher.sanitize(poisoned);

  expect(sanitized.length).toEqual(wanted.length);
  expect(sanitized).toEqual(wanted);
});

// Mixed ASCII, flag emoji and Hebrew text must pass through unchanged, with the hushed tail decoded.
test('husher.sanitize-mix', async () => {
  const poisoned = `hello world from 🇮🇱 שלום לכם!󠁨󠁥󠁬󠁬󠁯`;
  const wanted = 'hello world from 🇮🇱 שלום לכם!hello';

  const sanitized = husher.sanitize(poisoned);

  expect(sanitized.length).toEqual(wanted.length);
  expect(sanitized).toEqual(wanted);
});

0 comments on commit aa1b9bc

Please sign in to comment.