-
Notifications
You must be signed in to change notification settings - Fork 2
/
anonymize.js
102 lines (79 loc) · 2.47 KB
/
anonymize.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
var fs = require('fs');
// Rule sets per language, keyed by the lower-cased language directory name.
// Filled in by laodDictionaries() and read by removePersonalData().
var DICTIONARIES = {};
// NOTE(review): not referenced by any code visible in this file.
var DATA_VAULT = {};
/**
 * Replace personal data in `text` with placeholder keys.
 *
 * The rules for `language` are looked up (case-insensitively) in DICTIONARIES
 * and applied in order. Every replaced fragment is stored in the returned
 * `data` map under its placeholder key (`<rule.name>_<n>`, where `n` is a
 * counter shared by all rules of this call), so that restorePersonalData()
 * can put the original fragments back later.
 *
 * @param {string} text - Text to anonymize.
 * @param {string} language - Language name; unknown or missing languages
 *   have no rules, so the text is returned unchanged.
 * @returns {{text: string, data: Object<string, string>}} The anonymized text
 *   and the placeholder-to-original map.
 */
function removePersonalData(text, language) {
  // A missing language is treated like an unknown one instead of throwing.
  var rules = typeof language === 'string'
    ? DICTIONARIES[language.toLowerCase()]
    : undefined;
  var data = {};
  var keyCount = 0;
  var nextKey = function (rule) {
    return rule.name + '_' + keyCount++;
  };

  for (var i = 0; rules && i < rules.length; ++i) {
    var rule = rules[i];
    if (rule.type === 'list') {
      text = replaceListWords(text, rule, data, nextKey);
    }
    if (rule.type === 'regexp') {
      text = replaceRegexpMatches(text, rule, data, nextKey);
    }
  }

  return {
    text: text,
    data: data
  };
}

// Lower-case `text` while guaranteeing the result has the same length, so
// that offsets found in the lowered string are valid in the original.
function lowerPreservingLength(text) {
  var lowered = text.toLowerCase();
  if (lowered.length === text.length) {
    return lowered;
  }
  // Some characters (e.g. U+0130) lower-case to more than one code unit;
  // keep those characters as they are to preserve alignment.
  return Array.from(text, function (ch) {
    var lc = ch.toLowerCase();
    return lc.length === ch.length ? lc : ch;
  }).join('');
}

// Build the search form of `text`: lower-cased, delimiters turned into single
// spaces, and one space added at each end so that words at the very start or
// end of the text are surrounded by spaces too. Index k in the result
// corresponds to index k - 1 in `text`.
function normalizeForListMatch(text) {
  return ' ' + lowerPreservingLength(text).replace(/[\s.,?;!)(]/g, ' ') + ' ';
}

// Replace every whole-word occurrence of the rule's list entries in `text`
// with a fresh key, recording the original spelling in `data`.
function replaceListWords(text, rule, data, nextKey) {
  var haystack = normalizeForListMatch(text);

  for (var j = 0; j < rule.data.length; ++j) {
    var word = rule.data[j].trim();
    if (!word) {
      // Blank dictionary lines would otherwise match every double delimiter.
      continue;
    }
    var needle = ' ' + word + ' ';
    var from = 0;
    var at = haystack.indexOf(needle, from);

    while (at > -1) {
      // Because of the extra leading space in `haystack`, the index of the
      // needle's leading space equals the index of the word in `text`.
      var end = at + word.length;
      var key = nextKey(rule);
      data[key] = text.substring(at, end);
      text = text.substring(0, at) + key + text.substring(end);
      haystack = normalizeForListMatch(text);
      // Resume after the inserted key so a key can never be re-matched.
      from = at + key.length + 1;
      at = haystack.indexOf(needle, from);
    }
  }

  return text;
}

// Replace every match of the rule's regular expression in `text` with a fresh
// key, recording the matched text in `data`. Replacing by match position
// (rather than by searching for the matched string again) keeps a later match
// from being substituted inside a key inserted for an earlier one.
function replaceRegexpMatches(text, rule, data, nextKey) {
  return text.replace(rule.data, function (match) {
    if (match === '') {
      return match;
    }
    var key = nextKey(rule);
    data[key] = match;
    return key;
  });
}
/**
 * Put the original fragments back into an anonymized text.
 *
 * Every occurrence of every key of `data` found in `text` is replaced by the
 * corresponding value. All keys are substituted in a single pass, longest key
 * first, so that `name_1` is never replaced inside `name_10`, and a restored
 * value is never scanned again for further keys. Values are inserted
 * literally (sequences such as `$&` have no special meaning).
 *
 * @param {string} text - Text containing placeholder keys.
 * @param {Object<string, string>} data - Placeholder-to-original map, as
 *   returned in the `data` field of removePersonalData().
 * @returns {string} The text with placeholders replaced by their originals;
 *   `text` itself when `data` is empty or missing.
 */
function restorePersonalData(text, data) {
  if (data === null || data === undefined) {
    return text;
  }

  var keys = Object.keys(data).filter(function (key) {
    return key.length > 0;
  });
  if (keys.length === 0) {
    return text;
  }

  // Longest first: in a regexp alternation the first alternative that
  // matches wins, so longer keys must be tried before their prefixes.
  keys.sort(function (a, b) {
    return b.length - a.length;
  });

  var escapeForRegExp = function (literal) {
    return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  };
  var pattern = new RegExp(keys.map(escapeForRegExp).join('|'), 'g');

  // A function replacer inserts the value verbatim.
  return text.replace(pattern, function (key) {
    return String(data[key]);
  });
}
/**
 * Build a random GUID-shaped string: five dash-separated groups of
 * 8, 4, 4, 4 and 12 lower-case hexadecimal digits.
 *
 * The digits come from Math.random(), so the result is not suitable where
 * unpredictability matters.
 *
 * @returns {string} A string such as "3f2a9c1e-07bd-4e11-a0c3-9d5e6f7a8b9c".
 */
function guid() {
  // Four random hex digits. Adding 1 before scaling yields a five-digit hex
  // number starting with "1"; dropping that digit keeps leading zeros.
  var hexQuad = function () {
    var scaled = Math.floor((1 + Math.random()) * 0x10000);
    return scaled.toString(16).substring(1);
  };

  // Number of four-digit chunks in each dash-separated group.
  var quadsPerGroup = [2, 1, 1, 1, 3];
  var groups = [];

  for (var g = 0; g < quadsPerGroup.length; ++g) {
    var group = '';
    for (var q = 0; q < quadsPerGroup[g]; ++q) {
      group += hexQuad();
    }
    groups.push(group);
  }

  return groups.join('-');
}
module.exports = {
removePersonalData,
restorePersonalData
}
/**
 * Load every rule file under `<module dir>/dictionary/<language>/` into
 * DICTIONARIES, keyed by the lower-cased language directory name.
 *
 * A rule file is named `<name>.<type>`:
 *  - `list`   - one word or phrase per line; stored lower-cased, with blank
 *               lines and surrounding whitespace (including "\r") removed;
 *  - `regexp` - the file content is the source of a regular expression,
 *               compiled with the `i` and `g` flags. Trailing line breaks and
 *               a leading byte-order mark are not part of the pattern.
 * Entries of `dictionary/` that are not directories, and files of any other
 * type, are skipped.
 *
 * The (misspelled) function name is kept so the existing call site keeps
 * working.
 *
 * @throws {Error} Propagates file-system errors (e.g. missing `dictionary`
 *   directory) and SyntaxError for an invalid regular expression.
 */
function laodDictionaries() {
  var root = __dirname + "/dictionary";
  var languages = fs.readdirSync(root);

  for (var i = 0; i < languages.length; ++i) {
    var languageDir = root + "/" + languages[i];
    // Stray files next to the language directories (README, .DS_Store, ...)
    // must not abort loading.
    if (!fs.statSync(languageDir).isDirectory()) {
      continue;
    }

    var dictionary = [];
    var wordLists = fs.readdirSync(languageDir);

    for (var j = 0; j < wordLists.length; ++j) {
      var parts = wordLists[j].split('.');
      var item = {
        name: parts[0],
        type: parts[1]
      };
      if (item.type !== 'list' && item.type !== 'regexp') {
        continue;
      }

      var data = fs.readFileSync(languageDir + "/" + wordLists[j], 'utf8');

      if (item.type === 'list') {
        item.data = data
          .toLowerCase()
          .split(/\r?\n/)
          .map(function (line) { return line.trim(); })
          .filter(function (line) { return line.length > 0; });
      } else {
        // A trailing newline left in the source would force every match to
        // be followed by a line break.
        var source = data.replace(/^\uFEFF/, '').replace(/[\r\n]+$/, '');
        if (source === '') {
          // An empty pattern matches everywhere; treat the file as empty.
          continue;
        }
        item.data = new RegExp(source, 'ig');
      }

      dictionary.push(item);
    }

    DICTIONARIES[languages[i].toLowerCase()] = dictionary;
  }
}
// Fill DICTIONARIES synchronously while this module is being loaded.
laodDictionaries();