-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.js
113 lines (103 loc) · 3.93 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
require('@tensorflow/tfjs-node');
const toxicity = require('@tensorflow-models/toxicity');
const parser = require('fast-xml-parser');
const language = require('@google-cloud/language');
const client = new language.LanguageServiceClient();
module.exports = {
parseXML: async function parseXML(f) {
if (parser.validate(f) === true) {
const options = {
attributeNamePrefix: "",
attrNodeName: false, //default is 'false'
textNodeName: "",
ignoreAttributes: false,
ignoreNameSpace: false,
allowBooleanAttributes: false,
parseNodeValue: false,
parseAttributeValue: true,
trimValues: true,
cdataTagName: false, //default is 'false'
cdataPositionChar: "\\c",
parseTrueNumberOnly: false,
arrayMode: false, //"strict"
stopNodes: []
};
const jsonObj = parser.parse(f, options);
const messages = jsonObj.smses.sms;
let jsonArr = [];
messages.forEach(messageMeta => {
let body = messageMeta.body.toString() || ''
body = body.replace(/(\?|\.|&#[0-9]+;|\!)/gm, '')
// body = body.replace(/\./gm,, '');
// body = body.replace(/\!/g, '');
// body = body.replace(/\&\#.*;/g, '');
// body = body.replace(/\?/g, '');
if (body && body != '') {
let date = messageMeta.date
const type = messageMeta.type - 1
while (jsonArr.length <= type) {
jsonArr.push([])
}
let senderArr = jsonArr[type]
let messageData = {
body: body,
date: date.toString()
}
senderArr.push(messageData)
}
});
return jsonArr;
}
},
addSentimentGCP: async (parsedXML) => {
const sentencesArray = getSentencesArray(parsedXML)
const combinedSentences = sentencesArray.join(". ") + "."
const [result] = await client.analyzeSentiment({
document: {
content: combinedSentences,
type: 'PLAIN_TEXT'
}
});
let sentimentsObj = {}
result.sentences.forEach((sentence) => {
// console.log("Sentence: ", sentence.text.content.replace(". ", ""))
sentimentsObj[sentence.text.content.replace(".", "")] = sentence.sentiment.score;
})
parsedXML.forEach((userEntry) => {
userEntry.forEach((textObject) => {
textObject.score = sentimentsObj[textObject.body]
if (textObject.score == undefined) {
console.log("error finding: ", textObject.body)
}
})
})
},
addSentimentTFJS: async (parsedXML) => {
const sentencesArray = getSentencesArray(parsedXML)
//identity_attack, insult, obscene, severe_toxicity, sexual_explicit, threat, toxicity,
const model = await toxicity.load(0.5, ['toxicity']);
const [predictions] = await model.classify(sentencesArray)
const sentiments = predictions.results.map((e) => {
return e.probabilities[1]
})
pushSentiments(parsedXML, sentiments)
}
}
/**
 * Flattens the per-type buckets into a single list of message bodies,
 * preserving bucket order and then record order within each bucket.
 *
 * @param {Array<Array<{body: *}>>} parsedXML - Buckets of message records.
 * @returns {Array<*>} The `body` of every record, in traversal order.
 */
function getSentencesArray(parsedXML) {
  return parsedXML.reduce((collected, bucket) => {
    bucket.forEach((record) => {
      collected.push(record.body);
    });
    return collected;
  }, []);
}
/**
 * Writes `sentiments[k]` onto the `score` property of the k-th message record,
 * counting records across buckets in traversal order (bucket by bucket, then
 * record by record). Records beyond the end of `sentiments` receive
 * `undefined`.
 *
 * @param {Array<Array<Object>>} parsedXML - Buckets of records; mutated in place.
 * @param {Array<*>} sentiments - Scores in the same flattened order.
 * @returns {void}
 */
function pushSentiments(parsedXML, sentiments) {
  let cursor = 0;
  const assignNext = (record) => {
    record.score = sentiments[cursor];
    cursor += 1;
  };
  parsedXML.forEach((bucket) => {
    bucket.forEach((record) => assignNext(record));
  });
}