-
Notifications
You must be signed in to change notification settings - Fork 0
/
allTimeCount.js
111 lines (100 loc) · 2.06 KB
/
allTimeCount.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/* eslint-disable no-console */
const path = require('path')
const {
readMeta,
wordCountMap,
wordCountMapToArray
} = require('./util')
// Number of ranked words reported per president; also fixes how many
// word/count column pairs each output row carries.
const maxWords = 15

// Presidents included in the report. Each name is the suffix of a
// `processed_text_<name>` directory and the first column of its row.
const presidents = [
  'allende',
  'bachelet',
  'fernandez',
  'kirchner',
  'macri',
  'piñera'
]

// Words excluded from the ranking. Each word is listed once; a Set would
// discard repeats anyway.
// NOTE(review): 'ciente' and 'gaños' look like typos — confirm against the
// corpus before changing them, since they are matched literally.
const ignore = new Set([
  'país', 'chile', 'pais', 'año', 'personas', 'gobierno',
  'importante', 'quiero', 'salud', 'mil', 'region',
  'argentina', 'argentinos', 'mundo', 'millones', 'años',
  'ciente', 'republica', 'seguir', 'gracias', 'materia', 'nacional',
  'señor', 'señores', 'argentino', 'presidente',
  'gaños', 'aca', 'juntos', 'camino', 'realmente',
  'chilenos', 'forma'
])
/**
 * Prints the tab-separated report to stdout: one header row followed by one
 * row per entry in `presidents`.
 *
 * The header is `presidente` plus a `palabra N` / `cantidad palabra N` column
 * pair for every rank from 1 to `maxWords`. Data rows are produced by
 * `printAlltimeCount`, which reads the input directory from `process.argv[2]`.
 *
 * When that command-line argument is missing, a usage message is written to
 * stderr, the process exit code is set to 1 and nothing goes to stdout.
 */
function main() {
  // Validate up front: otherwise the header is emitted and the first row then
  // fails with a TypeError from `path.join(undefined, ...)`.
  if (process.argv.length < 3) {
    console.error('Usage: node allTimeCount.js <processed_text_parent_dir>')
    process.exitCode = 1
    return
  }

  const encabezado = ['presidente']
  for (let i = 1; i <= maxWords; i++) {
    encabezado.push(`palabra ${i}`, `cantidad palabra ${i}`)
  }
  console.log(encabezado.join('\t'))

  presidents.forEach(p => printAlltimeCount(p))
}
/**
 * Prints one tab-separated row with the top words across every speech of
 * `president`.
 *
 * Reads the `processed_text_<president>` directory under the directory given
 * as the first command-line argument (`process.argv[2]`), sums the per-speech
 * word counts, drops the words listed in `ignore` and keeps the first
 * `maxWords` entries in the order returned by `wordCountMapToArray`
 * (presumably most frequent first — confirm in ./util). Rows with fewer
 * entries are padded with `NULL` / `0` pairs so every row has the same number
 * of columns.
 *
 * @param {string} president - Directory suffix and first column of the row.
 */
function printAlltimeCount(president) {
  const parentDir = process.argv[2]
  const speechesDir = path.join(parentDir, 'processed_text_' + president)
  const meta = readMeta(speechesDir)

  const totals = {}
  const hasOwn = Object.prototype.hasOwnProperty
  meta.forEach(discurso => {
    const counts = wordCountMap(discurso.filePath)
    Object.keys(counts).forEach(word => {
      // Own-property check: a truthiness test would pick up inherited members
      // for words such as "constructor" or "toString" and then append the
      // count to a function's source text instead of adding numbers.
      if (hasOwn.call(totals, word)) {
        totals[word] += counts[word]
      } else {
        totals[word] = counts[word]
      }
    })
  })

  const wordsCount = wordCountMapToArray(totals)
    .filter(w => !ignore.has(w.word))
    .slice(0, maxWords)

  // Pad short rows so they line up with the header columns.
  while (wordsCount.length < maxWords) {
    wordsCount.push({word: 'NULL', count: 0})
  }

  const formattedCount = wordsCount.map(w => `${w.word}\t${w.count}`).join('\t')
  console.log(`${president}\t${formattedCount}`)
}
// Script entry point: the report is generated as soon as this file is run.
main()