This repository has been archived by the owner on Jul 8, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
app.js
132 lines (101 loc) · 3.32 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import fs from 'fs';
import path from 'node:path';
import { inspect } from 'util';
import zlib from 'node:zlib';

import jszip from 'jszip';
import { parseString } from 'xml2js';

import Database from './Database.js';
import MailReader from './MailReader.js';
import config from './config.js';

// Folder that receives the extracted archives; doWork() wipes and recreates it on every run.
const PROCESSED_DATA_FOLDER = 'processedData/';
// Folder scanned by doWork() for `.zip` / `.gz` files to extract.
const RAW_DATA_FOLDER = 'rawdata/';

// Shared database handle, configured from the `postgres` section of config.js.
const db = new Database(config.postgres);
/**
 * Joins `entryName` onto `baseDir` and returns the absolute result, or `null`
 * when the normalized path would not be strictly inside `baseDir`
 * (for example an archive entry named "../../somewhere").
 *
 * @param {string} baseDir - Directory that must contain the result.
 * @param {string} entryName - Relative name taken from an archive.
 * @returns {string|null} Absolute destination path, or `null` if unsafe.
 */
function resolveInside(baseDir, entryName) {
  const root = path.resolve(baseDir);
  const target = path.join(root, entryName);
  return target.startsWith(root + path.sep) ? target : null;
}

/**
 * Extracts `RAW_DATA_FOLDER/<file>` (a zip archive) into
 * `PROCESSED_DATA_FOLDER/<file>/`.
 *
 * @param {string} file - Archive file name inside RAW_DATA_FOLDER.
 * @returns {Promise<void>}
 */
async function extractZipArchive(file) {
  const targetDir = PROCESSED_DATA_FOLDER + file;
  const zipReader = new jszip();
  const archive = await zipReader.loadAsync(fs.readFileSync(RAW_DATA_FOLDER + file));
  fs.mkdirSync(targetDir, { recursive: true });

  // Object.values yields own enumerable properties only, so no
  // hasOwnProperty filtering is needed.
  for (const entry of Object.values(archive.files)) {
    const destination = resolveInside(targetDir, entry.name);
    if (destination === null) {
      // The archive comes from outside; never write outside the target folder.
      console.error(`Skipping archive entry outside of ${targetDir}: ${entry.name}`);
      continue;
    }
    if (entry.dir) {
      fs.mkdirSync(destination, { recursive: true });
    } else {
      // An archive may list a nested file without an entry for its folder.
      fs.mkdirSync(path.dirname(destination), { recursive: true });
      fs.writeFileSync(destination, Buffer.from(await entry.async('arraybuffer')));
    }
  }
}

/**
 * Decompresses `RAW_DATA_FOLDER/<file>` (a gzip file) into
 * `PROCESSED_DATA_FOLDER/<folder>/<name>`, where `name` is the file name
 * without its trailing `.gz` and `folder` is `name` without a trailing `.xml`.
 *
 * The work is done synchronously so the output is completely written before
 * the caller starts reading the processed folder.
 *
 * @param {string} file - File name inside RAW_DATA_FOLDER, ending in `.gz`.
 */
function extractGzipFile(file) {
  // Strip suffixes only; a '.gz' or '.xml' elsewhere in the name is kept.
  const name = file.slice(0, -'.gz'.length);
  const folder = name.endsWith('.xml') ? name.slice(0, -'.xml'.length) : name;
  const targetDir = PROCESSED_DATA_FOLDER + folder;
  fs.mkdirSync(targetDir, { recursive: true });
  const compressed = fs.readFileSync(RAW_DATA_FOLDER + file);
  fs.writeFileSync(`${targetDir}/${name}`, zlib.gunzipSync(compressed));
}

/**
 * Promise adapter around the callback-based xml2js `parseString`.
 *
 * @param {string} xml - XML document text.
 * @returns {Promise<object>} The parsed document.
 */
function parseXml(xml) {
  return new Promise((resolve, reject) => {
    parseString(xml, (err, result) => {
      if (err) {
        reject(err);
      } else {
        resolve(result);
      }
    });
  });
}

/**
 * Parses one extracted XML file and inserts it through `db.insertData`
 * unless a record with the same `report_id` and `org_name` is already known.
 * Parse and insert failures are logged, not thrown, so one bad file does not
 * stop the remaining ones.
 *
 * @param {string} xmlPath - Path of the XML file to import.
 * @param {Array<{report_id: *, org_name: *}>} knownReports - Records already
 *   stored; a successfully inserted report is appended so that a second copy
 *   found later in the same run is skipped too.
 * @returns {Promise<void>}
 */
async function importReport(xmlPath, knownReports) {
  let result;
  try {
    result = await parseXml(fs.readFileSync(xmlPath, 'utf8'));
  } catch (err) {
    console.error(err);
    return;
  }
  if (!result) {
    console.error(`Unknown error while parsing ${xmlPath}`);
    return;
  }

  // Optional chaining: a document without this structure must not throw here.
  const metadata = result.feedback?.report_metadata?.[0];
  const reportId = metadata?.report_id?.[0];
  const orgName = metadata?.org_name?.[0];
  const hasIdentity = reportId !== undefined && orgName !== undefined;

  const alreadyStored = hasIdentity && knownReports.some((known) =>
    known.report_id === reportId && known.org_name === orgName
  );
  if (alreadyStored) {
    return;
  }

  try {
    await db.insertData(result, xmlPath);
    if (hasIdentity) {
      knownReports.push({ report_id: reportId, org_name: orgName });
    }
  } catch (e) {
    console.error(e);
  }
}

/**
 * Runs one complete import cycle:
 *  1. creates a MailReader and calls `retrieveLastMail()` on it
 *     (presumably this drops attachments into RAW_DATA_FOLDER — confirm in
 *     MailReader.js),
 *  2. wipes PROCESSED_DATA_FOLDER and extracts every `.zip` / `.gz` file of
 *     RAW_DATA_FOLDER into its own sub-folder,
 *  3. parses every extracted `.xml` file and inserts the ones that are not
 *     in the database yet.
 *
 * A file that cannot be extracted, parsed or inserted is logged and skipped.
 *
 * @returns {Promise<void>} Rejects if the mail retrieval, the folder setup or
 *   `db.selectAll()` fails.
 */
async function doWork() {
  fs.mkdirSync(RAW_DATA_FOLDER, { recursive: true });

  /// GET MAILS
  let mailReader;
  // MailReader receives `resolve` as its callback; presumably it is invoked
  // once the reader is ready to be used — confirm in MailReader.js.
  await new Promise((resolve) => {
    mailReader = new MailReader(config.mail.credentials, resolve, config.mail.directory);
  });
  try {
    await mailReader.retrieveLastMail();
  } finally {
    // Release the connection even when the retrieval fails.
    mailReader.closeConnection();
  }
  console.log('Did retrieveLastMail !');

  fs.rmSync(PROCESSED_DATA_FOLDER, { recursive: true, force: true });
  fs.mkdirSync(PROCESSED_DATA_FOLDER);

  /// Extract archives
  for (const file of fs.readdirSync(RAW_DATA_FOLDER)) {
    try {
      if (file.endsWith('.zip')) {
        console.log(`Unzipping ${file}`);
        await extractZipArchive(file);
      } else if (file.endsWith('.gz')) {
        console.log(`Gunzipping ${file}`);
        extractGzipFile(file);
      } else {
        console.log(`Invalid file: ${file}`);
      }
    } catch (err) {
      // A corrupt attachment must not prevent the other files from being processed.
      console.error(`Failed to extract ${file}:`, err);
    }
  }

  /// PARSE XML
  // Work on a copy so the array returned by the database layer is not mutated.
  const knownReports = [...(await db.selectAll())];
  for (const dir of fs.readdirSync(PROCESSED_DATA_FOLDER)) {
    if (!fs.lstatSync(PROCESSED_DATA_FOLDER + dir).isDirectory()) {
      console.log(`${dir} is not a directory !`);
      continue;
    }
    for (const file of fs.readdirSync(PROCESSED_DATA_FOLDER + dir)) {
      const xmlPath = `${PROCESSED_DATA_FOLDER}${dir}/${file}`;
      if (!xmlPath.endsWith('.xml')) {
        console.log('Invalid extracted file: ', file);
        continue;
      }
      // Sequential on purpose: each insert is finished (and recorded in
      // knownReports) before the next file is checked for duplicates.
      await importReport(xmlPath, knownReports);
    }
  }
}
/**
 * Starts one doWork() cycle and logs a failure instead of leaving the
 * returned promise unhandled, so a single failed run does not take down the
 * process and the next scheduled run still happens.
 */
function runScheduledImport() {
  doWork().catch((err) => {
    console.error('Import run failed:', err);
  });
}

runScheduledImport();
setInterval(runScheduledImport, 1000 * 60 * 60 * 12); // Run every 12 hours