-
Notifications
You must be signed in to change notification settings - Fork 0
/
1.js
79 lines (72 loc) · 1.86 KB
/
1.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
var http = require('http');
/**
* Get forum page content.
* @param {function(Object, String?)} callback
*/
function getForumPage(callback) {
http.get("http://forum.academ.org/index.php?showforum=573", function (res) {
if (res.statusCode != 200) {
callback({no_page: true});
return;
}
var result = [];
res.on('data', function (chunk) {
result.push(chunk);
});
res.on('end', function () {
callback(null, result.join(''));
});
}).on('error', function (e) {
console.log("Got error: " + e.message);
callback({request_error: e});
});
}
getForumPage(function(err, text){
if (err || !text) {
console.log('err:');
console.log(err);
return;
}
var topics_texts = extractForumPageTopics(text);
var i;
for (i = 0; i < topics_texts.length; i++) {
console.log('-------------------------');
console.log(topics_texts[i]);
console.log(parseTopicHeaderText(topics_texts[i]));
}
});
/**
* Parse forum page as topics.
* @param {String} text
* @return {Array.<{id: String, text: String}>}
*/
function extractForumPageTopics(text) {
var re = /<!-- Begin Topic Entry (\d+) -->([\s\S]*?)<!-- End Topic Entry \1 -->/g;
var match;
var matches = [];
while (match = re.exec(text)) {
matches.push({id:match[1], text:match[2]})
}
return matches;
}
/**
* @param {String} text
* @param {{tag: String, id: String}} element
* @return {String}
*/
function parseElementContent(text, element) {
var re = new RegExp('<' + element.tag + '\\s[^>]*id=("|\')' + element.id + '\\1[^>]*?>([\\s\\S]*?)<', '');
var match = re.exec(text);
return match ? match[2] : '';
}
/**
*
* @param {{id: String, text: String}} header
* @return {Object}
*/
function parseTopicHeaderText(header) {
return {
link: parseElementContent(header.text, {tag: 'a', id: 'tid-link-' + header.id}),
desc: parseElementContent(header.text, {tag: 'span', id: 'tid-desc-' + header.id})
};
}