-
Notifications
You must be signed in to change notification settings - Fork 0
/
Parser.cpp
76 lines (66 loc) · 2.04 KB
/
Parser.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include"Parser.h"
#include<iterator>
std::vector<std::string> Parser::getLinks()
{
std::vector<std::string> res;
tree<HTML::Node>::iterator it = _dom.begin();
tree<HTML::Node>::iterator end = _dom.end();
for(; it != end; ++it)
{
if (it->tagName() == "a")
{
it->parseAttributes();
if (it->attribute("href").first)
{
auto temp = it->attribute("href").second;
while (temp[0] == '/')
temp = temp.substr(1);
if (temp.substr(0, 3) == "www" || temp.substr(0, 4) == "http")
res.push_back(temp);
else
res.push_back(_host + temp);
}
}
}
return res;
}
std::string Parser::getTitle()
{
tree<HTML::Node>::iterator it = _dom.begin();
tree<HTML::Node>::iterator end = _dom.end();
for(; it != end; ++it)
{
if (it->tagName() == "title")
{
return std::next(it)->text();
}
}
return "NO TITLE";
}
std::pair<std::string, std::string> Parser::getTitles(std::string name)
{
tree<HTML::Node>::iterator it = _dom.begin();
tree<HTML::Node>::iterator end = _dom.end();
for(; it != end; ++it)
{
if (it->tagName() == "a")
{
it->parseAttributes();
if (it->attribute("href").first && it->attribute("title").first)
{
auto title = it->attribute("title").second;
if (title.find(name) != std::string::npos)
{
auto temp = it->attribute("href").second;
while (temp[0] == '/')
temp = temp.substr(1);
if (temp.substr(0, 3) != "www" && temp.substr(0, 4) != "http")
temp = _host + temp;
_titles[title] = temp;
return std::pair<std::string, std::string>(title, temp);
}
}
}
}
return std::pair<std::string, std::string>("","");
}