-
Notifications
You must be signed in to change notification settings - Fork 0
/
page_counter.php
65 lines (51 loc) · 1.65 KB
/
page_counter.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<?php
// Request handler: given ?url=<site root>, read the site's robots.txt, follow
// every "Sitemap:" directive found there and print the number of distinct page
// URLs the sitemaps list. Prints nothing and answers HTTP 400 when the "url"
// parameter is missing or is not an absolute http(s) URL.

// Crawling large sitemaps can take far longer than the default execution limit.
set_time_limit(0);

// Values in $_GET are already URL-decoded by PHP; decoding them a second time
// would corrupt URLs that legitimately contain '%' or '+'.
$base_url = "";
if (isset($_GET["url"]) && is_string($_GET["url"])) {
    $base_url = rtrim(trim($_GET["url"]), "/");
}
$scheme = strtolower((string) parse_url($base_url, PHP_URL_SCHEME));

// Only absolute http(s) URLs are accepted so the request cannot be used to read
// local files or stream wrappers through file_get_contents().
// NOTE(review): the host is still caller-controlled, so internal hosts remain
// reachable (SSRF) - restrict the allowed hosts if this endpoint is public.
if ($base_url !== "" && filter_var($base_url, FILTER_VALIDATE_URL) && in_array($scheme, array("http", "https"), true)) {
    $urls = array();
    $robots = file_get_contents(sprintf("%s/robots.txt", $base_url));

    if ($robots !== false) {
        // Read robots.txt line by line: a directive's value ends at the first
        // whitespace, and the directive name is matched case-insensitively.
        foreach (preg_split('/\r\n|\r|\n/', $robots) as $line) {
            if (!preg_match('/^\s*sitemap\s*:\s*(\S+)/i', $line, $match)) {
                continue;
            }
            $candidate = $match[1];
            if (preg_match('#^https?://#i', $candidate)) {
                if (filter_var($candidate, FILTER_VALIDATE_URL)) {
                    $urls[] = $candidate;
                }
            } elseif (!preg_match('#^[a-z][a-z0-9+.-]*:#i', $candidate)) {
                // No scheme at all: treat the value as a path relative to the site root.
                $urls[] = $base_url . "/" . ltrim($candidate, "/");
            }
            // Any other scheme (file:, php:, data:, ...) is deliberately ignored.
        }
    }

    // Gather the pages of every sitemap, not just the first one.
    $total_uri = array();
    foreach (array_unique($urls) as $url) {
        $found = page_counter($url);
        // Only array results contribute; anything else counts as "no pages".
        if (is_array($found)) {
            foreach ($found as $uri) {
                $total_uri[] = (string) $uri;
            }
        }
    }

    // Prints 0 when robots.txt is unreachable or lists no usable sitemap.
    echo count(array_unique($total_uri));
} else {
    // Missing or unusable "url" parameter: no body, but signal the client error.
    http_response_code(400);
}
/**
 * Collect the page URLs listed in a sitemap, following sitemap indexes recursively.
 *
 * The document at $url is loaded with simplexml_load_file(). If it has <sitemap>
 * children it is treated as a sitemap index and the <loc> of each child is loaded
 * in turn; otherwise the <loc> of every <url> child is collected.
 *
 * NOTE(review): nested <loc> values are followed as-is, whatever their scheme;
 * restrict them here if sitemaps come from untrusted hosts.
 *
 * @param string|SimpleXMLElement $url   Location (URL or path) of the sitemap document;
 *                                       cast to string and trimmed before use.
 * @param int                     $depth Current nesting level of sitemap indexes;
 *                                       callers normally omit it.
 *
 * @return array List of page URL strings in document order (duplicates are kept).
 *               Empty when the location is blank, the document cannot be parsed,
 *               or the nesting limit is exceeded.
 */
function page_counter($url, $depth = 0) {
    // Indexes that reference themselves (or each other) would otherwise recurse forever.
    $max_depth = 5;
    $page_uris = array();

    if ($depth > $max_depth) {
        return $page_uris;
    }

    // The recursive call passes a SimpleXMLElement, hence the explicit string cast.
    $location = trim((string) $url);
    if ($location === "") {
        return $page_uris;
    }

    $xml = simplexml_load_file($location);
    if ($xml === false) {
        // Unreachable or malformed document: contribute no pages.
        return $page_uris;
    }

    if (isset($xml->sitemap)) {
        // Sitemap index: merge the pages of every referenced sitemap.
        foreach ($xml->sitemap as $sitemap) {
            $page_uris = array_merge($page_uris, page_counter($sitemap->loc, $depth + 1));
        }
    } else {
        // Plain sitemap: keep each non-empty <loc> as a trimmed string.
        foreach ($xml->url as $urlset_url) {
            $loc = trim((string) $urlset_url->loc);
            if ($loc !== "") {
                $page_uris[] = $loc;
            }
        }
    }

    return $page_uris;
}
?>