Skip to content

Commit

Permalink
Added a delay between crawl requests and added configurable HTTP headers
Browse files Browse the repository at this point in the history
  • Loading branch information
fhamon committed Jun 15, 2017
1 parent c530dee commit e2d28ec
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 35 deletions.
66 changes: 34 additions & 32 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,41 +90,43 @@ sitemap(config, function (sitemap, urls) {

var results = _.map(urls, function (url, index) {
console.log('Registered ' + url.url);
var processResults = processOne(config, url, function (error, record) {
if (!!error || !record) {
console.error('Error! ' + error.message);
if (!!error.pageNotFound && !!record) {
pages.deleteObject(record.objectID, function (error, result) {
console.log('Object ' + record.objectID + ' has been deleted');
});
}
removeOldEntries();
return;
}

pages.saveObject(record, function (error, result) {
if (!!error) {
console.log();
if (!!result && !!result.message) {
console.error('Error! ' + result.message);
}
if (!!error && !!error.message) {
console.error('Error! ' + error.message);
setTimeout(function () {
var processResults = processOne(config, url, function (error, record) {
if (!!error || !record) {
console.error('Error! ' + error.message);
if (!!error.pageNotFound && !!record) {
pages.deleteObject(record.objectID, function (error, result) {
console.log('Object ' + record.objectID + ' has been deleted');
});
}
console.log();
} else if (record.objectID !== result.objectID) {
console.log();
console.error('Error! Object ID mismatch!');
console.log();
} else {
console.log('Object %s:%s saved (%s)', record.objectID, record.lang, record.url);
removeOldEntries();
return;
}
removeOldEntries();

pages.saveObject(record, function (error, result) {
if (!!error) {
console.log();
if (!!result && !!result.message) {
console.error('Error! ' + result.message);
}
if (!!error && !!error.message) {
console.error('Error! ' + error.message);
}
console.log();
} else if (record.objectID !== result.objectID) {
console.log();
console.error('Error! Object ID mismatch!');
console.log();
} else {
console.log('Object %s:%s saved (%s)', record.objectID, record.lang, record.url);
}
removeOldEntries();
});
});
});
if (!processResults.ok) {
console.error(processResults.message || 'Error!');
}
if (!processResults.ok) {
console.error(processResults.message || 'Error!');
}
}, config.delayBetweenRequest * index);
});

console.log('Sitemap %s registered %s / %s urls', sitemap.url, results.length, urls.length);
Expand Down
6 changes: 5 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@
{"url": "http://example.com/en/sitemap.xml", "lang": "en"}
],
"http": {
"auth": ""
"auth": "",
"headers": {

}
},
"delayBetweenRequest": 100,
"selectors": {
"title": "title",
"image": "meta[property=\"og:image\"]",
Expand Down
3 changes: 2 additions & 1 deletion lib/process.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ module.exports = function (config, url, cb) {
port: parsedUrl.port || (parsedUrl.protocol === 'https:' ? 443 : 80),
path: parsedUrl.pathname || '/',
method: 'GET',
auth: config.http && config.http.auth
auth: config.http && config.http.auth,
headers: config.http.headers
};

if (!httpOptions.hostname) {
Expand Down
3 changes: 2 additions & 1 deletion lib/sitemap.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ module.exports = function (config, cb) {
port: parsedUrl.port || (parsedUrl.protocol === 'https:' ? 443 : 80),
path: parsedUrl.path,
method: 'GET',
auth: config.http && config.http.auth
auth: config.http && config.http.auth,
headers: config.http.headers
};
var client = parsedUrl.protocol === 'https:' ? https : http;

Expand Down

0 comments on commit e2d28ec

Please sign in to comment.