Validate URLs against robots.txt rules. Configure it with the output of robots-txt-parse (an end-to-end sketch follows the usage example below).
```js
var guard = require('robots-txt-guard');

var robotsTxt = guard({
  groups: [{
    agents: [ '*' ],
    rules: [
      { rule: 'allow', path: '/' }
    ]
  }, {
    agents: [ 'googlebot', 'twitterbot' ],
    rules: [
      { rule: 'disallow', path: '/tmp/*' },
      { rule: 'disallow', path: '/temporary/*' },
      { rule: 'noindex', path: '/temporary/*' }
    ]
  }]
});

robotsTxt.isAllowed('Googlebot', '/tmp/abc'); // false
robotsTxt.isAllowed('mozilla', '/tmp/abc'); // true
robotsTxt.isAllowed('googlebot-news', '/home.html'); // true

robotsTxt.isIndexable('googlebot', '/tmp/*'); // true
robotsTxt.isIndexable('googlebot', '/temporary/*'); // false
```
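
To build that configuration object from a real robots.txt file, the output of robots-txt-parse can be passed straight into `guard()`. The snippet below is a minimal sketch: it assumes robots-txt-parse exports a single function that takes a readable stream and returns a promise resolving to the parsed structure shown above (check that package's README for the exact signature), and the local `robots.txt` path is only illustrative.

```js
var fs = require('fs');
var parse = require('robots-txt-parse'); // assumed API: readable stream in, promise of parsed groups out
var guard = require('robots-txt-guard');

// Parse a local robots.txt file and wrap the parsed groups in a guard for lookups.
parse(fs.createReadStream('robots.txt'))
  .then(function (robots) {
    var robotsTxt = guard(robots);

    // Result depends on the rules in your own robots.txt.
    console.log(robotsTxt.isAllowed('googlebot', '/tmp/abc'));
    console.log(robotsTxt.isIndexable('googlebot', '/temporary/page.html'));
  });
```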