diff --git a/README.md b/README.md index 09bc645..5a39554 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,20 @@ CSS selector for the title of the page. CSS selector for the "key" property. You can add custom keys as you wish. +### exclusions: Object + +An object containing CSS selectors to find elements that must not be indexed. +Those CSS selectors are matched for each node and are checked against all their parents to make +sure none of their parents is excluded. + +#### exclusions.text: String + +CSS selector of excluded elements for the text of the page. + +#### exclusions[key]: String + +CSS selector of excluded elements for the "key" property. The key must match the one used in selectors[key]. + #### formatters: Object An object containing formatter string. Their values are removed from the original result obtained diff --git a/app.js b/app.js index 35a8c48..67786bf 100644 --- a/app.js +++ b/app.js @@ -59,7 +59,8 @@ config.selectors = _.map(config.selectors, (selector, key) => { return { key, attributes: selector.attributes, - selector: selector.selector + selector: selector.selector, + exclude: config.exclusions && config.exclusions[key] }; }); diff --git a/config.json b/config.json index 313568f..6dc5c15 100644 --- a/config.json +++ b/config.json @@ -29,6 +29,9 @@ "image": "meta[property=\"og:image\"]", "description": "meta[name=\"description\"]", "text": "h1, h2, h3, h4, h5, h6, p, li" + }, + "exclusions": { + }, "formatters": { "title": "-" diff --git a/lib/process.js b/lib/process.js index 536b9e8..43df4c6 100644 --- a/lib/process.js +++ b/lib/process.js @@ -87,7 +87,11 @@ const parse = (record, data, config) => { const key = selector.key; if (record[key] === undefined) { record[key] = []; - const nodes = $(selector.selector); + // Fetch all and filter exclusions + const nodes = $(selector.selector).filter((i, node) => { + return !selector.exclude || $(node).closest(selector.exclude).length === 0; + }); + // Populate the record _.each(nodes, (node) => 
recursiveFindValue(node, record[key], selector.attributes)); diff --git a/test/parse.js b/test/parse.js index bac430a..91fb802 100644 --- a/test/parse.js +++ b/test/parse.js @@ -43,6 +43,19 @@ test('Custom selector parse', (t) => { t.end(); }); +test('Selector exclusion parse', (t) => { + const rec = {}; + const c = _.clone(config); + const data = ` + test + +`; + c.selectors.push({key: 'links', selector: 'a', exclude: 'footer'}); + parse(rec, data, c); + t.equal(rec.links, 'test'); + t.end(); +}); + test('JSON formatter', (t) => { const rec = {}; const c = _.clone(config);