Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

News sitemap #5

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
39 changes: 36 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,51 @@ First install the module:
$ npm install -s amphora-sitemaps
```

Then pass the module into Amphora as an item for the `plugins` array property.
Then, require the module and pass all the options you need to start the plugin:
```javascript
const amphoraSitemaps = require('amphora-sitemaps'),
amphoraSitemapsPlugin = amphoraSitemaps({ _news: { component: 'article' }});
```

After that, pass the module into Amphora as an item for the `plugins` array property.

```javascript
amphora({
...
plugins: [
...
require('amphora-sitemaps'),
amphoraSitemapsPlugin,
...
],
...
})
```

At startup time, the module will create and expose endpoints that you can hit in your browser to get all the published pages for that site in an XML response. `eg. yoursite.com/_sitemap`
## Options

The options object is used to initialize some endpoints like `_news`.

The object should have the following format:

```javascript
var options = {
_news: { // Endpoint name
component: 'article' // The component used to get the data for the sitemap
}
}
```

## Endpoints
At startup time, the module will create the following XML endpoints:

### _sitemap
Gets all the published pages.
pedro-rosario marked this conversation as resolved.
Show resolved Hide resolved

`eg. yoursite.com/_sitemap`

### _news
Gets pages based on the [Google News Sitemap](https://support.google.com/news/publisher-center/answer/74288?hl=en) guidelines.

In order to meet these guidelines, you must pass the name of a component whose data includes the `canonicalUrl` and `date` properties, because those fields are required.

`eg. yoursite.com/_news`
60 changes: 60 additions & 0 deletions lib/controllers/news.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
'use strict';

const h = require('highland'),
db = require('../services/db'),
xml = require('xml'),
{ setup, logError } = require('../services/logger'),
log = setup({ file: __filename }),
errorLogger = logError(log);

/**
 * Builds a Google News sitemap for a site and returns it as an XML stream.
 *
 * The stream is returned immediately. One `<url>` entry per row is pushed into
 * the `<urlset>` element as rows come out of `db.selectPublishedContent`, and
 * the element is closed once the source is exhausted. Errors raised along the
 * pipeline are passed to the logger instead of aborting the response.
 *
 * @param {object} req
 * @param {string} req.uri - request uri; `/_news` is stripped from it to get the site prefix
 * @param {object} options - also forwarded to `db.selectPublishedContent`
 * @param {function} options.componentFilter - predicate called with each row's `data`
 * @param {function} options.getKeywords - called with each row's `data`, must return a
 *   thenable that resolves to the keywords of the entry
 * @return {object} the stream created by `xml(..., { stream: true })`
 */
function getPublishedContent({ uri }, options) {
  const urlset = xml.element({
      _attr: {
        xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:news': 'http://www.google.com/schemas/sitemap-news/0.9'
      }
    }),
    stream = xml({ urlset }, { stream: true, declaration: true }),
    site = uri.replace('/_news', '');

  h(db.selectPublishedContent(site, options))
    .filter(content => options.componentFilter(content.data))
    // resolve the keywords of every entry; each promise is turned back into
    // a stream by the `flatMap(h)` below
    .map(({ data, title }) => {
      return options.getKeywords(data)
        .then(keywords => ({ data, title, keywords }));
    })
    .flatMap(h)
    .map(({ data, title, keywords }) => {
      const { date, canonicalUrl } = data;

      // `news:language` is only valid inside `news:publication`; it must not
      // be repeated as a direct child of `news:news`.
      // NOTE(review): `news:name` is filled with the site prefix and the
      // language is hard-coded to 'en' — confirm both match the publication.
      urlset.push({
        url: [
          { loc: canonicalUrl },
          { 'news:news': [
            { 'news:publication': [
              { 'news:name': site },
              { 'news:language': 'en' }
            ] },
            { 'news:publication_date': date },
            { 'news:title': title },
            { 'news:keywords': keywords }
          ] }
        ]
      });
    })
    .errors(errorLogger)
    .done(() => {
      // closing the root element ends the XML stream
      urlset.close();
    });

  return stream;
}

module.exports.getPublishedContent = getPublishedContent;
8 changes: 4 additions & 4 deletions lib/services/sitemaps.js → lib/controllers/sitemap.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
'use strict';

const _ = require('highland'),
db = require('./db'),
db = require('../services/db'),
xml = require('xml'),
{ setup, logError } = require('./logger'),
{ setup, logError } = require('../services/logger'),
log = setup({ file: __filename }),
errorLogger = logError(log);

Expand All @@ -14,11 +14,11 @@ const _ = require('highland'),
*/
function getPublishedPages({ uri, query }) {
const elem = xml.element({ _attr: { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' } }),
stream = xml({ urlset: elem }, { stream: true });
stream = xml({ urlset: elem }, { stream: true , declaration: true });

_(db.selectPublishedPages(uri, query))
.map(item => {
var arr = [];
const arr = [];

for (const key in item) {
if (item.hasOwnProperty(key)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
'use strict';

const sitemaps = require('./sitemaps'),
const sitemaps = require('./sitemap'),
h = require('highland'),
db = require('./db'),
db = require('../services/db'),
data = [{
loc: 'http://localhost.nymag.com/',
lastmod: '2018-10-01T15:45:44.494Z'
Expand Down
24 changes: 24 additions & 0 deletions lib/routes/_news.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'use strict';

const responses = require('../services/responses'),
  controller = require('../controllers/news'),
  handlers = {
    /**
     * Creates the express handler that answers with the news sitemap.
     * @param {object} options - forwarded untouched to the news controller
     * @returns {Function} `(req, res)` handler
     */
    getPublishedContent: options => (req, res) => {
      // the sitemap is served as XML
      res.type('text/xml');

      // pipe the controller's stream straight into the response
      const sitemapStream = controller.getPublishedContent(req, options);

      sitemapStream.pipe(res);
    },
  };

/**
 * Registers the news sitemap handlers on the root path of the given router.
 * `responses.allow` is installed for every method with `get` as the only
 * allowed one, followed by the GET handler itself.
 *
 * @param {object} router
 * @param {object} options - handed to the handler factory
 */
function routes(router, options) {
  const rootPath = '/';

  router.all(rootPath, responses.allow({ allow: ['get'] }));
  router.get(rootPath, handlers.getPublishedContent(options));
}

module.exports = routes;

// For testing
module.exports.route = handlers;
4 changes: 1 addition & 3 deletions lib/routes/_sitemap.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
'use strict';

const responses = require('../services/responses'),
controller = require('../services/sitemaps'),
controller = require('../controllers/sitemap'),
route = {
getList: function (req, res) {
// set response type
res.type('text/xml');
// We need to add in the xml version as first line
res.write('<?xml version="1.0" encoding="UTF-8"?>');
// stream from the db
controller.getPublishedPages(req).pipe(res);
},
Expand Down
2 changes: 1 addition & 1 deletion lib/routes/_sitemap.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict';

const sitemap = require('./_sitemap'),
controller = require('../services/sitemaps'),
controller = require('../controllers/sitemap'),
h = require('highland');

// The controller now lives in `../controllers/sitemap`; the mock has to target
// the same path the test requires, otherwise the real module is not replaced.
jest.mock('../controllers/sitemap');
Expand Down
17 changes: 17 additions & 0 deletions lib/services/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@ function selectPublishedPages(uri, { year = '' }) {
return db.raw(sql, params);
}

/**
 * Escapes a value so it can be embedded inside a single-quoted SQL string.
 * @param {string} value
 * @returns {string}
 */
function escapeSqlLiteral(value) {
  return String(value).replace(/'/g, '\'\'');
}

/**
 * Escapes a name so it can be embedded inside a double-quoted SQL identifier.
 * @param {string} name
 * @returns {string}
 */
function escapeSqlIdentifier(name) {
  return String(name).replace(/"/g, '""');
}

/**
 * Selects the content published during the last two days for a site.
 *
 * One sub-query is built per component table: it joins the component data with
 * `public.pages` (on canonical url) and `public.uris`, and keeps the rows whose
 * `date` falls between two days ago and now and whose id starts with
 * `<site>/_components`. The sub-queries are combined with UNION, ordered by
 * publication date (newest first) and limited to 1000 rows.
 *
 * `site` and the component names end up inside the SQL text, so both are
 * escaped first to keep a stray quote from breaking out of the literal or the
 * identifier.
 * NOTE(review): the literal escaping assumes `standard_conforming_strings` is
 * on (backslashes are not escape characters) — confirm for the target database.
 *
 * @param {string} site - site prefix used to match component ids
 * @param {object} options
 * @param {string[]} options.components - names of the component tables to query
 * @returns {*} whatever `db.raw` returns for the generated query
 */
function selectPublishedContent(site, options) {
  const sitePrefix = escapeSqlLiteral(site),
    componentSql = options.components.map(component => {
      const table = escapeSqlIdentifier(component);

      return `(SELECT pages.meta ->> 'title' as title, componentTable.data as data, componentTable.data->> 'date' as pubDate
FROM components."${table}" as componentTable, public.pages as pages, public.uris as uris
WHERE pages.meta->>'url' = componentTable.data->>'canonicalUrl'
AND pages.id = uris.data
AND (componentTable.data->>'date')::date between (now() - interval '2 days') AND now()
AND componentTable.id like '${sitePrefix}'||'/_components%')`;
    }),
    sql = `${componentSql.join(' UNION ')} ORDER BY (pubDate) desc
LIMIT 1000;`;

  return db.raw(sql);
}

module.exports = init;
module.exports.selectPublishedPages = selectPublishedPages;
module.exports.selectPublishedContent = selectPublishedContent;
22 changes: 12 additions & 10 deletions lib/services/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@ const db = require('./db'),
routes = require('./routes');

/**
* Initializes plugin.
* @param {Object} router
* @param {Object} storage
* @param {Object} _
* @param {Object} sites
* @return {Promise}
* Sets up the plugin with some options
*
* @param {Object} options
* @param {Object} options._news
* @returns {Function}
*/
function onInit(router, storage) {
return db(storage)
.then(() => routes(router));
/**
 * Sets up the plugin with some options and returns the initializer that is
 * called with the router and the storage.
 *
 * Options are optional: when none are given an empty object is used, so the
 * per-route lookup done while registering routes does not fail on `undefined`.
 *
 * @param {Object} [options]
 * @param {Object} [options._news]
 * @returns {Function} `(router, storage)` initializer resolving once routes are set up
 */
function setup(options) {
  const pluginOptions = options || {};

  // Initializes plugin
  return (router, storage) => {
    return db(storage)
      .then(() => routes(router, pluginOptions));
  };
}

module.exports = onInit;
module.exports = setup;
7 changes: 4 additions & 3 deletions lib/services/routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,20 @@ const express = require('express'),
/**
 * Set up sitemaping routes for all sites. eg. nymag.com/_sitemap
 *
 * Every file found in the `routes` directory is loaded and mounted on its own
 * sub-router under `/<file name without extension>`. Each route module is
 * called with that sub-router and with the entry of `options` stored under
 * its own name (`undefined` when there is none).
 *
 * @param {object} router
 * @param {object} [options] - plugin options keyed by route name, e.g. `_news`
 */
function setupRoutes(router, options) {
  const routesDir = [__dirname, '..', 'routes'].join(path.sep),
    // options are optional; fall back to an empty object so the lookup
    // below never dereferences `undefined`
    routeOptions = options || {};

  // load all controller routers
  files.getFiles(routesDir).forEach((filename) => {
    const pathRouter = express.Router(),
      name = removeExtension(filename),
      route = files.tryRequire([routesDir, name].join(path.sep));

    pathRouter.use(jsonBodyParser);

    route(pathRouter, routeOptions[name]);
    router.use(`/${name}`, pathRouter);
  });
}
Expand Down