Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

News sitemap #5

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
68 changes: 65 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,80 @@ First install the module:
$ npm install -s amphora-sitemaps
```

Then pass the module into Amphora as an item for the `plugins` array property.
Then, require the module and pass all the options you need to start the plugin:
```javascript
/**
* Gets the keywords from each content.
*
* @param {Object} data
* @returns {Promise}
*/
function getKeywords(data) {
...
}

/**
* Filters content by the criteria you choose.
*
* @param {Object} data
* @returns {boolean}
*/
function componentFilter(data) {
...
}

const amphoraSitemaps = require('amphora-sitemaps'),
amphoraSitemapsPlugin = amphoraSitemaps({
_news: {
components: ['article', 'gallery'],
getKeywords,
componentFilter
}
});
```

After that, pass the module into Amphora as an item for the `plugins` array property.

```javascript
amphora({
...
plugins: [
...
require('amphora-sitemaps'),
amphoraSitemapsPlugin,
...
],
...
})
```

At startup time the module will create and expose endpoints that you can hit in your browser to get all the published pages in an XML response for that site. `eg. yoursite.com/_sitemap`
## Options

The options object is used to initialize some endpoints like `_news`.

The object should have the following format:

```javascript
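var options = {
  _news: { // Endpoint name
    components: ['article', 'gallery'], // The components used to get the data for the sitemap
    getKeywords, // Function that receives the component data and returns a Promise with its keywords
    componentFilter // Function that receives the component data and returns whether it should be included
  }
}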
```

## Endpoints

At startup time, the module will create the following XML endpoints:

### _sitemap

Gets all the published pages.
wacevedo marked this conversation as resolved.
Show resolved Hide resolved

`eg. yoursite.com/_sitemap`

### _news

Gets pages based on the [Google News Sitemap](https://support.google.com/news/publisher-center/answer/74288?hl=en) guidelines.

In order to meet these guidelines, every component you pass in `components` must have the properties `canonicalUrl` and `date`, because both fields are required to build each sitemap entry.

`eg. yoursite.com/_news`
60 changes: 60 additions & 0 deletions lib/controllers/news.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
'use strict';

const h = require('highland'),
db = require('../services/db'),
xml = require('xml'),
{ setup, logError } = require('../services/logger'),
log = setup({ file: __filename }),
errorLogger = logError(log);

/**
 * Builds a Google News sitemap (XML) stream with the published content of a site.
 *
 * @param {Object} req - request-like object
 * @param {string} req.uri - request uri; the `/_news` suffix is removed to obtain the site
 * @param {Object} options - `_news` endpoint options; the whole object is also handed to `db.selectPublishedContent`
 * @param {Function} options.componentFilter - called with the component data; the row is kept when it returns a truthy value
 * @param {Function} options.getKeywords - called with the component data; must return a Promise that resolves with the keywords
 * @return {Stream} xml stream, closed once every row has been processed
 */
function getPublishedContent({ uri }, options) {
  const elem = xml.element({
      _attr: {
        xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:news': 'http://www.google.com/schemas/sitemap-news/0.9'
      }
    }),
    stream = xml({ urlset: elem }, { stream: true, declaration: true }),
    site = uri.replace('/_news', '');

  h(db.selectPublishedContent(site, options))
    .filter(content => options.componentFilter(content.data))
    .map(({ data, title }) => {
      // keywords are resolved asynchronously, so each row becomes a promise here...
      return options.getKeywords(data)
        .then(keywords => ({ data, title, keywords }));
    })
    // ...and is unwrapped back into the stream here
    .flatMap(h)
    .map(({ data, title, keywords }) => {
      const { date, canonicalUrl } = data;

      // `news:language` is only valid inside `news:publication`; it must not be
      // repeated as a direct child of `news:news`.
      elem.push({
        url: [
          { loc: canonicalUrl },
          { 'news:news': [
            { 'news:publication': [
              { 'news:name': site },
              { 'news:language': 'en' }
            ] },
            { 'news:publication_date': date },
            { 'news:title': title },
            { 'news:keywords': keywords }
          ] }
        ]
      });
    })
    .errors(errorLogger)
    .done(() => {
      // closing the root element ends the xml stream returned below
      elem.close();
    });

  return stream;
}

module.exports.getPublishedContent = getPublishedContent;
8 changes: 4 additions & 4 deletions lib/services/sitemaps.js → lib/controllers/sitemap.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
'use strict';

const _ = require('highland'),
db = require('./db'),
db = require('../services/db'),
xml = require('xml'),
{ setup, logError } = require('./logger'),
{ setup, logError } = require('../services/logger'),
log = setup({ file: __filename }),
errorLogger = logError(log);

Expand All @@ -14,11 +14,11 @@ const _ = require('highland'),
*/
function getPublishedPages({ uri, query }) {
const elem = xml.element({ _attr: { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' } }),
stream = xml({ urlset: elem }, { stream: true });
stream = xml({ urlset: elem }, { stream: true , declaration: true });

_(db.selectPublishedPages(uri, query))
.map(item => {
var arr = [];
const arr = [];

for (const key in item) {
if (item.hasOwnProperty(key)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
'use strict';

const sitemaps = require('./sitemaps'),
const sitemaps = require('./sitemap'),
h = require('highland'),
db = require('./db'),
db = require('../services/db'),
data = [{
loc: 'http://localhost.nymag.com/',
lastmod: '2018-10-01T15:45:44.494Z'
Expand All @@ -16,7 +16,7 @@ const sitemaps = require('./sitemaps'),
lastmod: '2018-10-04T14:19:52.262Z'
}];

jest.mock('./db');
jest.mock('../services/db');

describe('sitemaps', () => {
test('It should call the db function correctly', () => {
Expand Down
24 changes: 24 additions & 0 deletions lib/routes/_news.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'use strict';

const responses = require('../services/responses'),
controller = require('../controllers/news'),
// Request handlers of the `_news` route; also exported as `route` for testing.
handlers = {
/**
 * Creates the express-style handler that answers with the news sitemap.
 *
 * @param {Object} options - passed untouched to `controller.getPublishedContent`
 * @returns {Function} handler taking `(req, res)`
 */
getPublishedContent: options => {
return (req, res) => {
// set response type
res.type('text/xml');
// stream from the db
controller.getPublishedContent(req, options).pipe(res);
};
},
};

/**
 * Registers the `_news` handlers on the given router.
 *
 * @param {Object} router - router exposing `all` and `get`
 * @param {Object} options - handed to the `getPublishedContent` handler factory
 */
function routes(router, options) {
  const rootPath = '/',
    allowedMethods = { allow: ['get'] };

  // method guard first, then the actual GET handler
  router.all(rootPath, responses.allow(allowedMethods));

  const serveNewsSitemap = handlers.getPublishedContent(options);

  router.get(rootPath, serveNewsSitemap);
}

module.exports = routes;

// For testing
module.exports.route = handlers;
4 changes: 1 addition & 3 deletions lib/routes/_sitemap.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
'use strict';

const responses = require('../services/responses'),
controller = require('../services/sitemaps'),
controller = require('../controllers/sitemap'),
route = {
getList: function (req, res) {
// set response type
res.type('text/xml');
// We need to add in the xml version as first line
res.write('<?xml version="1.0" encoding="UTF-8"?>');
// stream from the db
controller.getPublishedPages(req).pipe(res);
},
Expand Down
6 changes: 2 additions & 4 deletions lib/routes/_sitemap.test.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
'use strict';

const sitemap = require('./_sitemap'),
controller = require('../services/sitemaps'),
controller = require('../controllers/sitemap'),
h = require('highland');

jest.mock('../services/sitemaps');
jest.mock('../controllers/sitemap');

describe('routes/_sitemap', () => {
describe('routes', () => {
Expand All @@ -17,12 +17,10 @@ describe('routes/_sitemap', () => {
let res = h();

res.type = jest.fn();
res.write = jest.fn();

controller.getPublishedPages.mockImplementation(() => h());
sitemap.route.getList(req, res);
expect(res.type.mock.calls.length).toBe(1);
expect(res.write.mock.calls.length).toBe(1);
expect(controller.getPublishedPages.mock.calls.length).toBe(1);
expect(controller.getPublishedPages.mock.calls[0][0]).toBe(req);
});
Expand Down
17 changes: 17 additions & 0 deletions lib/services/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@ function selectPublishedPages(uri, { year = '' }) {
return db.raw(sql, params);
}

/**
 * Queries the content published during the last two days for a site, one
 * sub-select per configured component, newest first and capped at 1000 rows.
 *
 * @param {string} site - site prefix; matched against the component ids as `<site>/_components%`
 * @param {Object} options
 * @param {string[]} options.components - component names; each one is used as a table of the `components` schema
 * @returns {*} whatever `db.raw` returns for the generated sql
 */
function selectPublishedContent(site, options) {
  // `site` comes from the request uri, so it must not be able to terminate the
  // string literal it is interpolated into: embedded single quotes are doubled.
  // NOTE(review): assumes PostgreSQL's default `standard_conforming_strings = on`.
  // Passing the value as a binding through `db.raw(sql, params)` would be preferable
  // once the placeholder syntax expected by `db.raw` is confirmed.
  const siteLiteral = String(site).replace(/'/g, "''"),
    componentSql = options.components.map(component => {
      // same idea for the quoted identifier: embedded double quotes are doubled
      const table = String(component).replace(/"/g, '""');

      return `(SELECT pages.meta ->> 'title' as title, componentTable.data as data, componentTable.data->> 'date' as pubDate
FROM components."${table}" as componentTable, public.pages as pages, public.uris as uris
WHERE pages.meta->>'url' = componentTable.data->>'canonicalUrl'
AND pages.id = uris.data
AND (componentTable.data->>'date')::date between (now() - interval '2 days') AND now()
AND componentTable.id like '${siteLiteral}'||'/_components%')`;
    });
  let sql = componentSql.join(' UNION ');

  sql += ` ORDER BY (pubDate) desc
LIMIT 1000;`;

  return db.raw(sql);
}

module.exports = init;
module.exports.selectPublishedPages = selectPublishedPages;
module.exports.selectPublishedContent = selectPublishedContent;
22 changes: 12 additions & 10 deletions lib/services/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@ const db = require('./db'),
routes = require('./routes');

/**
* Initializes plugin.
* @param {Object} router
* @param {Object} storage
* @param {Object} _
* @param {Object} sites
* @return {Promise}
* Sets up the plugin with some options
*
* @param {Object} options
* @param {Object} options._news
* @returns {Function}
*/
function onInit(router, storage) {
return db(storage)
.then(() => routes(router));
function setup(options) {
  // The route setup looks up `options[name]` for every route file, so a plugin
  // created without options must still hand it an object instead of `undefined`.
  const pluginOptions = options || {};

  // Initializes plugin: prepares the db service first, then mounts the routes
  return (router, storage) => {
    return db(storage)
      .then(() => routes(router, pluginOptions));
  };
}

module.exports = onInit;
module.exports = setup;
11 changes: 7 additions & 4 deletions lib/services/init.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const init = require('./init'),
routes = require('./routes'),
router = {},
storage = {},
sites = {};
options = {};

jest.mock('./db');
jest.mock('./routes');
Expand All @@ -15,9 +15,12 @@ db.mockResolvedValue(Promise.resolve());
describe('init', () => {
describe('onInit', () => {
test('it sets up the storage, the routes for the plugin and start listening for sitemaps pages', () => {
return init(router, storage, {}, sites).then(() => {
expect(db.mock.calls.length).toBe(1);
expect(db.mock.calls[0][0]).toEqual(storage);
const controler = init(options),
initializedPlugin = controler(router, storage);

expect(db.mock.calls.length).toBe(1);
expect(db.mock.calls[0][0]).toEqual(storage);
return initializedPlugin.then(() => {
expect(routes.mock.calls.length).toBe(1);
expect(routes.mock.calls[0][0]).toEqual(router);
});
Expand Down
7 changes: 4 additions & 3 deletions lib/services/routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,20 @@ const express = require('express'),
/**
* Set up sitemaping routes for all sites. eg. nymag.com/_sitemap
* @param {object} router
* @param {object} options
*/
function setupRoutes(router) {
function setupRoutes(router, options) {
const routesPath = 'routes';

// load all controller routers
files.getFiles([__dirname, '..', routesPath].join(path.sep)).filter((filename) => {
const pathRouter = express.Router(),
name = removeExtension(filename),
controller = files.tryRequire([__dirname, '..', routesPath, name].join(path.sep));
route = files.tryRequire([__dirname, '..', routesPath, name].join(path.sep));

pathRouter.use(jsonBodyParser);

controller(pathRouter);
route(pathRouter, options[name]);
router.use(`/${name}`, pathRouter);
});
}
Expand Down
9 changes: 8 additions & 1 deletion lib/services/routes.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,19 @@ describe('routes', () => {
const controller = jest.fn(),
pathRouter = {
use: jest.fn()
},
option = {
_news: {
components: ['article', 'lede-video'],
getKeywords: jest.fn(),
componentFilter: jest.fn()
}
};

express.Router = jest.fn(() => pathRouter);
files.tryRequire = jest.fn(() => controller);

setupRoutes(router);
setupRoutes(router, option);

expect(router.use.mock.calls.length).toBe(sites.length);
expect(controller.mock.calls.length).toBe(sites.length);
Expand Down