Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gatsby-source-drupal): Use Got instead of Axios for retries & caching support + restrict to 20 concurrent requests #31514

Merged
merged 16 commits into from
Jun 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions benchmarks/source-drupal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@
},
"dependencies": {
"cross-env": "^7.0.0",
"dotenv": "^8.2.0",
"faker": "^4.1.0",
"gatsby": "^2.19.7",
"gatsby-image": "^2.2.40",
"dotenv": "^9.0.2",
"faker": "^5.5.3",
"gatsby": "^3.5.1",
"gatsby-image": "^3.5.0",
"gatsby-plugin-benchmark-reporting": "*",
"gatsby-plugin-sharp": "^2.4.5",
"gatsby-source-drupal": "^3.3.18",
"gatsby-source-filesystem": "^2.1.48",
"gatsby-transformer-sharp": "^2.3.14",
"gatsby-plugin-sharp": "^3.5.0",
"gatsby-source-drupal": "^4.7.2",
"gatsby-source-filesystem": "^3.5.0",
"gatsby-transformer-sharp": "^3.5.0",
"lodash.kebabcase": "^4.1.1",
"node-fetch": "^2.6.0",
"react": "^16.12.0",
"react-dom": "^16.12.0",
"ts-node": "^8.6.2",
"typescript": "^3.8.3"
"react": "^17.0.2",
"react-dom": "^17.0.2",
"ts-node": "^9.1.1",
"typescript": "^4.2.4"
},
"devDependencies": {
"prettier": "2.0.4"
Expand Down
15 changes: 12 additions & 3 deletions packages/gatsby-source-drupal/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ module.exports = {
}
```

## GET Params
## GET Search Params

You can append optional GET request params to the request url using `params` option.

Expand Down Expand Up @@ -214,9 +214,13 @@ module.exports = {
}
```

## Concurrent API Requests

You can use the `concurrentAPIRequests` option to change how many simultaneous API requests are made to the server/service. 20 is the default and seems to be the fastest for most sites.

## Disallowed Link Types

You can use the `disallowedLinkTypes` option to skip link types found in JSON:API documents. By default it skips the `self` and `describedby` links, which do not provide data that can be sourced. You may override the setting to add additional link types to be skipped.
You can use the `disallowedLinkTypes` option to skip link types found in JSON:API documents. By default it skips the `self`, `describedby`, `contact_message--feedback`, and `contact_message--pesonal` links, which do not provide data that can be sourced. You may override the setting to add additional link types to be skipped.

```javascript
// In your gatsby-config.js
Expand All @@ -227,7 +231,12 @@ module.exports = {
options: {
baseUrl: `https://live-contentacms.pantheonsite.io/`,
// skip the action--action resource type.
disallowedLinkTypes: [`self`, `describedby`, `action--action`],
disallowedLinkTypes: [
`self`,
`describedby`,
`contact_message--feedback`,
`contact_message--personal`,
],
},
},
],
Expand Down
10 changes: 4 additions & 6 deletions packages/gatsby-source-drupal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
},
"dependencies": {
"@babel/runtime": "^7.14.0",
"axios": "^0.21.1",
"got": "^11.8.2",
"agentkeepalive": "^4.1.1",
"fastq": "^1.11.0",
"bluebird": "^3.7.2",
"body-parser": "^1.19.0",
"gatsby-source-filesystem": "^3.8.0-next.1",
Expand All @@ -23,11 +25,7 @@
"cross-env": "^7.0.3"
},
"homepage": "https://github.com/gatsbyjs/gatsby/tree/master/packages/gatsby-source-drupal#readme",
"keywords": [
"gatsby",
"gatsby-plugin",
"gatsby-source-plugin"
],
"keywords": ["gatsby", "gatsby-plugin", "gatsby-source-plugin"],
"license": "MIT",
"peerDependencies": {
"gatsby": "^3.0.0-next.0"
Expand Down
30 changes: 14 additions & 16 deletions packages/gatsby-source-drupal/src/__tests__/index.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
jest.mock(`axios`, () => {
return {
get: jest.fn(path => {
const last = path.split(`/`).pop()
try {
return { data: require(`./fixtures/${last}.json`) }
} catch (e) {
console.log(`Error`, e)
return null
}
}),
}
})
jest.mock(`got`, () =>
jest.fn(path => {
const last = path.split(`/`).pop()
try {
return { body: require(`./fixtures/${last}.json`) }
} catch (e) {
console.log(`Error`, e)
return null
}
})
)

jest.mock(`gatsby-source-filesystem`, () => {
return {
Expand Down Expand Up @@ -506,18 +504,18 @@ describe(`gatsby-source-drupal`, () => {

describe(`Error handling`, () => {
describe(`Does end activities if error is thrown`, () => {
const axios = require(`axios`)
const got = require(`got`)
beforeEach(() => {
nodes = {}
reporter.activityTimer.mockClear()
activity.start.mockClear()
activity.end.mockClear()
axios.get.mockClear()
got.mockClear()
downloadFileSpy.mockClear()
})

it(`during data fetching`, async () => {
axios.get.mockImplementationOnce(() => {
got.mockImplementationOnce(() => {
throw new Error(`data fetching failed`)
})
expect.assertions(5)
Expand Down
104 changes: 71 additions & 33 deletions packages/gatsby-source-drupal/src/gatsby-node.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
const axios = require(`axios`)
const got = require(`got`)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got automatically retries requests on errors which helps the source plugin recover from occasional glitches

const _ = require(`lodash`)
const urlJoin = require(`url-join`)
import HttpAgent from "agentkeepalive"

const { HttpsAgent } = HttpAgent

const { setOptions, getOptions } = require(`./plugin-options`)

Expand All @@ -12,6 +15,17 @@ const {
} = require(`./normalize`)
const { handleReferences, handleWebhookUpdate } = require(`./utils`)

const agent = {
http: new HttpAgent(),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an optimized http agent with https://www.npmjs.com/package/agentkeepalive

https: new HttpsAgent(),
}

async function worker([url, options]) {
return got(url, { agent, ...options })
}

const requestQueue = require(`fastq`).promise(worker, 20)

const asyncPool = require(`tiny-async-pool`)
const bodyParser = require(`body-parser`)

Expand Down Expand Up @@ -53,12 +67,18 @@ exports.sourceNodes = async (
const {
baseUrl,
apiBase = `jsonapi`,
basicAuth,
basicAuth = {},
filters,
headers,
params,
params = {},
concurrentFileRequests = 20,
disallowedLinkTypes = [`self`, `describedby`],
concurrentAPIRequests = 20,
disallowedLinkTypes = [
`self`,
`describedby`,
`contact_message--feedback`,
`contact_message--personal`,
],
skipFileDownloads = false,
fastBuilds = false,
entityReferenceRevisions = [],
Expand All @@ -70,6 +90,9 @@ exports.sourceNodes = async (
} = pluginOptions
const { createNode, setPluginStatus, touchNode } = actions

// Update the concurrency limit from the plugin options
requestQueue.concurrency = concurrentAPIRequests

if (webhookBody && Object.keys(webhookBody).length) {
const changesActivity = reporter.activityTimer(
`loading Drupal content changes`,
Expand Down Expand Up @@ -139,19 +162,22 @@ exports.sourceNodes = async (

try {
// Hit fastbuilds endpoint with the lastFetched date.
const data = await axios.get(
const res = await requestQueue.push([
urlJoin(baseUrl, `gatsby-fastbuilds/sync/`, lastFetched.toString()),
{
auth: basicAuth,
username: basicAuth.username,
password: basicAuth.password,
headers,
params,
}
)
searchParams: params,
responseType: `json`,
cache,
},
])

if (data.data.status === -1) {
if (res.body.status === -1) {
// The incremental data is expired or this is the first fetch.
reporter.info(`Unable to pull incremental data changes from Drupal`)
setPluginStatus({ lastFetched: data.data.timestamp })
setPluginStatus({ lastFetched: res.body.timestamp })
requireFullRebuild = true
} else {
// Touch nodes so they are not garbage collected by Gatsby.
Expand All @@ -162,7 +188,7 @@ exports.sourceNodes = async (
})

// Process sync data from Drupal.
const nodesToSync = data.data.entities
const nodesToSync = res.body.entities
for (const nodeSyncData of nodesToSync) {
if (nodeSyncData.action === `delete`) {
actions.deleteNode(
Expand Down Expand Up @@ -208,7 +234,7 @@ exports.sourceNodes = async (
}
}

setPluginStatus({ lastFetched: data.data.timestamp })
setPluginStatus({ lastFetched: res.body.timestamp })
}
} catch (e) {
gracefullyRethrow(drupalFetchIncrementalActivity, e)
Expand All @@ -233,13 +259,19 @@ exports.sourceNodes = async (

let allData
try {
const data = await axios.get(urlJoin(baseUrl, apiBase), {
auth: basicAuth,
headers,
params,
})
const res = await requestQueue.push([
urlJoin(baseUrl, apiBase),
{
username: basicAuth.username,
password: basicAuth.password,
headers,
searchParams: params,
responseType: `json`,
cache,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pass in Gatsby's cache object to Got to cache http requests

},
])
allData = await Promise.all(
_.map(data.data.links, async (url, type) => {
_.map(res.body.links, async (url, type) => {
if (disallowedLinkTypes.includes(type)) return
if (!url) return
if (!type) return
Expand Down Expand Up @@ -276,31 +308,36 @@ exports.sourceNodes = async (

let d
try {
d = await axios.get(url, {
auth: basicAuth,
headers,
params,
})
d = await requestQueue.push([
url,
{
username: basicAuth.username,
password: basicAuth.password,
headers,
responseType: `json`,
cache,
},
])
} catch (error) {
if (error.response && error.response.status == 405) {
if (error.response && error.response.statusCode == 405) {
// The endpoint doesn't support the GET method, so just skip it.
return []
} else {
console.error(`Failed to fetch ${url}`, error.message)
console.log(error.data)
console.log(error)
throw error
}
}
data = data.concat(d.data.data)
data = data.concat(d.body.data)
// Add support for includes. Includes allow entity data to be expanded
// based on relationships. The expanded data is exposed as `included`
// in the JSON API response.
// See https://www.drupal.org/docs/8/modules/jsonapi/includes
if (d.data.included) {
data = data.concat(d.data.included)
if (d.body.included) {
data = data.concat(d.body.included)
}
if (d.data.links && d.data.links.next) {
data = await getNext(d.data.links.next, data)
if (d.body.links && d.body.links.next) {
data = await getNext(d.body.links.next, data)
}

return data
Expand Down Expand Up @@ -485,8 +522,8 @@ exports.pluginOptionsSchema = ({ Joi }) =>
`The path to the root of the JSONAPI — defaults to "jsonapi"`
),
basicAuth: Joi.object({
username: Joi.string(),
password: Joi.string(),
username: Joi.string().required(),
password: Joi.string().required(),
}).description(`Enables basicAuth`),
filters: Joi.object().description(
`Pass filters to the JSON API for specific collections`
Expand All @@ -496,6 +533,7 @@ exports.pluginOptionsSchema = ({ Joi }) =>
),
params: Joi.object().description(`Append optional GET params to requests`),
concurrentFileRequests: Joi.number().integer().default(20).min(1),
concurrentAPIRequests: Joi.number().integer().default(20).min(1),
disallowedLinkTypes: Joi.array().items(Joi.string()),
skipFileDownloads: Joi.boolean(),
fastBuilds: Joi.boolean(),
Expand Down
8 changes: 2 additions & 6 deletions renovate.json5
Original file line number Diff line number Diff line change
Expand Up @@ -16733,9 +16733,7 @@
],
"groupName": "minor and patch dependencies for gatsby-source-drupal",
"groupSlug": "gatsby-source-drupal-prod-minor",
"matchPackageNames": [
"axios"
],
"matchPackageNames": [],
"matchUpdateTypes": [
"patch"
],
Expand Down Expand Up @@ -16768,9 +16766,7 @@
],
"groupName": "major dependencies for gatsby-source-drupal",
"groupSlug": "gatsby-source-drupal-prod-major",
"matchPackageNames": [
"axios"
],
"matchPackageNames": [],
"matchUpdateTypes": [
"major",
"minor"
Expand Down
Loading