From ecc7e1406009ea4cd1a1f67d437a8e0bf954e348 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 15 Dec 2017 18:15:35 -0800 Subject: [PATCH] Reduce excess file system writes by implementing query queue (#3237) * Reduce excess file system writes by implementing query queue There are multiple ways that running queries can be triggered. Previous to this PR, Gatsby had done some de-duping of queries, but in practice, queries for pages/layouts can often be running multiple times in short succession. This is normally fine-ish, but @gaearon was running into troubles while editing markdown on reactjs.org: webpack would detect a change in a file and, before it could finish reading it, Gatsby would start writing to the file again, resulting in a JSON.parse error due to the incomplete JSON. On top of this, Gatsby would write out the result of every query even if the query returned the same result as before. To address these, this PR first adds a query queue that deduplicates added pages/layouts. This means we're writing out results far less often. Second, we hash each query result and only write to file if the result has changed, which means far less work for webpack and far faster hot reloading of query updates to the browser. 
* Add back bootstrapping repo to netlify build * yarn global add isn't working for some reason --- packages/gatsby/package.json | 2 + .../gatsby/src/bootstrap/page-hot-reloader.js | 2 + .../query-runner/page-query-runner.js | 46 ++++++++----------- .../query-runner/pages-writer.js | 44 +++++++++--------- .../query-runner/query-queue.js | 17 +++++++ .../query-runner/query-runner.js | 21 +++++++-- .../query-runner/query-watcher.js | 24 +++------- scripts/publish-site.sh | 4 +- yarn.lock | 18 +++++++- 9 files changed, 105 insertions(+), 73 deletions(-) create mode 100644 packages/gatsby/src/internal-plugins/query-runner/query-queue.js diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json index 666bcf11425a1..a46e332a33aa1 100644 --- a/packages/gatsby/package.json +++ b/packages/gatsby/package.json @@ -24,6 +24,7 @@ "babel-runtime": "^6.26.0", "babel-traverse": "^6.24.1", "babylon": "^6.17.3", + "better-queue": "^3.8.6", "bluebird": "^3.5.0", "chalk": "^1.1.3", "chokidar": "^1.7.0", @@ -66,6 +67,7 @@ "lodash": "^4.17.4", "lodash-id": "^0.14.0", "lowdb": "^0.16.2", + "md5": "^2.2.1", "md5-file": "^3.1.1", "mime": "^1.3.6", "mitt": "^1.1.2", diff --git a/packages/gatsby/src/bootstrap/page-hot-reloader.js b/packages/gatsby/src/bootstrap/page-hot-reloader.js index e328d74e1db42..d294de6de059c 100644 --- a/packages/gatsby/src/bootstrap/page-hot-reloader.js +++ b/packages/gatsby/src/bootstrap/page-hot-reloader.js @@ -61,6 +61,8 @@ const runCreatePages = async () => { deleteComponentsDependencies([page.path]) deletePage(page) }) + + emitter.emit(`CREATE_PAGE_END`) } const debouncedCreatePages = _.debounce(runCreatePages, 100) diff --git a/packages/gatsby/src/internal-plugins/query-runner/page-query-runner.js b/packages/gatsby/src/internal-plugins/query-runner/page-query-runner.js index 4caf5666473f0..f4e0f0af46b61 100644 --- a/packages/gatsby/src/internal-plugins/query-runner/page-query-runner.js +++ 
b/packages/gatsby/src/internal-plugins/query-runner/page-query-runner.js @@ -6,10 +6,9 @@ */ const _ = require(`lodash`) -const async = require(`async`) +const queue = require(`./query-queue`) const { store, emitter } = require(`../../redux`) -const queryRunner = require(`./query-runner`) let queuedDirtyActions = [] let active = false @@ -95,34 +94,27 @@ const findIdsWithoutDataDependencies = () => { } const runQueriesForIds = ids => { - ids = _.uniq(ids) - if (ids.length < 1) { + const state = store.getState() + const pagesAndLayouts = [...state.pages, ...state.layouts] + let didNotQueueItems = true + ids.forEach(id => { + const plObj = pagesAndLayouts.find( + pl => pl.path === id || `LAYOUT___${pl.id}` === id + ) + if (plObj) { + didNotQueueItems = false + queue.push({ ...plObj, _id: plObj.id, id: plObj.jsonName }) + } + }) + + if (didNotQueueItems || !ids || ids.length === 0) { return Promise.resolve() } - const state = store.getState() - return new Promise((resolve, reject) => { - async.mapLimit( - ids, - 4, - (id, callback) => { - const pagesAndLayouts = [...state.pages, ...state.layouts] - const plObj = pagesAndLayouts.find( - pl => pl.path === id || `LAYOUT___${pl.id}` === id - ) - if (plObj) { - return queryRunner(plObj, state.components[plObj.component]).then( - result => callback(null, result), - error => callback(error) - ) - } else { - return callback(null, null) - } - }, - (error, result) => { - error ? 
reject(error) : resolve(result) - } - ) + return new Promise(resolve => { + queue.on(`drain`, () => { + resolve() + }) }) } diff --git a/packages/gatsby/src/internal-plugins/query-runner/pages-writer.js b/packages/gatsby/src/internal-plugins/query-runner/pages-writer.js index 2fb5aa5244064..86ef1079d23d4 100644 --- a/packages/gatsby/src/internal-plugins/query-runner/pages-writer.js +++ b/packages/gatsby/src/internal-plugins/query-runner/pages-writer.js @@ -54,11 +54,6 @@ const writePages = async () => { pageLayouts = _.uniq(pageLayouts) components = _.uniqBy(components, c => c.componentChunkName) - await fs.writeFile( - joinPath(program.directory, `.cache/pages.json`), - JSON.stringify(pagesData, null, 4) - ) - // Create file with sync requires of layouts/components/json files. let syncRequires = `// prefer default export if available const preferDefault = m => m && m.default || m @@ -93,10 +88,6 @@ const preferDefault = m => m && m.default || m .join(`,\n`)} }` - await fs.writeFile( - `${program.directory}/.cache/sync-requires.js`, - syncRequires - ) // Create file with async requires of layouts/components/json files. let asyncRequires = `// prefer default export if available const preferDefault = m => m && m.default || m @@ -131,10 +122,17 @@ const preferDefault = m => m && m.default || m .join(`,\n`)} }` - await fs.writeFile( - joinPath(program.directory, `.cache/async-requires.js`), - asyncRequires - ) + await Promise.all([ + fs.writeFile( + joinPath(program.directory, `.cache/pages.json`), + JSON.stringify(pagesData, null, 4) + ), + fs.writeFile(`${program.directory}/.cache/sync-requires.js`, syncRequires), + fs.writeFile( + joinPath(program.directory, `.cache/async-requires.js`), + asyncRequires + ), + ]) return } @@ -143,15 +141,19 @@ exports.writePages = writePages let bootstrapFinished = false let oldPages -const debouncedWritePages = _.debounce(() => { - // Don't write pages again until bootstrap has finished. 
- if (bootstrapFinished && !_.isEqual(oldPages, store.getState().pages)) { - writePages() - oldPages = store.getState().pages - } -}, 250) +const debouncedWritePages = _.debounce( + () => { + // Don't write pages again until bootstrap has finished. + if (bootstrapFinished && !_.isEqual(oldPages, store.getState().pages)) { + writePages() + oldPages = store.getState().pages + } + }, + 500, + { leading: true } +) -emitter.on(`CREATE_PAGE`, () => { +emitter.on(`CREATE_PAGE_END`, () => { debouncedWritePages() }) emitter.on(`DELETE_PAGE`, () => { diff --git a/packages/gatsby/src/internal-plugins/query-runner/query-queue.js b/packages/gatsby/src/internal-plugins/query-runner/query-queue.js new file mode 100644 index 0000000000000..013a2544cce7c --- /dev/null +++ b/packages/gatsby/src/internal-plugins/query-runner/query-queue.js @@ -0,0 +1,17 @@ +const Queue = require(`better-queue`) + +const queryRunner = require(`./query-runner`) +const { store } = require(`../../redux`) + +const queue = new Queue( + (plObj, callback) => { + const state = store.getState() + return queryRunner(plObj, state.components[plObj.component]).then( + result => callback(null, result), + error => callback(error) + ) + }, + { concurrent: 4 } +) + +module.exports = queue diff --git a/packages/gatsby/src/internal-plugins/query-runner/query-runner.js b/packages/gatsby/src/internal-plugins/query-runner/query-runner.js index d4b3b215ff7bf..4d60a4b5cd09b 100644 --- a/packages/gatsby/src/internal-plugins/query-runner/query-runner.js +++ b/packages/gatsby/src/internal-plugins/query-runner/query-runner.js @@ -1,12 +1,16 @@ import { graphql as graphqlFunction } from "graphql" const fs = require(`fs-extra`) const report = require(`gatsby-cli/lib/reporter`) +const md5 = require(`md5`) const { joinPath } = require(`../../utils/path`) const { store } = require(`../../redux`) +const resultHashes = {} + // Run query for a page module.exports = async (pageOrLayout, component) => { + pageOrLayout.id = 
pageOrLayout._id const { schema, program } = store.getState() const graphql = (query, context) => @@ -51,10 +55,17 @@ module.exports = async (pageOrLayout, component) => { contextKey = `layoutContext` } result[contextKey] = pageOrLayout.context - const resultJSON = JSON.stringify(result, null, 4) - - await fs.writeFile( - joinPath(program.directory, `.cache`, `json`, pageOrLayout.jsonName), - resultJSON + const resultJSON = JSON.stringify(result) + const resultHash = md5(resultJSON) + const resultPath = joinPath( + program.directory, + `.cache`, + `json`, + pageOrLayout.jsonName ) + + if (resultHashes[resultPath] !== resultHash) { + resultHashes[resultPath] = resultHash + await fs.writeFile(resultPath, resultJSON) + } } diff --git a/packages/gatsby/src/internal-plugins/query-runner/query-watcher.js b/packages/gatsby/src/internal-plugins/query-runner/query-watcher.js index 0c469569e00df..45879f477c2ee 100644 --- a/packages/gatsby/src/internal-plugins/query-runner/query-watcher.js +++ b/packages/gatsby/src/internal-plugins/query-runner/query-watcher.js @@ -10,12 +10,11 @@ const _ = require(`lodash`) const chokidar = require(`chokidar`) -const async = require(`async`) const { store } = require(`../../redux/`) const { boundActionCreators } = require(`../../redux/actions`) const queryCompiler = require(`./query-compiler`).default -const queryRunner = require(`./query-runner`) +const queue = require(`./query-queue`) const invariant = require(`invariant`) const normalize = require(`normalize-path`) @@ -57,21 +56,12 @@ const runQueriesForComponent = componentPath => { boundActionCreators.deleteComponentsDependencies( pages.map(p => p.path || p.id) ) - const component = store.getState().components[componentPath] - return new Promise((resolve, reject) => { - async.mapLimit( - pages, - 4, - (page, callback) => { - queryRunner(page, component).then( - result => callback(null, result), - error => callback(error) - ) - }, - (error, result) => { - error ? 
reject(error) : resolve(result) - } - ) + pages.forEach(page => + queue.push({ ...page, _id: page.id, id: page.jsonName }) + ) + + return new Promise(resolve => { + queue.on(`drain`, () => resolve()) }) } diff --git a/scripts/publish-site.sh b/scripts/publish-site.sh index 148712a7dfe60..92bf7504334dd 100644 --- a/scripts/publish-site.sh +++ b/scripts/publish-site.sh @@ -1,5 +1,5 @@ - -yarn global add gatsby-dev-cli +yarn bootstrap +npm install -g gatsby-dev-cli gatsby-dev --set-path-to-repo . echo "=== Installing the website dependencies" diff --git a/yarn.lock b/yarn.lock index 3c64b607cd95d..c82d2767a7879 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1528,6 +1528,18 @@ better-assert@~1.0.0: dependencies: callsite "1.0.0" +better-queue-memory@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/better-queue-memory/-/better-queue-memory-1.0.2.tgz#aa6d169aa1d0cc77409185cb9cb5c7dc251bcd41" + +better-queue@^3.8.6: + version "3.8.6" + resolved "https://registry.yarnpkg.com/better-queue/-/better-queue-3.8.6.tgz#73220bdfab403924cffa7497220dd387abb73a63" + dependencies: + better-queue-memory "^1.0.1" + node-eta "^0.9.0" + uuid "^3.0.0" + big.js@^3.1.3: version "3.2.0" resolved "https://registry.yarnpkg.com/big.js/-/big.js-3.2.0.tgz#a5fc298b81b9e0dca2e458824784b65c52ba588e" @@ -7705,7 +7717,7 @@ md5.js@^1.3.4: hash-base "^3.0.0" inherits "^2.0.1" -md5@^2.0.0: +md5@^2.0.0, md5@^2.2.1: version "2.2.1" resolved "https://registry.yarnpkg.com/md5/-/md5-2.2.1.tgz#53ab38d5fe3c8891ba465329ea23fac0540126f9" dependencies: @@ -8161,6 +8173,10 @@ node-emoji@^1.0.4: dependencies: lodash.toarray "^4.4.0" +node-eta@^0.9.0: + version "0.9.0" + resolved "https://registry.yarnpkg.com/node-eta/-/node-eta-0.9.0.tgz#9fb0b099bcd2a021940e603c64254dc003d9a7a8" + node-fetch@^1.0.1: version "1.7.3" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-1.7.3.tgz#980f6f72d85211a5347c6b2bc18c5b84c3eb47ef"