diff --git a/docs/docs/node-creation.md b/docs/docs/node-creation.md index ce605abe0e7d5..eb699b0539e63 100644 --- a/docs/docs/node-creation.md +++ b/docs/docs/node-creation.md @@ -65,6 +65,33 @@ When a `source-nodes` plugin runs again, it generally recreates nodes (which aut Any nodes that aren't touched by the end of the `source-nodes` phase, are deleted. This is performed via a diff between the `nodesTouched` and `nodes` Redux namespaces, in [source-nodes.ts](https://github.com/gatsbyjs/gatsby/blob/master/packages/gatsby/src/utils/source-nodes.ts) +### Opting out of stale node deletion + +Stale node deletion is a very expensive process because all nodes in the data store need to be iterated on to check if they're stale or not. Iterating on nodes to check for staleness also requires loading each entire node into memory, so all nodes created by a source plugin are loaded into memory to check for their staleness, even if they're otherwise not needed in memory. + +Source plugins can skip this expensive step by calling the `enableStatefulSourceNodes` action. +This will stop Gatsby from checking for stale nodes created by the source plugin that called the action. +This is a major performance improvement for medium and large sites and those sites will need less total memory to build. + +When enabling stateful sourcing, plugin authors need to be sure their plugins properly handle deleting nodes when they need to be deleted. Since Gatsby is no longer checking for node staleness, data which should no longer exist could stick around. +`enableStatefulSourceNodes` should only be enabled for source plugins that can fully support all delete operations in their data source. + +Note that if `enableStatefulSourceNodes` is supported by the user's `gatsby` version, the action should be called every time `sourceNodes` runs. 
+ +Example: + +```js +import { hasFeature } from "gatsby-plugin-utils" + +exports.sourceNodes = ({ actions }) => { + if (hasFeature(`stateful-source-nodes`)) { + actions.enableStatefulSourceNodes() + } else { + // fallback to old behavior where all nodes are iterated on and touchNode is called. + } +} +``` + ## Changing a node's fields From a site developer's point of view, nodes are immutable. In the sense that if you change a node object, those changes will not be seen by other parts of Gatsby. To make a change to a node, it must be persisted to Redux via an action. diff --git a/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js b/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js index d8ad217174f65..f4e77f963eecb 100644 --- a/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js +++ b/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js @@ -87,6 +87,7 @@ describe(`gatsby-node`, () => { currentNodeMap.delete(node.id) }), touchNode: jest.fn(), + enableStatefulSourceNodes: jest.fn(), } const schema = { buildObjectType: jest.fn(() => { diff --git a/packages/gatsby-source-contentful/src/normalize.js b/packages/gatsby-source-contentful/src/normalize.js index 1b0c0e8f32014..39428ee912a2b 100644 --- a/packages/gatsby-source-contentful/src/normalize.js +++ b/packages/gatsby-source-contentful/src/normalize.js @@ -5,7 +5,8 @@ import { getGatsbyVersion } from "gatsby-core-utils" import { lt, prerelease } from "semver" const typePrefix = `Contentful` -const makeTypeName = type => _.upperFirst(_.camelCase(`${typePrefix} ${type}`)) +export const makeTypeName = type => + _.upperFirst(_.camelCase(`${typePrefix} ${type}`)) const GATSBY_VERSION_MANIFEST_V2 = `4.3.0` const gatsbyVersion = diff --git a/packages/gatsby-source-contentful/src/source-nodes.js b/packages/gatsby-source-contentful/src/source-nodes.js index 87ca7a8bd982a..88fd078ab9a78 100644 --- a/packages/gatsby-source-contentful/src/source-nodes.js +++ 
b/packages/gatsby-source-contentful/src/source-nodes.js @@ -1,4 +1,5 @@ // @ts-check +import { hasFeature } from "gatsby-plugin-utils/has-feature" import isOnline from "is-online" import _ from "lodash" @@ -11,6 +12,7 @@ import { createAssetNodes, createNodesForContentType, makeId, + makeTypeName, } from "./normalize" import { createPluginConfig } from "./plugin-options" import { CODES } from "./report" @@ -40,7 +42,7 @@ const CONTENT_DIGEST_COUNTER_SEPARATOR = `_COUNT_` * or the fallback field or the default field. */ -let isFirstSource = true +let isFirstSourceNodesCallOfCurrentNodeProcess = true export async function sourceNodes( { actions, @@ -55,14 +57,26 @@ export async function sourceNodes( }, pluginOptions ) { - const { createNode, touchNode, deleteNode, unstable_createNodeManifest } = - actions + const hasStatefulSourceNodes = hasFeature(`stateful-source-nodes`) + const needToTouchNodes = !hasStatefulSourceNodes + + const { + createNode, + touchNode, + deleteNode, + unstable_createNodeManifest, + enableStatefulSourceNodes, + } = actions + const online = await isOnline() + if (hasStatefulSourceNodes) { + enableStatefulSourceNodes() + } // Gatsby only checks if a node has been touched on the first sourcing. // As iterating and touching nodes can grow quite expensive on larger sites with // 1000s of nodes, we'll skip doing this on subsequent sources. 
- if (isFirstSource) { + else if (isFirstSourceNodesCallOfCurrentNodeProcess && needToTouchNodes) { getNodes().forEach(node => { if (node.internal.owner !== `gatsby-source-contentful`) { return @@ -73,9 +87,10 @@ export async function sourceNodes( touchNode(getNode(node.fields.localFile)) } }) - isFirstSource = false } + isFirstSourceNodesCallOfCurrentNodeProcess = false + if ( !online && process.env.GATSBY_CONTENTFUL_OFFLINE === `true` && diff --git a/packages/gatsby/index.d.ts b/packages/gatsby/index.d.ts index 62f1852d6a8da..52b60dbfe9f6c 100644 --- a/packages/gatsby/index.d.ts +++ b/packages/gatsby/index.d.ts @@ -21,6 +21,7 @@ export type AvailableFeatures = | "image-cdn" | "graphql-typegen" | "content-file-path" + | "stateful-source-nodes" export { Link, @@ -433,6 +434,11 @@ export interface GatsbyNode< calllback: PluginCallback ): void | Promise + /** + * Marks the source plugin that called this function as stateful. Gatsby will not check for stale nodes for any plugin that calls this. + */ + enableStatefulSourceNodes?(this: void, plugin?: ActionPlugin) + /** * Called when a new node is created. Plugins wishing to extend or * transform nodes created by other plugins should implement this API. 
diff --git a/packages/gatsby/scripts/__tests__/api.js b/packages/gatsby/scripts/__tests__/api.js index beb4a4671e966..c3bd68d38e038 100644 --- a/packages/gatsby/scripts/__tests__/api.js +++ b/packages/gatsby/scripts/__tests__/api.js @@ -35,6 +35,7 @@ it("generates the expected api output", done => { "graphql-typegen", "content-file-path", "slices", + "stateful-source-nodes", ], "node": Object { "createPages": Object {}, diff --git a/packages/gatsby/scripts/output-api-file.js b/packages/gatsby/scripts/output-api-file.js index 460332104fd0a..8863f26421d7a 100644 --- a/packages/gatsby/scripts/output-api-file.js +++ b/packages/gatsby/scripts/output-api-file.js @@ -41,7 +41,7 @@ async function outputFile() { }, {}) /** @type {Array} */ - output.features = ["image-cdn", "graphql-typegen", "content-file-path", "slices"]; + output.features = ["image-cdn", "graphql-typegen", "content-file-path", "slices", "stateful-source-nodes"]; return fs.writeFile( path.resolve(OUTPUT_FILE_NAME), diff --git a/packages/gatsby/src/datastore/__tests__/nodes.js b/packages/gatsby/src/datastore/__tests__/nodes.js index 518e44ef1f8d8..f5ad186f38c91 100644 --- a/packages/gatsby/src/datastore/__tests__/nodes.js +++ b/packages/gatsby/src/datastore/__tests__/nodes.js @@ -339,4 +339,103 @@ describe(`nodes db tests`, () => { }) expect(getNodes()).toHaveLength(0) }) + + it(`records the node type owner when a node is created`, async () => { + // creating a node + store.dispatch( + actions.createNode( + { + id: `1`, + internal: { + type: `OwnerOneTestTypeOne`, + contentDigest: `ok`, + }, + }, + { + name: `test-owner-1`, + } + ) + ) + // and creating a second node in the same type + store.dispatch( + actions.createNode( + { + id: `2`, + internal: { + type: `OwnerOneTestTypeOne`, + contentDigest: `ok`, + }, + }, + { + name: `test-owner-1`, + } + ) + ) + + // and a third node of a different type but same plugin + store.dispatch( + actions.createNode( + { + id: `3`, + internal: { + type: 
`OwnerOneTestTypeTwo`, + contentDigest: `ok`, + }, + }, + { + name: `test-owner-1`, + } + ) + ) + + // fourth node by a different plugin + store.dispatch( + actions.createNode( + { + id: `4`, + internal: { + type: `OwnerTwoTestTypeThree`, + contentDigest: `ok`, + }, + }, + { + name: `test-owner-2`, + } + ) + ) + + // fifth node by second plugin but the node is deleted. Deleted nodes still have type owners + const nodeFive = { + id: `5`, + internal: { + type: `OwnerTwoTestTypeFour`, + contentDigest: `ok`, + }, + } + store.dispatch( + actions.createNode(nodeFive, { + name: `test-owner-2`, + }) + ) + store.dispatch( + actions.deleteNode(nodeFive, { + name: `test-owner-2`, + }) + ) + expect(getNode(`5`)).toBeUndefined() + + const state = store.getState() + + const ownerOne = state.typeOwners.pluginsToTypes.get(`test-owner-1`) + expect(ownerOne).toBeTruthy() + expect(ownerOne.has(`OwnerOneTestTypeOne`)).toBeTrue() + expect(ownerOne.has(`OwnerOneTestTypeTwo`)).toBeTrue() + expect(ownerOne.has(`OwnerTwoTestTypeThree`)).toBeFalse() + + const ownerTwo = state.typeOwners.pluginsToTypes.get(`test-owner-2`) + expect(ownerTwo).toBeTruthy() + expect(ownerTwo.has(`OwnerOneTestTypeTwo`)).toBeFalse() + expect(ownerTwo.has(`OwnerTwoTestTypeThree`)).toBeTrue() + expect(ownerTwo.has(`OwnerTwoTestTypeFour`)).toBeTrue() + }) }) diff --git a/packages/gatsby/src/query/__tests__/data-tracking.js b/packages/gatsby/src/query/__tests__/data-tracking.js index 749124b9f49e0..c121b3c175919 100644 --- a/packages/gatsby/src/query/__tests__/data-tracking.js +++ b/packages/gatsby/src/query/__tests__/data-tracking.js @@ -70,6 +70,13 @@ jest.mock(`gatsby-cli/lib/reporter`, () => { end: jest.fn(), } }, + createProgress: () => { + return { + start: jest.fn(), + tick: jest.fn(), + end: jest.fn(), + } + }, phantomActivity: () => { return { start: jest.fn(), diff --git a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap index 
834676040ee73..eae952bbb1063 100644 --- a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap +++ b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap @@ -123,6 +123,7 @@ Object { }, "slices": Map {}, "slicesByTemplate": Map {}, + "statefulSourcePlugins": Set {}, "staticQueriesByTemplate": Map {}, "staticQueryComponents": Map {}, "status": Object { @@ -130,6 +131,10 @@ Object { "PLUGINS_HASH": "", "plugins": Object {}, }, + "typeOwners": Object { + "pluginsToTypes": Map {}, + "typesToPlugins": Map {}, + }, "webpackCompilationHash": "", } `; diff --git a/packages/gatsby/src/redux/__tests__/nodes.ts b/packages/gatsby/src/redux/__tests__/nodes.ts index a6e4b34f3e26f..99a93ef06e6e0 100644 --- a/packages/gatsby/src/redux/__tests__/nodes.ts +++ b/packages/gatsby/src/redux/__tests__/nodes.ts @@ -4,8 +4,7 @@ import { nodesReducer } from "../reducers/nodes" import { IGatsbyNode } from "../types" import { nodesTouchedReducer } from "../reducers/nodes-touched" -const dispatch = jest.fn() - +const dispatch = jest.spyOn(store, `dispatch`) type MapObject = Record const fromMapToObject = (map: Map): MapObject => { @@ -18,8 +17,8 @@ const fromMapToObject = (map: Map): MapObject => { describe(`Create and update nodes`, (): void => { beforeEach((): void => { - dispatch.mockClear() store.dispatch({ type: `DELETE_CACHE` }) + dispatch.mockClear() }) it(`allows creating nodes`, (): void => { @@ -80,7 +79,7 @@ describe(`Create and update nodes`, (): void => { children: [], parent: `test`, internal: { - contentDigest: `hasdfljds`, + contentDigest: `hasdfljds2`, type: `Test`, }, pickle: false, diff --git a/packages/gatsby/src/redux/actions/public.js b/packages/gatsby/src/redux/actions/public.js index c20966c2d675b..4a738b0e43c5b 100644 --- a/packages/gatsby/src/redux/actions/public.js +++ b/packages/gatsby/src/redux/actions/public.js @@ -480,11 +480,13 @@ ${reservedFields.map(f => ` * "${f}"`).join(`\n`)} let deleteActions let updateNodeAction + // marking 
internal-data-bridge as owner of SitePage instead of plugin that calls createPage if (oldNode && !hasNodeChanged(node.id, node.internal.contentDigest)) { updateNodeAction = { ...actionOptions, - plugin, + plugin: { name: `internal-data-bridge` }, type: `TOUCH_NODE`, + typeName: node.internal.type, payload: node.id, } } else { @@ -495,8 +497,9 @@ ${reservedFields.map(f => ` * "${f}"`).join(`\n`)} return { ...actionOptions, type: `DELETE_NODE`, - plugin, + plugin: { name: `internal-data-bridge` }, payload: node, + isRecursiveChildrenDelete: true, } } deleteActions = findChildren(oldNode.children) @@ -509,7 +512,7 @@ ${reservedFields.map(f => ` * "${f}"`).join(`\n`)} updateNodeAction = { ...actionOptions, type: `CREATE_NODE`, - plugin, + plugin: { name: `internal-data-bridge` }, oldNode, payload: node, } @@ -539,8 +542,6 @@ ${reservedFields.map(f => ` * "${f}"`).join(`\n`)} return actions } -const deleteNodeDeprecationWarningDisplayedMessages = new Set() - /** * Delete a node * @param {object} node A node object. See the "createNode" action for more information about the node object details. @@ -553,36 +554,15 @@ actions.deleteNode = (node: any, plugin?: Plugin) => { // Always get node from the store, as the node we get as an arg // might already have been deleted. const internalNode = getNode(id) - if (plugin) { - const pluginName = plugin.name - - if ( - internalNode && - typeOwners[internalNode.internal.type] && - typeOwners[internalNode.internal.type] !== pluginName - ) - throw new Error(stripIndent` - The plugin "${pluginName}" deleted a node of a type owned by another plugin. - - The node type "${internalNode.internal.type}" is owned by "${ - typeOwners[internalNode.internal.type] - }". 
- - The node object passed to "deleteNode": - - ${JSON.stringify(internalNode, null, 4)} - - The plugin deleting the node: - - ${JSON.stringify(plugin, null, 4)} - `) - } const createDeleteAction = node => { return { type: `DELETE_NODE`, plugin, payload: node, + // main node need to be owned by plugin that calls deleteNode + // child nodes should skip ownership check + isRecursiveChildrenDelete: node !== internalNode, } } @@ -613,8 +593,6 @@ function getNextNodeCounter() { return lastNodeCounter + 1 } -const typeOwners = {} - // memberof notation is added so this code can be referenced instead of the wrapper. /** * Create a new node. @@ -795,47 +773,6 @@ const createNode = ( const oldNode = getNode(node.id) - // Ensure the plugin isn't creating a node type owned by another - // plugin. Type "ownership" is first come first served. - if (plugin) { - const pluginName = plugin.name - - if (!typeOwners[node.internal.type]) - typeOwners[node.internal.type] = pluginName - else if (typeOwners[node.internal.type] !== pluginName) - throw new Error(stripIndent` - The plugin "${pluginName}" created a node of a type owned by another plugin. - - The node type "${node.internal.type}" is owned by "${ - typeOwners[node.internal.type] - }". - - If you copy and pasted code from elsewhere, you'll need to pick a new type name - for your new node(s). - - The node object passed to "createNode": - - ${JSON.stringify(node, null, 4)} - - The plugin creating the node: - - ${JSON.stringify(plugin, null, 4)} - `) - - // If the node has been created in the past, check that - // the current plugin is the same as the previous. - if (oldNode && oldNode.internal.owner !== pluginName) { - throw new Error( - stripIndent` - Nodes can only be updated by their owner. Node "${node.id}" is - owned by "${oldNode.internal.owner}" and another plugin "${pluginName}" - tried to update it. 
- - ` - ) - } - } - if (actionOptions.parentSpan) { actionOptions.parentSpan.setTag(`nodeId`, node.id) actionOptions.parentSpan.setTag(`nodeType`, node.id) @@ -850,6 +787,7 @@ const createNode = ( plugin, type: `TOUCH_NODE`, payload: node.id, + typeName: node.internal.type, } } else { // Remove any previously created descendant nodes as they're all due @@ -861,6 +799,7 @@ const createNode = ( type: `DELETE_NODE`, plugin, payload: node, + isRecursiveChildrenDelete: true, } } deleteActions = findChildren(oldNode.children) @@ -921,8 +860,6 @@ actions.createNode = } } -const touchNodeDeprecationWarningDisplayedMessages = new Set() - /** * "Touch" a node. Tells Gatsby a node still exists and shouldn't * be garbage collected. Primarily useful for source plugins fetching @@ -934,10 +871,6 @@ const touchNodeDeprecationWarningDisplayedMessages = new Set() * touchNode(node) */ actions.touchNode = (node: any, plugin?: Plugin) => { - if (node && !typeOwners[node.internal.type]) { - typeOwners[node.internal.type] = node.internal.owner - } - const nodeId = node?.id if (!nodeId) { @@ -949,6 +882,7 @@ actions.touchNode = (node: any, plugin?: Plugin) => { type: `TOUCH_NODE`, plugin, payload: nodeId, + typeName: node.internal.type, } } @@ -1513,6 +1447,31 @@ actions.unstable_createNodeManifest = ( } } +/** + * Marks a source plugin as "stateful" which disables automatically deleting untouched nodes. Stateful source plugins manage deleting their own nodes without stale node checks in Gatsby. + * Enabling this is a major performance improvement for source plugins that manage their own node deletion. It also lowers the total memory required by a source plugin. + * When using this action, check if it's supported first with `hasFeature('stateful-source-nodes')`, `hasFeature` is exported from `gatsby-plugin-utils`. 
+ * + * @example + * import { hasFeature } from "gatsby-plugin-utils" + * + * exports.sourceNodes = ({ actions }) => { + * if (hasFeature(`stateful-source-nodes`)) { + * actions.enableStatefulSourceNodes() + * } else { + * // fallback to old behavior where all nodes are iterated on and touchNode is called. + * } + * } + * + * @param {void} $0 + */ +actions.enableStatefulSourceNodes = (plugin: Plugin) => { + return { + type: `ENABLE_STATEFUL_SOURCE_PLUGIN`, + plugin, + } +} + /** * Stores request headers for a given domain to be later used when making requests for Image CDN (and potentially other features). * diff --git a/packages/gatsby/src/redux/index.ts b/packages/gatsby/src/redux/index.ts index 8630d7b63257d..51a9db83fdaa3 100644 --- a/packages/gatsby/src/redux/index.ts +++ b/packages/gatsby/src/redux/index.ts @@ -110,6 +110,8 @@ export const saveState = (): void => { return writeToCache({ nodes: state.nodes, + typeOwners: state.typeOwners, + statefulSourcePlugins: state.statefulSourcePlugins, status: state.status, components: state.components, jobsV2: state.jobsV2, diff --git a/packages/gatsby/src/redux/reducers/index.ts b/packages/gatsby/src/redux/reducers/index.ts index 2c5eb5395214a..b23897cf61a3b 100644 --- a/packages/gatsby/src/redux/reducers/index.ts +++ b/packages/gatsby/src/redux/reducers/index.ts @@ -1,3 +1,4 @@ +import { typeOwnersReducer } from "./type-owners" import { nodesReducer } from "./nodes" import { reducer as logReducer } from "gatsby-cli/lib/reporter/redux/reducers/logs" import { pagesReducer } from "./pages" @@ -32,6 +33,7 @@ import { telemetryReducer } from "./telemetry" import { nodeManifestReducer } from "./node-manifest" import { reducer as pageTreeReducer } from "gatsby-cli/lib/reporter/redux/reducers/page-tree" import { setRequestHeadersReducer } from "./set-request-headers" +import { statefulSourcePluginsReducer } from "./stateful-source-plugins" import { slicesReducer } from "./slices" import { componentsUsingSlicesReducer } from 
"./components-using-slices" import { slicesByTemplateReducer } from "./slices-by-template" @@ -42,6 +44,7 @@ import { slicesByTemplateReducer } from "./slices-by-template" export { definitionsReducer as definitions, programReducer as program, + typeOwnersReducer as typeOwners, nodesReducer as nodes, nodesByTypeReducer as nodesByType, resolvedNodesCacheReducer as resolvedNodesCache, @@ -73,6 +76,7 @@ export { nodeManifestReducer as nodeManifests, pageTreeReducer as pageTree, setRequestHeadersReducer as requestHeaders, + statefulSourcePluginsReducer as statefulSourcePlugins, slicesReducer as slices, componentsUsingSlicesReducer as componentsUsingSlices, slicesByTemplateReducer as slicesByTemplate, diff --git a/packages/gatsby/src/redux/reducers/stateful-source-plugins.ts b/packages/gatsby/src/redux/reducers/stateful-source-plugins.ts new file mode 100644 index 0000000000000..86a6f44d33b15 --- /dev/null +++ b/packages/gatsby/src/redux/reducers/stateful-source-plugins.ts @@ -0,0 +1,19 @@ +import { IGatsbyState, IEnableStatefulSourcePluginAction } from "../types" + +/** + * Flags a source plugin as being "stateful" which means it manages its own data updates and Gatsby doesn't look for "stale" nodes after each `sourceNodes` run. 
+ */ +export const statefulSourcePluginsReducer = ( + statefulSourcePlugins: IGatsbyState["statefulSourcePlugins"] = new Set(), + action: IEnableStatefulSourcePluginAction +): IGatsbyState["statefulSourcePlugins"] => { + switch (action.type) { + case `ENABLE_STATEFUL_SOURCE_PLUGIN`: { + statefulSourcePlugins.add(action.plugin.name) + + return statefulSourcePlugins + } + default: + return statefulSourcePlugins + } +} diff --git a/packages/gatsby/src/redux/reducers/type-owners.ts b/packages/gatsby/src/redux/reducers/type-owners.ts new file mode 100644 index 0000000000000..3a4dd902a8408 --- /dev/null +++ b/packages/gatsby/src/redux/reducers/type-owners.ts @@ -0,0 +1,133 @@ +import { stripIndent } from "common-tags" +import { reportOnce } from "../../utils/report-once" +import { + ActionsUnion, + IGatsbyNode, + IGatsbyPlugin, + IGatsbyState, +} from "../types" + +function setTypeOwner( + typeName: string, + plugin: IGatsbyPlugin, + typeOwners: IGatsbyState["typeOwners"], + fullNode?: IGatsbyNode +): IGatsbyState["typeOwners"] { + const ownerName = plugin?.name || fullNode?.internal.owner + + if (!ownerName) { + reportOnce(`No plugin owner for type "${typeName}"`) + return typeOwners + } + + const existingOwnerTypes = typeOwners.pluginsToTypes.get(ownerName) + + if (!existingOwnerTypes) { + typeOwners.pluginsToTypes.set(ownerName, new Set([typeName])) + } else { + existingOwnerTypes.add(typeName) + } + + const existingTypeOwnerNameByTypeName = + typeOwners.typesToPlugins.get(typeName) + + if (!existingTypeOwnerNameByTypeName) { + typeOwners.typesToPlugins.set(typeName, ownerName) + } else if (existingTypeOwnerNameByTypeName !== ownerName) { + throw new Error(stripIndent` + The plugin "${ownerName}" created a node of a type owned by another plugin. + + The node type "${typeName}" is owned by "${existingTypeOwnerNameByTypeName}". + + If you copy and pasted code from elsewhere, you'll need to pick a new type name + for your new node(s). + + ${ + fullNode + ? 
stripIndent( + `The node object passed to "createNode": + + ${JSON.stringify(fullNode, null, 4)}\n` + ) + : `` + } + The plugin creating the node: + + ${JSON.stringify(plugin, null, 4)} + `) + } + + return typeOwners +} + +export const typeOwnersReducer = ( + typeOwners: IGatsbyState["typeOwners"] = { + pluginsToTypes: new Map(), + typesToPlugins: new Map(), + } as IGatsbyState["typeOwners"], + action: ActionsUnion +): IGatsbyState["typeOwners"] => { + switch (action.type) { + case `DELETE_NODE`: { + const { plugin, payload: internalNode } = action + + if (plugin && internalNode && !action.isRecursiveChildrenDelete) { + const pluginName = plugin.name + + const previouslyRecordedOwnerName = typeOwners.typesToPlugins.get( + internalNode.internal.type + ) + + if ( + internalNode && + previouslyRecordedOwnerName && + previouslyRecordedOwnerName !== pluginName + ) { + throw new Error(stripIndent` + The plugin "${pluginName}" deleted a node of a type owned by another plugin. + + The node type "${ + internalNode.internal.type + }" is owned by "${previouslyRecordedOwnerName}". + + The node object passed to "deleteNode": + + ${JSON.stringify(internalNode, null, 4)} + + The plugin deleting the node: + + ${JSON.stringify(plugin, null, 4)} + `) + } + } + + return typeOwners + } + case `TOUCH_NODE`: { + return setTypeOwner(action.typeName, action.plugin, typeOwners) + } + case `CREATE_NODE`: { + const { plugin, oldNode, payload: node } = action + const { owner, type } = node.internal + + setTypeOwner(type, plugin, typeOwners, node) + + // If the node has been created in the past, check that + // the current plugin is the same as the previous. + if (oldNode && oldNode.internal.owner !== owner) { + throw new Error( + stripIndent` + Nodes can only be updated by their owner. Node "${node.id}" is + owned by "${oldNode.internal.owner}" and another plugin "${owner}" + tried to update it. 
+ ` + ) + } + + return typeOwners + } + + default: + return typeOwners + } +} diff --git a/packages/gatsby/src/redux/types.ts b/packages/gatsby/src/redux/types.ts index 87867a5079886..127d4a12b884b 100644 --- a/packages/gatsby/src/redux/types.ts +++ b/packages/gatsby/src/redux/types.ts @@ -262,8 +262,16 @@ export interface IGatsbyState { nodesByType: Map resolvedNodesCache: Map // TODO nodesTouched: Set + typeOwners: { + pluginsToTypes: Map< + IGatsbyPlugin[`name`], + Set + > + typesToPlugins: Map + } nodeManifests: Array requestHeaders: Map + statefulSourcePlugins: Set telemetry: ITelemetry lastAction: ActionsUnion flattenedPlugins: Array<{ @@ -397,6 +405,8 @@ export type GatsbyStateKeys = keyof IGatsbyState export interface ICachedReduxState { nodes?: IGatsbyState["nodes"] + typeOwners?: IGatsbyState["typeOwners"] + statefulSourcePlugins?: IGatsbyState["statefulSourcePlugins"] status: IGatsbyState["status"] components: IGatsbyState["components"] jobsV2: IGatsbyState["jobsV2"] @@ -487,6 +497,7 @@ export type ActionsUnion = | ISetJobV2Context | IClearJobV2Context | ISetDomainRequestHeaders + | IEnableStatefulSourcePluginAction | ICreateSliceAction | IDeleteSliceAction | ISetSSRTemplateWebpackCompilationHashAction @@ -991,6 +1002,7 @@ export interface ICreateNodeAction { traceId: string parentSpan: Span followsSpan: Span + plugin: IGatsbyPlugin } export interface IAddFieldToNodeAction { @@ -1008,6 +1020,8 @@ export interface IDeleteNodeAction { type: `DELETE_NODE` // FIXME: figure out why payload can be undefined here payload: IGatsbyNode | void + plugin: IGatsbyPlugin + isRecursiveChildrenDelete?: boolean } export interface ISetSiteFlattenedPluginsAction { @@ -1046,6 +1060,8 @@ export interface IAddSliceDataStatsAction { export interface ITouchNodeAction { type: `TOUCH_NODE` payload: Identifier + typeName: IGatsbyNode["internal"]["type"] + plugin: IGatsbyPlugin } interface IStartIncrementalInferenceAction { @@ -1136,6 +1152,11 @@ export interface 
ISetDomainRequestHeaders { } } +export interface IEnableStatefulSourcePluginAction { + type: `ENABLE_STATEFUL_SOURCE_PLUGIN` + plugin: IGatsbyPlugin +} + export interface IProcessGatsbyImageSourceUrlAction { type: `PROCESS_GATSBY_IMAGE_SOURCE_URL` payload: { diff --git a/packages/gatsby/src/services/__tests__/source-nodes.ts b/packages/gatsby/src/services/__tests__/source-nodes.ts new file mode 100644 index 0000000000000..6b66a4947b73d --- /dev/null +++ b/packages/gatsby/src/services/__tests__/source-nodes.ts @@ -0,0 +1,119 @@ +import { sourceNodes } from "../source-nodes" +import { is } from "../../utils/source-nodes" +import { store } from "../../redux" +import * as apiRunnerNode from "../../utils/api-runner-node" +import { actions } from "../../redux/actions" +import { getDataStore } from "../../datastore" + +jest.mock(`../../utils/api-runner-node`) + +const mockAPIs = {} + +const testPlugin = { + name: `gatsby-source-test`, + version: `1.0.0`, +} + +const sourcePluginSourcingFunctions: Array<(api: any) => void> = [] +function runSourceNodesWithPluginImplementation( + fn: (api: any) => void +): ReturnType { + sourcePluginSourcingFunctions.push(fn) + return sourceNodes({ store }) +} + +describe(`Stateful source plugins`, () => { + beforeAll(() => { + ;(apiRunnerNode as jest.MockedFunction).mockImplementation( + (apiName, opts = {}) => { + if (mockAPIs[apiName]) { + return mockAPIs[apiName]( + { + actions: Object.keys(actions).reduce((acc, actionName) => { + acc[actionName] = (...args): any => + // add test plugin to all actions + store.dispatch(actions[actionName](...args, testPlugin, opts)) + return acc + }, {}), + }, + {} + ) + } + return undefined + } + ) + + mockAPIs[`sourceNodes`] = jest.fn(api => { + while (sourcePluginSourcingFunctions.length) { + sourcePluginSourcingFunctions.shift()?.(api) + } + }) + }) + + beforeEach(() => { + mockAPIs[`sourceNodes`].mockClear() + store.dispatch({ type: `DELETE_CACHE` }) + }) + + it.each([ + [ + `Stale nodes are deleted 
for plugins using touchNode`, + { needToTouchNodes: true }, + ], + [ + `Stale nodes are not deleted for plugins using actions.enableStatefulSourceNodes()`, + { needToTouchNodes: false }, + ], + ])(`%s`, async (_, { needToTouchNodes }) => { + const nodes = [ + { + id: `1`, + internal: { + type: `Test`, + contentDigest: `1`, + }, + }, + { + id: `2`, + internal: { + type: `Test`, + contentDigest: `1`, + }, + }, + ] + + await runSourceNodesWithPluginImplementation(({ actions }) => { + if (!needToTouchNodes) { + actions.enableStatefulSourceNodes() + } + + nodes.forEach(node => actions.createNode(node)) + }) + + const lmdb = getDataStore() + const allNodesInFirstSource = lmdb.getNodes() + expect(allNodesInFirstSource.length).toBe(2) + + // simulate a new process start up where the cache is warm and no nodes are touched yet + is.initialSourceNodesOfCurrentNodeProcess = true + store.getState().nodesTouched = new Set() + await runSourceNodesWithPluginImplementation(api => { + if (needToTouchNodes) { + api.actions.touchNode(nodes[1]) + } + }) + is.initialSourceNodesOfCurrentNodeProcess = false + + const allNodesInSecondSource = lmdb.getNodes() + + if (needToTouchNodes) { + // stale node should be deleted. 
Only one was touched and two were created + expect(allNodesInSecondSource.length).toBe(1) + expect(allNodesInSecondSource[0].id).toBe(`2`) + } else { + // stale node should not be deleted + expect(allNodesInSecondSource.length).toBe(2) + expect(allNodesInSecondSource[0].id).toBe(`1`) + } + }) +}) diff --git a/packages/gatsby/src/utils/api-runner-node.js b/packages/gatsby/src/utils/api-runner-node.js index a2b95f6cd6e5b..e3cca8d7f39ad 100644 --- a/packages/gatsby/src/utils/api-runner-node.js +++ b/packages/gatsby/src/utils/api-runner-node.js @@ -33,6 +33,7 @@ const { getNonGatsbyCodeFrameFormatted } = require(`./stack-trace-utils`) const { trackBuildError, decorateEvent } = require(`gatsby-telemetry`) import errorParser from "./api-runner-error-parser" import { wrapNode, wrapNodes } from "./detect-node-mutations" +import { reportOnce } from "./report-once" // Override createContentDigest to remove autogenerated data from nodes to // ensure consistent digests. @@ -98,13 +99,21 @@ const doubleBind = (boundActionCreators, api, plugin, actionOptions) => { const boundActionCreator = boundActionCreators[key] if (typeof boundActionCreator === `function`) { doubleBoundActionCreators[key] = (...args) => { + if (args.length === 0) { + return boundActionCreator(plugin, actionOptions) + } // Let action callers override who the plugin is. Shouldn't be // used that often. 
- if (args.length === 1) { + else if (args.length === 1) { return boundActionCreator(args[0], plugin, actionOptions) } else if (args.length === 2) { return boundActionCreator(args[0], args[1], actionOptions) } + + reportOnce( + `Unhandled redux action: ${key}, in plugin: ${plugin.name}` + ) + return undefined } } diff --git a/packages/gatsby/src/utils/source-nodes.ts b/packages/gatsby/src/utils/source-nodes.ts index a748cf3a268f3..d3b4ec0f81608 100644 --- a/packages/gatsby/src/utils/source-nodes.ts +++ b/packages/gatsby/src/utils/source-nodes.ts @@ -13,21 +13,23 @@ const { deleteNode } = actions * Finds the name of all plugins which implement Gatsby APIs that * may create nodes, but which have not actually created any nodes. */ -function discoverPluginsWithoutNodes( - storeState: IGatsbyState, - nodes: GatsbyIterable -): Array { +function discoverPluginNamesWithoutNodes(): Array { + const { typeOwners, flattenedPlugins } = store.getState() + // Find out which plugins own already created nodes - const nodeOwnerSet = new Set([`default-site-plugin`]) - nodes.forEach(node => nodeOwnerSet.add(node.internal.owner)) + const pluginNamesThatCreatedNodes = new Set([ + `default-site-plugin`, + // each plugin that owns node types created a node at some point + ...Array.from(typeOwners.pluginsToTypes.keys()), + ]) - return storeState.flattenedPlugins + return flattenedPlugins .filter( plugin => // "Can generate nodes" plugin.nodeAPIs.includes(`sourceNodes`) && // "Has not generated nodes" - !nodeOwnerSet.has(plugin.name) + !pluginNamesThatCreatedNodes.has(plugin.name) ) .map(plugin => plugin.name) } @@ -35,13 +37,10 @@ function discoverPluginsWithoutNodes( /** * Warn about plugins that should have created nodes but didn't. 
*/ -function warnForPluginsWithoutNodes( - state: IGatsbyState, - nodes: GatsbyIterable -): void { - const pluginsWithNoNodes = discoverPluginsWithoutNodes(state, nodes) +function warnForPluginsWithoutNodes(): void { + const pluginNamesWithNoNodes = discoverPluginNamesWithoutNodes() - pluginsWithNoNodes.map(name => + pluginNamesWithNoNodes.forEach(name => report.warn( `The ${name} plugin has generated no Gatsby nodes. Do you need it? This could also suggest the plugin is misconfigured.` ) @@ -74,23 +73,67 @@ function getStaleNodes( ) } + if (state.statefulSourcePlugins.has(rootNode.internal.owner)) { + return false + } + return !state.nodesTouched.has(rootNode.id) }) } /** - * Find all stale nodes and delete them + * Find all stale nodes and delete them, unless the plugin that owns the node type has opted out of stale node deletion (stateful source plugins). */ -function deleteStaleNodes( - state: IGatsbyState, - nodes: GatsbyIterable -): void { - const staleNodes = getStaleNodes(state, nodes) +async function deleteStaleNodes( + previouslyExistingNodeTypeNames: Array<string> +): Promise<void> { + const state = store.getState() + + let deleteCount = 0 + + const cleanupStaleNodesActivity = + report.createProgress(`Clean up stale nodes`) + + cleanupStaleNodesActivity.start() + + const { typeOwners, statefulSourcePlugins } = state + + for (const typeName of previouslyExistingNodeTypeNames) { + const pluginName = typeOwners.typesToPlugins.get(typeName) + + // no need to check this type if its owner has declared it's a stateful source plugin + if (pluginName && statefulSourcePlugins.has(pluginName)) { + continue + } - staleNodes.forEach(node => store.dispatch(deleteNode(node))) + report.verbose(`Checking for stale ${typeName} nodes`) + + const nodes = getDataStore().iterateNodesByType(typeName) + const staleNodes = getStaleNodes(state, nodes) + + for (const node of staleNodes) { + store.dispatch(deleteNode(node)) + cleanupStaleNodesActivity.tick() + + if (++deleteCount % 5000 === 0) { + // yield to the event loop once every 5000 deletions so a large cleanup doesn't block it + await new
Promise(res => { + setImmediate(() => { + res(null) + }) + }) + } + } + } + + cleanupStaleNodesActivity.end() +} + +// exported for unit test purposes only to allow internal module state resets +export const is = { + initialSourceNodesOfCurrentNodeProcess: true, } -let isInitialSourcing = true let sourcingCount = 0 export default async ({ webhookBody, @@ -103,9 +146,17 @@ export default async ({ parentSpan?: Span deferNodeMutation?: boolean }): Promise<void> => { - const traceId = isInitialSourcing + const traceId = is.initialSourceNodesOfCurrentNodeProcess ? `initial-sourceNodes` : `sourceNodes #${sourcingCount}` + + // this is persisted to cache between builds, so it will always have an up to date list of previously created types by plugin name + const { typeOwners } = store.getState() + + const previouslyExistingNodeTypeNames: Array<string> = Array.from( + typeOwners.typesToPlugins.keys() + ) + await sourceNodesApiRunner({ traceId, deferNodeMutation, @@ -116,15 +167,18 @@ export default async ({ await getDataStore().ready() - // We only warn for plugins w/o nodes and delete stale nodes on the first sourcing. - if (isInitialSourcing) { - const state = store.getState() - const nodes = getDataStore().iterateNodes() + // We only warn for plugins w/o nodes and delete stale nodes on the first sourceNodes call of the current process. + if (is.initialSourceNodesOfCurrentNodeProcess) { + is.initialSourceNodesOfCurrentNodeProcess = false - warnForPluginsWithoutNodes(state, nodes) + warnForPluginsWithoutNodes() - deleteStaleNodes(state, nodes) - isInitialSourcing = false + if ( + // if this is the very first source and no types existed before this sourceNodes run, there's no need to check for stale nodes. They won't be stale because they were just created. Only check for stale nodes in node types that already existed before this run.
+ previouslyExistingNodeTypeNames.length > 0 + ) { + await deleteStaleNodes(previouslyExistingNodeTypeNames) + } } store.dispatch(actions.apiFinished({ apiName: `sourceNodes` })) diff --git a/yarn.lock b/yarn.lock index 93168c96efd34..4012a3f950c6b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13464,7 +13464,7 @@ is-negative-zero@^2.0.2: resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.2.tgz#7bf6f03a28003b8b3965de3ac26f664d765f3150" integrity sha512-dqJvarLawXsFbNDeJW7zAz8ItJ9cd28YufuuFzh0G8pNHjJMnY08Dv7sYX2uF5UpQOwieAeOExEYAWWfu7ZZUA== -is-node-process@^1.0.1, is-node-process@^1.2.0: +is-node-process@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/is-node-process/-/is-node-process-1.2.0.tgz#ea02a1b90ddb3934a19aea414e88edef7e11d134" integrity sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw== @@ -16849,31 +16849,6 @@ msgpackr@^1.5.4: optionalDependencies: msgpackr-extract "^1.0.14" -msw@^0.49.3: - version "0.49.3" - resolved "https://registry.yarnpkg.com/msw/-/msw-0.49.3.tgz#c4ca29eddda3e82ad9e36918dda4a7428eddd7fe" - integrity sha512-kRCbDNbNnRq5LC1H/NUceZlrPAvSrMH6Or0mirIuH69NY84xwDruPn/hkXTovIK1KwDwbk+ZdoSyJlpiekLxEA== - dependencies: - "@mswjs/cookies" "^0.2.2" - "@mswjs/interceptors" "^0.17.5" - "@open-draft/until" "^1.0.3" - "@types/cookie" "^0.4.1" - "@types/js-levenshtein" "^1.1.1" - chalk "4.1.1" - chokidar "^3.4.2" - cookie "^0.4.2" - graphql "^15.0.0 || ^16.0.0" - headers-polyfill "^3.1.0" - inquirer "^8.2.0" - is-node-process "^1.0.1" - js-levenshtein "^1.1.6" - node-fetch "^2.6.7" - outvariant "^1.3.0" - path-to-regexp "^6.2.0" - strict-event-emitter "^0.4.3" - type-fest "^2.19.0" - yargs "^17.3.1" - msw@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/msw/-/msw-1.2.1.tgz#9dd347583eeba5e5c7f33b54be5600a899dc61bd" @@ -17683,7 +17658,7 @@ osenv@^0.1.4, osenv@^0.1.5: os-homedir "^1.0.0" os-tmpdir "^1.0.0" -outvariant@^1.2.1, outvariant@^1.3.0, 
outvariant@^1.4.0: +outvariant@^1.2.1, outvariant@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/outvariant/-/outvariant-1.4.0.tgz#e742e4bda77692da3eca698ef5bfac62d9fba06e" integrity sha512-AlWY719RF02ujitly7Kk/0QlV+pXGFDHrHf9O2OKqyqgBieaPOIeuSkL8sRK6j2WK+/ZAURq2kZsY0d8JapUiw==