diff --git a/packages/gatsby-source-contentful/src/__tests__/download-contentful-assets.js b/packages/gatsby-source-contentful/src/__tests__/download-contentful-assets.js index 72484e5b5a344..132bf18831a7a 100644 --- a/packages/gatsby-source-contentful/src/__tests__/download-contentful-assets.js +++ b/packages/gatsby-source-contentful/src/__tests__/download-contentful-assets.js @@ -66,17 +66,15 @@ describe(`downloadContentfulAssets`, () => { const assetNodes = [] for (const assetItem of fixtures) { assetNodes.push( - ...(await Promise.all( - createAssetNodes({ - assetItem, - createNode, - createNodeId, - defaultLocale, - locales, - space, - pluginConfig, - }) - )) + ...(await createAssetNodes({ + assetItem, + createNode, + createNodeId, + defaultLocale, + locales, + space, + pluginConfig, + })) ) } diff --git a/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js b/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js index f4e77f963eecb..cc5dfb95ceec5 100644 --- a/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js +++ b/packages/gatsby-source-contentful/src/__tests__/gatsby-node.js @@ -6,6 +6,7 @@ import { sourceNodes, onPreInit, } from "../gatsby-node" +import { existingNodes, is, memoryNodeCounts } from "../backreferences" import { fetchContent, fetchContentTypes } from "../fetch" import { makeId } from "../normalize" @@ -59,7 +60,12 @@ describe(`gatsby-node`, () => { const actions = { createTypes: jest.fn(), - setPluginStatus: jest.fn(), + setPluginStatus: jest.fn(pluginStatusObject => { + pluginStatus = { + ...pluginStatus, + ...pluginStatusObject, + } + }), createNode: jest.fn(async node => { // similar checks as gatsby does if (!_.isPlainObject(node)) { @@ -99,9 +105,20 @@ describe(`gatsby-node`, () => { }), buildInterfaceType: jest.fn(), } + let pluginStatus = {} + const resetPluginStatus = () => { + pluginStatus = {} + } const store = { getState: jest.fn(() => { - return { program: { directory: process.cwd() }, status: {} } + 
return { + program: { directory: process.cwd() }, + status: { + plugins: { + [`gatsby-source-contentful`]: pluginStatus, + }, + }, + } }), } const cache = createMockCache() @@ -404,7 +421,11 @@ describe(`gatsby-node`, () => { }) } - beforeEach(() => { + beforeEach(async () => { + existingNodes.clear() + is.firstSourceNodesCallOfCurrentNodeProcess = true + resetPluginStatus() + // @ts-ignore fetchContent.mockClear() // @ts-ignore
diff --git a/packages/gatsby-source-contentful/src/backreferences.js b/packages/gatsby-source-contentful/src/backreferences.js
new file mode 100644
index 0000000000000..c5feef0c54aab
--- /dev/null
+++ b/packages/gatsby-source-contentful/src/backreferences.js
@@ -0,0 +1,175 @@
+// @ts-check
+import { hasFeature } from "gatsby-plugin-utils/index"
+import { getDataStore } from "gatsby/dist/datastore"
+import { untilNextEventLoopTick } from "./utils"
+
+// In-memory cache of every existing Contentful node, keyed by node id. It is
+// module-global and updated incrementally because rebuilding it on every data
+// update is very slow for large sites.
+export const existingNodes = new Map()
+
+// Counts the nodes visited while filling the cache so the scan can pause with
+// untilNextEventLoopTick() every 5000 nodes.
+let allNodesLoopCount = 0
+
+// "is" === object so it can be overridden by tests
+export const is = {
+  firstSourceNodesCallOfCurrentNodeProcess: true,
+}
+
+// Number of cached nodes per `sys.type`; only the keys declared here are tracked.
+const memoryNodeCountsBySysType = {
+  Asset: 0,
+  Entry: 0,
+}
+
+// Tells whether a node carries a `localFile` field. The code that consumes the
+// answer reads `fields` as a plain object (`fullNode.fields.localFile`), and a
+// plain object has no `.includes`, so calling it unconditionally throws a
+// TypeError for any node that has `fields`. Both shapes are accepted here: a
+// list of field names and an object keyed by field name.
+function hasLocalFileField(node) {
+  const fields = node?.fields
+
+  if (!fields) {
+    return false
+  }
+
+  if (typeof fields.includes === `function`) {
+    return fields.includes(`localFile`)
+  }
+
+  return Boolean(fields.localFile)
+}
+
+// Returns the shared node cache together with the per-type counters. When the
+// cache is empty it is first filled from the datastore; during that scan nodes
+// are touched if Gatsby lacks the `stateful-source-nodes` feature and this is
+// the first call in the current process. `pluginConfig` is accepted but unused.
+export async function getExistingCachedNodes({
+  actions,
+  getNode,
+  pluginConfig,
+}) {
+  const { touchNode } = actions
+
+  const needToTouchNodes =
+    !hasFeature(`stateful-source-nodes`) &&
+    is.firstSourceNodesCallOfCurrentNodeProcess
+
+  if (existingNodes.size === 0) {
+    memoryNodeCountsBySysType.Asset = 0
+    memoryNodeCountsBySysType.Entry = 0
+
+    const dataStore = getDataStore()
+    const allNodeTypeNames = Array.from(dataStore.getTypes())
+
+    for (const typeName of allNodeTypeNames) {
+      const typeNodes = dataStore.iterateNodesByType(typeName)
+
+      // Only the first node of a type is inspected; the whole type is skipped
+      // when that node is not owned by this plugin
+      const firstNodeOfType = Array.from(typeNodes.slice(0, 1))[0]
+
+      if (
+        !firstNodeOfType ||
+        firstNodeOfType.internal.owner !== `gatsby-source-contentful`
+      ) {
+        continue
+      }
+
+      for (const node of typeNodes) {
+        if (needToTouchNodes) {
+          touchNode(node)
+
+          if (hasLocalFileField(node)) {
+            // Prevent GraphQL type inference from crashing on this property
+            const fullNode = getNode(node.id)
+            const localFileNode = getNode(fullNode.fields.localFile)
+            touchNode(localFileNode)
+          }
+        }
+
+        if (++allNodesLoopCount % 5000 === 0) {
+          // dont block the event loop
+          await untilNextEventLoopTick()
+        }
+
+        addNodeToExistingNodesCache(node)
+      }
+
+      // dont block the event loop
+      await untilNextEventLoopTick()
+    }
+  }
+
+  is.firstSourceNodesCallOfCurrentNodeProcess = false
+
+  return {
+    existingNodes,
+    memoryNodeCountsBySysType,
+  }
+}
+
+// Caches a trimmed copy of `node`: only the fields we need to compare are stored
+// to reduce memory usage. If a node is updated we'll use getNode to grab the
+// whole node before updating it. `ContentfulTag` nodes are not cached.
+export function addNodeToExistingNodesCache(node) {
+  if (node.internal.type === `ContentfulTag`) {
+    return
+  }
+
+  // Count a node id only the first time it is cached
+  if (
+    node.sys.type in memoryNodeCountsBySysType &&
+    !existingNodes.has(node.id)
+  ) {
+    memoryNodeCountsBySysType[node.sys.type] ||= 0
+    memoryNodeCountsBySysType[node.sys.type]++
+  }
+
+  const cacheNode = {
+    id: node.id,
+    contentful_id: node.contentful_id,
+    sys: {
+      type: node.sys.type,
+    },
+    node_locale: node.node_locale,
+    children: node.children,
+    internal: {
+      owner: node.internal.owner,
+    },
+    __memcache: true,
+  }
+
+  // Keep every field whose name ends in `___NODE` as well
+  for (const key of Object.keys(node)) {
+    if (key.endsWith(`___NODE`)) {
+      cacheNode[key] = node[key]
+    }
+  }
+
+  existingNodes.set(node.id, cacheNode)
+}
+
+// Drops `node` from the cache and lowers the counter for its `sys.type`, never
+// below zero. `ContentfulTag` nodes are ignored, mirroring the add function.
+export function removeNodeFromExistingNodesCache(node) {
+  if (node.internal.type === `ContentfulTag`) {
+    return
+  }
+
+  if (
+    node.sys.type in memoryNodeCountsBySysType &&
+    existingNodes.has(node.id)
+  ) {
+    memoryNodeCountsBySysType[node.sys.type] ||= 0
+    memoryNodeCountsBySysType[node.sys.type]--
+
+    if (memoryNodeCountsBySysType[node.sys.type] < 0) {
+      memoryNodeCountsBySysType[node.sys.type] = 0
+    }
+  }
+
+  existingNodes.delete(node.id)
+}
diff --git a/packages/gatsby-source-contentful/src/normalize.js b/packages/gatsby-source-contentful/src/normalize.js index 39428ee912a2b..41c42c601f64c 100644 --- a/packages/gatsby-source-contentful/src/normalize.js +++ b/packages/gatsby-source-contentful/src/normalize.js @@ -3,10 +3,12 @@ import stringify from "json-stringify-safe" import _ from "lodash" import { getGatsbyVersion } from "gatsby-core-utils" import { lt, prerelease } from "semver" +import fastq from "fastq" const typePrefix = `Contentful` export const makeTypeName = type => _.upperFirst(_.camelCase(`${typePrefix} ${type}`)) +export const assetTypeName = makeTypeName(`Asset`) const GATSBY_VERSION_MANIFEST_V2 = `4.3.0` const gatsbyVersion = @@ -79,7 +81,7 @@ export const 
buildEntryList = ({ contentTypeItems, currentSyncData }) => { export const buildResolvableSet = ({ entryList, - existingNodes = [], + existingNodes = new Map(), assets = [], }) => { const resolvable = new Set()
@@ -354,7 +356,106 @@ function contentfulCreateNodeManifest({
   }
 }
 
-export const createNodesForContentType = ({
+// Builds a `create(node, callback)` function plus a `createNodesPromise` that
+// settles once the nodes handed to `create` have been created. All `create`
+// calls are expected to happen synchronously after this function returns.
+//
+// More than 5000 nodes are pushed through a fastq queue (concurrency 10) that
+// defers every 100th node with setImmediate; smaller batches call `createNode`
+// right away and never invoke `callback`. Falsy nodes are skipped.
+function makeQueuedCreateNode({ nodeCount, createNode }) {
+  if (nodeCount > 5000) {
+    let createdNodeCount = 0
+    let resolveQueue
+    let rejectQueue
+
+    const queueFinished = new Promise((resolve, reject) => {
+      resolveQueue = resolve
+      rejectQueue = reject
+    })
+
+    const createNodesQueue = fastq((node, cb) => {
+      // Reject the overall promise and release the worker slot; without this a
+      // failed createNode would leave the queue waiting forever
+      function fail(error) {
+        rejectQueue(error)
+        cb(error)
+      }
+
+      function runCreateNode() {
+        let maybeNodePromise
+
+        try {
+          maybeNodePromise = createNode(node)
+        } catch (error) {
+          fail(error)
+          return
+        }
+
+        // createNode may return `undefined` instead of a Promise, and the `in`
+        // operator throws on anything that is not an object
+        if (typeof maybeNodePromise?.then === `function`) {
+          maybeNodePromise.then(() => {
+            cb(null)
+          }, fail)
+        } else {
+          cb(null)
+        }
+      }
+
+      if (++createdNodeCount % 100 === 0) {
+        setImmediate(() => {
+          runCreateNode()
+        })
+      } else {
+        runCreateNode()
+      }
+    }, 10)
+
+    // `drain` can fire between two pushes when createNode completes
+    // synchronously, so only resolve after re-checking on a later microtask
+    const resolveWhenIdle = () => {
+      Promise.resolve().then(() => {
+        if (createNodesQueue.idle()) {
+          resolveQueue(null)
+        }
+      })
+    }
+
+    createNodesQueue.drain = resolveWhenIdle
+    // Covers the case where nothing ends up being queued
+    resolveWhenIdle()
+
+    return {
+      create: (node, callback) => {
+        if (!node) {
+          callback?.(null)
+          return
+        }
+
+        createNodesQueue.push(node, callback)
+      },
+      createNodesPromise: queueFinished,
+    }
+  } else {
+    const nodePromises = []
+
+    return {
+      create: node => {
+        if (node) {
+          nodePromises.push(createNode(node))
+        }
+      },
+      // Collect on the next microtask: calling `Promise.all(nodePromises)` right
+      // here would see an empty list and resolve before any node is created
+      createNodesPromise: Promise.resolve().then(() =>
+        Promise.all(nodePromises)
+      ),
+    }
+  }
+}
+
+export const createNodesForContentType = async ({
   contentTypeItem,
   restrictedNodeFields,
   conflictFieldPrefix,
@@ -371,6 +421,11 @@ export const createNodesForContentType = ({ useNameForId, pluginConfig, }) => { + const { create, createNodesPromise } = makeQueuedCreateNode({ + nodeCount: entries.length, + createNode, + }) + // Establish identifier for content type // Use `name` if specified, otherwise, use internal id (usually a natural-language constant, // but sometimes a base62 uuid generated by Contentful, hence the option) @@ -381,8 +436,6 @@ export const createNodesForContentType = ({ contentTypeItemId = contentTypeItem.sys.id } - const 
createNodePromises = [] - // Create a node for the content type const contentTypeNode = { id: createNodeId(contentTypeItemId), @@ -400,7 +453,7 @@ export const createNodesForContentType = ({ }, } - createNodePromises.push(createNode(contentTypeNode)) + create(contentTypeNode) locales.forEach(locale => { const localesFallback = buildFallbackChain(locales) @@ -429,243 +482,266 @@ export const createNodesForContentType = ({ const childrenNodes = [] // First create nodes for each of the entries of that content type - const entryNodes = entries - .map(entryItem => { - const entryNodeId = mId( - space.sys.id, - entryItem.sys.id, - entryItem.sys.type - ) - - const existingNode = getNode(entryNodeId) - if (existingNode?.updatedAt === entryItem.sys.updatedAt) { - // The Contentful model has `.sys.updatedAt` leading for an entry. If the updatedAt value - // of an entry did not change, then we can trust that none of its children were changed either. - return null - } + const entryNodes = entries.map(entryItem => { + const entryNodeId = mId( + space.sys.id, + entryItem.sys.id, + entryItem.sys.type + ) - // Get localized fields. - const entryItemFields = _.mapValues(entryItem.fields, (v, k) => { - const fieldProps = contentTypeItem.fields.find( - field => field.id === k - ) + const existingNode = getNode(entryNodeId) + if (existingNode?.updatedAt === entryItem.sys.updatedAt) { + // The Contentful model has `.sys.updatedAt` leading for an entry. If the updatedAt value + // of an entry did not change, then we can trust that none of its children were changed either. + return null + } - const localizedField = fieldProps.localized - ? getField(v) - : v[defaultLocale] + // Get localized fields. + const entryItemFields = _.mapValues(entryItem.fields, (v, k) => { + const fieldProps = contentTypeItem.fields.find(field => field.id === k) - return localizedField - }) + const localizedField = fieldProps.localized + ? 
getField(v) + : v[defaultLocale] - // Prefix any conflicting fields - // https://github.com/gatsbyjs/gatsby/pull/1084#pullrequestreview-41662888 - conflictFields.forEach(conflictField => { - entryItemFields[`${conflictFieldPrefix}${conflictField}`] = - entryItemFields[conflictField] - delete entryItemFields[conflictField] - }) + return localizedField + }) - // Add linkages to other nodes based on foreign references - Object.keys(entryItemFields).forEach(entryItemFieldKey => { - if (entryItemFields[entryItemFieldKey]) { - const entryItemFieldValue = entryItemFields[entryItemFieldKey] - if (Array.isArray(entryItemFieldValue)) { - if (entryItemFieldValue[0]?.sys?.type === `Link`) { - // Check if there are any values in entryItemFieldValue to prevent - // creating an empty node field in case when original key field value - // is empty due to links to missing entities - const resolvableEntryItemFieldValue = entryItemFieldValue - .filter(function (v) { - return resolvable.has( - `${v.sys.id}___${v.sys.linkType || v.sys.type}` - ) - }) - .map(function (v) { - return mId( - space.sys.id, - v.sys.id, - v.sys.linkType || v.sys.type - ) - }) - if (resolvableEntryItemFieldValue.length !== 0) { - entryItemFields[`${entryItemFieldKey}___NODE`] = - resolvableEntryItemFieldValue - } + // Prefix any conflicting fields + // https://github.com/gatsbyjs/gatsby/pull/1084#pullrequestreview-41662888 + conflictFields.forEach(conflictField => { + entryItemFields[`${conflictFieldPrefix}${conflictField}`] = + entryItemFields[conflictField] + delete entryItemFields[conflictField] + }) - delete entryItemFields[entryItemFieldKey] - } - } else if (entryItemFieldValue?.sys?.type === `Link`) { - if ( - resolvable.has( - `${entryItemFieldValue.sys.id}___${ - entryItemFieldValue.sys.linkType || - entryItemFieldValue.sys.type - }` - ) - ) { - entryItemFields[`${entryItemFieldKey}___NODE`] = mId( - space.sys.id, - entryItemFieldValue.sys.id, - entryItemFieldValue.sys.linkType || - 
entryItemFieldValue.sys.type - ) + // Add linkages to other nodes based on foreign references + Object.keys(entryItemFields).forEach(entryItemFieldKey => { + if (entryItemFields[entryItemFieldKey]) { + const entryItemFieldValue = entryItemFields[entryItemFieldKey] + if (Array.isArray(entryItemFieldValue)) { + if (entryItemFieldValue[0]?.sys?.type === `Link`) { + // Check if there are any values in entryItemFieldValue to prevent + // creating an empty node field in case when original key field value + // is empty due to links to missing entities + const resolvableEntryItemFieldValue = entryItemFieldValue + .filter(function (v) { + return resolvable.has( + `${v.sys.id}___${v.sys.linkType || v.sys.type}` + ) + }) + .map(function (v) { + return mId( + space.sys.id, + v.sys.id, + v.sys.linkType || v.sys.type + ) + }) + if (resolvableEntryItemFieldValue.length !== 0) { + entryItemFields[`${entryItemFieldKey}___NODE`] = + resolvableEntryItemFieldValue } + delete entryItemFields[entryItemFieldKey] } + } else if (entryItemFieldValue?.sys?.type === `Link`) { + if ( + resolvable.has( + `${entryItemFieldValue.sys.id}___${ + entryItemFieldValue.sys.linkType || + entryItemFieldValue.sys.type + }` + ) + ) { + entryItemFields[`${entryItemFieldKey}___NODE`] = mId( + space.sys.id, + entryItemFieldValue.sys.id, + entryItemFieldValue.sys.linkType || entryItemFieldValue.sys.type + ) + } + delete entryItemFields[entryItemFieldKey] } - }) + } + }) - // Add reverse linkages if there are any for this node - const foreignReferences = - foreignReferenceMap[`${entryItem.sys.id}___${entryItem.sys.type}`] - if (foreignReferences) { - foreignReferences.forEach(foreignReference => { - const existingReference = entryItemFields[foreignReference.name] - if (existingReference) { - // If the existing reference is a string, we're dealing with a - // many-to-one reference which has already been recorded, so we can - // skip it. 
However, if it is an array, add it: - if (Array.isArray(existingReference)) { - entryItemFields[foreignReference.name].push( - mId( - foreignReference.spaceId, - foreignReference.id, - foreignReference.type - ) - ) - } - } else { - // If there is one foreign reference, there can be many. - // Best to be safe and put it in an array to start with. - entryItemFields[foreignReference.name] = [ + // Add reverse linkages if there are any for this node + const foreignReferences = + foreignReferenceMap[`${entryItem.sys.id}___${entryItem.sys.type}`] + if (foreignReferences) { + foreignReferences.forEach(foreignReference => { + const existingReference = entryItemFields[foreignReference.name] + if (existingReference) { + // If the existing reference is a string, we're dealing with a + // many-to-one reference which has already been recorded, so we can + // skip it. However, if it is an array, add it: + if (Array.isArray(existingReference)) { + entryItemFields[foreignReference.name].push( mId( foreignReference.spaceId, foreignReference.id, foreignReference.type - ), - ] + ) + ) } - }) - } + } else { + // If there is one foreign reference, there can be many. + // Best to be safe and put it in an array to start with. 
+ entryItemFields[foreignReference.name] = [ + mId( + foreignReference.spaceId, + foreignReference.id, + foreignReference.type + ), + ] + } + }) + } - let entryNode = { - id: entryNodeId, - spaceId: space.sys.id, - contentful_id: entryItem.sys.id, - createdAt: entryItem.sys.createdAt, - updatedAt: entryItem.sys.updatedAt, - parent: null, - children: [], - internal: { - type: `${makeTypeName(contentTypeItemId)}`, - }, - sys: { - type: entryItem.sys.type, - }, - } + let entryNode = { + id: entryNodeId, + spaceId: space.sys.id, + contentful_id: entryItem.sys.id, + createdAt: entryItem.sys.createdAt, + updatedAt: entryItem.sys.updatedAt, + parent: null, + children: [], + internal: { + type: `${makeTypeName(contentTypeItemId)}`, + }, + sys: { + type: entryItem.sys.type, + }, + } - contentfulCreateNodeManifest({ - pluginConfig, - entryItem, - entryNode, - space, - unstable_createNodeManifest, - }) + contentfulCreateNodeManifest({ + pluginConfig, + entryItem, + entryNode, + space, + unstable_createNodeManifest, + }) - // Revision applies to entries, assets, and content types - if (entryItem.sys.revision) { - entryNode.sys.revision = entryItem.sys.revision - } + // Revision applies to entries, assets, and content types + if (entryItem.sys.revision) { + entryNode.sys.revision = entryItem.sys.revision + } - // Content type applies to entries only - if (entryItem.sys.contentType) { - entryNode.sys.contentType = entryItem.sys.contentType + // Content type applies to entries only + if (entryItem.sys.contentType) { + entryNode.sys.contentType = entryItem.sys.contentType + } + + // Replace text fields with text nodes so we can process their markdown + // into HTML. + Object.keys(entryItemFields).forEach(entryItemFieldKey => { + // Ignore fields with "___node" as they're already handled + // and won't be a text field. + if (entryItemFieldKey.includes(`___`)) { + return } - // Replace text fields with text nodes so we can process their markdown - // into HTML. 
- Object.keys(entryItemFields).forEach(entryItemFieldKey => { - // Ignore fields with "___node" as they're already handled - // and won't be a text field. - if (entryItemFieldKey.includes(`___`)) { - return - } + const fieldType = contentTypeItem.fields.find( + f => + (restrictedNodeFields.includes(f.id) + ? `${conflictFieldPrefix}${f.id}` + : f.id) === entryItemFieldKey + ).type + if (fieldType === `Text`) { + const textNodeId = createNodeId( + `${entryNodeId}${entryItemFieldKey}TextNode` + ) - const fieldType = contentTypeItem.fields.find( - f => - (restrictedNodeFields.includes(f.id) - ? `${conflictFieldPrefix}${f.id}` - : f.id) === entryItemFieldKey - ).type - if (fieldType === `Text`) { - const textNodeId = createNodeId( - `${entryNodeId}${entryItemFieldKey}TextNode` + // The Contentful model has `.sys.updatedAt` leading for an entry. If the updatedAt value + // of an entry did not change, then we can trust that none of its children were changed either. + // (That's why child nodes use the updatedAt of the parent node as their digest, too) + const existingNode = getNode(textNodeId) + if (existingNode?.updatedAt !== entryItem.sys.updatedAt) { + const textNode = prepareTextNode( + textNodeId, + entryNode, + entryItemFieldKey, + entryItemFields[entryItemFieldKey] ) - // The Contentful model has `.sys.updatedAt` leading for an entry. If the updatedAt value - // of an entry did not change, then we can trust that none of its children were changed either. 
- // (That's why child nodes use the updatedAt of the parent node as their digest, too) - const existingNode = getNode(textNodeId) - if (existingNode?.updatedAt !== entryItem.sys.updatedAt) { - const textNode = prepareTextNode( - textNodeId, - entryNode, - entryItemFieldKey, - entryItemFields[entryItemFieldKey] - ) - - childrenNodes.push(textNode) - } + childrenNodes.push(textNode) + } - entryItemFields[`${entryItemFieldKey}___NODE`] = textNodeId - delete entryItemFields[entryItemFieldKey] - } else if ( - fieldType === `RichText` && - _.isPlainObject(entryItemFields[entryItemFieldKey]) - ) { - const fieldValue = entryItemFields[entryItemFieldKey] - - const rawReferences = [] - - // Locate all Contentful Links within the rich text data - const traverse = obj => { - // eslint-disable-next-line guard-for-in - for (const k in obj) { - const v = obj[k] - if (v && v.sys && v.sys.type === `Link`) { - rawReferences.push(v) - } else if (v && typeof v === `object`) { - traverse(v) - } + entryItemFields[`${entryItemFieldKey}___NODE`] = textNodeId + delete entryItemFields[entryItemFieldKey] + } else if ( + fieldType === `RichText` && + _.isPlainObject(entryItemFields[entryItemFieldKey]) + ) { + const fieldValue = entryItemFields[entryItemFieldKey] + + const rawReferences = [] + + // Locate all Contentful Links within the rich text data + const traverse = obj => { + // eslint-disable-next-line guard-for-in + for (const k in obj) { + const v = obj[k] + if (v && v.sys && v.sys.type === `Link`) { + rawReferences.push(v) + } else if (v && typeof v === `object`) { + traverse(v) } } + } - traverse(fieldValue) + traverse(fieldValue) - // Build up resolvable reference list - const resolvableReferenceIds = new Set() - rawReferences - .filter(function (v) { - return resolvable.has( - `${v.sys.id}___${v.sys.linkType || v.sys.type}` - ) - }) - .forEach(function (v) { - resolvableReferenceIds.add( - mId(space.sys.id, v.sys.id, v.sys.linkType || v.sys.type) - ) - }) + // Build up resolvable 
reference list + const resolvableReferenceIds = new Set() + rawReferences + .filter(function (v) { + return resolvable.has( + `${v.sys.id}___${v.sys.linkType || v.sys.type}` + ) + }) + .forEach(function (v) { + resolvableReferenceIds.add( + mId(space.sys.id, v.sys.id, v.sys.linkType || v.sys.type) + ) + }) - entryItemFields[entryItemFieldKey] = { - raw: stringify(fieldValue), - references___NODE: [...resolvableReferenceIds], - } - } else if ( - fieldType === `Object` && - _.isPlainObject(entryItemFields[entryItemFieldKey]) - ) { + entryItemFields[entryItemFieldKey] = { + raw: stringify(fieldValue), + references___NODE: [...resolvableReferenceIds], + } + } else if ( + fieldType === `Object` && + _.isPlainObject(entryItemFields[entryItemFieldKey]) + ) { + const jsonNodeId = createNodeId( + `${entryNodeId}${entryItemFieldKey}JSONNode` + ) + + // The Contentful model has `.sys.updatedAt` leading for an entry. If the updatedAt value + // of an entry did not change, then we can trust that none of its children were changed either. + // (That's why child nodes use the updatedAt of the parent node as their digest, too) + const existingNode = getNode(jsonNodeId) + if (existingNode?.updatedAt !== entryItem.sys.updatedAt) { + const jsonNode = prepareJSONNode( + jsonNodeId, + entryNode, + entryItemFieldKey, + entryItemFields[entryItemFieldKey] + ) + childrenNodes.push(jsonNode) + } + + entryItemFields[`${entryItemFieldKey}___NODE`] = jsonNodeId + delete entryItemFields[entryItemFieldKey] + } else if ( + fieldType === `Object` && + _.isArray(entryItemFields[entryItemFieldKey]) + ) { + entryItemFields[`${entryItemFieldKey}___NODE`] = [] + + entryItemFields[entryItemFieldKey].forEach((obj, i) => { const jsonNodeId = createNodeId( - `${entryNodeId}${entryItemFieldKey}JSONNode` + `${entryNodeId}${entryItemFieldKey}${i}JSONNode` ) // The Contentful model has `.sys.updatedAt` leading for an entry. 
If the updatedAt value @@ -677,79 +753,55 @@ export const createNodesForContentType = ({ jsonNodeId, entryNode, entryItemFieldKey, - entryItemFields[entryItemFieldKey] + obj ) childrenNodes.push(jsonNode) } - entryItemFields[`${entryItemFieldKey}___NODE`] = jsonNodeId - delete entryItemFields[entryItemFieldKey] - } else if ( - fieldType === `Object` && - _.isArray(entryItemFields[entryItemFieldKey]) - ) { - entryItemFields[`${entryItemFieldKey}___NODE`] = [] - - entryItemFields[entryItemFieldKey].forEach((obj, i) => { - const jsonNodeId = createNodeId( - `${entryNodeId}${entryItemFieldKey}${i}JSONNode` - ) + entryItemFields[`${entryItemFieldKey}___NODE`].push(jsonNodeId) + }) - // The Contentful model has `.sys.updatedAt` leading for an entry. If the updatedAt value - // of an entry did not change, then we can trust that none of its children were changed either. - // (That's why child nodes use the updatedAt of the parent node as their digest, too) - const existingNode = getNode(jsonNodeId) - if (existingNode?.updatedAt !== entryItem.sys.updatedAt) { - const jsonNode = prepareJSONNode( - jsonNodeId, - entryNode, - entryItemFieldKey, - obj - ) - childrenNodes.push(jsonNode) - } + delete entryItemFields[entryItemFieldKey] + } + }) - entryItemFields[`${entryItemFieldKey}___NODE`].push(jsonNodeId) - }) + entryNode = { + ...entryItemFields, + ...entryNode, + node_locale: locale.code, + } - delete entryItemFields[entryItemFieldKey] - } - }) + // The content of an entry is guaranteed to be updated if and only if the .sys.updatedAt field changed + entryNode.internal.contentDigest = entryItem.sys.updatedAt - entryNode = { - ...entryItemFields, - ...entryNode, - node_locale: locale.code, + // Link tags + if (pluginConfig.get(`enableTags`)) { + entryNode.metadata = { + tags___NODE: entryItem.metadata.tags.map(tag => + createNodeId(`ContentfulTag__${space.sys.id}__${tag.sys.id}`) + ), } + } - // The content of an entry is guaranteed to be updated if and only if the 
.sys.updatedAt field changed - entryNode.internal.contentDigest = entryItem.sys.updatedAt - - // Link tags - if (pluginConfig.get(`enableTags`)) { - entryNode.metadata = { - tags___NODE: entryItem.metadata.tags.map(tag => - createNodeId(`ContentfulTag__${space.sys.id}__${tag.sys.id}`) - ), - } - } + return entryNode + }) - return entryNode + entryNodes.forEach((entryNode, index) => { + create(entryNode, () => { + entryNodes[index] = undefined }) - .filter(Boolean) - - entryNodes.forEach(entryNode => { - createNodePromises.push(createNode(entryNode)) }) - childrenNodes.forEach(entryNode => { - createNodePromises.push(createNode(entryNode)) + childrenNodes.forEach((entryNode, index) => { + create(entryNode, () => { + childrenNodes[index] = undefined + }) }) }) - return createNodePromises + return createNodesPromise } -export const createAssetNodes = ({ +export const createAssetNodes = async ({ assetItem, createNode, createNodeId, @@ -758,7 +810,13 @@ export const createAssetNodes = ({ space, pluginConfig, }) => { - const createNodePromises = [] + const { create, createNodesPromise } = makeQueuedCreateNode({ + createNode, + nodeCount: locales.length, + }) + + const assetNodes = [] + locales.forEach(locale => { const localesFallback = buildFallbackChain(locales) const mId = makeMakeId({ @@ -793,7 +851,7 @@ export const createAssetNodes = ({ : ``, node_locale: locale.code, internal: { - type: `${makeTypeName(`Asset`)}`, + type: assetTypeName, }, sys: { type: assetItem.sys.type, @@ -825,13 +883,10 @@ export const createAssetNodes = ({ // The content of an entry is guaranteed to be updated if and only if the .sys.updatedAt field changed assetNode.internal.contentDigest = assetItem.sys.updatedAt - // if the node hasn't changed, createNode may return `undefined` instead of a Promise on some versions of Gatsby - const maybePromise = createNode(assetNode) - - createNodePromises.push( - maybePromise?.then ? 
maybePromise.then(() => assetNode) : assetNode - ) + assetNodes.push(assetNode) + create(assetNode) }) - return createNodePromises + await createNodesPromise + return assetNodes } diff --git a/packages/gatsby-source-contentful/src/source-nodes.js b/packages/gatsby-source-contentful/src/source-nodes.js index 88fd078ab9a78..105181fb97d0d 100644 --- a/packages/gatsby-source-contentful/src/source-nodes.js +++ b/packages/gatsby-source-contentful/src/source-nodes.js @@ -1,7 +1,12 @@ // @ts-check -import { hasFeature } from "gatsby-plugin-utils/has-feature" import isOnline from "is-online" import _ from "lodash" +import { + addNodeToExistingNodesCache, + getExistingCachedNodes, + removeNodeFromExistingNodesCache, +} from "./backreferences" +import { untilNextEventLoopTick } from "./utils" import { downloadContentfulAssets } from "./download-contentful-assets" import { fetchContent } from "./fetch" @@ -12,10 +17,10 @@ import { createAssetNodes, createNodesForContentType, makeId, - makeTypeName, } from "./normalize" import { createPluginConfig } from "./plugin-options" import { CODES } from "./report" +import { hasFeature } from "gatsby-plugin-utils/has-feature" const conflictFieldPrefix = `contentful` @@ -41,13 +46,10 @@ const CONTENT_DIGEST_COUNTER_SEPARATOR = `_COUNT_` * possible for each localized node i.e. get the localized field if it exists * or the fallback field or the default field. 
*/ - -let isFirstSourceNodesCallOfCurrentNodeProcess = true export async function sourceNodes( { actions, getNode, - getNodes, createNodeId, store, cache, @@ -57,61 +59,45 @@ export async function sourceNodes( }, pluginOptions ) { - const hasStatefulSourceNodes = hasFeature(`stateful-source-nodes`) - const needToTouchNodes = !hasStatefulSourceNodes - const { - createNode, + createNode: originalCreateNode, touchNode, - deleteNode, + deleteNode: originalDeleteNode, unstable_createNodeManifest, enableStatefulSourceNodes, } = actions - const online = await isOnline() - - if (hasStatefulSourceNodes) { + if (hasFeature(`stateful-source-nodes`)) { enableStatefulSourceNodes() } - // Gatsby only checks if a node has been touched on the first sourcing. - // As iterating and touching nodes can grow quite expensive on larger sites with - // 1000s of nodes, we'll skip doing this on subsequent sources. - else if (isFirstSourceNodesCallOfCurrentNodeProcess && needToTouchNodes) { - getNodes().forEach(node => { - if (node.internal.owner !== `gatsby-source-contentful`) { - return - } - touchNode(node) - if (node?.fields?.localFile) { - // Prevent GraphQL type inference from crashing on this property - touchNode(getNode(node.fields.localFile)) - } - }) - } - isFirstSourceNodesCallOfCurrentNodeProcess = false + const pluginConfig = createPluginConfig(pluginOptions) - if ( - !online && - process.env.GATSBY_CONTENTFUL_OFFLINE === `true` && - process.env.NODE_ENV !== `production` - ) { - return + // wrap createNode so we can cache them in memory for faster lookups when finding backreferences + const createNode = node => { + addNodeToExistingNodesCache(node) + + return originalCreateNode(node) } - const pluginConfig = createPluginConfig(pluginOptions) - const sourceId = `${pluginConfig.get(`spaceId`)}-${pluginConfig.get( - `environment` - )}` + const deleteNode = node => { + removeNodeFromExistingNodesCache(node) - const fetchActivity = reporter.activityTimer(`Contentful: Fetch data`, { - 
parentSpan, - }) + return originalDeleteNode(node) + } + + // Array of all existing Contentful nodes + const { existingNodes, memoryNodeCountsBySysType } = + await getExistingCachedNodes({ + actions, + getNode, + pluginConfig, + }) // If the user knows they are offline, serve them cached result // For prod builds though always fail if we can't get the latest data if ( - !online && + !(await isOnline()) && process.env.GATSBY_CONTENTFUL_OFFLINE === `true` && process.env.NODE_ENV !== `production` ) { @@ -121,13 +107,20 @@ export async function sourceNodes( ) return - } - if (process.env.GATSBY_CONTENTFUL_OFFLINE) { + } else if (process.env.GATSBY_CONTENTFUL_OFFLINE) { reporter.info( `Note: \`GATSBY_CONTENTFUL_OFFLINE\` was set but it either was not \`true\`, we _are_ online, or we are in production mode, so the flag is ignored.` ) } + const sourceId = `${pluginConfig.get(`spaceId`)}-${pluginConfig.get( + `environment` + )}` + + const fetchActivity = reporter.activityTimer(`Contentful: Fetch data`, { + parentSpan, + }) + fetchActivity.start() const CACHE_SYNC_TOKEN = `contentful-sync-token-${sourceId}` @@ -148,6 +141,7 @@ export async function sourceNodes( store.getState().status.plugins?.[`gatsby-source-contentful`]?.[ CACHE_SYNC_TOKEN ] + const isCachedBuild = !!syncToken // Actual fetch of data from Contentful const { @@ -202,31 +196,20 @@ export async function sourceNodes( ) processingActivity.start() - // Array of all existing Contentful nodes - const existingNodes = getNodes().filter( - n => - n.internal.owner === `gatsby-source-contentful` && - (pluginConfig.get(`enableTags`) - ? 
n.internal.type !== `ContentfulTag` - : true) - ) - // Report existing, new and updated nodes const nodeCounts = { newEntry: 0, newAsset: 0, updatedEntry: 0, updatedAsset: 0, - existingEntry: 0, - existingAsset: 0, - deletedEntry: currentSyncData.deletedEntries.length, - deletedAsset: currentSyncData.deletedAssets.length, + deletedEntry: currentSyncData?.deletedEntries?.length || 0, + deletedAsset: currentSyncData?.deletedAssets?.length || 0, } - existingNodes.forEach(node => nodeCounts[`existing${node.sys.type}`]++) - currentSyncData.entries.forEach(entry => + + currentSyncData?.entries?.forEach(entry => entry.sys.revision === 1 ? nodeCounts.newEntry++ : nodeCounts.updatedEntry++ ) - currentSyncData.assets.forEach(asset => + currentSyncData?.assets?.forEach(asset => asset.sys.revision === 1 ? nodeCounts.newAsset++ : nodeCounts.updatedAsset++ ) @@ -234,12 +217,16 @@ export async function sourceNodes( reporter.info(`Contentful: ${nodeCounts.updatedEntry} updated entries`) reporter.info(`Contentful: ${nodeCounts.deletedEntry} deleted entries`) reporter.info( - `Contentful: ${nodeCounts.existingEntry / locales.length} cached entries` + `Contentful: ${ + memoryNodeCountsBySysType.Entry / locales.length + } cached entries` ) reporter.info(`Contentful: ${nodeCounts.newAsset} new assets`) reporter.info(`Contentful: ${nodeCounts.updatedAsset} updated assets`) reporter.info( - `Contentful: ${nodeCounts.existingAsset / locales.length} cached assets` + `Contentful: ${ + memoryNodeCountsBySysType.Asset / locales.length + } cached assets` ) reporter.info(`Contentful: ${nodeCounts.deletedAsset} deleted assets`) @@ -276,7 +263,7 @@ export async function sourceNodes( reporter.verbose(`Resolving Contentful references`) - const newOrUpdatedEntries = new Set() + let newOrUpdatedEntries = new Set() entryList.forEach(entries => { entries.forEach(entry => { newOrUpdatedEntries.add(`${entry.sys.id}___${entry.sys.type}`) @@ -329,15 +316,20 @@ export async function sourceNodes( } // Update 
existing entry nodes that weren't updated but that need reverse links added or removed. - const existingNodesThatNeedReverseLinksUpdateInDatastore = new Set() - existingNodes - .filter( - n => - n.sys.type === `Entry` && - !newOrUpdatedEntries.has(`${n.id}___${n.sys.type}`) && - !deletedEntryGatsbyReferenceIds.has(n.id) - ) - .forEach(n => { + let existingNodesThatNeedReverseLinksUpdateInDatastore = new Set() + + if (isCachedBuild) { + existingNodes.forEach(n => { + if ( + !( + n.sys.type === `Entry` && + !newOrUpdatedEntries.has(`${n.id}___${n.sys.type}`) && + !deletedEntryGatsbyReferenceIds.has(n.id) + ) + ) { + return + } + if ( n.contentful_id && foreignReferenceMap[`${n.contentful_id}___${n.sys.type}`] @@ -398,10 +390,17 @@ export async function sourceNodes( }) } }) + } + + // allow node to gc if it needs to + // @ts-ignore + newOrUpdatedEntries = undefined + await untilNextEventLoopTick() // We need to call `createNode` on nodes we modified reverse links on, // otherwise changes won't actually persist if (existingNodesThatNeedReverseLinksUpdateInDatastore.size) { + let existingNodesLoopCount = 0 for (const node of existingNodesThatNeedReverseLinksUpdateInDatastore) { function addChildrenToList(node, nodeList = [node]) { for (const childNodeId of node?.children ?? []) { @@ -422,20 +421,10 @@ export async function sourceNodes( // We should not mutate original node as Gatsby will still // compare against what's in in-memory weak cache, so we // clone original node to ensure reference identity is not possible - const nodeToUpdate = _.cloneDeep(nodeToUpdateOriginal) - // We need to remove properties from existing fields - // that are reserved and managed by Gatsby (`.internal.owner`, `.fields`). - // Gatsby automatically will set `.owner` it back - delete nodeToUpdate.internal.owner - // `.fields` need to be created with `createNodeField` action, we can't just re-add them. 
- // Other plugins (or site itself) will have opportunity to re-generate them in `onCreateNode` lifecycle. - // Contentful content nodes are not using `createNodeField` so it's safe to delete them. - // (Asset nodes DO use `createNodeField` for `localFile` and if we were updating those, then - // we would also need to restore that field ourselves after re-creating a node) - delete nodeToUpdate.fields // plugin adds node field on asset nodes which don't have reverse links - - // We add or modify counter postfix to contentDigest - // to make sure Gatsby treat this as data update + const nodeToUpdate = nodeToUpdateOriginal.__memcache + ? getNode(nodeToUpdateOriginal.id) + : nodeToUpdateOriginal + let counter const [initialContentDigest, counterStr] = nodeToUpdate.internal.contentDigest.split( @@ -452,12 +441,53 @@ export async function sourceNodes( counter++ } - nodeToUpdate.internal.contentDigest = `${initialContentDigest}${CONTENT_DIGEST_COUNTER_SEPARATOR}${counter}` - createNode(nodeToUpdate) + const newNode = { + ...nodeToUpdate, + internal: { + ...nodeToUpdate.internal, + // We need to remove properties from existing fields + // that are reserved and managed by Gatsby (`.internal.owner`, `.fields`). + // Gatsby automatically will set `.owner` it back + owner: undefined, + // We add or modify counter postfix to contentDigest + // to make sure Gatsby treat this as data update + contentDigest: `${initialContentDigest}${CONTENT_DIGEST_COUNTER_SEPARATOR}${counter}`, + }, + // `.fields` need to be created with `createNodeField` action, we can't just re-add them. + // Other plugins (or site itself) will have opportunity to re-generate them in `onCreateNode` lifecycle. + // Contentful content nodes are not using `createNodeField` so it's safe to delete them. 
+ // (Asset nodes DO use `createNodeField` for `localFile` and if we were updating those, then + // we would also need to restore that field ourselves after re-creating a node) + fields: undefined, // plugin adds node field on asset nodes which don't have reverse links + } + + // memory cached nodes are mutated during back reference checks + // so we need to carry over the changes to the updated node + if (node.__memcache) { + for (const key of Object.keys(node)) { + if (!key.endsWith(`___NODE`)) { + continue + } + + newNode[key] = node[key] + } + } + + createNode(newNode) + + if (existingNodesLoopCount++ % 2000 === 0) { + // dont block the event loop + await untilNextEventLoopTick() + } } } } + // allow node to gc if it needs to + // @ts-ignore + existingNodesThatNeedReverseLinksUpdateInDatastore = undefined + await untilNextEventLoopTick() + const creationActivity = reporter.activityTimer(`Contentful: Create nodes`, { parentSpan, }) @@ -478,26 +508,28 @@ export async function sourceNodes( // A contentType can hold lots of entries which create nodes // We wait until all nodes are created and processed until we handle the next one - // TODO add batching in gatsby-core - await Promise.all( - createNodesForContentType({ - contentTypeItem, - restrictedNodeFields, - conflictFieldPrefix, - entries: entryList[i], - createNode, - createNodeId, - getNode, - resolvable, - foreignReferenceMap, - defaultLocale, - locales, - space, - useNameForId: pluginConfig.get(`useNameForId`), - pluginConfig, - unstable_createNodeManifest, - }) - ) + await createNodesForContentType({ + contentTypeItem, + restrictedNodeFields, + conflictFieldPrefix, + entries: entryList[i], + createNode, + createNodeId, + getNode, + resolvable, + foreignReferenceMap, + defaultLocale, + locales, + space, + useNameForId: pluginConfig.get(`useNameForId`), + pluginConfig, + unstable_createNodeManifest, + }) + + // allow node to garbage collect these items if it needs to + contentTypeItems[i] = undefined + 
entryList[i] = undefined + await untilNextEventLoopTick() } if (assets.length) { @@ -508,20 +540,25 @@ export async function sourceNodes( for (let i = 0; i < assets.length; i++) { // We wait for each asset to be process until handling the next one. assetNodes.push( - ...(await Promise.all( - createAssetNodes({ - assetItem: assets[i], - createNode, - createNodeId, - defaultLocale, - locales, - space, - pluginConfig, - }) - )) + ...(await createAssetNodes({ + assetItem: assets[i], + createNode, + createNodeId, + defaultLocale, + locales, + space, + pluginConfig, + })) ) + + assets[i] = undefined + if (i % 1000 === 0) { + await untilNextEventLoopTick() + } } + await untilNextEventLoopTick() + // Create tags entities if (tagItems.length) { reporter.info(`Creating ${tagItems.length} Contentful Tag nodes`) diff --git a/packages/gatsby-source-contentful/src/utils.js b/packages/gatsby-source-contentful/src/utils.js new file mode 100644 index 0000000000000..ff33ce49aed33 --- /dev/null +++ b/packages/gatsby-source-contentful/src/utils.js @@ -0,0 +1,9 @@ +// When iterating over a very large number of objects, we don't want to block the event loop. +// This helper returns a promise that resolves (with `null`) from a `setImmediate` callback, so callers can `await` it to let the event loop handle other pending work before they continue running blocking code. +export function untilNextEventLoopTick() { + return new Promise(res => { + setImmediate(() => { + res(null) + }) + }) +}