From 115d5c49c6aa30382bbd978684ee6d2a98cae4b4 Mon Sep 17 00:00:00 2001 From: Peter van der Zee <209817+pvdz@users.noreply.github.com> Date: Tue, 28 Jan 2020 14:19:51 +0100 Subject: [PATCH] perf(gatsby): Create index on the fly for non-id index (#20729) * perf(gatsby): Create index on the fly for non-id index Make. It. Fast. Er * elemMatch test by @freiksenet * Drop unused arg * Fix api for loki (by @freiksenet) --- .../gatsby/src/redux/__tests__/run-sift.js | 362 ++++++++++----- packages/gatsby/src/redux/nodes.js | 121 +++++ .../src/redux/reducers/resolved-nodes.js | 1 + packages/gatsby/src/redux/run-sift.js | 377 ++++++++++++--- .../schema/__tests__/fixtures/node-model.js | 13 + .../gatsby/src/schema/__tests__/node-model.js | 436 +++++++++++------- packages/gatsby/src/schema/node-model.js | 16 + 7 files changed, 979 insertions(+), 347 deletions(-) diff --git a/packages/gatsby/src/redux/__tests__/run-sift.js b/packages/gatsby/src/redux/__tests__/run-sift.js index d7ea287b46292..1c559ef2f8a05 100644 --- a/packages/gatsby/src/redux/__tests__/run-sift.js +++ b/packages/gatsby/src/redux/__tests__/run-sift.js @@ -14,6 +14,8 @@ if (!process.env.GATSBY_DB_NODES || process.env.GATSBY_DB_NODES === `redux`) { { id: `id_1`, string: `foo`, + slog: `abc`, + deep: { flat: { search: { chain: 123 } } }, internal: { type: `notTest`, contentDigest: `0`, @@ -22,6 +24,8 @@ if (!process.env.GATSBY_DB_NODES || process.env.GATSBY_DB_NODES === `redux`) { { id: `id_2`, string: `bar`, + slog: `def`, + deep: { flat: { search: { chain: 500 } } }, internal: { type: `test`, contentDigest: `0`, @@ -29,7 +33,9 @@ if (!process.env.GATSBY_DB_NODES || process.env.GATSBY_DB_NODES === `redux`) { }, { id: `id_3`, + slog: `abc`, string: `baz`, + deep: { flat: { search: { chain: 300 } } }, internal: { type: `test`, contentDigest: `0`, @@ -38,6 +44,8 @@ if (!process.env.GATSBY_DB_NODES || process.env.GATSBY_DB_NODES === `redux`) { { id: `id_4`, string: `qux`, + slog: `def`, + deep: { flat: { search: { 
chain: 300 } } }, internal: { type: `test`, contentDigest: `0`, @@ -62,136 +70,272 @@ if (!process.env.GATSBY_DB_NODES || process.env.GATSBY_DB_NODES === `redux`) { actions.createNode(node, { name: `test` })(store.dispatch) ) }) - - describe(`run-sift`, () => { - const typeName = `test` - const gqlType = new GraphQLObjectType({ - name: typeName, - fields: () => { - return { - id: { type: new GraphQLNonNull(GraphQLID) }, - string: { type: GraphQLString }, - first: { - type: new GraphQLObjectType({ - name: `First`, - fields: { - willBeResolved: { - type: GraphQLString, - resolve: () => `resolvedValue`, - }, - second: { - type: new GraphQLList( - new GraphQLObjectType({ - name: `Second`, - fields: { - willBeResolved: { - type: GraphQLString, - resolve: () => `resolvedValue`, - }, - third: new GraphQLObjectType({ - name: `Third`, - fields: { - foo: GraphQLString, + ;[ + { desc: `with cache`, cb: () => new Map() }, // Avoids sift for flat filters + { desc: `no cache`, cb: () => undefined }, // Always goes through sift + ].forEach(({ desc, cb: createIndexCache }) => { + describe(`run-sift [${desc}]`, () => { + const typeName = `test` + const gqlType = new GraphQLObjectType({ + name: typeName, + fields: () => { + return { + id: { type: new GraphQLNonNull(GraphQLID) }, + string: { type: GraphQLString }, + first: { + type: new GraphQLObjectType({ + name: `First`, + fields: { + willBeResolved: { + type: GraphQLString, + resolve: () => `resolvedValue`, + }, + second: { + type: new GraphQLList( + new GraphQLObjectType({ + name: `Second`, + fields: { + willBeResolved: { + type: GraphQLString, + resolve: () => `resolvedValue`, }, - }), - }, - }) - ), + third: new GraphQLObjectType({ + name: `Third`, + fields: { + foo: GraphQLString, + }, + }), + }, + }) + ), + }, }, - }, - }), - }, - } - }, - }) - describe(`filters by just id correctly`, () => { - it(`eq operator`, async () => { - const queryArgs = { - filter: { - id: { eq: `id_2` }, - }, - } + }), + }, + } + }, + }) + 
describe(`filters by just id correctly`, () => { + it(`eq operator`, async () => { + const queryArgs = { + filter: { + id: { eq: `id_2` }, + }, + } - const resultSingular = await runSift({ - gqlType, - queryArgs, - firstOnly: true, - nodeTypeNames: [gqlType.name], + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + const resultMany = await runSift({ + gqlType, + queryArgs, + firstOnly: false, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + expect(resultSingular.map(o => o.id)).toEqual([mockNodes()[1].id]) + expect(resultMany.map(o => o.id)).toEqual([mockNodes()[1].id]) }) - const resultMany = await runSift({ - gqlType, - queryArgs, - firstOnly: false, - nodeTypeNames: [gqlType.name], + it(`eq operator honors type`, async () => { + const queryArgs = { + filter: { + id: { eq: `id_1` }, + }, + } + + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + const resultMany = await runSift({ + gqlType, + queryArgs, + firstOnly: false, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + // `id-1` node is not of queried type, so results should be empty + expect(resultSingular).toEqual([]) + expect(resultMany).toEqual(null) }) - expect(resultSingular.map(o => o.id)).toEqual([mockNodes()[1].id]) - expect(resultMany.map(o => o.id)).toEqual([mockNodes()[1].id]) + it(`non-eq operator`, async () => { + const queryArgs = { + filter: { + id: { ne: `id_2` }, + }, + } + + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + const resultMany = await runSift({ + gqlType, + queryArgs, + firstOnly: false, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: 
createIndexCache(), + }) + + expect(resultSingular.map(o => o.id)).toEqual([mockNodes()[2].id]) + expect(resultMany.map(o => o.id)).toEqual([ + mockNodes()[2].id, + mockNodes()[3].id, + ]) + }) + it(`return empty array in case of empty nodes`, async () => { + const queryArgs = { filter: {}, sort: {} } + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [`NonExistentNodeType`], + typedKeyValueIndexes: createIndexCache(), + }) + expect(resultSingular).toEqual([]) + }) }) + describe(`filters by arbitrary property correctly`, () => { + it(`eq operator flat single`, async () => { + const queryArgs = { + filter: { + slog: { eq: `def` }, + }, + } - it(`eq operator honors type`, async () => { - const queryArgs = { - filter: { - id: { eq: `id_1` }, - }, - } + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + expect(Array.isArray(resultSingular)).toBe(true) + expect(resultSingular.length).toEqual(1) - const resultSingular = await runSift({ - gqlType, - queryArgs, - firstOnly: true, - nodeTypeNames: [gqlType.name], + resultSingular.map(node => { + expect(node.slog).toEqual(`def`) + }) }) + it(`eq operator flat many`, async () => { + const queryArgs = { + filter: { + slog: { eq: `def` }, + }, + } - const resultMany = await runSift({ - gqlType, - queryArgs, - firstOnly: false, - nodeTypeNames: [gqlType.name], + const resultMany = await runSift({ + gqlType, + queryArgs, + firstOnly: false, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + expect(Array.isArray(resultMany)).toBe(true) + expect(resultMany.length).toEqual(2) + + resultMany.map(node => { + expect(node.slog).toEqual(`def`) + }) }) + it(`eq operator deep single`, async () => { + const queryArgs = { + filter: { + deep: { flat: { search: { chain: { eq: 300 } } } }, + }, + } - // `id-1` node is not of queried type, so 
results should be empty - expect(resultSingular).toEqual([]) - expect(resultMany).toEqual(null) - }) + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) - it(`non-eq operator`, async () => { - const queryArgs = { - filter: { - id: { ne: `id_2` }, - }, - } + expect(Array.isArray(resultSingular)).toBe(true) + expect(resultSingular.length).toEqual(1) - const resultSingular = await runSift({ - gqlType, - queryArgs, - firstOnly: true, - nodeTypeNames: [gqlType.name], + resultSingular.map(node => { + expect(node.deep.flat.search.chain).toEqual(300) + }) }) + it(`eq operator deep many`, async () => { + const queryArgs = { + filter: { + deep: { flat: { search: { chain: { eq: 300 } } } }, + }, + } + + const resultMany = await runSift({ + gqlType, + queryArgs, + firstOnly: false, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + expect(Array.isArray(resultMany)).toBe(true) + expect(resultMany.length).toEqual(2) - const resultMany = await runSift({ - gqlType, - queryArgs, - firstOnly: false, - nodeTypeNames: [gqlType.name], + resultMany.map(node => { + expect(node.deep.flat.search.chain).toEqual(300) + }) }) + it(`eq operator deep miss single`, async () => { + const queryArgs = { + filter: { + deep: { flat: { search: { chain: { eq: 999 } } } }, + }, + } - expect(resultSingular.map(o => o.id)).toEqual([mockNodes()[2].id]) - expect(resultMany.map(o => o.id)).toEqual([ - mockNodes()[2].id, - mockNodes()[3].id, - ]) - }) - it(`return empty array in case of empty nodes`, async () => { - const queryArgs = { filter: {}, sort: {} } - const resultSingular = await runSift({ - gqlType, - queryArgs, - firstOnly: true, - nodeTypeNames: [`NonExistentNodeType`], + const resultSingular = await runSift({ + gqlType, + queryArgs, + firstOnly: true, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + 
expect(Array.isArray(resultSingular)).toBe(true) + expect(resultSingular.length).toEqual(0) + }) + it(`eq operator deep miss many`, async () => { + const queryArgs = { + filter: { + deep: { flat: { search: { chain: { eq: 999 } } } }, + }, + } + + const resultMany = await runSift({ + gqlType, + queryArgs, + firstOnly: false, + nodeTypeNames: [gqlType.name], + typedKeyValueIndexes: createIndexCache(), + }) + + expect(resultMany).toBe(null) }) - expect(resultSingular).toEqual([]) }) }) }) diff --git a/packages/gatsby/src/redux/nodes.js b/packages/gatsby/src/redux/nodes.js index 51949272645d6..2dd71c6e907f7 100644 --- a/packages/gatsby/src/redux/nodes.js +++ b/packages/gatsby/src/redux/nodes.js @@ -154,3 +154,124 @@ const addResolvedNodes = (typeName, arr) => { } exports.addResolvedNodes = addResolvedNodes + +/** + * Given a ("flat") filter path leading up to "eq", a set of node types, and a + * cache, create a cache that for each resulting value of the filter contains + * all the Nodes in a Set (or, if the property is `id`, just the Nodes). + * This cache is used for applying the filter and is a massive improvement over + * looping over all the nodes, when the number of pages (/nodes) scale up. 
+ * + * @param {Array} chain + * @param {Array} nodeTypeNames + * @param {undefined | Map<string, Map<string | number | boolean, Node | Set<Node>>>} typedKeyValueIndexes + * This object lives in query/query-runner.js and is passed down runQuery + * @returns {undefined} + */ +const ensureIndexByTypedChain = ( + chain, + nodeTypeNames, + typedKeyValueIndexes +) => { + const chained = chain.join(`+`) + + const nodeTypeNamePrefix = nodeTypeNames.join(`,`) + `/` + // The format of the typedKey is `type,type/path+to+eqobj` + const typedKey = nodeTypeNamePrefix + chained + + let byKeyValue = typedKeyValueIndexes.get(typedKey) + if (byKeyValue) { + return + } + + const { nodes, resolvedNodesCache } = store.getState() + + byKeyValue = new Map() // Map<value, Node | Set<Node>> + typedKeyValueIndexes.set(typedKey, byKeyValue) + + nodes.forEach(node => { + if (!nodeTypeNames.includes(node.internal.type)) { + return + } + + // There can be a filter that targets `__gatsby_resolved` so fix that first + if (!node.__gatsby_resolved) { + const typeName = node.internal.type + const resolvedNodes = resolvedNodesCache.get(typeName) + node.__gatsby_resolved = resolvedNodes?.get(node.id) + } + + let v = node + let i = 0 + while (i < chain.length && v) { + const nextProp = chain[i++] + v = v[nextProp] + } + + if ( + (typeof v !== `string` && + typeof v !== `number` && + typeof v !== `boolean`) || + i !== chain.length + ) { + // Not sure whether this is supposed to happen, but this means that either + // - The node chain ended with `undefined`, or + // - The node chain ended in something other than a primitive, or + // - A part in the chain in the object was not an object + return + } + + // Special case `id` as that bucket never receives more than one element + if (chained === `id`) { + // Note: this is not a duplicate from `nodes` because this set only + // contains nodes of this type.
Page nodes are a subset of all nodes + byKeyValue.set(v, node) + return + } + + let set = byKeyValue.get(v) + if (!set) { + set = new Set() + byKeyValue.set(v, set) + } + set.add(node) + }) +} + +exports.ensureIndexByTypedChain = ensureIndexByTypedChain + +/** + * Given a ("flat") filter path leading up to "eq", a target value to filter + * for, a set of node types, and a pre-generated lookup cache, return the set + * of Nodes (or, if the property is `id` just the Node) which pass the filter. + * This returns `undefined` if there is no Node that passes the filter. + * + * Basically if the filter was {a: {b: {slug: {eq: "foo/bar"}}}} then it will + * return all the nodes that have `node.a.b.slug === "foo/bar"`. That is usually (but + * not always) at most one node for slug, but this filter can apply to anything. + * + * The only exception is `id`, since internally there can be at most one node + * per `id` so there's a minor optimization for that (no need for Sets). + * + * @param {Array} chain Note: `eq` is assumed to be the leaf prop here + * @param {boolean | number | string} value This is the value being filtered for + * @param {Array} nodeTypeNames + * @param {undefined | Map<string, Map<string | number | boolean, Node | Set<Node>>>} typedKeyValueIndexes + * This object lives in query/query-runner.js and is passed down runQuery + * @returns {Set<Node> | Node | undefined} + */ +const getNodesByTypedChain = ( + chain, + value, + nodeTypeNames, + typedKeyValueIndexes +) => { + const key = chain.join(`+`) + + const typedKey = nodeTypeNames.join(`,`) + `/` + key + + let byTypedKey = typedKeyValueIndexes?.get(typedKey) + return byTypedKey?.get(value) +} + +exports.getNodesByTypedChain = getNodesByTypedChain diff --git a/packages/gatsby/src/redux/reducers/resolved-nodes.js b/packages/gatsby/src/redux/reducers/resolved-nodes.js index 3c4503406c863..e10e69d8fa0df 100644 --- a/packages/gatsby/src/redux/reducers/resolved-nodes.js +++ b/packages/gatsby/src/redux/reducers/resolved-nodes.js @@ -1,3 +1,4 @@ +// resolvedNodesCache module.exports =
(state = new Map(), action) => { switch (action.type) { case `DELETE_CACHE`: diff --git a/packages/gatsby/src/redux/run-sift.js b/packages/gatsby/src/redux/run-sift.js index 8399b1090461a..64e7d185f1051 100644 --- a/packages/gatsby/src/redux/run-sift.js +++ b/packages/gatsby/src/redux/run-sift.js @@ -9,6 +9,12 @@ const { objectToDottedField, liftResolvedFields, } = require(`../db/common/query`) +const { + ensureIndexByTypedChain, + getNodesByTypedChain, + addResolvedNodes, + getNode: siftGetNode, +} = require(`./nodes`) ///////////////////////////////////////////////////////////////////// // Parse filter @@ -74,7 +80,7 @@ function handleFirst(siftArgs, nodes) { } } -function handleMany(siftArgs, nodes, sort, resolvedFields) { +function handleMany(siftArgs, nodes) { let result = _.isEmpty(siftArgs) ? nodes : nodes.filter( @@ -83,89 +89,299 @@ function handleMany(siftArgs, nodes, sort, resolvedFields) { }) ) - if (!result || !result.length) return null - - // Sort results. - if (sort && result.length > 1) { - // create functions that return the item to compare on - const dottedFields = objectToDottedField(resolvedFields) - const dottedFieldKeys = Object.keys(dottedFields) - const sortFields = sort.fields - .map(field => { - if ( - dottedFields[field] || - dottedFieldKeys.some(key => field.startsWith(key)) - ) { - return `__gatsby_resolved.${field}` - } else { - return field - } - }) - .map(field => v => getValueAt(v, field)) - const sortOrder = sort.order.map(order => order.toLowerCase()) - - result = _.orderBy(result, sortFields, sortOrder) + return result?.length ? result : null +} + +/** + * Given an object, assert that it has exactly one leaf property and that this + * leaf is a number, string, or boolean. Additionally confirms that the path + * does not contain the special cased `elemMatch` name. + * Returns undefined if not a flat path, if it contains `elemMatch`, or if the + * leaf value was not a bool, number, or string. 
+ * If array, it contains the property path followed by the leaf value. + * Returns `undefined` if any condition is not met + * + * Example: `{a: {b: {c: "x"}}}` is flat with a chain of `['a', 'b', 'c', 'x']` + * Example: `{a: {b: "x", c: "y"}}` is not flat because x and y are 2 leafs + * + * @param {Object} obj + * @returns {Array|undefined} + */ +const getFlatPropertyChain = obj => { + if (!obj) { + return undefined + } + + let chain = [] + let props = Object.getOwnPropertyNames(obj) + let next = obj + while (props.length === 1) { + const prop = props[0] + if (prop === `elemMatch`) { + // TODO: Support handling this special case without sift as well + return undefined + } + chain.push(prop) + next = next[prop] + if ( + typeof next === `string` || + typeof next === `number` || + typeof next === `boolean` + ) { + chain.push(next) + return chain + } + if (!next) { + return undefined + } + props = Object.getOwnPropertyNames(next) + } + + // This means at least one object in the chain had more than one property + return undefined +} + +/** + * Given the chain of a simple filter, return the set of nodes that pass the + * filter. The chain should be a property chain leading to the property to + * check, followed by the value to check against. Common example: + * `allThings(filter: { fields: { slug: { eq: $slug } } })` + * Only nodes of given node types will be considered + * A fast index is created if one doesn't exist yet so cold call is slower. + * The empty result value is null if firstOnly is false, or else an empty array. 
+ * + * @param {Array} chain Note: `eq` is assumed to be the leaf prop here + * @param {boolean | number | string} targetValue chain.chain.eq === targetValue + * @param {Array} nodeTypeNames + * @param {undefined | Map>} typedKeyValueIndexes + * @returns {Array | undefined} + */ +const runFlatFilterWithoutSift = ( + chain, + targetValue, + nodeTypeNames, + typedKeyValueIndexes +) => { + ensureIndexByTypedChain(chain, nodeTypeNames, typedKeyValueIndexes) + + const nodesByKeyValue = getNodesByTypedChain( + chain, + targetValue, + nodeTypeNames, + typedKeyValueIndexes + ) + + // If we couldn't find the needle then maybe sift can, for example if the + // schema contained a proxy; `slug: String @proxy(from: "slugInternal")` + // There are also cases (and tests) where id exists with a different type + if (!nodesByKeyValue) { + return undefined + } + + if (chain.join(`,`) === `id`) { + // The `id` key is not indexed in Sets (because why) so don't spread it + return [nodesByKeyValue] } - return result + + // In all other cases this must be a non-empty Set because the indexing + // mechanism does not create a Set unless there's a Node for it + return [...nodesByKeyValue] } /** - * Filters a list of nodes using mongodb-like syntax. + * Filters and sorts a list of nodes using mongodb-like syntax. * - * @param args raw graphql query filter as an object - * @param nodes The nodes array to run sift over (Optional - * will load itself if not present) - * @param type gqlType. Created in build-node-types - * @param firstOnly true if you want to return only the first result - * found. This will return a collection of size 1. Not a single - * element - * @returns Collection of results. Collection will be limited to size + * @param args raw graphql query filter/sort as an object + * @property {boolean | number | string} args.type gqlType. See build-node-types + * @property {boolean} args.firstOnly true if you want to return only the first + * result found. 
This will return a collection of size 1. Not a single element + * @property {{filter?: Object, sort?: Object} | undefined} args.queryArgs + * @property {undefined | Map>} args.typedKeyValueIndexes + * May be undefined. A cache of indexes where you can look up Nodes grouped + * by a key: `types.join(',')+'/'+filterPath.join('+')`, which yields a Map + * which holds a Set of Nodes for the value that the filter is trying to eq + * against. If the property is `id` then there is no Set, it's just the Node. + * This object lives in query/query-runner.js and is passed down runQuery + * @returns Collection of results. Collection will be limited to 1 * if `firstOnly` is true */ -const runSift = (args: Object) => { - const { getNode, addResolvedNodes, getResolvedNode } = require(`./nodes`) - - const { nodeTypeNames } = args - if ( - args.queryArgs?.filter && - Object.getOwnPropertyNames(args.queryArgs.filter).length === 1 && - typeof args.queryArgs.filter?.id?.eq === `string` - ) { - // The args have an id.eq which subsumes all other queries - // Since the id of every node is unique there can only ever be one node found this way. Find it and return it. - let id = args.queryArgs.filter.id.eq - let node = undefined - nodeTypeNames.some(typeName => { - node = getResolvedNode(typeName, id) - return !!node - }) - if (node) { - return [node] +const runFilterAndSort = (args: Object) => { + const { + queryArgs: { filter, sort } = { filter: {}, sort: {} }, + resolvedFields = {}, + firstOnly = false, + nodeTypeNames, + typedKeyValueIndexes, + } = args + + let result = applyFilters( + filter, + firstOnly, + nodeTypeNames, + typedKeyValueIndexes, + resolvedFields + ) + + return sortNodes(result, sort, resolvedFields) +} + +exports.runSift = runFilterAndSort + +/** + * Applies filter. First through a simple approach, which is much faster than + * running sift, but not as versatile and correct. If no nodes were found then + * it falls back to filtering through sift. 
+ * + * @param {Object | undefined} filter + * @param {boolean} firstOnly + * @param {Array} nodeTypeNames + * @param {undefined | Map>} typedKeyValueIndexes + * @param resolvedFields + * @returns {Array | undefined} Collection of results. Collection will be + * limited to 1 if `firstOnly` is true + */ +const applyFilters = ( + filter, + firstOnly, + nodeTypeNames, + typedKeyValueIndexes, + resolvedFields +) => { + let result + if (typedKeyValueIndexes) { + result = filterWithoutSift(filter, nodeTypeNames, typedKeyValueIndexes) + if (result) { + if (firstOnly) { + return result.slice(0, 1) + } + return result } } + return filterWithSift(filter, firstOnly, nodeTypeNames, resolvedFields) +} + +/** + * Check if the filter is "flat" (single leaf) and an "eq". If so, uses custom + * indexes based on filter and types and returns any result it finds. + * If conditions are not met or no nodes are found, returns undefined. + * + * @param {Object | undefined} filter + * @param {Array} nodeTypeNames + * @param {undefined | Map>} typedKeyValueIndexes + * @returns {Array|undefined} Collection of results + */ +const filterWithoutSift = (filter, nodeTypeNames, typedKeyValueIndexes) => { + if (!filter) { + return undefined + } + + // Filter can be any struct of {a: {b: {c: {eq: "x"}}}} and we want to confirm + // there is exactly one leaf in this structure and that this leaf is `eq`. The + // actual names are irrelevant, they are a chain of props on a Node. 
+ + let chainWithNeedle = getFlatPropertyChain(filter) + if (!chainWithNeedle) { + return undefined + } + + // `chainWithNeedle` should now be like: + // `filter = {this: {is: {the: {chain: {eq: needle}}}}}` + // -> + // `['this', 'is', 'the', 'chain', 'eq', needle]` + let targetValue = chainWithNeedle.pop() + let lastPath = chainWithNeedle.pop() + + // This can also be `ne`, `in` or any other graphql comparison op + if (lastPath !== `eq`) { + return undefined + } + + return runFlatFilterWithoutSift( + chainWithNeedle, + targetValue, + nodeTypeNames, + typedKeyValueIndexes + ) +} + +/** + * Use sift to apply filters + * + * @param {Object | undefined} filter + * @param {boolean} firstOnly + * @param {Array} nodeTypeNames + * @param resolvedFields + * @returns {Array | undefined | null} Collection of results. Collection + * will be limited to 1 if `firstOnly` is true + */ +const filterWithSift = (filter, firstOnly, nodeTypeNames, resolvedFields) => { let nodes = [] nodeTypeNames.forEach(typeName => addResolvedNodes(typeName, nodes)) - return runSiftOnNodes(nodes, args, getNode) + return _runSiftOnNodes( + nodes, + filter, + firstOnly, + nodeTypeNames, + resolvedFields, + siftGetNode + ) } -exports.runSift = runSift - -const runSiftOnNodes = (nodes, args, getNode) => { +/** + * Given a list of nodes and legacy query args, filter the nodes with sift + * Note: this entry point is used by GATSBY_DB_NODES=loki + * + * @param {Array} nodes Should be all nodes of given type(s) + * @param args Legacy api arg, see _runSiftOnNodes + * @param {?function(id: string): Node} getNode + * @returns {Array | undefined | null} Collection of results.
Collection + * will be limited to 1 if `firstOnly` is true + */ +const runSiftOnNodes = (nodes, args, getNode = siftGetNode) => { const { - queryArgs = { filter: {}, sort: {} }, + queryArgs: { filter } = { filter: {} }, firstOnly = false, resolvedFields = {}, nodeTypeNames, } = args + return _runSiftOnNodes( + nodes, + filter, + firstOnly, + nodeTypeNames, + resolvedFields, + getNode + ) +} + +exports.runSiftOnNodes = runSiftOnNodes + +/** + * Given a list of nodes and a filter, return the nodes that pass the filter + * + * @param {Array} nodes Should be all nodes of given type(s) + * @param {Object | undefined} filter + * @param {boolean} firstOnly + * @param {Array} nodeTypeNames + * @param resolvedFields + * @param {function(id: string): Node} getNode Note: this is different for loki + * @returns {Array | undefined | null} Collection of results. Collection + * will be limited to 1 if `firstOnly` is true + */ +const _runSiftOnNodes = ( + nodes, + filter, + firstOnly, + nodeTypeNames, + resolvedFields, + getNode +) => { let siftFilter = getFilters( - liftResolvedFields( - toDottedFields(prepareQueryArgs(queryArgs.filter)), - resolvedFields - ) + liftResolvedFields(toDottedFields(prepareQueryArgs(filter)), resolvedFields) ) // If the the query for single node only has a filter for an "id" @@ -177,7 +393,9 @@ const runSiftOnNodes = (nodes, args, getNode) => { !node || (node.internal && !nodeTypeNames.includes(node.internal.type)) ) { - if (firstOnly) return [] + if (firstOnly) { + return [] + } return null } @@ -187,8 +405,39 @@ const runSiftOnNodes = (nodes, args, getNode) => { if (firstOnly) { return handleFirst(siftFilter, nodes) } else { - return handleMany(siftFilter, nodes, queryArgs.sort, resolvedFields) + return handleMany(siftFilter, nodes) } } -exports.runSiftOnNodes = runSiftOnNodes +/** + * Given a list of filtered nodes and sorting parameters, sort the nodes + * + * @param {Array | undefined | null} nodes Pre-filtered list of nodes + * @param {Object |
undefined} sort Sorting arguments + * @param resolvedFields + * @returns {Array | undefined | null} Same as input, except sorted + */ +const sortNodes = (nodes, sort, resolvedFields) => { + if (!sort || nodes?.length <= 1) { + return nodes + } + + // create functions that return the item to compare on + const dottedFields = objectToDottedField(resolvedFields) + const dottedFieldKeys = Object.keys(dottedFields) + const sortFields = sort.fields + .map(field => { + if ( + dottedFields[field] || + dottedFieldKeys.some(key => field.startsWith(key)) + ) { + return `__gatsby_resolved.${field}` + } else { + return field + } + }) + .map(field => v => getValueAt(v, field)) + const sortOrder = sort.order.map(order => order.toLowerCase()) + + return _.orderBy(nodes, sortFields, sortOrder) +} diff --git a/packages/gatsby/src/schema/__tests__/fixtures/node-model.js b/packages/gatsby/src/schema/__tests__/fixtures/node-model.js index 5a94aa3be3f40..02ebbac303bd1 100644 --- a/packages/gatsby/src/schema/__tests__/fixtures/node-model.js +++ b/packages/gatsby/src/schema/__tests__/fixtures/node-model.js @@ -28,6 +28,14 @@ const nodes = [ parent: `file1`, children: [], internal: { type: `Post`, contentDigest: `0` }, + nestedObject: [ + { + nestedValue: `1`, + }, + { + nestedValue: `3`, + }, + ], frontmatter: { authors: [`person1`], reviewers: [`person1`, `person2`], @@ -40,6 +48,11 @@ const nodes = [ parent: `file2`, children: [], internal: { type: `Post`, contentDigest: `0` }, + nestedObject: [ + { + nestedValue: `2`, + }, + ], frontmatter: { authors: [`person1`, `person2`], reviewers: [], diff --git a/packages/gatsby/src/schema/__tests__/node-model.js b/packages/gatsby/src/schema/__tests__/node-model.js index 2bb22dddf08c9..e48034b6fd62e 100644 --- a/packages/gatsby/src/schema/__tests__/node-model.js +++ b/packages/gatsby/src/schema/__tests__/node-model.js @@ -285,105 +285,178 @@ describe(`NodeModel`, () => { ) }) }) + ;[ + { desc: `with cache`, cb: () => new Map() }, // Avoids sift 
for flat filters + { desc: `no cache`, cb: () => undefined }, // Always goes through sift + ].forEach(({ desc, cb: createIndexCache }) => { + describe(`runQuery [${desc}]`, () => { + it(`returns first result only`, async () => { + const type = `Post` + const query = { + filter: { frontmatter: { published: { eq: false } } }, + } + const firstOnly = true + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query, + firstOnly, + type, + }) + expect(result.id).toBe(`post1`) + }) - describe(`runQuery`, () => { - it(`returns first result only`, async () => { - const type = `Post` - const query = { filter: { frontmatter: { published: { eq: false } } } } - const firstOnly = true - const result = await nodeModel.runQuery({ query, firstOnly, type }) - expect(result.id).toBe(`post1`) - }) - - it(`returns all results`, async () => { - const type = `Post` - const query = { filter: { frontmatter: { published: { eq: false } } } } - const firstOnly = false - const result = await nodeModel.runQuery({ query, firstOnly, type }) - expect(result.length).toBe(2) - expect(result[0].id).toBe(`post1`) - expect(result[1].id).toBe(`post3`) - }) - - it(`creates page dependencies`, async () => { - const type = `Post` - const query = { filter: { frontmatter: { published: { eq: false } } } } - const firstOnly = false - await nodeModel.runQuery({ query, firstOnly, type }, { path: `/` }) - expect(createPageDependency).toHaveBeenCalledTimes(2) - expect(createPageDependency).toHaveBeenCalledWith({ - path: `/`, - nodeId: `post1`, + it(`returns all results`, async () => { + const type = `Post` + const query = { + filter: { frontmatter: { published: { eq: false } } }, + } + const firstOnly = false + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query, + firstOnly, + type, + }) + expect(result.length).toBe(2) + expect(result[0].id).toBe(`post1`) + expect(result[1].id).toBe(`post3`) }) - 
expect(createPageDependency).toHaveBeenCalledWith({ - path: `/`, - nodeId: `post3`, + + it(`creates page dependencies`, async () => { + const type = `Post` + const query = { + filter: { frontmatter: { published: { eq: false } } }, + } + const firstOnly = false + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query, + firstOnly, + type, + }, + { path: `/` } + ) + expect(createPageDependency).toHaveBeenCalledTimes(2) + expect(createPageDependency).toHaveBeenCalledWith({ + path: `/`, + nodeId: `post1`, + }) + expect(createPageDependency).toHaveBeenCalledWith({ + path: `/`, + nodeId: `post3`, + }) }) - }) - it(`creates page dependencies when called with context`, async () => { - const type = `Post` - const query = { filter: { frontmatter: { published: { eq: false } } } } - const firstOnly = false - await nodeModel - .withContext({ path: `/` }) - .runQuery({ query, firstOnly, type }) - expect(createPageDependency).toHaveBeenCalledTimes(2) - expect(createPageDependency).toHaveBeenCalledWith({ - path: `/`, - nodeId: `post1`, + it(`creates page dependencies when called with context`, async () => { + const type = `Post` + const query = { + filter: { frontmatter: { published: { eq: false } } }, + } + const firstOnly = false + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.withContext({ path: `/` }).runQuery({ + query, + firstOnly, + type, + }) + expect(createPageDependency).toHaveBeenCalledTimes(2) + expect(createPageDependency).toHaveBeenCalledWith({ + path: `/`, + nodeId: `post1`, + }) + expect(createPageDependency).toHaveBeenCalledWith({ + path: `/`, + nodeId: `post3`, + }) }) - expect(createPageDependency).toHaveBeenCalledWith({ - path: `/`, - nodeId: `post3`, + + it(`creates page dependencies with connection type`, async () => { + const type = `Post` + const query = { + filter: { frontmatter: { published: { eq: false } } }, + } + const firstOnly = false + 
nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query, + firstOnly, + type, + }, + { path: `/`, connectionType: `Post` } + ) + expect(createPageDependency).toHaveBeenCalledTimes(1) + expect(createPageDependency).toHaveBeenCalledWith({ + path: `/`, + connection: `Post`, + }) }) - }) - it(`creates page dependencies with connection type`, async () => { - const type = `Post` - const query = { filter: { frontmatter: { published: { eq: false } } } } - const firstOnly = false - await nodeModel.runQuery( - { query, firstOnly, type }, - { path: `/`, connectionType: `Post` } - ) - expect(createPageDependency).toHaveBeenCalledTimes(1) - expect(createPageDependency).toHaveBeenCalledWith({ - path: `/`, - connection: `Post`, + it(`doesn't allow querying union types`, () => { + const type = `AllFiles` + const query = {} + const firstOnly = true + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = nodeModel.runQuery({ + query, + firstOnly, + type, + }) + return expect(result).rejects.toThrowError( + `Querying GraphQLUnion types is not supported.` + ) }) - }) - it(`doesn't allow querying union types`, () => { - const type = `AllFiles` - const query = {} - const firstOnly = true - const result = nodeModel.runQuery({ query, firstOnly, type }) - return expect(result).rejects.toThrowError( - `Querying GraphQLUnion types is not supported.` - ) - }) + it(`handles interface types`, async () => { + const type = `TeamMember` + const query = { name: { ne: null } } + const firstOnly = true + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query, + firstOnly, + type, + }) + expect(result.name).toBe(`Person1`) + }) - it(`handles interface types`, async () => { - const type = `TeamMember` - const query = { name: { ne: null } } - const firstOnly = true - const result = await nodeModel.runQuery({ query, firstOnly, type }) - expect(result.name).toBe(`Person1`) - }) + it(`allows passing 
GraphQLType instead of type name`, async () => { + const type = schema.getType(`File`) + const query = { + filter: { + children: { elemMatch: { internal: { type: { eq: `Post` } } } }, + }, + } + const firstOnly = false + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query, + firstOnly, + type, + }) + expect(result.length).toBe(2) + expect(result[0].id).toBe(`file1`) + expect(result[1].id).toBe(`file3`) + }) - it(`allows passing GraphQLType instead of type name`, async () => { - const type = schema.getType(`File`) - const query = { - filter: { - children: { elemMatch: { internal: { type: { eq: `Post` } } } }, - }, - } - const firstOnly = false - const result = await nodeModel.runQuery({ query, firstOnly, type }) - expect(result.length).toBe(2) - expect(result[0].id).toBe(`file1`) - expect(result[1].id).toBe(`file3`) + it(`handles elemMatch`, async () => { + const type = `Post` + const query = { + filter: { + nestedObject: { elemMatch: { nestedValue: { eq: `2` } } }, + }, + } + const firstOnly = true + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query, + firstOnly, + type, + }) + expect(result).toBeDefined() + expect(result.id).toEqual(`post2`) + }) }) }) @@ -464,64 +537,73 @@ describe(`NodeModel`, () => { createPageDependency, }) }) - - it(`should not resolve prepared nodes more than once`, async () => { - await nodeModel.runQuery( - { - query: { filter: { betterTitle: { eq: `foo` } } }, - firstOnly: false, - type: `Test`, - }, - { path: `/` } - ) - expect(resolveBetterTitleMock.mock.calls.length).toBe(1) - expect(resolveOtherTitleMock.mock.calls.length).toBe(0) - await nodeModel.runQuery( - { - query: { filter: { betterTitle: { eq: `foo` } } }, - firstOnly: false, - type: `Test`, - }, - { path: `/` } - ) - expect(resolveBetterTitleMock.mock.calls.length).toBe(1) - expect(resolveOtherTitleMock.mock.calls.length).toBe(0) - await nodeModel.runQuery( - { - 
query: { - filter: { betterTitle: { eq: `foo` }, otherTitle: { eq: `Bar` } }, + ;[ + { desc: `with cache`, cb: () => new Map() }, // Avoids sift for flat filters + { desc: `no cache`, cb: () => undefined }, // Always goes through sift + ].forEach(({ desc, cb: createIndexCache }) => { + it(`[${desc}] should not resolve prepared nodes more than once`, async () => { + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query: { filter: { betterTitle: { eq: `foo` } } }, + firstOnly: false, + type: `Test`, }, - firstOnly: false, - type: `Test`, - }, - { path: `/` } - ) - expect(resolveBetterTitleMock.mock.calls.length).toBe(1) - expect(resolveOtherTitleMock.mock.calls.length).toBe(1) - await nodeModel.runQuery( - { - query: { - filter: { betterTitle: { eq: `foo` }, otherTitle: { eq: `Bar` } }, + { path: `/` } + ) + expect(resolveBetterTitleMock.mock.calls.length).toBe(1) + expect(resolveOtherTitleMock.mock.calls.length).toBe(0) + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query: { filter: { betterTitle: { eq: `foo` } } }, + firstOnly: false, + type: `Test`, }, - firstOnly: false, - type: `Test`, - }, - { path: `/` } - ) - expect(resolveBetterTitleMock.mock.calls.length).toBe(1) - expect(resolveOtherTitleMock.mock.calls.length).toBe(1) - await nodeModel.runQuery( - { - query: { - filter: { betterTitle: { eq: `foo` }, otherTitle: { eq: `Bar` } }, + { path: `/` } + ) + expect(resolveBetterTitleMock.mock.calls.length).toBe(1) + expect(resolveOtherTitleMock.mock.calls.length).toBe(0) + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query: { + filter: { betterTitle: { eq: `foo` }, otherTitle: { eq: `Bar` } }, + }, + firstOnly: false, + type: `Test`, }, - firstOnly: true, - type: `Test`, - }, - { path: `/` } - ) - expect(resolveBetterTitleMock.mock.calls.length).toBe(1) - expect(resolveOtherTitleMock.mock.calls.length).toBe(1) + { path: `/` } + ) + 
expect(resolveBetterTitleMock.mock.calls.length).toBe(1) + expect(resolveOtherTitleMock.mock.calls.length).toBe(1) + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query: { + filter: { betterTitle: { eq: `foo` }, otherTitle: { eq: `Bar` } }, + }, + firstOnly: false, + type: `Test`, + }, + { path: `/` } + ) + expect(resolveBetterTitleMock.mock.calls.length).toBe(1) + expect(resolveOtherTitleMock.mock.calls.length).toBe(1) + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + await nodeModel.runQuery( + { + query: { + filter: { betterTitle: { eq: `foo` }, otherTitle: { eq: `Bar` } }, + }, + firstOnly: true, + type: `Test`, + }, + { path: `/` } + ) + expect(resolveBetterTitleMock.mock.calls.length).toBe(1) + expect(resolveOtherTitleMock.mock.calls.length).toBe(1) + }) }) }) @@ -651,43 +733,49 @@ describe(`NodeModel`, () => { expect(trackedRootNode).not.toEqual(node) }) }) - - describe(`Tracks nodes returned by queries`, () => { - it(`Tracks objects when running query without filter`, async () => { - const result = await nodeModel.runQuery({ - query: {}, - type: schema.getType(`Test`), - firstOnly: false, + ;[ + { desc: `with index cache`, cb: () => new Map() }, // Avoids sift + { desc: `no index cache`, cb: () => undefined }, // Requires sift + ].forEach(({ desc, cb: createIndexCache }) => { + describe(`[${desc}] Tracks nodes returned by queries`, () => { + it(`Tracks objects when running query without filter`, async () => { + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query: {}, + type: schema.getType(`Test`), + firstOnly: false, + }) + + expect(result.length).toEqual(2) + expect( + nodeModel.findRootNodeAncestor(result[0].inlineObject) + ).toEqual(result[0]) + expect( + nodeModel.findRootNodeAncestor(result[1].inlineObject) + ).toEqual(result[1]) }) - expect(result.length).toEqual(2) - expect(nodeModel.findRootNodeAncestor(result[0].inlineObject)).toEqual( - 
result[0] - ) - expect(nodeModel.findRootNodeAncestor(result[1].inlineObject)).toEqual( - result[1] - ) - }) - - it(`Tracks objects when running query with filter`, async () => { - const result = await nodeModel.runQuery({ - query: { - filter: { - inlineObject: { - field: { - eq: `fieldOfSecondNode`, + it(`Tracks objects when running query with filter`, async () => { + nodeModel.replaceTypeKeyValueCache(createIndexCache()) + const result = await nodeModel.runQuery({ + query: { + filter: { + inlineObject: { + field: { + eq: `fieldOfSecondNode`, + }, }, }, }, - }, - type: schema.getType(`Test`), - firstOnly: false, + type: schema.getType(`Test`), + firstOnly: false, + }) + + expect(result.length).toEqual(1) + expect( + nodeModel.findRootNodeAncestor(result[0].inlineObject) + ).toEqual(result[0]) }) - - expect(result.length).toEqual(1) - expect(nodeModel.findRootNodeAncestor(result[0].inlineObject)).toEqual( - result[0] - ) }) }) }) diff --git a/packages/gatsby/src/schema/node-model.js b/packages/gatsby/src/schema/node-model.js index eecb653acea4e..b9815766c582b 100644 --- a/packages/gatsby/src/schema/node-model.js +++ b/packages/gatsby/src/schema/node-model.js @@ -71,6 +71,21 @@ class LocalNodeModel { this._prepareNodesQueues = {} this._prepareNodesPromises = {} this._preparedNodesCache = new Map() + this.replaceTypeKeyValueCache() + } + + /** + * Replace the cache either with the value passed on (mainly for tests) or + * an empty new Map. + * + * @param {undefined | Map> | Map>} map + * (This cache is used in redux/nodes.js and caches a set of buckets (Sets) + * of Nodes based on filter and tracks this for each set of types which are + * actually queried. If the filter targets `id` directly, only one Node is + * cached instead of a Set of Nodes.)
+ */ + replaceTypeKeyValueCache(map = new Map()) { + this._typedKeyValueIndexes = map // Use the injected cache; an omitted/undefined argument falls back to the default fresh Map. See redux/nodes.js for usage } withContext(context) { @@ -222,6 +237,7 @@ class LocalNodeModel { gqlType, resolvedFields: fieldsToResolve, nodeTypeNames, + typedKeyValueIndexes: this._typedKeyValueIndexes, }) let result = queryResult