diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md
index 51964e615035f..615f439f82e20 100644
--- a/benchmarks/memory/README.md
+++ b/benchmarks/memory/README.md
@@ -38,6 +38,10 @@ A shorthand for start + connect.
 
 Stop the container used for testing.
 
+#### yarn docker:stats
+
+Show a polling display of the container's docker stats.
+
 ### Gatsby
 
 These commands are used for interfacing with gatsby.
diff --git a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
index 98b84a406a471..aa2854960e820 100644
--- a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
+++ b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
@@ -401,13 +401,15 @@ describe(`applyFastFilters`, () => {
     const result = applyFastFilters(
       createDbQueriesFromObject(filter),
       [typeName],
-      new Map()
+      new Map(),
+      [],
+      []
     )
 
     expect(Array.isArray(result)).toBe(true)
     expect(result.length).toEqual(2)
 
     result.map(node => {
-      expect(node.slog).toEqual(`def`)
+      expect(getNode(node.id).slog).toEqual(`def`)
     })
   })
 
@@ -419,13 +421,15 @@ describe(`applyFastFilters`, () => {
     const result = applyFastFilters(
       createDbQueriesFromObject(filter),
       [typeName],
-      new Map()
+      new Map(),
+      [],
+      []
     )
 
     expect(Array.isArray(result)).toBe(true)
     expect(result.length).toEqual(2)
 
     result.map(node => {
-      expect(node.deep.flat.search.chain).toEqual(300)
+      expect(getNode(node.id).deep.flat.search.chain).toEqual(300)
     })
   })
 
@@ -438,14 +442,16 @@ describe(`applyFastFilters`, () => {
     const results = applyFastFilters(
       createDbQueriesFromObject(filter),
       [typeName],
-      new Map()
+      new Map(),
+      [],
+      []
     )
 
     // Count is irrelevant as long as it is non-zero and they all match filter
     expect(Array.isArray(results)).toBe(true)
     expect(results.length).toEqual(1)
-    expect(results[0].slog).toEqual(`def`)
-    expect(results[0].deep.flat.search.chain).toEqual(300)
+    expect(getNode(results[0].id).slog).toEqual(`def`)
+    expect(getNode(results[0].id).deep.flat.search.chain).toEqual(300)
   })
 
   it(`supports elemMatch`, () => {
@@ -458,7 +464,9 @@ describe(`applyFastFilters`, () => {
     const result = applyFastFilters(
       createDbQueriesFromObject(filter),
       [typeName],
-      new Map()
+      new Map(),
+      [],
+      []
     )
 
     expect(result).not.toBe(undefined)
@@ -484,7 +492,9 @@ describe(`edge cases (yay)`, () => {
     const result = applyFastFilters(
       createDbQueriesFromObject(filter),
       [typeName],
-      new Map()
+      new Map(),
+      [],
+      []
     )
 
     // Sanity-check
@@ -511,7 +521,13 @@ describe(`edge cases (yay)`, () => {
     await getDataStore().ready()
 
     const run = () =>
-      applyFastFilters(createDbQueriesFromObject(filter), [typeName], new Map())
+      applyFastFilters(
+        createDbQueriesFromObject(filter),
+        [typeName],
+        new Map(),
+        [],
+        []
+      )
 
     expect(run).toThrow(
       `Invariant violation: inconsistent node counters detected`
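Note on the test changes above: `applyFastFilters` now accepts two additional arguments (the sort fields to index and the resolved fields) and returns lightweight node partials rather than full nodes, which is why the assertions rehydrate each result through `getNode(node.id)`. A minimal sketch of that calling pattern, with hypothetical `filter`/`typeName` values shaped like the fixtures in this test file:

```ts
import { applyFastFilters } from "../in-memory/run-fast-filters"
import { createDbQueriesFromObject } from "../common/query"
import { getNode } from "../"

// Hypothetical inputs, mirroring the test fixtures above
const filter = { slog: { $eq: `def` } }
const typeName = `Test`

const partials = applyFastFilters(
  createDbQueriesFromObject(filter),
  [typeName],
  new Map(), // filtersCache: one cache entry per filter key
  [], // sortFields: nothing extra to index here
  [] // resolvedFields: no resolver-backed fields in play
)

// Each partial carries only `id`, `internal.counter` and any cached index
// fields; everything else lives on the rehydrated node.
const fullNodes = (partials ?? []).map(partial => getNode(partial.id))
```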
diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 7d6acc42c46e5..fae28594700eb 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -4,8 +4,11 @@ import {
   IDbQueryElemMatch,
   FilterValue,
   FilterValueNullable,
+  objectToDottedField,
 } from "../common/query"
-import { getDataStore } from "../"
+import { getDataStore, getNode } from "../"
+import _ from "lodash"
+import { getValueAt } from "../../utils/get-value-at"
 
 // Only list supported ops here. "CacheableFilterOp"
 export type FilterOp = // TODO: merge with DbComparator ?
@@ -21,6 +24,105 @@ export type FilterOp = // TODO: merge with DbComparator ?
 // Note: `undefined` is an encoding for a property that does not exist
 export type FilterCacheKey = string
 
+type GatsbyNodeID = string
+
+export interface IGatsbyNodePartial {
+  id: GatsbyNodeID
+  internal: {
+    counter: number
+  }
+  gatsbyNodePartialInternalData: {
+    indexFields: Set<string>
+  }
+  [k: string]: any
+}
+
+const nodeIdToIdentifierMap = new Map<
+  GatsbyNodeID,
+  WeakRef<IGatsbyNodePartial>
+>()
+
+/**
+ * Grabs an instance of IGatsbyNodePartial for the given node.
+ * This accepts an IGatsbyNode or IGatsbyNodePartial as input, which allows
+ * us to conditionally store index fields on the partial if we encounter
+ * one that hasn't been stored on the partial yet.
+ */
+export const getGatsbyNodePartial = (
+  node: IGatsbyNode | IGatsbyNodePartial,
+  indexFields: Array<string>,
+  resolvedFields: Record<string, any>
+): IGatsbyNodePartial => {
+  // first, check if we have the partial in the cache
+  const cacheKey = `${node.id}_____${node.internal.counter}`
+  let derefPartial: IGatsbyNodePartial | undefined = undefined
+  if (nodeIdToIdentifierMap.has(cacheKey)) {
+    derefPartial = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+
+    // now check if we have it in memory and it has all the fields we need
+    if (
+      derefPartial &&
+      _.every(
+        indexFields.map(field =>
+          derefPartial!.gatsbyNodePartialInternalData.indexFields.has(field)
+        )
+      )
+    ) {
+      return derefPartial
+    }
+  }
+
+  // find all the keys of fields and store them and their values on the partial
+  // if we've already passed this partial, merge both sets of index fields
+  const dottedFields = {}
+  const fieldsToStore = derefPartial
+    ? new Set([
+        ...derefPartial.gatsbyNodePartialInternalData.indexFields,
+        ...indexFields,
+      ])
+    : new Set(indexFields)
+
+  const sortFieldIds = getSortFieldIdentifierKeys(
+    [...fieldsToStore],
+    resolvedFields
+  )
+  let fullNodeObject: IGatsbyNode | undefined =
+    node.gatsbyNodePartialInternalData ? undefined : (node as IGatsbyNode)
+
+  for (const dottedField of sortFieldIds) {
+    if (dottedField in node) {
+      dottedFields[dottedField] = node[dottedField]
+    } else {
+      // if we haven't gotten the full node object, fetch it once
+      if (!fullNodeObject) {
+        fullNodeObject = getNode(node.id)!
+      }
+
+      // use the full node object to fetch the value
+      dottedFields[dottedField] = getValueAt(fullNodeObject, dottedField)
+    }
+  }
+
+  // create the partial object
+  const partial = Object.assign(dottedFields, {
+    id: node.id,
+    internal: {
+      counter: node.internal.counter,
+    },
+    gatsbyNodePartialInternalData: {
+      indexFields: fieldsToStore,
+    },
+  })
+
+  // set the object in the cache for later fetching
+  nodeIdToIdentifierMap.set(cacheKey, new WeakRef(partial))
+
+  return partial
+}
+
+const sortByIds = (a: IGatsbyNodePartial, b: IGatsbyNodePartial): number =>
+  a.internal.counter - b.internal.counter
+
 export interface IFilterCache {
   op: FilterOp
   // In this map `undefined` values represent nodes that did not have the path
@@ -30,22 +132,22 @@
   // This arrays may contain duplicates (!) because those only get filtered in the
   // last step.
   // TODO: We might decide to make sure these buckets _are_ deduped for eq perf
-  byValue: Map<FilterValueNullable, Array<IGatsbyNode>>
+  byValue: Map<FilterValueNullable, Array<IGatsbyNodePartial>>
   meta: {
     // Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort
-    nodesUnordered?: Array<IGatsbyNode>
+    nodesUnordered?: Array<IGatsbyNodePartial>
     // Flat list of all nodes by requested types, ordered by counter (cached for empty filters)
-    orderedByCounter?: Array<IGatsbyNode>
+    orderedByCounter?: Array<IGatsbyNodePartial>
     // Ordered list of all values (by `<`) found by this filter. No null / undefs
     valuesAsc?: Array<FilterValue>
     // Flat list of nodes, ordered by valueAsc
-    nodesByValueAsc?: Array<IGatsbyNode>
+    nodesByValueAsc?: Array<IGatsbyNodePartial>
    // Ranges of nodes per value, maps to the nodesByValueAsc array
     valueRangesAsc?: Map<FilterValue, [number, number]>
     // Ordered list of all values (by `>`) found by this filter. No null / undefs
     valuesDesc?: Array<FilterValue>
     // Flat list of nodes, ordered by valueDesc
-    nodesByValueDesc?: Array<IGatsbyNode>
+    nodesByValueDesc?: Array<IGatsbyNodePartial>
     // Ranges of nodes per value, maps to the nodesByValueDesc array
     valueRangesDesc?: Map<FilterValue, [number, number]>
   }
@@ -59,7 +161,7 @@ export function postIndexingMetaSetup(
   // Loop through byValue and make sure the buckets are sorted by counter
   // Since we don't do insertion sort, we have to do it afterwards
   for (const bucket of filterCache.byValue) {
-    bucket[1].sort((a, b) => a.internal.counter - b.internal.counter)
+    bucket[1].sort(sortByIds)
   }
 
   if (op === `$ne` || op === `$nin`) {
@@ -79,15 +181,14 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void {
   // including nodes where the value is null.
   // A $nin does the same as an $ne except it filters multiple values instead
   // of just one.
-
   // For `$ne` we will take the list of all targeted nodes and eliminate the
   // bucket of nodes with a particular value, if it exists at all..
 
-  const arr: Array<IGatsbyNode> = []
+  const arr: Array<IGatsbyNodePartial> = []
   filterCache.meta.nodesUnordered = arr
   filterCache.byValue.forEach(v => {
-    v.forEach(node => {
-      arr.push(node)
+    v.forEach(nodeId => {
+      arr.push(nodeId)
     })
   })
 }
@@ -101,15 +202,15 @@ function postIndexingMetaSetupLtLteGtGte(
   // internal.counter, asc.
   // This way non-eq ops can simply slice the array to get a range.
 
-  const entriesNullable: Array<[FilterValueNullable, Array<IGatsbyNode>]> = [
-    ...filterCache.byValue.entries(),
-  ]
+  const entriesNullable: Array<
+    [FilterValueNullable, Array<IGatsbyNodePartial>]
+  > = [...filterCache.byValue.entries()]
 
   // These range checks never return `null` or `undefined` so filter those out
   // By filtering them out early, the sort should be faster. Could be ...
-  const entries: Array<[FilterValue, Array<IGatsbyNode>]> =
+  const entries: Array<[FilterValue, Array<IGatsbyNodePartial>]> =
     entriesNullable.filter(([v]) => v != null) as Array<
-      [FilterValue, Array<IGatsbyNode>]
+      [FilterValue, Array<IGatsbyNodePartial>]
     >
 
   // Sort all arrays by its value, asc. Ignore/allow potential type casting.
@@ -133,10 +234,10 @@
     entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0))
   }
 
-  const orderedNodes: Array<IGatsbyNode> = []
+  const orderedNodes: Array<IGatsbyNodePartial> = []
   const orderedValues: Array<FilterValue> = []
   const offsets: Map<FilterValue, [number, number]> = new Map()
-  entries.forEach(([v, bucket]: [FilterValue, Array<IGatsbyNode>]) => {
+  entries.forEach(([v, bucket]: [FilterValue, Array<IGatsbyNodePartial>]) => {
     // Record the range containing all nodes with as filter value v
     // The last value of the range should be the offset of the next value
     // (So you should be able to do `nodes.slice(start, stop)` to get them)
@@ -176,14 +277,16 @@ export const ensureIndexByQuery = (
   filterCacheKey: FilterCacheKey,
   filterPath: Array<string>,
   nodeTypeNames: Array<string>,
-  filtersCache: FiltersCache
+  filtersCache: FiltersCache,
+  indexFields: Array<string>,
+  resolvedFields: Record<string, any>
 ): void => {
   const state = store.getState()
   const resolvedNodesCache = state.resolvedNodesCache
 
   const filterCache: IFilterCache = {
     op,
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodePartial>>(),
     meta: {},
   } as IFilterCache
   filtersCache.set(filterCacheKey, filterCache)
@@ -196,7 +299,14 @@
   getDataStore()
     .iterateNodesByType(nodeTypeNames[0])
     .forEach(node => {
-      addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
+      addNodeToFilterCache({
+        node,
+        chain: filterPath,
+        filterCache,
+        resolvedNodesCache,
+        indexFields,
+        resolvedFields,
+      })
     })
   } else {
     // Here we must first filter for the node type
@@ -208,7 +318,14 @@
         return
       }
 
-      addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
+      addNodeToFilterCache({
+        node,
+        chain: filterPath,
+        filterCache,
+        resolvedNodesCache,
+        indexFields,
+        resolvedFields,
+      })
     })
   }
 
@@ -218,7 +335,9 @@
 export function ensureEmptyFilterCache(
   filterCacheKey,
   nodeTypeNames: Array<string>,
-  filtersCache: FiltersCache
+  filtersCache: FiltersCache,
+  indexFields: Array<string>,
+  resolvedFields: Record<string, any>
 ): void {
   // This is called for queries without any filters
   // We want to cache the result since it's basically a list of nodes by type(s)
@@ -226,11 +345,11 @@
   const state = store.getState()
   const resolvedNodesCache = state.resolvedNodesCache
-  const orderedByCounter: Array<IGatsbyNode> = []
+  const orderedByCounter: Array<IGatsbyNodePartial> = []
 
   filtersCache.set(filterCacheKey, {
     op: `$eq`, // Ignore.
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodePartial>>(),
     meta: {
       orderedByCounter, // This is what we want
     },
@@ -248,7 +367,9 @@ export function ensureEmptyFilterCache(
           node.__gatsby_resolved = resolved
         }
       }
-      orderedByCounter.push(node)
+      orderedByCounter.push(
+        getGatsbyNodePartial(node, indexFields, resolvedFields)
+      )
     })
   } else {
     // Here we must first filter for the node type
@@ -265,23 +386,35 @@ export function ensureEmptyFilterCache(
           node.__gatsby_resolved = resolved
         }
      }
-      orderedByCounter.push(node)
+      orderedByCounter.push(
+        getGatsbyNodePartial(node, indexFields, resolvedFields)
+      )
      }
    })
  }
 
   // Since each node can only have one type, we shouldn't have to be concerned
   // about duplicates in this array. Just make sure they're sorted.
-  orderedByCounter.sort((a, b) => a.internal.counter - b.internal.counter)
+  orderedByCounter.sort(sortByIds)
 }
 
-function addNodeToFilterCache(
-  node: IGatsbyNode,
-  chain: Array<string>,
-  filterCache: IFilterCache,
+function addNodeToFilterCache({
+  node,
+  chain,
+  filterCache,
   resolvedNodesCache,
-  valueOffset: any = node
-): void {
+  indexFields,
+  resolvedFields,
+  valueOffset = node,
+}: {
+  node: IGatsbyNode
+  chain: Array<string>
+  filterCache: IFilterCache
+  resolvedNodesCache: Map<string, any>
+  indexFields: Array<string>
+  resolvedFields: Record<string, any>
+  valueOffset?: any
+}): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
   if (!node.__gatsby_resolved) {
     const typeName = node.internal.type
@@ -310,7 +443,9 @@ function addNodeToFilterCache(
 
   // Add an entry for each element of the array. This would work for ops
   // like eq and ne, but not sure about range ops like lt,lte,gt,gte.
-    v.forEach(v => markNodeForValue(filterCache, node, v))
+    v.forEach(v =>
+      markNodeForValue(filterCache, node, v, indexFields, resolvedFields)
+    )
     return
   }
 
@@ -322,20 +457,26 @@
     v = undefined
   }
 
-  markNodeForValue(filterCache, node, v)
+  markNodeForValue(filterCache, node, v, indexFields, resolvedFields)
 }
 
 function markNodeForValue(
   filterCache: IFilterCache,
   node: IGatsbyNode,
-  value: FilterValueNullable
+  value: FilterValueNullable,
+  indexFields: Array<string>,
+  resolvedFields: Record<string, any>
 ): void {
   let arr = filterCache.byValue.get(value)
   if (!arr) {
     arr = []
     filterCache.byValue.set(value, arr)
   }
-  arr.push(node)
+
+  const partial = getGatsbyNodePartial(node, indexFields, resolvedFields)
+  if (!arr.includes(partial)) {
+    arr.push(partial)
+  }
 }
 
 export const ensureIndexByElemMatch = (
@@ -343,7 +484,9 @@
   filterCacheKey: FilterCacheKey,
   filter: IDbQueryElemMatch,
   nodeTypeNames: Array<string>,
-  filtersCache: FiltersCache
+  filtersCache: FiltersCache,
+  indexFields: Array<string>,
+  resolvedFields: Record<string, any>
 ): void => {
   // Given an elemMatch filter, generate the cache that contains all nodes that
   // matches a given value for that sub-query
@@ -353,7 +496,7 @@
 
   const filterCache: IFilterCache = {
     op,
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodePartial>>(),
     meta: {},
   } as IFilterCache
   filtersCache.set(filterCacheKey, filterCache)
@@ -362,13 +505,15 @@
   getDataStore()
     .iterateNodesByType(nodeTypeNames[0])
     .forEach(node => {
-      addNodeToBucketWithElemMatch(
-        node,
+      addNodeToBucketWithElemMatch({
         node,
+        valueAtCurrentStep: node,
         filter,
         filterCache,
-        resolvedNodesCache
-      )
+        resolvedNodesCache,
+        indexFields,
+        resolvedFields,
+      })
     })
   } else {
     // Expensive at scale
@@ -379,26 +524,38 @@
       return
     }
 
-      addNodeToBucketWithElemMatch(
-        node,
+      addNodeToBucketWithElemMatch({
         node,
+        valueAtCurrentStep: node,
         filter,
         filterCache,
-        resolvedNodesCache
-      )
+        resolvedNodesCache,
+        indexFields,
+        resolvedFields,
+      })
    })
  }
 
  postIndexingMetaSetup(filterCache, op)
 }
 
-function addNodeToBucketWithElemMatch(
-  node: IGatsbyNode,
-  valueAtCurrentStep: any, // Arbitrary step on the path inside the node
-  filter: IDbQueryElemMatch,
-  filterCache: IFilterCache,
+function addNodeToBucketWithElemMatch({
+  node,
+  valueAtCurrentStep, // Arbitrary step on the path inside the node
+  filter,
+  filterCache,
+  resolvedNodesCache,
+  indexFields,
+  resolvedFields,
+}: {
+  node: IGatsbyNode
+  valueAtCurrentStep: any // Arbitrary step on the path inside the node
+  filter: IDbQueryElemMatch
+  filterCache: IFilterCache
   resolvedNodesCache
-): void {
+  indexFields: Array<string>
+  resolvedFields: Record<string, any>
+}): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
   if (!node.__gatsby_resolved) {
     const typeName = node.internal.type
@@ -432,22 +589,26 @@ function addNodeToBucketWithElemMatch(
   // work when elements resolve to the same value, but that can't be helped.
   valueAtCurrentStep.forEach(elem => {
     if (nestedQuery.type === `elemMatch`) {
-      addNodeToBucketWithElemMatch(
+      addNodeToBucketWithElemMatch({
         node,
-        elem,
-        nestedQuery,
+        valueAtCurrentStep: elem,
+        filter: nestedQuery,
         filterCache,
-        resolvedNodesCache
-      )
+        resolvedNodesCache,
+        indexFields,
+        resolvedFields,
+      })
     } else {
       // Now take same route as non-elemMatch filters would take
-      addNodeToFilterCache(
+      addNodeToFilterCache({
         node,
-        nestedQuery.path,
+        chain: nestedQuery.path,
         filterCache,
         resolvedNodesCache,
-        elem
-      )
+        indexFields,
+        resolvedFields,
+        valueOffset: elem,
+      })
     }
   })
 }
@@ -540,7 +701,7 @@ export const getNodesFromCacheByValue = (
   filterValue: FilterValueNullable,
   filtersCache: FiltersCache,
   wasElemMatch
-): Array<IGatsbyNode> | undefined => {
+): Array<IGatsbyNodePartial> | undefined => {
   const filterCache = filtersCache.get(filterCacheKey)
   if (!filterCache) {
     return undefined
@@ -573,7 +734,7 @@
   }
 
   const filterValueArr: Array<FilterValueNullable> = filterValue
-  const set: Set<IGatsbyNode> = new Set()
+  const set: Set<IGatsbyNodePartial> = new Set()
 
   // TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works.
   // For every value in the needle array, find the bucket of nodes for
@@ -583,7 +744,7 @@
   )
 
   const arr = [...set] // this is bad for perf but will guarantee us a unique set :(
-  arr.sort((A, B) => A.internal.counter - B.internal.counter)
+  arr.sort(sortByIds)
 
   // Note: it's very unlikely that the list of filter values is big so .includes should be fine here
   if (filterValueArr.includes(null)) {
@@ -622,7 +783,7 @@
 
     // TODO: there's probably a more efficient algorithm to do set
     // subtraction in such a way that we don't have to re-sort
-    return [...set].sort((A, B) => A.internal.counter - B.internal.counter)
+    return [...set].sort(sortByIds)
   }
 
   if (op === `$ne`) {
@@ -632,7 +793,7 @@
 
     // TODO: there's probably a more efficient algorithm to do set
     // subtraction in such a way that we don't have to resort here
-    return [...set].sort((A, B) => A.internal.counter - B.internal.counter)
+    return [...set].sort(sortByIds)
   }
 
   if (op === `$regex`) {
@@ -649,7 +810,7 @@
     }
 
     const regex = filterValue
-    const arr: Array<IGatsbyNode> = []
+    const arr: Array<IGatsbyNodePartial> = []
     filterCache.byValue.forEach((nodes, value) => {
       // TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce.
       // Note: for legacy reasons partial paths should also be included for regex
@@ -661,7 +822,7 @@
     // TODO: we _can_ cache this list as well. Might make sense if it turns out that $regex is mostly used with literals
     // TODO: it may make sense to first collect all buckets and then to .concat them, or merge sort them
-    arr.sort((A, B) => A.internal.counter - B.internal.counter)
+    arr.sort(sortByIds)
 
     // elemMatch can cause a node to appear in multiple buckets so we must dedupe
     if (wasElemMatch) {
@@ -706,7 +867,7 @@ export const getNodesFromCacheByValue = (
       const range = ranges!.get(filterValue)
       if (range) {
        const arr = nodes!.slice(0, range[0])
-        arr.sort((A, B) => A.internal.counter - B.internal.counter)
+        arr.sort(sortByIds)
         // elemMatch can cause a node to appear in multiple buckets so we must dedupe
         if (wasElemMatch) {
           expensiveDedupeInline(arr)
@@ -746,7 +907,7 @@ export const getNodesFromCacheByValue = (
       // So we have to consider weak comparison and may have to include the pivot
       const until = pivotValue < filterValue ? inclPivot : exclPivot
       const arr = nodes!.slice(0, until)
-      arr.sort((A, B) => A.internal.counter - B.internal.counter)
+      arr.sort(sortByIds)
       // elemMatch can cause a node to appear in multiple buckets so we must dedupe
       if (wasElemMatch) {
         expensiveDedupeInline(arr)
@@ -764,7 +925,7 @@ export const getNodesFromCacheByValue = (
       const range = ranges!.get(filterValue)
      if (range) {
         const arr = nodes!.slice(0, range[1])
-        arr.sort((A, B) => A.internal.counter - B.internal.counter)
+        arr.sort(sortByIds)
         // elemMatch can cause a node to appear in multiple buckets so we must dedupe
         if (wasElemMatch) {
           expensiveDedupeInline(arr)
@@ -804,7 +965,7 @@ export const getNodesFromCacheByValue = (
       // So we have to consider weak comparison and may have to include the pivot
       const until = pivotValue <= filterValue ? inclPivot : exclPivot
       const arr = nodes!.slice(0, until)
-      arr.sort((A, B) => A.internal.counter - B.internal.counter)
+      arr.sort(sortByIds)
       // elemMatch can cause a node to appear in multiple buckets so we must dedupe
       if (wasElemMatch) {
         expensiveDedupeInline(arr)
@@ -822,7 +983,7 @@ export const getNodesFromCacheByValue = (
       const range = ranges!.get(filterValue)
       if (range) {
         const arr = nodes!.slice(0, range[0]).reverse()
-        arr.sort((A, B) => A.internal.counter - B.internal.counter)
+        arr.sort(sortByIds)
         // elemMatch can cause a node to appear in multiple buckets so we must dedupe
         if (wasElemMatch) {
           expensiveDedupeInline(arr)
@@ -862,7 +1023,7 @@ export const getNodesFromCacheByValue = (
       // So we have to consider weak comparison and may have to include the pivot
       const until = pivotValue > filterValue ? inclPivot : exclPivot
       const arr = nodes!.slice(0, until).reverse()
-      arr.sort((A, B) => A.internal.counter - B.internal.counter)
+      arr.sort(sortByIds)
       // elemMatch can cause a node to appear in multiple buckets so we must dedupe
       if (wasElemMatch) {
         expensiveDedupeInline(arr)
@@ -880,7 +1041,7 @@ export const getNodesFromCacheByValue = (
       const range = ranges!.get(filterValue)
       if (range) {
         const arr = nodes!.slice(0, range[1]).reverse()
-        arr.sort((A, B) => A.internal.counter - B.internal.counter)
+        arr.sort(sortByIds)
         // elemMatch can cause a node to appear in multiple buckets so we must dedupe
         if (wasElemMatch) {
           expensiveDedupeInline(arr)
@@ -920,7 +1081,7 @@ export const getNodesFromCacheByValue = (
       // So we have to consider weak comparison and may have to include the pivot
       const until = pivotValue >= filterValue ? inclPivot : exclPivot
       const arr = nodes!.slice(0, until).reverse()
-      arr.sort((A, B) => A.internal.counter - B.internal.counter)
+      arr.sort(sortByIds)
       // elemMatch can cause a node to appear in multiple buckets so we must dedupe
       if (wasElemMatch) {
         expensiveDedupeInline(arr)
@@ -935,7 +1096,7 @@
 
 function removeBucketFromSet(
   filterValue: FilterValueNullable,
   filterCache: IFilterCache,
-  set: Set<IGatsbyNode>
+  set: Set<IGatsbyNodePartial>
 ): void {
   if (filterValue === null) {
     // Edge case: $ne with `null` returns only the nodes that contain the full
@@ -960,29 +1121,27 @@ function removeBucketFromSet(
  * list that is also ordered by node.internal.counter
  */
 export function intersectNodesByCounter(
-  a: Array<IGatsbyNode>,
-  b: Array<IGatsbyNode>
-): Array<IGatsbyNode> {
+  a: Array<IGatsbyNodePartial>,
+  b: Array<IGatsbyNodePartial>
+): Array<IGatsbyNodePartial> {
   let pointerA = 0
   let pointerB = 0
   // TODO: perf check: is it helpful to init the array to min(maxA,maxB) items?
-  const result: Array<IGatsbyNode> = []
+  const result: Array<IGatsbyNodePartial> = []
   const maxA = a.length
   const maxB = b.length
-  let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
+  let lastAdded: IGatsbyNodePartial | undefined = undefined // Used to dedupe the list
 
   while (pointerA < maxA && pointerB < maxB) {
-    const nodeA = a[pointerA]
-    const nodeB = b[pointerB]
-    const counterA = nodeA.internal.counter
-    const counterB = nodeB.internal.counter
+    const counterA = a[pointerA].internal.counter
+    const counterB = b[pointerB].internal.counter
 
     if (counterA < counterB) {
       pointerA++
     } else if (counterA > counterB) {
       pointerB++
     } else {
-      if (nodeA !== nodeB) {
+      if (a[pointerA] !== b[pointerB]) {
         throw new Error(
           `Invariant violation: inconsistent node counters detected`
         )
@@ -991,9 +1150,9 @@ export function intersectNodesByCounter(
       // Since input arrays are sorted, the same node should be grouped
       // back to back, so even if both input arrays contained the same node
       // twice, this check would prevent the result from getting duplicate nodes
-      if (lastAdded !== nodeA) {
-        result.push(nodeA)
-        lastAdded = nodeA
+      if (lastAdded !== a[pointerA]) {
+        result.push(a[pointerA])
+        lastAdded = a[pointerA]
       }
       pointerA++
       pointerB++
@@ -1011,12 +1170,12 @@ export function intersectNodesByCounter(
  * list that is also ordered by node.internal.counter
  */
 export function unionNodesByCounter(
-  a: Array<IGatsbyNode>,
-  b: Array<IGatsbyNode>
-): Array<IGatsbyNode> {
+  a: Array<IGatsbyNodePartial>,
+  b: Array<IGatsbyNodePartial>
+): Array<IGatsbyNodePartial> {
   // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items?
-  const arr: Array<IGatsbyNode> = []
-  let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
+  const arr: Array<IGatsbyNodePartial> = []
+  let lastAdded: IGatsbyNodePartial | undefined = undefined // Used to dedupe the list
 
   let pointerA = 0
   let pointerB = 0
@@ -1024,27 +1183,25 @@ export function unionNodesByCounter(
   const maxB = b.length
 
   while (pointerA < maxA && pointerB < maxB) {
-    const nodeA = a[pointerA]
-    const nodeB = b[pointerB]
-    const counterA = nodeA.internal.counter
-    const counterB = nodeB.internal.counter
+    const counterA = a[pointerA].internal.counter
+    const counterB = b[pointerB].internal.counter
 
     if (counterA < counterB) {
-      if (lastAdded !== nodeA) {
-        arr.push(nodeA)
-        lastAdded = nodeA
+      if (lastAdded !== a[pointerA]) {
+        arr.push(a[pointerA])
+        lastAdded = a[pointerA]
       }
       pointerA++
     } else if (counterA > counterB) {
-      if (lastAdded !== nodeB) {
-        arr.push(nodeB)
-        lastAdded = nodeB
+      if (lastAdded !== b[pointerB]) {
+        arr.push(b[pointerB])
+        lastAdded = b[pointerB]
      }
      pointerB++
    } else {
-      if (lastAdded !== nodeA) {
-        arr.push(nodeA)
-        lastAdded = nodeA
+      if (lastAdded !== a[pointerA]) {
+        arr.push(a[pointerA])
+        lastAdded = a[pointerA]
       }
       pointerA++
       pointerB++
@@ -1052,19 +1209,17 @@ export function unionNodesByCounter(
   }
 
   while (pointerA < maxA) {
-    const nodeA = a[pointerA]
-    if (lastAdded !== nodeA) {
-      arr.push(nodeA)
-      lastAdded = nodeA
+    if (lastAdded !== a[pointerA]) {
+      arr.push(a[pointerA])
+      lastAdded = a[pointerA]
     }
     pointerA++
   }
 
   while (pointerB < maxB) {
-    const nodeB = b[pointerB]
-    if (lastAdded !== nodeB) {
-      arr.push(nodeB)
-      lastAdded = nodeB
+    if (lastAdded !== b[pointerB]) {
+      arr.push(b[pointerB])
+      lastAdded = b[pointerB]
     }
     pointerB++
   }
@@ -1072,11 +1227,11 @@
   return arr
 }
 
-function expensiveDedupeInline(arr: Array<IGatsbyNode>): void {
+function expensiveDedupeInline(arr: Array<IGatsbyNodePartial>): void {
   // An elemMatch filter may cause duplicates to appear in a bucket.
   // Since the bucket is sorted those should now be back to back
   // Worst case this is a fast O(n) loop that does nothing.
-  let prev: IGatsbyNode | undefined = undefined
+  let prev: IGatsbyNodePartial | undefined = undefined
 
   // We copy-on-find because a splice is expensive and we can't use Sets
@@ -1094,3 +1249,23 @@ function expensiveDedupeInline(arr: Array<IGatsbyNode>): void {
   }
   arr.length = j
 }
+
+export function getSortFieldIdentifierKeys(
+  indexFields: Array<string>,
+  resolvedFields: Record<string, any>
+): Array<string> {
+  const dottedFields = objectToDottedField(resolvedFields)
+  const dottedFieldKeys = Object.keys(dottedFields)
+  const fieldKeys = indexFields.map(field => {
+    if (
+      dottedFields[field] ||
+      dottedFieldKeys.some(key => field.startsWith(key))
+    ) {
+      return `__gatsby_resolved.${field}`
+    } else {
+      return field
+    }
+  })
+
+  return fieldKeys
+}
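A note on the indexing changes above before moving to the query layer: `getGatsbyNodePartial` memoizes partials in `nodeIdToIdentifierMap` behind `WeakRef`s, keyed by `id_____counter`. Repeated lookups therefore return the same object reference, which is what keeps the `!==` invariant check in `intersectNodesByCounter` and the `arr.includes(partial)` dedupe in `markNodeForValue` sound, while still letting unreferenced partials be garbage collected. A stripped-down sketch of the same memoization pattern (names here are illustrative, not the PR's API):

```ts
// WeakRef-based memo cache, analogous to nodeIdToIdentifierMap above.
interface INodeStub {
  id: string
  internal: { counter: number }
}

const stubCache = new Map<string, WeakRef<INodeStub>>()

function getStub(id: string, counter: number): INodeStub {
  const key = `${id}_____${counter}`
  // deref() yields undefined once the stub has been garbage collected
  const cached = stubCache.get(key)?.deref()
  if (cached) {
    return cached
  }
  const fresh: INodeStub = { id, internal: { counter } }
  stubCache.set(key, new WeakRef(fresh))
  return fresh
}

// Two lookups for the same id/counter pair share one object identity:
console.log(getStub(`node-1`, 1) === getStub(`node-1`, 1)) // true
```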
diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index 53eaeddce63d6..23605f9896c9d 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -1,5 +1,3 @@
-import { IGatsbyNode } from "../../redux/types"
-import { getValueAt } from "../../utils/get-value-at"
 import _ from "lodash"
 import {
   DbQuery,
@@ -7,7 +5,6 @@ import {
   IDbQueryElemMatch,
   IInputQuery,
   FilterValueNullable,
-  objectToDottedField,
   createDbQueriesFromObject,
   prefixResolvedFields,
   prepareQueryArgs,
@@ -22,15 +19,26 @@ import {
   getNodesFromCacheByValue,
   intersectNodesByCounter,
   IFilterCache,
+  IGatsbyNodePartial,
+  getSortFieldIdentifierKeys,
+  getGatsbyNodePartial,
 } from "./indexing"
 import { IGraphQLRunnerStats } from "../../query/types"
 import { IRunQueryArgs, IQueryResult } from "../types"
 import { GatsbyIterable } from "../common/iterable"
+import { getNode } from "../"
 
 export interface IRunFilterArg extends IRunQueryArgs {
   filtersCache: FiltersCache
 }
 
+type ISortParameters =
+  | {
+      fields: Array<string>
+      order: Array<boolean | "asc" | "desc">
+    }
+  | undefined
+
 /**
  * Creates a key for one filterCache inside FiltersCache
 */
@@ -73,8 +81,10 @@ export function applyFastFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
-  filtersCache: FiltersCache
-): Array<IGatsbyNode> | null {
+  filtersCache: FiltersCache,
+  sortFields: Array<string>,
+  resolvedFields: any
+): Array<IGatsbyNodePartial> | null {
   if (!filtersCache) {
     // If no filter cache is passed on, explicitly don't use one
     return null
   }
@@ -83,7 +93,9 @@
   const nodesPerValueArrs = getBucketsForFilters(
     filters,
     nodeTypeNames,
-    filtersCache
+    filtersCache,
+    sortFields,
+    resolvedFields
   )
 
   if (!nodesPerValueArrs) {
@@ -101,8 +113,8 @@
 
   while (nodesPerValueArrs.length > 1) {
     // TS limitation: cannot guard against .pop(), so we must double cast
-    const a = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNode>
-    const b = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNode>
+    const a = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNodePartial>
+    const b = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNodePartial>
 
     nodesPerValueArrs.push(intersectNodesByCounter(a, b))
   }
@@ -123,9 +135,11 @@
 function getBucketsForFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
-  filtersCache: FiltersCache
-): Array<Array<IGatsbyNode>> | undefined {
-  const nodesPerValueArrs: Array<Array<IGatsbyNode>> = []
+  filtersCache: FiltersCache,
+  sortFields: Array<string>,
+  resolvedFields: any
+): Array<Array<IGatsbyNodePartial>> | undefined {
+  const nodesPerValueArrs: Array<Array<IGatsbyNodePartial>> = []
 
   // Fail fast while trying to create and get the value-cache for each path
   const every = filters.every(filter => {
@@ -138,7 +152,9 @@
         q,
         nodeTypeNames,
         filtersCache,
-        nodesPerValueArrs
+        nodesPerValueArrs,
+        sortFields,
+        resolvedFields
       )
     } else {
       // (Let TS warn us if a new query type gets added)
@@ -148,7 +164,9 @@
         q,
         nodeTypeNames,
         filtersCache,
-        nodesPerValueArrs
+        nodesPerValueArrs,
+        sortFields,
+        resolvedFields
      )
    }
  })
@@ -170,7 +188,9 @@ function getBucketsForQueryFilter(
   filter: IDbQueryQuery,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
-  nodesPerValueArrs: Array<Array<IGatsbyNode>>
+  nodesPerValueArrs: Array<Array<IGatsbyNodePartial>>,
+  sortFields: Array<string>,
+  resolvedFields: any
 ): boolean {
   const {
     path: filterPath,
   } = filter
 
   if (!filtersCache.has(filterCacheKey)) {
+    // indexFields = sortFields
     ensureIndexByQuery(
       comparator as FilterOp,
       filterCacheKey,
       filterPath,
       nodeTypeNames,
-      filtersCache
+      filtersCache,
+      sortFields,
+      resolvedFields
     )
   }
@@ -213,7 +236,9 @@ function collectBucketForElemMatch(
   filter: IDbQueryElemMatch,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
-  nodesPerValueArrs: Array<Array<IGatsbyNode>>
+  nodesPerValueArrs: Array<Array<IGatsbyNodePartial>>,
+  sortFields: Array<string>,
+  resolvedFields: any
 ): boolean {
   // Get comparator and target value for this elemMatch
   let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init)
@@ -230,14 +255,15 @@ function collectBucketForElemMatch(
       break
     }
   }
-
   if (!filtersCache.has(filterCacheKey)) {
     ensureIndexByElemMatch(
       comparator,
       filterCacheKey,
       filter,
       nodeTypeNames,
-      filtersCache
+      filtersCache,
+      sortFields,
+      resolvedFields
     )
   }
@@ -284,7 +310,8 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult {
     nodeTypeNames,
     filtersCache,
     resolvedFields,
-    stats
+    stats,
+    sort
   )
 
   const sortedResult = sortNodes(result, sort, resolvedFields, stats)
@@ -295,7 +322,8 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult {
       ? sortedResult.slice(skip, limit ? skip + (limit ?? 0) : undefined)
       : sortedResult
 
-  return { entries: new GatsbyIterable(entries), totalCount }
+  const nodeObjects = entries.map(nodeIds => getNode(nodeIds.id)!)
+  return { entries: new GatsbyIterable(nodeObjects), totalCount }
 }
 
 /**
@@ -306,8 +334,9 @@ function convertAndApplyFastFilters(
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
   resolvedFields: Record<string, any>,
-  stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+  stats: IGraphQLRunnerStats,
+  sort: ISortParameters
+): Array<IGatsbyNodePartial> {
   const filters = filterFields
     ? prefixResolvedFields(
         createDbQueriesFromObject(prepareQueryArgs(filterFields)),
@@ -333,18 +362,30 @@
 
   if (filters.length === 0) {
     const filterCacheKey = createFilterCacheKey(nodeTypeNames, null)
     if (!filtersCache.has(filterCacheKey)) {
-      ensureEmptyFilterCache(filterCacheKey, nodeTypeNames, filtersCache)
+      ensureEmptyFilterCache(
+        filterCacheKey,
+        nodeTypeNames,
+        filtersCache,
+        sort?.fields || [],
+        resolvedFields
+      )
     }
 
     // If there's a filter, there (now) must be an entry for this cache key
     const filterCache = filtersCache.get(filterCacheKey) as IFilterCache
 
     // If there is no filter then the ensureCache step will populate this:
-    const cache = filterCache.meta.orderedByCounter as Array<IGatsbyNode>
+    const cache = filterCache.meta.orderedByCounter as Array<IGatsbyNodePartial>
 
     return cache.slice(0)
   }
 
-  const result = applyFastFilters(filters, nodeTypeNames, filtersCache)
+  const result = applyFastFilters(
+    filters,
+    nodeTypeNames,
+    filtersCache,
+    sort?.fields || [],
+    resolvedFields
+  )
 
   if (result) {
     if (stats) {
@@ -388,37 +429,23 @@
  * Returns same reference as input, sorted inline
  */
 function sortNodes(
-  nodes: Array<IGatsbyNode>,
-  sort:
-    | {
-        fields: Array<string>
-        order: Array<boolean | "asc" | "desc">
-      }
-    | undefined,
+  nodes: Array<IGatsbyNodePartial>,
+  sort: ISortParameters,
   resolvedFields: any,
   stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+): Array<IGatsbyNodePartial> {
   if (!sort || sort.fields?.length === 0 || !nodes || nodes.length === 0) {
     return nodes
   }
 
   // create functions that return the item to compare on
-  const dottedFields = objectToDottedField(resolvedFields)
-  const dottedFieldKeys = Object.keys(dottedFields)
-  const sortFields = sort.fields.map(field => {
-    if (
-      dottedFields[field] ||
-      dottedFieldKeys.some(key => field.startsWith(key))
-    ) {
-      return `__gatsby_resolved.${field}`
-    } else {
-      return field
-    }
-  })
+  const sortFields = getSortFieldIdentifierKeys(sort.fields, resolvedFields)
   const sortFns = sortFields.map(
     field =>
       (v): ((any) => any) =>
-        getValueAt(v, field)
+        field in v
+          ? v[field]
+          : getGatsbyNodePartial(v, sort.fields, resolvedFields)[field]
   )
   const sortOrder = sort.order.map(order =>
     typeof order === `boolean` ? order : order.toLowerCase()
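Finally, the sort path above leans on `getSortFieldIdentifierKeys`: a sort field that falls under `resolvedFields` is addressed as `__gatsby_resolved.<field>`, and that dotted identifier doubles as the key under which the partial caches the value, so `sortNodes` can read `v[field]` directly and only falls back to `getGatsbyNodePartial` when the field was not cached yet. A simplified sketch of the key mapping (it folds the `dottedFields[field]` lookup and the `startsWith` scan into one check; inputs are hypothetical):

```ts
// Simplified version of getSortFieldIdentifierKeys for a single field.
function sortKeyFor(
  field: string,
  resolvedFieldKeys: Array<string> // dotted keys, as objectToDottedField yields
): string {
  return resolvedFieldKeys.some(key => field === key || field.startsWith(key))
    ? `__gatsby_resolved.${field}`
    : field
}

// A resolver-backed field sorts on the materialized value:
console.log(sortKeyFor(`frontmatter.date`, [`frontmatter.date`]))
// -> "__gatsby_resolved.frontmatter.date"

// A plain node field sorts on the raw value:
console.log(sortKeyFor(`internal.counter`, []))
// -> "internal.counter"
```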