diff --git a/benchmarks/memory/.dockerignore b/benchmarks/memory/.dockerignore
new file mode 100644
index 0000000000000..cbd3fdd9b6b92
--- /dev/null
+++ b/benchmarks/memory/.dockerignore
@@ -0,0 +1,23 @@
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+README.md
diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile
new file mode 100644
index 0000000000000..80f6e52c38966
--- /dev/null
+++ b/benchmarks/memory/Dockerfile
@@ -0,0 +1,14 @@
+FROM node:14-buster
+ENV NODE_ENV=production
+ENV CI=1
+ENV GATSBY_CPU_COUNT=4
+RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y
+RUN npm i -g gatsby-cli gatsby-dev-cli
+WORKDIR /usr/src/app
+RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc
+
+# set up gatsby-dev
+RUN gatsby-dev --set-path-to-repo /usr/src/gatsby
+
+# keep the process running
+ENTRYPOINT ["tail", "-f", "/dev/null"]
\ No newline at end of file
diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md
new file mode 100644
index 0000000000000..445abd8415bf4
--- /dev/null
+++ b/benchmarks/memory/README.md
@@ -0,0 +1,124 @@
+# Gatsby Memory Benchmark
+
+The goal of this benchmark is to test Gatsby's memory usage and look for potential optimizations.
+
+## The Docker Container
+
+The docker container used in these tests sets up a Debian instance with node 14 installed (as well as npm/yarn/etc).
+It exposes port 9000 (for hosting gatsby) and port 9229 (for debugging); see the example below for reaching the site from the host.
+
+Within the container, two paths are bind-mounted from your local filesystem:
+
+- /usr/src/gatsby : Your local gatsby repo
+- /usr/src/app : The memory benchmark gatsby site
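+
+For example, with the container running and `yarn gatsby:serve` started inside it, the site can be reached from the host (the URL is illustrative; port 9000 is published by `yarn docker:start`):
+
+```shell
+curl http://localhost:9000
+```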
+
+## Commands
+
+### Docker
+
+These commands are used for interfacing with docker and for managing the container used for testing.
+
+#### yarn docker:build
+
+Builds the container used for testing.
+
+#### yarn docker:start
+
+Starts the container built by `yarn docker:build`.
+
+#### yarn docker:connect
+
+Connects to the container started by `yarn docker:start`.
+
+#### yarn docker:start-and-connect
+
+A shorthand for start + connect.
+
+#### yarn docker:stop
+
+Stop the container used for testing.
+
+### Gatsby
+
+These commands are used for interfacing with gatsby.
+
+#### yarn gatsby:build
+
+Simply an alias to `yarn gatsby build`.
+
+#### yarn gatsby:serve
+
+Starts `gatsby serve` on port 9000 and sets the host properly to work inside docker.
+
+#### yarn gatsby:develop
+
+Starts `gatsby develop` on port 9000 and sets the host properly to work inside docker.
+
+#### yarn gatsby:build:debug
+
+Runs `gatsby build` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
+
+#### yarn gatsby:develop:debug
+
+Runs `gatsby develop` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
+
+## Setup
+
+Currently we can reproduce builds crashing with our default settings:
+
+- Docker container running with a 2GB memory limit
+- 300 nodes x ~2MB each = ~600MB of "just" node data in each process (the number of nodes can be controlled with the `NUM_NODES` env var)
+- 3 workers + the main process (`GATSBY_CPU_COUNT` is set to 4 in the docker image, but you can override it with the env var; see the example below)
+- `eq_field` template using fast filters (a single `eq` comparator specifically)
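+
+For example, to run a build inside the container with non-default settings (the values below are illustrative):
+
+```shell
+NUM_NODES=500 GATSBY_CPU_COUNT=2 yarn gatsby:build
+```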
+
+The goal is to make the `eq_field` template stop causing crashes, then add the next template (a different operator) that causes crashes, and repeat until all query types can be handled within the set memory limits.
+
+### Workflow
+
+While the `gatsby-dev` command is available inside docker, from my testing it doesn't seem to pick up file changes when run there. A workflow that works reliably (see the example session after the lists below):
+
+When starting work with this benchmark:
+
+- start `yarn watch` (possibly with `--scope`) in the monorepo
+- start `gatsby-dev` outside of docker in the benchmark directory (just like with a regular site)
+- `yarn docker:connect` to get inside docker
+- `npm rebuild` to rebuild the native binaries inside docker
+
+And repeat as many times as you want:
+
+- make changes to `gatsby` source code as you normally would
+- run `yarn gatsby:build` inside docker
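+
+A typical session might look like this (the `--scope` value is illustrative):
+
+```shell
+# on the host, in the monorepo root
+yarn watch --scope=gatsby
+
+# on the host, in benchmarks/memory
+gatsby-dev
+yarn docker:connect
+
+# inside the container
+npm rebuild
+yarn gatsby:build
+```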
+
+## Testing
+
+TODO
+
+- How to configure memory limits
+- Where to look
diff --git a/benchmarks/memory/gatsby-config.js b/benchmarks/memory/gatsby-config.js
new file mode 100644
index 0000000000000..5ae66ab282a51
--- /dev/null
+++ b/benchmarks/memory/gatsby-config.js
@@ -0,0 +1,3 @@
+module.exports = {
+ plugins: [],
+}
diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js
new file mode 100644
index 0000000000000..f020ac0079ba0
--- /dev/null
+++ b/benchmarks/memory/gatsby-node.js
@@ -0,0 +1,226 @@
+const { cpuCoreCount } = require(`gatsby-core-utils`)
+
+const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10)
+
+const NUM_KEYS_IN_LARGE_SIZE_OBJ = 1024
+
+exports.sourceNodes = async ({ actions, reporter }) => {
+ const contentDigest = Date.now().toString() // make each sourcing mark everything as dirty
+
+ const activity = reporter.createProgress(`Creating test nodes`, NUM_NODES)
+ activity.start()
+
+ for (let i = 0; i < NUM_NODES; i++) {
+ const largeSizeObj = {}
+ for (let j = 1; j <= NUM_KEYS_IN_LARGE_SIZE_OBJ; j++) {
+ largeSizeObj[`key_${j}`] = `x`.repeat(1024)
+ }
+
+ // each node is ~2MB
+ const node = {
+ id: `memory-${i}`,
+ idClone: `memory-${i}`,
+ fooBar: [`foo`, `bar`, `baz`, `foobar`][i % 4],
+ number1: i,
+ number2: NUM_NODES - i,
+ number3: i % 20,
+ largeSizeObj,
+ largeSizeString: `x`.repeat(1024 * 1024),
+ internal: {
+ contentDigest,
+ type: `Test`,
+ },
+ }
+
+ actions.createNode(node)
+
+ if (i % 100 === 99) {
+ activity.tick(100)
+ await new Promise(resolve => setImmediate(resolve))
+ }
+ }
+
+ activity.tick(NUM_NODES % 100)
+
+ await new Promise(resolve => setTimeout(resolve, 100))
+
+ activity.end()
+}
+
+exports.createSchemaCustomization = ({ actions, schema }) => {
+ actions.createTypes([
+ schema.buildObjectType({
+ name: `TestLargeSizeObj`,
+ fields: Object.fromEntries(
+ new Array(NUM_KEYS_IN_LARGE_SIZE_OBJ)
+ .fill(`String`)
+ .map((value, index) => [`key_${index + 1}`, value])
+ ),
+ }),
+ schema.buildObjectType({
+ name: `Test`,
+ fields: {
+ idClone: `String`,
+ fooBar: `String`,
+ number1: `Int`,
+ number2: `Int`,
+ number3: `Int`,
+ largeSizeString: `String`,
+ largeSizeObj: `TestLargeSizeObj`,
+ idCloneWithResolver: {
+ type: `String`,
+ resolve: source => {
+ return source.idClone
+ },
+ },
+ },
+ interfaces: ["Node"],
+ extensions: {
+ infer: false,
+ },
+ }),
+ ])
+}
+
+const printedMessages = new Set()
+exports.createResolvers = ({ createResolvers }) => {
+ createResolvers({
+ Query: {
+ workerInfo: {
+ type: `String`,
+ args: {
+ label: `String!`,
+ },
+ resolve: (_, args) => {
+ const msg = `${args.label} on ${
+ process.env.GATSBY_WORKER_ID
+ ? `worker #${process.env.GATSBY_WORKER_ID}`
+ : `main`
+ }`
+ if (!printedMessages.has(msg)) {
+ printedMessages.add(msg)
+ console.log(msg)
+ }
+ return msg
+ },
+ },
+ },
+ })
+}
+
+const WORKER_BATCH_SIZE =
+ Number(process.env.GATSBY_PARALLEL_QUERY_CHUNK_SIZE) || 50
+
+let enabledTemplates = new Set()
+exports.onPreBootstrap = () => {
+ const availableTemplates = new Set([
+ `eq_id`, // this should skip node-model and fast filters completely and should be very cheap already
+ `eq_field`, // this needs fast filters for eq operator on non-id field
+    `eq_field_with_resolver`, // this needs fast filters for eq operator on non-id field + materialization
+ `ne_field_collection_sort_skip_limit`, // collection query to check code path applying sorting and skip/limit
+ ])
+ enabledTemplates = new Set(
+ process.env.TEMPLATES
+ ? process.env.TEMPLATES.split(`,`).filter(template =>
+ availableTemplates.has(template)
+ )
+ : availableTemplates
+ )
+
+ console.info(`Enabled templates`, enabledTemplates)
+}
+
+exports.createPages = async ({ actions, graphql }) => {
+ const numWorkers = Math.max(1, cpuCoreCount() - 1)
+
+ // we do want ALL available workers to execute each query type
+ const minNumOfPagesToSaturateAllWorkers = WORKER_BATCH_SIZE * numWorkers
+
+ const { data } = await graphql(`
+ {
+ allTest {
+ nodes {
+ id
+ idClone
+ }
+ }
+ }
+ `)
+
+ // we might need to "duplicate" pages if node count is less than number of needed pages
+  const repeatCount = Math.max(
+    1,
+ Math.ceil(minNumOfPagesToSaturateAllWorkers / data.allTest.nodes.length)
+ )
+
+ function createEnoughToSaturate(template, cb) {
+ if (!enabledTemplates.has(template)) {
+ return
+ }
+ console.log(`Creating pages with template "${template}"`)
+ let counter = 0
+ for (let i = 0; i < repeatCount; i++) {
+ let j = 0
+ for (const node of data.allTest.nodes) {
+ const { context } = cb(node, j)
+
+ actions.createPage({
+ path: `/${template}/${counter++}`,
+ component: require.resolve(`./src/templates/${template}`),
+ context,
+ })
+
+ if (counter >= minNumOfPagesToSaturateAllWorkers) {
+ break
+ }
+
+ j++
+ }
+ }
+ }
+
+ // fast path (eq: { id: x })
+ createEnoughToSaturate(`eq_id`, node => {
+ return {
+ context: {
+ id: node.id,
+ },
+ }
+ })
+
+ // (eq: { idClone: x })
+ createEnoughToSaturate(`eq_field`, node => {
+ return {
+ context: {
+ id: node.id,
+ },
+ }
+ })
+
+ // (eq: { idCloneWithResolver: x })
+ createEnoughToSaturate(`eq_field_with_resolver`, node => {
+ return {
+ context: {
+ id: node.id,
+ },
+ }
+ })
+
+ // allTest(
+ // filter: { idClone: { ne: $id } }
+ // sort: { fields: [number3], order: [ASC] }
+ // limit: 10
+ // skip: $skip
+ // )
+ createEnoughToSaturate(
+ `ne_field_collection_sort_skip_limit`,
+ (node, index) => {
+ return {
+ context: {
+ id: node.id,
+          skip: Math.min(index, NUM_NODES - 10), // limit is set to 10, so cap the skip so queries for the last nodes still return 10 items
+ },
+ }
+ }
+ )
+}
diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json
new file mode 100644
index 0000000000000..2d63ab39c23be
--- /dev/null
+++ b/benchmarks/memory/package.json
@@ -0,0 +1,32 @@
+{
+ "name": "memory-usage-benchmark",
+ "private": true,
+ "version": "1.0.0",
+ "description": "Test site stress testing memory usage",
+ "license": "MIT",
+ "scripts": {
+ "gatsby:build": "yarn gatsby build",
+ "gatsby:serve": "yarn gatsby serve -H 0.0.0.0 -p 9000",
+ "gatsby:develop": "NODE_ENV=development yarn gatsby develop -H 0.0.0.0 -p 9000",
+ "gatsby:build:debug": "node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby build",
+ "gatsby:develop:debug": "NODE_ENV=development node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby develop -H 0.0.0.0 -p 9000",
+ "docker:build": "docker build -t gatsby-memory .",
+ "docker:start": "./scripts/docker-start",
+ "docker:connect": "./scripts/docker-connect",
+ "docker:start-and-connect": "./scripts/docker-start && sleep 1 && ./scripts/docker-connect",
+ "docker:stop": "./scripts/docker-stop",
+ "docker:stats": "./scripts/docker-stats"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/gatsbyjs/gatsby/tree/master/benchmarks/memory"
+ },
+ "bugs": {
+ "url": "https://github.com/gatsbyjs/gatsby/issues"
+ },
+ "dependencies": {
+ "gatsby": "^4",
+ "react": "^17.0.2",
+ "react-dom": "^17.0.2"
+ }
+}
diff --git a/benchmarks/memory/scripts/docker-connect b/benchmarks/memory/scripts/docker-connect
new file mode 100755
index 0000000000000..af6582a97d6f8
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-connect
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+
+if [ -z "$DOCKER_ID" ]; then
+  echo -e "\nNo gatsby-memory container is running. Start one with \`yarn docker:start\`.\n"
+  exit 1
+fi
+
+echo -e "Connecting to container $DOCKER_ID...\n"
+docker exec -it $DOCKER_ID bash
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-get-id b/benchmarks/memory/scripts/docker-get-id
new file mode 100755
index 0000000000000..064e21e32607c
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-get-id
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+DOCKER_ID=$(\
+ docker ps --format '{{.Image}}:{{.ID}}' | \
+ grep "gatsby-memory" | \
+ head -n 1 | \
+ sed 's/gatsby\-memory://'\
+)
+
+echo $DOCKER_ID
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start
new file mode 100755
index 0000000000000..235d3526b4d9b
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-start
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+if [ -n "$DOCKER_ID" ]; then
+  echo -e "\nA gatsby-memory container is already running with id $DOCKER_ID."
+  echo -e "Please use that container, or run \`yarn docker:stop\` to stop it.\n"
+  exit 1
+fi
+
+DOCKER_ID=$(\
+ docker run -td \
+ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \
+ --mount type=bind,source="$(pwd)",target=/usr/src/app \
+ --publish 9229:9229 \
+ --publish 9000:9000 \
+ --memory="2g" \
+ --memory-swap="2g" \
+ gatsby-memory \
+ | head -c 12 \
+)
+
+echo -e "\nStarted container id ${DOCKER_ID}! Run \`yarn docker:connect\` to connect to the container.\n"
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-stats b/benchmarks/memory/scripts/docker-stats
new file mode 100755
index 0000000000000..9fb96494108b7
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-stats
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+if [ -z "$DOCKER_ID" ]; then
+ echo -e "\nNo gatsby-memory container was found. Run \`yarn docker:start\` to start one.\n"
+ exit 1
+fi
+
+FORMAT="Gatsby Memory Benchmark Container----CPU: {{.CPUPerc }}--Memory: {{.MemUsage}}--Network: {{.NetIO}}"
+STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT")
+clear
+
+while [ -n "$STATS" ]; do
+ echo $STATS | sed "s/--/\n/g"
+ DOCKER_ID=$(./scripts/docker-get-id)
+ STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT")
+ clear
+done
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-stop b/benchmarks/memory/scripts/docker-stop
new file mode 100755
index 0000000000000..95dbec9e55704
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-stop
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+
+if [ -z "$DOCKER_ID" ]; then
+  echo -e "\nNo gatsby-memory container is running.\n"
+  exit 1
+fi
+
+DOCKER_ID=$(docker kill $DOCKER_ID)
+echo -e "\nStopped container $DOCKER_ID.\n"
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/enforce-docker b/benchmarks/memory/scripts/enforce-docker
new file mode 100755
index 0000000000000..43ede33d240db
--- /dev/null
+++ b/benchmarks/memory/scripts/enforce-docker
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+if [ ! -f /.dockerenv ]; then
+ DOCKER_ID=$(./scripts/docker-get-id)
+ COMMAND="start-and-connect"
+ if [ -n "$DOCKER_ID" ]; then
+ COMMAND="connect"
+ fi
+ echo -e "\nThis must be run inside the docker container. Please run \`yarn docker:${COMMAND}\` and try again.\n"
+ exit 1
+fi
+
+${@:1}
\ No newline at end of file
diff --git a/benchmarks/memory/src/pages/index.js b/benchmarks/memory/src/pages/index.js
new file mode 100644
index 0000000000000..8729fdc41578e
--- /dev/null
+++ b/benchmarks/memory/src/pages/index.js
@@ -0,0 +1,5 @@
+import React from "react"
+
+export default function Home() {
+  return <div>Hello world!</div>
+}
diff --git a/benchmarks/memory/src/templates/eq_field.js b/benchmarks/memory/src/templates/eq_field.js
new file mode 100644
index 0000000000000..c881ada4c05f3
--- /dev/null
+++ b/benchmarks/memory/src/templates/eq_field.js
@@ -0,0 +1,20 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!) {
+ test(idClone: { eq: $id }) {
+ id
+ fooBar
+ }
+ workerInfo(label: "eq-field")
+ }
+`
diff --git a/benchmarks/memory/src/templates/eq_field_with_resolver.js b/benchmarks/memory/src/templates/eq_field_with_resolver.js
new file mode 100644
index 0000000000000..ffc066340c721
--- /dev/null
+++ b/benchmarks/memory/src/templates/eq_field_with_resolver.js
@@ -0,0 +1,20 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!) {
+ test(idCloneWithResolver: { eq: $id }) {
+ id
+ fooBar
+ }
+ workerInfo(label: "eq-field-with-resolver")
+ }
+`
diff --git a/benchmarks/memory/src/templates/eq_id.js b/benchmarks/memory/src/templates/eq_id.js
new file mode 100644
index 0000000000000..3bca139fc3c26
--- /dev/null
+++ b/benchmarks/memory/src/templates/eq_id.js
@@ -0,0 +1,20 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!) {
+ test(id: { eq: $id }) {
+ id
+ fooBar
+ }
+ workerInfo(label: "eq-id")
+ }
+`
diff --git a/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js b/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js
new file mode 100644
index 0000000000000..a57663a40e0cd
--- /dev/null
+++ b/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js
@@ -0,0 +1,27 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!, $skip: Int!) {
+ allTest(
+ filter: { idClone: { ne: $id } }
+ sort: { fields: [number3], order: [ASC] }
+ limit: 10
+ skip: $skip
+ ) {
+ nodes {
+ id
+ fooBar
+ }
+ }
+ workerInfo(label: "ne-field-collection-sort-skip-limit")
+ }
+`
diff --git a/integration-tests/cache-resilience/gatsby-node.js b/integration-tests/cache-resilience/gatsby-node.js
index 21ab949eb533a..3847cc1170211 100644
--- a/integration-tests/cache-resilience/gatsby-node.js
+++ b/integration-tests/cache-resilience/gatsby-node.js
@@ -3,7 +3,7 @@ const v8 = require(`v8`)
const glob = require(`glob`)
const path = require(`path`)
const _ = require(`lodash`)
-const { open } = require(`lmdb-store`)
+const { open } = require(`lmdb`)
const { saveState } = require(`gatsby/dist/redux/save-state`)
diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json
index fb28bf3df283f..305897fceae42 100644
--- a/packages/gatsby/package.json
+++ b/packages/gatsby/package.json
@@ -101,7 +101,7 @@
"joi": "^17.4.2",
"json-loader": "^0.5.7",
"latest-version": "5.1.0",
- "lmdb-store": "^1.6.11",
+ "lmdb": "2.1.7",
"lodash": "^4.17.21",
"md5-file": "^5.0.0",
"meant": "^1.0.3",
diff --git a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
index 98b84a406a471..8b20b188a3e5f 100644
--- a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
+++ b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
@@ -407,7 +407,7 @@ describe(`applyFastFilters`, () => {
expect(result.length).toEqual(2)
result.map(node => {
- expect(node.slog).toEqual(`def`)
+ expect(getNode(node.id).slog).toEqual(`def`)
})
})
@@ -425,7 +425,7 @@ describe(`applyFastFilters`, () => {
expect(result.length).toEqual(2)
result.map(node => {
- expect(node.deep.flat.search.chain).toEqual(300)
+ expect(getNode(node.id).deep.flat.search.chain).toEqual(300)
})
})
@@ -444,8 +444,8 @@ describe(`applyFastFilters`, () => {
// Count is irrelevant as long as it is non-zero and they all match filter
expect(Array.isArray(results)).toBe(true)
expect(results.length).toEqual(1)
- expect(results[0].slog).toEqual(`def`)
- expect(results[0].deep.flat.search.chain).toEqual(300)
+ expect(getNode(results[0].id).slog).toEqual(`def`)
+ expect(getNode(results[0].id).deep.flat.search.chain).toEqual(300)
})
it(`supports elemMatch`, () => {
diff --git a/packages/gatsby/src/datastore/common/iterable.ts b/packages/gatsby/src/datastore/common/iterable.ts
index 2948c6378e172..8e1abe698b906 100644
--- a/packages/gatsby/src/datastore/common/iterable.ts
+++ b/packages/gatsby/src/datastore/common/iterable.ts
@@ -1,3 +1,5 @@
+// @ts-ignore
+import { clearKeptObjects } from "lmdb"
/**
* Wrapper for any iterable providing chainable interface and convenience methods
* similar to array.
@@ -10,10 +12,18 @@
 export class GatsbyIterable<T> {
   constructor(private source: Iterable<T> | (() => Iterable<T>)) {}
-  [Symbol.iterator](): Iterator<T> {
+  *[Symbol.iterator](): Generator<T> {
const source =
typeof this.source === `function` ? this.source() : this.source
- return source[Symbol.iterator]()
+
+ let i = 0
+ for (const val of source) {
+ yield val
+
+ if (++i % 100 === 0) {
+ clearKeptObjects()
+ }
+ }
}
   concat<U>(other: Iterable<U>): GatsbyIterable<T | U> {
diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 7d6acc42c46e5..5f9c072fbb42b 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -5,7 +5,7 @@ import {
FilterValue,
FilterValueNullable,
} from "../common/query"
-import { getDataStore } from "../"
+import { getDataStore, getNode } from "../"
// Only list supported ops here. "CacheableFilterOp"
export type FilterOp = // TODO: merge with DbComparator ?
@@ -21,6 +21,39 @@ export type FilterOp = // TODO: merge with DbComparator ?
// Note: `undefined` is an encoding for a property that does not exist
export type FilterCacheKey = string
+type GatsbyNodeID = string
+
+export interface IGatsbyNodeIdentifiers {
+ id: GatsbyNodeID
+ counter: number
+}
+
+const nodeIdToIdentifierMap = new Map<
+ GatsbyNodeID,
+  WeakRef<IGatsbyNodeIdentifiers>
+>()
+
+const getIdentifierObjectFromNode = (
+ node: IGatsbyNode
+): IGatsbyNodeIdentifiers => {
+ const cacheKey = `${node.id}_____${node.internal.counter}`
+ if (nodeIdToIdentifierMap.has(cacheKey)) {
+ const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+ if (maybeStillExist) {
+ return maybeStillExist
+ }
+ }
+
+ const identifier = { id: node.id, counter: node.internal.counter }
+ nodeIdToIdentifierMap.set(cacheKey, new WeakRef(identifier))
+ return identifier
+}
+
+const sortByIds = (
+ a: IGatsbyNodeIdentifiers,
+ b: IGatsbyNodeIdentifiers
+): number => a.counter - b.counter
+
export interface IFilterCache {
op: FilterOp
// In this map `undefined` values represent nodes that did not have the path
@@ -30,22 +63,22 @@ export interface IFilterCache {
// This arrays may contain duplicates (!) because those only get filtered in the
// last step.
// TODO: We might decide to make sure these buckets _are_ deduped for eq perf
-  byValue: Map<FilterValueNullable, Array<IGatsbyNode>>
+  byValue: Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>
meta: {
// Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort
-    nodesUnordered?: Array<IGatsbyNode>
+    nodesUnordered?: Array<IGatsbyNodeIdentifiers>
// Flat list of all nodes by requested types, ordered by counter (cached for empty filters)
-    orderedByCounter?: Array<IGatsbyNode>
+    orderedByCounter?: Array<IGatsbyNodeIdentifiers>
// Ordered list of all values (by `<`) found by this filter. No null / undefs
     valuesAsc?: Array<FilterValue>
// Flat list of nodes, ordered by valueAsc
-    nodesByValueAsc?: Array<IGatsbyNode>
+    nodesByValueAsc?: Array<IGatsbyNodeIdentifiers>
// Ranges of nodes per value, maps to the nodesByValueAsc array
     valueRangesAsc?: Map<FilterValue, [number, number]>
// Ordered list of all values (by `>`) found by this filter. No null / undefs
     valuesDesc?: Array<FilterValue>
// Flat list of nodes, ordered by valueDesc
-    nodesByValueDesc?: Array<IGatsbyNode>
+    nodesByValueDesc?: Array<IGatsbyNodeIdentifiers>
// Ranges of nodes per value, maps to the nodesByValueDesc array
     valueRangesDesc?: Map<FilterValue, [number, number]>
}
@@ -59,7 +92,7 @@ export function postIndexingMetaSetup(
// Loop through byValue and make sure the buckets are sorted by counter
// Since we don't do insertion sort, we have to do it afterwards
for (const bucket of filterCache.byValue) {
- bucket[1].sort((a, b) => a.internal.counter - b.internal.counter)
+ bucket[1].sort(sortByIds)
}
if (op === `$ne` || op === `$nin`) {
@@ -79,15 +112,14 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void {
// including nodes where the value is null.
// A $nin does the same as an $ne except it filters multiple values instead
// of just one.
-
// For `$ne` we will take the list of all targeted nodes and eliminate the
// bucket of nodes with a particular value, if it exists at all..
-  const arr: Array<IGatsbyNode> = []
+  const arr: Array<IGatsbyNodeIdentifiers> = []
filterCache.meta.nodesUnordered = arr
filterCache.byValue.forEach(v => {
- v.forEach(node => {
- arr.push(node)
+ v.forEach(nodeId => {
+ arr.push(nodeId)
})
})
}
@@ -101,15 +133,15 @@ function postIndexingMetaSetupLtLteGtGte(
// internal.counter, asc.
// This way non-eq ops can simply slice the array to get a range.
-  const entriesNullable: Array<[FilterValueNullable, Array<IGatsbyNode>]> = [
-    ...filterCache.byValue.entries(),
-  ]
+  const entriesNullable: Array<
+    [FilterValueNullable, Array<IGatsbyNodeIdentifiers>]
+  > = [...filterCache.byValue.entries()]
// These range checks never return `null` or `undefined` so filter those out
// By filtering them out early, the sort should be faster. Could be ...
-  const entries: Array<[FilterValue, Array<IGatsbyNode>]> =
+  const entries: Array<[FilterValue, Array<IGatsbyNodeIdentifiers>]> =
     entriesNullable.filter(([v]) => v != null) as Array<
-      [FilterValue, Array<IGatsbyNode>]
+      [FilterValue, Array<IGatsbyNodeIdentifiers>]
>
// Sort all arrays by its value, asc. Ignore/allow potential type casting.
@@ -133,19 +165,21 @@ function postIndexingMetaSetupLtLteGtGte(
entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0))
}
-  const orderedNodes: Array<IGatsbyNode> = []
+  const orderedNodes: Array<IGatsbyNodeIdentifiers> = []
   const orderedValues: Array<FilterValue> = []
   const offsets: Map<FilterValue, [number, number]> = new Map()
-  entries.forEach(([v, bucket]: [FilterValue, Array<IGatsbyNode>]) => {
- // Record the range containing all nodes with as filter value v
- // The last value of the range should be the offset of the next value
- // (So you should be able to do `nodes.slice(start, stop)` to get them)
- offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length])
- // We could do `arr.push(...bucket)` here but that's not safe with very
- // large sets, so we use a regular loop
- bucket.forEach(node => orderedNodes.push(node))
- orderedValues.push(v)
- })
+ entries.forEach(
+    ([v, bucket]: [FilterValue, Array<IGatsbyNodeIdentifiers>]) => {
+ // Record the range containing all nodes with as filter value v
+ // The last value of the range should be the offset of the next value
+ // (So you should be able to do `nodes.slice(start, stop)` to get them)
+ offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length])
+ // We could do `arr.push(...bucket)` here but that's not safe with very
+ // large sets, so we use a regular loop
+ bucket.forEach(node => orderedNodes.push(node))
+ orderedValues.push(v)
+ }
+ )
if (op === `$lt` || op === `$lte`) {
filterCache.meta.valuesAsc = orderedValues
@@ -178,12 +212,19 @@ export const ensureIndexByQuery = (
   nodeTypeNames: Array<string>,
filtersCache: FiltersCache
): void => {
+ const readableWorkerId = process.env.GATSBY_WORKER_ID
+ ? `worker #${process.env.GATSBY_WORKER_ID}`
+ : `main`
+
+ console.log(
+ `ensureIndexByQuery "${filterCacheKey}" start ${readableWorkerId}`
+ )
const state = store.getState()
const resolvedNodesCache = state.resolvedNodesCache
const filterCache: IFilterCache = {
op,
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>(),
meta: {},
} as IFilterCache
filtersCache.set(filterCacheKey, filterCache)
@@ -213,6 +254,8 @@ export const ensureIndexByQuery = (
}
postIndexingMetaSetup(filterCache, op)
+
+ console.log(`ensureIndexByQuery "${filterCacheKey}" end ${readableWorkerId}`)
}
export function ensureEmptyFilterCache(
@@ -226,11 +269,11 @@ export function ensureEmptyFilterCache(
const state = store.getState()
const resolvedNodesCache = state.resolvedNodesCache
-  const orderedByCounter: Array<IGatsbyNode> = []
+  const orderedByCounter: Array<IGatsbyNodeIdentifiers> = []
filtersCache.set(filterCacheKey, {
op: `$eq`, // Ignore.
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>(),
meta: {
orderedByCounter, // This is what we want
},
@@ -248,7 +291,7 @@ export function ensureEmptyFilterCache(
node.__gatsby_resolved = resolved
}
}
- orderedByCounter.push(node)
+ orderedByCounter.push(getIdentifierObjectFromNode(node))
})
} else {
// Here we must first filter for the node type
@@ -265,14 +308,14 @@ export function ensureEmptyFilterCache(
node.__gatsby_resolved = resolved
}
}
- orderedByCounter.push(node)
+ orderedByCounter.push(getIdentifierObjectFromNode(node))
}
})
}
// Since each node can only have one type, we shouldn't have to be concerned
// about duplicates in this array. Just make sure they're sorted.
- orderedByCounter.sort((a, b) => a.internal.counter - b.internal.counter)
+ orderedByCounter.sort(sortByIds)
}
function addNodeToFilterCache(
@@ -335,7 +378,7 @@ function markNodeForValue(
arr = []
filterCache.byValue.set(value, arr)
}
- arr.push(node)
+ arr.push(getIdentifierObjectFromNode(node))
}
export const ensureIndexByElemMatch = (
@@ -353,7 +396,7 @@ export const ensureIndexByElemMatch = (
const filterCache: IFilterCache = {
op,
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>(),
meta: {},
} as IFilterCache
filtersCache.set(filterCacheKey, filterCache)
@@ -540,7 +583,7 @@ export const getNodesFromCacheByValue = (
filterValue: FilterValueNullable,
filtersCache: FiltersCache,
wasElemMatch
-): Array<IGatsbyNode> | undefined => {
+): Array<IGatsbyNodeIdentifiers> | undefined => {
const filterCache = filtersCache.get(filterCacheKey)
if (!filterCache) {
return undefined
@@ -573,7 +616,7 @@ export const getNodesFromCacheByValue = (
}
     const filterValueArr: Array<FilterValueNullable> = filterValue
-    const set: Set<IGatsbyNode> = new Set()
+    const set: Set<IGatsbyNodeIdentifiers> = new Set()
// TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works.
// For every value in the needle array, find the bucket of nodes for
@@ -583,7 +626,7 @@ export const getNodesFromCacheByValue = (
)
const arr = [...set] // this is bad for perf but will guarantee us a unique set :(
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// Note: it's very unlikely that the list of filter values is big so .includes should be fine here
if (filterValueArr.includes(null)) {
@@ -622,7 +665,7 @@ export const getNodesFromCacheByValue = (
// TODO: there's probably a more efficient algorithm to do set
// subtraction in such a way that we don't have to re-sort
- return [...set].sort((A, B) => A.internal.counter - B.internal.counter)
+ return [...set].sort(sortByIds)
}
if (op === `$ne`) {
@@ -632,7 +675,7 @@ export const getNodesFromCacheByValue = (
// TODO: there's probably a more efficient algorithm to do set
// subtraction in such a way that we don't have to resort here
- return [...set].sort((A, B) => A.internal.counter - B.internal.counter)
+ return [...set].sort(sortByIds)
}
if (op === `$regex`) {
@@ -649,7 +692,7 @@ export const getNodesFromCacheByValue = (
}
const regex = filterValue
-    const arr: Array<IGatsbyNode> = []
+    const arr: Array<IGatsbyNodeIdentifiers> = []
filterCache.byValue.forEach((nodes, value) => {
// TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce.
// Note: for legacy reasons partial paths should also be included for regex
@@ -661,7 +704,7 @@ export const getNodesFromCacheByValue = (
// TODO: we _can_ cache this list as well. Might make sense if it turns out that $regex is mostly used with literals
// TODO: it may make sense to first collect all buckets and then to .concat them, or merge sort them
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
@@ -706,7 +749,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[0])
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -746,7 +789,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue < filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until)
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -764,7 +807,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[1])
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -804,7 +847,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue <= filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until)
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -822,7 +865,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[0]).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -862,7 +905,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue > filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -880,7 +923,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[1]).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -920,7 +963,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue >= filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -935,7 +978,7 @@ export const getNodesFromCacheByValue = (
function removeBucketFromSet(
filterValue: FilterValueNullable,
filterCache: IFilterCache,
-  set: Set<IGatsbyNode>
+  set: Set<IGatsbyNodeIdentifiers>
): void {
if (filterValue === null) {
// Edge case: $ne with `null` returns only the nodes that contain the full
@@ -960,22 +1003,24 @@ function removeBucketFromSet(
* list that is also ordered by node.internal.counter
*/
export function intersectNodesByCounter(
-  a: Array<IGatsbyNode>,
-  b: Array<IGatsbyNode>
-): Array<IGatsbyNode> {
+  a: Array<IGatsbyNodeIdentifiers>,
+  b: Array<IGatsbyNodeIdentifiers>
+): Array<IGatsbyNodeIdentifiers> {
let pointerA = 0
let pointerB = 0
// TODO: perf check: is it helpful to init the array to min(maxA,maxB) items?
-  const result: Array<IGatsbyNode> = []
+  const result: Array<IGatsbyNodeIdentifiers> = []
const maxA = a.length
const maxB = b.length
let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
+ // TODO some optimization could be done here to not call getNode
+
while (pointerA < maxA && pointerB < maxB) {
- const nodeA = a[pointerA]
- const nodeB = b[pointerB]
- const counterA = nodeA.internal.counter
- const counterB = nodeB.internal.counter
+ const nodeA = getNode(a[pointerA].id)
+ const nodeB = getNode(b[pointerB].id)
+ const counterA = a[pointerA].counter
+ const counterB = b[pointerB].counter
if (counterA < counterB) {
pointerA++
@@ -992,7 +1037,7 @@ export function intersectNodesByCounter(
// back to back, so even if both input arrays contained the same node
// twice, this check would prevent the result from getting duplicate nodes
if (lastAdded !== nodeA) {
- result.push(nodeA)
+ result.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
@@ -1011,39 +1056,41 @@ export function intersectNodesByCounter(
* list that is also ordered by node.internal.counter
*/
export function unionNodesByCounter(
-  a: Array<IGatsbyNode>,
-  b: Array<IGatsbyNode>
-): Array<IGatsbyNode> {
+  a: Array<IGatsbyNodeIdentifiers>,
+  b: Array<IGatsbyNodeIdentifiers>
+): Array<IGatsbyNodeIdentifiers> {
// TODO: perf check: is it helpful to init the array to max(maxA,maxB) items?
-  const arr: Array<IGatsbyNode> = []
+  const arr: Array<IGatsbyNodeIdentifiers> = []
let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
+ // TODO some optimization could be done here to not call getNode
+
let pointerA = 0
let pointerB = 0
const maxA = a.length
const maxB = b.length
while (pointerA < maxA && pointerB < maxB) {
- const nodeA = a[pointerA]
- const nodeB = b[pointerB]
+ const nodeA = getNode(a[pointerA].id)!
+ const nodeB = getNode(b[pointerB].id)!
const counterA = nodeA.internal.counter
const counterB = nodeB.internal.counter
if (counterA < counterB) {
if (lastAdded !== nodeA) {
- arr.push(nodeA)
+ arr.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
} else if (counterA > counterB) {
if (lastAdded !== nodeB) {
- arr.push(nodeB)
+ arr.push(b[pointerB])
lastAdded = nodeB
}
pointerB++
} else {
if (lastAdded !== nodeA) {
- arr.push(nodeA)
+ arr.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
@@ -1052,18 +1099,18 @@ export function unionNodesByCounter(
}
while (pointerA < maxA) {
- const nodeA = a[pointerA]
+ const nodeA = getNode(a[pointerA].id)!
if (lastAdded !== nodeA) {
- arr.push(nodeA)
+ arr.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
}
while (pointerB < maxB) {
- const nodeB = b[pointerB]
+ const nodeB = getNode(b[pointerB].id)!
if (lastAdded !== nodeB) {
- arr.push(nodeB)
+ arr.push(b[pointerB])
lastAdded = nodeB
}
pointerB++
@@ -1072,11 +1119,11 @@ export function unionNodesByCounter(
return arr
}
-function expensiveDedupeInline(arr: Array<IGatsbyNode>): void {
+function expensiveDedupeInline(arr: Array<IGatsbyNodeIdentifiers>): void {
// An elemMatch filter may cause duplicates to appear in a bucket.
// Since the bucket is sorted those should now be back to back
// Worst case this is a fast O(n) loop that does nothing.
- let prev: IGatsbyNode | undefined = undefined
+ let prev: IGatsbyNodeIdentifiers | undefined = undefined
// We copy-on-find because a splice is expensive and we can't use Sets
diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index 53eaeddce63d6..e76901d9a084b 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -22,11 +22,18 @@ import {
getNodesFromCacheByValue,
intersectNodesByCounter,
IFilterCache,
+ IGatsbyNodeIdentifiers,
} from "./indexing"
import { IGraphQLRunnerStats } from "../../query/types"
import { IRunQueryArgs, IQueryResult } from "../types"
import { GatsbyIterable } from "../common/iterable"
+import { getNode } from "../"
+// @ts-ignore
+import { clearKeptObjects } from "lmdb"
+function isGatsbyNode(node: IGatsbyNode | undefined): node is IGatsbyNode {
+ return !!node
+}
export interface IRunFilterArg extends IRunQueryArgs {
filtersCache: FiltersCache
}
@@ -74,7 +81,7 @@ export function applyFastFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache
-): Array<IGatsbyNode> | null {
+): Array<IGatsbyNodeIdentifiers> | null {
if (!filtersCache) {
// If no filter cache is passed on, explicitly don't use one
return null
@@ -101,8 +108,10 @@ export function applyFastFilters(
while (nodesPerValueArrs.length > 1) {
// TS limitation: cannot guard against .pop(), so we must double cast
-    const a = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNode>
-    const b = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNode>
+    const a =
+      nodesPerValueArrs.pop() as unknown as Array<IGatsbyNodeIdentifiers>
+    const b =
+      nodesPerValueArrs.pop() as unknown as Array<IGatsbyNodeIdentifiers>
nodesPerValueArrs.push(intersectNodesByCounter(a, b))
}
@@ -124,8 +133,8 @@ function getBucketsForFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache
-): Array<Array<IGatsbyNode>> | undefined {
-  const nodesPerValueArrs: Array<Array<IGatsbyNode>> = []
+): Array<Array<IGatsbyNodeIdentifiers>> | undefined {
+  const nodesPerValueArrs: Array<Array<IGatsbyNodeIdentifiers>> = []
// Fail fast while trying to create and get the value-cache for each path
const every = filters.every(filter => {
@@ -170,7 +179,7 @@ function getBucketsForQueryFilter(
filter: IDbQueryQuery,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
-  nodesPerValueArrs: Array<Array<IGatsbyNode>>
+  nodesPerValueArrs: Array<Array<IGatsbyNodeIdentifiers>>
): boolean {
const {
path: filterPath,
@@ -187,6 +196,14 @@ function getBucketsForQueryFilter(
)
}
+ const readableWorkerId = process.env.GATSBY_WORKER_ID
+ ? `worker #${process.env.GATSBY_WORKER_ID}`
+ : `main`
+
+ console.log(
+ `getBucketsForQueryFilter "${filterCacheKey}" start ${readableWorkerId}`
+ )
+
const nodesPerValue = getNodesFromCacheByValue(
filterCacheKey,
filterValue as FilterValueNullable,
@@ -202,6 +219,10 @@ function getBucketsForQueryFilter(
// mechanism does not create an array unless there's a IGatsbyNode for it
nodesPerValueArrs.push(nodesPerValue)
+ console.log(
+ `getBucketsForQueryFilter "${filterCacheKey}" end ${readableWorkerId}`
+ )
+
return true
}
@@ -213,7 +234,7 @@ function collectBucketForElemMatch(
filter: IDbQueryElemMatch,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
-  nodesPerValueArrs: Array<Array<IGatsbyNode>>
+  nodesPerValueArrs: Array<Array<IGatsbyNodeIdentifiers>>
): boolean {
// Get comparator and target value for this elemMatch
let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init)
@@ -295,7 +316,12 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult {
? sortedResult.slice(skip, limit ? skip + (limit ?? 0) : undefined)
: sortedResult
- return { entries: new GatsbyIterable(entries), totalCount }
+ return {
+ entries: new GatsbyIterable(entries)
+ .map(nodeIds => getNode(nodeIds.id))
+      .filter(isGatsbyNode) as GatsbyIterable<IGatsbyNode>,
+ totalCount,
+ }
}
/**
@@ -307,7 +333,7 @@ function convertAndApplyFastFilters(
filtersCache: FiltersCache,
   resolvedFields: Record<string, any>,
stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+): Array<IGatsbyNodeIdentifiers> {
const filters = filterFields
? prefixResolvedFields(
createDbQueriesFromObject(prepareQueryArgs(filterFields)),
@@ -339,7 +365,8 @@ function convertAndApplyFastFilters(
// If there's a filter, there (now) must be an entry for this cache key
const filterCache = filtersCache.get(filterCacheKey) as IFilterCache
// If there is no filter then the ensureCache step will populate this:
-  const cache = filterCache.meta.orderedByCounter as Array<IGatsbyNode>
+  const cache = filterCache.meta
+    .orderedByCounter as Array<IGatsbyNodeIdentifiers>
return cache.slice(0)
}
@@ -388,7 +415,7 @@ function filterToStats(
* Returns same reference as input, sorted inline
*/
function sortNodes(
-  nodes: Array<IGatsbyNode>,
+  nodes: Array<IGatsbyNodeIdentifiers>,
sort:
| {
         fields: Array<string>
@@ -397,7 +424,7 @@ function sortNodes(
| undefined,
resolvedFields: any,
stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+): Array<IGatsbyNodeIdentifiers> {
if (!sort || sort.fields?.length === 0 || !nodes || nodes.length === 0) {
return nodes
}
@@ -415,10 +442,18 @@ function sortNodes(
return field
}
})
+ let i = 0
const sortFns = sortFields.map(
field =>
- (v): ((any) => any) =>
- getValueAt(v, field)
+ (v: IGatsbyNodeIdentifiers): ((any) => any) => {
+ i++
+ // lodash sorting needs ArrayLike thing, which our iterable isn't
+ // so for now this hack will do
+ if (i % 100 === 0) {
+ clearKeptObjects()
+ }
+ return getValueAt(getNode(v.id)!, field)
+ }
)
const sortOrder = sort.order.map(order =>
typeof order === `boolean` ? order : order.toLowerCase()
diff --git a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
index 6d216352222ec..101fb0aff8afa 100644
--- a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
+++ b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
@@ -1,4 +1,4 @@
-import { RootDatabase, open, ArrayLikeIterable } from "lmdb-store"
+import { RootDatabase, open, ArrayLikeIterable } from "lmdb"
// import { performance } from "perf_hooks"
import { ActionsUnion, IGatsbyNode } from "../../redux/types"
import { updateNodes } from "./updates/nodes"
@@ -70,7 +70,9 @@ function getDatabases(): ILmdbDatabases {
// FIXME: sharedStructuresKey breaks tests - probably need some cleanup for it on DELETE_CACHE
// sharedStructuresKey: Symbol.for(`structures`),
// @ts-ignore
- cache: true,
+ cache: {
+ expirer: false,
+ },
}),
nodesByType: rootDb.openDB({
name: `nodesByType`,
@@ -184,10 +186,10 @@ function updateDataStore(action: ActionsUnion): void {
const dbs = getDatabases()
// Force sync commit
dbs.nodes.transactionSync(() => {
- dbs.nodes.clear()
- dbs.nodesByType.clear()
- dbs.metadata.clear()
- dbs.indexes.clear()
+ dbs.nodes.clearSync()
+ dbs.nodesByType.clearSync()
+ dbs.metadata.clearSync()
+ dbs.indexes.clearSync()
})
break
}
@@ -229,8 +231,8 @@ function updateDataStore(action: ActionsUnion): void {
function clearIndexes(): void {
const dbs = getDatabases()
dbs.nodes.transactionSync(() => {
- dbs.metadata.clear()
- dbs.indexes.clear()
+ dbs.metadata.clearSync()
+ dbs.indexes.clearSync()
})
}
diff --git a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts
index d2426e3959ef4..1622548020f27 100644
--- a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts
+++ b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts
@@ -1,5 +1,5 @@
import { ActionsUnion, IGatsbyNode } from "../../../redux/types"
-import { Database } from "lmdb-store"
+import type { Database } from "lmdb"
type NodeId = string
diff --git a/packages/gatsby/src/datastore/types.ts b/packages/gatsby/src/datastore/types.ts
index 80b6e8d7bdb24..58a1c0e3653e2 100644
--- a/packages/gatsby/src/datastore/types.ts
+++ b/packages/gatsby/src/datastore/types.ts
@@ -1,4 +1,4 @@
-import { Database } from "lmdb-store"
+import { Database } from "lmdb"
import { IGatsbyNode } from "../redux/types"
import { GatsbyGraphQLType } from "../../index"
import { IInputQuery } from "./common/query"
diff --git a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
index 8a5ffedb83a1c..b7248a3bbec30 100644
--- a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
+++ b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
@@ -70,6 +70,21 @@ export async function createGraphqlEngineBundle(
],
module: {
rules: [
+ {
+ test: require.resolve(`lmdb`),
+ parser: { amd: false },
+ use: [
+ {
+ loader: require.resolve(`@vercel/webpack-asset-relocator-loader`),
+ options: {
+ outputAssetBase: `assets`,
+ },
+ },
+ {
+ loader: require.resolve(`./lmdb-bundling-patch`),
+ },
+ ],
+ },
{
test: /\.m?js$/,
type: `javascript/auto`,
diff --git a/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts
new file mode 100644
index 0000000000000..690036aeae2fc
--- /dev/null
+++ b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts
@@ -0,0 +1,31 @@
+import { createRequireFromPath } from "gatsby-core-utils"
+
+export default function (source: string): string {
+ let lmdbBinaryLocation
+ try {
+ const lmdbRequire = createRequireFromPath(require.resolve(`lmdb`))
+ const nodeGypBuild = lmdbRequire(`node-gyp-build`)
+ const path = require(`path`)
+
+ lmdbBinaryLocation = nodeGypBuild.path(
+ path.dirname(require.resolve(`lmdb`)).replace(`/dist`, ``)
+ )
+ } catch (e) {
+ console.error(`ln`, e)
+ return source
+ }
+
+ return source
+ .replace(
+ `require$1('node-gyp-build')(dirName)`,
+ `require(${JSON.stringify(lmdbBinaryLocation)})`
+ )
+ .replace(
+ `require$2.resolve('./dict/dict.txt')`,
+ `require.resolve('../dict/dict.txt')`
+ )
+ .replace(
+ /fs\.readFileSync\(new URL\('\.\/dict\/dict\.txt',\s*\(typeof\s*document\s*===\s*'undefined'\s*\?\s*new\s*\(require\('u'\s*\+\s*'rl'\)\.URL\)\s*\('file:'\s*\+\s*__filename\).href\s*:\s*\(document\.currentScript\s*&&\s*document\.currentScript\.src\s*\|\|\s*new URL\('index\.cjs',\s*document\.baseURI\)\.href\)\)\.replace\(\/dist\[\\\\\\\/\]index\.cjs\$\/,\s*''\)\)\)/g,
+ `fs.readFileSync(require.resolve('../dict/dict.txt'))`
+ )
+}
diff --git a/packages/gatsby/src/schema/node-model.js b/packages/gatsby/src/schema/node-model.js
index ac750c7a792b3..40850852cf2e3 100644
--- a/packages/gatsby/src/schema/node-model.js
+++ b/packages/gatsby/src/schema/node-model.js
@@ -482,6 +482,7 @@ class LocalNodeModel {
)
if (!_.isEmpty(actualFieldsToResolve)) {
+ console.log(`materialization`, { typeName, actualFieldsToResolve })
const resolvedNodes = new Map()
for (const node of getDataStore().iterateNodesByType(typeName)) {
this.trackInlineObjectsInRootNode(node)
diff --git a/packages/gatsby/src/utils/cache-lmdb.ts b/packages/gatsby/src/utils/cache-lmdb.ts
index 08ab4abdd8a34..98e94f45c1dc3 100644
--- a/packages/gatsby/src/utils/cache-lmdb.ts
+++ b/packages/gatsby/src/utils/cache-lmdb.ts
@@ -1,4 +1,4 @@
-import { open, RootDatabase, Database, DatabaseOptions } from "lmdb-store"
+import { open, RootDatabase, Database, DatabaseOptions } from "lmdb"
import fs from "fs-extra"
import path from "path"
diff --git a/yarn.lock b/yarn.lock
index 36cff191c4bc8..dfa5bc2c4451d 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -14612,17 +14612,16 @@ livereload-js@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/livereload-js/-/livereload-js-2.3.0.tgz#c3ab22e8aaf5bf3505d80d098cbad67726548c9a"
-lmdb-store@^1.6.11:
- version "1.6.11"
- resolved "https://registry.yarnpkg.com/lmdb-store/-/lmdb-store-1.6.11.tgz#801da597af8c7a01c81f87d5cc7a7497e381236d"
- integrity sha512-hIvoGmHGsFhb2VRCmfhodA/837ULtJBwRHSHKIzhMB7WtPH6BRLPsvXp1MwD3avqGzuZfMyZDUp3tccLvr721Q==
+lmdb@2.1.7:
+ version "2.1.7"
+ resolved "https://registry.yarnpkg.com/lmdb/-/lmdb-2.1.7.tgz#0f518102032037e248f201210943f0b94db04155"
+ integrity sha512-i6EFEBBlQ130J4BfJUbYgZFKQDz83xhpM47vzs0BMpXiJ7D4NjecO1Y3X54D341dwkLmTphlIyro5nTkKFXoMQ==
dependencies:
+ msgpackr "^1.5.2"
nan "^2.14.2"
node-gyp-build "^4.2.3"
- ordered-binary "^1.0.0"
- weak-lru-cache "^1.0.0"
- optionalDependencies:
- msgpackr "^1.4.7"
+ ordered-binary "^1.2.3"
+ weak-lru-cache "^1.2.1"
load-bmfont@^1.3.1, load-bmfont@^1.4.0:
version "1.4.0"
@@ -16233,10 +16232,10 @@ msgpackr-extract@^1.0.14:
nan "^2.14.2"
node-gyp-build "^4.2.3"
-msgpackr@^1.4.7:
- version "1.4.7"
- resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.4.7.tgz#d802ade841e7d2e873000b491cdda6574a3d5748"
- integrity sha512-bhC8Ed1au3L3oHaR/fe4lk4w7PLGFcWQ5XY/Tk9N6tzDRz8YndjCG68TD8zcvYZoxNtw767eF/7VpaTpU9kf9w==
+msgpackr@^1.5.2:
+ version "1.5.2"
+ resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.5.2.tgz#b400c9885642bdec27b284f8bdadbd6570b448b7"
+ integrity sha512-OCguCkbG34x1ddO4vAzEm/4J1GTo512k9SoxV8K+EGfI/onFdpemRf0HpsVRFpxadXr4JBFgHsQUitgTlw7ZYQ==
optionalDependencies:
msgpackr-extract "^1.0.14"
@@ -17137,10 +17136,10 @@ ora@^5.4.1:
strip-ansi "^6.0.0"
wcwidth "^1.0.1"
-ordered-binary@^1.0.0:
- version "1.1.3"
- resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.1.3.tgz#11dbc0a4cb7f8248183b9845e031b443be82571e"
- integrity sha512-tDTls+KllrZKJrqRXUYJtIcWIyoQycP7cVN7kzNNnhHKF2bMKHflcAQK+pF2Eb1iVaQodHxqZQr0yv4HWLGBhQ==
+ordered-binary@^1.2.3:
+ version "1.2.3"
+ resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.2.3.tgz#518f637692a74d372e56230effae37b811575e36"
+ integrity sha512-fEwMk8TNUtzQDjXKYS2ANW3fNZ/gMReCPOAsLHaqw+UDnq/8ddXAcX4lGRpTK7kAghAjkmJs1EXXbcrDbg+ruw==
ordered-read-streams@^1.0.0:
version "1.0.1"
@@ -24502,10 +24501,10 @@ wcwidth@^1.0.0, wcwidth@^1.0.1:
dependencies:
defaults "^1.0.3"
-weak-lru-cache@^1.0.0:
- version "1.1.2"
- resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.1.2.tgz#a909a97372aabdfbfe3eb33580af255b3b198834"
- integrity sha512-Bi5ae8Bev3YulgtLTafpmHmvl3vGbanRkv+qqA2AX8c3qj/MUdvSuaHq7ukDYBcMDINIaRPTPEkXSNCqqWivuA==
+weak-lru-cache@^1.2.1:
+ version "1.2.1"
+ resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.2.1.tgz#6b4f2da7e1701f845e71522417f1df1e39503df8"
+ integrity sha512-O5ag1F0Xk6ui+Fg5LlosTcVAyHs6DeyiDDbOapNtFCx/KjZ82B3U9stM9hvzbVclKWn9ABPjaINX/nQkGkJkKg==
web-namespaces@^1.0.0:
version "1.1.2"