diff --git a/benchmarks/memory/.dockerignore b/benchmarks/memory/.dockerignore
new file mode 100644
index 0000000000000..cbd3fdd9b6b92
--- /dev/null
+++ b/benchmarks/memory/.dockerignore
@@ -0,0 +1,23 @@
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+README.md
diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile
new file mode 100644
index 0000000000000..80f6e52c38966
--- /dev/null
+++ b/benchmarks/memory/Dockerfile
@@ -0,0 +1,14 @@
+FROM node:14-buster
+ENV NODE_ENV=production
+ENV CI=1
+ENV GATSBY_CPU_COUNT=4
+RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y
+RUN npm i -g gatsby-cli gatsby-dev-cli
+WORKDIR /usr/src/app
+RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc
+
+# set up gatsby-dev
+RUN gatsby-dev --set-path-to-repo /usr/src/gatsby
+
+# keep the process running
+ENTRYPOINT ["tail", "-f", "/dev/null"]
\ No newline at end of file
diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md
new file mode 100644
index 0000000000000..445abd8415bf4
--- /dev/null
+++ b/benchmarks/memory/README.md
@@ -0,0 +1,124 @@
+# Gatsby Memory Benchmark
+
+The goal of this benchmark is to test Gatsby's memory usage and look for potential optimizations.
+
+## The Docker Container
+
+The docker container used in these tests sets up a Debian instance with node 14 installed (as well as npm/yarn/etc).
+It exposes port 9000 (for hosting gatsby) and port 9229 (for debugging); see the example below for reaching the site from the host.
+
+Within the container, two paths are bind-mounted from your local filesystem:
+
+- /usr/src/gatsby : Your local gatsby repo
+- /usr/src/app : The memory benchmark gatsby site
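+
+For example, with the container running and `yarn gatsby:serve` started inside it, the site can be reached from the host (the URL is illustrative; port 9000 is published by `yarn docker:start`):
+
+```shell
+curl http://localhost:9000
+```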
+
+## Commands
+
+### Docker
+
+These commands are used for interfacing with docker and for managing the container used for testing.
+
+#### yarn docker:build
+
+Builds the container used for testing.
+
+#### yarn docker:start
+
+Starts the container built by `yarn docker:build`.
+
+#### yarn docker:connect
+
+Connects to the container started by `yarn docker:start`.
+
+#### yarn docker:start-and-connect
+
+A shorthand for start + connect.
+
+#### yarn docker:stop
+
+Stop the container used for testing.
+
+### Gatsby
+
+These commands are used for interfacing with gatsby.
+
+#### yarn gatsby:build
+
+Simply an alias to `yarn gatsby build`.
+
+#### yarn gatsby:serve
+
+Starts `gatsby serve` on port 9000 and sets the host properly to work inside docker.
+
+#### yarn gatsby:develop
+
+Starts `gatsby develop` on port 9000 and sets the host properly to work inside docker.
+
+#### yarn gatsby:build:debug
+
+Runs `gatsby build` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
+
+#### yarn gatsby:develop:debug
+
+Runs `gatsby develop` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
+
+## Setup
+
+Currently we can reproduce builds crashing with our default settings:
+
+- Docker container running with a 2GB memory limit
+- 300 nodes x ~2MB each = ~600MB of "just" node data in each process (the number of nodes can be controlled with the `NUM_NODES` env var)
+- 3 workers + the main process (`GATSBY_CPU_COUNT` is set to 4 in the docker image, but you can override it with the env var; see the example below)
+- `eq_field` template using fast filters (a single `eq` comparator specifically)
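+
+For example, to run a build inside the container with non-default settings (the values below are illustrative):
+
+```shell
+NUM_NODES=500 GATSBY_CPU_COUNT=2 yarn gatsby:build
+```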
+
+The goal is to make the `eq_field` template stop causing crashes, then add the next template (a different operator) that causes crashes, and repeat until all query types can be handled within the set memory limits.
+
+### Workflow
+
+While the `gatsby-dev` command is available inside docker, from my testing it doesn't seem to pick up file changes when run there. A workflow that works reliably (see the example session after the lists below):
+
+When starting work with this benchmark:
+
+- start `yarn watch` (possibly with `--scope`) in the monorepo
+- start `gatsby-dev` outside of docker in the benchmark directory (just like with a regular site)
+- `yarn docker:connect` to get inside docker
+- `npm rebuild` to rebuild the native binaries inside docker
+
+And repeat as many times as you want:
+
+- make changes to `gatsby` source code as you normally would
+- run `yarn gatsby:build` inside docker
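+
+A typical session might look like this (the `--scope` value is illustrative):
+
+```shell
+# on the host, in the monorepo root
+yarn watch --scope=gatsby
+
+# on the host, in benchmarks/memory
+gatsby-dev
+yarn docker:connect
+
+# inside the container
+npm rebuild
+yarn gatsby:build
+```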
+
+## Testing
+
+TODO
+
+- How to configure memory limits
+- Where to look
diff --git a/benchmarks/memory/gatsby-config.js b/benchmarks/memory/gatsby-config.js
new file mode 100644
index 0000000000000..5ae66ab282a51
--- /dev/null
+++ b/benchmarks/memory/gatsby-config.js
@@ -0,0 +1,3 @@
+module.exports = {
+ plugins: [],
+}
diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js
new file mode 100644
index 0000000000000..f020ac0079ba0
--- /dev/null
+++ b/benchmarks/memory/gatsby-node.js
@@ -0,0 +1,226 @@
+const { cpuCoreCount } = require(`gatsby-core-utils`)
+
+const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10)
+
+const NUM_KEYS_IN_LARGE_SIZE_OBJ = 1024
+
+exports.sourceNodes = async ({ actions, reporter }) => {
+ const contentDigest = Date.now().toString() // make each sourcing mark everything as dirty
+
+ const activity = reporter.createProgress(`Creating test nodes`, NUM_NODES)
+ activity.start()
+
+ for (let i = 0; i < NUM_NODES; i++) {
+ const largeSizeObj = {}
+ for (let j = 1; j <= NUM_KEYS_IN_LARGE_SIZE_OBJ; j++) {
+ largeSizeObj[`key_${j}`] = `x`.repeat(1024)
+ }
+
+ // each node is ~2MB
+ const node = {
+ id: `memory-${i}`,
+ idClone: `memory-${i}`,
+ fooBar: [`foo`, `bar`, `baz`, `foobar`][i % 4],
+ number1: i,
+ number2: NUM_NODES - i,
+ number3: i % 20,
+ largeSizeObj,
+ largeSizeString: `x`.repeat(1024 * 1024),
+ internal: {
+ contentDigest,
+ type: `Test`,
+ },
+ }
+
+ actions.createNode(node)
+
+ if (i % 100 === 99) {
+ activity.tick(100)
+ await new Promise(resolve => setImmediate(resolve))
+ }
+ }
+
+ activity.tick(NUM_NODES % 100)
+
+ await new Promise(resolve => setTimeout(resolve, 100))
+
+ activity.end()
+}
+
+exports.createSchemaCustomization = ({ actions, schema }) => {
+ actions.createTypes([
+ schema.buildObjectType({
+ name: `TestLargeSizeObj`,
+ fields: Object.fromEntries(
+ new Array(NUM_KEYS_IN_LARGE_SIZE_OBJ)
+ .fill(`String`)
+ .map((value, index) => [`key_${index + 1}`, value])
+ ),
+ }),
+ schema.buildObjectType({
+ name: `Test`,
+ fields: {
+ idClone: `String`,
+ fooBar: `String`,
+ number1: `Int`,
+ number2: `Int`,
+ number3: `Int`,
+ largeSizeString: `String`,
+ largeSizeObj: `TestLargeSizeObj`,
+ idCloneWithResolver: {
+ type: `String`,
+ resolve: source => {
+ return source.idClone
+ },
+ },
+ },
+ interfaces: ["Node"],
+ extensions: {
+ infer: false,
+ },
+ }),
+ ])
+}
+
+const printedMessages = new Set()
+exports.createResolvers = ({ createResolvers }) => {
+ createResolvers({
+ Query: {
+ workerInfo: {
+ type: `String`,
+ args: {
+ label: `String!`,
+ },
+ resolve: (_, args) => {
+ const msg = `${args.label} on ${
+ process.env.GATSBY_WORKER_ID
+ ? `worker #${process.env.GATSBY_WORKER_ID}`
+ : `main`
+ }`
+ if (!printedMessages.has(msg)) {
+ printedMessages.add(msg)
+ console.log(msg)
+ }
+ return msg
+ },
+ },
+ },
+ })
+}
+
+const WORKER_BATCH_SIZE =
+ Number(process.env.GATSBY_PARALLEL_QUERY_CHUNK_SIZE) || 50
+
+let enabledTemplates = new Set()
+exports.onPreBootstrap = () => {
+ const availableTemplates = new Set([
+ `eq_id`, // this should skip node-model and fast filters completely and should be very cheap already
+ `eq_field`, // this needs fast filters for eq operator on non-id field
+    `eq_field_with_resolver`, // this needs fast filters for eq operator on non-id field + materialization
+ `ne_field_collection_sort_skip_limit`, // collection query to check code path applying sorting and skip/limit
+ ])
+ enabledTemplates = new Set(
+ process.env.TEMPLATES
+ ? process.env.TEMPLATES.split(`,`).filter(template =>
+ availableTemplates.has(template)
+ )
+ : availableTemplates
+ )
+
+ console.info(`Enabled templates`, enabledTemplates)
+}
+
+exports.createPages = async ({ actions, graphql }) => {
+ const numWorkers = Math.max(1, cpuCoreCount() - 1)
+
+ // we do want ALL available workers to execute each query type
+ const minNumOfPagesToSaturateAllWorkers = WORKER_BATCH_SIZE * numWorkers
+
+ const { data } = await graphql(`
+ {
+ allTest {
+ nodes {
+ id
+ idClone
+ }
+ }
+ }
+ `)
+
+ // we might need to "duplicate" pages if node count is less than number of needed pages
+  const repeatCount = Math.max(
+    1,
+ Math.ceil(minNumOfPagesToSaturateAllWorkers / data.allTest.nodes.length)
+ )
+
+ function createEnoughToSaturate(template, cb) {
+ if (!enabledTemplates.has(template)) {
+ return
+ }
+ console.log(`Creating pages with template "${template}"`)
+ let counter = 0
+ for (let i = 0; i < repeatCount; i++) {
+ let j = 0
+ for (const node of data.allTest.nodes) {
+ const { context } = cb(node, j)
+
+ actions.createPage({
+ path: `/${template}/${counter++}`,
+ component: require.resolve(`./src/templates/${template}`),
+ context,
+ })
+
+ if (counter >= minNumOfPagesToSaturateAllWorkers) {
+ break
+ }
+
+ j++
+ }
+ }
+ }
+
+ // fast path (eq: { id: x })
+ createEnoughToSaturate(`eq_id`, node => {
+ return {
+ context: {
+ id: node.id,
+ },
+ }
+ })
+
+ // (eq: { idClone: x })
+ createEnoughToSaturate(`eq_field`, node => {
+ return {
+ context: {
+ id: node.id,
+ },
+ }
+ })
+
+ // (eq: { idCloneWithResolver: x })
+ createEnoughToSaturate(`eq_field_with_resolver`, node => {
+ return {
+ context: {
+ id: node.id,
+ },
+ }
+ })
+
+ // allTest(
+ // filter: { idClone: { ne: $id } }
+ // sort: { fields: [number3], order: [ASC] }
+ // limit: 10
+ // skip: $skip
+ // )
+ createEnoughToSaturate(
+ `ne_field_collection_sort_skip_limit`,
+ (node, index) => {
+ return {
+ context: {
+ id: node.id,
+          skip: Math.min(index, NUM_NODES - 10), // limit is set to 10, so cap the skip so queries for the last nodes still return 10 items
+ },
+ }
+ }
+ )
+}
diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json
new file mode 100644
index 0000000000000..2d63ab39c23be
--- /dev/null
+++ b/benchmarks/memory/package.json
@@ -0,0 +1,32 @@
+{
+ "name": "memory-usage-benchmark",
+ "private": true,
+ "version": "1.0.0",
+ "description": "Test site stress testing memory usage",
+ "license": "MIT",
+ "scripts": {
+ "gatsby:build": "yarn gatsby build",
+ "gatsby:serve": "yarn gatsby serve -H 0.0.0.0 -p 9000",
+ "gatsby:develop": "NODE_ENV=development yarn gatsby develop -H 0.0.0.0 -p 9000",
+ "gatsby:build:debug": "node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby build",
+ "gatsby:develop:debug": "NODE_ENV=development node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby develop -H 0.0.0.0 -p 9000",
+ "docker:build": "docker build -t gatsby-memory .",
+ "docker:start": "./scripts/docker-start",
+ "docker:connect": "./scripts/docker-connect",
+ "docker:start-and-connect": "./scripts/docker-start && sleep 1 && ./scripts/docker-connect",
+ "docker:stop": "./scripts/docker-stop",
+ "docker:stats": "./scripts/docker-stats"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/gatsbyjs/gatsby/tree/master/benchmarks/memory"
+ },
+ "bugs": {
+ "url": "https://github.com/gatsbyjs/gatsby/issues"
+ },
+ "dependencies": {
+ "gatsby": "^4",
+ "react": "^17.0.2",
+ "react-dom": "^17.0.2"
+ }
+}
diff --git a/benchmarks/memory/scripts/docker-connect b/benchmarks/memory/scripts/docker-connect
new file mode 100755
index 0000000000000..af6582a97d6f8
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-connect
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+
+if [ -z "$DOCKER_ID" ]; then
+  echo -e "\nNo gatsby-memory container is running. Start one with \`yarn docker:start\`.\n"
+  exit 1
+fi
+
+echo -e "Connecting to container $DOCKER_ID...\n"
+docker exec -it $DOCKER_ID bash
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-get-id b/benchmarks/memory/scripts/docker-get-id
new file mode 100755
index 0000000000000..064e21e32607c
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-get-id
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+DOCKER_ID=$(\
+ docker ps --format '{{.Image}}:{{.ID}}' | \
+ grep "gatsby-memory" | \
+ head -n 1 | \
+ sed 's/gatsby\-memory://'\
+)
+
+echo $DOCKER_ID
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start
new file mode 100755
index 0000000000000..235d3526b4d9b
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-start
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+if [ -n "$DOCKER_ID" ]; then
+  echo -e "\nA gatsby-memory container is already running with id $DOCKER_ID."
+  echo -e "Please use that container, or run \`yarn docker:stop\` to stop it.\n"
+  exit 1
+fi
+
+DOCKER_ID=$(\
+ docker run -td \
+ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \
+ --mount type=bind,source="$(pwd)",target=/usr/src/app \
+ --publish 9229:9229 \
+ --publish 9000:9000 \
+ --memory="2g" \
+ --memory-swap="2g" \
+ gatsby-memory \
+ | head -c 12 \
+)
+
+echo -e "\nStarted container id ${DOCKER_ID}! Run \`yarn docker:connect\` to connect to the container.\n"
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-stats b/benchmarks/memory/scripts/docker-stats
new file mode 100755
index 0000000000000..9fb96494108b7
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-stats
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+if [ -z "$DOCKER_ID" ]; then
+ echo -e "\nNo gatsby-memory container was found. Run \`yarn docker:start\` to start one.\n"
+ exit 1
+fi
+
+FORMAT="Gatsby Memory Benchmark Container----CPU: {{.CPUPerc }}--Memory: {{.MemUsage}}--Network: {{.NetIO}}"
+STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT")
+clear
+
+while [ -n "$STATS" ]; do
+ echo $STATS | sed "s/--/\n/g"
+ DOCKER_ID=$(./scripts/docker-get-id)
+ STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT")
+ clear
+done
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/docker-stop b/benchmarks/memory/scripts/docker-stop
new file mode 100755
index 0000000000000..95dbec9e55704
--- /dev/null
+++ b/benchmarks/memory/scripts/docker-stop
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+DOCKER_ID=$(./scripts/docker-get-id)
+
+if [ -z "$DOCKER_ID" ]; then
+  echo -e "\nNo gatsby-memory container is running.\n"
+  exit 1
+fi
+
+DOCKER_ID=$(docker kill $DOCKER_ID)
+echo -e "\nStopped container $DOCKER_ID.\n"
\ No newline at end of file
diff --git a/benchmarks/memory/scripts/enforce-docker b/benchmarks/memory/scripts/enforce-docker
new file mode 100755
index 0000000000000..43ede33d240db
--- /dev/null
+++ b/benchmarks/memory/scripts/enforce-docker
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+if [ ! -f /.dockerenv ]; then
+ DOCKER_ID=$(./scripts/docker-get-id)
+ COMMAND="start-and-connect"
+ if [ -n "$DOCKER_ID" ]; then
+ COMMAND="connect"
+ fi
+ echo -e "\nThis must be run inside the docker container. Please run \`yarn docker:${COMMAND}\` and try again.\n"
+ exit 1
+fi
+
+${@:1}
\ No newline at end of file
diff --git a/benchmarks/memory/src/pages/index.js b/benchmarks/memory/src/pages/index.js
new file mode 100644
index 0000000000000..8729fdc41578e
--- /dev/null
+++ b/benchmarks/memory/src/pages/index.js
@@ -0,0 +1,5 @@
+import React from "react"
+
+export default function Home() {
+  return <div>Hello world!</div>
+}
diff --git a/benchmarks/memory/src/templates/eq_field.js b/benchmarks/memory/src/templates/eq_field.js
new file mode 100644
index 0000000000000..c881ada4c05f3
--- /dev/null
+++ b/benchmarks/memory/src/templates/eq_field.js
@@ -0,0 +1,20 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!) {
+ test(idClone: { eq: $id }) {
+ id
+ fooBar
+ }
+ workerInfo(label: "eq-field")
+ }
+`
diff --git a/benchmarks/memory/src/templates/eq_field_with_resolver.js b/benchmarks/memory/src/templates/eq_field_with_resolver.js
new file mode 100644
index 0000000000000..ffc066340c721
--- /dev/null
+++ b/benchmarks/memory/src/templates/eq_field_with_resolver.js
@@ -0,0 +1,20 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!) {
+ test(idCloneWithResolver: { eq: $id }) {
+ id
+ fooBar
+ }
+ workerInfo(label: "eq-field-with-resolver")
+ }
+`
diff --git a/benchmarks/memory/src/templates/eq_id.js b/benchmarks/memory/src/templates/eq_id.js
new file mode 100644
index 0000000000000..3bca139fc3c26
--- /dev/null
+++ b/benchmarks/memory/src/templates/eq_id.js
@@ -0,0 +1,20 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!) {
+ test(id: { eq: $id }) {
+ id
+ fooBar
+ }
+ workerInfo(label: "eq-id")
+ }
+`
diff --git a/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js b/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js
new file mode 100644
index 0000000000000..a57663a40e0cd
--- /dev/null
+++ b/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js
@@ -0,0 +1,27 @@
+import React from "react"
+import { graphql } from "gatsby"
+
+export default function Home({ data }) {
+  return (
+    <div>
+      <pre>{JSON.stringify(data, null, 2)}</pre>
+    </div>
+  )
+}
+
+export const q = graphql`
+ query ($id: String!, $skip: Int!) {
+ allTest(
+ filter: { idClone: { ne: $id } }
+ sort: { fields: [number3], order: [ASC] }
+ limit: 10
+ skip: $skip
+ ) {
+ nodes {
+ id
+ fooBar
+ }
+ }
+ workerInfo(label: "ne-field-collection-sort-skip-limit")
+ }
+`
diff --git a/integration-tests/cache-resilience/gatsby-node.js b/integration-tests/cache-resilience/gatsby-node.js
index 21ab949eb533a..3847cc1170211 100644
--- a/integration-tests/cache-resilience/gatsby-node.js
+++ b/integration-tests/cache-resilience/gatsby-node.js
@@ -3,7 +3,7 @@ const v8 = require(`v8`)
const glob = require(`glob`)
const path = require(`path`)
const _ = require(`lodash`)
-const { open } = require(`lmdb-store`)
+const { open } = require(`lmdb`)
const { saveState } = require(`gatsby/dist/redux/save-state`)
diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json
index fb28bf3df283f..305897fceae42 100644
--- a/packages/gatsby/package.json
+++ b/packages/gatsby/package.json
@@ -101,7 +101,7 @@
"joi": "^17.4.2",
"json-loader": "^0.5.7",
"latest-version": "5.1.0",
- "lmdb-store": "^1.6.11",
+ "lmdb": "2.1.7",
"lodash": "^4.17.21",
"md5-file": "^5.0.0",
"meant": "^1.0.3",
diff --git a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
index 98b84a406a471..8b20b188a3e5f 100644
--- a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
+++ b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js
@@ -407,7 +407,7 @@ describe(`applyFastFilters`, () => {
expect(result.length).toEqual(2)
result.map(node => {
- expect(node.slog).toEqual(`def`)
+ expect(getNode(node.id).slog).toEqual(`def`)
})
})
@@ -425,7 +425,7 @@ describe(`applyFastFilters`, () => {
expect(result.length).toEqual(2)
result.map(node => {
- expect(node.deep.flat.search.chain).toEqual(300)
+ expect(getNode(node.id).deep.flat.search.chain).toEqual(300)
})
})
@@ -444,8 +444,8 @@ describe(`applyFastFilters`, () => {
// Count is irrelevant as long as it is non-zero and they all match filter
expect(Array.isArray(results)).toBe(true)
expect(results.length).toEqual(1)
- expect(results[0].slog).toEqual(`def`)
- expect(results[0].deep.flat.search.chain).toEqual(300)
+ expect(getNode(results[0].id).slog).toEqual(`def`)
+ expect(getNode(results[0].id).deep.flat.search.chain).toEqual(300)
})
it(`supports elemMatch`, () => {
diff --git a/packages/gatsby/src/datastore/common/iterable.ts b/packages/gatsby/src/datastore/common/iterable.ts
index 2948c6378e172..8e1abe698b906 100644
--- a/packages/gatsby/src/datastore/common/iterable.ts
+++ b/packages/gatsby/src/datastore/common/iterable.ts
@@ -1,3 +1,5 @@
+// @ts-ignore
+import { clearKeptObjects } from "lmdb"
/**
* Wrapper for any iterable providing chainable interface and convenience methods
* similar to array.
@@ -10,10 +12,18 @@
 export class GatsbyIterable<T> {
   constructor(private source: Iterable<T> | (() => Iterable<T>)) {}
-  [Symbol.iterator](): Iterator<T> {
+  *[Symbol.iterator](): Generator<T> {
const source =
typeof this.source === `function` ? this.source() : this.source
- return source[Symbol.iterator]()
+
+ let i = 0
+ for (const val of source) {
+ yield val
+
+ if (++i % 100 === 0) {
+ clearKeptObjects()
+ }
+ }
}
   concat<U>(other: Iterable<U>): GatsbyIterable<T | U> {
diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 7d6acc42c46e5..5f9c072fbb42b 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -5,7 +5,7 @@ import {
FilterValue,
FilterValueNullable,
} from "../common/query"
-import { getDataStore } from "../"
+import { getDataStore, getNode } from "../"
// Only list supported ops here. "CacheableFilterOp"
export type FilterOp = // TODO: merge with DbComparator ?
@@ -21,6 +21,39 @@ export type FilterOp = // TODO: merge with DbComparator ?
// Note: `undefined` is an encoding for a property that does not exist
export type FilterCacheKey = string
+type GatsbyNodeID = string
+
+export interface IGatsbyNodeIdentifiers {
+ id: GatsbyNodeID
+ counter: number
+}
+
+const nodeIdToIdentifierMap = new Map<
+ GatsbyNodeID,
+  WeakRef<IGatsbyNodeIdentifiers>
+>()
+
+const getIdentifierObjectFromNode = (
+ node: IGatsbyNode
+): IGatsbyNodeIdentifiers => {
+ const cacheKey = `${node.id}_____${node.internal.counter}`
+ if (nodeIdToIdentifierMap.has(cacheKey)) {
+ const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+ if (maybeStillExist) {
+ return maybeStillExist
+ }
+ }
+
+ const identifier = { id: node.id, counter: node.internal.counter }
+ nodeIdToIdentifierMap.set(cacheKey, new WeakRef(identifier))
+ return identifier
+}
+
+const sortByIds = (
+ a: IGatsbyNodeIdentifiers,
+ b: IGatsbyNodeIdentifiers
+): number => a.counter - b.counter
+
export interface IFilterCache {
op: FilterOp
// In this map `undefined` values represent nodes that did not have the path
@@ -30,22 +63,22 @@ export interface IFilterCache {
// This arrays may contain duplicates (!) because those only get filtered in the
// last step.
// TODO: We might decide to make sure these buckets _are_ deduped for eq perf
-  byValue: Map<FilterValueNullable, Array<IGatsbyNode>>
+  byValue: Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>
meta: {
// Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort
-    nodesUnordered?: Array<IGatsbyNode>
+    nodesUnordered?: Array<IGatsbyNodeIdentifiers>
// Flat list of all nodes by requested types, ordered by counter (cached for empty filters)
-    orderedByCounter?: Array<IGatsbyNode>
+    orderedByCounter?: Array<IGatsbyNodeIdentifiers>
// Ordered list of all values (by `<`) found by this filter. No null / undefs
     valuesAsc?: Array<FilterValue>
// Flat list of nodes, ordered by valueAsc
-    nodesByValueAsc?: Array<IGatsbyNode>
+    nodesByValueAsc?: Array<IGatsbyNodeIdentifiers>
// Ranges of nodes per value, maps to the nodesByValueAsc array
     valueRangesAsc?: Map<FilterValue, [number, number]>
// Ordered list of all values (by `>`) found by this filter. No null / undefs
     valuesDesc?: Array<FilterValue>
// Flat list of nodes, ordered by valueDesc
-    nodesByValueDesc?: Array<IGatsbyNode>
+    nodesByValueDesc?: Array<IGatsbyNodeIdentifiers>
// Ranges of nodes per value, maps to the nodesByValueDesc array
     valueRangesDesc?: Map<FilterValue, [number, number]>
}
@@ -59,7 +92,7 @@ export function postIndexingMetaSetup(
// Loop through byValue and make sure the buckets are sorted by counter
// Since we don't do insertion sort, we have to do it afterwards
for (const bucket of filterCache.byValue) {
- bucket[1].sort((a, b) => a.internal.counter - b.internal.counter)
+ bucket[1].sort(sortByIds)
}
if (op === `$ne` || op === `$nin`) {
@@ -79,15 +112,14 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void {
// including nodes where the value is null.
// A $nin does the same as an $ne except it filters multiple values instead
// of just one.
-
// For `$ne` we will take the list of all targeted nodes and eliminate the
// bucket of nodes with a particular value, if it exists at all..
-  const arr: Array<IGatsbyNode> = []
+  const arr: Array<IGatsbyNodeIdentifiers> = []
filterCache.meta.nodesUnordered = arr
filterCache.byValue.forEach(v => {
- v.forEach(node => {
- arr.push(node)
+ v.forEach(nodeId => {
+ arr.push(nodeId)
})
})
}
@@ -101,15 +133,15 @@ function postIndexingMetaSetupLtLteGtGte(
// internal.counter, asc.
// This way non-eq ops can simply slice the array to get a range.
-  const entriesNullable: Array<[FilterValueNullable, Array<IGatsbyNode>]> = [
-    ...filterCache.byValue.entries(),
-  ]
+  const entriesNullable: Array<
+    [FilterValueNullable, Array<IGatsbyNodeIdentifiers>]
+  > = [...filterCache.byValue.entries()]
// These range checks never return `null` or `undefined` so filter those out
// By filtering them out early, the sort should be faster. Could be ...
-  const entries: Array<[FilterValue, Array<IGatsbyNode>]> =
+  const entries: Array<[FilterValue, Array<IGatsbyNodeIdentifiers>]> =
     entriesNullable.filter(([v]) => v != null) as Array<
-      [FilterValue, Array<IGatsbyNode>]
+      [FilterValue, Array<IGatsbyNodeIdentifiers>]
>
// Sort all arrays by its value, asc. Ignore/allow potential type casting.
@@ -133,19 +165,21 @@ function postIndexingMetaSetupLtLteGtGte(
entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0))
}
-  const orderedNodes: Array<IGatsbyNode> = []
+  const orderedNodes: Array<IGatsbyNodeIdentifiers> = []
   const orderedValues: Array<FilterValue> = []
   const offsets: Map<FilterValue, [number, number]> = new Map()
-  entries.forEach(([v, bucket]: [FilterValue, Array<IGatsbyNode>]) => {
- // Record the range containing all nodes with as filter value v
- // The last value of the range should be the offset of the next value
- // (So you should be able to do `nodes.slice(start, stop)` to get them)
- offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length])
- // We could do `arr.push(...bucket)` here but that's not safe with very
- // large sets, so we use a regular loop
- bucket.forEach(node => orderedNodes.push(node))
- orderedValues.push(v)
- })
+ entries.forEach(
+    ([v, bucket]: [FilterValue, Array<IGatsbyNodeIdentifiers>]) => {
+ // Record the range containing all nodes with as filter value v
+ // The last value of the range should be the offset of the next value
+ // (So you should be able to do `nodes.slice(start, stop)` to get them)
+ offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length])
+ // We could do `arr.push(...bucket)` here but that's not safe with very
+ // large sets, so we use a regular loop
+ bucket.forEach(node => orderedNodes.push(node))
+ orderedValues.push(v)
+ }
+ )
if (op === `$lt` || op === `$lte`) {
filterCache.meta.valuesAsc = orderedValues
@@ -178,12 +212,19 @@ export const ensureIndexByQuery = (
   nodeTypeNames: Array<string>,
filtersCache: FiltersCache
): void => {
+ const readableWorkerId = process.env.GATSBY_WORKER_ID
+ ? `worker #${process.env.GATSBY_WORKER_ID}`
+ : `main`
+
+ console.log(
+ `ensureIndexByQuery "${filterCacheKey}" start ${readableWorkerId}`
+ )
const state = store.getState()
const resolvedNodesCache = state.resolvedNodesCache
const filterCache: IFilterCache = {
op,
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>(),
meta: {},
} as IFilterCache
filtersCache.set(filterCacheKey, filterCache)
@@ -213,6 +254,8 @@ export const ensureIndexByQuery = (
}
postIndexingMetaSetup(filterCache, op)
+
+ console.log(`ensureIndexByQuery "${filterCacheKey}" end ${readableWorkerId}`)
}
export function ensureEmptyFilterCache(
@@ -226,11 +269,11 @@ export function ensureEmptyFilterCache(
const state = store.getState()
const resolvedNodesCache = state.resolvedNodesCache
-  const orderedByCounter: Array<IGatsbyNode> = []
+  const orderedByCounter: Array<IGatsbyNodeIdentifiers> = []
filtersCache.set(filterCacheKey, {
op: `$eq`, // Ignore.
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>(),
meta: {
orderedByCounter, // This is what we want
},
@@ -248,7 +291,7 @@ export function ensureEmptyFilterCache(
node.__gatsby_resolved = resolved
}
}
- orderedByCounter.push(node)
+ orderedByCounter.push(getIdentifierObjectFromNode(node))
})
} else {
// Here we must first filter for the node type
@@ -265,14 +308,14 @@ export function ensureEmptyFilterCache(
node.__gatsby_resolved = resolved
}
}
- orderedByCounter.push(node)
+ orderedByCounter.push(getIdentifierObjectFromNode(node))
}
})
}
// Since each node can only have one type, we shouldn't have to be concerned
// about duplicates in this array. Just make sure they're sorted.
- orderedByCounter.sort((a, b) => a.internal.counter - b.internal.counter)
+ orderedByCounter.sort(sortByIds)
}
function addNodeToFilterCache(
@@ -335,7 +378,7 @@ function markNodeForValue(
arr = []
filterCache.byValue.set(value, arr)
}
- arr.push(node)
+ arr.push(getIdentifierObjectFromNode(node))
}
export const ensureIndexByElemMatch = (
@@ -353,7 +396,7 @@ export const ensureIndexByElemMatch = (
const filterCache: IFilterCache = {
op,
-    byValue: new Map<FilterValueNullable, Array<IGatsbyNode>>(),
+    byValue: new Map<FilterValueNullable, Array<IGatsbyNodeIdentifiers>>(),
meta: {},
} as IFilterCache
filtersCache.set(filterCacheKey, filterCache)
@@ -540,7 +583,7 @@ export const getNodesFromCacheByValue = (
filterValue: FilterValueNullable,
filtersCache: FiltersCache,
wasElemMatch
-): Array<IGatsbyNode> | undefined => {
+): Array<IGatsbyNodeIdentifiers> | undefined => {
const filterCache = filtersCache.get(filterCacheKey)
if (!filterCache) {
return undefined
@@ -573,7 +616,7 @@ export const getNodesFromCacheByValue = (
}
     const filterValueArr: Array<FilterValueNullable> = filterValue
-    const set: Set<IGatsbyNode> = new Set()
+    const set: Set<IGatsbyNodeIdentifiers> = new Set()
// TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works.
// For every value in the needle array, find the bucket of nodes for
@@ -583,7 +626,7 @@ export const getNodesFromCacheByValue = (
)
const arr = [...set] // this is bad for perf but will guarantee us a unique set :(
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// Note: it's very unlikely that the list of filter values is big so .includes should be fine here
if (filterValueArr.includes(null)) {
@@ -622,7 +665,7 @@ export const getNodesFromCacheByValue = (
// TODO: there's probably a more efficient algorithm to do set
// subtraction in such a way that we don't have to re-sort
- return [...set].sort((A, B) => A.internal.counter - B.internal.counter)
+ return [...set].sort(sortByIds)
}
if (op === `$ne`) {
@@ -632,7 +675,7 @@ export const getNodesFromCacheByValue = (
// TODO: there's probably a more efficient algorithm to do set
// subtraction in such a way that we don't have to resort here
- return [...set].sort((A, B) => A.internal.counter - B.internal.counter)
+ return [...set].sort(sortByIds)
}
if (op === `$regex`) {
@@ -649,7 +692,7 @@ export const getNodesFromCacheByValue = (
}
const regex = filterValue
-    const arr: Array<IGatsbyNode> = []
+    const arr: Array<IGatsbyNodeIdentifiers> = []
filterCache.byValue.forEach((nodes, value) => {
// TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce.
// Note: for legacy reasons partial paths should also be included for regex
@@ -661,7 +704,7 @@ export const getNodesFromCacheByValue = (
// TODO: we _can_ cache this list as well. Might make sense if it turns out that $regex is mostly used with literals
// TODO: it may make sense to first collect all buckets and then to .concat them, or merge sort them
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
@@ -706,7 +749,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[0])
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -746,7 +789,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue < filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until)
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -764,7 +807,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[1])
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -804,7 +847,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue <= filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until)
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -822,7 +865,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[0]).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -862,7 +905,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue > filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -880,7 +923,7 @@ export const getNodesFromCacheByValue = (
const range = ranges!.get(filterValue)
if (range) {
const arr = nodes!.slice(0, range[1]).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -920,7 +963,7 @@ export const getNodesFromCacheByValue = (
// So we have to consider weak comparison and may have to include the pivot
const until = pivotValue >= filterValue ? inclPivot : exclPivot
const arr = nodes!.slice(0, until).reverse()
- arr.sort((A, B) => A.internal.counter - B.internal.counter)
+ arr.sort(sortByIds)
// elemMatch can cause a node to appear in multiple buckets so we must dedupe
if (wasElemMatch) {
expensiveDedupeInline(arr)
@@ -935,7 +978,7 @@ export const getNodesFromCacheByValue = (
function removeBucketFromSet(
filterValue: FilterValueNullable,
filterCache: IFilterCache,
-  set: Set<IGatsbyNode>
+  set: Set<IGatsbyNodeIdentifiers>
): void {
if (filterValue === null) {
// Edge case: $ne with `null` returns only the nodes that contain the full
@@ -960,22 +1003,24 @@ function removeBucketFromSet(
* list that is also ordered by node.internal.counter
*/
export function intersectNodesByCounter(
-  a: Array<IGatsbyNode>,
-  b: Array<IGatsbyNode>
-): Array<IGatsbyNode> {
+  a: Array<IGatsbyNodeIdentifiers>,
+  b: Array<IGatsbyNodeIdentifiers>
+): Array<IGatsbyNodeIdentifiers> {
let pointerA = 0
let pointerB = 0
// TODO: perf check: is it helpful to init the array to min(maxA,maxB) items?
-  const result: Array<IGatsbyNode> = []
+  const result: Array<IGatsbyNodeIdentifiers> = []
const maxA = a.length
const maxB = b.length
let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
+ // TODO some optimization could be done here to not call getNode
+
while (pointerA < maxA && pointerB < maxB) {
- const nodeA = a[pointerA]
- const nodeB = b[pointerB]
- const counterA = nodeA.internal.counter
- const counterB = nodeB.internal.counter
+ const nodeA = getNode(a[pointerA].id)
+ const nodeB = getNode(b[pointerB].id)
+ const counterA = a[pointerA].counter
+ const counterB = b[pointerB].counter
if (counterA < counterB) {
pointerA++
@@ -992,7 +1037,7 @@ export function intersectNodesByCounter(
// back to back, so even if both input arrays contained the same node
// twice, this check would prevent the result from getting duplicate nodes
if (lastAdded !== nodeA) {
- result.push(nodeA)
+ result.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
@@ -1011,39 +1056,41 @@ export function intersectNodesByCounter(
* list that is also ordered by node.internal.counter
*/
export function unionNodesByCounter(
-  a: Array<IGatsbyNode>,
-  b: Array<IGatsbyNode>
-): Array<IGatsbyNode> {
+  a: Array<IGatsbyNodeIdentifiers>,
+  b: Array<IGatsbyNodeIdentifiers>
+): Array<IGatsbyNodeIdentifiers> {
// TODO: perf check: is it helpful to init the array to max(maxA,maxB) items?
-  const arr: Array<IGatsbyNode> = []
+  const arr: Array<IGatsbyNodeIdentifiers> = []
let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
+ // TODO some optimization could be done here to not call getNode
+
let pointerA = 0
let pointerB = 0
const maxA = a.length
const maxB = b.length
while (pointerA < maxA && pointerB < maxB) {
- const nodeA = a[pointerA]
- const nodeB = b[pointerB]
+ const nodeA = getNode(a[pointerA].id)!
+ const nodeB = getNode(b[pointerB].id)!
const counterA = nodeA.internal.counter
const counterB = nodeB.internal.counter
if (counterA < counterB) {
if (lastAdded !== nodeA) {
- arr.push(nodeA)
+ arr.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
} else if (counterA > counterB) {
if (lastAdded !== nodeB) {
- arr.push(nodeB)
+ arr.push(b[pointerB])
lastAdded = nodeB
}
pointerB++
} else {
if (lastAdded !== nodeA) {
- arr.push(nodeA)
+ arr.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
@@ -1052,18 +1099,18 @@ export function unionNodesByCounter(
}
while (pointerA < maxA) {
- const nodeA = a[pointerA]
+ const nodeA = getNode(a[pointerA].id)!
if (lastAdded !== nodeA) {
- arr.push(nodeA)
+ arr.push(a[pointerA])
lastAdded = nodeA
}
pointerA++
}
while (pointerB < maxB) {
- const nodeB = b[pointerB]
+ const nodeB = getNode(b[pointerB].id)!
if (lastAdded !== nodeB) {
- arr.push(nodeB)
+ arr.push(b[pointerB])
lastAdded = nodeB
}
pointerB++
@@ -1072,11 +1119,11 @@ export function unionNodesByCounter(
return arr
}
-function expensiveDedupeInline(arr: Array<IGatsbyNode>): void {
+function expensiveDedupeInline(arr: Array<IGatsbyNodeIdentifiers>): void {
// An elemMatch filter may cause duplicates to appear in a bucket.
// Since the bucket is sorted those should now be back to back
// Worst case this is a fast O(n) loop that does nothing.
- let prev: IGatsbyNode | undefined = undefined
+ let prev: IGatsbyNodeIdentifiers | undefined = undefined
// We copy-on-find because a splice is expensive and we can't use Sets
diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index 53eaeddce63d6..e76901d9a084b 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -22,11 +22,18 @@ import {
getNodesFromCacheByValue,
intersectNodesByCounter,
IFilterCache,
+ IGatsbyNodeIdentifiers,
} from "./indexing"
import { IGraphQLRunnerStats } from "../../query/types"
import { IRunQueryArgs, IQueryResult } from "../types"
import { GatsbyIterable } from "../common/iterable"
+import { getNode } from "../"
+// @ts-ignore
+import { clearKeptObjects } from "lmdb"
+function isGatsbyNode(node: IGatsbyNode | undefined): node is IGatsbyNode {
+ return !!node
+}
export interface IRunFilterArg extends IRunQueryArgs {
filtersCache: FiltersCache
}
@@ -74,7 +81,7 @@ export function applyFastFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache
-): Array<IGatsbyNode> | null {
+): Array<IGatsbyNodeIdentifiers> | null {
if (!filtersCache) {
// If no filter cache is passed on, explicitly don't use one
return null
@@ -101,8 +108,10 @@ export function applyFastFilters(
while (nodesPerValueArrs.length > 1) {
// TS limitation: cannot guard against .pop(), so we must double cast
-    const a = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNode>
-    const b = nodesPerValueArrs.pop() as unknown as Array<IGatsbyNode>
+    const a =
+      nodesPerValueArrs.pop() as unknown as Array<IGatsbyNodeIdentifiers>
+    const b =
+      nodesPerValueArrs.pop() as unknown as Array<IGatsbyNodeIdentifiers>
nodesPerValueArrs.push(intersectNodesByCounter(a, b))
}
@@ -124,8 +133,8 @@ function getBucketsForFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache
-): Array<Array<IGatsbyNode>> | undefined {
-  const nodesPerValueArrs: Array<Array<IGatsbyNode>> = []
+): Array<Array<IGatsbyNodeIdentifiers>> | undefined {
+  const nodesPerValueArrs: Array<Array<IGatsbyNodeIdentifiers>> = []
// Fail fast while trying to create and get the value-cache for each path
const every = filters.every(filter => {
@@ -170,7 +179,7 @@ function getBucketsForQueryFilter(
filter: IDbQueryQuery,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
-  nodesPerValueArrs: Array<Array<IGatsbyNode>>
+  nodesPerValueArrs: Array<Array<IGatsbyNodeIdentifiers>>
): boolean {
const {
path: filterPath,
@@ -187,6 +196,14 @@ function getBucketsForQueryFilter(
)
}
+ const readableWorkerId = process.env.GATSBY_WORKER_ID
+ ? `worker #${process.env.GATSBY_WORKER_ID}`
+ : `main`
+
+ console.log(
+ `getBucketsForQueryFilter "${filterCacheKey}" start ${readableWorkerId}`
+ )
+
const nodesPerValue = getNodesFromCacheByValue(
filterCacheKey,
filterValue as FilterValueNullable,
@@ -202,6 +219,10 @@ function getBucketsForQueryFilter(
// mechanism does not create an array unless there's a IGatsbyNode for it
nodesPerValueArrs.push(nodesPerValue)
+ console.log(
+ `getBucketsForQueryFilter "${filterCacheKey}" end ${readableWorkerId}`
+ )
+
return true
}
@@ -213,7 +234,7 @@ function collectBucketForElemMatch(
filter: IDbQueryElemMatch,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
-  nodesPerValueArrs: Array<Array<IGatsbyNode>>
+  nodesPerValueArrs: Array<Array<IGatsbyNodeIdentifiers>>
): boolean {
// Get comparator and target value for this elemMatch
let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init)
@@ -295,7 +316,12 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult {
? sortedResult.slice(skip, limit ? skip + (limit ?? 0) : undefined)
: sortedResult
- return { entries: new GatsbyIterable(entries), totalCount }
+ return {
+ entries: new GatsbyIterable(entries)
+ .map(nodeIds => getNode(nodeIds.id))
+      .filter(isGatsbyNode) as GatsbyIterable<IGatsbyNode>,
+ totalCount,
+ }
}
/**
@@ -307,7 +333,7 @@ function convertAndApplyFastFilters(
filtersCache: FiltersCache,
   resolvedFields: Record<string, any>,
stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+): Array<IGatsbyNodeIdentifiers> {
const filters = filterFields
? prefixResolvedFields(
createDbQueriesFromObject(prepareQueryArgs(filterFields)),
@@ -339,7 +365,8 @@ function convertAndApplyFastFilters(
// If there's a filter, there (now) must be an entry for this cache key
const filterCache = filtersCache.get(filterCacheKey) as IFilterCache
// If there is no filter then the ensureCache step will populate this:
-  const cache = filterCache.meta.orderedByCounter as Array<IGatsbyNode>
+  const cache = filterCache.meta
+    .orderedByCounter as Array<IGatsbyNodeIdentifiers>
return cache.slice(0)
}
@@ -388,7 +415,7 @@ function filterToStats(
* Returns same reference as input, sorted inline
*/
function sortNodes(
-  nodes: Array<IGatsbyNode>,
+  nodes: Array<IGatsbyNodeIdentifiers>,
sort:
| {
         fields: Array<string>
@@ -397,7 +424,7 @@ function sortNodes(
| undefined,
resolvedFields: any,
stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+): Array<IGatsbyNodeIdentifiers> {
if (!sort || sort.fields?.length === 0 || !nodes || nodes.length === 0) {
return nodes
}
@@ -415,10 +442,18 @@ function sortNodes(
return field
}
})
+ let i = 0
const sortFns = sortFields.map(
field =>
- (v): ((any) => any) =>
- getValueAt(v, field)
+ (v: IGatsbyNodeIdentifiers): ((any) => any) => {
+ i++
+ // lodash sorting needs ArrayLike thing, which our iterable isn't
+ // so for now this hack will do
+ if (i % 100 === 0) {
+ clearKeptObjects()
+ }
+ return getValueAt(getNode(v.id)!, field)
+ }
)
const sortOrder = sort.order.map(order =>
typeof order === `boolean` ? order : order.toLowerCase()
diff --git a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
index 6d216352222ec..101fb0aff8afa 100644
--- a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
+++ b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
@@ -1,4 +1,4 @@
-import { RootDatabase, open, ArrayLikeIterable } from "lmdb-store"
+import { RootDatabase, open, ArrayLikeIterable } from "lmdb"
// import { performance } from "perf_hooks"
import { ActionsUnion, IGatsbyNode } from "../../redux/types"
import { updateNodes } from "./updates/nodes"
@@ -70,7 +70,9 @@ function getDatabases(): ILmdbDatabases {
// FIXME: sharedStructuresKey breaks tests - probably need some cleanup for it on DELETE_CACHE
// sharedStructuresKey: Symbol.for(`structures`),
// @ts-ignore
- cache: true,
+ cache: {
+ expirer: false,
+ },
}),
nodesByType: rootDb.openDB({
name: `nodesByType`,
@@ -184,10 +186,10 @@ function updateDataStore(action: ActionsUnion): void {
const dbs = getDatabases()
// Force sync commit
dbs.nodes.transactionSync(() => {
- dbs.nodes.clear()
- dbs.nodesByType.clear()
- dbs.metadata.clear()
- dbs.indexes.clear()
+ dbs.nodes.clearSync()
+ dbs.nodesByType.clearSync()
+ dbs.metadata.clearSync()
+ dbs.indexes.clearSync()
})
break
}
@@ -229,8 +231,8 @@ function updateDataStore(action: ActionsUnion): void {
function clearIndexes(): void {
const dbs = getDatabases()
dbs.nodes.transactionSync(() => {
- dbs.metadata.clear()
- dbs.indexes.clear()
+ dbs.metadata.clearSync()
+ dbs.indexes.clearSync()
})
}
diff --git a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts
index d2426e3959ef4..1622548020f27 100644
--- a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts
+++ b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts
@@ -1,5 +1,5 @@
import { ActionsUnion, IGatsbyNode } from "../../../redux/types"
-import { Database } from "lmdb-store"
+import type { Database } from "lmdb"
type NodeId = string
diff --git a/packages/gatsby/src/datastore/types.ts b/packages/gatsby/src/datastore/types.ts
index 80b6e8d7bdb24..58a1c0e3653e2 100644
--- a/packages/gatsby/src/datastore/types.ts
+++ b/packages/gatsby/src/datastore/types.ts
@@ -1,4 +1,4 @@
-import { Database } from "lmdb-store"
+import { Database } from "lmdb"
import { IGatsbyNode } from "../redux/types"
import { GatsbyGraphQLType } from "../../index"
import { IInputQuery } from "./common/query"
diff --git a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
index 8a5ffedb83a1c..b7248a3bbec30 100644
--- a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
+++ b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts
@@ -70,6 +70,21 @@ export async function createGraphqlEngineBundle(
],
module: {
rules: [
+ {
+ test: require.resolve(`lmdb`),
+ parser: { amd: false },
+ use: [
+ {
+ loader: require.resolve(`@vercel/webpack-asset-relocator-loader`),
+ options: {
+ outputAssetBase: `assets`,
+ },
+ },
+ {
+ loader: require.resolve(`./lmdb-bundling-patch`),
+ },
+ ],
+ },
{
test: /\.m?js$/,
type: `javascript/auto`,
diff --git a/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts
new file mode 100644
index 0000000000000..690036aeae2fc
--- /dev/null
+++ b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts
@@ -0,0 +1,31 @@
+import { createRequireFromPath } from "gatsby-core-utils"
+
+export default function (source: string): string {
+ let lmdbBinaryLocation
+ try {
+ const lmdbRequire = createRequireFromPath(require.resolve(`lmdb`))
+ const nodeGypBuild = lmdbRequire(`node-gyp-build`)
+ const path = require(`path`)
+
+ lmdbBinaryLocation = nodeGypBuild.path(
+ path.dirname(require.resolve(`lmdb`)).replace(`/dist`, ``)
+ )
+ } catch (e) {
+ console.error(`ln`, e)
+ return source
+ }
+
+ return source
+ .replace(
+ `require$1('node-gyp-build')(dirName)`,
+ `require(${JSON.stringify(lmdbBinaryLocation)})`
+ )
+ .replace(
+ `require$2.resolve('./dict/dict.txt')`,
+ `require.resolve('../dict/dict.txt')`
+ )
+ .replace(
+ /fs\.readFileSync\(new URL\('\.\/dict\/dict\.txt',\s*\(typeof\s*document\s*===\s*'undefined'\s*\?\s*new\s*\(require\('u'\s*\+\s*'rl'\)\.URL\)\s*\('file:'\s*\+\s*__filename\).href\s*:\s*\(document\.currentScript\s*&&\s*document\.currentScript\.src\s*\|\|\s*new URL\('index\.cjs',\s*document\.baseURI\)\.href\)\)\.replace\(\/dist\[\\\\\\\/\]index\.cjs\$\/,\s*''\)\)\)/g,
+ `fs.readFileSync(require.resolve('../dict/dict.txt'))`
+ )
+}
diff --git a/packages/gatsby/src/schema/node-model.js b/packages/gatsby/src/schema/node-model.js
index ac750c7a792b3..40850852cf2e3 100644
--- a/packages/gatsby/src/schema/node-model.js
+++ b/packages/gatsby/src/schema/node-model.js
@@ -482,6 +482,7 @@ class LocalNodeModel {
)
if (!_.isEmpty(actualFieldsToResolve)) {
+ console.log(`materialization`, { typeName, actualFieldsToResolve })
const resolvedNodes = new Map()
for (const node of getDataStore().iterateNodesByType(typeName)) {
this.trackInlineObjectsInRootNode(node)
diff --git a/packages/gatsby/src/utils/cache-lmdb.ts b/packages/gatsby/src/utils/cache-lmdb.ts
index 08ab4abdd8a34..98e94f45c1dc3 100644
--- a/packages/gatsby/src/utils/cache-lmdb.ts
+++ b/packages/gatsby/src/utils/cache-lmdb.ts
@@ -1,4 +1,4 @@
-import { open, RootDatabase, Database, DatabaseOptions } from "lmdb-store"
+import { open, RootDatabase, Database, DatabaseOptions } from "lmdb"
import fs from "fs-extra"
import path from "path"
diff --git a/yarn.lock b/yarn.lock
index 36cff191c4bc8..dfa5bc2c4451d 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -14612,17 +14612,16 @@ livereload-js@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/livereload-js/-/livereload-js-2.3.0.tgz#c3ab22e8aaf5bf3505d80d098cbad67726548c9a"
-lmdb-store@^1.6.11:
- version "1.6.11"
- resolved "https://registry.yarnpkg.com/lmdb-store/-/lmdb-store-1.6.11.tgz#801da597af8c7a01c81f87d5cc7a7497e381236d"
- integrity sha512-hIvoGmHGsFhb2VRCmfhodA/837ULtJBwRHSHKIzhMB7WtPH6BRLPsvXp1MwD3avqGzuZfMyZDUp3tccLvr721Q==
+lmdb@2.1.7:
+ version "2.1.7"
+ resolved "https://registry.yarnpkg.com/lmdb/-/lmdb-2.1.7.tgz#0f518102032037e248f201210943f0b94db04155"
+ integrity sha512-i6EFEBBlQ130J4BfJUbYgZFKQDz83xhpM47vzs0BMpXiJ7D4NjecO1Y3X54D341dwkLmTphlIyro5nTkKFXoMQ==
dependencies:
+ msgpackr "^1.5.2"
nan "^2.14.2"
node-gyp-build "^4.2.3"
- ordered-binary "^1.0.0"
- weak-lru-cache "^1.0.0"
- optionalDependencies:
- msgpackr "^1.4.7"
+ ordered-binary "^1.2.3"
+ weak-lru-cache "^1.2.1"
load-bmfont@^1.3.1, load-bmfont@^1.4.0:
version "1.4.0"
@@ -16233,10 +16232,10 @@ msgpackr-extract@^1.0.14:
nan "^2.14.2"
node-gyp-build "^4.2.3"
-msgpackr@^1.4.7:
- version "1.4.7"
- resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.4.7.tgz#d802ade841e7d2e873000b491cdda6574a3d5748"
- integrity sha512-bhC8Ed1au3L3oHaR/fe4lk4w7PLGFcWQ5XY/Tk9N6tzDRz8YndjCG68TD8zcvYZoxNtw767eF/7VpaTpU9kf9w==
+msgpackr@^1.5.2:
+ version "1.5.2"
+ resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.5.2.tgz#b400c9885642bdec27b284f8bdadbd6570b448b7"
+ integrity sha512-OCguCkbG34x1ddO4vAzEm/4J1GTo512k9SoxV8K+EGfI/onFdpemRf0HpsVRFpxadXr4JBFgHsQUitgTlw7ZYQ==
optionalDependencies:
msgpackr-extract "^1.0.14"
@@ -17137,10 +17136,10 @@ ora@^5.4.1:
strip-ansi "^6.0.0"
wcwidth "^1.0.1"
-ordered-binary@^1.0.0:
- version "1.1.3"
- resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.1.3.tgz#11dbc0a4cb7f8248183b9845e031b443be82571e"
- integrity sha512-tDTls+KllrZKJrqRXUYJtIcWIyoQycP7cVN7kzNNnhHKF2bMKHflcAQK+pF2Eb1iVaQodHxqZQr0yv4HWLGBhQ==
+ordered-binary@^1.2.3:
+ version "1.2.3"
+ resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.2.3.tgz#518f637692a74d372e56230effae37b811575e36"
+ integrity sha512-fEwMk8TNUtzQDjXKYS2ANW3fNZ/gMReCPOAsLHaqw+UDnq/8ddXAcX4lGRpTK7kAghAjkmJs1EXXbcrDbg+ruw==
ordered-read-streams@^1.0.0:
version "1.0.1"
@@ -24502,10 +24501,10 @@ wcwidth@^1.0.0, wcwidth@^1.0.1:
dependencies:
defaults "^1.0.3"
-weak-lru-cache@^1.0.0:
- version "1.1.2"
- resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.1.2.tgz#a909a97372aabdfbfe3eb33580af255b3b198834"
- integrity sha512-Bi5ae8Bev3YulgtLTafpmHmvl3vGbanRkv+qqA2AX8c3qj/MUdvSuaHq7ukDYBcMDINIaRPTPEkXSNCqqWivuA==
+weak-lru-cache@^1.2.1:
+ version "1.2.1"
+ resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.2.1.tgz#6b4f2da7e1701f845e71522417f1df1e39503df8"
+ integrity sha512-O5ag1F0Xk6ui+Fg5LlosTcVAyHs6DeyiDDbOapNtFCx/KjZ82B3U9stM9hvzbVclKWn9ABPjaINX/nQkGkJkKg==
web-namespaces@^1.0.0:
version "1.1.2"