diff --git a/benchmarks/memory/.dockerignore b/benchmarks/memory/.dockerignore new file mode 100644 index 0000000000000..cbd3fdd9b6b92 --- /dev/null +++ b/benchmarks/memory/.dockerignore @@ -0,0 +1,23 @@ +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +README.md diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile new file mode 100644 index 0000000000000..80f6e52c38966 --- /dev/null +++ b/benchmarks/memory/Dockerfile @@ -0,0 +1,14 @@ +FROM node:14-buster +ENV NODE_ENV=production +ENV CI=1 +ENV GATSBY_CPU_COUNT=4 +RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y +RUN npm i -g gatsby-cli gatsby-dev-cli +WORKDIR /usr/src/app +RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc + +# set up gatsby-dev +RUN gatsby-dev --set-path-to-repo /usr/src/gatsby + +# keep the process running +ENTRYPOINT ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md new file mode 100644 index 0000000000000..445abd8415bf4 --- /dev/null +++ b/benchmarks/memory/README.md @@ -0,0 +1,97 @@ +# Gatsby Memory Benchmark + +The goal of this benchmark is to test Gatsby's memory usage and look for potential optimizations. + +## The Docker Container + +The docker container used in these tests sets up a Debian instance with node 14 installed (as well as npm/yarn/etc). +It has ports 9000 (for hosting gatsby) and 9229 (for debugging) exposed. + +Within the container, two paths from your local filesystem are mounted: + +- /usr/src/gatsby : Your local gatsby repo +- /usr/src/app : The memory benchmark gatsby site + +## Commands + +### Docker + +These commands are used for interfacing with docker and have built-in utilities for managing the docker container. + +#### yarn docker:build + +Builds the container used for testing. + +#### yarn docker:start + +Starts the container built by `yarn docker:build`. + +#### yarn docker:connect + +Connects to the container started by `yarn docker:start`. + +#### yarn docker:start-and-connect + +A shorthand for start + connect. + +#### yarn docker:stop + +Stops the container used for testing. + +### Gatsby + +These commands are used for interfacing with gatsby. + +#### yarn gatsby:build + +Simply an alias for `yarn gatsby build`. + +#### yarn gatsby:serve + +Starts `gatsby serve` on port 9000 and sets the host properly to work inside docker. + +#### yarn gatsby:develop + +Starts `gatsby develop` on port 9000 and sets the host properly to work inside docker. + +#### yarn gatsby:build:debug + +Runs `gatsby build` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229. + +#### yarn gatsby:develop:debug + +Runs `gatsby develop` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
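As a reference, a typical debugging session with the commands above might look like the sketch below; the debugger client is an assumption — anything that can attach to a Node.js inspector on port 9229 works.

```sh
# On the host: build the image, then start and enter a container
yarn docker:build
yarn docker:start-and-connect

# Inside the container: run the build, paused until a debugger attaches
yarn gatsby:build:debug

# Back on the host: attach your debugger to localhost:9229
# (for example via chrome://inspect or an "Attach to Node" editor configuration)
```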
+ +## Setup + +Currently we can reproduce builds crashing with our default settings: + +- Docker container running with a 2GB limit +- 300 nodes x ~2MB each = ~600MB of "just" nodes data in each process (the number of nodes can be controlled with the NUM_NODES env var) +- 3 workers + main process (GATSBY_CPU_COUNT is set to 4 in the docker image, but you can specify a different value with the env var) +- `eq_field` template using fast filters (single `eq` specifically) + +The goal is to make the `eq_field` template not cause crashes, then add the next template (different operator) that causes crashes, and repeat until all queries can be handled within the set memory limits. + +### Workflow + +While the `gatsby-dev` command is available inside docker, from my testing it doesn't seem to pick up file changes when run there. A workflow that seems to work reliably: + +When starting work on this benchmark: + +- start `yarn watch` (possibly with `--scope`) in the monorepo +- start `gatsby-dev` outside of docker in the benchmark directory (just like with a regular site) +- `yarn docker:connect` to get inside docker +- `npm rebuild` to rebuild binaries inside docker + +And repeat as many times as you want: + +- make changes to `gatsby` source code as you normally would +- run `yarn build` inside docker + +## Testing + +TODO + +- How to configure memory limits +- Where to look diff --git a/benchmarks/memory/gatsby-config.js b/benchmarks/memory/gatsby-config.js new file mode 100644 index 0000000000000..5ae66ab282a51 --- /dev/null +++ b/benchmarks/memory/gatsby-config.js @@ -0,0 +1,3 @@ +module.exports = { + plugins: [], +} diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js new file mode 100644 index 0000000000000..f020ac0079ba0 --- /dev/null +++ b/benchmarks/memory/gatsby-node.js @@ -0,0 +1,226 @@ +const { cpuCoreCount } = require(`gatsby-core-utils`) + +const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10) + +const NUM_KEYS_IN_LARGE_SIZE_OBJ = 1024 + +exports.sourceNodes = async ({ actions, reporter }) => { + const contentDigest = Date.now().toString() // make each sourcing mark everything as dirty + + const activity = reporter.createProgress(`Creating test nodes`, NUM_NODES) + activity.start() + + for (let i = 0; i < NUM_NODES; i++) { + const largeSizeObj = {} + for (let j = 1; j <= NUM_KEYS_IN_LARGE_SIZE_OBJ; j++) { + largeSizeObj[`key_${j}`] = `x`.repeat(1024) + } + + // each node is ~2MB + const node = { + id: `memory-${i}`, + idClone: `memory-${i}`, + fooBar: [`foo`, `bar`, `baz`, `foobar`][i % 4], + number1: i, + number2: NUM_NODES - i, + number3: i % 20, + largeSizeObj, + largeSizeString: `x`.repeat(1024 * 1024), + internal: { + contentDigest, + type: `Test`, + }, + } + + actions.createNode(node) + + if (i % 100 === 99) { + activity.tick(100) + await new Promise(resolve => setImmediate(resolve)) + } + } + + activity.tick(NUM_NODES % 100) + + await new Promise(resolve => setTimeout(resolve, 100)) + + activity.end() +} + +exports.createSchemaCustomization = ({ actions, schema }) => { + actions.createTypes([ + schema.buildObjectType({ + name: `TestLargeSizeObj`, + fields: Object.fromEntries( + new Array(NUM_KEYS_IN_LARGE_SIZE_OBJ) + .fill(`String`) + .map((value, index) => [`key_${index + 1}`, value]) + ), + }), + schema.buildObjectType({ + name: `Test`, + fields: { + idClone: `String`, + fooBar: `String`, + number1: `Int`, + number2: `Int`, + number3: `Int`, + largeSizeString: `String`, + largeSizeObj: `TestLargeSizeObj`, + idCloneWithResolver: { + type: `String`, + resolve: source => { + return
source.idClone + }, + }, + }, + interfaces: ["Node"], + extensions: { + infer: false, + }, + }), + ]) +} + +const printedMessages = new Set() +exports.createResolvers = ({ createResolvers }) => { + createResolvers({ + Query: { + workerInfo: { + type: `String`, + args: { + label: `String!`, + }, + resolve: (_, args) => { + const msg = `${args.label} on ${ + process.env.GATSBY_WORKER_ID + ? `worker #${process.env.GATSBY_WORKER_ID}` + : `main` + }` + if (!printedMessages.has(msg)) { + printedMessages.add(msg) + console.log(msg) + } + return msg + }, + }, + }, + }) +} + +const WORKER_BATCH_SIZE = + Number(process.env.GATSBY_PARALLEL_QUERY_CHUNK_SIZE) || 50 + +let enabledTemplates = new Set() +exports.onPreBootstrap = () => { + const availableTemplates = new Set([ + `eq_id`, // this should skip node-model and fast filters completely and should be very cheap already + `eq_field`, // this needs fast filters for eq operator on non-id field + `eq_field_with_resolver`, // / this needs fast filters for eq operator on non-id field + materialization + `ne_field_collection_sort_skip_limit`, // collection query to check code path applying sorting and skip/limit + ]) + enabledTemplates = new Set( + process.env.TEMPLATES + ? process.env.TEMPLATES.split(`,`).filter(template => + availableTemplates.has(template) + ) + : availableTemplates + ) + + console.info(`Enabled templates`, enabledTemplates) +} + +exports.createPages = async ({ actions, graphql }) => { + const numWorkers = Math.max(1, cpuCoreCount() - 1) + + // we do want ALL available workers to execute each query type + const minNumOfPagesToSaturateAllWorkers = WORKER_BATCH_SIZE * numWorkers + + const { data } = await graphql(` + { + allTest { + nodes { + id + idClone + } + } + } + `) + + // we might need to "duplicate" pages if node count is less than number of needed pages + const repeatCount = Math.min( + 1, + Math.ceil(minNumOfPagesToSaturateAllWorkers / data.allTest.nodes.length) + ) + + function createEnoughToSaturate(template, cb) { + if (!enabledTemplates.has(template)) { + return + } + console.log(`Creating pages with template "${template}"`) + let counter = 0 + for (let i = 0; i < repeatCount; i++) { + let j = 0 + for (const node of data.allTest.nodes) { + const { context } = cb(node, j) + + actions.createPage({ + path: `/${template}/${counter++}`, + component: require.resolve(`./src/templates/${template}`), + context, + }) + + if (counter >= minNumOfPagesToSaturateAllWorkers) { + break + } + + j++ + } + } + } + + // fast path (eq: { id: x }) + createEnoughToSaturate(`eq_id`, node => { + return { + context: { + id: node.id, + }, + } + }) + + // (eq: { idClone: x }) + createEnoughToSaturate(`eq_field`, node => { + return { + context: { + id: node.id, + }, + } + }) + + // (eq: { idCloneWithResolver: x }) + createEnoughToSaturate(`eq_field_with_resolver`, node => { + return { + context: { + id: node.id, + }, + } + }) + + // allTest( + // filter: { idClone: { ne: $id } } + // sort: { fields: [number3], order: [ASC] } + // limit: 10 + // skip: $skip + // ) + createEnoughToSaturate( + `ne_field_collection_sort_skip_limit`, + (node, index) => { + return { + context: { + id: node.id, + skip: Math.max(index, NUM_NODES - 10), // limit is set to 10, so just setting upper bound so queries for last nodes do have 10 items + }, + } + } + ) +} diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json new file mode 100644 index 0000000000000..2d63ab39c23be --- /dev/null +++ b/benchmarks/memory/package.json @@ -0,0 +1,32 @@ +{ + "name": 
"memory-usage-benchmark", + "private": true, + "version": "1.0.0", + "description": "Test site stress testing memory usage", + "license": "MIT", + "scripts": { + "gatsby:build": "yarn gatsby build", + "gatsby:serve": "yarn gatsby serve -H 0.0.0.0 -p 9000", + "gatsby:develop": "NODE_ENV=development yarn gatsby develop -H 0.0.0.0 -p 9000", + "gatsby:build:debug": "node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby build", + "gatsby:develop:debug": "NODE_ENV=development node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby develop -H 0.0.0.0 -p 9000", + "docker:build": "docker build -t gatsby-memory .", + "docker:start": "./scripts/docker-start", + "docker:connect": "./scripts/docker-connect", + "docker:start-and-connect": "./scripts/docker-start && sleep 1 && ./scripts/docker-connect", + "docker:stop": "./scripts/docker-stop", + "docker:stats": "./scripts/docker-stats" + }, + "repository": { + "type": "git", + "url": "https://github.com/gatsbyjs/gatsby/tree/master/benchmarks/memory" + }, + "bugs": { + "url": "https://github.com/gatsbyjs/gatsby/issues" + }, + "dependencies": { + "gatsby": "^4", + "react": "^17.0.2", + "react-dom": "^17.0.2" + } +} diff --git a/benchmarks/memory/scripts/docker-connect b/benchmarks/memory/scripts/docker-connect new file mode 100755 index 0000000000000..af6582a97d6f8 --- /dev/null +++ b/benchmarks/memory/scripts/docker-connect @@ -0,0 +1,9 @@ +DOCKER_ID=$(./scripts/docker-get-id) + +if [ -z "$DOCKER_ID" ]; then + echo "\nNo gatsby-memory is running. Start one with \`yarn docker:start\`.\n" + return 1 +fi + +echo "Connecting to container $DOCKER_ID...\n" +docker exec -it $DOCKER_ID bash \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-get-id b/benchmarks/memory/scripts/docker-get-id new file mode 100755 index 0000000000000..064e21e32607c --- /dev/null +++ b/benchmarks/memory/scripts/docker-get-id @@ -0,0 +1,8 @@ +DOCKER_ID=$(\ + docker ps --format '{{.Image}}:{{.ID}}' | \ + grep "gatsby-memory" | \ + head -n 1 | \ + sed 's/gatsby\-memory://'\ +) + +echo $DOCKER_ID \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start new file mode 100755 index 0000000000000..235d3526b4d9b --- /dev/null +++ b/benchmarks/memory/scripts/docker-start @@ -0,0 +1,20 @@ +DOCKER_ID=$(./scripts/docker-get-id) +if [ -n "$DOCKER_ID" ]; then + echo "\nA gatsby-memory container is already running with id $DOCKER_ID." + echo "Please use that container, or run \`yarn docker:stop\` to stop it.\n" + return 1 +fi + +DOCKER_ID=$(\ + docker run -td \ + --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ + --mount type=bind,source="$(pwd)",target=/usr/src/app \ + --publish 9229:9229 \ + --publish 9000:9000 \ + --memory="2g" \ + --memory-swap="2g" \ + gatsby-memory \ + | head -c 12 \ +) + +echo "\nStarted container id ${DOCKER_ID}! Run \`yarn docker:connect\` to connect to the container.\n" \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-stats b/benchmarks/memory/scripts/docker-stats new file mode 100755 index 0000000000000..9fb96494108b7 --- /dev/null +++ b/benchmarks/memory/scripts/docker-stats @@ -0,0 +1,18 @@ +#!/bin/bash + +DOCKER_ID=$(./scripts/docker-get-id) +if [ -z "$DOCKER_ID" ]; then + echo -e "\nNo gatsby-memory container was found. 
Run \`yarn docker:start\` to start one.\n" + exit 1 +fi + +FORMAT="Gatsby Memory Benchmark Container----CPU: {{.CPUPerc }}--Memory: {{.MemUsage}}--Network: {{.NetIO}}" +STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT") +clear + +while [ -n "$STATS" ]; do + echo $STATS | sed "s/--/\n/g" + DOCKER_ID=$(./scripts/docker-get-id) + STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT") + clear +done \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-stop b/benchmarks/memory/scripts/docker-stop new file mode 100755 index 0000000000000..95dbec9e55704 --- /dev/null +++ b/benchmarks/memory/scripts/docker-stop @@ -0,0 +1,9 @@ +DOCKER_ID=$(./scripts/docker-get-id) + +if [ -z "$DOCKER_ID" ]; then + echo "\nNo gatsby-memory is running.\n" + return 1 +fi + +DOCKER_ID=$(docker kill $DOCKER_ID) +echo "\nStopped container $DOCKER_ID.\n" \ No newline at end of file diff --git a/benchmarks/memory/scripts/enforce-docker b/benchmarks/memory/scripts/enforce-docker new file mode 100755 index 0000000000000..43ede33d240db --- /dev/null +++ b/benchmarks/memory/scripts/enforce-docker @@ -0,0 +1,13 @@ +#!/bin/bash + +if [ ! -f /.dockerenv ]; then + DOCKER_ID=$(./scripts/docker-get-id) + COMMAND="start-and-connect" + if [ -n "$DOCKER_ID" ]; then + COMMAND="connect" + fi + echo -e "\nThis must be run inside the docker container. Please run \`yarn docker:${COMMAND}\` and try again.\n" + exit 1 +fi + +${@:1} \ No newline at end of file diff --git a/benchmarks/memory/src/pages/index.js b/benchmarks/memory/src/pages/index.js new file mode 100644 index 0000000000000..8729fdc41578e --- /dev/null +++ b/benchmarks/memory/src/pages/index.js @@ -0,0 +1,5 @@ +import React from "react" + +export default function Home() { + return
<div>Hello world!</div>
+} diff --git a/benchmarks/memory/src/templates/eq_field.js b/benchmarks/memory/src/templates/eq_field.js new file mode 100644 index 0000000000000..c881ada4c05f3 --- /dev/null +++ b/benchmarks/memory/src/templates/eq_field.js @@ -0,0 +1,20 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div>
+ <pre>{JSON.stringify(data, null, 2)}</pre>
+ </div>
+ ) +} + +export const q = graphql` + query ($id: String!) { + test(idClone: { eq: $id }) { + id + fooBar + } + workerInfo(label: "eq-field") + } +` diff --git a/benchmarks/memory/src/templates/eq_field_with_resolver.js b/benchmarks/memory/src/templates/eq_field_with_resolver.js new file mode 100644 index 0000000000000..ffc066340c721 --- /dev/null +++ b/benchmarks/memory/src/templates/eq_field_with_resolver.js @@ -0,0 +1,20 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div>
+ <pre>{JSON.stringify(data, null, 2)}</pre>
+ </div>
+ ) +} + +export const q = graphql` + query ($id: String!) { + test(idCloneWithResolver: { eq: $id }) { + id + fooBar + } + workerInfo(label: "eq-field-with-resolver") + } +` diff --git a/benchmarks/memory/src/templates/eq_id.js b/benchmarks/memory/src/templates/eq_id.js new file mode 100644 index 0000000000000..3bca139fc3c26 --- /dev/null +++ b/benchmarks/memory/src/templates/eq_id.js @@ -0,0 +1,20 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div>
+ <pre>{JSON.stringify(data, null, 2)}</pre>
+ </div>
+ ) +} + +export const q = graphql` + query ($id: String!) { + test(id: { eq: $id }) { + id + fooBar + } + workerInfo(label: "eq-id") + } +` diff --git a/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js b/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js new file mode 100644 index 0000000000000..a57663a40e0cd --- /dev/null +++ b/benchmarks/memory/src/templates/ne_field_collection_sort_skip_limit.js @@ -0,0 +1,27 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div>
+ <pre>{JSON.stringify(data, null, 2)}</pre>
+ </div>
+ ) +} + +export const q = graphql` + query ($id: String!, $skip: Int!) { + allTest( + filter: { idClone: { ne: $id } } + sort: { fields: [number3], order: [ASC] } + limit: 10 + skip: $skip + ) { + nodes { + id + fooBar + } + } + workerInfo(label: "ne-field-collection-sort-skip-limit") + } +` diff --git a/integration-tests/cache-resilience/gatsby-node.js b/integration-tests/cache-resilience/gatsby-node.js index 21ab949eb533a..3847cc1170211 100644 --- a/integration-tests/cache-resilience/gatsby-node.js +++ b/integration-tests/cache-resilience/gatsby-node.js @@ -3,7 +3,7 @@ const v8 = require(`v8`) const glob = require(`glob`) const path = require(`path`) const _ = require(`lodash`) -const { open } = require(`lmdb-store`) +const { open } = require(`lmdb`) const { saveState } = require(`gatsby/dist/redux/save-state`) diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json index fb28bf3df283f..305897fceae42 100644 --- a/packages/gatsby/package.json +++ b/packages/gatsby/package.json @@ -101,7 +101,7 @@ "joi": "^17.4.2", "json-loader": "^0.5.7", "latest-version": "5.1.0", - "lmdb-store": "^1.6.11", + "lmdb": "2.1.7", "lodash": "^4.17.21", "md5-file": "^5.0.0", "meant": "^1.0.3", diff --git a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js index 98b84a406a471..8b20b188a3e5f 100644 --- a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js +++ b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js @@ -407,7 +407,7 @@ describe(`applyFastFilters`, () => { expect(result.length).toEqual(2) result.map(node => { - expect(node.slog).toEqual(`def`) + expect(getNode(node.id).slog).toEqual(`def`) }) }) @@ -425,7 +425,7 @@ describe(`applyFastFilters`, () => { expect(result.length).toEqual(2) result.map(node => { - expect(node.deep.flat.search.chain).toEqual(300) + expect(getNode(node.id).deep.flat.search.chain).toEqual(300) }) }) @@ -444,8 +444,8 @@ describe(`applyFastFilters`, () => { // Count is irrelevant as long as it is non-zero and they all match filter expect(Array.isArray(results)).toBe(true) expect(results.length).toEqual(1) - expect(results[0].slog).toEqual(`def`) - expect(results[0].deep.flat.search.chain).toEqual(300) + expect(getNode(results[0].id).slog).toEqual(`def`) + expect(getNode(results[0].id).deep.flat.search.chain).toEqual(300) }) it(`supports elemMatch`, () => { diff --git a/packages/gatsby/src/datastore/common/iterable.ts b/packages/gatsby/src/datastore/common/iterable.ts index 2948c6378e172..8e1abe698b906 100644 --- a/packages/gatsby/src/datastore/common/iterable.ts +++ b/packages/gatsby/src/datastore/common/iterable.ts @@ -1,3 +1,5 @@ +// @ts-ignore +import { clearKeptObjects } from "lmdb" /** * Wrapper for any iterable providing chainable interface and convenience methods * similar to array. @@ -10,10 +12,18 @@ export class GatsbyIterable { constructor(private source: Iterable | (() => Iterable)) {} - [Symbol.iterator](): Iterator { + *[Symbol.iterator](): Generator { const source = typeof this.source === `function` ? 
this.source() : this.source - return source[Symbol.iterator]() + + let i = 0 + for (const val of source) { + yield val + + if (++i % 100 === 0) { + clearKeptObjects() + } + } } concat(other: Iterable): GatsbyIterable { diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 7d6acc42c46e5..5f9c072fbb42b 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -5,7 +5,7 @@ import { FilterValue, FilterValueNullable, } from "../common/query" -import { getDataStore } from "../" +import { getDataStore, getNode } from "../" // Only list supported ops here. "CacheableFilterOp" export type FilterOp = // TODO: merge with DbComparator ? @@ -21,6 +21,39 @@ export type FilterOp = // TODO: merge with DbComparator ? // Note: `undefined` is an encoding for a property that does not exist export type FilterCacheKey = string +type GatsbyNodeID = string + +export interface IGatsbyNodeIdentifiers { + id: GatsbyNodeID + counter: number +} + +const nodeIdToIdentifierMap = new Map< + GatsbyNodeID, + WeakRef +>() + +const getIdentifierObjectFromNode = ( + node: IGatsbyNode +): IGatsbyNodeIdentifiers => { + const cacheKey = `${node.id}_____${node.internal.counter}` + if (nodeIdToIdentifierMap.has(cacheKey)) { + const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref() + if (maybeStillExist) { + return maybeStillExist + } + } + + const identifier = { id: node.id, counter: node.internal.counter } + nodeIdToIdentifierMap.set(cacheKey, new WeakRef(identifier)) + return identifier +} + +const sortByIds = ( + a: IGatsbyNodeIdentifiers, + b: IGatsbyNodeIdentifiers +): number => a.counter - b.counter + export interface IFilterCache { op: FilterOp // In this map `undefined` values represent nodes that did not have the path @@ -30,22 +63,22 @@ export interface IFilterCache { // This arrays may contain duplicates (!) because those only get filtered in the // last step. // TODO: We might decide to make sure these buckets _are_ deduped for eq perf - byValue: Map> + byValue: Map> meta: { // Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort - nodesUnordered?: Array + nodesUnordered?: Array // Flat list of all nodes by requested types, ordered by counter (cached for empty filters) - orderedByCounter?: Array + orderedByCounter?: Array // Ordered list of all values (by `<`) found by this filter. No null / undefs valuesAsc?: Array // Flat list of nodes, ordered by valueAsc - nodesByValueAsc?: Array + nodesByValueAsc?: Array // Ranges of nodes per value, maps to the nodesByValueAsc array valueRangesAsc?: Map // Ordered list of all values (by `>`) found by this filter. No null / undefs valuesDesc?: Array // Flat list of nodes, ordered by valueDesc - nodesByValueDesc?: Array + nodesByValueDesc?: Array // Ranges of nodes per value, maps to the nodesByValueDesc array valueRangesDesc?: Map } @@ -59,7 +92,7 @@ export function postIndexingMetaSetup( // Loop through byValue and make sure the buckets are sorted by counter // Since we don't do insertion sort, we have to do it afterwards for (const bucket of filterCache.byValue) { - bucket[1].sort((a, b) => a.internal.counter - b.internal.counter) + bucket[1].sort(sortByIds) } if (op === `$ne` || op === `$nin`) { @@ -79,15 +112,14 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void { // including nodes where the value is null. 
// A $nin does the same as an $ne except it filters multiple values instead // of just one. - // For `$ne` we will take the list of all targeted nodes and eliminate the // bucket of nodes with a particular value, if it exists at all.. - const arr: Array = [] + const arr: Array = [] filterCache.meta.nodesUnordered = arr filterCache.byValue.forEach(v => { - v.forEach(node => { - arr.push(node) + v.forEach(nodeId => { + arr.push(nodeId) }) }) } @@ -101,15 +133,15 @@ function postIndexingMetaSetupLtLteGtGte( // internal.counter, asc. // This way non-eq ops can simply slice the array to get a range. - const entriesNullable: Array<[FilterValueNullable, Array]> = [ - ...filterCache.byValue.entries(), - ] + const entriesNullable: Array< + [FilterValueNullable, Array] + > = [...filterCache.byValue.entries()] // These range checks never return `null` or `undefined` so filter those out // By filtering them out early, the sort should be faster. Could be ... - const entries: Array<[FilterValue, Array]> = + const entries: Array<[FilterValue, Array]> = entriesNullable.filter(([v]) => v != null) as Array< - [FilterValue, Array] + [FilterValue, Array] > // Sort all arrays by its value, asc. Ignore/allow potential type casting. @@ -133,19 +165,21 @@ function postIndexingMetaSetupLtLteGtGte( entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0)) } - const orderedNodes: Array = [] + const orderedNodes: Array = [] const orderedValues: Array = [] const offsets: Map = new Map() - entries.forEach(([v, bucket]: [FilterValue, Array]) => { - // Record the range containing all nodes with as filter value v - // The last value of the range should be the offset of the next value - // (So you should be able to do `nodes.slice(start, stop)` to get them) - offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length]) - // We could do `arr.push(...bucket)` here but that's not safe with very - // large sets, so we use a regular loop - bucket.forEach(node => orderedNodes.push(node)) - orderedValues.push(v) - }) + entries.forEach( + ([v, bucket]: [FilterValue, Array]) => { + // Record the range containing all nodes with as filter value v + // The last value of the range should be the offset of the next value + // (So you should be able to do `nodes.slice(start, stop)` to get them) + offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length]) + // We could do `arr.push(...bucket)` here but that's not safe with very + // large sets, so we use a regular loop + bucket.forEach(node => orderedNodes.push(node)) + orderedValues.push(v) + } + ) if (op === `$lt` || op === `$lte`) { filterCache.meta.valuesAsc = orderedValues @@ -178,12 +212,19 @@ export const ensureIndexByQuery = ( nodeTypeNames: Array, filtersCache: FiltersCache ): void => { + const readableWorkerId = process.env.GATSBY_WORKER_ID + ? 
`worker #${process.env.GATSBY_WORKER_ID}` + : `main` + + console.log( + `ensureIndexByQuery "${filterCacheKey}" start ${readableWorkerId}` + ) const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -213,6 +254,8 @@ export const ensureIndexByQuery = ( } postIndexingMetaSetup(filterCache, op) + + console.log(`ensureIndexByQuery "${filterCacheKey}" end ${readableWorkerId}`) } export function ensureEmptyFilterCache( @@ -226,11 +269,11 @@ export function ensureEmptyFilterCache( const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache - const orderedByCounter: Array = [] + const orderedByCounter: Array = [] filtersCache.set(filterCacheKey, { op: `$eq`, // Ignore. - byValue: new Map>(), + byValue: new Map>(), meta: { orderedByCounter, // This is what we want }, @@ -248,7 +291,7 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(node) + orderedByCounter.push(getIdentifierObjectFromNode(node)) }) } else { // Here we must first filter for the node type @@ -265,14 +308,14 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(node) + orderedByCounter.push(getIdentifierObjectFromNode(node)) } }) } // Since each node can only have one type, we shouldn't have to be concerned // about duplicates in this array. Just make sure they're sorted. - orderedByCounter.sort((a, b) => a.internal.counter - b.internal.counter) + orderedByCounter.sort(sortByIds) } function addNodeToFilterCache( @@ -335,7 +378,7 @@ function markNodeForValue( arr = [] filterCache.byValue.set(value, arr) } - arr.push(node) + arr.push(getIdentifierObjectFromNode(node)) } export const ensureIndexByElemMatch = ( @@ -353,7 +396,7 @@ export const ensureIndexByElemMatch = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -540,7 +583,7 @@ export const getNodesFromCacheByValue = ( filterValue: FilterValueNullable, filtersCache: FiltersCache, wasElemMatch -): Array | undefined => { +): Array | undefined => { const filterCache = filtersCache.get(filterCacheKey) if (!filterCache) { return undefined @@ -573,7 +616,7 @@ export const getNodesFromCacheByValue = ( } const filterValueArr: Array = filterValue - const set: Set = new Set() + const set: Set = new Set() // TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works. 
// For every value in the needle array, find the bucket of nodes for @@ -583,7 +626,7 @@ export const getNodesFromCacheByValue = ( ) const arr = [...set] // this is bad for perf but will guarantee us a unique set :( - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // Note: it's very unlikely that the list of filter values is big so .includes should be fine here if (filterValueArr.includes(null)) { @@ -622,7 +665,7 @@ export const getNodesFromCacheByValue = ( // TODO: there's probably a more efficient algorithm to do set // subtraction in such a way that we don't have to re-sort - return [...set].sort((A, B) => A.internal.counter - B.internal.counter) + return [...set].sort(sortByIds) } if (op === `$ne`) { @@ -632,7 +675,7 @@ export const getNodesFromCacheByValue = ( // TODO: there's probably a more efficient algorithm to do set // subtraction in such a way that we don't have to resort here - return [...set].sort((A, B) => A.internal.counter - B.internal.counter) + return [...set].sort(sortByIds) } if (op === `$regex`) { @@ -649,7 +692,7 @@ export const getNodesFromCacheByValue = ( } const regex = filterValue - const arr: Array = [] + const arr: Array = [] filterCache.byValue.forEach((nodes, value) => { // TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce. // Note: for legacy reasons partial paths should also be included for regex @@ -661,7 +704,7 @@ export const getNodesFromCacheByValue = ( // TODO: we _can_ cache this list as well. Might make sense if it turns out that $regex is mostly used with literals // TODO: it may make sense to first collect all buckets and then to .concat them, or merge sort them - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { @@ -706,7 +749,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[0]) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -746,7 +789,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue < filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -764,7 +807,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[1]) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -804,7 +847,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue <= filterValue ? 
inclPivot : exclPivot const arr = nodes!.slice(0, until) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -822,7 +865,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[0]).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -862,7 +905,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue > filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -880,7 +923,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[1]).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -920,7 +963,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue >= filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -935,7 +978,7 @@ export const getNodesFromCacheByValue = ( function removeBucketFromSet( filterValue: FilterValueNullable, filterCache: IFilterCache, - set: Set + set: Set ): void { if (filterValue === null) { // Edge case: $ne with `null` returns only the nodes that contain the full @@ -960,22 +1003,24 @@ function removeBucketFromSet( * list that is also ordered by node.internal.counter */ export function intersectNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { let pointerA = 0 let pointerB = 0 // TODO: perf check: is it helpful to init the array to min(maxA,maxB) items? 
- const result: Array = [] + const result: Array = [] const maxA = a.length const maxB = b.length let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list + // TODO some optimization could be done here to not call getNode + while (pointerA < maxA && pointerB < maxB) { - const nodeA = a[pointerA] - const nodeB = b[pointerB] - const counterA = nodeA.internal.counter - const counterB = nodeB.internal.counter + const nodeA = getNode(a[pointerA].id) + const nodeB = getNode(b[pointerB].id) + const counterA = a[pointerA].counter + const counterB = b[pointerB].counter if (counterA < counterB) { pointerA++ @@ -992,7 +1037,7 @@ export function intersectNodesByCounter( // back to back, so even if both input arrays contained the same node // twice, this check would prevent the result from getting duplicate nodes if (lastAdded !== nodeA) { - result.push(nodeA) + result.push(a[pointerA]) lastAdded = nodeA } pointerA++ @@ -1011,39 +1056,41 @@ export function intersectNodesByCounter( * list that is also ordered by node.internal.counter */ export function unionNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items? - const arr: Array = [] + const arr: Array = [] let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list + // TODO some optimization could be done here to not call getNode + let pointerA = 0 let pointerB = 0 const maxA = a.length const maxB = b.length while (pointerA < maxA && pointerB < maxB) { - const nodeA = a[pointerA] - const nodeB = b[pointerB] + const nodeA = getNode(a[pointerA].id)! + const nodeB = getNode(b[pointerB].id)! const counterA = nodeA.internal.counter const counterB = nodeB.internal.counter if (counterA < counterB) { if (lastAdded !== nodeA) { - arr.push(nodeA) + arr.push(a[pointerA]) lastAdded = nodeA } pointerA++ } else if (counterA > counterB) { if (lastAdded !== nodeB) { - arr.push(nodeB) + arr.push(b[pointerB]) lastAdded = nodeB } pointerB++ } else { if (lastAdded !== nodeA) { - arr.push(nodeA) + arr.push(a[pointerA]) lastAdded = nodeA } pointerA++ @@ -1052,18 +1099,18 @@ export function unionNodesByCounter( } while (pointerA < maxA) { - const nodeA = a[pointerA] + const nodeA = getNode(a[pointerA].id)! if (lastAdded !== nodeA) { - arr.push(nodeA) + arr.push(a[pointerA]) lastAdded = nodeA } pointerA++ } while (pointerB < maxB) { - const nodeB = b[pointerB] + const nodeB = getNode(b[pointerB].id)! if (lastAdded !== nodeB) { - arr.push(nodeB) + arr.push(b[pointerB]) lastAdded = nodeB } pointerB++ @@ -1072,11 +1119,11 @@ export function unionNodesByCounter( return arr } -function expensiveDedupeInline(arr: Array): void { +function expensiveDedupeInline(arr: Array): void { // An elemMatch filter may cause duplicates to appear in a bucket. // Since the bucket is sorted those should now be back to back // Worst case this is a fast O(n) loop that does nothing. 
- let prev: IGatsbyNode | undefined = undefined + let prev: IGatsbyNodeIdentifiers | undefined = undefined // We copy-on-find because a splice is expensive and we can't use Sets diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts index 53eaeddce63d6..e76901d9a084b 100644 --- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts +++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts @@ -22,11 +22,18 @@ import { getNodesFromCacheByValue, intersectNodesByCounter, IFilterCache, + IGatsbyNodeIdentifiers, } from "./indexing" import { IGraphQLRunnerStats } from "../../query/types" import { IRunQueryArgs, IQueryResult } from "../types" import { GatsbyIterable } from "../common/iterable" +import { getNode } from "../" +// @ts-ignore +import { clearKeptObjects } from "lmdb" +function isGatsbyNode(node: IGatsbyNode | undefined): node is IGatsbyNode { + return !!node +} export interface IRunFilterArg extends IRunQueryArgs { filtersCache: FiltersCache } @@ -74,7 +81,7 @@ export function applyFastFilters( filters: Array, nodeTypeNames: Array, filtersCache: FiltersCache -): Array | null { +): Array | null { if (!filtersCache) { // If no filter cache is passed on, explicitly don't use one return null @@ -101,8 +108,10 @@ export function applyFastFilters( while (nodesPerValueArrs.length > 1) { // TS limitation: cannot guard against .pop(), so we must double cast - const a = nodesPerValueArrs.pop() as unknown as Array - const b = nodesPerValueArrs.pop() as unknown as Array + const a = + nodesPerValueArrs.pop() as unknown as Array + const b = + nodesPerValueArrs.pop() as unknown as Array nodesPerValueArrs.push(intersectNodesByCounter(a, b)) } @@ -124,8 +133,8 @@ function getBucketsForFilters( filters: Array, nodeTypeNames: Array, filtersCache: FiltersCache -): Array> | undefined { - const nodesPerValueArrs: Array> = [] +): Array> | undefined { + const nodesPerValueArrs: Array> = [] // Fail fast while trying to create and get the value-cache for each path const every = filters.every(filter => { @@ -170,7 +179,7 @@ function getBucketsForQueryFilter( filter: IDbQueryQuery, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { const { path: filterPath, @@ -187,6 +196,14 @@ function getBucketsForQueryFilter( ) } + const readableWorkerId = process.env.GATSBY_WORKER_ID + ? `worker #${process.env.GATSBY_WORKER_ID}` + : `main` + + console.log( + `getBucketsForQueryFilter "${filterCacheKey}" start ${readableWorkerId}` + ) + const nodesPerValue = getNodesFromCacheByValue( filterCacheKey, filterValue as FilterValueNullable, @@ -202,6 +219,10 @@ function getBucketsForQueryFilter( // mechanism does not create an array unless there's a IGatsbyNode for it nodesPerValueArrs.push(nodesPerValue) + console.log( + `getBucketsForQueryFilter "${filterCacheKey}" end ${readableWorkerId}` + ) + return true } @@ -213,7 +234,7 @@ function collectBucketForElemMatch( filter: IDbQueryElemMatch, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { // Get comparator and target value for this elemMatch let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init) @@ -295,7 +316,12 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult { ? sortedResult.slice(skip, limit ? skip + (limit ?? 
0) : undefined) : sortedResult - return { entries: new GatsbyIterable(entries), totalCount } + return { + entries: new GatsbyIterable(entries) + .map(nodeIds => getNode(nodeIds.id)) + .filter(isGatsbyNode) as GatsbyIterable, + totalCount, + } } /** @@ -307,7 +333,7 @@ function convertAndApplyFastFilters( filtersCache: FiltersCache, resolvedFields: Record, stats: IGraphQLRunnerStats -): Array { +): Array { const filters = filterFields ? prefixResolvedFields( createDbQueriesFromObject(prepareQueryArgs(filterFields)), @@ -339,7 +365,8 @@ function convertAndApplyFastFilters( // If there's a filter, there (now) must be an entry for this cache key const filterCache = filtersCache.get(filterCacheKey) as IFilterCache // If there is no filter then the ensureCache step will populate this: - const cache = filterCache.meta.orderedByCounter as Array + const cache = filterCache.meta + .orderedByCounter as Array return cache.slice(0) } @@ -388,7 +415,7 @@ function filterToStats( * Returns same reference as input, sorted inline */ function sortNodes( - nodes: Array, + nodes: Array, sort: | { fields: Array @@ -397,7 +424,7 @@ function sortNodes( | undefined, resolvedFields: any, stats: IGraphQLRunnerStats -): Array { +): Array { if (!sort || sort.fields?.length === 0 || !nodes || nodes.length === 0) { return nodes } @@ -415,10 +442,18 @@ function sortNodes( return field } }) + let i = 0 const sortFns = sortFields.map( field => - (v): ((any) => any) => - getValueAt(v, field) + (v: IGatsbyNodeIdentifiers): ((any) => any) => { + i++ + // lodash sorting needs ArrayLike thing, which our iterable isn't + // so for now this hack will do + if (i % 100 === 0) { + clearKeptObjects() + } + return getValueAt(getNode(v.id)!, field) + } ) const sortOrder = sort.order.map(order => typeof order === `boolean` ? 
order : order.toLowerCase() diff --git a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts index 6d216352222ec..101fb0aff8afa 100644 --- a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts +++ b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts @@ -1,4 +1,4 @@ -import { RootDatabase, open, ArrayLikeIterable } from "lmdb-store" +import { RootDatabase, open, ArrayLikeIterable } from "lmdb" // import { performance } from "perf_hooks" import { ActionsUnion, IGatsbyNode } from "../../redux/types" import { updateNodes } from "./updates/nodes" @@ -70,7 +70,9 @@ function getDatabases(): ILmdbDatabases { // FIXME: sharedStructuresKey breaks tests - probably need some cleanup for it on DELETE_CACHE // sharedStructuresKey: Symbol.for(`structures`), // @ts-ignore - cache: true, + cache: { + expirer: false, + }, }), nodesByType: rootDb.openDB({ name: `nodesByType`, @@ -184,10 +186,10 @@ function updateDataStore(action: ActionsUnion): void { const dbs = getDatabases() // Force sync commit dbs.nodes.transactionSync(() => { - dbs.nodes.clear() - dbs.nodesByType.clear() - dbs.metadata.clear() - dbs.indexes.clear() + dbs.nodes.clearSync() + dbs.nodesByType.clearSync() + dbs.metadata.clearSync() + dbs.indexes.clearSync() }) break } @@ -229,8 +231,8 @@ function updateDataStore(action: ActionsUnion): void { function clearIndexes(): void { const dbs = getDatabases() dbs.nodes.transactionSync(() => { - dbs.metadata.clear() - dbs.indexes.clear() + dbs.metadata.clearSync() + dbs.indexes.clearSync() }) } diff --git a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts index d2426e3959ef4..1622548020f27 100644 --- a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts +++ b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts @@ -1,5 +1,5 @@ import { ActionsUnion, IGatsbyNode } from "../../../redux/types" -import { Database } from "lmdb-store" +import type { Database } from "lmdb" type NodeId = string diff --git a/packages/gatsby/src/datastore/types.ts b/packages/gatsby/src/datastore/types.ts index 80b6e8d7bdb24..58a1c0e3653e2 100644 --- a/packages/gatsby/src/datastore/types.ts +++ b/packages/gatsby/src/datastore/types.ts @@ -1,4 +1,4 @@ -import { Database } from "lmdb-store" +import { Database } from "lmdb" import { IGatsbyNode } from "../redux/types" import { GatsbyGraphQLType } from "../../index" import { IInputQuery } from "./common/query" diff --git a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts index 8a5ffedb83a1c..b7248a3bbec30 100644 --- a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts +++ b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts @@ -70,6 +70,21 @@ export async function createGraphqlEngineBundle( ], module: { rules: [ + { + test: require.resolve(`lmdb`), + parser: { amd: false }, + use: [ + { + loader: require.resolve(`@vercel/webpack-asset-relocator-loader`), + options: { + outputAssetBase: `assets`, + }, + }, + { + loader: require.resolve(`./lmdb-bundling-patch`), + }, + ], + }, { test: /\.m?js$/, type: `javascript/auto`, diff --git a/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts new file mode 100644 index 0000000000000..690036aeae2fc --- /dev/null +++ b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts @@ -0,0 +1,31 @@ +import { createRequireFromPath } from "gatsby-core-utils" + 
+export default function (source: string): string { + let lmdbBinaryLocation + try { + const lmdbRequire = createRequireFromPath(require.resolve(`lmdb`)) + const nodeGypBuild = lmdbRequire(`node-gyp-build`) + const path = require(`path`) + + lmdbBinaryLocation = nodeGypBuild.path( + path.dirname(require.resolve(`lmdb`)).replace(`/dist`, ``) + ) + } catch (e) { + console.error(`ln`, e) + return source + } + + return source + .replace( + `require$1('node-gyp-build')(dirName)`, + `require(${JSON.stringify(lmdbBinaryLocation)})` + ) + .replace( + `require$2.resolve('./dict/dict.txt')`, + `require.resolve('../dict/dict.txt')` + ) + .replace( + /fs\.readFileSync\(new URL\('\.\/dict\/dict\.txt',\s*\(typeof\s*document\s*===\s*'undefined'\s*\?\s*new\s*\(require\('u'\s*\+\s*'rl'\)\.URL\)\s*\('file:'\s*\+\s*__filename\).href\s*:\s*\(document\.currentScript\s*&&\s*document\.currentScript\.src\s*\|\|\s*new URL\('index\.cjs',\s*document\.baseURI\)\.href\)\)\.replace\(\/dist\[\\\\\\\/\]index\.cjs\$\/,\s*''\)\)\)/g, + `fs.readFileSync(require.resolve('../dict/dict.txt'))` + ) +} diff --git a/packages/gatsby/src/schema/node-model.js b/packages/gatsby/src/schema/node-model.js index ac750c7a792b3..40850852cf2e3 100644 --- a/packages/gatsby/src/schema/node-model.js +++ b/packages/gatsby/src/schema/node-model.js @@ -482,6 +482,7 @@ class LocalNodeModel { ) if (!_.isEmpty(actualFieldsToResolve)) { + console.log(`materialization`, { typeName, actualFieldsToResolve }) const resolvedNodes = new Map() for (const node of getDataStore().iterateNodesByType(typeName)) { this.trackInlineObjectsInRootNode(node) diff --git a/packages/gatsby/src/utils/cache-lmdb.ts b/packages/gatsby/src/utils/cache-lmdb.ts index 08ab4abdd8a34..98e94f45c1dc3 100644 --- a/packages/gatsby/src/utils/cache-lmdb.ts +++ b/packages/gatsby/src/utils/cache-lmdb.ts @@ -1,4 +1,4 @@ -import { open, RootDatabase, Database, DatabaseOptions } from "lmdb-store" +import { open, RootDatabase, Database, DatabaseOptions } from "lmdb" import fs from "fs-extra" import path from "path" diff --git a/yarn.lock b/yarn.lock index 36cff191c4bc8..dfa5bc2c4451d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -14612,17 +14612,16 @@ livereload-js@^2.3.0: version "2.3.0" resolved "https://registry.yarnpkg.com/livereload-js/-/livereload-js-2.3.0.tgz#c3ab22e8aaf5bf3505d80d098cbad67726548c9a" -lmdb-store@^1.6.11: - version "1.6.11" - resolved "https://registry.yarnpkg.com/lmdb-store/-/lmdb-store-1.6.11.tgz#801da597af8c7a01c81f87d5cc7a7497e381236d" - integrity sha512-hIvoGmHGsFhb2VRCmfhodA/837ULtJBwRHSHKIzhMB7WtPH6BRLPsvXp1MwD3avqGzuZfMyZDUp3tccLvr721Q== +lmdb@2.1.7: + version "2.1.7" + resolved "https://registry.yarnpkg.com/lmdb/-/lmdb-2.1.7.tgz#0f518102032037e248f201210943f0b94db04155" + integrity sha512-i6EFEBBlQ130J4BfJUbYgZFKQDz83xhpM47vzs0BMpXiJ7D4NjecO1Y3X54D341dwkLmTphlIyro5nTkKFXoMQ== dependencies: + msgpackr "^1.5.2" nan "^2.14.2" node-gyp-build "^4.2.3" - ordered-binary "^1.0.0" - weak-lru-cache "^1.0.0" - optionalDependencies: - msgpackr "^1.4.7" + ordered-binary "^1.2.3" + weak-lru-cache "^1.2.1" load-bmfont@^1.3.1, load-bmfont@^1.4.0: version "1.4.0" @@ -16233,10 +16232,10 @@ msgpackr-extract@^1.0.14: nan "^2.14.2" node-gyp-build "^4.2.3" -msgpackr@^1.4.7: - version "1.4.7" - resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.4.7.tgz#d802ade841e7d2e873000b491cdda6574a3d5748" - integrity sha512-bhC8Ed1au3L3oHaR/fe4lk4w7PLGFcWQ5XY/Tk9N6tzDRz8YndjCG68TD8zcvYZoxNtw767eF/7VpaTpU9kf9w== +msgpackr@^1.5.2: + version "1.5.2" + resolved 
"https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.5.2.tgz#b400c9885642bdec27b284f8bdadbd6570b448b7" + integrity sha512-OCguCkbG34x1ddO4vAzEm/4J1GTo512k9SoxV8K+EGfI/onFdpemRf0HpsVRFpxadXr4JBFgHsQUitgTlw7ZYQ== optionalDependencies: msgpackr-extract "^1.0.14" @@ -17137,10 +17136,10 @@ ora@^5.4.1: strip-ansi "^6.0.0" wcwidth "^1.0.1" -ordered-binary@^1.0.0: - version "1.1.3" - resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.1.3.tgz#11dbc0a4cb7f8248183b9845e031b443be82571e" - integrity sha512-tDTls+KllrZKJrqRXUYJtIcWIyoQycP7cVN7kzNNnhHKF2bMKHflcAQK+pF2Eb1iVaQodHxqZQr0yv4HWLGBhQ== +ordered-binary@^1.2.3: + version "1.2.3" + resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.2.3.tgz#518f637692a74d372e56230effae37b811575e36" + integrity sha512-fEwMk8TNUtzQDjXKYS2ANW3fNZ/gMReCPOAsLHaqw+UDnq/8ddXAcX4lGRpTK7kAghAjkmJs1EXXbcrDbg+ruw== ordered-read-streams@^1.0.0: version "1.0.1" @@ -24502,10 +24501,10 @@ wcwidth@^1.0.0, wcwidth@^1.0.1: dependencies: defaults "^1.0.3" -weak-lru-cache@^1.0.0: - version "1.1.2" - resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.1.2.tgz#a909a97372aabdfbfe3eb33580af255b3b198834" - integrity sha512-Bi5ae8Bev3YulgtLTafpmHmvl3vGbanRkv+qqA2AX8c3qj/MUdvSuaHq7ukDYBcMDINIaRPTPEkXSNCqqWivuA== +weak-lru-cache@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.2.1.tgz#6b4f2da7e1701f845e71522417f1df1e39503df8" + integrity sha512-O5ag1F0Xk6ui+Fg5LlosTcVAyHs6DeyiDDbOapNtFCx/KjZ82B3U9stM9hvzbVclKWn9ABPjaINX/nQkGkJkKg== web-namespaces@^1.0.0: version "1.1.2"