From 7f59b33111b6b35366109213c83d7704acb6f930 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Thu, 20 Jan 2022 18:51:15 +0100 Subject: [PATCH 01/42] starting point --- benchmarks/memory/.dockerignore | 23 +++ benchmarks/memory/Dockerfile | 139 +++++++++++++++++++ benchmarks/memory/gatsby-config.js | 3 + benchmarks/memory/gatsby-node.js | 88 ++++++++++++ benchmarks/memory/package.json | 24 ++++ benchmarks/memory/plugins/wat/gatsby-node.js | 18 +++ benchmarks/memory/plugins/wat/index.js | 0 benchmarks/memory/plugins/wat/package.json | 1 + benchmarks/memory/scripts/run-docker.js | 0 benchmarks/memory/src/pages/double_eq.js | 19 +++ benchmarks/memory/src/pages/eq_field.js | 19 +++ benchmarks/memory/src/pages/eq_id.js | 19 +++ benchmarks/memory/src/pages/index.js | 5 + 13 files changed, 358 insertions(+) create mode 100644 benchmarks/memory/.dockerignore create mode 100644 benchmarks/memory/Dockerfile create mode 100644 benchmarks/memory/gatsby-config.js create mode 100644 benchmarks/memory/gatsby-node.js create mode 100644 benchmarks/memory/package.json create mode 100644 benchmarks/memory/plugins/wat/gatsby-node.js create mode 100644 benchmarks/memory/plugins/wat/index.js create mode 100644 benchmarks/memory/plugins/wat/package.json create mode 100644 benchmarks/memory/scripts/run-docker.js create mode 100644 benchmarks/memory/src/pages/double_eq.js create mode 100644 benchmarks/memory/src/pages/eq_field.js create mode 100644 benchmarks/memory/src/pages/eq_id.js create mode 100644 benchmarks/memory/src/pages/index.js diff --git a/benchmarks/memory/.dockerignore b/benchmarks/memory/.dockerignore new file mode 100644 index 0000000000000..cbd3fdd9b6b92 --- /dev/null +++ b/benchmarks/memory/.dockerignore @@ -0,0 +1,23 @@ +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +README.md diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile new file mode 100644 index 0000000000000..e772addadfe7b --- /dev/null +++ b/benchmarks/memory/Dockerfile @@ -0,0 +1,139 @@ +FROM node:14.17.6 as build + +RUN npx pkg-fetch@3.2.3 node14 linux x64 + +FROM debian:buster +ENV NODE_ENV=production + +RUN apt-get update && apt-get upgrade && apt-get install git curl npm -y + +# RUN echo "Install packages" && \ +# apt-get update && \ +# DEBIAN_FRONTEND=noninteractive \ +# # apt-get upgrade installs foo AND upgrades packages in one step +# apt-get upgrade --yes --no-install-recommends \ +# # FROM buildpack-deps:buster-curl +# ca-certificates \ +# curl \ +# dirmngr \ +# gnupg \ +# netbase \ +# wget \ +# # FROM buildpack-deps:buster-scm +# git \ +# # mercurial \ +# # openssh-client \ +# # subversion \ +# procps \ +# # FROM buildpack-deps:buster +# autoconf \ +# automake \ +# bzip2 \ +# default-libmysqlclient-dev \ +# dpkg-dev \ +# file \ +# g++ \ +# gcc \ +# imagemagick \ +# libbz2-dev \ +# libc6-dev \ +# libcurl4-openssl-dev \ +# libdb-dev \ +# libevent-dev \ +# libffi-dev \ +# libgdbm-dev \ +# libglib2.0-dev \ +# libgmp-dev \ +# libjpeg-dev \ +# libkrb5-dev \ +# liblzma-dev \ +# libmagickcore-dev \ +# libmagickwand-dev \ +# libmaxminddb-dev \ +# libncurses5-dev \ +# libncursesw5-dev \ +# libpng-dev \ +# libpq-dev \ +# libreadline-dev \ +# libsqlite3-dev \ +# libssl-dev \ +# libtool \ +# libwebp-dev \ +# libxml2-dev \ +# libxslt-dev \ +# 
libyaml-dev \ +# make \ +# patch \ +# unzip \ +# xz-utils \ +# zlib1g-dev \ +# ######################## +# # Gatsby added packages +# # Should leave/include packages that make compiling node C/C++ extensions possible/easier/faster +# # A bunch of these packages are probably unnecessary since we no longer compile libvips here +# build-essential \ +# cdbs \ +# debhelper \ +# dh-autoreconf \ +# fftw3-dev \ +# flex \ +# bison \ +# gettext \ +# gtk-doc-tools \ +# libcfitsio-dev \ +# libexif-gtk-dev \ +# libfontconfig1-dev \ +# libfreetype6-dev \ +# libgif-dev \ +# libgirepository1.0-dev \ +# libgsf-1-dev \ +# libice-dev \ +# liblcms2-dev \ +# libmatio-dev \ +# libopenexr-dev \ +# libopenslide-dev \ +# liborc-0.4-dev \ +# libpango1.0-dev \ +# libpoppler-glib-dev \ +# librsvg2-dev \ +# libtiff-dev \ +# libxml-parser-perl \ +# pkg-config \ +# git-lfs \ +# # These make installation of cwebp-bin faster by about 30s. gatsby-plugin-sharp -> imagemin-webp -> cwebp-bin +# libgl1-mesa-dev \ +# liblz4-tool \ +# # tooling for cloud team +# ack \ +# curl \ +# dnsutils \ +# htop \ +# less \ +# lsof \ +# ncdu \ +# tig \ +# vim \ +# && \ +# apt-get clean && \ +# rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +ENV YARN_VERSION 1.22.5 + +RUN echo "Install NVM" && \ + # clone nvm + git clone --quiet --depth=1 https://github.com/nvm-sh/nvm.git --branch=v0.35.0 /root/.nvm && \ + # Add here any versions we expect to be popular, and want to be cached + # "node" is the latest version + # Also install npm, yarn and lerna under that version of node + #nvm install 10 && nvm exec 10 npm install -g npm@^6.14 yarn@^1.22 lerna@${GATSBY_DEFAULT_LERNA_VERSION} && \ + #nvm install 12 && nvm exec 12 npm install -g npm@^6.14 yarn@^1.22 lerna@${GATSBY_DEFAULT_LERNA_VERSION} && \ + /root/.nvm/nvm.sh install 14 && nvm exec 14 npm install -g npm@^6.14 yarn@^1.22 lerna@${GATSBY_DEFAULT_LERNA_VERSION} && \ + /root/.nvm/nvm.sh alias default 14 && \ + /root/.nvm/nvm.sh ls && \ + /usr/local/bin/nvm ls + +RUN npm i -g gatsby-dev-cli yarn@^1.22 + +WORKDIR /usr/src/app + +CMD ["yarn", "start"] diff --git a/benchmarks/memory/gatsby-config.js b/benchmarks/memory/gatsby-config.js new file mode 100644 index 0000000000000..10141cf5319df --- /dev/null +++ b/benchmarks/memory/gatsby-config.js @@ -0,0 +1,3 @@ +module.exports = { + plugins: [`wat`], +} diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js new file mode 100644 index 0000000000000..dd1634f503323 --- /dev/null +++ b/benchmarks/memory/gatsby-node.js @@ -0,0 +1,88 @@ +const { + takeHeapSnapshot, +} = require(`./node_modules/gatsby/dist/utils/debug-memory.js`) + +// exports.createSchemaCustomization = ({ actions }) => { +// actions.createTypes(` +// type Test implements Node @dontInfer { +// id: ID! +// nodeNum: Int! +// nodeNumStr: String! +// pageNum: Int! +// pageNumStr: String! +// fooBar: String! +// fooBar2: String! +// fooBarArray: [TestFooBarArray!] +// text: String! +// random: Int! +// randomPage: Int! +// } +// type TestFooBarArray { +// fooBar: String! +// } +// type SitePage implements Node @dontInfer { +// id: ID! 
+// } +// `) +// } +const NUM_NODES = 200 + +exports.sourceNodes = async ({ actions }) => { + await takeHeapSnapshot(`sourceNodes-1`) + + for (let i = 0; i < NUM_NODES; i++) { + const largeSizeObj = {} + for (let j = 1; j <= 1024; j++) { + largeSizeObj[`key_${j}`] = `x`.repeat(1024) + } + + const node = { + id: `memory-${i}`, + idClone: `memory-${i}`, + fooBar: [`foo`, `bar`, `baz`, `foobar`][i % 4], + number1: 5, + number2: 7, + largeSizeObj, + largeSizeString: `x`.repeat(1024 * 1024), + internal: { + contentDigest: `hash`, // we won't be changing nodes so this can be hardcoded + type: `Test`, + }, + } + + actions.createNode(node) + + if (i % 100 === 99) { + await new Promise(resolve => setImmediate(resolve)) + } + } + + await new Promise(resolve => setTimeout(resolve, 100)) + + await takeHeapSnapshot(`sourceNodes-2`) +} + +// exports.onCreateNode = ({ node, actions, getNode }) => { +// if (node.internal.type === `TestChild`) { +// const grandpa = getNode(node.parent) +// console.log({ grandpa }) + +// actions.createNode({ +// id: `${node.id} << test child2`, +// parent: node.id, +// internal: { +// type: `TestGrandChild`, +// contentDigest: `wa`, +// }, +// }) +// } +// } + +exports.createPages = async ({ getNode, action, graphql }) => { + debugger + + const node = getNode(`memory-1`) + // console.log({ node }) + // console.info(`just using node`, node.id) + await takeHeapSnapshot(`create-pages`) +} diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json new file mode 100644 index 0000000000000..ebb21344d9e4b --- /dev/null +++ b/benchmarks/memory/package.json @@ -0,0 +1,24 @@ +{ + "name": "memory-usage-benchmark", + "private": true, + "version": "1.0.0", + "description": "Test site stress testing memory usage", + "license": "MIT", + "scripts": { + "start": "echo hello.js", + "build-image": "docker build -t gatsby-memory .", + "run-container": "docker run --mount type=bind,source=\"$(pwd)/../..\",target=/usr/src/app gatsby-memory" + }, + "repository": { + "type": "git", + "url": "https://github.com/gatsbyjs/gatsby/tree/master/benchmarks/memory" + }, + "bugs": { + "url": "https://github.com/gatsbyjs/gatsby/issues" + }, + "dependencies": { + "gatsby": "4.6.0-next.3-dev-1642528625779", + "react": "^17.0.2", + "react-dom": "^17.0.2" + } +} diff --git a/benchmarks/memory/plugins/wat/gatsby-node.js b/benchmarks/memory/plugins/wat/gatsby-node.js new file mode 100644 index 0000000000000..744a389e4d0ea --- /dev/null +++ b/benchmarks/memory/plugins/wat/gatsby-node.js @@ -0,0 +1,18 @@ +// exports.sourceNodes = () => { +// console.log(`wat`) +// } +// exports.onCreateNode = ({ node, actions, getNode }) => { +// if (node.internal.type === `Test`) { +// const fromLMDB = getNode(node.id) + +// console.log({ node, fromLMDB }) +// actions.createNode({ +// id: `${node.id} << test child`, +// parent: node.id, +// internal: { +// type: `TestChild`, +// contentDigest: `wa`, +// }, +// }) +// } +// } diff --git a/benchmarks/memory/plugins/wat/index.js b/benchmarks/memory/plugins/wat/index.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/benchmarks/memory/plugins/wat/package.json b/benchmarks/memory/plugins/wat/package.json new file mode 100644 index 0000000000000..0967ef424bce6 --- /dev/null +++ b/benchmarks/memory/plugins/wat/package.json @@ -0,0 +1 @@ +{} diff --git a/benchmarks/memory/scripts/run-docker.js b/benchmarks/memory/scripts/run-docker.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/benchmarks/memory/src/pages/double_eq.js 
b/benchmarks/memory/src/pages/double_eq.js new file mode 100644 index 0000000000000..43777d9fb007f --- /dev/null +++ b/benchmarks/memory/src/pages/double_eq.js @@ -0,0 +1,19 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div> + <pre>{JSON.stringify(data, null, 2)}</pre> + </div>
+ ) +} + +export const q = graphql` + { + test(number1: { gt: 4 }, number2: { lt: 10 }) { + id + fooBar + } + } +` diff --git a/benchmarks/memory/src/pages/eq_field.js b/benchmarks/memory/src/pages/eq_field.js new file mode 100644 index 0000000000000..5d85bbc4c0d03 --- /dev/null +++ b/benchmarks/memory/src/pages/eq_field.js @@ -0,0 +1,19 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div> + <pre>{JSON.stringify(data, null, 2)}</pre> + </div>
+ ) +} + +export const q = graphql` + { + test(idClone: { eq: "memory-2" }) { + id + fooBar + } + } +` diff --git a/benchmarks/memory/src/pages/eq_id.js b/benchmarks/memory/src/pages/eq_id.js new file mode 100644 index 0000000000000..cde339ed799ab --- /dev/null +++ b/benchmarks/memory/src/pages/eq_id.js @@ -0,0 +1,19 @@ +import React from "react" +import { graphql } from "gatsby" + +export default function Home({ data }) { + return ( +
<div> + <pre>{JSON.stringify(data, null, 2)}</pre> + </div>
+ ) +} + +export const q = graphql` + { + test(id: { eq: "memory-2" }) { + id + fooBar + } + } +` diff --git a/benchmarks/memory/src/pages/index.js b/benchmarks/memory/src/pages/index.js new file mode 100644 index 0000000000000..8729fdc41578e --- /dev/null +++ b/benchmarks/memory/src/pages/index.js @@ -0,0 +1,5 @@ +import React from "react" + +export default function Home() { + return
<div>Hello world!</div>
+} From d22f00e86f5d8c6110445035d884c07caf8439f1 Mon Sep 17 00:00:00 2001 From: Josh Date: Thu, 20 Jan 2022 13:20:49 -0500 Subject: [PATCH 02/42] Get docker running --- benchmarks/memory/Dockerfile | 138 +-------------------------------- benchmarks/memory/package.json | 4 +- 2 files changed, 5 insertions(+), 137 deletions(-) diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index e772addadfe7b..595e20fdfb10d 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -1,139 +1,7 @@ -FROM node:14.17.6 as build - -RUN npx pkg-fetch@3.2.3 node14 linux x64 - -FROM debian:buster +FROM node:14-buster ENV NODE_ENV=production - -RUN apt-get update && apt-get upgrade && apt-get install git curl npm -y - -# RUN echo "Install packages" && \ -# apt-get update && \ -# DEBIAN_FRONTEND=noninteractive \ -# # apt-get upgrade installs foo AND upgrades packages in one step -# apt-get upgrade --yes --no-install-recommends \ -# # FROM buildpack-deps:buster-curl -# ca-certificates \ -# curl \ -# dirmngr \ -# gnupg \ -# netbase \ -# wget \ -# # FROM buildpack-deps:buster-scm -# git \ -# # mercurial \ -# # openssh-client \ -# # subversion \ -# procps \ -# # FROM buildpack-deps:buster -# autoconf \ -# automake \ -# bzip2 \ -# default-libmysqlclient-dev \ -# dpkg-dev \ -# file \ -# g++ \ -# gcc \ -# imagemagick \ -# libbz2-dev \ -# libc6-dev \ -# libcurl4-openssl-dev \ -# libdb-dev \ -# libevent-dev \ -# libffi-dev \ -# libgdbm-dev \ -# libglib2.0-dev \ -# libgmp-dev \ -# libjpeg-dev \ -# libkrb5-dev \ -# liblzma-dev \ -# libmagickcore-dev \ -# libmagickwand-dev \ -# libmaxminddb-dev \ -# libncurses5-dev \ -# libncursesw5-dev \ -# libpng-dev \ -# libpq-dev \ -# libreadline-dev \ -# libsqlite3-dev \ -# libssl-dev \ -# libtool \ -# libwebp-dev \ -# libxml2-dev \ -# libxslt-dev \ -# libyaml-dev \ -# make \ -# patch \ -# unzip \ -# xz-utils \ -# zlib1g-dev \ -# ######################## -# # Gatsby added packages -# # Should leave/include packages that make compiling node C/C++ extensions possible/easier/faster -# # A bunch of these packages are probably unnecessary since we no longer compile libvips here -# build-essential \ -# cdbs \ -# debhelper \ -# dh-autoreconf \ -# fftw3-dev \ -# flex \ -# bison \ -# gettext \ -# gtk-doc-tools \ -# libcfitsio-dev \ -# libexif-gtk-dev \ -# libfontconfig1-dev \ -# libfreetype6-dev \ -# libgif-dev \ -# libgirepository1.0-dev \ -# libgsf-1-dev \ -# libice-dev \ -# liblcms2-dev \ -# libmatio-dev \ -# libopenexr-dev \ -# libopenslide-dev \ -# liborc-0.4-dev \ -# libpango1.0-dev \ -# libpoppler-glib-dev \ -# librsvg2-dev \ -# libtiff-dev \ -# libxml-parser-perl \ -# pkg-config \ -# git-lfs \ -# # These make installation of cwebp-bin faster by about 30s. 
gatsby-plugin-sharp -> imagemin-webp -> cwebp-bin -# libgl1-mesa-dev \ -# liblz4-tool \ -# # tooling for cloud team -# ack \ -# curl \ -# dnsutils \ -# htop \ -# less \ -# lsof \ -# ncdu \ -# tig \ -# vim \ -# && \ -# apt-get clean && \ -# rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -ENV YARN_VERSION 1.22.5 - -RUN echo "Install NVM" && \ - # clone nvm - git clone --quiet --depth=1 https://github.com/nvm-sh/nvm.git --branch=v0.35.0 /root/.nvm && \ - # Add here any versions we expect to be popular, and want to be cached - # "node" is the latest version - # Also install npm, yarn and lerna under that version of node - #nvm install 10 && nvm exec 10 npm install -g npm@^6.14 yarn@^1.22 lerna@${GATSBY_DEFAULT_LERNA_VERSION} && \ - #nvm install 12 && nvm exec 12 npm install -g npm@^6.14 yarn@^1.22 lerna@${GATSBY_DEFAULT_LERNA_VERSION} && \ - /root/.nvm/nvm.sh install 14 && nvm exec 14 npm install -g npm@^6.14 yarn@^1.22 lerna@${GATSBY_DEFAULT_LERNA_VERSION} && \ - /root/.nvm/nvm.sh alias default 14 && \ - /root/.nvm/nvm.sh ls && \ - /usr/local/bin/nvm ls - -RUN npm i -g gatsby-dev-cli yarn@^1.22 - +RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y +RUN npm i -g gatsby-dev-cli WORKDIR /usr/src/app CMD ["yarn", "start"] diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index ebb21344d9e4b..868725e610d6b 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -5,9 +5,9 @@ "description": "Test site stress testing memory usage", "license": "MIT", "scripts": { - "start": "echo hello.js", + "start": "echo $PWD; echo -e '\n'; ls", "build-image": "docker build -t gatsby-memory .", - "run-container": "docker run --mount type=bind,source=\"$(pwd)/../..\",target=/usr/src/app gatsby-memory" + "run-container": "docker run --mount type=bind,source=\"$(pwd)\",target=/usr/src/app gatsby-memory" }, "repository": { "type": "git", From cdb008e55d12ea8a7635b61f2685c2b6ae1883a3 Mon Sep 17 00:00:00 2001 From: Josh Date: Thu, 20 Jan 2022 15:42:45 -0500 Subject: [PATCH 03/42] Add yarn commands for docker --- benchmarks/memory/Dockerfile | 12 +++++++++++- benchmarks/memory/gatsby-node.js | 12 ++++++------ benchmarks/memory/package.json | 8 +++++--- benchmarks/memory/scripts/docker-connect | 9 +++++++++ benchmarks/memory/scripts/docker-get-id | 8 ++++++++ benchmarks/memory/scripts/docker-start | 17 +++++++++++++++++ benchmarks/memory/scripts/docker-stop | 9 +++++++++ benchmarks/memory/scripts/run-docker.js | 0 8 files changed, 65 insertions(+), 10 deletions(-) create mode 100755 benchmarks/memory/scripts/docker-connect create mode 100755 benchmarks/memory/scripts/docker-get-id create mode 100755 benchmarks/memory/scripts/docker-start create mode 100755 benchmarks/memory/scripts/docker-stop delete mode 100644 benchmarks/memory/scripts/run-docker.js diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index 595e20fdfb10d..91ff1cb53a400 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -3,5 +3,15 @@ ENV NODE_ENV=production RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y RUN npm i -g gatsby-dev-cli WORKDIR /usr/src/app +RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc -CMD ["yarn", "start"] + +# TODO figure out port forwarding +EXPOSE 9229 + + +# set up gatsby-dev +RUN gatsby-dev --set-path-to-repo /usr/src/gatsby + +# 
keep the process running +ENTRYPOINT ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js index dd1634f503323..8e1a737d19d72 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -1,6 +1,6 @@ -const { - takeHeapSnapshot, -} = require(`./node_modules/gatsby/dist/utils/debug-memory.js`) +// const { +// takeHeapSnapshot, +// } = require(`./node_modules/gatsby/dist/utils/debug-memory.js`) // exports.createSchemaCustomization = ({ actions }) => { // actions.createTypes(` @@ -28,7 +28,7 @@ const { const NUM_NODES = 200 exports.sourceNodes = async ({ actions }) => { - await takeHeapSnapshot(`sourceNodes-1`) + // await takeHeapSnapshot(`sourceNodes-1`) for (let i = 0; i < NUM_NODES; i++) { const largeSizeObj = {} @@ -59,7 +59,7 @@ exports.sourceNodes = async ({ actions }) => { await new Promise(resolve => setTimeout(resolve, 100)) - await takeHeapSnapshot(`sourceNodes-2`) + // await takeHeapSnapshot(`sourceNodes-2`) } // exports.onCreateNode = ({ node, actions, getNode }) => { @@ -84,5 +84,5 @@ exports.createPages = async ({ getNode, action, graphql }) => { const node = getNode(`memory-1`) // console.log({ node }) // console.info(`just using node`, node.id) - await takeHeapSnapshot(`create-pages`) + // await takeHeapSnapshot(`create-pages`) } diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index 868725e610d6b..53023d2d641a5 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -5,9 +5,11 @@ "description": "Test site stress testing memory usage", "license": "MIT", "scripts": { - "start": "echo $PWD; echo -e '\n'; ls", - "build-image": "docker build -t gatsby-memory .", - "run-container": "docker run --mount type=bind,source=\"$(pwd)\",target=/usr/src/app gatsby-memory" + "start": "echo $PWD; ls ..", + "docker:build": "docker build -t gatsby-memory .", + "docker:start": "./scripts/docker-start", + "docker:connect": "./scripts/docker-connect", + "docker:stop": "./scripts/docker-stop" }, "repository": { "type": "git", diff --git a/benchmarks/memory/scripts/docker-connect b/benchmarks/memory/scripts/docker-connect new file mode 100755 index 0000000000000..af6582a97d6f8 --- /dev/null +++ b/benchmarks/memory/scripts/docker-connect @@ -0,0 +1,9 @@ +DOCKER_ID=$(./scripts/docker-get-id) + +if [ -z "$DOCKER_ID" ]; then + echo "\nNo gatsby-memory is running. Start one with \`yarn docker:start\`.\n" + return 1 +fi + +echo "Connecting to container $DOCKER_ID...\n" +docker exec -it $DOCKER_ID bash \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-get-id b/benchmarks/memory/scripts/docker-get-id new file mode 100755 index 0000000000000..064e21e32607c --- /dev/null +++ b/benchmarks/memory/scripts/docker-get-id @@ -0,0 +1,8 @@ +DOCKER_ID=$(\ + docker ps --format '{{.Image}}:{{.ID}}' | \ + grep "gatsby-memory" | \ + head -n 1 | \ + sed 's/gatsby\-memory://'\ +) + +echo $DOCKER_ID \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start new file mode 100755 index 0000000000000..6528dd2a8f7d3 --- /dev/null +++ b/benchmarks/memory/scripts/docker-start @@ -0,0 +1,17 @@ +DOCKER_ID=$(./scripts/docker-get-id) +if [ -n "$DOCKER_ID" ]; then + echo "\nA gatsby-memory container is already running with id $DOCKER_ID." + echo "Please use that container, or run \`yarn docker:stop\` to stop it.\n" + return 1 +fi + +# TODO ports still not working here... 
+DOCKER_ID=$(\ + docker run -td \ + --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ + --mount type=bind,source="$(pwd)",target=/usr/src/app \ + gatsby-memory \ + | head -c 12 \ +) + +echo "\nStarted container id ${DOCKER_ID}! Run \`yarn docker:connect\` to connect to the container.\n" \ No newline at end of file diff --git a/benchmarks/memory/scripts/docker-stop b/benchmarks/memory/scripts/docker-stop new file mode 100755 index 0000000000000..95dbec9e55704 --- /dev/null +++ b/benchmarks/memory/scripts/docker-stop @@ -0,0 +1,9 @@ +DOCKER_ID=$(./scripts/docker-get-id) + +if [ -z "$DOCKER_ID" ]; then + echo "\nNo gatsby-memory is running.\n" + return 1 +fi + +DOCKER_ID=$(docker kill $DOCKER_ID) +echo "\nStopped container $DOCKER_ID.\n" \ No newline at end of file diff --git a/benchmarks/memory/scripts/run-docker.js b/benchmarks/memory/scripts/run-docker.js deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 435885a8e0c03b43518fe29c4cd293a5e6009565 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Fri, 21 Jan 2022 14:19:49 +0100 Subject: [PATCH 04/42] set initial mem limit, create lot of pages --- benchmarks/memory/Dockerfile | 2 +- benchmarks/memory/gatsby-node.js | 132 +++++++++++------- benchmarks/memory/scripts/docker-start | 5 + .../src/{pages => templates}/double_eq.js | 0 .../src/{pages => templates}/eq_field.js | 5 +- .../memory/src/{pages => templates}/eq_id.js | 5 +- 6 files changed, 94 insertions(+), 55 deletions(-) rename benchmarks/memory/src/{pages => templates}/double_eq.js (100%) rename benchmarks/memory/src/{pages => templates}/eq_field.js (73%) rename benchmarks/memory/src/{pages => templates}/eq_id.js (74%) diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index 91ff1cb53a400..7601139ba9f4b 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -1,7 +1,7 @@ FROM node:14-buster ENV NODE_ENV=production RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y -RUN npm i -g gatsby-dev-cli +RUN npm i -g gatsby-cli gatsby-dev-cli WORKDIR /usr/src/app RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js index 8e1a737d19d72..5bf3fa5cc05e3 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -1,35 +1,8 @@ -// const { -// takeHeapSnapshot, -// } = require(`./node_modules/gatsby/dist/utils/debug-memory.js`) +const { cpuCoreCount } = require(`gatsby-core-utils`) -// exports.createSchemaCustomization = ({ actions }) => { -// actions.createTypes(` -// type Test implements Node @dontInfer { -// id: ID! -// nodeNum: Int! -// nodeNumStr: String! -// pageNum: Int! -// pageNumStr: String! -// fooBar: String! -// fooBar2: String! -// fooBarArray: [TestFooBarArray!] -// text: String! -// random: Int! -// randomPage: Int! -// } -// type TestFooBarArray { -// fooBar: String! -// } -// type SitePage implements Node @dontInfer { -// id: ID! 
-// } -// `) -// } -const NUM_NODES = 200 +const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10) exports.sourceNodes = async ({ actions }) => { - // await takeHeapSnapshot(`sourceNodes-1`) - for (let i = 0; i < NUM_NODES; i++) { const largeSizeObj = {} for (let j = 1; j <= 1024; j++) { @@ -58,31 +31,90 @@ exports.sourceNodes = async ({ actions }) => { } await new Promise(resolve => setTimeout(resolve, 100)) +} - // await takeHeapSnapshot(`sourceNodes-2`) +const printedMessages = new Set() +exports.createResolvers = ({ createResolvers }) => { + createResolvers({ + Query: { + workerInfo: { + type: `String`, + args: { + label: `String!`, + }, + resolve: (_, args) => { + const msg = `${args.label} on ${ + process.env.GATSBY_WORKER_ID + ? `worker #${process.env.GATSBY_WORKER_ID}` + : `main` + }` + if (!printedMessages.has(msg)) { + printedMessages.add(msg) + console.log(msg) + } + return msg + }, + }, + }, + }) } -// exports.onCreateNode = ({ node, actions, getNode }) => { -// if (node.internal.type === `TestChild`) { -// const grandpa = getNode(node.parent) -// console.log({ grandpa }) +const WORKER_BATCH_SIZE = 50 +exports.createPages = async ({ actions, graphql }) => { + const numWorkers = Math.max(1, cpuCoreCount() - 1) + + // we do want ALL available workers to execute each query type + const minNumOfPagesToSaturateAllWorkers = WORKER_BATCH_SIZE * numWorkers + + const { data } = await graphql(` + { + allTest { + nodes { + id + idClone + } + } + } + `) + + // we might need to "duplicate" pages if node count is less than number of needed pages + const repeatCount = Math.min( + 1, + Math.ceil(minNumOfPagesToSaturateAllWorkers / data.allTest.nodes.length) + ) -// actions.createNode({ -// id: `${node.id} << test child2`, -// parent: node.id, -// internal: { -// type: `TestGrandChild`, -// contentDigest: `wa`, -// }, -// }) -// } -// } + function createEnoughToSaturate(cb) { + let counter = 0 + for (let i = 0; i < repeatCount; i++) { + for (const node of data.allTest.nodes) { + const { template, context } = cb(node) -exports.createPages = async ({ getNode, action, graphql }) => { - debugger + actions.createPage({ + path: `/${template}/${counter++}`, + component: require.resolve(`./src/templates/${template}`), + context, + }) + } + } + } - const node = getNode(`memory-1`) - // console.log({ node }) - // console.info(`just using node`, node.id) - // await takeHeapSnapshot(`create-pages`) + // fast path (eq: { id: x }) + createEnoughToSaturate(node => { + return { + template: `eq_id`, + context: { + id: node.id, + }, + } + }) + + // (eq: { idClone: x }) + createEnoughToSaturate(node => { + return { + template: `eq_field`, + context: { + id: node.id, + }, + } + }) } diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start index 6528dd2a8f7d3..f68497e2524f3 100755 --- a/benchmarks/memory/scripts/docker-start +++ b/benchmarks/memory/scripts/docker-start @@ -10,6 +10,11 @@ DOCKER_ID=$(\ docker run -td \ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ --mount type=bind,source="$(pwd)",target=/usr/src/app \ + --publish 9229:9229 \ + --publish 8000:8000 \ + --publish 9000:9000 \ + --memory="2g" \ + --memory-swap="2g" \ gatsby-memory \ | head -c 12 \ ) diff --git a/benchmarks/memory/src/pages/double_eq.js b/benchmarks/memory/src/templates/double_eq.js similarity index 100% rename from benchmarks/memory/src/pages/double_eq.js rename to benchmarks/memory/src/templates/double_eq.js diff --git a/benchmarks/memory/src/pages/eq_field.js 
b/benchmarks/memory/src/templates/eq_field.js similarity index 73% rename from benchmarks/memory/src/pages/eq_field.js rename to benchmarks/memory/src/templates/eq_field.js index 5d85bbc4c0d03..c881ada4c05f3 100644 --- a/benchmarks/memory/src/pages/eq_field.js +++ b/benchmarks/memory/src/templates/eq_field.js @@ -10,10 +10,11 @@ export default function Home({ data }) { } export const q = graphql` - { - test(idClone: { eq: "memory-2" }) { + query ($id: String!) { + test(idClone: { eq: $id }) { id fooBar } + workerInfo(label: "eq-field") } ` diff --git a/benchmarks/memory/src/pages/eq_id.js b/benchmarks/memory/src/templates/eq_id.js similarity index 74% rename from benchmarks/memory/src/pages/eq_id.js rename to benchmarks/memory/src/templates/eq_id.js index cde339ed799ab..3bca139fc3c26 100644 --- a/benchmarks/memory/src/pages/eq_id.js +++ b/benchmarks/memory/src/templates/eq_id.js @@ -10,10 +10,11 @@ export default function Home({ data }) { } export const q = graphql` - { - test(id: { eq: "memory-2" }) { + query ($id: String!) { + test(id: { eq: $id }) { id fooBar } + workerInfo(label: "eq-id") } ` From cbb7315b9cd9787beb406723e432a1be0f5a0581 Mon Sep 17 00:00:00 2001 From: Josh Date: Fri, 21 Jan 2022 08:47:11 -0500 Subject: [PATCH 05/42] Set up yarn commands for serve --- benchmarks/memory/Dockerfile | 2 +- benchmarks/memory/package.json | 4 +++- benchmarks/memory/scripts/docker-start | 1 + benchmarks/memory/scripts/enforce-docker | 13 +++++++++++++ 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100755 benchmarks/memory/scripts/enforce-docker diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index 91ff1cb53a400..7277d15c0ee4e 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -7,7 +7,7 @@ RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /us # TODO figure out port forwarding -EXPOSE 9229 +# EXPOSE 9229 # set up gatsby-dev diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index 53023d2d641a5..55e4d8fc65179 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -5,10 +5,12 @@ "description": "Test site stress testing memory usage", "license": "MIT", "scripts": { - "start": "echo $PWD; ls ..", + "gatsby:build": "./scripts/enforce-docker yarn gatsby build", + "gatsby:serve": "./scripts/enforce-docker yarn gatsby serve -H 0.0.0.0 -p 9229", "docker:build": "docker build -t gatsby-memory .", "docker:start": "./scripts/docker-start", "docker:connect": "./scripts/docker-connect", + "docker:start-and-connect": "./scripts/docker-start && sleep 1 && ./scripts/docker-connect", "docker:stop": "./scripts/docker-stop" }, "repository": { diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start index 6528dd2a8f7d3..826ce6345d247 100755 --- a/benchmarks/memory/scripts/docker-start +++ b/benchmarks/memory/scripts/docker-start @@ -8,6 +8,7 @@ fi # TODO ports still not working here... DOCKER_ID=$(\ docker run -td \ + -p 127.0.0.1:9229:9229 \ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ --mount type=bind,source="$(pwd)",target=/usr/src/app \ gatsby-memory \ diff --git a/benchmarks/memory/scripts/enforce-docker b/benchmarks/memory/scripts/enforce-docker new file mode 100755 index 0000000000000..9f5bf467a12df --- /dev/null +++ b/benchmarks/memory/scripts/enforce-docker @@ -0,0 +1,13 @@ +#!/bin/bash + +if [ ! 
-f /.dockerenv ]; then + DOCKER_ID=$(./scripts/docker-get-id) + COMMAND="start-and-connect" + if [ -n "$DOCKER_ID" ]; then + COMMAND="connect" + fi + echo -e "\nThis must be run inside the docker container. Please run \`yarn docker:${COMMAND}\` and try again.\n" + exit 1 +fi + +eval ${@:2} \ No newline at end of file From 7b99cc832bdfb2578cd24b2ac81baadb12f2506e Mon Sep 17 00:00:00 2001 From: Josh Date: Fri, 21 Jan 2022 10:26:01 -0500 Subject: [PATCH 06/42] Get debug commands working in docker --- benchmarks/memory/Dockerfile | 5 ----- benchmarks/memory/package.json | 7 +++++-- benchmarks/memory/scripts/docker-start | 3 --- benchmarks/memory/scripts/enforce-docker | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index 44e2a3536be45..2ed56572f9780 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -5,11 +5,6 @@ RUN npm i -g gatsby-cli gatsby-dev-cli WORKDIR /usr/src/app RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc - -# TODO figure out port forwarding -# EXPOSE 9229 - - # set up gatsby-dev RUN gatsby-dev --set-path-to-repo /usr/src/gatsby diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index 55e4d8fc65179..b29a2a04ad36f 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -5,8 +5,11 @@ "description": "Test site stress testing memory usage", "license": "MIT", "scripts": { - "gatsby:build": "./scripts/enforce-docker yarn gatsby build", - "gatsby:serve": "./scripts/enforce-docker yarn gatsby serve -H 0.0.0.0 -p 9229", + "gatsby:build": "yarn gatsby build", + "gatsby:serve": "yarn gatsby serve -H 0.0.0.0 -p 9000", + "gatsby:develop": "NODE_ENV=development yarn gatsby develop -H 0.0.0.0 -p 9000", + "gatsby:build:debug": "node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby build", + "gatsby:develop:debug": "NODE_ENV=development node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby develop -H 0.0.0.0 -p 9000", "docker:build": "docker build -t gatsby-memory .", "docker:start": "./scripts/docker-start", "docker:connect": "./scripts/docker-connect", diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start index 6440939b76309..235d3526b4d9b 100755 --- a/benchmarks/memory/scripts/docker-start +++ b/benchmarks/memory/scripts/docker-start @@ -5,14 +5,11 @@ if [ -n "$DOCKER_ID" ]; then return 1 fi -# TODO ports still not working here... DOCKER_ID=$(\ docker run -td \ - -p 127.0.0.1:9229:9229 \ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ --mount type=bind,source="$(pwd)",target=/usr/src/app \ --publish 9229:9229 \ - --publish 8000:8000 \ --publish 9000:9000 \ --memory="2g" \ --memory-swap="2g" \ diff --git a/benchmarks/memory/scripts/enforce-docker b/benchmarks/memory/scripts/enforce-docker index 9f5bf467a12df..43ede33d240db 100755 --- a/benchmarks/memory/scripts/enforce-docker +++ b/benchmarks/memory/scripts/enforce-docker @@ -10,4 +10,4 @@ if [ ! 
-f /.dockerenv ]; then exit 1 fi -eval ${@:2} \ No newline at end of file +${@:1} \ No newline at end of file From d6127b194ac02d03d851e44fa300f45dbe2d604e Mon Sep 17 00:00:00 2001 From: Josh Date: Fri, 21 Jan 2022 10:37:14 -0500 Subject: [PATCH 07/42] Add README --- benchmarks/memory/README.md | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 benchmarks/memory/README.md diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md new file mode 100644 index 0000000000000..07380f4104b85 --- /dev/null +++ b/benchmarks/memory/README.md @@ -0,0 +1,70 @@ +# Gatsby Memory Benchmark + +The goal of this benchmark is to test Gatsby's memory usage and look for potential optimizations. + +## The Docker Container + +The docker container used in these tests sets up a Debian instance with node 14 installed (as well as npm/yarn/etc). +It has ports 9000 (for hosting gatsby) and 9229 (for debugging) exposed. + +Within the container, two locations on your local filesystem are mounted: + +- /usr/src/gatsby : Your local gatsby repo +- /usr/src/app : The memory benchmark gatsby site + +## Commands + +### Docker + +These commands are used for interfacing with docker and have built-in utilities for managing the docker container. + +#### yarn docker:build + +Builds the container used for testing. + +#### yarn docker:start + +Starts the container built by `yarn docker:build`. + +#### yarn docker:connect + +Connects to the container started by `yarn docker:start`. + +#### yarn docker:start-and-connect + +A shorthand for start + connect. + +#### yarn docker:stop + +Stops the container used for testing. + +### Gatsby + +These commands are used for interfacing with gatsby. + +#### yarn gatsby:build + +Simply an alias to `yarn gatsby build`. + +#### yarn gatsby:serve + +Starts `gatsby serve` on port 9000 and sets the host properly to work inside docker. + +#### yarn gatsby:develop + +Starts `gatsby develop` on port 9000 and sets the host properly to work inside docker. + +#### yarn gatsby:build:debug + +Runs `gatsby build` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229. + +#### yarn gatsby:develop:debug + +Runs `gatsby develop` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
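
For example, a typical debugging session might look like the following sketch. It only chains together the commands documented above; the `chrome://inspect` step assumes Chrome as the DevTools client, and `gatsby-dev --packages` is standard gatsby-dev-cli usage rather than anything added by this benchmark:

```sh
# on the host: build the image, then start and enter the container
yarn docker:build
yarn docker:start-and-connect

# inside the container: copy your local gatsby packages into the site
# (the Dockerfile already ran `gatsby-dev --set-path-to-repo /usr/src/gatsby`)
gatsby-dev --packages gatsby

# inside the container: build with the inspector listening on 0.0.0.0:9229
yarn gatsby:build:debug

# back on the host: open chrome://inspect and attach to localhost:9229,
# which scripts/docker-start publishes from the container
```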
+ +## Testing + +TODO + +- How to configure memory limits +- Where to look
`worker #${process.env.GATSBY_WORKER_ID}` + : `main` + }` + if (!printedMessages.has(msg)) { + printedMessages.add(msg) + console.log(msg) + } + return msg + }, + }, + }, + }) } -// exports.onCreateNode = ({ node, actions, getNode }) => { -// if (node.internal.type === `TestChild`) { -// const grandpa = getNode(node.parent) -// console.log({ grandpa }) +const WORKER_BATCH_SIZE = 50 +exports.createPages = async ({ actions, graphql }) => { + const numWorkers = Math.max(1, cpuCoreCount() - 1) + + // we do want ALL available workers to execute each query type + const minNumOfPagesToSaturateAllWorkers = WORKER_BATCH_SIZE * numWorkers + + const { data } = await graphql(` + { + allTest { + nodes { + id + idClone + } + } + } + `) + + // we might need to "duplicate" pages if node count is less than number of needed pages + const repeatCount = Math.min( + 1, + Math.ceil(minNumOfPagesToSaturateAllWorkers / data.allTest.nodes.length) + ) -// actions.createNode({ -// id: `${node.id} << test child2`, -// parent: node.id, -// internal: { -// type: `TestGrandChild`, -// contentDigest: `wa`, -// }, -// }) -// } -// } + function createEnoughToSaturate(cb) { + let counter = 0 + for (let i = 0; i < repeatCount; i++) { + for (const node of data.allTest.nodes) { + const { template, context } = cb(node) -exports.createPages = async ({ getNode, action, graphql }) => { - debugger + actions.createPage({ + path: `/${template}/${counter++}`, + component: require.resolve(`./src/templates/${template}`), + context, + }) + } + } + } - const node = getNode(`memory-1`) - // console.log({ node }) - // console.info(`just using node`, node.id) - // await takeHeapSnapshot(`create-pages`) + // fast path (eq: { id: x }) + createEnoughToSaturate(node => { + return { + template: `eq_id`, + context: { + id: node.id, + }, + } + }) + + // (eq: { idClone: x }) + createEnoughToSaturate(node => { + return { + template: `eq_field`, + context: { + id: node.id, + }, + } + }) } diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start index 826ce6345d247..6440939b76309 100755 --- a/benchmarks/memory/scripts/docker-start +++ b/benchmarks/memory/scripts/docker-start @@ -11,6 +11,11 @@ DOCKER_ID=$(\ -p 127.0.0.1:9229:9229 \ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ --mount type=bind,source="$(pwd)",target=/usr/src/app \ + --publish 9229:9229 \ + --publish 8000:8000 \ + --publish 9000:9000 \ + --memory="2g" \ + --memory-swap="2g" \ gatsby-memory \ | head -c 12 \ ) diff --git a/benchmarks/memory/src/pages/double_eq.js b/benchmarks/memory/src/templates/double_eq.js similarity index 100% rename from benchmarks/memory/src/pages/double_eq.js rename to benchmarks/memory/src/templates/double_eq.js diff --git a/benchmarks/memory/src/pages/eq_field.js b/benchmarks/memory/src/templates/eq_field.js similarity index 73% rename from benchmarks/memory/src/pages/eq_field.js rename to benchmarks/memory/src/templates/eq_field.js index 5d85bbc4c0d03..c881ada4c05f3 100644 --- a/benchmarks/memory/src/pages/eq_field.js +++ b/benchmarks/memory/src/templates/eq_field.js @@ -10,10 +10,11 @@ export default function Home({ data }) { } export const q = graphql` - { - test(idClone: { eq: "memory-2" }) { + query ($id: String!) 
{ + test(idClone: { eq: $id }) { id fooBar } + workerInfo(label: "eq-field") } ` diff --git a/benchmarks/memory/src/pages/eq_id.js b/benchmarks/memory/src/templates/eq_id.js similarity index 74% rename from benchmarks/memory/src/pages/eq_id.js rename to benchmarks/memory/src/templates/eq_id.js index cde339ed799ab..3bca139fc3c26 100644 --- a/benchmarks/memory/src/pages/eq_id.js +++ b/benchmarks/memory/src/templates/eq_id.js @@ -10,10 +10,11 @@ export default function Home({ data }) { } export const q = graphql` - { - test(id: { eq: "memory-2" }) { + query ($id: String!) { + test(id: { eq: $id }) { id fooBar } + workerInfo(label: "eq-id") } ` From c5f812f34628296db25606484796a0b3ed26e37c Mon Sep 17 00:00:00 2001 From: Josh Date: Fri, 21 Jan 2022 10:26:01 -0500 Subject: [PATCH 13/42] Get debug commands working in docker --- benchmarks/memory/Dockerfile | 5 ----- benchmarks/memory/package.json | 7 +++++-- benchmarks/memory/scripts/docker-start | 3 --- benchmarks/memory/scripts/enforce-docker | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index 44e2a3536be45..2ed56572f9780 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -5,11 +5,6 @@ RUN npm i -g gatsby-cli gatsby-dev-cli WORKDIR /usr/src/app RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n - /usr/src/gatsby : Your local gatsby repo\\n - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc - -# TODO figure out port forwarding -# EXPOSE 9229 - - # set up gatsby-dev RUN gatsby-dev --set-path-to-repo /usr/src/gatsby diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index 55e4d8fc65179..b29a2a04ad36f 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -5,8 +5,11 @@ "description": "Test site stress testing memory usage", "license": "MIT", "scripts": { - "gatsby:build": "./scripts/enforce-docker yarn gatsby build", - "gatsby:serve": "./scripts/enforce-docker yarn gatsby serve -H 0.0.0.0 -p 9229", + "gatsby:build": "yarn gatsby build", + "gatsby:serve": "yarn gatsby serve -H 0.0.0.0 -p 9000", + "gatsby:develop": "NODE_ENV=development yarn gatsby develop -H 0.0.0.0 -p 9000", + "gatsby:build:debug": "node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby build", + "gatsby:develop:debug": "NODE_ENV=development node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby develop -H 0.0.0.0 -p 9000", "docker:build": "docker build -t gatsby-memory .", "docker:start": "./scripts/docker-start", "docker:connect": "./scripts/docker-connect", diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start index 6440939b76309..235d3526b4d9b 100755 --- a/benchmarks/memory/scripts/docker-start +++ b/benchmarks/memory/scripts/docker-start @@ -5,14 +5,11 @@ if [ -n "$DOCKER_ID" ]; then return 1 fi -# TODO ports still not working here... DOCKER_ID=$(\ docker run -td \ - -p 127.0.0.1:9229:9229 \ --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \ --mount type=bind,source="$(pwd)",target=/usr/src/app \ --publish 9229:9229 \ - --publish 8000:8000 \ --publish 9000:9000 \ --memory="2g" \ --memory-swap="2g" \ diff --git a/benchmarks/memory/scripts/enforce-docker b/benchmarks/memory/scripts/enforce-docker index 9f5bf467a12df..43ede33d240db 100755 --- a/benchmarks/memory/scripts/enforce-docker +++ b/benchmarks/memory/scripts/enforce-docker @@ -10,4 +10,4 @@ if [ ! 
-f /.dockerenv ]; then
   exit 1
 fi
 
-eval ${@:2}
\ No newline at end of file
+${@:1}
\ No newline at end of file

From bada30cd18061d48772518877ac6a292ba02494c Mon Sep 17 00:00:00 2001
From: Josh
Date: Fri, 21 Jan 2022 10:37:14 -0500
Subject: [PATCH 14/42] Add README

---
 benchmarks/memory/README.md | 70 +++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 benchmarks/memory/README.md

diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md
new file mode 100644
index 0000000000000..07380f4104b85
--- /dev/null
+++ b/benchmarks/memory/README.md
@@ -0,0 +1,70 @@
+# Gatsby Memory Benchmark
+
+The goal of this benchmark is to test Gatsby's memory usage and look for potential optimizations.
+
+## The Docker Container
+
+The docker container used in these tests sets up a Debian instance with node 14 installed (as well as npm/yarn/etc).
+It has ports 9000 (for hosting gatsby) and 9229 (for debugging) exposed.
+
+Within the container, two paths on your local filesystem are mounted:
+
+- /usr/src/gatsby : Your local gatsby repo
+- /usr/src/app : The memory benchmark gatsby site
+
+## Commands
+
+### Docker
+
+These commands are used for interfacing with docker and have built-in utilities for managing the docker container.
+
+#### yarn docker:build
+
+Builds the container used for testing.
+
+#### yarn docker:start
+
+Starts the container built by `yarn docker:build`.
+
+#### yarn docker:connect
+
+Connects to the container started by `yarn docker:start`.
+
+#### yarn docker:start-and-connect
+
+A shorthand for start + connect.
+
+#### yarn docker:stop
+
+Stops the container used for testing.
+
+### Gatsby
+
+These commands are used for interfacing with gatsby.
+
+#### yarn gatsby:build
+
+Simply an alias to `yarn gatsby build`.
+
+#### yarn gatsby:serve
+
+Starts `gatsby serve` on port 9000 and sets the host properly to work inside docker.
+
+#### yarn gatsby:develop
+
+Starts `gatsby develop` on port 9000 and sets the host properly to work inside docker.
+
+#### yarn gatsby:build:debug
+
+Runs `gatsby build` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
+
+#### yarn gatsby:develop:debug
+
+Runs `gatsby develop` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
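+
+As a quick sketch of how these commands fit together (run from `benchmarks/memory` on the host; Chrome is just one possible inspector client, any client that can attach to a Node inspector port works):
+
+```shell
+# on the host
+yarn docker:build
+yarn docker:start-and-connect
+
+# inside the container
+yarn gatsby:build:debug
+
+# back on the host, open chrome://inspect and attach to localhost:9229
+```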
+ +## Testing + +TODO + +- How to configure memory limits +- Where to look From 1d0c6624d087ac265026197d53187c58ac635a56 Mon Sep 17 00:00:00 2001 From: Josh Date: Fri, 21 Jan 2022 15:34:57 -0500 Subject: [PATCH 15/42] Add docker:stats to view status easier --- benchmarks/memory/package.json | 3 ++- benchmarks/memory/scripts/docker-stats | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100755 benchmarks/memory/scripts/docker-stats diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index b29a2a04ad36f..3a1fb33f19897 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -14,7 +14,8 @@ "docker:start": "./scripts/docker-start", "docker:connect": "./scripts/docker-connect", "docker:start-and-connect": "./scripts/docker-start && sleep 1 && ./scripts/docker-connect", - "docker:stop": "./scripts/docker-stop" + "docker:stop": "./scripts/docker-stop", + "docker:stats": "./scripts/docker-stats" }, "repository": { "type": "git", diff --git a/benchmarks/memory/scripts/docker-stats b/benchmarks/memory/scripts/docker-stats new file mode 100755 index 0000000000000..9fb96494108b7 --- /dev/null +++ b/benchmarks/memory/scripts/docker-stats @@ -0,0 +1,18 @@ +#!/bin/bash + +DOCKER_ID=$(./scripts/docker-get-id) +if [ -z "$DOCKER_ID" ]; then + echo -e "\nNo gatsby-memory container was found. Run \`yarn docker:start\` to start one.\n" + exit 1 +fi + +FORMAT="Gatsby Memory Benchmark Container----CPU: {{.CPUPerc }}--Memory: {{.MemUsage}}--Network: {{.NetIO}}" +STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT") +clear + +while [ -n "$STATS" ]; do + echo $STATS | sed "s/--/\n/g" + DOCKER_ID=$(./scripts/docker-get-id) + STATS=$(docker stats $DOCKER_ID --no-stream --format="$FORMAT") + clear +done \ No newline at end of file From f42b723323241298ad9f797e6fbba818e1612f59 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Tue, 25 Jan 2022 22:55:23 +0100 Subject: [PATCH 16/42] drop unused parts of site --- benchmarks/memory/gatsby-config.js | 2 +- benchmarks/memory/plugins/wat/gatsby-node.js | 18 ------------------ benchmarks/memory/plugins/wat/index.js | 0 benchmarks/memory/plugins/wat/package.json | 1 - 4 files changed, 1 insertion(+), 20 deletions(-) delete mode 100644 benchmarks/memory/plugins/wat/gatsby-node.js delete mode 100644 benchmarks/memory/plugins/wat/index.js delete mode 100644 benchmarks/memory/plugins/wat/package.json diff --git a/benchmarks/memory/gatsby-config.js b/benchmarks/memory/gatsby-config.js index 10141cf5319df..5ae66ab282a51 100644 --- a/benchmarks/memory/gatsby-config.js +++ b/benchmarks/memory/gatsby-config.js @@ -1,3 +1,3 @@ module.exports = { - plugins: [`wat`], + plugins: [], } diff --git a/benchmarks/memory/plugins/wat/gatsby-node.js b/benchmarks/memory/plugins/wat/gatsby-node.js deleted file mode 100644 index 744a389e4d0ea..0000000000000 --- a/benchmarks/memory/plugins/wat/gatsby-node.js +++ /dev/null @@ -1,18 +0,0 @@ -// exports.sourceNodes = () => { -// console.log(`wat`) -// } -// exports.onCreateNode = ({ node, actions, getNode }) => { -// if (node.internal.type === `Test`) { -// const fromLMDB = getNode(node.id) - -// console.log({ node, fromLMDB }) -// actions.createNode({ -// id: `${node.id} << test child`, -// parent: node.id, -// internal: { -// type: `TestChild`, -// contentDigest: `wa`, -// }, -// }) -// } -// } diff --git a/benchmarks/memory/plugins/wat/index.js b/benchmarks/memory/plugins/wat/index.js deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff 
--git a/benchmarks/memory/plugins/wat/package.json b/benchmarks/memory/plugins/wat/package.json deleted file mode 100644 index 0967ef424bce6..0000000000000 --- a/benchmarks/memory/plugins/wat/package.json +++ /dev/null @@ -1 +0,0 @@ -{} From ae35ac08d8a1562313d0ba59dd296a9706cb1b38 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Tue, 25 Jan 2022 23:01:49 +0100 Subject: [PATCH 17/42] show progress on node creation --- benchmarks/memory/gatsby-node.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js index 5bf3fa5cc05e3..a0d5e6b4cc89c 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -2,7 +2,10 @@ const { cpuCoreCount } = require(`gatsby-core-utils`) const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10) -exports.sourceNodes = async ({ actions }) => { +exports.sourceNodes = async ({ actions, reporter }) => { + const activity = reporter.createProgress(`Creating test nodes`, NUM_NODES) + activity.start() + for (let i = 0; i < NUM_NODES; i++) { const largeSizeObj = {} for (let j = 1; j <= 1024; j++) { @@ -26,11 +29,16 @@ exports.sourceNodes = async ({ actions }) => { actions.createNode(node) if (i % 100 === 99) { + activity.tick(100) await new Promise(resolve => setImmediate(resolve)) } } + activity.tick(NUM_NODES % 100) + await new Promise(resolve => setTimeout(resolve, 100)) + + activity.end() } const printedMessages = new Set() From 72ca8bc076ccd1837767fd2a74b592a71288333f Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Tue, 25 Jan 2022 23:06:31 +0100 Subject: [PATCH 18/42] use GATSBY_PARALLEL_QUERY_CHUNK_SIZE if provided --- benchmarks/memory/gatsby-node.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js index a0d5e6b4cc89c..0a0e0e2c07a0d 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -67,7 +67,9 @@ exports.createResolvers = ({ createResolvers }) => { }) } -const WORKER_BATCH_SIZE = 50 +const WORKER_BATCH_SIZE = + Number(process.env.GATSBY_PARALLEL_QUERY_CHUNK_SIZE) || 50 + exports.createPages = async ({ actions, graphql }) => { const numWorkers = Math.max(1, cpuCoreCount() - 1) @@ -102,6 +104,10 @@ exports.createPages = async ({ actions, graphql }) => { component: require.resolve(`./src/templates/${template}`), context, }) + + if (counter >= minNumOfPagesToSaturateAllWorkers) { + break + } } } } From 0c8c2acc4029ee9bcbae1c8f0fbac347c120629d Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Tue, 25 Jan 2022 23:16:21 +0100 Subject: [PATCH 19/42] don't use gatsby-dev inspired version of gatsby --- benchmarks/memory/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json index 3a1fb33f19897..2d63ab39c23be 100644 --- a/benchmarks/memory/package.json +++ b/benchmarks/memory/package.json @@ -25,7 +25,7 @@ "url": "https://github.com/gatsbyjs/gatsby/issues" }, "dependencies": { - "gatsby": "4.6.0-next.3-dev-1642528625779", + "gatsby": "^4", "react": "^17.0.2", "react-dom": "^17.0.2" } From bf3a1e525678084d63ea082d4dcb409e0799bf89 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Wed, 26 Jan 2022 00:17:34 +0100 Subject: [PATCH 20/42] mark nodes as dirty on each build --- benchmarks/memory/gatsby-node.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmarks/memory/gatsby-node.js 
b/benchmarks/memory/gatsby-node.js index 0a0e0e2c07a0d..11fdde4302e3f 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -3,6 +3,8 @@ const { cpuCoreCount } = require(`gatsby-core-utils`) const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10) exports.sourceNodes = async ({ actions, reporter }) => { + const contentDigest = Date.now().toString() // make each sourcing mark everything as dirty + const activity = reporter.createProgress(`Creating test nodes`, NUM_NODES) activity.start() @@ -21,7 +23,7 @@ exports.sourceNodes = async ({ actions, reporter }) => { largeSizeObj, largeSizeString: `x`.repeat(1024 * 1024), internal: { - contentDigest: `hash`, // we won't be changing nodes so this can be hardcoded + contentDigest, type: `Test`, }, } From e037a6c6f06da1f0da00d2b6e1e40d758a580986 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Wed, 26 Jan 2022 00:47:32 +0100 Subject: [PATCH 21/42] consistently use same amount of workers 3 (+1 main process) --- benchmarks/memory/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile index 2ed56572f9780..80f6e52c38966 100644 --- a/benchmarks/memory/Dockerfile +++ b/benchmarks/memory/Dockerfile @@ -1,5 +1,7 @@ FROM node:14-buster ENV NODE_ENV=production +ENV CI=1 +ENV GATSBY_CPU_COUNT=4 RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y RUN npm i -g gatsby-cli gatsby-dev-cli WORKDIR /usr/src/app From 60d0236f80dfd7e289f8e9a4ebc7867e63552051 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Wed, 26 Jan 2022 00:48:04 +0100 Subject: [PATCH 22/42] add TEMPLATES env var to control which queries to run --- benchmarks/memory/gatsby-node.js | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js index 11fdde4302e3f..7157dcc361991 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -14,6 +14,7 @@ exports.sourceNodes = async ({ actions, reporter }) => { largeSizeObj[`key_${j}`] = `x`.repeat(1024) } + // each node is ~2MB const node = { id: `memory-${i}`, idClone: `memory-${i}`, @@ -72,6 +73,23 @@ exports.createResolvers = ({ createResolvers }) => { const WORKER_BATCH_SIZE = Number(process.env.GATSBY_PARALLEL_QUERY_CHUNK_SIZE) || 50 +let enabledTemplates = new Set() +exports.onPreBootstrap = () => { + const availableTemplates = new Set([ + `eq_id`, // this should skip node-model and fast filters completely and should be very cheap already + `eq_field`, // this needs fast filters for eq operator on non-id field + ]) + enabledTemplates = new Set( + process.env.TEMPLATES + ? 
process.env.TEMPLATES.split(`,`).filter(template =>
+          availableTemplates.has(template)
+        )
+      : availableTemplates
+  )
+
+  console.info(`Enabled templates`, enabledTemplates)
+}
+
 exports.createPages = async ({ actions, graphql }) => {
   const numWorkers = Math.max(1, cpuCoreCount() - 1)
 
@@ -95,11 +113,15 @@ exports.createPages = async ({ actions, graphql }) => {
     Math.ceil(minNumOfPagesToSaturateAllWorkers / data.allTest.nodes.length)
   )
 
-  function createEnoughToSaturate(cb) {
+  function createEnoughToSaturate(template, cb) {
+    if (!enabledTemplates.has(template)) {
+      return
+    }
+    console.log(`Creating pages with template "${template}"`)
     let counter = 0
     for (let i = 0; i < repeatCount; i++) {
       for (const node of data.allTest.nodes) {
-        const { template, context } = cb(node)
+        const { context } = cb(node)
 
         actions.createPage({
           path: `/${template}/${counter++}`,
           component: require.resolve(`./src/templates/${template}`),
           context,
         })
@@ -115,9 +137,8 @@ exports.createPages = async ({ actions, graphql }) => {
   }
 
   // fast path (eq: { id: x })
-  createEnoughToSaturate(node => {
+  createEnoughToSaturate(`eq_id`, node => {
     return {
-      template: `eq_id`,
       context: {
         id: node.id,
       },
     }
   })
 
   // (eq: { idClone: x })
-  createEnoughToSaturate(node => {
+  createEnoughToSaturate(`eq_field`, node => {
     return {
-      template: `eq_field`,
       context: {
         id: node.id,
       },
     }
   })

From e5492b13df9887e4be41355a5379758ef2117a02 Mon Sep 17 00:00:00 2001
From: Michal Piechowiak
Date: Wed, 26 Jan 2022 01:06:30 +0100
Subject: [PATCH 23/42] some additions to README

---
 benchmarks/memory/README.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md
index 07380f4104b85..445abd8415bf4 100644
--- a/benchmarks/memory/README.md
+++ b/benchmarks/memory/README.md
@@ -62,6 +62,33 @@ Runs `gatsby develop` with `inspect-brk` set to start the [debugging process](https://www.gatsbyjs.com/docs/debugging-the-build-process/) on port 9229.
+
+## Setup
+
+Currently we can reproduce builds crashing with our default settings:
+
+- Docker container running with 2GB limit
+- 300 nodes x ~2MB each = ~600MB of "just" nodes data in each process (the number of nodes can be controlled with the NUM_NODES env var)
+- 3 workers + main process (GATSBY_CPU_COUNT is set to 4 in the docker image, but you can specify a different value with the env var)
+- `eq_field` template using fast filters (single `eq` specifically)
+
+The goal is to make the `eq_field` template stop causing crashes, then add the next template (a different operator) that causes crashes, and repeat until all queries can be handled within the set memory limits (a sample invocation is shown under Workflow below).
+
+### Workflow
+
+While the `gatsby-dev` command is available inside docker, from my testing it seems it doesn't pick up file changes when run there.
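+
+For illustration, a single constrained run inside the container might be launched like this (a sketch only; the values shown are just the defaults described under Setup):
+
+```shell
+NUM_NODES=300 GATSBY_CPU_COUNT=4 TEMPLATES=eq_field yarn gatsby:build
+```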
+The workflow that seems to work reliably:
+
+When starting work on this benchmark:
+
+- start `yarn watch` (possibly with `--scope`) in the monorepo
+- start `gatsby-dev` outside of docker in the benchmark directory (just like with a regular site)
+- `yarn docker:connect` to get inside docker
+- `npm rebuild` to rebuild binaries inside docker
+
+And repeat as many times as you want:
+
+- make changes to `gatsby` source code as you normally would
+- run `yarn build` inside docker
+
 ## Testing
 
 TODO

From d470672ed956a0eab3b1686540af89ba00bac6a1 Mon Sep 17 00:00:00 2001
From: Michal Piechowiak
Date: Wed, 26 Jan 2022 01:06:56 +0100
Subject: [PATCH 24/42] drop more unused things

---
 benchmarks/memory/src/templates/double_eq.js | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 benchmarks/memory/src/templates/double_eq.js

diff --git a/benchmarks/memory/src/templates/double_eq.js b/benchmarks/memory/src/templates/double_eq.js
deleted file mode 100644
index 43777d9fb007f..0000000000000
--- a/benchmarks/memory/src/templates/double_eq.js
+++ /dev/null
@@ -1,19 +0,0 @@
-import React from "react"
-import { graphql } from "gatsby"
-
-export default function Home({ data }) {
-  return (
-    <div>
-      <pre>{JSON.stringify(data, null, 2)}</pre>
-    </div>
- ) -} - -export const q = graphql` - { - test(number1: { gt: 4 }, number2: { lt: 10 }) { - id - fooBar - } - } -` From 56f3c938bc773125be6476de69742072809982b1 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Wed, 26 Jan 2022 16:57:33 +0100 Subject: [PATCH 25/42] tmp: use ids instead of full nodes for fast filters --- .../src/datastore/in-memory/indexing.ts | 163 +++++++++++------- .../datastore/in-memory/run-fast-filters.ts | 35 +++- 2 files changed, 127 insertions(+), 71 deletions(-) diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 7d6acc42c46e5..4ece80f0d0b83 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -5,7 +5,7 @@ import { FilterValue, FilterValueNullable, } from "../common/query" -import { getDataStore } from "../" +import { getDataStore, getNode } from "../" // Only list supported ops here. "CacheableFilterOp" export type FilterOp = // TODO: merge with DbComparator ? @@ -21,6 +21,7 @@ export type FilterOp = // TODO: merge with DbComparator ? // Note: `undefined` is an encoding for a property that does not exist export type FilterCacheKey = string +export type GatsbyNodeID = string export interface IFilterCache { op: FilterOp // In this map `undefined` values represent nodes that did not have the path @@ -30,22 +31,22 @@ export interface IFilterCache { // This arrays may contain duplicates (!) because those only get filtered in the // last step. // TODO: We might decide to make sure these buckets _are_ deduped for eq perf - byValue: Map> + byValue: Map> meta: { // Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort - nodesUnordered?: Array + nodesUnordered?: Array // Flat list of all nodes by requested types, ordered by counter (cached for empty filters) - orderedByCounter?: Array + orderedByCounter?: Array // Ordered list of all values (by `<`) found by this filter. No null / undefs valuesAsc?: Array // Flat list of nodes, ordered by valueAsc - nodesByValueAsc?: Array + nodesByValueAsc?: Array // Ranges of nodes per value, maps to the nodesByValueAsc array valueRangesAsc?: Map // Ordered list of all values (by `>`) found by this filter. No null / undefs valuesDesc?: Array // Flat list of nodes, ordered by valueDesc - nodesByValueDesc?: Array + nodesByValueDesc?: Array // Ranges of nodes per value, maps to the nodesByValueDesc array valueRangesDesc?: Map } @@ -59,7 +60,11 @@ export function postIndexingMetaSetup( // Loop through byValue and make sure the buckets are sorted by counter // Since we don't do insertion sort, we have to do it afterwards for (const bucket of filterCache.byValue) { - bucket[1].sort((a, b) => a.internal.counter - b.internal.counter) + bucket[1].sort( + (a, b) => + (getNode(a)?.internal?.counter ?? 0) - + (getNode(b)?.internal?.counter ?? 0) + ) } if (op === `$ne` || op === `$nin`) { @@ -79,15 +84,14 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void { // including nodes where the value is null. // A $nin does the same as an $ne except it filters multiple values instead // of just one. - // For `$ne` we will take the list of all targeted nodes and eliminate the // bucket of nodes with a particular value, if it exists at all.. 
- const arr: Array = [] + const arr: Array = [] filterCache.meta.nodesUnordered = arr filterCache.byValue.forEach(v => { - v.forEach(node => { - arr.push(node) + v.forEach(nodeId => { + arr.push(nodeId) }) }) } @@ -101,15 +105,15 @@ function postIndexingMetaSetupLtLteGtGte( // internal.counter, asc. // This way non-eq ops can simply slice the array to get a range. - const entriesNullable: Array<[FilterValueNullable, Array]> = [ + const entriesNullable: Array<[FilterValueNullable, Array]> = [ ...filterCache.byValue.entries(), ] // These range checks never return `null` or `undefined` so filter those out // By filtering them out early, the sort should be faster. Could be ... - const entries: Array<[FilterValue, Array]> = + const entries: Array<[FilterValue, Array]> = entriesNullable.filter(([v]) => v != null) as Array< - [FilterValue, Array] + [FilterValue, Array] > // Sort all arrays by its value, asc. Ignore/allow potential type casting. @@ -133,10 +137,10 @@ function postIndexingMetaSetupLtLteGtGte( entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0)) } - const orderedNodes: Array = [] + const orderedNodes: Array = [] const orderedValues: Array = [] const offsets: Map = new Map() - entries.forEach(([v, bucket]: [FilterValue, Array]) => { + entries.forEach(([v, bucket]: [FilterValue, Array]) => { // Record the range containing all nodes with as filter value v // The last value of the range should be the offset of the next value // (So you should be able to do `nodes.slice(start, stop)` to get them) @@ -178,12 +182,19 @@ export const ensureIndexByQuery = ( nodeTypeNames: Array, filtersCache: FiltersCache ): void => { + const readableWorkerId = process.env.GATSBY_WORKER_ID + ? `worker #${process.env.GATSBY_WORKER_ID}` + : `main` + + console.log( + `ensureIndexByQuery "${filterCacheKey}" start ${readableWorkerId}` + ) const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -213,6 +224,8 @@ export const ensureIndexByQuery = ( } postIndexingMetaSetup(filterCache, op) + + console.log(`ensureIndexByQuery "${filterCacheKey}" end ${readableWorkerId}`) } export function ensureEmptyFilterCache( @@ -226,11 +239,11 @@ export function ensureEmptyFilterCache( const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache - const orderedByCounter: Array = [] + const orderedByCounter: Array = [] filtersCache.set(filterCacheKey, { op: `$eq`, // Ignore. - byValue: new Map>(), + byValue: new Map>(), meta: { orderedByCounter, // This is what we want }, @@ -248,7 +261,7 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(node) + orderedByCounter.push(node.id) }) } else { // Here we must first filter for the node type @@ -265,14 +278,16 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(node) + orderedByCounter.push(node.id) } }) } // Since each node can only have one type, we shouldn't have to be concerned // about duplicates in this array. Just make sure they're sorted. 
- orderedByCounter.sort((a, b) => a.internal.counter - b.internal.counter) + orderedByCounter.sort( + (a, b) => getNode(a)!.internal.counter - getNode(b)!.internal.counter + ) } function addNodeToFilterCache( @@ -335,7 +350,7 @@ function markNodeForValue( arr = [] filterCache.byValue.set(value, arr) } - arr.push(node) + arr.push(node.id) } export const ensureIndexByElemMatch = ( @@ -353,7 +368,7 @@ export const ensureIndexByElemMatch = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -540,7 +555,7 @@ export const getNodesFromCacheByValue = ( filterValue: FilterValueNullable, filtersCache: FiltersCache, wasElemMatch -): Array | undefined => { +): Array | undefined => { const filterCache = filtersCache.get(filterCacheKey) if (!filterCache) { return undefined @@ -573,7 +588,7 @@ export const getNodesFromCacheByValue = ( } const filterValueArr: Array = filterValue - const set: Set = new Set() + const set: Set = new Set() // TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works. // For every value in the needle array, find the bucket of nodes for @@ -583,7 +598,9 @@ export const getNodesFromCacheByValue = ( ) const arr = [...set] // this is bad for perf but will guarantee us a unique set :( - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // Note: it's very unlikely that the list of filter values is big so .includes should be fine here if (filterValueArr.includes(null)) { @@ -622,7 +639,9 @@ export const getNodesFromCacheByValue = ( // TODO: there's probably a more efficient algorithm to do set // subtraction in such a way that we don't have to re-sort - return [...set].sort((A, B) => A.internal.counter - B.internal.counter) + return [...set].sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) } if (op === `$ne`) { @@ -632,7 +651,9 @@ export const getNodesFromCacheByValue = ( // TODO: there's probably a more efficient algorithm to do set // subtraction in such a way that we don't have to resort here - return [...set].sort((A, B) => A.internal.counter - B.internal.counter) + return [...set].sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) } if (op === `$regex`) { @@ -649,7 +670,7 @@ export const getNodesFromCacheByValue = ( } const regex = filterValue - const arr: Array = [] + const arr: Array = [] filterCache.byValue.forEach((nodes, value) => { // TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce. // Note: for legacy reasons partial paths should also be included for regex @@ -661,7 +682,9 @@ export const getNodesFromCacheByValue = ( // TODO: we _can_ cache this list as well. 
Might make sense if it turns out that $regex is mostly used with literals // TODO: it may make sense to first collect all buckets and then to .concat them, or merge sort them - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { @@ -706,7 +729,9 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[0]) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -746,7 +771,9 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue < filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -764,7 +791,9 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[1]) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -804,7 +833,9 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue <= filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until) - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -822,7 +853,9 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[0]).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -862,7 +895,9 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue > filterValue ? 
inclPivot : exclPivot const arr = nodes!.slice(0, until).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -880,7 +915,9 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[1]).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -920,7 +957,9 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue >= filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until).reverse() - arr.sort((A, B) => A.internal.counter - B.internal.counter) + arr.sort( + (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter + ) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -935,7 +974,7 @@ export const getNodesFromCacheByValue = ( function removeBucketFromSet( filterValue: FilterValueNullable, filterCache: IFilterCache, - set: Set + set: Set ): void { if (filterValue === null) { // Edge case: $ne with `null` returns only the nodes that contain the full @@ -960,20 +999,20 @@ function removeBucketFromSet( * list that is also ordered by node.internal.counter */ export function intersectNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { let pointerA = 0 let pointerB = 0 // TODO: perf check: is it helpful to init the array to min(maxA,maxB) items? - const result: Array = [] + const result: Array = [] const maxA = a.length const maxB = b.length let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list while (pointerA < maxA && pointerB < maxB) { - const nodeA = a[pointerA] - const nodeB = b[pointerB] + const nodeA = getNode(a[pointerA])! + const nodeB = getNode(b[pointerB])! const counterA = nodeA.internal.counter const counterB = nodeB.internal.counter @@ -992,7 +1031,7 @@ export function intersectNodesByCounter( // back to back, so even if both input arrays contained the same node // twice, this check would prevent the result from getting duplicate nodes if (lastAdded !== nodeA) { - result.push(nodeA) + result.push(nodeA.id) lastAdded = nodeA } pointerA++ @@ -1011,11 +1050,11 @@ export function intersectNodesByCounter( * list that is also ordered by node.internal.counter */ export function unionNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items? - const arr: Array = [] + const arr: Array = [] let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list let pointerA = 0 @@ -1024,26 +1063,26 @@ export function unionNodesByCounter( const maxB = b.length while (pointerA < maxA && pointerB < maxB) { - const nodeA = a[pointerA] - const nodeB = b[pointerB] + const nodeA = getNode(a[pointerA])! + const nodeB = getNode(b[pointerB])! 
const counterA = nodeA.internal.counter const counterB = nodeB.internal.counter if (counterA < counterB) { if (lastAdded !== nodeA) { - arr.push(nodeA) + arr.push(nodeA.id) lastAdded = nodeA } pointerA++ } else if (counterA > counterB) { if (lastAdded !== nodeB) { - arr.push(nodeB) + arr.push(nodeB.id) lastAdded = nodeB } pointerB++ } else { if (lastAdded !== nodeA) { - arr.push(nodeA) + arr.push(nodeA.id) lastAdded = nodeA } pointerA++ @@ -1052,18 +1091,18 @@ export function unionNodesByCounter( } while (pointerA < maxA) { - const nodeA = a[pointerA] + const nodeA = getNode(a[pointerA])! if (lastAdded !== nodeA) { - arr.push(nodeA) + arr.push(nodeA.id) lastAdded = nodeA } pointerA++ } while (pointerB < maxB) { - const nodeB = b[pointerB] + const nodeB = getNode(b[pointerB])! if (lastAdded !== nodeB) { - arr.push(nodeB) + arr.push(nodeB.id) lastAdded = nodeB } pointerB++ @@ -1072,11 +1111,11 @@ export function unionNodesByCounter( return arr } -function expensiveDedupeInline(arr: Array): void { +function expensiveDedupeInline(arr: Array): void { // An elemMatch filter may cause duplicates to appear in a bucket. // Since the bucket is sorted those should now be back to back // Worst case this is a fast O(n) loop that does nothing. - let prev: IGatsbyNode | undefined = undefined + let prev: GatsbyNodeID | undefined = undefined // We copy-on-find because a splice is expensive and we can't use Sets diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts index 53eaeddce63d6..da2df4d09b54f 100644 --- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts +++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts @@ -22,11 +22,16 @@ import { getNodesFromCacheByValue, intersectNodesByCounter, IFilterCache, + GatsbyNodeID, } from "./indexing" import { IGraphQLRunnerStats } from "../../query/types" import { IRunQueryArgs, IQueryResult } from "../types" import { GatsbyIterable } from "../common/iterable" +import { getNode } from "../" +function isGatsbyNode(node: IGatsbyNode | undefined): node is IGatsbyNode { + return !!node +} export interface IRunFilterArg extends IRunQueryArgs { filtersCache: FiltersCache } @@ -101,8 +106,8 @@ export function applyFastFilters( while (nodesPerValueArrs.length > 1) { // TS limitation: cannot guard against .pop(), so we must double cast - const a = nodesPerValueArrs.pop() as unknown as Array - const b = nodesPerValueArrs.pop() as unknown as Array + const a = nodesPerValueArrs.pop() as unknown as Array + const b = nodesPerValueArrs.pop() as unknown as Array nodesPerValueArrs.push(intersectNodesByCounter(a, b)) } @@ -113,7 +118,7 @@ export function applyFastFilters( return null } - return result + return result.map(getNode).filter(isGatsbyNode) } } @@ -124,8 +129,8 @@ function getBucketsForFilters( filters: Array, nodeTypeNames: Array, filtersCache: FiltersCache -): Array> | undefined { - const nodesPerValueArrs: Array> = [] +): Array> | undefined { + const nodesPerValueArrs: Array> = [] // Fail fast while trying to create and get the value-cache for each path const every = filters.every(filter => { @@ -170,7 +175,7 @@ function getBucketsForQueryFilter( filter: IDbQueryQuery, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { const { path: filterPath, @@ -187,6 +192,14 @@ function getBucketsForQueryFilter( ) } + const readableWorkerId = process.env.GATSBY_WORKER_ID + ? 
`worker #${process.env.GATSBY_WORKER_ID}` + : `main` + + console.log( + `getBucketsForQueryFilter "${filterCacheKey}" start ${readableWorkerId}` + ) + const nodesPerValue = getNodesFromCacheByValue( filterCacheKey, filterValue as FilterValueNullable, @@ -202,6 +215,10 @@ function getBucketsForQueryFilter( // mechanism does not create an array unless there's a IGatsbyNode for it nodesPerValueArrs.push(nodesPerValue) + console.log( + `getBucketsForQueryFilter "${filterCacheKey}" end ${readableWorkerId}` + ) + return true } @@ -213,7 +230,7 @@ function collectBucketForElemMatch( filter: IDbQueryElemMatch, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { // Get comparator and target value for this elemMatch let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init) @@ -339,9 +356,9 @@ function convertAndApplyFastFilters( // If there's a filter, there (now) must be an entry for this cache key const filterCache = filtersCache.get(filterCacheKey) as IFilterCache // If there is no filter then the ensureCache step will populate this: - const cache = filterCache.meta.orderedByCounter as Array + const cache = filterCache.meta.orderedByCounter as Array - return cache.slice(0) + return cache.slice(0).map(getNode).filter(isGatsbyNode) } const result = applyFastFilters(filters, nodeTypeNames, filtersCache) From 22d8ad0c5f7b84ef9804a47ed9478bd6aff1f536 Mon Sep 17 00:00:00 2001 From: Josh Date: Wed, 26 Jan 2022 15:01:22 -0500 Subject: [PATCH 26/42] Remove a ton of calls to getNode --- .../src/datastore/in-memory/indexing.ts | 166 ++++++++---------- .../datastore/in-memory/run-fast-filters.ts | 20 +-- 2 files changed, 85 insertions(+), 101 deletions(-) diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 4ece80f0d0b83..4862c53f7c0d0 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -21,7 +21,17 @@ export type FilterOp = // TODO: merge with DbComparator ? // Note: `undefined` is an encoding for a property that does not exist export type FilterCacheKey = string -export type GatsbyNodeID = string +type GatsbyNodeID = string + +export type GatsbyNodeIdentifiers = { + id: GatsbyNodeID, + counter: number, +} + +const getIdentifierObjectFromNode = (node: IGatsbyNode) => { return { id: node.id, counter: node.internal.counter }} + +const sortByIds = (a: GatsbyNodeIdentifiers, b: GatsbyNodeIdentifiers) => a.counter - b.counter; + export interface IFilterCache { op: FilterOp // In this map `undefined` values represent nodes that did not have the path @@ -31,22 +41,22 @@ export interface IFilterCache { // This arrays may contain duplicates (!) because those only get filtered in the // last step. // TODO: We might decide to make sure these buckets _are_ deduped for eq perf - byValue: Map> + byValue: Map> meta: { // Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort - nodesUnordered?: Array + nodesUnordered?: Array // Flat list of all nodes by requested types, ordered by counter (cached for empty filters) - orderedByCounter?: Array + orderedByCounter?: Array // Ordered list of all values (by `<`) found by this filter. 
No null / undefs valuesAsc?: Array // Flat list of nodes, ordered by valueAsc - nodesByValueAsc?: Array + nodesByValueAsc?: Array // Ranges of nodes per value, maps to the nodesByValueAsc array valueRangesAsc?: Map // Ordered list of all values (by `>`) found by this filter. No null / undefs valuesDesc?: Array // Flat list of nodes, ordered by valueDesc - nodesByValueDesc?: Array + nodesByValueDesc?: Array // Ranges of nodes per value, maps to the nodesByValueDesc array valueRangesDesc?: Map } @@ -60,11 +70,7 @@ export function postIndexingMetaSetup( // Loop through byValue and make sure the buckets are sorted by counter // Since we don't do insertion sort, we have to do it afterwards for (const bucket of filterCache.byValue) { - bucket[1].sort( - (a, b) => - (getNode(a)?.internal?.counter ?? 0) - - (getNode(b)?.internal?.counter ?? 0) - ) + bucket[1].sort(sortByIds) } if (op === `$ne` || op === `$nin`) { @@ -87,7 +93,7 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void { // For `$ne` we will take the list of all targeted nodes and eliminate the // bucket of nodes with a particular value, if it exists at all.. - const arr: Array = [] + const arr: Array = [] filterCache.meta.nodesUnordered = arr filterCache.byValue.forEach(v => { v.forEach(nodeId => { @@ -105,15 +111,15 @@ function postIndexingMetaSetupLtLteGtGte( // internal.counter, asc. // This way non-eq ops can simply slice the array to get a range. - const entriesNullable: Array<[FilterValueNullable, Array]> = [ + const entriesNullable: Array<[FilterValueNullable, Array]> = [ ...filterCache.byValue.entries(), ] // These range checks never return `null` or `undefined` so filter those out // By filtering them out early, the sort should be faster. Could be ... - const entries: Array<[FilterValue, Array]> = + const entries: Array<[FilterValue, Array]> = entriesNullable.filter(([v]) => v != null) as Array< - [FilterValue, Array] + [FilterValue, Array] > // Sort all arrays by its value, asc. Ignore/allow potential type casting. @@ -137,10 +143,10 @@ function postIndexingMetaSetupLtLteGtGte( entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0)) } - const orderedNodes: Array = [] + const orderedNodes: Array = [] const orderedValues: Array = [] const offsets: Map = new Map() - entries.forEach(([v, bucket]: [FilterValue, Array]) => { + entries.forEach(([v, bucket]: [FilterValue, Array]) => { // Record the range containing all nodes with as filter value v // The last value of the range should be the offset of the next value // (So you should be able to do `nodes.slice(start, stop)` to get them) @@ -194,7 +200,7 @@ export const ensureIndexByQuery = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -239,11 +245,11 @@ export function ensureEmptyFilterCache( const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache - const orderedByCounter: Array = [] + const orderedByCounter: Array = [] filtersCache.set(filterCacheKey, { op: `$eq`, // Ignore. 
- byValue: new Map>(), + byValue: new Map>(), meta: { orderedByCounter, // This is what we want }, @@ -261,7 +267,7 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(node.id) + orderedByCounter.push(getIdentifierObjectFromNode(node)) }) } else { // Here we must first filter for the node type @@ -278,16 +284,14 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(node.id) + orderedByCounter.push(getIdentifierObjectFromNode(node)) } }) } // Since each node can only have one type, we shouldn't have to be concerned // about duplicates in this array. Just make sure they're sorted. - orderedByCounter.sort( - (a, b) => getNode(a)!.internal.counter - getNode(b)!.internal.counter - ) + orderedByCounter.sort(sortByIds) } function addNodeToFilterCache( @@ -350,7 +354,7 @@ function markNodeForValue( arr = [] filterCache.byValue.set(value, arr) } - arr.push(node.id) + arr.push(getIdentifierObjectFromNode(node)) } export const ensureIndexByElemMatch = ( @@ -368,7 +372,7 @@ export const ensureIndexByElemMatch = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -555,7 +559,7 @@ export const getNodesFromCacheByValue = ( filterValue: FilterValueNullable, filtersCache: FiltersCache, wasElemMatch -): Array | undefined => { +): Array | undefined => { const filterCache = filtersCache.get(filterCacheKey) if (!filterCache) { return undefined @@ -588,7 +592,7 @@ export const getNodesFromCacheByValue = ( } const filterValueArr: Array = filterValue - const set: Set = new Set() + const set: Set = new Set() // TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works. // For every value in the needle array, find the bucket of nodes for @@ -598,9 +602,7 @@ export const getNodesFromCacheByValue = ( ) const arr = [...set] // this is bad for perf but will guarantee us a unique set :( - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // Note: it's very unlikely that the list of filter values is big so .includes should be fine here if (filterValueArr.includes(null)) { @@ -639,9 +641,7 @@ export const getNodesFromCacheByValue = ( // TODO: there's probably a more efficient algorithm to do set // subtraction in such a way that we don't have to re-sort - return [...set].sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + return [...set].sort(sortByIds) } if (op === `$ne`) { @@ -651,9 +651,7 @@ export const getNodesFromCacheByValue = ( // TODO: there's probably a more efficient algorithm to do set // subtraction in such a way that we don't have to resort here - return [...set].sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + return [...set].sort(sortByIds) } if (op === `$regex`) { @@ -670,7 +668,7 @@ export const getNodesFromCacheByValue = ( } const regex = filterValue - const arr: Array = [] + const arr: Array = [] filterCache.byValue.forEach((nodes, value) => { // TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce. // Note: for legacy reasons partial paths should also be included for regex @@ -682,9 +680,7 @@ export const getNodesFromCacheByValue = ( // TODO: we _can_ cache this list as well. 
Might make sense if it turns out that $regex is mostly used with literals // TODO: it may make sense to first collect all buckets and then to .concat them, or merge sort them - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { @@ -729,9 +725,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[0]) - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -771,9 +765,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue < filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until) - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -791,9 +783,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[1]) - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -833,9 +823,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue <= filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until) - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -853,9 +841,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[0]).reverse() - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -895,9 +881,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue > filterValue ? inclPivot : exclPivot const arr = nodes!.slice(0, until).reverse() - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -915,9 +899,7 @@ export const getNodesFromCacheByValue = ( const range = ranges!.get(filterValue) if (range) { const arr = nodes!.slice(0, range[1]).reverse() - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -957,9 +939,7 @@ export const getNodesFromCacheByValue = ( // So we have to consider weak comparison and may have to include the pivot const until = pivotValue >= filterValue ? 
inclPivot : exclPivot const arr = nodes!.slice(0, until).reverse() - arr.sort( - (A, B) => getNode(A)!.internal.counter - getNode(B)!.internal.counter - ) + arr.sort(sortByIds) // elemMatch can cause a node to appear in multiple buckets so we must dedupe if (wasElemMatch) { expensiveDedupeInline(arr) @@ -974,7 +954,7 @@ export const getNodesFromCacheByValue = ( function removeBucketFromSet( filterValue: FilterValueNullable, filterCache: IFilterCache, - set: Set + set: Set ): void { if (filterValue === null) { // Edge case: $ne with `null` returns only the nodes that contain the full @@ -999,22 +979,24 @@ function removeBucketFromSet( * list that is also ordered by node.internal.counter */ export function intersectNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { let pointerA = 0 let pointerB = 0 // TODO: perf check: is it helpful to init the array to min(maxA,maxB) items? - const result: Array = [] + const result: Array = [] const maxA = a.length const maxB = b.length let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list + + // TODO some optimization could be done here to not call getNode while (pointerA < maxA && pointerB < maxB) { - const nodeA = getNode(a[pointerA])! - const nodeB = getNode(b[pointerB])! - const counterA = nodeA.internal.counter - const counterB = nodeB.internal.counter + const nodeA = getNode(a[pointerA].id) + const nodeB = getNode(b[pointerB].id) + const counterA = a[pointerA].counter + const counterB = b[pointerB].counter if (counterA < counterB) { pointerA++ @@ -1031,7 +1013,7 @@ export function intersectNodesByCounter( // back to back, so even if both input arrays contained the same node // twice, this check would prevent the result from getting duplicate nodes if (lastAdded !== nodeA) { - result.push(nodeA.id) + result.push(a[pointerA]) lastAdded = nodeA } pointerA++ @@ -1050,39 +1032,41 @@ export function intersectNodesByCounter( * list that is also ordered by node.internal.counter */ export function unionNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items? - const arr: Array = [] + const arr: Array = [] let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list + // TODO some optimization could be done here to not call getNode + let pointerA = 0 let pointerB = 0 const maxA = a.length const maxB = b.length while (pointerA < maxA && pointerB < maxB) { - const nodeA = getNode(a[pointerA])! - const nodeB = getNode(b[pointerB])! + const nodeA = getNode(a[pointerA].id)! + const nodeB = getNode(b[pointerB].id)! const counterA = nodeA.internal.counter const counterB = nodeB.internal.counter if (counterA < counterB) { if (lastAdded !== nodeA) { - arr.push(nodeA.id) + arr.push(a[pointerA]) lastAdded = nodeA } pointerA++ } else if (counterA > counterB) { if (lastAdded !== nodeB) { - arr.push(nodeB.id) + arr.push(b[pointerB]) lastAdded = nodeB } pointerB++ } else { if (lastAdded !== nodeA) { - arr.push(nodeA.id) + arr.push(a[pointerA]) lastAdded = nodeA } pointerA++ @@ -1091,18 +1075,18 @@ export function unionNodesByCounter( } while (pointerA < maxA) { - const nodeA = getNode(a[pointerA])! + const nodeA = getNode(a[pointerA].id)! if (lastAdded !== nodeA) { - arr.push(nodeA.id) + arr.push(a[pointerA]) lastAdded = nodeA } pointerA++ } while (pointerB < maxB) { - const nodeB = getNode(b[pointerB])! + const nodeB = getNode(b[pointerB].id)! 
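// (Sketch of the two-pointer intersection above, with invented counters:
//   given a = [1, 3, 4] and b = [2, 3], the pointers advance past 1 and 2,
//   both lists agree on 3, so it is pushed exactly once, and the remaining 4
//   is dropped because b is exhausted: an O(|a| + |b|) intersection that
//   never re-sorts its inputs.)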
if (lastAdded !== nodeB) { - arr.push(nodeB.id) + arr.push(b[pointerB]) lastAdded = nodeB } pointerB++ @@ -1111,11 +1095,11 @@ export function unionNodesByCounter( return arr } -function expensiveDedupeInline(arr: Array): void { +function expensiveDedupeInline(arr: Array): void { // An elemMatch filter may cause duplicates to appear in a bucket. // Since the bucket is sorted those should now be back to back // Worst case this is a fast O(n) loop that does nothing. - let prev: GatsbyNodeID | undefined = undefined + let prev: GatsbyNodeIdentifiers | undefined = undefined // We copy-on-find because a splice is expensive and we can't use Sets diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts index da2df4d09b54f..6e975a1a23114 100644 --- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts +++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts @@ -22,7 +22,7 @@ import { getNodesFromCacheByValue, intersectNodesByCounter, IFilterCache, - GatsbyNodeID, + GatsbyNodeIdentifiers, } from "./indexing" import { IGraphQLRunnerStats } from "../../query/types" import { IRunQueryArgs, IQueryResult } from "../types" @@ -106,8 +106,8 @@ export function applyFastFilters( while (nodesPerValueArrs.length > 1) { // TS limitation: cannot guard against .pop(), so we must double cast - const a = nodesPerValueArrs.pop() as unknown as Array - const b = nodesPerValueArrs.pop() as unknown as Array + const a = nodesPerValueArrs.pop() as unknown as Array + const b = nodesPerValueArrs.pop() as unknown as Array nodesPerValueArrs.push(intersectNodesByCounter(a, b)) } @@ -118,7 +118,7 @@ export function applyFastFilters( return null } - return result.map(getNode).filter(isGatsbyNode) + return result.map(nodeIds => getNode(nodeIds.id)).filter(isGatsbyNode) } } @@ -129,8 +129,8 @@ function getBucketsForFilters( filters: Array, nodeTypeNames: Array, filtersCache: FiltersCache -): Array> | undefined { - const nodesPerValueArrs: Array> = [] +): Array> | undefined { + const nodesPerValueArrs: Array> = [] // Fail fast while trying to create and get the value-cache for each path const every = filters.every(filter => { @@ -175,7 +175,7 @@ function getBucketsForQueryFilter( filter: IDbQueryQuery, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { const { path: filterPath, @@ -230,7 +230,7 @@ function collectBucketForElemMatch( filter: IDbQueryElemMatch, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { // Get comparator and target value for this elemMatch let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init) @@ -356,9 +356,9 @@ function convertAndApplyFastFilters( // If there's a filter, there (now) must be an entry for this cache key const filterCache = filtersCache.get(filterCacheKey) as IFilterCache // If there is no filter then the ensureCache step will populate this: - const cache = filterCache.meta.orderedByCounter as Array + const cache = filterCache.meta.orderedByCounter as Array - return cache.slice(0).map(getNode).filter(isGatsbyNode) + return cache.slice(0).map(nodeIds => getNode(nodeIds.id)).filter(isGatsbyNode) } const result = applyFastFilters(filters, nodeTypeNames, filtersCache) From cd9c891699d89ab5980fe7286d7f37c983951447 Mon Sep 17 00:00:00 2001 From: Josh Date: Wed, 26 Jan 2022 15:03:11 -0500 Subject: [PATCH 27/42] Remove a ton of calls to getNode --- 
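A minimal sketch of the boundary these run-fast-filters changes settle on:
fast filters compare and merge lightweight `{ id, counter }` pairs, and full
nodes are materialized from the store exactly once at the end.
`lookupNodeSketch` below is an invented stand-in for Gatsby's `getNode`:

type IdPairSketch = { id: string; counter: number }
const nodeStoreSketch = new Map([[`n1`, { id: `n1`, slog: `def` }]])
const lookupNodeSketch = (
  id: string
): { id: string; slog: string } | undefined => nodeStoreSketch.get(id)
const filteredSketch: Array<IdPairSketch> = [
  { id: `n1`, counter: 1 },
  { id: `gone`, counter: 9 },
]
// materialize full nodes once, dropping anything no longer in the store
const nodesSketch = filteredSketch
  .map(pair => lookupNodeSketch(pair.id))
  .filter((n): n is { id: string; slog: string } => Boolean(n))
// => just the n1 node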
.../src/datastore/in-memory/indexing.ts | 101 ++++++++++-------- .../datastore/in-memory/run-fast-filters.ts | 24 +++-- 2 files changed, 70 insertions(+), 55 deletions(-) diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 4862c53f7c0d0..4a995dadc853a 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -23,14 +23,21 @@ export type FilterOp = // TODO: merge with DbComparator ? export type FilterCacheKey = string type GatsbyNodeID = string -export type GatsbyNodeIdentifiers = { - id: GatsbyNodeID, - counter: number, +export interface IGatsbyNodeIdentifiers { + id: GatsbyNodeID + counter: number } -const getIdentifierObjectFromNode = (node: IGatsbyNode) => { return { id: node.id, counter: node.internal.counter }} +const getIdentifierObjectFromNode = ( + node: IGatsbyNode +): IGatsbyNodeIdentifiers => { + return { id: node.id, counter: node.internal.counter } +} -const sortByIds = (a: GatsbyNodeIdentifiers, b: GatsbyNodeIdentifiers) => a.counter - b.counter; +const sortByIds = ( + a: IGatsbyNodeIdentifiers, + b: IGatsbyNodeIdentifiers +): number => a.counter - b.counter export interface IFilterCache { op: FilterOp @@ -41,22 +48,22 @@ export interface IFilterCache { // This arrays may contain duplicates (!) because those only get filtered in the // last step. // TODO: We might decide to make sure these buckets _are_ deduped for eq perf - byValue: Map> + byValue: Map> meta: { // Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort - nodesUnordered?: Array + nodesUnordered?: Array // Flat list of all nodes by requested types, ordered by counter (cached for empty filters) - orderedByCounter?: Array + orderedByCounter?: Array // Ordered list of all values (by `<`) found by this filter. No null / undefs valuesAsc?: Array // Flat list of nodes, ordered by valueAsc - nodesByValueAsc?: Array + nodesByValueAsc?: Array // Ranges of nodes per value, maps to the nodesByValueAsc array valueRangesAsc?: Map // Ordered list of all values (by `>`) found by this filter. No null / undefs valuesDesc?: Array // Flat list of nodes, ordered by valueDesc - nodesByValueDesc?: Array + nodesByValueDesc?: Array // Ranges of nodes per value, maps to the nodesByValueDesc array valueRangesDesc?: Map } @@ -93,7 +100,7 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void { // For `$ne` we will take the list of all targeted nodes and eliminate the // bucket of nodes with a particular value, if it exists at all.. - const arr: Array = [] + const arr: Array = [] filterCache.meta.nodesUnordered = arr filterCache.byValue.forEach(v => { v.forEach(nodeId => { @@ -111,15 +118,15 @@ function postIndexingMetaSetupLtLteGtGte( // internal.counter, asc. // This way non-eq ops can simply slice the array to get a range. - const entriesNullable: Array<[FilterValueNullable, Array]> = [ - ...filterCache.byValue.entries(), - ] + const entriesNullable: Array< + [FilterValueNullable, Array] + > = [...filterCache.byValue.entries()] // These range checks never return `null` or `undefined` so filter those out // By filtering them out early, the sort should be faster. Could be ... - const entries: Array<[FilterValue, Array]> = + const entries: Array<[FilterValue, Array]> = entriesNullable.filter(([v]) => v != null) as Array< - [FilterValue, Array] + [FilterValue, Array] > // Sort all arrays by its value, asc. Ignore/allow potential type casting. 
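// A minimal usage sketch for the helpers above (invented values): the
// identifier keeps just enough state to order results and to re-fetch the
// full node later via getNode(identifier.id).
const exampleNode = {
  id: `node-1`,
  internal: { counter: 7 },
} // stand-in for an IGatsbyNode
const exampleIdentifier = {
  id: exampleNode.id,
  counter: exampleNode.internal.counter,
} // the shape getIdentifierObjectFromNode returns
// sortByIds(x, y) reduces to x.counter - y.counter, a plain number compare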
@@ -143,19 +150,21 @@ function postIndexingMetaSetupLtLteGtGte( entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0)) } - const orderedNodes: Array = [] + const orderedNodes: Array = [] const orderedValues: Array = [] const offsets: Map = new Map() - entries.forEach(([v, bucket]: [FilterValue, Array]) => { - // Record the range containing all nodes with as filter value v - // The last value of the range should be the offset of the next value - // (So you should be able to do `nodes.slice(start, stop)` to get them) - offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length]) - // We could do `arr.push(...bucket)` here but that's not safe with very - // large sets, so we use a regular loop - bucket.forEach(node => orderedNodes.push(node)) - orderedValues.push(v) - }) + entries.forEach( + ([v, bucket]: [FilterValue, Array]) => { + // Record the range containing all nodes with as filter value v + // The last value of the range should be the offset of the next value + // (So you should be able to do `nodes.slice(start, stop)` to get them) + offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length]) + // We could do `arr.push(...bucket)` here but that's not safe with very + // large sets, so we use a regular loop + bucket.forEach(node => orderedNodes.push(node)) + orderedValues.push(v) + } + ) if (op === `$lt` || op === `$lte`) { filterCache.meta.valuesAsc = orderedValues @@ -200,7 +209,7 @@ export const ensureIndexByQuery = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -245,11 +254,11 @@ export function ensureEmptyFilterCache( const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache - const orderedByCounter: Array = [] + const orderedByCounter: Array = [] filtersCache.set(filterCacheKey, { op: `$eq`, // Ignore. - byValue: new Map>(), + byValue: new Map>(), meta: { orderedByCounter, // This is what we want }, @@ -372,7 +381,7 @@ export const ensureIndexByElemMatch = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -559,7 +568,7 @@ export const getNodesFromCacheByValue = ( filterValue: FilterValueNullable, filtersCache: FiltersCache, wasElemMatch -): Array | undefined => { +): Array | undefined => { const filterCache = filtersCache.get(filterCacheKey) if (!filterCache) { return undefined @@ -592,7 +601,7 @@ export const getNodesFromCacheByValue = ( } const filterValueArr: Array = filterValue - const set: Set = new Set() + const set: Set = new Set() // TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works. // For every value in the needle array, find the bucket of nodes for @@ -668,7 +677,7 @@ export const getNodesFromCacheByValue = ( } const regex = filterValue - const arr: Array = [] + const arr: Array = [] filterCache.byValue.forEach((nodes, value) => { // TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce. 
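// A worked example of the offsets layout built above (invented data): with
// buckets 2 -> [n1], 5 -> [n2, n3], 9 -> [n4] flattened in ascending order:
const orderedNodesSketch = [`n1`, `n2`, `n3`, `n4`]
const offsetsSketch = new Map<number, [number, number]>([
  [2, [0, 1]],
  [5, [1, 3]],
  [9, [3, 4]],
])
const rangeSketch = offsetsSketch.get(5)! // [1, 3]
const eq5 = orderedNodesSketch.slice(rangeSketch[0], rangeSketch[1]) // [n2, n3]
const lt5 = orderedNodesSketch.slice(0, rangeSketch[0]) // [n1]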
// Note: for legacy reasons partial paths should also be included for regex @@ -954,7 +963,7 @@ export const getNodesFromCacheByValue = ( function removeBucketFromSet( filterValue: FilterValueNullable, filterCache: IFilterCache, - set: Set + set: Set ): void { if (filterValue === null) { // Edge case: $ne with `null` returns only the nodes that contain the full @@ -979,17 +988,17 @@ function removeBucketFromSet( * list that is also ordered by node.internal.counter */ export function intersectNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { let pointerA = 0 let pointerB = 0 // TODO: perf check: is it helpful to init the array to min(maxA,maxB) items? - const result: Array = [] + const result: Array = [] const maxA = a.length const maxB = b.length let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list - + // TODO some optimization could be done here to not call getNode while (pointerA < maxA && pointerB < maxB) { @@ -1032,11 +1041,11 @@ export function intersectNodesByCounter( * list that is also ordered by node.internal.counter */ export function unionNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items? - const arr: Array = [] + const arr: Array = [] let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list // TODO some optimization could be done here to not call getNode @@ -1095,11 +1104,11 @@ export function unionNodesByCounter( return arr } -function expensiveDedupeInline(arr: Array): void { +function expensiveDedupeInline(arr: Array): void { // An elemMatch filter may cause duplicates to appear in a bucket. // Since the bucket is sorted those should now be back to back // Worst case this is a fast O(n) loop that does nothing. 
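// A minimal sketch of the copy-on-find dedupe described above (assumed
// behavior, not the patch's code): survivors are written forward in place
// and the array is truncated once, instead of splicing per duplicate.
function dedupeInlineSketch<T>(arr: Array<T>): void {
  let prevSeen: T | undefined = undefined
  let writeIndex = 0
  for (const current of arr) {
    if (current !== prevSeen) {
      arr[writeIndex++] = current
      prevSeen = current
    }
  }
  arr.length = writeIndex // drop the tail left behind by the compaction
}
// this works because sorted buckets put duplicates back to back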
- let prev: GatsbyNodeIdentifiers | undefined = undefined + let prev: IGatsbyNodeIdentifiers | undefined = undefined // We copy-on-find because a splice is expensive and we can't use Sets diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts index 6e975a1a23114..31ead5f9899cf 100644 --- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts +++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts @@ -22,7 +22,7 @@ import { getNodesFromCacheByValue, intersectNodesByCounter, IFilterCache, - GatsbyNodeIdentifiers, + IGatsbyNodeIdentifiers, } from "./indexing" import { IGraphQLRunnerStats } from "../../query/types" import { IRunQueryArgs, IQueryResult } from "../types" @@ -106,8 +106,10 @@ export function applyFastFilters( while (nodesPerValueArrs.length > 1) { // TS limitation: cannot guard against .pop(), so we must double cast - const a = nodesPerValueArrs.pop() as unknown as Array - const b = nodesPerValueArrs.pop() as unknown as Array + const a = + nodesPerValueArrs.pop() as unknown as Array + const b = + nodesPerValueArrs.pop() as unknown as Array nodesPerValueArrs.push(intersectNodesByCounter(a, b)) } @@ -129,8 +131,8 @@ function getBucketsForFilters( filters: Array, nodeTypeNames: Array, filtersCache: FiltersCache -): Array> | undefined { - const nodesPerValueArrs: Array> = [] +): Array> | undefined { + const nodesPerValueArrs: Array> = [] // Fail fast while trying to create and get the value-cache for each path const every = filters.every(filter => { @@ -175,7 +177,7 @@ function getBucketsForQueryFilter( filter: IDbQueryQuery, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { const { path: filterPath, @@ -230,7 +232,7 @@ function collectBucketForElemMatch( filter: IDbQueryElemMatch, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array> ): boolean { // Get comparator and target value for this elemMatch let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init) @@ -356,9 +358,13 @@ function convertAndApplyFastFilters( // If there's a filter, there (now) must be an entry for this cache key const filterCache = filtersCache.get(filterCacheKey) as IFilterCache // If there is no filter then the ensureCache step will populate this: - const cache = filterCache.meta.orderedByCounter as Array + const cache = filterCache.meta + .orderedByCounter as Array - return cache.slice(0).map(nodeIds => getNode(nodeIds.id)).filter(isGatsbyNode) + return cache + .slice(0) + .map(nodeIds => getNode(nodeIds.id)) + .filter(isGatsbyNode) } const result = applyFastFilters(filters, nodeTypeNames, filtersCache) From ebf00f476820fd91b97f189f3f775b728a682986 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Thu, 27 Jan 2022 19:33:25 +0100 Subject: [PATCH 28/42] make IGatsbyNodeIdentifiers unique --- .../gatsby/src/datastore/in-memory/indexing.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 4a995dadc853a..5f9c072fbb42b 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -28,10 +28,25 @@ export interface IGatsbyNodeIdentifiers { counter: number } +const nodeIdToIdentifierMap = new Map< + GatsbyNodeID, + WeakRef +>() + const getIdentifierObjectFromNode = ( node: 
IGatsbyNode
): IGatsbyNodeIdentifiers => {
-  return { id: node.id, counter: node.internal.counter }
+  const cacheKey = `${node.id}_____${node.internal.counter}`
+  if (nodeIdToIdentifierMap.has(cacheKey)) {
+    const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+    if (maybeStillExist) {
+      return maybeStillExist
+    }
+  }
+
+  const identifier = { id: node.id, counter: node.internal.counter }
+  nodeIdToIdentifierMap.set(cacheKey, new WeakRef(identifier))
+  return identifier
 }

 const sortByIds = (

From aa5d94341342c91ef871d9150389cb846e2a46af Mon Sep 17 00:00:00 2001
From: Josh
Date: Thu, 27 Jan 2022 14:41:10 -0500
Subject: [PATCH 29/42] Move converting to actual nodes outside of fast filters

---
 .../src/datastore/in-memory/run-fast-filters.ts | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index 31ead5f9899cf..f229a28b261b7 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -79,7 +79,7 @@ export function applyFastFilters(
   filters: Array<DbQuery>,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache
-): Array<IGatsbyNode> | null {
+): Array<IGatsbyNodeIdentifiers> | null {
   if (!filtersCache) {
     // If no filter cache is passed on, explicitly don't use one
     return null
@@ -120,7 +120,7 @@ export function applyFastFilters(
       return null
     }

-    return result.map(nodeIds => getNode(nodeIds.id)).filter(isGatsbyNode)
+    return result
   }
 }

@@ -306,7 +306,10 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult {
     stats
   )

-  const sortedResult = sortNodes(result, sort, resolvedFields, stats)
+  const resultingNodes = result
+    .map(nodeIds => getNode(nodeIds.id))
+    .filter(isGatsbyNode)
+  const sortedResult = sortNodes(resultingNodes, sort, resolvedFields, stats)

   const totalCount = async (): Promise<number> => sortedResult.length
   const entries =
@@ -326,7 +329,7 @@ function convertAndApplyFastFilters(
   filtersCache: FiltersCache,
   resolvedFields: Record<string, any>,
   stats: IGraphQLRunnerStats
-): Array<IGatsbyNode> {
+): Array<IGatsbyNodeIdentifiers> {
   const filters = filterFields
     ? prefixResolvedFields(
         createDbQueriesFromObject(prepareQueryArgs(filterFields)),
@@ -361,10 +364,7 @@ function convertAndApplyFastFilters(
     const cache = filterCache.meta
       .orderedByCounter as Array<IGatsbyNodeIdentifiers>

-    return cache
-      .slice(0)
-      .map(nodeIds => getNode(nodeIds.id))
-      .filter(isGatsbyNode)
+    return cache.slice(0)
   }

   const result = applyFastFilters(filters, nodeTypeNames, filtersCache)

From 8f85c001db0318590bf612b92fd439cfe9c5b0d6 Mon Sep 17 00:00:00 2001
From: Josh
Date: Fri, 28 Jan 2022 10:41:27 -0500
Subject: [PATCH 30/42] Get sorting working, not performant

---
 packages/gatsby/src/datastore/in-memory/run-fast-filters.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index f229a28b261b7..8ae3b22d3b211 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -438,10 +438,13 @@ function sortNodes(
       return field
     }
   })
+
+  // TODO this is not performant (probably)!
+  // fix this up
   const sortFns = sortFields.map(
     field =>
       (v): ((any) => any) =>
-        getValueAt(v, field)
+        getValueAt(getNode(v.id)!, field)
   )
   const sortOrder = sort.order.map(order =>
     typeof order === `boolean` ?
order : order.toLowerCase() From 26fd2555609cd4c7b9add0322376f034fb697e80 Mon Sep 17 00:00:00 2001 From: Josh Date: Fri, 28 Jan 2022 10:47:58 -0500 Subject: [PATCH 31/42] Revert "Get sorting working, not performant" This reverts commit 8f85c001db0318590bf612b92fd439cfe9c5b0d6. --- packages/gatsby/src/datastore/in-memory/run-fast-filters.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts index 8ae3b22d3b211..f229a28b261b7 100644 --- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts +++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts @@ -438,13 +438,10 @@ function sortNodes( return field } }) - - // TODO this is not performant (probably)! - // fix this up const sortFns = sortFields.map( field => (v): ((any) => any) => - getValueAt(getNode(v.id)!, field) + getValueAt(v, field) ) const sortOrder = sort.order.map(order => typeof order === `boolean` ? order : order.toLowerCase() From 5c49293d66b496459078f8aaa62ab120036db334 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Fri, 28 Jan 2022 11:35:53 +0100 Subject: [PATCH 32/42] adjust fast filters tests --- .../gatsby/src/datastore/__tests__/run-fast-filters.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js index 98b84a406a471..8b20b188a3e5f 100644 --- a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js +++ b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js @@ -407,7 +407,7 @@ describe(`applyFastFilters`, () => { expect(result.length).toEqual(2) result.map(node => { - expect(node.slog).toEqual(`def`) + expect(getNode(node.id).slog).toEqual(`def`) }) }) @@ -425,7 +425,7 @@ describe(`applyFastFilters`, () => { expect(result.length).toEqual(2) result.map(node => { - expect(node.deep.flat.search.chain).toEqual(300) + expect(getNode(node.id).deep.flat.search.chain).toEqual(300) }) }) @@ -444,8 +444,8 @@ describe(`applyFastFilters`, () => { // Count is irrelevant as long as it is non-zero and they all match filter expect(Array.isArray(results)).toBe(true) expect(results.length).toEqual(1) - expect(results[0].slog).toEqual(`def`) - expect(results[0].deep.flat.search.chain).toEqual(300) + expect(getNode(results[0].id).slog).toEqual(`def`) + expect(getNode(results[0].id).deep.flat.search.chain).toEqual(300) }) it(`supports elemMatch`, () => { From 2364c00f9015ae49540b9a54dd562dabaed24f10 Mon Sep 17 00:00:00 2001 From: Josh Date: Wed, 2 Feb 2022 15:18:32 -0500 Subject: [PATCH 33/42] Get sorting working without full nodes --- benchmarks/memory/gatsby-node.js | 2 +- .../src/datastore/in-memory/indexing.ts | 240 ++++++++++++------ .../datastore/in-memory/run-fast-filters.ts | 140 +++++----- 3 files changed, 245 insertions(+), 137 deletions(-) diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js index 7157dcc361991..3b936e20aa612 100644 --- a/benchmarks/memory/gatsby-node.js +++ b/benchmarks/memory/gatsby-node.js @@ -98,7 +98,7 @@ exports.createPages = async ({ actions, graphql }) => { const { data } = await graphql(` { - allTest { + allTest(sort: {fields: fooBar}) { nodes { id idClone diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 5f9c072fbb42b..7b21dbda530a9 100644 --- 
a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -4,8 +4,11 @@ import { IDbQueryElemMatch, FilterValue, FilterValueNullable, + objectToDottedField, } from "../common/query" import { getDataStore, getNode } from "../" +import _ from "lodash" +import { getValueAt } from "../../utils/get-value-at" // Only list supported ops here. "CacheableFilterOp" export type FilterOp = // TODO: merge with DbComparator ? @@ -23,36 +26,67 @@ export type FilterOp = // TODO: merge with DbComparator ? export type FilterCacheKey = string type GatsbyNodeID = string -export interface IGatsbyNodeIdentifiers { +export interface IGatsbyNodePartial { + isGatsbyNodePartial: boolean id: GatsbyNodeID - counter: number + internal: { + counter: number + } + indexFields: Set + [k: string]: any } const nodeIdToIdentifierMap = new Map< GatsbyNodeID, - WeakRef + WeakRef >() -const getIdentifierObjectFromNode = ( - node: IGatsbyNode -): IGatsbyNodeIdentifiers => { +export const getGatsbyNodePartial = ( + node: IGatsbyNode | IGatsbyNodePartial, + indexFields: Array, + resolvedFields: any +): IGatsbyNodePartial => { const cacheKey = `${node.id}_____${node.internal.counter}` if (nodeIdToIdentifierMap.has(cacheKey)) { const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref() - if (maybeStillExist) { + if ( + maybeStillExist && + _.isEqual(new Set(indexFields), maybeStillExist.indexFields) + ) { return maybeStillExist } } - const identifier = { id: node.id, counter: node.internal.counter } - nodeIdToIdentifierMap.set(cacheKey, new WeakRef(identifier)) - return identifier + const dottedFields = {} + + for (const dottedField of getSortFieldIdentifierKeys( + indexFields, + resolvedFields + )) { + if (dottedField in node) { + dottedFields[dottedField] = node[dottedField] + } else { + dottedFields[dottedField] = getValueAt( + node.isGatsbyNodePartial ? getNode(node.id)! : node, + dottedField + ) + } + } + + const partial = Object.assign(dottedFields, { + isGatsbyNodePartial: true, + id: node.id, + internal: { + counter: node.internal.counter, + }, + indexFields: new Set(indexFields), + }) + nodeIdToIdentifierMap.set(cacheKey, new WeakRef(partial)) + return partial } -const sortByIds = ( - a: IGatsbyNodeIdentifiers, - b: IGatsbyNodeIdentifiers -): number => a.counter - b.counter +const sortByIds = (a: IGatsbyNodePartial, b: IGatsbyNodePartial): number => + a.internal.counter - b.internal.counter export interface IFilterCache { op: FilterOp @@ -63,22 +97,22 @@ export interface IFilterCache { // This arrays may contain duplicates (!) because those only get filtered in the // last step. // TODO: We might decide to make sure these buckets _are_ deduped for eq perf - byValue: Map> + byValue: Map> meta: { // Used by ne/nin, which will create a Set from this array and then remove another set(s) and sort - nodesUnordered?: Array + nodesUnordered?: Array // Flat list of all nodes by requested types, ordered by counter (cached for empty filters) - orderedByCounter?: Array + orderedByCounter?: Array // Ordered list of all values (by `<`) found by this filter. No null / undefs valuesAsc?: Array // Flat list of nodes, ordered by valueAsc - nodesByValueAsc?: Array + nodesByValueAsc?: Array // Ranges of nodes per value, maps to the nodesByValueAsc array valueRangesAsc?: Map // Ordered list of all values (by `>`) found by this filter. 
No null / undefs valuesDesc?: Array // Flat list of nodes, ordered by valueDesc - nodesByValueDesc?: Array + nodesByValueDesc?: Array // Ranges of nodes per value, maps to the nodesByValueDesc array valueRangesDesc?: Map } @@ -115,7 +149,7 @@ function postIndexingMetaSetupNeNin(filterCache: IFilterCache): void { // For `$ne` we will take the list of all targeted nodes and eliminate the // bucket of nodes with a particular value, if it exists at all.. - const arr: Array = [] + const arr: Array = [] filterCache.meta.nodesUnordered = arr filterCache.byValue.forEach(v => { v.forEach(nodeId => { @@ -134,14 +168,14 @@ function postIndexingMetaSetupLtLteGtGte( // This way non-eq ops can simply slice the array to get a range. const entriesNullable: Array< - [FilterValueNullable, Array] + [FilterValueNullable, Array] > = [...filterCache.byValue.entries()] // These range checks never return `null` or `undefined` so filter those out // By filtering them out early, the sort should be faster. Could be ... - const entries: Array<[FilterValue, Array]> = + const entries: Array<[FilterValue, Array]> = entriesNullable.filter(([v]) => v != null) as Array< - [FilterValue, Array] + [FilterValue, Array] > // Sort all arrays by its value, asc. Ignore/allow potential type casting. @@ -165,21 +199,19 @@ function postIndexingMetaSetupLtLteGtGte( entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0)) } - const orderedNodes: Array = [] + const orderedNodes: Array = [] const orderedValues: Array = [] const offsets: Map = new Map() - entries.forEach( - ([v, bucket]: [FilterValue, Array]) => { - // Record the range containing all nodes with as filter value v - // The last value of the range should be the offset of the next value - // (So you should be able to do `nodes.slice(start, stop)` to get them) - offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length]) - // We could do `arr.push(...bucket)` here but that's not safe with very - // large sets, so we use a regular loop - bucket.forEach(node => orderedNodes.push(node)) - orderedValues.push(v) - } - ) + entries.forEach(([v, bucket]: [FilterValue, Array]) => { + // Record the range containing all nodes with as filter value v + // The last value of the range should be the offset of the next value + // (So you should be able to do `nodes.slice(start, stop)` to get them) + offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.length]) + // We could do `arr.push(...bucket)` here but that's not safe with very + // large sets, so we use a regular loop + bucket.forEach(node => orderedNodes.push(node)) + orderedValues.push(v) + }) if (op === `$lt` || op === `$lte`) { filterCache.meta.valuesAsc = orderedValues @@ -210,7 +242,9 @@ export const ensureIndexByQuery = ( filterCacheKey: FilterCacheKey, filterPath: Array, nodeTypeNames: Array, - filtersCache: FiltersCache + filtersCache: FiltersCache, + indexFields: Array, + resolvedFields: any ): void => { const readableWorkerId = process.env.GATSBY_WORKER_ID ? 
`worker #${process.env.GATSBY_WORKER_ID}` @@ -224,7 +258,7 @@ export const ensureIndexByQuery = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -237,7 +271,14 @@ export const ensureIndexByQuery = ( getDataStore() .iterateNodesByType(nodeTypeNames[0]) .forEach(node => { - addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache) + addNodeToFilterCache( + node, + filterPath, + filterCache, + resolvedNodesCache, + indexFields, + resolvedFields + ) }) } else { // Here we must first filter for the node type @@ -249,7 +290,14 @@ export const ensureIndexByQuery = ( return } - addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache) + addNodeToFilterCache( + node, + filterPath, + filterCache, + resolvedNodesCache, + indexFields, + resolvedFields + ) }) } @@ -261,7 +309,9 @@ export const ensureIndexByQuery = ( export function ensureEmptyFilterCache( filterCacheKey, nodeTypeNames: Array, - filtersCache: FiltersCache + filtersCache: FiltersCache, + indexFields: Array, + resolvedFields: any ): void { // This is called for queries without any filters // We want to cache the result since it's basically a list of nodes by type(s) @@ -269,11 +319,11 @@ export function ensureEmptyFilterCache( const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache - const orderedByCounter: Array = [] + const orderedByCounter: Array = [] filtersCache.set(filterCacheKey, { op: `$eq`, // Ignore. - byValue: new Map>(), + byValue: new Map>(), meta: { orderedByCounter, // This is what we want }, @@ -291,7 +341,9 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(getIdentifierObjectFromNode(node)) + orderedByCounter.push( + getGatsbyNodePartial(node, indexFields, resolvedFields) + ) }) } else { // Here we must first filter for the node type @@ -308,7 +360,9 @@ export function ensureEmptyFilterCache( node.__gatsby_resolved = resolved } } - orderedByCounter.push(getIdentifierObjectFromNode(node)) + orderedByCounter.push( + getGatsbyNodePartial(node, indexFields, resolvedFields) + ) } }) } @@ -323,6 +377,8 @@ function addNodeToFilterCache( chain: Array, filterCache: IFilterCache, resolvedNodesCache, + indexFields: Array, + resolvedFields: any, valueOffset: any = node ): void { // There can be a filter that targets `__gatsby_resolved` so fix that first @@ -353,7 +409,9 @@ function addNodeToFilterCache( // Add an entry for each element of the array. This would work for ops // like eq and ne, but not sure about range ops like lt,lte,gt,gte. 
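// Sketch of the fan-out described above (invented values): a node whose
// field is an array is registered once per element, so an $eq on any element
// of the array finds it.
const fanOutBuckets = new Map<string, Array<string>>()
const arrayFieldValue = [`red`, `blue`] // e.g. a node's tags field
for (const element of arrayFieldValue) {
  let bucket = fanOutBuckets.get(element)
  if (!bucket) {
    bucket = []
    fanOutBuckets.set(element, bucket)
  }
  bucket.push(`node-1`)
}
// fanOutBuckets: "red" -> ["node-1"], "blue" -> ["node-1"]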
- v.forEach(v => markNodeForValue(filterCache, node, v)) + v.forEach(v => + markNodeForValue(filterCache, node, v, indexFields, resolvedFields) + ) return } @@ -365,20 +423,26 @@ function addNodeToFilterCache( v = undefined } - markNodeForValue(filterCache, node, v) + markNodeForValue(filterCache, node, v, indexFields, resolvedFields) } function markNodeForValue( filterCache: IFilterCache, node: IGatsbyNode, - value: FilterValueNullable + value: FilterValueNullable, + indexFields: Array, + resolvedFields: any ): void { let arr = filterCache.byValue.get(value) if (!arr) { arr = [] filterCache.byValue.set(value, arr) } - arr.push(getIdentifierObjectFromNode(node)) + + const partial = getGatsbyNodePartial(node, indexFields, resolvedFields) + if (!arr.includes(partial)) { + arr.push(partial) + } } export const ensureIndexByElemMatch = ( @@ -386,7 +450,9 @@ export const ensureIndexByElemMatch = ( filterCacheKey: FilterCacheKey, filter: IDbQueryElemMatch, nodeTypeNames: Array, - filtersCache: FiltersCache + filtersCache: FiltersCache, + indexFields: Array, + resolvedFields: any ): void => { // Given an elemMatch filter, generate the cache that contains all nodes that // matches a given value for that sub-query @@ -396,7 +462,7 @@ export const ensureIndexByElemMatch = ( const filterCache: IFilterCache = { op, - byValue: new Map>(), + byValue: new Map>(), meta: {}, } as IFilterCache filtersCache.set(filterCacheKey, filterCache) @@ -410,7 +476,9 @@ export const ensureIndexByElemMatch = ( node, filter, filterCache, - resolvedNodesCache + resolvedNodesCache, + indexFields, + resolvedFields ) }) } else { @@ -427,7 +495,9 @@ export const ensureIndexByElemMatch = ( node, filter, filterCache, - resolvedNodesCache + resolvedNodesCache, + indexFields, + resolvedFields ) }) } @@ -440,7 +510,9 @@ function addNodeToBucketWithElemMatch( valueAtCurrentStep: any, // Arbitrary step on the path inside the node filter: IDbQueryElemMatch, filterCache: IFilterCache, - resolvedNodesCache + resolvedNodesCache, + indexFields: Array, + resolvedFields: any ): void { // There can be a filter that targets `__gatsby_resolved` so fix that first if (!node.__gatsby_resolved) { @@ -480,7 +552,9 @@ function addNodeToBucketWithElemMatch( elem, nestedQuery, filterCache, - resolvedNodesCache + resolvedNodesCache, + indexFields, + resolvedFields ) } else { // Now take same route as non-elemMatch filters would take @@ -489,6 +563,8 @@ function addNodeToBucketWithElemMatch( nestedQuery.path, filterCache, resolvedNodesCache, + indexFields, + resolvedFields, elem ) } @@ -583,12 +659,14 @@ export const getNodesFromCacheByValue = ( filterValue: FilterValueNullable, filtersCache: FiltersCache, wasElemMatch -): Array | undefined => { +): Array | undefined => { const filterCache = filtersCache.get(filterCacheKey) if (!filterCache) { return undefined } + // TODO we need to pass indexFields here and reload identifiers to be able to sort properly + const op = filterCache.op if (op === `$eq`) { @@ -616,7 +694,7 @@ export const getNodesFromCacheByValue = ( } const filterValueArr: Array = filterValue - const set: Set = new Set() + const set: Set = new Set() // TODO: we can also mergeSort for every step. this may perform worse because of how memory in js works. 
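// Sketch of the $in strategy above (invented data): union the bucket for
// every needle into a Set, then sort once by counter. The Set dedupes only
// because buckets share identifier instances, which is exactly what the
// WeakRef cache introduced earlier in this series guarantees.
type NeedleIdSketch = { id: string; counter: number }
const n1Sketch: NeedleIdSketch = { id: `n1`, counter: 1 }
const n2Sketch: NeedleIdSketch = { id: `n2`, counter: 2 }
const inBucketsSketch = new Map<string, Array<NeedleIdSketch>>([
  [`a`, [n1Sketch]],
  [`b`, [n1Sketch, n2Sketch]],
])
const foundSketch = new Set<NeedleIdSketch>()
;[`a`, `b`].forEach(v =>
  inBucketsSketch.get(v)?.forEach(p => foundSketch.add(p))
)
const inResultSketch = [...foundSketch].sort((x, y) => x.counter - y.counter)
// => n1, n2 (n1 appears once despite living in both buckets)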
// For every value in the needle array, find the bucket of nodes for @@ -692,7 +770,7 @@ export const getNodesFromCacheByValue = ( } const regex = filterValue - const arr: Array = [] + const arr: Array = [] filterCache.byValue.forEach((nodes, value) => { // TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce. // Note: for legacy reasons partial paths should also be included for regex @@ -978,7 +1056,7 @@ export const getNodesFromCacheByValue = ( function removeBucketFromSet( filterValue: FilterValueNullable, filterCache: IFilterCache, - set: Set + set: Set ): void { if (filterValue === null) { // Edge case: $ne with `null` returns only the nodes that contain the full @@ -1003,13 +1081,13 @@ function removeBucketFromSet( * list that is also ordered by node.internal.counter */ export function intersectNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { let pointerA = 0 let pointerB = 0 // TODO: perf check: is it helpful to init the array to min(maxA,maxB) items? - const result: Array = [] + const result: Array = [] const maxA = a.length const maxB = b.length let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list @@ -1019,8 +1097,8 @@ export function intersectNodesByCounter( while (pointerA < maxA && pointerB < maxB) { const nodeA = getNode(a[pointerA].id) const nodeB = getNode(b[pointerB].id) - const counterA = a[pointerA].counter - const counterB = b[pointerB].counter + const counterA = a[pointerA].internal.counter + const counterB = b[pointerB].internal.counter if (counterA < counterB) { pointerA++ @@ -1056,11 +1134,11 @@ export function intersectNodesByCounter( * list that is also ordered by node.internal.counter */ export function unionNodesByCounter( - a: Array, - b: Array -): Array { + a: Array, + b: Array +): Array { // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items? - const arr: Array = [] + const arr: Array = [] let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list // TODO some optimization could be done here to not call getNode @@ -1119,11 +1197,11 @@ export function unionNodesByCounter( return arr } -function expensiveDedupeInline(arr: Array): void { +function expensiveDedupeInline(arr: Array): void { // An elemMatch filter may cause duplicates to appear in a bucket. // Since the bucket is sorted those should now be back to back // Worst case this is a fast O(n) loop that does nothing. 
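// Sketch of why reference equality is safe in the merge and dedupe loops
// above: partials are memoized per (id, counter), so repeated lookups hand
// back the same object. (The real cache holds WeakRefs; a plain Map is used
// here only to keep the sketch short.)
interface IPartialSketch {
  id: string
  internal: { counter: number }
}
const partialCacheSketch = new Map<string, IPartialSketch>()
function getPartialSketch(id: string, counter: number): IPartialSketch {
  const key = `${id}_____${counter}`
  let partial = partialCacheSketch.get(key)
  if (!partial) {
    partial = { id, internal: { counter } }
    partialCacheSketch.set(key, partial)
  }
  return partial
}
// getPartialSketch(`n1`, 1) === getPartialSketch(`n1`, 1), so checks like
// `lastAdded !== partial` behave like comparing (id, counter) pairs without
// ever calling getNode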
- let prev: IGatsbyNodeIdentifiers | undefined = undefined + let prev: IGatsbyNodePartial | undefined = undefined // We copy-on-find because a splice is expensive and we can't use Sets @@ -1141,3 +1219,23 @@ function expensiveDedupeInline(arr: Array): void { } arr.length = j } + +export function getSortFieldIdentifierKeys( + indexFields: Array, + resolvedFields: any +): Array { + const dottedFields = objectToDottedField(resolvedFields) + const dottedFieldKeys = Object.keys(dottedFields) + const fieldKeys = indexFields.map(field => { + if ( + dottedFields[field] || + dottedFieldKeys.some(key => field.startsWith(key)) + ) { + return `__gatsby_resolved.${field}` + } else { + return field + } + }) + + return fieldKeys +} diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts index f229a28b261b7..a818fa600c6af 100644 --- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts +++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts @@ -1,5 +1,4 @@ import { IGatsbyNode } from "../../redux/types" -import { getValueAt } from "../../utils/get-value-at" import _ from "lodash" import { DbQuery, @@ -7,7 +6,6 @@ import { IDbQueryElemMatch, IInputQuery, FilterValueNullable, - objectToDottedField, createDbQueriesFromObject, prefixResolvedFields, prepareQueryArgs, @@ -22,7 +20,9 @@ import { getNodesFromCacheByValue, intersectNodesByCounter, IFilterCache, - IGatsbyNodeIdentifiers, + IGatsbyNodePartial, + getSortFieldIdentifierKeys, + getGatsbyNodePartial, } from "./indexing" import { IGraphQLRunnerStats } from "../../query/types" import { IRunQueryArgs, IQueryResult } from "../types" @@ -36,6 +36,13 @@ export interface IRunFilterArg extends IRunQueryArgs { filtersCache: FiltersCache } +type ISortParameters = + | { + fields: Array + order: Array + } + | undefined + /** * Creates a key for one filterCache inside FiltersCache */ @@ -78,8 +85,10 @@ function createFilterCacheKey( export function applyFastFilters( filters: Array, nodeTypeNames: Array, - filtersCache: FiltersCache -): Array | null { + filtersCache: FiltersCache, + sortFields: Array, + resolvedFields: any +): Array | null { if (!filtersCache) { // If no filter cache is passed on, explicitly don't use one return null @@ -88,7 +97,9 @@ export function applyFastFilters( const nodesPerValueArrs = getBucketsForFilters( filters, nodeTypeNames, - filtersCache + filtersCache, + sortFields, + resolvedFields ) if (!nodesPerValueArrs) { @@ -106,10 +117,8 @@ export function applyFastFilters( while (nodesPerValueArrs.length > 1) { // TS limitation: cannot guard against .pop(), so we must double cast - const a = - nodesPerValueArrs.pop() as unknown as Array - const b = - nodesPerValueArrs.pop() as unknown as Array + const a = nodesPerValueArrs.pop() as unknown as Array + const b = nodesPerValueArrs.pop() as unknown as Array nodesPerValueArrs.push(intersectNodesByCounter(a, b)) } @@ -130,9 +139,11 @@ export function applyFastFilters( function getBucketsForFilters( filters: Array, nodeTypeNames: Array, - filtersCache: FiltersCache -): Array> | undefined { - const nodesPerValueArrs: Array> = [] + filtersCache: FiltersCache, + sortFields: Array, + resolvedFields: any +): Array> | undefined { + const nodesPerValueArrs: Array> = [] // Fail fast while trying to create and get the value-cache for each path const every = filters.every(filter => { @@ -145,7 +156,9 @@ function getBucketsForFilters( q, nodeTypeNames, filtersCache, - nodesPerValueArrs + nodesPerValueArrs, + 
sortFields, + resolvedFields ) } else { // (Let TS warn us if a new query type gets added) @@ -155,7 +168,9 @@ function getBucketsForFilters( q, nodeTypeNames, filtersCache, - nodesPerValueArrs + nodesPerValueArrs, + sortFields, + resolvedFields ) } }) @@ -177,7 +192,9 @@ function getBucketsForQueryFilter( filter: IDbQueryQuery, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array>, + sortFields: Array, + resolvedFields: any ): boolean { const { path: filterPath, @@ -185,23 +202,18 @@ function getBucketsForQueryFilter( } = filter if (!filtersCache.has(filterCacheKey)) { + // indexFields = sortFields ensureIndexByQuery( comparator as FilterOp, filterCacheKey, filterPath, nodeTypeNames, - filtersCache + filtersCache, + sortFields, + resolvedFields ) } - const readableWorkerId = process.env.GATSBY_WORKER_ID - ? `worker #${process.env.GATSBY_WORKER_ID}` - : `main` - - console.log( - `getBucketsForQueryFilter "${filterCacheKey}" start ${readableWorkerId}` - ) - const nodesPerValue = getNodesFromCacheByValue( filterCacheKey, filterValue as FilterValueNullable, @@ -217,10 +229,6 @@ function getBucketsForQueryFilter( // mechanism does not create an array unless there's a IGatsbyNode for it nodesPerValueArrs.push(nodesPerValue) - console.log( - `getBucketsForQueryFilter "${filterCacheKey}" end ${readableWorkerId}` - ) - return true } @@ -232,7 +240,9 @@ function collectBucketForElemMatch( filter: IDbQueryElemMatch, nodeTypeNames: Array, filtersCache: FiltersCache, - nodesPerValueArrs: Array> + nodesPerValueArrs: Array>, + sortFields: Array, + resolvedFields: any ): boolean { // Get comparator and target value for this elemMatch let comparator: FilterOp = `$eq` // (Must be overridden but TS requires init) @@ -249,14 +259,15 @@ function collectBucketForElemMatch( break } } - if (!filtersCache.has(filterCacheKey)) { ensureIndexByElemMatch( comparator, filterCacheKey, filter, nodeTypeNames, - filtersCache + filtersCache, + sortFields, + resolvedFields ) } @@ -303,13 +314,11 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult { nodeTypeNames, filtersCache, resolvedFields, - stats + stats, + sort ) - const resultingNodes = result - .map(nodeIds => getNode(nodeIds.id)) - .filter(isGatsbyNode) - const sortedResult = sortNodes(resultingNodes, sort, resolvedFields, stats) + const sortedResult = sortNodes(result, sort, resolvedFields, stats) const totalCount = async (): Promise => sortedResult.length const entries = @@ -317,7 +326,10 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult { ? sortedResult.slice(skip, limit ? skip + (limit ?? 0) : undefined) : sortedResult - return { entries: new GatsbyIterable(entries), totalCount } + const nodeObjects = entries + .map(nodeIds => getNode(nodeIds.id)) + .filter(isGatsbyNode) + return { entries: new GatsbyIterable(nodeObjects), totalCount } } /** @@ -328,8 +340,9 @@ function convertAndApplyFastFilters( nodeTypeNames: Array, filtersCache: FiltersCache, resolvedFields: Record, - stats: IGraphQLRunnerStats -): Array { + stats: IGraphQLRunnerStats, + sort: ISortParameters +): Array { const filters = filterFields ? 
prefixResolvedFields( createDbQueriesFromObject(prepareQueryArgs(filterFields)), @@ -355,19 +368,30 @@ function convertAndApplyFastFilters( if (filters.length === 0) { const filterCacheKey = createFilterCacheKey(nodeTypeNames, null) if (!filtersCache.has(filterCacheKey)) { - ensureEmptyFilterCache(filterCacheKey, nodeTypeNames, filtersCache) + ensureEmptyFilterCache( + filterCacheKey, + nodeTypeNames, + filtersCache, + sort?.fields || [], + resolvedFields + ) } // If there's a filter, there (now) must be an entry for this cache key const filterCache = filtersCache.get(filterCacheKey) as IFilterCache // If there is no filter then the ensureCache step will populate this: - const cache = filterCache.meta - .orderedByCounter as Array + const cache = filterCache.meta.orderedByCounter as Array return cache.slice(0) } - const result = applyFastFilters(filters, nodeTypeNames, filtersCache) + const result = applyFastFilters( + filters, + nodeTypeNames, + filtersCache, + sort?.fields || [], + resolvedFields + ) if (result) { if (stats) { @@ -411,37 +435,23 @@ function filterToStats( * Returns same reference as input, sorted inline */ function sortNodes( - nodes: Array, - sort: - | { - fields: Array - order: Array - } - | undefined, + nodes: Array, + sort: ISortParameters, resolvedFields: any, stats: IGraphQLRunnerStats -): Array { +): Array { if (!sort || sort.fields?.length === 0 || !nodes || nodes.length === 0) { return nodes } // create functions that return the item to compare on - const dottedFields = objectToDottedField(resolvedFields) - const dottedFieldKeys = Object.keys(dottedFields) - const sortFields = sort.fields.map(field => { - if ( - dottedFields[field] || - dottedFieldKeys.some(key => field.startsWith(key)) - ) { - return `__gatsby_resolved.${field}` - } else { - return field - } - }) + const sortFields = getSortFieldIdentifierKeys(sort.fields, resolvedFields) const sortFns = sortFields.map( field => (v): ((any) => any) => - getValueAt(v, field) + field in v + ? v[field] + : getGatsbyNodePartial(v, sort.fields, resolvedFields)[field] ) const sortOrder = sort.order.map(order => typeof order === `boolean` ? 
order : order.toLowerCase() From fa6d14f2448c9930743e138f297b608980f2cf0e Mon Sep 17 00:00:00 2001 From: Josh Date: Wed, 2 Feb 2022 16:18:27 -0500 Subject: [PATCH 34/42] Fix fast filters test --- .../datastore/__tests__/run-fast-filters.js | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js index 8b20b188a3e5f..aa2854960e820 100644 --- a/packages/gatsby/src/datastore/__tests__/run-fast-filters.js +++ b/packages/gatsby/src/datastore/__tests__/run-fast-filters.js @@ -401,7 +401,9 @@ describe(`applyFastFilters`, () => { const result = applyFastFilters( createDbQueriesFromObject(filter), [typeName], - new Map() + new Map(), + [], + [] ) expect(Array.isArray(result)).toBe(true) expect(result.length).toEqual(2) @@ -419,7 +421,9 @@ describe(`applyFastFilters`, () => { const result = applyFastFilters( createDbQueriesFromObject(filter), [typeName], - new Map() + new Map(), + [], + [] ) expect(Array.isArray(result)).toBe(true) expect(result.length).toEqual(2) @@ -438,7 +442,9 @@ describe(`applyFastFilters`, () => { const results = applyFastFilters( createDbQueriesFromObject(filter), [typeName], - new Map() + new Map(), + [], + [] ) // Count is irrelevant as long as it is non-zero and they all match filter @@ -458,7 +464,9 @@ describe(`applyFastFilters`, () => { const result = applyFastFilters( createDbQueriesFromObject(filter), [typeName], - new Map() + new Map(), + [], + [] ) expect(result).not.toBe(undefined) @@ -484,7 +492,9 @@ describe(`edge cases (yay)`, () => { const result = applyFastFilters( createDbQueriesFromObject(filter), [typeName], - new Map() + new Map(), + [], + [] ) // Sanity-check @@ -511,7 +521,13 @@ describe(`edge cases (yay)`, () => { await getDataStore().ready() const run = () => - applyFastFilters(createDbQueriesFromObject(filter), [typeName], new Map()) + applyFastFilters( + createDbQueriesFromObject(filter), + [typeName], + new Map(), + [], + [] + ) expect(run).toThrow( `Invariant violation: inconsistent node counters detected` From 9df231247b8fad87d1b840b04a650fb7673dee12 Mon Sep 17 00:00:00 2001 From: Josh Date: Mon, 7 Feb 2022 15:43:02 -0500 Subject: [PATCH 35/42] Remove debug logging --- packages/gatsby/src/datastore/in-memory/indexing.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 7b21dbda530a9..811ecce707dc5 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -246,13 +246,6 @@ export const ensureIndexByQuery = ( indexFields: Array, resolvedFields: any ): void => { - const readableWorkerId = process.env.GATSBY_WORKER_ID - ? 
`worker #${process.env.GATSBY_WORKER_ID}` - : `main` - - console.log( - `ensureIndexByQuery "${filterCacheKey}" start ${readableWorkerId}` - ) const state = store.getState() const resolvedNodesCache = state.resolvedNodesCache @@ -302,8 +295,6 @@ export const ensureIndexByQuery = ( } postIndexingMetaSetup(filterCache, op) - - console.log(`ensureIndexByQuery "${filterCacheKey}" end ${readableWorkerId}`) } export function ensureEmptyFilterCache( From 7c3c9fd7484c5fb55dcf307bec819d4614c5de8a Mon Sep 17 00:00:00 2001 From: Josh Date: Mon, 7 Feb 2022 15:46:39 -0500 Subject: [PATCH 36/42] Remove additional calls to getNode --- .../src/datastore/in-memory/indexing.ts | 44 +++++++------------ 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts index 811ecce707dc5..f3f9237b9e89d 100644 --- a/packages/gatsby/src/datastore/in-memory/indexing.ts +++ b/packages/gatsby/src/datastore/in-memory/indexing.ts @@ -1081,13 +1081,9 @@ export function intersectNodesByCounter( const result: Array = [] const maxA = a.length const maxB = b.length - let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list - - // TODO some optimization could be done here to not call getNode + let lastAdded: IGatsbyNodePartial | undefined = undefined // Used to dedupe the list while (pointerA < maxA && pointerB < maxB) { - const nodeA = getNode(a[pointerA].id) - const nodeB = getNode(b[pointerB].id) const counterA = a[pointerA].internal.counter const counterB = b[pointerB].internal.counter @@ -1096,7 +1092,7 @@ export function intersectNodesByCounter( } else if (counterA > counterB) { pointerB++ } else { - if (nodeA !== nodeB) { + if (a[pointerA] !== b[pointerB]) { throw new Error( `Invariant violation: inconsistent node counters detected` ) @@ -1105,9 +1101,9 @@ export function intersectNodesByCounter( // Since input arrays are sorted, the same node should be grouped // back to back, so even if both input arrays contained the same node // twice, this check would prevent the result from getting duplicate nodes - if (lastAdded !== nodeA) { + if (lastAdded !== a[pointerA]) { result.push(a[pointerA]) - lastAdded = nodeA + lastAdded = a[pointerA] } pointerA++ pointerB++ @@ -1130,9 +1126,7 @@ export function unionNodesByCounter( ): Array { // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items? const arr: Array = [] - let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list - - // TODO some optimization could be done here to not call getNode + let lastAdded: IGatsbyNodePartial | undefined = undefined // Used to dedupe the list let pointerA = 0 let pointerB = 0 @@ -1140,27 +1134,25 @@ export function unionNodesByCounter( const maxB = b.length while (pointerA < maxA && pointerB < maxB) { - const nodeA = getNode(a[pointerA].id)! - const nodeB = getNode(b[pointerB].id)! 
From 7c3c9fd7484c5fb55dcf307bec819d4614c5de8a Mon Sep 17 00:00:00 2001
From: Josh
Date: Mon, 7 Feb 2022 15:46:39 -0500
Subject: [PATCH 36/42] Remove additional calls to getNode

---
 .../src/datastore/in-memory/indexing.ts       | 44 +++++++------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 811ecce707dc5..f3f9237b9e89d 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -1081,13 +1081,9 @@ export function intersectNodesByCounter(
   const result: Array<IGatsbyNodePartial> = []
   const maxA = a.length
   const maxB = b.length
-  let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
-
-  // TODO some optimization could be done here to not call getNode
+  let lastAdded: IGatsbyNodePartial | undefined = undefined // Used to dedupe the list
 
   while (pointerA < maxA && pointerB < maxB) {
-    const nodeA = getNode(a[pointerA].id)
-    const nodeB = getNode(b[pointerB].id)
     const counterA = a[pointerA].internal.counter
     const counterB = b[pointerB].internal.counter
 
@@ -1096,7 +1092,7 @@ export function intersectNodesByCounter(
     } else if (counterA > counterB) {
       pointerB++
     } else {
-      if (nodeA !== nodeB) {
+      if (a[pointerA] !== b[pointerB]) {
         throw new Error(
           `Invariant violation: inconsistent node counters detected`
         )
@@ -1105,9 +1101,9 @@ export function intersectNodesByCounter(
       // Since input arrays are sorted, the same node should be grouped
       // back to back, so even if both input arrays contained the same node
       // twice, this check would prevent the result from getting duplicate nodes
-      if (lastAdded !== nodeA) {
+      if (lastAdded !== a[pointerA]) {
         result.push(a[pointerA])
-        lastAdded = nodeA
+        lastAdded = a[pointerA]
       }
       pointerA++
       pointerB++
@@ -1130,9 +1126,7 @@ export function unionNodesByCounter(
 ): Array<IGatsbyNodePartial> {
   // TODO: perf check: is it helpful to init the array to max(maxA,maxB) items?
   const arr: Array<IGatsbyNodePartial> = []
-  let lastAdded: IGatsbyNode | undefined = undefined // Used to dedupe the list
-
-  // TODO some optimization could be done here to not call getNode
+  let lastAdded: IGatsbyNodePartial | undefined = undefined // Used to dedupe the list
 
   let pointerA = 0
   let pointerB = 0
 
   const maxA = a.length
   const maxB = b.length
 
   while (pointerA < maxA && pointerB < maxB) {
-    const nodeA = getNode(a[pointerA].id)!
-    const nodeB = getNode(b[pointerB].id)!
-
-    const counterA = nodeA.internal.counter
-    const counterB = nodeB.internal.counter
+    const counterA = a[pointerA].internal.counter
+    const counterB = b[pointerB].internal.counter
 
     if (counterA < counterB) {
-      if (lastAdded !== nodeA) {
+      if (lastAdded !== a[pointerA]) {
         arr.push(a[pointerA])
-        lastAdded = nodeA
+        lastAdded = a[pointerA]
       }
       pointerA++
     } else if (counterA > counterB) {
-      if (lastAdded !== nodeB) {
+      if (lastAdded !== b[pointerB]) {
         arr.push(b[pointerB])
-        lastAdded = nodeB
+        lastAdded = b[pointerB]
       }
       pointerB++
     } else {
-      if (lastAdded !== nodeA) {
+      if (lastAdded !== a[pointerA]) {
         arr.push(a[pointerA])
-        lastAdded = nodeA
+        lastAdded = a[pointerA]
       }
       pointerA++
       pointerB++
@@ -1168,19 +1160,17 @@ export function unionNodesByCounter(
   }
 
   while (pointerA < maxA) {
-    const nodeA = getNode(a[pointerA].id)!
-    if (lastAdded !== nodeA) {
+    if (lastAdded !== a[pointerA]) {
       arr.push(a[pointerA])
-      lastAdded = nodeA
+      lastAdded = a[pointerA]
     }
     pointerA++
   }
 
   while (pointerB < maxB) {
-    const nodeB = getNode(b[pointerB].id)!
-    if (lastAdded !== nodeB) {
+    if (lastAdded !== b[pointerB]) {
       arr.push(b[pointerB])
-      lastAdded = nodeB
+      lastAdded = b[pointerB]
     }
     pointerB++
   }
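
Both merge helpers now compare array elements directly instead of dereferencing each id through getNode. That only works because the inputs are sorted by internal.counter and equal counters are guaranteed to mean the same partial object. A self-contained sketch of the same two-pointer walk (simplified shapes, not the patch's types):

interface PartialLike {
  id: string
  counter: number // stands in for node.internal.counter
}

function intersectSorted(
  a: Array<PartialLike>,
  b: Array<PartialLike>
): Array<PartialLike> {
  const out: Array<PartialLike> = []
  let i = 0
  let j = 0
  while (i < a.length && j < b.length) {
    if (a[i].counter < b[j].counter) i++
    else if (a[i].counter > b[j].counter) j++
    else {
      // Equal counters must be the same object, so comparing against the
      // previously pushed element is enough to dedupe -- no store lookup.
      if (out[out.length - 1] !== a[i]) out.push(a[i])
      i++
      j++
    }
  }
  return out
}
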
From 3bd14c85559cb61379503399509e8bbf2e7981d7 Mon Sep 17 00:00:00 2001
From: Josh
Date: Mon, 7 Feb 2022 15:57:44 -0500
Subject: [PATCH 37/42] More optimization and comments for getGatsbyNodePartial

---
 .../src/datastore/in-memory/indexing.ts       | 34 ++++++++++++++-----
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index f3f9237b9e89d..425c6e1e4cc43 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -41,14 +41,23 @@
 const nodeIdToIdentifierMap = new Map<
   GatsbyNodeID,
   WeakRef<IGatsbyNodePartial>
 >()
 
+/**
+ * Grabs an instance of IGatsbyNodePartial for the given node.
+ * This accepts an IGatsbyNode or IGatsbyNodePartial as input, which allows
+ * us to conditionally store index fields on the partial if we encounter
+ * one that hasn't been stored on the partial yet.
+ */
 export const getGatsbyNodePartial = (
   node: IGatsbyNode | IGatsbyNodePartial,
   indexFields: Array<string>,
   resolvedFields: any
 ): IGatsbyNodePartial => {
+  // first, check if we have the partial in the cache
   const cacheKey = `${node.id}_____${node.internal.counter}`
   if (nodeIdToIdentifierMap.has(cacheKey)) {
     const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+
+    // now check if we have it in memory and it has all the fields we need
     if (
       maybeStillExist &&
       _.isEqual(new Set(indexFields), maybeStillExist.indexFields)
@@ -57,22 +66,28 @@ export const getGatsbyNodePartial = (
     }
   }
 
+  // find all the keys of fields and store them and their values on the partial
   const dottedFields = {}
+  const sortFieldIds = getSortFieldIdentifierKeys(indexFields, resolvedFields)
+  let fullNodeObject: IGatsbyNode | undefined = node.isGatsbyNodePartial
+    ? undefined
+    : (node as IGatsbyNode)
 
-  for (const dottedField of getSortFieldIdentifierKeys(
-    indexFields,
-    resolvedFields
-  )) {
+  for (const dottedField of sortFieldIds) {
     if (dottedField in node) {
       dottedFields[dottedField] = node[dottedField]
     } else {
-      dottedFields[dottedField] = getValueAt(
-        node.isGatsbyNodePartial ? getNode(node.id)! : node,
-        dottedField
-      )
+      // if we haven't gotten the full node object, fetch it once
+      if (!fullNodeObject) {
+        fullNodeObject = getNode(node.id)!
+      }
+
+      // use the full node object to fetch the value
+      dottedFields[dottedField] = getValueAt(fullNodeObject, dottedField)
     }
   }
 
+  // create the partial object
   const partial = Object.assign(dottedFields, {
     isGatsbyNodePartial: true,
     id: node.id,
@@ -81,7 +96,10 @@ export const getGatsbyNodePartial = (
     },
     indexFields: new Set(indexFields),
   })
+
+  // set the object in the cache for later fetching
   nodeIdToIdentifierMap.set(cacheKey, new WeakRef(partial))
+
   return partial
 }
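
The cache above holds WeakRefs so the garbage collector may reclaim partials between queries, which means a hit has to survive two checks: the referent must still be alive, and it must carry the fields the current query needs. A toy model of that lookup (names are illustrative, not the patch's):

interface PartialStub {
  indexFields: Set<string>
}

const cache = new Map<string, WeakRef<PartialStub>>()

function getCached(
  cacheKey: string,
  neededFields: Array<string>
): PartialStub | undefined {
  // deref() returns undefined once the GC has collected the partial
  const partial = cache.get(cacheKey)?.deref()
  if (partial && neededFields.every(f => partial.indexFields.has(f))) {
    return partial // alive, and built with every field this query sorts on
  }
  return undefined
}
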
From 99bd831e30aad1c850fab0a7ed32e44394b27498 Mon Sep 17 00:00:00 2001
From: Josh
Date: Wed, 9 Feb 2022 09:15:34 -0500
Subject: [PATCH 38/42] Add types and fix indexFields check

---
 .../src/datastore/in-memory/indexing.ts       | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 425c6e1e4cc43..8f9f60748d129 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -50,7 +50,7 @@
 export const getGatsbyNodePartial = (
   node: IGatsbyNode | IGatsbyNodePartial,
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): IGatsbyNodePartial => {
   // first, check if we have the partial in the cache
   const cacheKey = `${node.id}_____${node.internal.counter}`
@@ -60,7 +60,7 @@ export const getGatsbyNodePartial = (
     // now check if we have it in memory and it has all the fields we need
     if (
       maybeStillExist &&
-      _.isEqual(new Set(indexFields), maybeStillExist.indexFields)
+      _.every(indexFields.map(field => maybeStillExist.indexFields.has(field)))
     ) {
       return maybeStillExist
     }
@@ -262,7 +262,7 @@ export const ensureIndexByQuery = (
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): void => {
   const state = store.getState()
   const resolvedNodesCache = state.resolvedNodesCache
@@ -320,7 +320,7 @@ export function ensureEmptyFilterCache(
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): void {
   // This is called for queries without any filters
   // We want to cache the result since it's basically a list of nodes by type(s)
@@ -387,7 +387,7 @@ function addNodeToFilterCache(
   filterCache: IFilterCache,
   resolvedNodesCache,
   indexFields: Array<string>,
-  resolvedFields: any,
+  resolvedFields: Record<string, any>,
   valueOffset: any = node
 ): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
@@ -440,7 +440,7 @@ function markNodeForValue(
   node: IGatsbyNode,
   value: FilterValueNullable,
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): void {
   let arr = filterCache.byValue.get(value)
   if (!arr) {
@@ -461,7 +461,7 @@ export const ensureIndexByElemMatch = (
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache,
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): void => {
   // Given an elemMatch filter, generate the cache that contains all nodes that
   // matches a given value for that sub-query
@@ -521,7 +521,7 @@ function addNodeToBucketWithElemMatch(
   filterCache: IFilterCache,
   resolvedNodesCache,
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
   if (!node.__gatsby_resolved) {
@@ -674,8 +674,6 @@ export const getNodesFromCacheByValue = (
     return undefined
   }
 
-  // TODO we need to pass indexFields here and reload identifiers to be able to sort properly
-
   const op = filterCache.op
 
   if (op === `$eq`) {
@@ -1221,7 +1219,7 @@ function expensiveDedupeInline(arr: Array<IGatsbyNodePartial>): void {
 
 export function getSortFieldIdentifierKeys(
   indexFields: Array<string>,
-  resolvedFields: any
+  resolvedFields: Record<string, any>
 ): Array<string> {
   const dottedFields = objectToDottedField(resolvedFields)
   const dottedFieldKeys = Object.keys(dottedFields)
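
The check rewrite matters more than it looks: `_.isEqual(new Set(indexFields), cached.indexFields)` treated a cached partial holding a superset of the needed fields as a miss, while only containment is actually required. Illustrative values (lodash imported as in the module):

import _ from "lodash"

const cached = new Set(["date", "title"]) // built for an earlier two-field sort
const needed = ["title"]                  // current query needs just one field

_.isEqual(new Set(needed), cached)              // false: strict equality -> miss
_.every(needed.map(field => cached.has(field))) // true: containment -> hit

Plain `needed.every(field => cached.has(field))` would behave the same without allocating the intermediate boolean array.
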
From 1341fb5a8afe9c1918399013333d1eeb5767a5e6 Mon Sep 17 00:00:00 2001
From: Josh
Date: Wed, 9 Feb 2022 09:50:11 -0500
Subject: [PATCH 39/42] Make function args an object for internal functions

---
 .../src/datastore/in-memory/indexing.ts       | 96 ++++++++++---------
 1 file changed, 56 insertions(+), 40 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 8f9f60748d129..bf13d11bd3be5 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -282,14 +282,14 @@ export const ensureIndexByQuery = (
     getDataStore()
       .iterateNodesByType(nodeTypeNames[0])
       .forEach(node => {
-        addNodeToFilterCache(
+        addNodeToFilterCache({
           node,
-          filterPath,
+          chain: filterPath,
           filterCache,
           resolvedNodesCache,
           indexFields,
-          resolvedFields
-        )
+          resolvedFields,
+        })
       })
   } else {
     // Here we must first filter for the node type
@@ -301,14 +301,14 @@ export const ensureIndexByQuery = (
         return
       }
 
-      addNodeToFilterCache(
+      addNodeToFilterCache({
         node,
-        filterPath,
+        chain: filterPath,
         filterCache,
         resolvedNodesCache,
         indexFields,
-        resolvedFields
-      )
+        resolvedFields,
+      })
     })
   }
 
@@ -381,15 +381,23 @@ export function ensureEmptyFilterCache(
   orderedByCounter.sort(sortByIds)
 }
 
-function addNodeToFilterCache(
-  node: IGatsbyNode,
-  chain: Array<string>,
-  filterCache: IFilterCache,
+function addNodeToFilterCache({
+  node,
+  chain,
+  filterCache,
   resolvedNodesCache,
-  indexFields: Array<string>,
-  resolvedFields: Record<string, any>,
-  valueOffset: any = node
-): void {
+  indexFields,
+  resolvedFields,
+  valueOffset = node,
+}: {
+  node: IGatsbyNode
+  chain: Array<string>
+  filterCache: IFilterCache
+  resolvedNodesCache: Map<string, any>
+  indexFields: Array<string>
+  resolvedFields: Record<string, any>
+  valueOffset?: any
+}): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
   if (!node.__gatsby_resolved) {
     const typeName = node.internal.type
@@ -480,15 +488,15 @@ export const ensureIndexByElemMatch = (
     getDataStore()
       .iterateNodesByType(nodeTypeNames[0])
       .forEach(node => {
-        addNodeToBucketWithElemMatch(
-          node,
+        addNodeToBucketWithElemMatch({
           node,
+          valueAtCurrentStep: node,
           filter,
           filterCache,
           resolvedNodesCache,
           indexFields,
-          resolvedFields
-        )
+          resolvedFields,
+        })
       })
   } else {
     // Expensive at scale
@@ -499,30 +507,38 @@ export const ensureIndexByElemMatch = (
         return
       }
 
-      addNodeToBucketWithElemMatch(
-        node,
+      addNodeToBucketWithElemMatch({
         node,
+        valueAtCurrentStep: node,
         filter,
         filterCache,
         resolvedNodesCache,
         indexFields,
-        resolvedFields
-      )
+        resolvedFields,
+      })
     })
   }
 
   postIndexingMetaSetup(filterCache, op)
 }
 
-function addNodeToBucketWithElemMatch(
-  node: IGatsbyNode,
-  valueAtCurrentStep: any, // Arbitrary step on the path inside the node
-  filter: IDbQueryElemMatch,
-  filterCache: IFilterCache,
+function addNodeToBucketWithElemMatch({
+  node,
+  valueAtCurrentStep, // Arbitrary step on the path inside the node
+  filter,
+  filterCache,
   resolvedNodesCache,
-  indexFields: Array<string>,
+  indexFields,
+  resolvedFields,
+}: {
+  node: IGatsbyNode
+  valueAtCurrentStep: any // Arbitrary step on the path inside the node
+  filter: IDbQueryElemMatch
+  filterCache: IFilterCache
+  resolvedNodesCache
+  indexFields: Array<string>
   resolvedFields: Record<string, any>
-): void {
+}): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
   if (!node.__gatsby_resolved) {
     const typeName = node.internal.type
@@ -556,26 +572,26 @@ function addNodeToBucketWithElemMatch(
   // work when elements resolve to the same value, but that can't be helped.
   valueAtCurrentStep.forEach(elem => {
     if (nestedQuery.type === `elemMatch`) {
-      addNodeToBucketWithElemMatch(
+      addNodeToBucketWithElemMatch({
         node,
-        elem,
-        nestedQuery,
+        valueAtCurrentStep: elem,
+        filter: nestedQuery,
         filterCache,
         resolvedNodesCache,
         indexFields,
-        resolvedFields
-      )
+        resolvedFields,
+      })
     } else {
       // Now take same route as non-elemMatch filters would take
-      addNodeToFilterCache(
+      addNodeToFilterCache({
        node,
-        nestedQuery.path,
+        chain: nestedQuery.path,
        filterCache,
         resolvedNodesCache,
         indexFields,
         resolvedFields,
-        elem
-      )
+        valueOffset: elem,
+      })
     }
   })
 }
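
The positional versions were easy to miscall; the elemMatch recursion, for instance, passed `node` twice and slotted `elem` into a different position per branch. With a single object argument each value is named at the call site, so a transposition no longer type-checks silently. A minimal illustration of the pattern (not the patch's real types):

interface INodeLike {
  id: string
}

function addToCache(args: {
  node: INodeLike
  chain: Array<string>
  valueOffset?: unknown
}): void {
  // cache write elided -- the point is the call-site shape below
}

const node: INodeLike = { id: "node-1" }
// every argument is labeled, so property order is irrelevant and
// swapping two same-typed values can no longer happen by accident:
addToCache({ chain: ["fields", "date"], node, valueOffset: 42 })
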
From 999e9c658e83de4e7207e68f3d4cb0d302211a29 Mon Sep 17 00:00:00 2001
From: Josh
Date: Thu, 10 Feb 2022 10:14:17 -0500
Subject: [PATCH 40/42] Rename deref var and move partial internals to new
 object on partial

---
 .../src/datastore/in-memory/indexing.ts       | 27 +++++++++++--------
 .../datastore/in-memory/run-fast-filters.ts   |  4 +---
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index bf13d11bd3be5..2dac09ba08c31 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -27,12 +27,13 @@ export type FilterCacheKey = string
 type GatsbyNodeID = string
 
 export interface IGatsbyNodePartial {
-  isGatsbyNodePartial: boolean
   id: GatsbyNodeID
   internal: {
     counter: number
   }
-  indexFields: Set<string>
+  gatsbyNodePartialInternalData: {
+    indexFields: Set<string>
+  }
   [k: string]: any
 }
 
@@ -55,23 +56,26 @@ export const getGatsbyNodePartial = (
   // first, check if we have the partial in the cache
   const cacheKey = `${node.id}_____${node.internal.counter}`
   if (nodeIdToIdentifierMap.has(cacheKey)) {
-    const maybeStillExist = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+    const derefPartial = nodeIdToIdentifierMap.get(cacheKey)?.deref()
 
     // now check if we have it in memory and it has all the fields we need
     if (
-      maybeStillExist &&
-      _.every(indexFields.map(field => maybeStillExist.indexFields.has(field)))
+      derefPartial &&
+      _.every(
+        indexFields.map(field =>
+          derefPartial.gatsbyNodePartialInternalData.indexFields.has(field)
+        )
+      )
     ) {
-      return maybeStillExist
+      return derefPartial
     }
   }
 
   // find all the keys of fields and store them and their values on the partial
   const dottedFields = {}
   const sortFieldIds = getSortFieldIdentifierKeys(indexFields, resolvedFields)
-  let fullNodeObject: IGatsbyNode | undefined = node.isGatsbyNodePartial
-    ? undefined
-    : (node as IGatsbyNode)
+  let fullNodeObject: IGatsbyNode | undefined =
+    node.gatsbyNodePartialInternalData ? undefined : (node as IGatsbyNode)
 
   for (const dottedField of sortFieldIds) {
     if (dottedField in node) {
@@ -89,12 +93,13 @@ export const getGatsbyNodePartial = (
 
   // create the partial object
   const partial = Object.assign(dottedFields, {
-    isGatsbyNodePartial: true,
     id: node.id,
     internal: {
       counter: node.internal.counter,
     },
-    indexFields: new Set(indexFields),
+    gatsbyNodePartialInternalData: {
+      indexFields: new Set(indexFields),
+    },
   })
 
   // set the object in the cache for later fetching
diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index a818fa600c6af..55810dd153819 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -326,9 +326,7 @@ export function runFastFiltersAndSort(args: IRunFilterArg): IQueryResult {
     ? sortedResult.slice(skip, limit ? skip + (limit ?? 0) : undefined)
     : sortedResult
 
-  const nodeObjects = entries
-    .map(nodeIds => getNode(nodeIds.id))
-    .filter(isGatsbyNode)
+  const nodeObjects = entries.map(nodeIds => getNode(nodeIds.id)!)
 
   return { entries: new GatsbyIterable(nodeObjects), totalCount }
 }

From 095e814536d17d6be4e7e12e78657658d81e87d3 Mon Sep 17 00:00:00 2001
From: Josh
Date: Thu, 10 Feb 2022 10:36:51 -0500
Subject: [PATCH 41/42] TS fix

---
 packages/gatsby/src/datastore/in-memory/run-fast-filters.ts | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
index 55810dd153819..23605f9896c9d 100644
--- a/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
+++ b/packages/gatsby/src/datastore/in-memory/run-fast-filters.ts
@@ -1,4 +1,3 @@
-import { IGatsbyNode } from "../../redux/types"
 import _ from "lodash"
 import {
   DbQuery,
@@ -29,9 +28,6 @@ import { IRunQueryArgs, IQueryResult } from "../types"
 import { GatsbyIterable } from "../common/iterable"
 import { getNode } from "../"
 
-function isGatsbyNode(node: IGatsbyNode | undefined): node is IGatsbyNode {
-  return !!node
-}
 export interface IRunFilterArg extends IRunQueryArgs {
   filtersCache: FiltersCache
 }
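
With the guard gone, `getNode(nodeIds.id)!` leans on the invariant that every id emitted by the sort stage still resolves in the store. The two styles fail differently, which is the real tradeoff (self-contained sketch, not the module's code):

interface NodeStub {
  id: string
}

const store = new Map<string, NodeStub>([
  ["a", { id: "a" }],
  ["b", { id: "b" }],
])
const lookup = (id: string): NodeStub | undefined => store.get(id)

const ids = ["a", "b"]

// Guard style: a missing node is silently dropped from query results.
const guarded = ids.map(lookup).filter((n): n is NodeStub => !!n)

// Assertion style: a missing node crashes near the cause instead of
// surfacing later as mysteriously absent entries.
const asserted = ids.map(id => lookup(id)!)
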
From 04fb87489d567d0837e59cf20f2c82bd56b47ffe Mon Sep 17 00:00:00 2001
From: Josh
Date: Thu, 10 Feb 2022 15:15:52 -0500
Subject: [PATCH 42/42] Merge sets of indexFields

---
 .../src/datastore/in-memory/indexing.ts       | 20 ++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/packages/gatsby/src/datastore/in-memory/indexing.ts b/packages/gatsby/src/datastore/in-memory/indexing.ts
index 2dac09ba08c31..fae28594700eb 100644
--- a/packages/gatsby/src/datastore/in-memory/indexing.ts
+++ b/packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -55,15 +55,16 @@ export const getGatsbyNodePartial = (
 ): IGatsbyNodePartial => {
   // first, check if we have the partial in the cache
   const cacheKey = `${node.id}_____${node.internal.counter}`
+  let derefPartial: IGatsbyNodePartial | undefined = undefined
   if (nodeIdToIdentifierMap.has(cacheKey)) {
-    const derefPartial = nodeIdToIdentifierMap.get(cacheKey)?.deref()
+    derefPartial = nodeIdToIdentifierMap.get(cacheKey)?.deref()
 
     // now check if we have it in memory and it has all the fields we need
     if (
       derefPartial &&
       _.every(
         indexFields.map(field =>
-          derefPartial.gatsbyNodePartialInternalData.indexFields.has(field)
+          derefPartial!.gatsbyNodePartialInternalData.indexFields.has(field)
         )
       )
     ) {
@@ -72,8 +73,19 @@ export const getGatsbyNodePartial = (
     }
   }
 
   // find all the keys of fields and store them and their values on the partial
+  // if we've already passed this partial, merge both sets of index fields
   const dottedFields = {}
-  const sortFieldIds = getSortFieldIdentifierKeys(indexFields, resolvedFields)
+  const fieldsToStore = derefPartial
+    ? new Set([
+        ...derefPartial.gatsbyNodePartialInternalData.indexFields,
+        ...indexFields,
+      ])
+    : new Set(indexFields)
+
+  const sortFieldIds = getSortFieldIdentifierKeys(
+    [...fieldsToStore],
+    resolvedFields
+  )
   let fullNodeObject: IGatsbyNode | undefined =
     node.gatsbyNodePartialInternalData ? undefined : (node as IGatsbyNode)
 
@@ -98,7 +110,7 @@ export const getGatsbyNodePartial = (
       counter: node.internal.counter,
     },
     gatsbyNodePartialInternalData: {
-      indexFields: new Set(indexFields),
+      indexFields: fieldsToStore,
    },
   })
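
The merge closes the last gap: two queries sorting the same node type on different fields previously fought over the cache slot, each rebuilding the partial with only its own fields. Merging keeps one partial that satisfies both containment checks (illustrative values):

const existing = new Set(["date"]) // partial cached by an earlier sort on date
const incoming = ["title"]         // the next query sorts on title

const merged = new Set([...existing, ...incoming])
// merged -> Set { "date", "title" }: the rebuilt partial now hits the cache
// for either sort instead of thrashing between the two field sets
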