Skip to content

Commit

Permalink
refactor(gatsby): new dirty tracking implementation for queries (#27504)
Browse files Browse the repository at this point in the history
* Remove dead code

* Consistently use job hash for static-query identification on the frontend (and queryId in node)

* New reducer to track the state of queries

* tmp: output newly calculated dirty queries (still using old calculation for actual query running to compare)

* Add new `QUERY_START` action

* Remove redundant component-data-dependencies reducer (now handled in the queries reducer)

* Actually use the new query tracking (and remove the old one)

* Fix data-tracking test

* Shape of tracked component state should match component reducer

* remove page-component machine (as we track query state in `queries` reducer now)

* Remove DELETE_COMPONENTS_DEPENDENCIES action

* Cleanup

* Cleanup

* Re-enable previously skipped test

* Cleanup

* Do-not re-run queries with babel extraction errors

* WIP: tests for the queries reducer

* Track babel errors per component (not per page/static query)

* tests for the queries reducer

* rename test

* Cleanup / update snapshots

* Add missing snapshot

* fix integration tests?

* Revert "fix integration tests?"

This reverts commit 066a4e1

* Restore DELETE_COMPONENTS_DEPENDENCIES as a no-op for BC

* Take into account deletePage/createPage pattern in onCreatePage

* Update test snapshot

* Do not mark page query as dirty when component has babel errors

* Use flag constants vs. literal values in tests

* Rename FLAG_ERROR_BABEL to FLAG_ERROR_EXTRACTION
  • Loading branch information
vladar authored Nov 5, 2020
1 parent e824841 commit 9d322a4
Show file tree
Hide file tree
Showing 23 changed files with 1,322 additions and 954 deletions.
3 changes: 1 addition & 2 deletions packages/gatsby/src/bootstrap/page-hot-reloader.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { emitter, store } from "../redux"
import apiRunnerNode from "../utils/api-runner-node"
import { boundActionCreators } from "../redux/actions"
const { deletePage, deleteComponentsDependencies } = boundActionCreators
const { deletePage } = boundActionCreators
import report from "gatsby-cli/lib/reporter"
import {
ICreateNodeAction,
Expand Down Expand Up @@ -39,7 +39,6 @@ const runCreatePages = async (): Promise<void> => {
page.updatedAt < timestamp &&
page.path !== `/404.html`
) {
deleteComponentsDependencies([page.path])
deletePage(page)
}
})
Expand Down
24 changes: 13 additions & 11 deletions packages/gatsby/src/query/__tests__/data-tracking.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,18 @@ const setup = async ({ restart = isFirstRun, clearCache = false } = {}) => {
})

Object.entries(staticQueries).forEach(([id, query]) => {
store.dispatch({
type: `REPLACE_STATIC_QUERY`,
payload: {
id: `sq--${id}`,
hash: `sq--${id}`,
query,
},
})
// Mimic real code behavior by only calling this action when static query text changes
const lastQuery = mockPersistedState.staticQueryComponents?.get(`sq--${id}`)
if (lastQuery?.query !== query) {
store.dispatch({
type: `REPLACE_STATIC_QUERY`,
payload: {
id: `sq--${id}`,
hash: `sq--${id}`,
query,
},
})
}
})

const queryIds = queryUtil.calcInitialDirtyQueryIds(store.getState())
Expand Down Expand Up @@ -1238,9 +1242,7 @@ describe(`query caching between builds`, () => {
expect(staticQueriesThatRan).toEqual([])
}, 999999)

// TO-DO: this is known issue - we always rerun queries for pages with no dependencies
// this mean that we will retry to rerun them every time we restart gatsby
it.skip(`rerunning should not run any queries (with restart)`, async () => {
it(`rerunning should not run any queries (with restart)`, async () => {
const {
pathsOfPagesWithQueriesThatRan,
staticQueriesThatRan,
Expand Down
305 changes: 28 additions & 277 deletions packages/gatsby/src/query/index.js
Original file line number Diff line number Diff line change
@@ -1,152 +1,36 @@
// @flow

const _ = require(`lodash`)
const Queue = require(`better-queue`)
// const convertHrtime = require(`convert-hrtime`)
const { store, emitter } = require(`../redux`)
const { boundActionCreators } = require(`../redux/actions`)
const report = require(`gatsby-cli/lib/reporter`)
const { store } = require(`../redux`)
const { hasFlag, FLAG_ERROR_EXTRACTION } = require(`../redux/reducers/queries`)
const queryQueue = require(`./queue`)
const { GraphQLRunner } = require(`./graphql-runner`)
const pageDataUtil = require(`../utils/page-data`)

const seenIdsWithoutDataDependencies = new Set()
let queuedDirtyActions = []
const extractedQueryIds = new Set()

// Remove pages from seenIdsWithoutDataDependencies when they're deleted
// so their query will be run again if they're created again.
emitter.on(`DELETE_PAGE`, action => {
seenIdsWithoutDataDependencies.delete(action.payload.path)
})

emitter.on(`CREATE_NODE`, action => {
queuedDirtyActions.push(action)
})

emitter.on(`DELETE_NODE`, action => {
queuedDirtyActions.push({ payload: action.payload })
})

// ///////////////////////////////////////////////////////////////////
// Calculate dirty static/page queries

const popExtractedQueries = () => {
const queries = [...extractedQueryIds]
extractedQueryIds.clear()
return queries
}

const findIdsWithoutDataDependencies = state => {
const allTrackedIds = new Set()
const boundAddToTrackedIds = allTrackedIds.add.bind(allTrackedIds)
state.componentDataDependencies.nodes.forEach(dependenciesOnNode => {
dependenciesOnNode.forEach(boundAddToTrackedIds)
})
state.componentDataDependencies.connections.forEach(
dependenciesOnConnection => {
dependenciesOnConnection.forEach(boundAddToTrackedIds)
/**
* Calculates the set of dirty query IDs (page.paths, or staticQuery.id's).
*
* Dirty state is tracked in `queries` reducer, here we simply filter
* them from all tracked queries.
*/
const calcDirtyQueryIds = state => {
const { trackedQueries, trackedComponents, deletedQueries } = state.queries

const queriesWithBabelErrors = new Set()
for (const component of trackedComponents.values()) {
if (hasFlag(component.errors, FLAG_ERROR_EXTRACTION)) {
for (const queryId of component.pages) {
queriesWithBabelErrors.add(queryId)
}
}
)

// Get list of paths not already tracked and run the queries for these
// paths.
const notTrackedIds = new Set(
[
...Array.from(state.pages.values(), p => p.path),
...[...state.staticQueryComponents.values()].map(c => c.id),
].filter(
x => !allTrackedIds.has(x) && !seenIdsWithoutDataDependencies.has(x)
)
)

// Add new IDs to our seen array so we don't keep trying to run queries for them.
// Pages without queries can't be tracked.
for (const notTrackedId of notTrackedIds) {
seenIdsWithoutDataDependencies.add(notTrackedId)
}

return notTrackedIds
}

const popNodeQueries = state => {
const actions = _.uniq(queuedDirtyActions, a => a.payload.id)
const uniqDirties = actions.reduce((dirtyIds, action) => {
const node = action.payload

if (!node || !node.id || !node.internal.type) return dirtyIds

// Find components that depend on this node so are now dirty.
if (state.componentDataDependencies.nodes.has(node.id)) {
state.componentDataDependencies.nodes.get(node.id).forEach(n => {
if (n) {
dirtyIds.add(n)
}
})
// Note: trackedQueries contains both - page and static query ids
const dirtyQueryIds = []
for (const [queryId, query] of trackedQueries) {
if (deletedQueries.has(queryId)) {
continue
}

// Find connections that depend on this node so are now invalid.
if (state.componentDataDependencies.connections.has(node.internal.type)) {
state.componentDataDependencies.connections
.get(node.internal.type)
.forEach(n => {
if (n) {
dirtyIds.add(n)
}
})
if (query.dirty > 0 && !queriesWithBabelErrors.has(queryId)) {
dirtyQueryIds.push(queryId)
}

return dirtyIds
}, new Set())

boundActionCreators.deleteComponentsDependencies([...uniqDirties])

queuedDirtyActions = []
return uniqDirties
}

const popNodeAndDepQueries = state => {
const nodeQueries = popNodeQueries(state)

const noDepQueries = findIdsWithoutDataDependencies(state)

return _.uniq([...nodeQueries, ...noDepQueries])
}

/**
* Calculates the set of dirty query IDs (page.paths, or
* staticQuery.hash's). These are queries that:
*
* - depend on nodes or node collections (via
* `actions.createPageDependency`) that have changed.
* - do NOT have node dependencies. Since all queries should return
* data, then this implies that node dependencies have not been
* tracked, and therefore these queries haven't been run before
* - have been recently extracted (see `./query-watcher.js`)
*
* Note, this function pops queries off internal queues, so it's up
* to the caller to reference the results
*/

const calcDirtyQueryIds = state =>
_.union(popNodeAndDepQueries(state), popExtractedQueries())

/**
* Same as `calcDirtyQueryIds`, except that we only include extracted
* queries that depend on nodes or haven't been run yet. We do this
* because the page component reducer/machine always enqueues
* extractedQueryIds but during bootstrap we may not want to run those
* page queries if their data hasn't changed since the last time we
* ran Gatsby.
*/
const calcInitialDirtyQueryIds = state => {
const nodeAndNoDepQueries = popNodeAndDepQueries(state)

const extractedQueriesThatNeedRunning = _.intersection(
popExtractedQueries(),
nodeAndNoDepQueries
)
return _.union(extractedQueriesThatNeedRunning, nodeAndNoDepQueries)
}
return dirtyQueryIds
}

/**
Expand Down Expand Up @@ -176,37 +60,14 @@ const createStaticQueryJob = (state, queryId) => {
const component = state.staticQueryComponents.get(queryId)
const { hash, id, query, componentPath } = component
return {
id: hash,
id: queryId,
hash,
query,
componentPath,
context: { path: id },
}
}

/**
* Creates activity object which:
* - creates actual progress activity if there are any queries that need to be run
* - creates activity-like object that just cancels pending activity if there are no queries to run
*/
const createQueryRunningActivity = (queryJobsCount, parentSpan) => {
if (queryJobsCount) {
const activity = report.createProgress(`run queries`, queryJobsCount, 0, {
id: `query-running`,
parentSpan,
})
activity.start()
return activity
} else {
return {
done: () => {
report.completeActivity(`query-running`)
},
tick: () => {},
}
}
}

const processStaticQueries = async (
queryIds,
{ state, activity, graphqlRunner, graphqlTracing }
Expand Down Expand Up @@ -258,120 +119,10 @@ const createPageQueryJob = (state, page) => {
}
}

// ///////////////////////////////////////////////////////////////////
// Listener for gatsby develop

// Initialized via `startListening`
let listenerQueue

/**
* Run any dirty queries. See `calcQueries` for what constitutes a
* dirty query
*/
const runQueuedQueries = () => {
if (listenerQueue) {
const state = store.getState()
const { staticQueryIds, pageQueryIds } = groupQueryIds(
calcDirtyQueryIds(state)
)
const pages = _.filter(pageQueryIds.map(id => state.pages.get(id)))
const queryJobs = [
...staticQueryIds.map(id => createStaticQueryJob(state, id)),
...pages.map(page => createPageQueryJob(state, page)),
]
listenerQueue.push(queryJobs)
}
}

/**
* Starts a background process that processes any dirty queries
* whenever one of the following occurs:
*
* 1. A node has changed (but only after the api call has finished
* running)
* 2. A component query (e.g. by editing a React Component) has
* changed
*
* For what constitutes a dirty query, see `calcQueries`
*/

const startListeningToDevelopQueue = ({ graphqlTracing } = {}) => {
// We use a queue to process batches of queries so that they are
// processed consecutively
let graphqlRunner = null
const developQueue = queryQueue.createDevelopQueue(() => {
if (!graphqlRunner) {
graphqlRunner = new GraphQLRunner(store, { graphqlTracing })
}
return graphqlRunner
})
listenerQueue = new Queue((queryJobs, callback) => {
const activity = createQueryRunningActivity(queryJobs.length)

const onFinish = (...arg) => {
pageDataUtil.enqueueFlush()
activity.done()
return callback(...arg)
}

return queryQueue
.processBatch(developQueue, queryJobs, activity)
.then(() => onFinish(null))
.catch(onFinish)
})

emitter.on(`API_RUNNING_START`, () => {
report.pendingActivity({ id: `query-running` })
})

emitter.on(`API_RUNNING_QUEUE_EMPTY`, runQueuedQueries)
;[
`DELETE_CACHE`,
`CREATE_NODE`,
`DELETE_NODE`,
`DELETE_NODES`,
`SET_SCHEMA_COMPOSER`,
`SET_SCHEMA`,
`ADD_FIELD_TO_NODE`,
`ADD_CHILD_NODE_TO_PARENT_NODE`,
].forEach(eventType => {
emitter.on(eventType, event => {
graphqlRunner = null
})
})
}

const enqueueExtractedQueryId = pathname => {
extractedQueryIds.add(pathname)
}

const getPagesForComponent = componentPath => {
const state = store.getState()
return [...state.pages.values()].filter(
p => p.componentPath === componentPath
)
}

const enqueueExtractedPageComponent = componentPath => {
const pages = getPagesForComponent(componentPath)
// Remove page data dependencies before re-running queries because
// the changing of the query could have changed the data dependencies.
// Re-running the queries will add back data dependencies.
boundActionCreators.deleteComponentsDependencies(
pages.map(p => p.path || p.id)
)
pages.forEach(page => enqueueExtractedQueryId(page.path))
runQueuedQueries()
}

module.exports = {
calcInitialDirtyQueryIds,
calcInitialDirtyQueryIds: calcDirtyQueryIds,
calcDirtyQueryIds,
processPageQueries,
processStaticQueries,
groupQueryIds,
startListeningToDevelopQueue,
runQueuedQueries,
enqueueExtractedQueryId,
enqueueExtractedPageComponent,
}
Loading

0 comments on commit 9d322a4

Please sign in to comment.