From 7d7dd523a17489d0bd202d52209fca8fcb7ccc38 Mon Sep 17 00:00:00 2001 From: Robert DeLuca Date: Fri, 8 Nov 2019 16:31:15 -0600 Subject: [PATCH] feat: Introduce asset discovery response caching behind flag (#419) * test: [Spike] Add network caching to asset discovery behind flag * Only cache 200 status code responses Keep the caching layer real thin, since its an optimization * Add inline doc, clean up console.logs * Pull method out into its own util Up next is to test * Add tests for response cache util * Refactor `getResponseCache` function * Remove extra early testing snapshots * Remove `body` from mocked response obj * Code review: Update test to be more reliable --- package.json | 2 +- src/commands/exec.ts | 6 ++ .../asset-discovery-configuration.ts | 1 + src/configuration/configuration.ts | 1 + src/services/asset-discovery-service.ts | 16 +++++- src/utils/configuration.ts | 1 + src/utils/response-cache.ts | 47 +++++++++++++++ test/utils/response-cache.test.ts | 57 +++++++++++++++++++ 8 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 src/utils/response-cache.ts create mode 100644 test/utils/response-cache.test.ts diff --git a/package.json b/package.json index ca4689c2..291832f2 100644 --- a/package.json +++ b/package.json @@ -147,7 +147,7 @@ "test": "yarn test:unit && yarn test:client && yarn test:integration && yarn test:command:snapshot && yarn test:command:upload", "test:unit": "PERCY_TOKEN=abc mocha \"test/**/*.test.ts\" --exclude \"test/percy-agent-client/**/*.test.ts\" --exclude \"test/integration/**/*\"", "test:client": "karma start ./test/percy-agent-client/karma.conf.js", - "test:integration": "yarn build-client && node ./bin/run exec -h *.localtest.me -c .ci.percy.yml -- mocha test/integration/**/*.test.ts", + "test:integration": "yarn build-client && node ./bin/run exec --cache-responses -h *.localtest.me -c .ci.percy.yml -- mocha test/integration/**/*.test.ts", "test:command:snapshot": "./bin/run snapshot test/integration/test-static-site -b /dummy-base-url/ -i **/red-keep.** -s **/*.{html}", "test:command:upload": "./bin/run upload test/integration/test-static-images", "version": "oclif-dev readme && git add README.md", diff --git a/src/commands/exec.ts b/src/commands/exec.ts index 287884f2..e93f04e8 100644 --- a/src/commands/exec.ts +++ b/src/commands/exec.ts @@ -27,6 +27,12 @@ export default class Exec extends PercyCommand { 'Asset discovery network idle timeout (in milliseconds)', ].join(' '), }), + 'cache-responses': flags.boolean({ + description: [ + `[default: ${DEFAULT_CONFIGURATION.agent['asset-discovery']['cache-responses']}]`, + 'Enable caching network responses for asset discovery (experimental)', + ].join(' '), + }), 'port': flags.integer({ char: 'p', description: [ diff --git a/src/configuration/asset-discovery-configuration.ts b/src/configuration/asset-discovery-configuration.ts index d0ef5cc6..dedbc59a 100644 --- a/src/configuration/asset-discovery-configuration.ts +++ b/src/configuration/asset-discovery-configuration.ts @@ -4,4 +4,5 @@ export interface AssetDiscoveryConfiguration { 'network-idle-timeout': number, 'page-pool-size-min': number, 'page-pool-size-max': number + 'cache-responses': boolean } diff --git a/src/configuration/configuration.ts b/src/configuration/configuration.ts index 88d7ebdb..69c4f4ec 100644 --- a/src/configuration/configuration.ts +++ b/src/configuration/configuration.ts @@ -27,6 +27,7 @@ export const DEFAULT_CONFIGURATION: Configuration = { 'network-idle-timeout': 50, // ms 'page-pool-size-min': 1, // pages 'page-pool-size-max': 5, // pages + 'cache-responses': false, }, }, 'static-snapshots': { diff --git a/src/services/asset-discovery-service.ts b/src/services/asset-discovery-service.ts index d05853e6..fb1a7a34 100644 --- a/src/services/asset-discovery-service.ts +++ b/src/services/asset-discovery-service.ts @@ -5,6 +5,7 @@ import { AssetDiscoveryConfiguration } from '../configuration/asset-discovery-co import { DEFAULT_CONFIGURATION } from '../configuration/configuration' import { SnapshotOptions } from '../percy-agent-client/snapshot-options' import { logError, profile } from '../utils/logger' +import { cacheResponse, getResponseCache } from '../utils/response-cache' import waitForNetworkIdle from '../utils/wait-for-network-idle' import PercyClientService from './percy-client-service' import ResponseService from './response-service' @@ -189,13 +190,15 @@ export class AssetDiscoveryService extends PercyClientService { ]) as {}) page.on('request', async (request) => { + const requestUrl = request.url() + try { if (!this.shouldRequestResolve(request)) { await request.abort() return } - if (request.url() === rootResourceUrl) { + if (requestUrl === rootResourceUrl) { await request.respond({ body: domSnapshot, contentType: 'text/html', @@ -204,6 +207,13 @@ export class AssetDiscoveryService extends PercyClientService { return } + if (this.configuration['cache-responses'] === true && getResponseCache(requestUrl)) { + logger.debug(`Asset cache hit for ${requestUrl}`) + await request.respond(getResponseCache(requestUrl)) + + return + } + await request.continue() } catch (error) { logError(error) @@ -215,7 +225,11 @@ export class AssetDiscoveryService extends PercyClientService { // We could also listen on 'response', but then we'd have to check if it was successful. page.on('requestfinished', async (request) => { const response = request.response() + if (response) { + if (this.configuration['cache-responses'] === true) { + await cacheResponse(response, logger) + } // Parallelize the work in processResponse as much as possible, but make sure to // wait for it to complete before returning from the asset discovery phase. const promise = this.responseService.processResponse( diff --git a/src/utils/configuration.ts b/src/utils/configuration.ts index 5315d91a..6059669e 100644 --- a/src/utils/configuration.ts +++ b/src/utils/configuration.ts @@ -41,6 +41,7 @@ function transform(flags: any, args: any) { 'asset-discovery': { 'allowed-hostnames': flags['allowed-hostname'], 'network-idle-timeout': flags['network-idle-timeout'], + 'cache-responses': flags['cache-responses'], }, }, 'static-snapshots': { diff --git a/src/utils/response-cache.ts b/src/utils/response-cache.ts new file mode 100644 index 00000000..3d42abba --- /dev/null +++ b/src/utils/response-cache.ts @@ -0,0 +1,47 @@ +import { Response } from 'puppeteer' +let responseCache = {} as any + +/** + * Keep an in-memory cache of asset responses. + * + * When enabled, asset responses will be kept in memory. When the asset is + * re-requested, it will be responsed with what the cached response. This makes + * it so servers aren't being hounded for the same asset over and over again. + */ +export async function cacheResponse(response: Response, logger: any) { + const responseUrl = response.url() + const statusCode = response.status() + + if (!!responseCache[responseUrl]) { + logger.debug(`Asset already in cache ${responseUrl}`) + return + } + + if (![200, 201].includes(statusCode)) { + return + } + + try { + const buffer = await response.buffer() + + responseCache[responseUrl] = { + status: response.status(), + headers: response.headers(), + body: buffer, + } + + logger.debug(`Added ${responseUrl} to asset discovery cache`) + } catch (error) { + logger.debug(`Could not cache response ${responseUrl}: ${error}`) + } +} + +export function getResponseCache(url: string) { + return responseCache[url] +} + +export function _setResponseCache(newResponseCache: any) { + responseCache = newResponseCache + + return responseCache +} diff --git a/test/utils/response-cache.test.ts b/test/utils/response-cache.test.ts new file mode 100644 index 00000000..aa920015 --- /dev/null +++ b/test/utils/response-cache.test.ts @@ -0,0 +1,57 @@ +import { expect } from 'chai' +import { _setResponseCache, cacheResponse, getResponseCache } from '../../src/utils/response-cache' + +// Mock logger +const logger = { debug() { return '' }} +const defaultResponse = { + url() { return 'http://example.com/foo.txt' }, + status() { return 200 }, + headers() { return 'fake headers' }, + buffer() { return 'hello' }, +} as any + +describe('Response cache util', () => { + beforeEach(() => { + _setResponseCache({}) + }) + + it('200 status code response adds to the cache', async () => { + await cacheResponse(defaultResponse, logger) + + expect(getResponseCache('http://example.com/foo.txt')).to.eql({ + status: 200, + body: 'hello', + headers: 'fake headers', + }) + }) + + it('201 status code response adds to the cache', async () => { + await cacheResponse({ ...defaultResponse, status() { return 201 } }, logger) + + expect(getResponseCache('http://example.com/foo.txt')).to.eql({ + status: 201, + body: 'hello', + headers: 'fake headers', + }) + }) + + it('calling the cache with the same URL does nothing', async () => { + await cacheResponse(defaultResponse, logger) + await cacheResponse({ ...defaultResponse, status() { return 201 } }, logger) + + expect(getResponseCache('http://example.com/foo.txt')).to.eql({ + status: 200, + body: 'hello', + headers: 'fake headers', + }) + }) + + it('non-200 status code response does not add to the cache', async () => { + await cacheResponse({ ...defaultResponse, status() { return 300 } }, logger) + await cacheResponse({ ...defaultResponse, status() { return 500 } }, logger) + await cacheResponse({ ...defaultResponse, status() { return 401 } }, logger) + await cacheResponse({ ...defaultResponse, status() { return 404 } }, logger) + + expect(getResponseCache('http://example.com/foo.txt')).to.eql(undefined) + }) +})