From 8caec2d94cf5e17deb1e54d097cf4b427da14eb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benedikt=20R=C3=B6tsch?= Date: Tue, 9 Nov 2021 20:05:47 +0100 Subject: [PATCH] feat(gatsby-core-utils): Add retry on HTTP status codes to `fetchRemoteFile` (#33461) * feat: add retry to fetch remote file * refactor to retry withing stream error event handler * review changes * fix tests * add test and retry for network errors * remove retry logger Co-authored-by: Ward Peeters (cherry picked from commit 00dc5896d2eb63f201d061c4be15093bbf162d56) --- .../src/__tests__/fetch-remote-file.js | 161 ++++++++++++++++-- .../src/fetch-remote-file.ts | 98 ++++++++++- 2 files changed, 238 insertions(+), 21 deletions(-) diff --git a/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js b/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js index f37fe8c1eed08..86c6485edf7b4 100644 --- a/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js +++ b/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js @@ -1,4 +1,5 @@ // @ts-check + import path from "path" import zlib from "zlib" import os from "os" @@ -77,6 +78,8 @@ async function getFileContent(file, req, options = {}) { } } +let attempts503 = 0 + const server = setupServer( rest.get(`http://external.com/logo.svg`, async (req, res, ctx) => { const { content, contentLength } = await getFileContent( @@ -140,7 +143,44 @@ const server = setupServer( ctx.status(500), ctx.body(content) ) - }) + }), + rest.get(`http://external.com/503-twice.svg`, async (req, res, ctx) => { + const errorContent = `Server error` + attempts503++ + + if (attempts503 < 3) { + return res( + ctx.set(`Content-Type`, `text/html`), + ctx.set(`Content-Length`, String(errorContent.length)), + ctx.status(503), + ctx.body(errorContent) + ) + } + + const { content, contentLength } = await getFileContent( + path.join(__dirname, `./fixtures/gatsby-logo.svg`), + req + ) + + return res( + ctx.set(`Content-Type`, `image/svg+xml`), + ctx.set(`Content-Length`, contentLength), + ctx.status(200), + ctx.body(content) + ) + }), + rest.get(`http://external.com/503-forever.svg`, async (req, res, ctx) => { + const errorContent = `Server error` + return res( + ctx.set(`Content-Type`, `text/html`), + ctx.set(`Content-Length`, String(errorContent.length)), + ctx.status(503), + ctx.body(errorContent) + ) + }), + rest.get(`http://external.com/network-error.svg`, (req, res) => + res.networkError(`ECONNREFUSED`) + ) ) function getFetchInWorkerContext(workerId) { @@ -205,6 +245,10 @@ describe(`fetch-remote-file`, () => { }) }) + afterEach(() => { + jest.useRealTimers() + }) + it(`downloads and create a file`, async () => { const filePath = await fetchRemoteFile({ url: `http://external.com/logo.svg`, @@ -304,8 +348,6 @@ describe(`fetch-remote-file`, () => { jest.runAllTimers() await requests[0] - jest.useRealTimers() - // we still expect 2 fetches because cache can't save fast enough expect(gotStream).toBeCalledTimes(2) expect(fsMove).toBeCalledTimes(1) @@ -365,18 +407,12 @@ describe(`fetch-remote-file`, () => { jest.runAllTimers() await requests[0] - jest.useRealTimers() - // we still expect 4 fetches because cache can't save fast enough expect(gotStream).toBeCalledTimes(4) expect(fsMove).toBeCalledTimes(2) }) it(`doesn't keep lock when file download failed`, async () => { - // we don't want to wait for polling to finish - jest.useFakeTimers() - jest.runAllTimers() - const cacheInternals = new Map() const workerCache = { get(key) { @@ -398,8 +434,6 @@ describe(`fetch-remote-file`, () => { }) ).rejects.toThrow() - jest.runAllTimers() - await expect( fetchRemoteFileInstanceTwo({ url: `http://external.com/500.jpg`, @@ -407,9 +441,7 @@ describe(`fetch-remote-file`, () => { }) ).rejects.toThrow() - jest.useRealTimers() - - expect(gotStream).toBeCalledTimes(1) + expect(gotStream).toBeCalledTimes(3) expect(fsMove).toBeCalledTimes(0) }) @@ -428,7 +460,30 @@ describe(`fetch-remote-file`, () => { url: `http://external.com/500.jpg`, cache, }) - ).rejects.toThrow(`Response code 500 (Internal Server Error)`) + ).rejects.toThrowErrorMatchingInlineSnapshot(` +"Unable to fetch: +http://external.com/500.jpg +--- +Reason: Response code 500 (Internal Server Error) +--- +Fetch details: +{ + \\"attempt\\": 3, + \\"method\\": \\"GET\\", + \\"responseStatusCode\\": 500, + \\"responseStatusMessage\\": \\"Internal Server Error\\", + \\"requestHeaders\\": { + \\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\", + \\"accept-encoding\\": \\"gzip, deflate, br\\" + }, + \\"responseHeaders\\": { + \\"x-powered-by\\": \\"msw\\", + \\"content-length\\": \\"12\\", + \\"content-type\\": \\"text/html\\" + } +} +---" +`) }) describe(`retries the download`, () => { @@ -457,5 +512,81 @@ describe(`fetch-remote-file`, () => { ) expect(gotStream).toBeCalledTimes(2) }) + + it(`Retries when server returns 503 error till server returns 200`, async () => { + const fetchRemoteFileInstance = fetchRemoteFile({ + url: `http://external.com/503-twice.svg`, + cache, + }) + + const filePath = await fetchRemoteFileInstance + + expect(path.basename(filePath)).toBe(`503-twice.svg`) + expect(getFileSize(filePath)).resolves.toBe( + await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`)) + ) + expect(gotStream).toBeCalledTimes(3) + }) + + it(`Stops retry when maximum attempts is reached`, async () => { + await expect( + fetchRemoteFile({ + url: `http://external.com/503-forever.svg`, + cache, + }) + ).rejects.toThrowErrorMatchingInlineSnapshot(` +"Unable to fetch: +http://external.com/503-forever.svg +--- +Reason: Response code 503 (Service Unavailable) +--- +Fetch details: +{ + \\"attempt\\": 3, + \\"method\\": \\"GET\\", + \\"responseStatusCode\\": 503, + \\"responseStatusMessage\\": \\"Service Unavailable\\", + \\"requestHeaders\\": { + \\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\", + \\"accept-encoding\\": \\"gzip, deflate, br\\" + }, + \\"responseHeaders\\": { + \\"x-powered-by\\": \\"msw\\", + \\"content-length\\": \\"12\\", + \\"content-type\\": \\"text/html\\" + } +} +---" +`) + + expect(gotStream).toBeCalledTimes(3) + }) + // @todo retry on network errors + it(`Retries on network errors`, async () => { + await expect( + fetchRemoteFile({ + url: `http://external.com/network-error.svg`, + cache, + }) + ).rejects.toThrowErrorMatchingInlineSnapshot(` +"Unable to fetch: +http://external.com/network-error.svg +--- +Reason: ECONNREFUSED +--- +Fetch details: +{ + \\"attempt\\": 3, + \\"method\\": \\"GET\\", + \\"requestHeaders\\": { + \\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\", + \\"accept-encoding\\": \\"gzip, deflate, br\\" + } +} +---" +`) + + expect(gotStream).toBeCalledTimes(3) + }) }) }) diff --git a/packages/gatsby-core-utils/src/fetch-remote-file.ts b/packages/gatsby-core-utils/src/fetch-remote-file.ts index c5e000289ee7e..d3b21179e0320 100644 --- a/packages/gatsby-core-utils/src/fetch-remote-file.ts +++ b/packages/gatsby-core-utils/src/fetch-remote-file.ts @@ -1,4 +1,4 @@ -import got, { Headers, Options } from "got" +import got, { Headers, Options, RequestError } from "got" import fileType from "file-type" import path from "path" import fs from "fs-extra" @@ -8,7 +8,6 @@ import { getRemoteFileExtension, createFilePath, } from "./filename-utils" - import type { IncomingMessage } from "http" import type { GatsbyCache } from "gatsby" @@ -22,6 +21,7 @@ export interface IFetchRemoteFileOptions { httpHeaders?: Headers ext?: string name?: string + maxAttempts?: number } // copied from gatsby-worker @@ -48,6 +48,28 @@ const INCOMPLETE_RETRY_LIMIT = process.env.GATSBY_INCOMPLETE_RETRY_LIMIT ? parseInt(process.env.GATSBY_INCOMPLETE_RETRY_LIMIT, 10) : 3 +// jest doesn't allow us to run all timings infinitely, so we set it 0 in tests +const BACKOFF_TIME = process.env.NODE_ENV === `test` ? 0 : 1000 + +function range(start: number, end: number): Array { + return Array(end - start) + .fill(null) + .map((_, i) => start + i) +} + +// Based on the defaults of https://github.com/JustinBeckwith/retry-axios +const STATUS_CODES_TO_RETRY = [...range(100, 200), 429, ...range(500, 600)] +const ERROR_CODES_TO_RETRY = [ + `ETIMEDOUT`, + `ECONNRESET`, + `EADDRINUSE`, + `ECONNREFUSED`, + `EPIPE`, + `ENOTFOUND`, + `ENETUNREACH`, + `EAI_AGAIN`, +] + let fetchCache = new Map() let latestBuildId = `` @@ -335,6 +357,9 @@ function requestRemoteNode( // Fixes a bug in latest got where progress.total gets reset when stream ends, even if it wasn't complete. let totalSize: number | null = null responseStream.on(`downloadProgress`, progress => { + // reset the timeout on each progress event to make sure large files don't timeout + resetTimeout() + if ( progress.total != null && (!totalSize || totalSize < progress.total) @@ -359,9 +384,71 @@ function requestRemoteNode( fsWriteStream.close() fs.removeSync(tmpFilename) - process.nextTick(() => { - reject(error) - }) + if (!(error instanceof RequestError)) { + return reject(error) + } + + // This is a replacement for the stream retry logic of got + // till we can update all got instances to v12 + // https://github.com/sindresorhus/got/blob/main/documentation/7-retry.md + // https://github.com/sindresorhus/got/blob/main/documentation/3-streams.md#retry + const statusCode = error.response?.statusCode + const errorCode = error.code || error.message // got gives error.code, but msw/node returns the error codes in the message only + + if ( + // HTTP STATUS CODE ERRORS + (statusCode && STATUS_CODES_TO_RETRY.includes(statusCode)) || + // GENERAL NETWORK ERRORS + (errorCode && ERROR_CODES_TO_RETRY.includes(errorCode)) + ) { + if (attempt < INCOMPLETE_RETRY_LIMIT) { + setTimeout(() => { + resolve( + requestRemoteNode( + url, + headers, + tmpFilename, + httpOptions, + attempt + 1 + ) + ) + }, BACKOFF_TIME * attempt) + + return undefined + } + // Throw user friendly error + error.message = [ + `Unable to fetch:`, + url, + `---`, + `Reason: ${error.message}`, + `---`, + ].join(`\n`) + + // Gather details about what went wrong from the error object and the request + const details = Object.entries({ + attempt, + method: error.options?.method, + errorCode: error.code, + responseStatusCode: error.response?.statusCode, + responseStatusMessage: error.response?.statusMessage, + requestHeaders: error.options?.headers, + responseHeaders: error.response?.headers, + }) + // Remove undefined values from the details to keep it clean + .reduce((a, [k, v]) => (v === undefined ? a : ((a[k] = v), a)), {}) + + if (Object.keys(details).length) { + error.message = [ + error.message, + `Fetch details:`, + JSON.stringify(details, null, 2), + `---`, + ].join(`\n`) + } + } + + return reject(error) }) responseStream.on(`response`, response => { @@ -399,7 +486,6 @@ function requestRemoteNode( ) } } - return resolve(response) }) })