Skip to content

Commit

Permalink
feat(gatsby-core-utils): Add retry on HTTP status codes to `fetchRemo…
Browse files Browse the repository at this point in the history
…teFile` (#33461) (#33925)

Co-authored-by: Ward Peeters <ward@coding-tech.com>
Co-authored-by: Benedikt Rötsch <axe312ger@users.noreply.github.com>
  • Loading branch information
3 people authored Nov 10, 2021
1 parent 3b06677 commit 62a5612
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 21 deletions.
161 changes: 146 additions & 15 deletions packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// @ts-check

import path from "path"
import zlib from "zlib"
import os from "os"
Expand Down Expand Up @@ -77,6 +78,8 @@ async function getFileContent(file, req, options = {}) {
}
}

let attempts503 = 0

const server = setupServer(
rest.get(`http://external.com/logo.svg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
Expand Down Expand Up @@ -140,7 +143,44 @@ const server = setupServer(
ctx.status(500),
ctx.body(content)
)
})
}),
rest.get(`http://external.com/503-twice.svg`, async (req, res, ctx) => {
const errorContent = `Server error`
attempts503++

if (attempts503 < 3) {
return res(
ctx.set(`Content-Type`, `text/html`),
ctx.set(`Content-Length`, String(errorContent.length)),
ctx.status(503),
ctx.body(errorContent)
)
}

const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/gatsby-logo.svg`),
req
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
}),
rest.get(`http://external.com/503-forever.svg`, async (req, res, ctx) => {
const errorContent = `Server error`
return res(
ctx.set(`Content-Type`, `text/html`),
ctx.set(`Content-Length`, String(errorContent.length)),
ctx.status(503),
ctx.body(errorContent)
)
}),
rest.get(`http://external.com/network-error.svg`, (req, res) =>
res.networkError(`ECONNREFUSED`)
)
)

function getFetchInWorkerContext(workerId) {
Expand Down Expand Up @@ -205,6 +245,10 @@ describe(`fetch-remote-file`, () => {
})
})

afterEach(() => {
jest.useRealTimers()
})

it(`downloads and create a file`, async () => {
const filePath = await fetchRemoteFile({
url: `http://external.com/logo.svg`,
Expand Down Expand Up @@ -304,8 +348,6 @@ describe(`fetch-remote-file`, () => {
jest.runAllTimers()
await requests[0]

jest.useRealTimers()

// we still expect 2 fetches because cache can't save fast enough
expect(gotStream).toBeCalledTimes(2)
expect(fsMove).toBeCalledTimes(1)
Expand Down Expand Up @@ -365,18 +407,12 @@ describe(`fetch-remote-file`, () => {
jest.runAllTimers()
await requests[0]

jest.useRealTimers()

// we still expect 4 fetches because cache can't save fast enough
expect(gotStream).toBeCalledTimes(4)
expect(fsMove).toBeCalledTimes(2)
})

it(`doesn't keep lock when file download failed`, async () => {
// we don't want to wait for polling to finish
jest.useFakeTimers()
jest.runAllTimers()

const cacheInternals = new Map()
const workerCache = {
get(key) {
Expand All @@ -398,18 +434,14 @@ describe(`fetch-remote-file`, () => {
})
).rejects.toThrow()

jest.runAllTimers()

await expect(
fetchRemoteFileInstanceTwo({
url: `http://external.com/500.jpg`,
cache: workerCache,
})
).rejects.toThrow()

jest.useRealTimers()

expect(gotStream).toBeCalledTimes(1)
expect(gotStream).toBeCalledTimes(3)
expect(fsMove).toBeCalledTimes(0)
})

Expand All @@ -428,7 +460,30 @@ describe(`fetch-remote-file`, () => {
url: `http://external.com/500.jpg`,
cache,
})
).rejects.toThrow(`Response code 500 (Internal Server Error)`)
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Unable to fetch:
http://external.com/500.jpg
---
Reason: Response code 500 (Internal Server Error)
---
Fetch details:
{
\\"attempt\\": 3,
\\"method\\": \\"GET\\",
\\"responseStatusCode\\": 500,
\\"responseStatusMessage\\": \\"Internal Server Error\\",
\\"requestHeaders\\": {
\\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\",
\\"accept-encoding\\": \\"gzip, deflate, br\\"
},
\\"responseHeaders\\": {
\\"x-powered-by\\": \\"msw\\",
\\"content-length\\": \\"12\\",
\\"content-type\\": \\"text/html\\"
}
}
---"
`)
})

describe(`retries the download`, () => {
Expand Down Expand Up @@ -457,5 +512,81 @@ describe(`fetch-remote-file`, () => {
)
expect(gotStream).toBeCalledTimes(2)
})

it(`Retries when server returns 503 error till server returns 200`, async () => {
const fetchRemoteFileInstance = fetchRemoteFile({
url: `http://external.com/503-twice.svg`,
cache,
})

const filePath = await fetchRemoteFileInstance

expect(path.basename(filePath)).toBe(`503-twice.svg`)
expect(getFileSize(filePath)).resolves.toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(3)
})

it(`Stops retry when maximum attempts is reached`, async () => {
await expect(
fetchRemoteFile({
url: `http://external.com/503-forever.svg`,
cache,
})
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Unable to fetch:
http://external.com/503-forever.svg
---
Reason: Response code 503 (Service Unavailable)
---
Fetch details:
{
\\"attempt\\": 3,
\\"method\\": \\"GET\\",
\\"responseStatusCode\\": 503,
\\"responseStatusMessage\\": \\"Service Unavailable\\",
\\"requestHeaders\\": {
\\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\",
\\"accept-encoding\\": \\"gzip, deflate, br\\"
},
\\"responseHeaders\\": {
\\"x-powered-by\\": \\"msw\\",
\\"content-length\\": \\"12\\",
\\"content-type\\": \\"text/html\\"
}
}
---"
`)

expect(gotStream).toBeCalledTimes(3)
})
// @todo retry on network errors
it(`Retries on network errors`, async () => {
await expect(
fetchRemoteFile({
url: `http://external.com/network-error.svg`,
cache,
})
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Unable to fetch:
http://external.com/network-error.svg
---
Reason: ECONNREFUSED
---
Fetch details:
{
\\"attempt\\": 3,
\\"method\\": \\"GET\\",
\\"requestHeaders\\": {
\\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\",
\\"accept-encoding\\": \\"gzip, deflate, br\\"
}
}
---"
`)

expect(gotStream).toBeCalledTimes(3)
})
})
})
98 changes: 92 additions & 6 deletions packages/gatsby-core-utils/src/fetch-remote-file.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import got, { Headers, Options } from "got"
import got, { Headers, Options, RequestError } from "got"
import fileType from "file-type"
import path from "path"
import fs from "fs-extra"
Expand All @@ -8,7 +8,6 @@ import {
getRemoteFileExtension,
createFilePath,
} from "./filename-utils"

import type { IncomingMessage } from "http"
import type { GatsbyCache } from "gatsby"

Expand All @@ -22,6 +21,7 @@ export interface IFetchRemoteFileOptions {
httpHeaders?: Headers
ext?: string
name?: string
maxAttempts?: number
}

// copied from gatsby-worker
Expand All @@ -48,6 +48,28 @@ const INCOMPLETE_RETRY_LIMIT = process.env.GATSBY_INCOMPLETE_RETRY_LIMIT
? parseInt(process.env.GATSBY_INCOMPLETE_RETRY_LIMIT, 10)
: 3

// jest doesn't allow us to run all timings infinitely, so we set it 0 in tests
const BACKOFF_TIME = process.env.NODE_ENV === `test` ? 0 : 1000

function range(start: number, end: number): Array<number> {
return Array(end - start)
.fill(null)
.map((_, i) => start + i)
}

// Based on the defaults of https://github.com/JustinBeckwith/retry-axios
const STATUS_CODES_TO_RETRY = [...range(100, 200), 429, ...range(500, 600)]
const ERROR_CODES_TO_RETRY = [
`ETIMEDOUT`,
`ECONNRESET`,
`EADDRINUSE`,
`ECONNREFUSED`,
`EPIPE`,
`ENOTFOUND`,
`ENETUNREACH`,
`EAI_AGAIN`,
]

let fetchCache = new Map()
let latestBuildId = ``

Expand Down Expand Up @@ -335,6 +357,9 @@ function requestRemoteNode(
// Fixes a bug in latest got where progress.total gets reset when stream ends, even if it wasn't complete.
let totalSize: number | null = null
responseStream.on(`downloadProgress`, progress => {
// reset the timeout on each progress event to make sure large files don't timeout
resetTimeout()

if (
progress.total != null &&
(!totalSize || totalSize < progress.total)
Expand All @@ -359,9 +384,71 @@ function requestRemoteNode(
fsWriteStream.close()
fs.removeSync(tmpFilename)

process.nextTick(() => {
reject(error)
})
if (!(error instanceof RequestError)) {
return reject(error)
}

// This is a replacement for the stream retry logic of got
// till we can update all got instances to v12
// https://github.com/sindresorhus/got/blob/main/documentation/7-retry.md
// https://github.com/sindresorhus/got/blob/main/documentation/3-streams.md#retry
const statusCode = error.response?.statusCode
const errorCode = error.code || error.message // got gives error.code, but msw/node returns the error codes in the message only

if (
// HTTP STATUS CODE ERRORS
(statusCode && STATUS_CODES_TO_RETRY.includes(statusCode)) ||
// GENERAL NETWORK ERRORS
(errorCode && ERROR_CODES_TO_RETRY.includes(errorCode))
) {
if (attempt < INCOMPLETE_RETRY_LIMIT) {
setTimeout(() => {
resolve(
requestRemoteNode(
url,
headers,
tmpFilename,
httpOptions,
attempt + 1
)
)
}, BACKOFF_TIME * attempt)

return undefined
}
// Throw user friendly error
error.message = [
`Unable to fetch:`,
url,
`---`,
`Reason: ${error.message}`,
`---`,
].join(`\n`)

// Gather details about what went wrong from the error object and the request
const details = Object.entries({
attempt,
method: error.options?.method,
errorCode: error.code,
responseStatusCode: error.response?.statusCode,
responseStatusMessage: error.response?.statusMessage,
requestHeaders: error.options?.headers,
responseHeaders: error.response?.headers,
})
// Remove undefined values from the details to keep it clean
.reduce((a, [k, v]) => (v === undefined ? a : ((a[k] = v), a)), {})

if (Object.keys(details).length) {
error.message = [
error.message,
`Fetch details:`,
JSON.stringify(details, null, 2),
`---`,
].join(`\n`)
}
}

return reject(error)
})

responseStream.on(`response`, response => {
Expand Down Expand Up @@ -399,7 +486,6 @@ function requestRemoteNode(
)
}
}

return resolve(response)
})
})
Expand Down

0 comments on commit 62a5612

Please sign in to comment.