Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gatsby-core-utils): Add retry on HTTP status codes to fetchRemoteFile #33461

Merged
merged 6 commits into from
Nov 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 146 additions & 15 deletions packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// @ts-check

import path from "path"
import zlib from "zlib"
import os from "os"
Expand Down Expand Up @@ -77,6 +78,8 @@ async function getFileContent(file, req, options = {}) {
}
}

let attempts503 = 0

const server = setupServer(
rest.get(`http://external.com/logo.svg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
Expand Down Expand Up @@ -140,7 +143,44 @@ const server = setupServer(
ctx.status(500),
ctx.body(content)
)
})
}),
rest.get(`http://external.com/503-twice.svg`, async (req, res, ctx) => {
const errorContent = `Server error`
attempts503++

if (attempts503 < 3) {
return res(
ctx.set(`Content-Type`, `text/html`),
ctx.set(`Content-Length`, String(errorContent.length)),
ctx.status(503),
ctx.body(errorContent)
)
}

const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/gatsby-logo.svg`),
req
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
}),
rest.get(`http://external.com/503-forever.svg`, async (req, res, ctx) => {
const errorContent = `Server error`
return res(
ctx.set(`Content-Type`, `text/html`),
ctx.set(`Content-Length`, String(errorContent.length)),
ctx.status(503),
ctx.body(errorContent)
)
}),
rest.get(`http://external.com/network-error.svg`, (req, res) =>
res.networkError(`ECONNREFUSED`)
)
)

function getFetchInWorkerContext(workerId) {
Expand Down Expand Up @@ -205,6 +245,10 @@ describe(`fetch-remote-file`, () => {
})
})

afterEach(() => {
jest.useRealTimers()
})

it(`downloads and create a file`, async () => {
const filePath = await fetchRemoteFile({
url: `http://external.com/logo.svg`,
Expand Down Expand Up @@ -304,8 +348,6 @@ describe(`fetch-remote-file`, () => {
jest.runAllTimers()
await requests[0]

jest.useRealTimers()

// we still expect 2 fetches because cache can't save fast enough
expect(gotStream).toBeCalledTimes(2)
expect(fsMove).toBeCalledTimes(1)
Expand Down Expand Up @@ -365,18 +407,12 @@ describe(`fetch-remote-file`, () => {
jest.runAllTimers()
await requests[0]

jest.useRealTimers()

// we still expect 4 fetches because cache can't save fast enough
expect(gotStream).toBeCalledTimes(4)
expect(fsMove).toBeCalledTimes(2)
})

it(`doesn't keep lock when file download failed`, async () => {
// we don't want to wait for polling to finish
jest.useFakeTimers()
jest.runAllTimers()

const cacheInternals = new Map()
const workerCache = {
get(key) {
Expand All @@ -398,18 +434,14 @@ describe(`fetch-remote-file`, () => {
})
).rejects.toThrow()

jest.runAllTimers()

await expect(
fetchRemoteFileInstanceTwo({
url: `http://external.com/500.jpg`,
cache: workerCache,
})
).rejects.toThrow()

jest.useRealTimers()

expect(gotStream).toBeCalledTimes(1)
expect(gotStream).toBeCalledTimes(3)
expect(fsMove).toBeCalledTimes(0)
})

Expand All @@ -428,7 +460,30 @@ describe(`fetch-remote-file`, () => {
url: `http://external.com/500.jpg`,
cache,
})
).rejects.toThrow(`Response code 500 (Internal Server Error)`)
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Unable to fetch:
http://external.com/500.jpg
---
Reason: Response code 500 (Internal Server Error)
---
Fetch details:
{
\\"attempt\\": 3,
\\"method\\": \\"GET\\",
\\"responseStatusCode\\": 500,
\\"responseStatusMessage\\": \\"Internal Server Error\\",
\\"requestHeaders\\": {
\\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\",
\\"accept-encoding\\": \\"gzip, deflate, br\\"
},
\\"responseHeaders\\": {
\\"x-powered-by\\": \\"msw\\",
\\"content-length\\": \\"12\\",
\\"content-type\\": \\"text/html\\"
}
}
---"
`)
})

describe(`retries the download`, () => {
Expand Down Expand Up @@ -457,5 +512,81 @@ describe(`fetch-remote-file`, () => {
)
expect(gotStream).toBeCalledTimes(2)
})

it(`Retries when server returns 503 error till server returns 200`, async () => {
const fetchRemoteFileInstance = fetchRemoteFile({
url: `http://external.com/503-twice.svg`,
cache,
})

const filePath = await fetchRemoteFileInstance

expect(path.basename(filePath)).toBe(`503-twice.svg`)
expect(getFileSize(filePath)).resolves.toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(3)
})

it(`Stops retry when maximum attempts is reached`, async () => {
await expect(
fetchRemoteFile({
url: `http://external.com/503-forever.svg`,
cache,
})
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Unable to fetch:
http://external.com/503-forever.svg
---
Reason: Response code 503 (Service Unavailable)
---
Fetch details:
{
\\"attempt\\": 3,
\\"method\\": \\"GET\\",
\\"responseStatusCode\\": 503,
\\"responseStatusMessage\\": \\"Service Unavailable\\",
\\"requestHeaders\\": {
\\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\",
\\"accept-encoding\\": \\"gzip, deflate, br\\"
},
\\"responseHeaders\\": {
\\"x-powered-by\\": \\"msw\\",
\\"content-length\\": \\"12\\",
\\"content-type\\": \\"text/html\\"
}
}
---"
`)

expect(gotStream).toBeCalledTimes(3)
})
// @todo retry on network errors
it(`Retries on network errors`, async () => {
await expect(
fetchRemoteFile({
url: `http://external.com/network-error.svg`,
cache,
})
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Unable to fetch:
http://external.com/network-error.svg
---
Reason: ECONNREFUSED
---
Fetch details:
{
\\"attempt\\": 3,
\\"method\\": \\"GET\\",
\\"requestHeaders\\": {
\\"user-agent\\": \\"got (https://github.com/sindresorhus/got)\\",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of scope for this.. but... what if we send a user-agent that contains GatsbyJS and the current version? Could imagine that would increase the visibility in logs, and might even help in support tickets.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

\\"accept-encoding\\": \\"gzip, deflate, br\\"
}
}
---"
`)

expect(gotStream).toBeCalledTimes(3)
})
})
})
98 changes: 92 additions & 6 deletions packages/gatsby-core-utils/src/fetch-remote-file.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import got, { Headers, Options } from "got"
import got, { Headers, Options, RequestError } from "got"
import fileType from "file-type"
import path from "path"
import fs from "fs-extra"
Expand All @@ -8,7 +8,6 @@ import {
getRemoteFileExtension,
createFilePath,
} from "./filename-utils"

import type { IncomingMessage } from "http"
import type { GatsbyCache } from "gatsby"

Expand All @@ -22,6 +21,7 @@ export interface IFetchRemoteFileOptions {
httpHeaders?: Headers
ext?: string
name?: string
maxAttempts?: number
}

// copied from gatsby-worker
Expand All @@ -48,6 +48,28 @@ const INCOMPLETE_RETRY_LIMIT = process.env.GATSBY_INCOMPLETE_RETRY_LIMIT
? parseInt(process.env.GATSBY_INCOMPLETE_RETRY_LIMIT, 10)
: 3

// jest doesn't allow us to run all timings infinitely, so we set it 0 in tests
const BACKOFF_TIME = process.env.NODE_ENV === `test` ? 0 : 1000

function range(start: number, end: number): Array<number> {
return Array(end - start)
.fill(null)
.map((_, i) => start + i)
}

// Based on the defaults of https://github.com/JustinBeckwith/retry-axios
const STATUS_CODES_TO_RETRY = [...range(100, 200), 429, ...range(500, 600)]
axe312ger marked this conversation as resolved.
Show resolved Hide resolved
const ERROR_CODES_TO_RETRY = [
`ETIMEDOUT`,
`ECONNRESET`,
`EADDRINUSE`,
`ECONNREFUSED`,
`EPIPE`,
`ENOTFOUND`,
`ENETUNREACH`,
`EAI_AGAIN`,
]

let fetchCache = new Map()
let latestBuildId = ``

Expand Down Expand Up @@ -335,6 +357,9 @@ function requestRemoteNode(
// Fixes a bug in latest got where progress.total gets reset when stream ends, even if it wasn't complete.
let totalSize: number | null = null
responseStream.on(`downloadProgress`, progress => {
// reset the timeout on each progress event to make sure large files don't timeout
resetTimeout()
axe312ger marked this conversation as resolved.
Show resolved Hide resolved

if (
progress.total != null &&
(!totalSize || totalSize < progress.total)
Expand All @@ -359,9 +384,71 @@ function requestRemoteNode(
fsWriteStream.close()
fs.removeSync(tmpFilename)

process.nextTick(() => {
reject(error)
})
if (!(error instanceof RequestError)) {
return reject(error)
}

// This is a replacement for the stream retry logic of got
// till we can update all got instances to v12
// https://github.com/sindresorhus/got/blob/main/documentation/7-retry.md
// https://github.com/sindresorhus/got/blob/main/documentation/3-streams.md#retry
const statusCode = error.response?.statusCode
const errorCode = error.code || error.message // got gives error.code, but msw/node returns the error codes in the message only

if (
// HTTP STATUS CODE ERRORS
(statusCode && STATUS_CODES_TO_RETRY.includes(statusCode)) ||
// GENERAL NETWORK ERRORS
(errorCode && ERROR_CODES_TO_RETRY.includes(errorCode))
) {
if (attempt < INCOMPLETE_RETRY_LIMIT) {
setTimeout(() => {
resolve(
requestRemoteNode(
url,
headers,
tmpFilename,
httpOptions,
attempt + 1
)
)
}, BACKOFF_TIME * attempt)

return undefined
}
// Throw user friendly error
error.message = [
`Unable to fetch:`,
url,
`---`,
`Reason: ${error.message}`,
`---`,
].join(`\n`)

// Gather details about what went wrong from the error object and the request
const details = Object.entries({
attempt,
method: error.options?.method,
errorCode: error.code,
responseStatusCode: error.response?.statusCode,
responseStatusMessage: error.response?.statusMessage,
requestHeaders: error.options?.headers,
responseHeaders: error.response?.headers,
})
// Remove undefined values from the details to keep it clean
.reduce((a, [k, v]) => (v === undefined ? a : ((a[k] = v), a)), {})

if (Object.keys(details).length) {
error.message = [
error.message,
`Fetch details:`,
JSON.stringify(details, null, 2),
`---`,
].join(`\n`)
}
}

return reject(error)
})

responseStream.on(`response`, response => {
Expand Down Expand Up @@ -399,7 +486,6 @@ function requestRemoteNode(
)
}
}

return resolve(response)
})
})
Expand Down