Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: decode percent-encoded path in getPath #2714

Merged
merged 10 commits into from
May 22, 2024
54 changes: 50 additions & 4 deletions benchmarks/utils/src/get-path.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,61 @@ bench('noop', () => {})
const request = new Request('http://localhost/about/me')

group('getPath', () => {
bench('slice + indexOf', () => {
bench('slice + indexOf : w/o decodeURI', () => {
const url = request.url
const queryIndex = url.indexOf('?', 8)
url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
})

bench('regexp', () => {
bench('regexp : w/o decodeURI', () => {
const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/)
match ? match[1] : ''
return match ? match[1] : ''
})

bench('slice + indexOf', () => {
const url = request.url
const queryIndex = url.indexOf('?', 8)
const path = url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
return path.includes('%') ? decodeURIComponent(path) : path
})

bench('slice + for-loop + flag', () => {
const url = request.url
let start = url.indexOf('/', 8)
let i = start
let hasPercentEncoding = false
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
hasPercentEncoding = true
} else if (charCode === 63) {
// '?'
break
}
}
return hasPercentEncoding ? decodeURIComponent(url.slice(start, i)) : url.slice(start, i)
})

bench('slice + for-loop + immediate return', () => {
const url = request.url
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return decodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
})
})

Expand Down
43 changes: 40 additions & 3 deletions deno_dist/utils/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
return null
}

/**
* Try to apply decodeURI() to given string.
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
* @param str The string to decode.
* @returns The decoded string that sometimes contains undecodable percent encoding.
* @example
* tryDecodeURI('Hello%20World') // 'Hello World'
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
*/
const tryDecodeURI = (str: string): string => {
try {
return decodeURI(str)
} catch {
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
try {
return decodeURI(match)
} catch {
return match
}
})
}
}

export const getPath = (request: Request): string => {
// Optimized: indexOf() + slice() is faster than RegExp
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
}

export const getQueryStrings = (url: string): string => {
Expand Down
83 changes: 83 additions & 0 deletions src/hono.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,89 @@ describe('Routing', () => {
expect(res.status).toBe(404)
})
})

describe('Encoded path', () => {
let app: Hono
beforeEach(() => {
app = new Hono()
})

it('should decode path parameter', async () => {
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))

const res = await app.request('http://localhost/users/%C3%A7awa%20y%C3%AE%3F')
expect(res.status).toBe(200)
expect(await res.text()).toBe('id is çawa yî?')
})

it('should decode "/"', async () => {
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))

const res = await app.request('http://localhost/users/hono%2Fposts') // %2F is '/'
expect(res.status).toBe(200)
expect(await res.text()).toBe('id is hono/posts')
})

it('should decode alphabets', async () => {
app.get('/users/static', (c) => c.text('static'))

const res = await app.request('http://localhost/users/%73tatic') // %73 is 's'
expect(res.status).toBe(200)
expect(await res.text()).toBe('static')
})

it('should decode alphabets with invalid UTF-8 sequence', async () => {
app.get('/static/:path', (c) => {
try {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
} catch (e) {
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
}
})

const res = await app.request('http://localhost/%73tatic/%A4%A2') // %73 is 's', %A4%A2 is invalid UTF-8 sequence
expect(res.status).toBe(200)
expect(await res.text()).toBe('by c.req.url: %A4%A2')
})

it('should decode alphabets with invalid percent encoding', async () => {
app.get('/static/:path', (c) => {
try {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
} catch (e) {
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
}
})

const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
expect(res.status).toBe(200)
expect(await res.text()).toBe('by c.req.url: %a')
})

it('should be able to catch URIError', async () => {
app.onError((err, c) => {
if (err instanceof URIError) {
return c.text(err.message, 400)
}
throw err
})
app.get('/static/:path', (c) => {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
})

const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
expect(res.status).toBe(400)
expect(await res.text()).toBe('URI malformed')
})

it('should not double decode', async () => {
app.get('/users/:id', (c) => c.text(`posts of ${c.req.param('id')}`))

const res = await app.request('http://localhost/users/%2525') // %25 is '%'
expect(res.status).toBe(200)
expect(await res.text()).toBe('posts of %25')
})
})
})

describe('param and query', () => {
Expand Down
43 changes: 40 additions & 3 deletions src/utils/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
return null
}

/**
* Try to apply decodeURI() to given string.
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
* @param str The string to decode.
* @returns The decoded string that sometimes contains undecodable percent encoding.
* @example
* tryDecodeURI('Hello%20World') // 'Hello World'
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
*/
const tryDecodeURI = (str: string): string => {
try {
return decodeURI(str)
} catch {
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
try {
return decodeURI(match)
} catch {
return match
}
})
}
}

export const getPath = (request: Request): string => {
// Optimized: indexOf() + slice() is faster than RegExp
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
}

export const getQueryStrings = (url: string): string => {
Expand Down