Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(rules): Better Redirect Rules #1256

Merged
merged 20 commits into from
Aug 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
7f68ab4
fix(mv3): :wrench: Modifying the default local redirect behaviour.
whizzzkid Aug 4, 2023
d94b9bb
Merge branch 'rc/3.0-mv3' into fix/default-rules
whizzzkid Aug 4, 2023
a817045
Merge branch 'rc/3.0-mv3' into fix/default-rules
whizzzkid Aug 4, 2023
045e660
fix(mv3): :wrench: Modifying the default local redirect behaviour.
whizzzkid Aug 4, 2023
f6561b9
Merge remote-tracking branch 'refs/remotes/origin/fix/default-rules' …
whizzzkid Aug 4, 2023
c020638
Merge branch 'feat/redirection-tests' into fix/default-rules
whizzzkid Aug 4, 2023
fc085b5
fix(mv3): :bug: Making rules less greedy
whizzzkid Aug 10, 2023
007f41f
fix(mv3): :sparkles: Dynamic Rules for subdomain gateways.
whizzzkid Aug 11, 2023
f18579b
fix(types): Adding ambient types for is-ipfs.
whizzzkid Aug 11, 2023
3e72d36
fix(test):
whizzzkid Aug 12, 2023
d36a282
fix(test): helper
whizzzkid Aug 12, 2023
6ee2d31
feat(mv3): less greedy rules
whizzzkid Aug 12, 2023
e592755
feat: Adding simpler regex for redirects from similar namespaces.
whizzzkid Aug 12, 2023
5c85d84
fix(lint): :rotating_light: Warnings
whizzzkid Aug 12, 2023
fca5fe2
feat(mv3): Better Default Rules (#1260)
whizzzkid Aug 15, 2023
832679d
Update add-on/src/lib/redirect-handler/blockOrObserve.ts
whizzzkid Aug 16, 2023
5e22cea
fix(docs): :pencil2: Adding comments
whizzzkid Aug 16, 2023
dbc672c
refactor(regexfilters): Better Structure and Readability (#1261)
whizzzkid Aug 25, 2023
381f63c
fix(mv3): no blanket redirect for subdomains without namespaces.
whizzzkid Aug 25, 2023
4020796
fix(lint): unused import
whizzzkid Aug 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions add-on/src/lib/redirect-handler/baseRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
 * Input accepted by the `RegexFilter` constructor.
 */
export interface IRegexFilter {
// URL of the request to match. It is parsed with `new URL`, so it has to be an absolute URL.
originUrl: string
// URL the request should be redirected to. It is parsed with `new URL`, so it has to be an absolute URL.
redirectUrl: string
}

/**
 * Regex/substitution pair returned by the `RegexFilter.filter` getter.
 * NOTE(review): the field names look like they mirror declarativeNetRequest rule fields; confirm against the caller.
 */
export interface IFilter {
// Regular expression source (as a string) that the origin URL has to match.
regexFilter: string
// Replacement URL; capture groups of `regexFilter` are referenced as `\1`, `\2`, ...
regexSubstitution: string
}

/**
 * Base class for all regex filters.
 *
 * A filter is built from an origin URL and the URL it should be redirected to. Construction parses both URLs,
 * derives their namespaces (the first path segment), lets the subclass compute the rule through
 * `computeFilter()` and finally normalizes the scheme of the resulting `regexFilter`.
 *
 * Subclasses have to override `computeFilter()`: the base implementation throws, so constructing `RegexFilter`
 * itself always fails.
 */
export class RegexFilter {
  readonly _redirectUrl: string
  readonly _originUrl: string
  readonly originURL: URL
  readonly redirectURL: URL
  readonly originNS: string
  readonly redirectNS: string
  // by default we cannot handle the request.
  private _canHandle = false
  // Both are populated by the subclass' computeFilter(), which runs inside the constructor.
  regexFilter!: string
  regexSubstitution!: string

  /**
   * @param input origin and redirect URLs; both must be absolute, otherwise `new URL` throws.
   */
  constructor ({ originUrl, redirectUrl }: IRegexFilter) {
    this._originUrl = originUrl
    this._redirectUrl = redirectUrl
    this.originURL = new URL(this._originUrl)
    this.redirectURL = new URL(this._redirectUrl)
    this.redirectNS = this.computeNamespaceFromUrl(this.redirectURL)
    this.originNS = this.computeNamespaceFromUrl(this.originURL)
    // Subclass hook first, then make sure the scheme part of whatever it produced is normalized.
    this.computeFilter()
    this.normalizeRegexFilter()
  }

  /**
   * Getter for the originUrl provided at construction.
   */
  get originUrl (): string {
    return this._originUrl
  }

  /**
   * Getter for the redirectUrl provided at construction.
   */
  get redirectUrl (): string {
    return this._redirectUrl
  }

  /**
   * Getter for the canHandle flag.
   */
  get canHandle (): boolean {
    return this._canHandle
  }

  /**
   * Setter for the canHandle flag.
   */
  set canHandle (value: boolean) {
    this._canHandle = value
  }

  /**
   * Getter for the filter. This is the regex filter and substitution.
   *
   * @returns the computed `regexFilter` / `regexSubstitution` pair.
   * @throws Error when the filter declared that it cannot handle the request.
   */
  get filter (): IFilter {
    if (!this.canHandle) {
      throw new Error('Cannot handle this request')
    }

    return {
      regexFilter: this.regexFilter,
      regexSubstitution: this.regexSubstitution
    }
  }

  /**
   * Compute the regex filter and substitution.
   * This is the main method that needs to be implemented by subclasses.
   *
   * @throws Error always, unless overridden.
   */
  computeFilter (): void {
    throw new Error('Method not implemented.')
  }

  /**
   * Normalize the regex filter so that it matches both `http` and `https`.
   * This is a helper method that can be used by subclasses.
   *
   * Only the scheme at the very start of the filter (optionally preceded by the `^` anchor) is rewritten.
   * Rewriting every `http` occurrence would also loosen host names and paths: `httpbin.org` would turn into
   * `https?bin.org` and start matching `httpsbin.org` as well. The operation is idempotent, so calling it from a
   * subclass and again from the constructor is safe.
   */
  normalizeRegexFilter (): void {
    this.regexFilter = this.regexFilter.replace(/^(\^?)https?\??/i, '$1https?')
  }

  /**
   * Compute the namespace from the URL. This finds the first path segment.
   * e.g. http://<gateway>/<namespace>/path/to/file/or/cid
   *
   * @param url URL
   * @returns the lower-cased first path segment, or an empty string when the path has no segment that is
   * followed by a `/`.
   */
  computeNamespaceFromUrl ({ pathname }: URL): string {
    // regex to match the first path segment.
    return (/\/([^/]+)\//i.exec(pathname)?.[1] ?? '').toLowerCase()
  }
}
76 changes: 40 additions & 36 deletions add-on/src/lib/redirect-handler/blockOrObserve.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import debug from 'debug'
import browser from 'webextension-polyfill'
import { CompanionState } from '../../types/companion.js'
import { IFilter, IRegexFilter, RegexFilter } from './baseRegexFilter.js'
import { CommonPatternRedirectRegexFilter } from './commonPatternRedirectRegexFilter.js'
import { NamespaceRedirectRegexFilter } from './namespaceRedirectRegexFilter.js'
import { SubdomainRedirectRegexFilter } from './subdomainRedirectRegexFilter.js'

// this won't work in webworker context. Needs to be enabled manually
// https://github.com/debug-js/debug/issues/916
const log = debug('ipfs-companion:redirect-handler:blockOrObserve')
log.error = debug('ipfs-companion:redirect-handler:blockOrObserve:error')

// Content namespaces the redirect filters look for, either as the first path segment or as a subdomain label.
export const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns'])

// Message type identifiers. The first three make up the `messageToSelfType` union declared further down.
export const GLOBAL_STATE_CHANGE = 'GLOBAL_STATE_CHANGE'
export const GLOBAL_STATE_OPTION_CHANGE = 'GLOBAL_STATE_OPTION_CHANGE'
export const DELETE_RULE_REQUEST = 'DELETE_RULE_REQUEST'
export const DELETE_RULE_REQUEST_SUCCESS = 'DELETE_RULE_REQUEST_SUCCESS'

// We need to match the rest of the URL, so we can use a wildcard.
// The capture group accepts either an empty remainder or one whose first character is not a dot.
export const RULE_REGEX_ENDING = '((?:[^\\.]|$).*)$'

interface regexFilterMap {
Expand All @@ -21,6 +29,7 @@ interface regexFilterMap {
interface redirectHandlerInput {
originUrl: string
redirectUrl: string
getPort: (state: CompanionState) => string
}

type messageToSelfType = typeof GLOBAL_STATE_CHANGE | typeof GLOBAL_STATE_OPTION_CHANGE | typeof DELETE_RULE_REQUEST
Expand All @@ -29,6 +38,8 @@ interface messageToSelf {
value?: string | Record<string, unknown>
}

// Alternation over DEFAULT_NAMESPACES wrapped in parentheses, i.e. `(ipfs|ipns)`; it acts as a capture group
// wherever it is embedded in a rule's regex.
export const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})`

// We need to check if the browser supports the declarativeNetRequest API.
// TODO: replace with check for `Blocking` in `chrome.webRequest.OnBeforeRequestOptions`
// which is currently a bug https://bugs.chromium.org/p/chromium/issues/detail?id=1427952
Expand Down Expand Up @@ -75,11 +86,18 @@ const savedRegexFilters: Map<string, regexFilterMap> = new Map()
const DEFAULT_LOCAL_RULES: redirectHandlerInput[] = [
{
originUrl: 'http://127.0.0.1',
redirectUrl: 'http://localhost'
redirectUrl: 'http://localhost',
getPort: ({ gwURLString }): string => new URL(gwURLString).port
},
{
originUrl: 'http://[::1]',
redirectUrl: 'http://localhost'
redirectUrl: 'http://localhost',
getPort: ({ gwURLString }): string => new URL(gwURLString).port
},
{
originUrl: 'http://localhost',
redirectUrl: 'http://127.0.0.1',
getPort: ({ apiURL }): string => new URL(apiURL).port
}
]

Expand All @@ -101,7 +119,7 @@ export function isLocalHost (url: string): boolean {
* @param str URL string to escape
* @returns
*/
function escapeURLRegex (str: string): string {
export function escapeURLRegex (str: string): string {
// these characters are allowed in the URL, but not in the regex.
// eslint-disable-next-line no-useless-escape
const ALLOWED_CHARS_URL_REGEX = /([:\/\?#\[\]@!$&'\(\ )\*\+,;=\-_\.~])/g
Expand All @@ -115,38 +133,24 @@ function escapeURLRegex (str: string): string {
* @param redirectUrl
* @returns
*/
function constructRegexFilter ({ originUrl, redirectUrl }: redirectHandlerInput): {
regexSubstitution: string
regexFilter: string
} {
// We can traverse the URL from the end, and find the first character that is different.
let commonIdx = 1
while (commonIdx < Math.min(originUrl.length, redirectUrl.length)) {
if (originUrl[originUrl.length - commonIdx] !== redirectUrl[redirectUrl.length - commonIdx]) {
break
function constructRegexFilter ({ originUrl, redirectUrl }: IRegexFilter): IFilter {
// the order is very important here, because we want to match the best possible filter.
const filtersToTryInOrder: Array<typeof RegexFilter> = [
SubdomainRedirectRegexFilter,
NamespaceRedirectRegexFilter,
CommonPatternRedirectRegexFilter
]

for (const Filter of filtersToTryInOrder) {
const filter = new Filter({ originUrl, redirectUrl })
if (filter.canHandle) {
return filter.filter
}
commonIdx += 1
}

// We can now construct the regex filter and substitution.
let regexSubstitution = redirectUrl.slice(0, redirectUrl.length - commonIdx + 1) + '\\1'
// We need to escape the characters that are allowed in the URL, but not in the regex.
const regexFilterFirst = escapeURLRegex(originUrl.slice(0, originUrl.length - commonIdx + 1))
// We need to match the rest of the URL, so we can use a wildcard.
const RULE_REGEX_ENDING = '((?:[^\\.]|$).*)$'
let regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}`.replace(/https?/ig, 'https?')

// This method does not parse:
// originUrl: "https://awesome.ipfs.io/"
// redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
// that ends up with capturing all urls which we do not want.
if (regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) {
const subdomain = new URL(originUrl).hostname
regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}`
regexSubstitution = regexSubstitution.replace('\\1', `/${subdomain}\\1`)
}

return { regexSubstitution, regexFilter }
// this is just to satisfy the compiler, this should never happen. Because CommonPatternRedirectRegexFilter can always
// handle.
return new CommonPatternRedirectRegexFilter({ originUrl, redirectUrl }).filter
}

// If the browser supports the declarativeNetRequest API, we can block the request.
Expand Down Expand Up @@ -248,10 +252,10 @@ async function reconcileRulesAndRemoveOld (state: CompanionState): Promise<void>
}

// make sure that the default rules are added.
for (const { originUrl, redirectUrl } of DEFAULT_LOCAL_RULES) {
const { port } = new URL(state.gwURLString)
const regexFilter = `^${escapeURLRegex(`${originUrl}:${port}`)}(.*)$`
const regexSubstitution = `${redirectUrl}:${port}\\1`
for (const { originUrl, redirectUrl, getPort } of DEFAULT_LOCAL_RULES) {
const port = getPort(state)
const regexFilter = `^${escapeURLRegex(`${originUrl}:${port}`)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`
const regexSubstitution = `${redirectUrl}:${port}/\\1/\\2`

if (!savedRegexFilters.has(regexFilter)) {
// We need to add the new rule.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { RegexFilter } from './baseRegexFilter.js'
import { RULE_REGEX_ENDING, escapeURLRegex } from './blockOrObserve.js'

/**
 * Fallback filter that works on the textual difference between both URLs, e.g.
 * origin: '^https?\\:\\/\\/awesome\\.ipfs\\.io\\/(.*)'
 * destination: 'http://localhost:8081/ipns/awesome.ipfs.io/$1'
 */
export class CommonPatternRedirectRegexFilter extends RegexFilter {
  computeFilter (): void {
    // Last resort: this filter accepts every origin/redirect pair.
    this.canHandle = true

    const origin = this.originUrl
    const redirect = this.redirectUrl

    // Walk both strings backwards until the characters differ; everything behind that point is shared.
    const limit = Math.min(origin.length, redirect.length)
    let offset = 1
    for (; offset < limit; offset += 1) {
      if (origin[origin.length - offset] !== redirect[redirect.length - offset]) {
        break
      }
    }
    const originHead = origin.slice(0, origin.length - offset + 1)
    const redirectHead = redirect.slice(0, redirect.length - offset + 1)

    // The shared tail becomes capture group 1; the origin head is escaped because it ends up inside a regex.
    this.regexSubstitution = `${redirectHead}\\1`
    this.regexFilter = `^${escapeURLRegex(originHead)}${RULE_REGEX_ENDING}`
    // normalizing here puts the `https?` scheme in place, which the comparison below relies on.
    this.normalizeRegexFilter()

    // A pair such as
    //   originUrl: "https://awesome.ipfs.io/"
    //   redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
    // leaves nothing but the scheme in the head, which would capture every URL. Pin the host name instead.
    const catchAllFilter = `^https?\\:\\/${RULE_REGEX_ENDING}`
    if (this.regexFilter !== catchAllFilter) {
      return
    }

    const { hostname } = new URL(origin)
    this.regexFilter = `^https?\\:\\/\\/${escapeURLRegex(hostname)}${RULE_REGEX_ENDING}`
    this.regexSubstitution = this.regexSubstitution.replace('\\1', `/${hostname}\\1`)
  }
}
25 changes: 25 additions & 0 deletions add-on/src/lib/redirect-handler/namespaceRedirectRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { RegexFilter } from './baseRegexFilter.js'
import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js'

/**
 * Handles namespace redirects like:
 * origin: '^https?\\:\\/\\/ipfs\\.io\\/(ipfs|ipns)\\/(.*)'
 * destination: 'http://localhost:8080/$1/$2'
 */
export class NamespaceRedirectRegexFilter extends RegexFilter {
  computeFilter (): void {
    // Both URLs have to use the same default namespace, and the origin must not carry a `uri` search param
    // (that one needs special handling this filter does not provide).
    this.canHandle = DEFAULT_NAMESPACES.has(this.originNS) &&
      DEFAULT_NAMESPACES.has(this.redirectNS) &&
      this.originNS === this.redirectNS &&
      this.originURL.searchParams.get('uri') == null

    // if the namespaces are the same, we can generate simpler regex.
    // A redirect like
    // https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU
    //
    // Cut the origin at the FIRST `/<namespace>/` only. Splitting on every occurrence would drop the part behind
    // a second `/<namespace>/` (e.g. `/ipfs/Qm/ipfs/file`) and leave that part as static text in the substitution.
    const nsSegment = `/${this.originNS}/`
    const nsStart = this.originUrl.indexOf(nsSegment)
    const nsFound = nsStart !== -1
    const originFirst = nsFound ? this.originUrl.slice(0, nsStart) : this.originUrl
    const originLast = nsFound ? this.originUrl.slice(nsStart + nsSegment.length) : ''

    // The regex filter and substitution are always assigned, even when canHandle is false, because the base
    // constructor normalizes `regexFilter` right after this method returns.
    this.regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`

    const withNamespaceGroup = this.redirectUrl.replace(`/${this.redirectNS}/`, '/\\1/')
    if (!nsFound) {
      // nothing is known about the remainder, keep the redirect as it is.
      this.regexSubstitution = withNamespaceGroup
    } else if (originLast === '') {
      // replacing an empty string would insert `\2` at the very start of the URL, append it instead.
      this.regexSubstitution = `${withNamespaceGroup}\\2`
    } else {
      this.regexSubstitution = withNamespaceGroup.replace(originLast, '\\2')
    }
  }
}
70 changes: 70 additions & 0 deletions add-on/src/lib/redirect-handler/subdomainRedirectRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { IRegexFilter, RegexFilter } from './baseRegexFilter.js'
import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js'

/**
 * Handles subdomain redirects like:
 * origin: '^https?\\:\\/\\/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy\\.ipfs\\.dweb\\.link'
 * destination: 'http://localhost:8080/ipfs/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy'
 */
export class SubdomainRedirectRegexFilter extends RegexFilter {
  constructor ({ originUrl, redirectUrl }: IRegexFilter) {
    super({ originUrl, redirectUrl })
  }

  /**
   * Builds a dynamic rule when the origin carries the namespace as a subdomain label
   * (`<cid>.<namespace>.<gateway>`) and the redirect carries it as a path segment. `canHandle` is only set
   * when such a rule could be constructed; otherwise the static values are left in place so that another
   * filter can take over.
   */
  computeFilter (): void {
    this.regexSubstitution = this.redirectUrl
    this.regexFilter = this.originUrl
    // Only applicable when the namespace is missing from the origin path but present in the redirect path.
    if (DEFAULT_NAMESPACES.has(this.originNS) || !DEFAULT_NAMESPACES.has(this.redirectNS)) {
      return
    }

    // Static fallback value, kept when no namespace label is found below.
    this.regexFilter = `^${escapeURLRegex(this.regexFilter)}`
    this.normalizeRegexFilter()

    // tld and root are known, we are just interested in the remainder of the host name.
    // The labels are ordered from the one closest to the root towards the left-most one.
    const [tld, root, ...subdomainLabels] = this.originURL.hostname.split('.').reverse()

    // going through the subdomains to find the first namespace label.
    // this does not work for subdomains where namespace is not provided.
    // e.g. https://helia-identify.on.fleek.co/
    // e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.on.fleek.co/
    const namespaceIdx = subdomainLabels.findIndex((label) => DEFAULT_NAMESPACES.has(label))
    if (namespaceIdx === -1) {
      return
    }
    if (namespaceIdx === subdomainLabels.length - 1) {
      // The namespace is the left-most label, so there is no CID/name in front of it to capture. Replacing an
      // empty string would insert `\1` at the very start of the redirect URL, so leave it to other filters.
      return
    }

    const namespaceLabel = subdomainLabels[namespaceIdx]
    // labels between the namespace and the root stay static, e.g. ['dweb', 'link'].
    const staticUrlParts = [...subdomainLabels.slice(0, namespaceIdx).reverse(), root, tld]
    // labels in front of the namespace, i.e. the CID or name, back in their original order.
    const dynamicUrlPart = subdomainLabels.slice(namespaceIdx + 1).reverse().join('.')

    // regex to match the start of the URL, this remains common.
    const commonStaticUrlStart = escapeURLRegex(`^${this.originURL.protocol}//`)
    const commonStaticUrlEnd = `\\.${escapeURLRegex(staticUrlParts.join('.'))}\\/${RULE_REGEX_ENDING}`

    // group 1: CID or name, group 2: namespace, group 3: rest of the URL.
    // e.g https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.ipfs.dweb.link
    this.regexFilter = `${commonStaticUrlStart}(.*?)\\.${defaultNSRegexStr}${commonStaticUrlEnd}`

    this.regexSubstitution = this._redirectUrl
      .replace(dynamicUrlPart, '\\1') // replace urlParts or CID.
      .replace(`/${namespaceLabel}/`, '/\\2/') // replace namespace dynamically.

    const pathWithSearch = this.originURL.pathname + this.originURL.search
    if (pathWithSearch !== '/') {
      this.regexSubstitution = this.regexSubstitution.replace(pathWithSearch, '/\\3') // replace path
    } else {
      this.regexSubstitution += '\\3'
    }

    // a regexFilter with dynamic parts was constructed, so this filter can take the request.
    this.canHandle = true
  }
}
3 changes: 3 additions & 0 deletions add-on/src/types/global.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
declare module 'is-ipfs' {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

function cid (value: string): boolean
}
Loading