diff --git a/add-on/src/lib/redirect-handler/baseRegexFilter.ts b/add-on/src/lib/redirect-handler/baseRegexFilter.ts new file mode 100644 index 000000000..eecfb46c1 --- /dev/null +++ b/add-on/src/lib/redirect-handler/baseRegexFilter.ts @@ -0,0 +1,104 @@ +export interface IRegexFilter { + originUrl: string + redirectUrl: string +} + +export interface IFilter { + regexFilter: string + regexSubstitution: string +} + +/** + * Base class for all regex filters. + */ +export class RegexFilter { + readonly _redirectUrl!: string + readonly _originUrl!: string + readonly originURL: URL + readonly redirectURL: URL + readonly originNS: string + readonly redirectNS: string + // by default we cannot handle the request. + private _canHandle = false + regexFilter!: string + regexSubstitution!: string + + constructor ({ originUrl, redirectUrl }: IRegexFilter) { + this._originUrl = originUrl + this._redirectUrl = redirectUrl + this.originURL = new URL(this._originUrl) + this.redirectURL = new URL(this._redirectUrl) + this.redirectNS = this.computeNamespaceFromUrl(this.redirectURL) + this.originNS = this.computeNamespaceFromUrl(this.originURL) + this.computeFilter() + this.normalizeRegexFilter() + } + + /** + * Getter for the originUrl provided at construction. + */ + get originUrl (): string { + return this._originUrl + } + + /** + * Getter for the redirectUrl provided at construction. + */ + get redirectUrl (): string { + return this._redirectUrl + } + + /** + * Getter for the canHandle flag. + */ + get canHandle (): boolean { + return this._canHandle + } + + /** + * Setter for the canHandle flag. + */ + set canHandle (value: boolean) { + this._canHandle = value + } + + /** + * Getter for the filter. This is the regex filter and substitution. 
+ */ + get filter (): IFilter { + if (!this.canHandle) { + throw new Error('Cannot handle this request') + } + + return { + regexFilter: this.regexFilter, + regexSubstitution: this.regexSubstitution + } + } + + /** + * Compute the regex filter and substitution. + * This is the main method that needs to be implemented by subclasses. + */ + computeFilter (): void { + throw new Error('Method not implemented.') + } + + /** + * Normalize the regex filter. This is a helper method that can be used by subclasses. + */ + normalizeRegexFilter (): void { + this.regexFilter = this.regexFilter.replace(/https?\??/ig, 'https?') + } + + /** + * Compute the namespace from the URL. This finds the first path segment. + * e.g. http://<gateway>/<namespace>/path/to/file/or/cid + * + * @param url URL + */ + computeNamespaceFromUrl ({ pathname }: URL): string { + // regex to match the first path segment. + return (/\/([^/]+)\//i.exec(pathname)?.[1] ?? '').toLowerCase() + } +} diff --git a/add-on/src/lib/redirect-handler/blockOrObserve.ts b/add-on/src/lib/redirect-handler/blockOrObserve.ts index 04c00705e..9c07341fc 100644 --- a/add-on/src/lib/redirect-handler/blockOrObserve.ts +++ b/add-on/src/lib/redirect-handler/blockOrObserve.ts @@ -1,14 +1,17 @@ import debug from 'debug' import browser from 'webextension-polyfill' import { CompanionState } from '../../types/companion.js' -import isIPFS from 'is-ipfs' +import { IFilter, IRegexFilter, RegexFilter } from './baseRegexFilter.js' +import { CommonPatternRedirectRegexFilter } from './commonPatternRedirectRegexFilter.js' +import { NamespaceRedirectRegexFilter } from './namespaceRedirectRegexFilter.js' +import { SubdomainRedirectRegexFilter } from './subdomainRedirectRegexFilter.js' // this won't work in webworker context. 
Needs to be enabled manually // https://github.com/debug-js/debug/issues/916 const log = debug('ipfs-companion:redirect-handler:blockOrObserve') log.error = debug('ipfs-companion:redirect-handler:blockOrObserve:error') -const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns']) +export const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns']) export const GLOBAL_STATE_CHANGE = 'GLOBAL_STATE_CHANGE' export const GLOBAL_STATE_OPTION_CHANGE = 'GLOBAL_STATE_OPTION_CHANGE' @@ -35,7 +38,7 @@ interface messageToSelf { value?: string | Record } -const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})` +export const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})` // We need to check if the browser supports the declarativeNetRequest API. // TODO: replace with check for `Blocking` in `chrome.webRequest.OnBeforeRequestOptions` @@ -116,25 +119,13 @@ export function isLocalHost (url: string): boolean { * @param str URL string to escape * @returns */ -function escapeURLRegex (str: string): string { +export function escapeURLRegex (str: string): string { // these characters are allowed in the URL, but not in the regex. // eslint-disable-next-line no-useless-escape const ALLOWED_CHARS_URL_REGEX = /([:\/\?#\[\]@!$&'\(\ )\*\+,;=\-_\.~])/g return str.replace(ALLOWED_CHARS_URL_REGEX, '\\$1') } -/** - * Compute the namespace from the URL. This finds the first path segment. - * e.g. http:////path/to/file/or/cid - * - * @param url string - */ -function computeNamespaceFromUrl (url: string): string { - const { pathname } = new URL(url) - // regex to match the first path segment. - return (/\/([^/]+)\//i.exec(pathname)?.[1] ?? '').toLowerCase() -} - /** * Construct a regex filter and substitution for a redirect. 
* @@ -142,112 +133,24 @@ function computeNamespaceFromUrl (url: string): string { * @param redirectUrl * @returns */ -function constructRegexFilter ({ originUrl, redirectUrl }: redirectHandlerInput): { - regexSubstitution: string - regexFilter: string -} { - let regexSubstitution = redirectUrl - let regexFilter = originUrl - const originURL = new URL(originUrl) - const redirectNS = computeNamespaceFromUrl(redirectUrl) - const originNS = computeNamespaceFromUrl(originUrl) - if (!DEFAULT_NAMESPACES.has(originNS) && DEFAULT_NAMESPACES.has(redirectNS)) { - // A redirect like https://github.com/ipfs/ipfs-companion/issues/1255 - regexFilter = `^${escapeURLRegex(regexFilter)}`.replace(/https?/ig, 'https?') - const origRegexFilter = regexFilter - - const [tld, root, ...subdomain] = originURL.hostname.split('.').reverse() - const staticUrl = [root, tld] - while (subdomain.length > 0) { - const subdomainPart = subdomain.shift() - const commonStaticUrlStart = `^${originURL.protocol}\\:\\/\\/` - const commonStaticUrlEnd = `\\.${escapeURLRegex(staticUrl.join('.'))}\\/${RULE_REGEX_ENDING}` - if (isIPFS.cid(subdomainPart as string)) { - // We didn't find a namespace, but we found a CID - // e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.on.fleek.co - regexFilter = `${commonStaticUrlStart}(.*?)${commonStaticUrlEnd}` - regexSubstitution = redirectUrl - .replace(subdomainPart as string, '\\1') // replace CID - .replace(new RegExp(`${originURL.pathname}?$`), '\\2') // replace path - - break - } - if (DEFAULT_NAMESPACES.has(subdomainPart as string)) { - // We found a namespace, this is going to match group 2, i.e. namespace. - // e.g https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.ipfs.dweb.link - regexFilter = `${commonStaticUrlStart}(.*?)\\.${defaultNSRegexStr}${commonStaticUrlEnd}` - - regexSubstitution = redirectUrl - .replace(subdomain.reverse().join('.'), '\\1') // replace subdomain or CID. 
- .replace(`/${subdomainPart as string}/`, '/\\2/') // replace namespace dynamically. - - const pathWithSearch = originURL.pathname + originURL.search - if (pathWithSearch !== '/') { - regexSubstitution = regexSubstitution.replace(pathWithSearch, '/\\3') // replace path - } else { - regexSubstitution += '\\3' - } - - break - } - // till we find a namespace or CID, we keep adding subdomains to the staticUrl. - staticUrl.unshift(subdomainPart as string) - } - - if (regexFilter !== origRegexFilter) { - // we found a valid regexFilter, so we can return. - return { regexSubstitution, regexFilter } - } else { - // we didn't find a valid regexFilter, so we can return the default. - regexFilter = originUrl - } - } - - // if the namespaces are the same, we can generate simpler regex. - // The only value that needs special handling is the `uri` param. - if ( - DEFAULT_NAMESPACES.has(originNS) && - DEFAULT_NAMESPACES.has(redirectNS) && - originNS === redirectNS && - originURL.searchParams.get('uri') == null - ) { - // A redirect like - // https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU - const [originFirst, originLast] = originUrl.split(`/${originNS}/`) - regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}` - .replace(/https?/ig, 'https?') - regexSubstitution = redirectUrl - .replace(`/${redirectNS}/`, '/\\1/') - .replace(originLast, '\\2') - return { regexSubstitution, regexFilter } - } - - // We can traverse the URL from the end, and find the first character that is different. - let commonIdx = 1 - while (commonIdx < Math.min(originUrl.length, redirectUrl.length)) { - if (originUrl[originUrl.length - commonIdx] !== redirectUrl[redirectUrl.length - commonIdx]) { - break +function constructRegexFilter ({ originUrl, redirectUrl }: IRegexFilter): IFilter { + // the order is very important here, because we want to match the best possible filter. 
+ const filtersToTryInOrder: Array = [ + SubdomainRedirectRegexFilter, + NamespaceRedirectRegexFilter, + CommonPatternRedirectRegexFilter + ] + + for (const Filter of filtersToTryInOrder) { + const filter = new Filter({ originUrl, redirectUrl }) + if (filter.canHandle) { + return filter.filter } - commonIdx += 1 - } - - // We can now construct the regex filter and substitution. - regexSubstitution = redirectUrl.slice(0, redirectUrl.length - commonIdx + 1) + '\\1' - // We need to escape the characters that are allowed in the URL, but not in the regex. - const regexFilterFirst = escapeURLRegex(originUrl.slice(0, originUrl.length - commonIdx + 1)) - regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}`.replace(/https?/ig, 'https?') - - // This method does not parse: - // originUrl: "https://awesome.ipfs.io/" - // redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/" - // that ends up with capturing all urls which we do not want. - if (regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) { - const subdomain = new URL(originUrl).hostname - regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}` - regexSubstitution = regexSubstitution.replace('\\1', `/${subdomain}\\1`) } - return { regexSubstitution, regexFilter } + // this is just to satisfy the compiler, this should never happen. Because CommonPatternRedirectRegexFilter can always + // handle. + return new CommonPatternRedirectRegexFilter({ originUrl, redirectUrl }).filter } // If the browser supports the declarativeNetRequest API, we can block the request. 
diff --git a/add-on/src/lib/redirect-handler/commonPatternRedirectRegexFilter.ts b/add-on/src/lib/redirect-handler/commonPatternRedirectRegexFilter.ts new file mode 100644 index 000000000..7de2e29ef --- /dev/null +++ b/add-on/src/lib/redirect-handler/commonPatternRedirectRegexFilter.ts @@ -0,0 +1,41 @@ +import { RegexFilter } from './baseRegexFilter.js' +import { RULE_REGEX_ENDING, escapeURLRegex } from './blockOrObserve.js' + +/** + * Handles redirects like: + * origin: '^https?\\:\\/\\/awesome\\.ipfs\\.io\\/(.*)' + * destination: 'http://localhost:8081/ipns/awesome.ipfs.io/$1' + */ +export class CommonPatternRedirectRegexFilter extends RegexFilter { + computeFilter (): void { + // this filter is the worst case scenario, we can handle any redirect. + this.canHandle = true + // We can traverse the URL from the end, and find the first character that is different. + let commonIdx = 1 + const leastLength = Math.min(this.originUrl.length, this.redirectUrl.length) + while (commonIdx < leastLength) { + if (this.originUrl[this.originUrl.length - commonIdx] !== this.redirectUrl[this.redirectUrl.length - commonIdx]) { + break + } + commonIdx += 1 + } + + // We can now construct the regex filter and substitution. + this.regexSubstitution = this.redirectUrl.slice(0, this.redirectUrl.length - commonIdx + 1) + '\\1' + // We need to escape the characters that are allowed in the URL, but not in the regex. + const regexFilterFirst = escapeURLRegex(this.originUrl.slice(0, this.originUrl.length - commonIdx + 1)) + this.regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}` + // calling normalize should add the protocol in the regexFilter. + this.normalizeRegexFilter() + + // This method does not parse: + // originUrl: "https://awesome.ipfs.io/" + // redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/" + // that ends up with capturing all urls which we do not want. 
+ if (this.regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) { + const subdomain = new URL(this.originUrl).hostname + this.regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}` + this.regexSubstitution = this.regexSubstitution.replace('\\1', `/${subdomain}\\1`) + } + } +} diff --git a/add-on/src/lib/redirect-handler/namespaceRedirectRegexFilter.ts b/add-on/src/lib/redirect-handler/namespaceRedirectRegexFilter.ts new file mode 100644 index 000000000..7a5cdd4e5 --- /dev/null +++ b/add-on/src/lib/redirect-handler/namespaceRedirectRegexFilter.ts @@ -0,0 +1,25 @@ +import { RegexFilter } from './baseRegexFilter.js' +import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js' + +/** + * Handles namespace redirects like: + * origin: '^https?\\:\\/\\/ipfs\\.io\\/(ipfs|ipns)\\/(.*)' + * destination: 'http://localhost:8080/$1/$2' + */ +export class NamespaceRedirectRegexFilter extends RegexFilter { + computeFilter (): void { + this.canHandle = DEFAULT_NAMESPACES.has(this.originNS) && + DEFAULT_NAMESPACES.has(this.redirectNS) && + this.originNS === this.redirectNS && + this.originURL.searchParams.get('uri') == null + // if the namespaces are the same, we can generate simpler regex. + // The only value that needs special handling is the `uri` param. 
+ // A redirect like + // https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU + const [originFirst, originLast] = this.originUrl.split(`/${this.originNS}/`) + this.regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}` + this.regexSubstitution = this.redirectUrl + .replace(`/${this.redirectNS}/`, '/\\1/') + .replace(originLast, '\\2') + } +} diff --git a/add-on/src/lib/redirect-handler/subdomainRedirectRegexFilter.ts b/add-on/src/lib/redirect-handler/subdomainRedirectRegexFilter.ts new file mode 100644 index 000000000..e8825d7f3 --- /dev/null +++ b/add-on/src/lib/redirect-handler/subdomainRedirectRegexFilter.ts @@ -0,0 +1,81 @@ +import isIPFS from 'is-ipfs' +import { IRegexFilter, RegexFilter } from './baseRegexFilter.js' +import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js' + +/** + * Handles subdomain redirects like: + * origin: '^https?\\:\\/\\/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy\\.ipfs\\.dweb\\.link' + * destination: 'http://localhost:8080/ipfs/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy' + */ +export class SubdomainRedirectRegexFilter extends RegexFilter { + constructor ({ originUrl, redirectUrl }: IRegexFilter) { + super({ originUrl, redirectUrl }) + } + + computeFilter (): void { + this.regexSubstitution = this.redirectUrl + this.regexFilter = this.originUrl + if (!DEFAULT_NAMESPACES.has(this.originNS) && DEFAULT_NAMESPACES.has(this.redirectNS)) { + // We'll use this to match the origin URL later. + this.regexFilter = `^${escapeURLRegex(this.regexFilter)}` + this.normalizeRegexFilter() + const origRegexFilter = this.regexFilter + // tld and root are known, we are just interested in the remainder of URL. + const [tld, root, ...urlParts] = this.originURL.hostname.split('.').reverse() + // can use the staticUrlParts to match the origin URL later. 
+ const staticUrlParts = [root, tld] + // regex to match the start of the URL, this remains common. + const commonStaticUrlStart = escapeURLRegex(`^${this.originURL.protocol}//`) + // going through the subdomains to find a namespace or CID. + while (urlParts.length > 0) { + // get the urlPart at the 0th index and remove it from the array. + const subdomainPart = urlParts.shift() as string + // this needs to be computed for every iteration as the staticUrlParts changes + const commonStaticUrlEnd = `\\.${escapeURLRegex(staticUrlParts.join('.'))}\\/${RULE_REGEX_ENDING}` + + // check if the subdomainPart is a CID. + if (isIPFS.cid(subdomainPart)) { + // We didn't find a namespace, but we found a CID + // e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.on.fleek.co + this.regexFilter = `${commonStaticUrlStart}(.*?)${commonStaticUrlEnd}` + this.regexSubstitution = this._redirectUrl + .replace(subdomainPart, '\\1') // replace CID + .replace(new RegExp(`${this.originURL.pathname}?$`), '\\2') // replace path + + // no need to continue, we found a CID. + break + } + + // check if the subdomainPart is a namespace. + if (DEFAULT_NAMESPACES.has(subdomainPart)) { + // We found a namespace, this is going to match group 2, i.e. namespace. + // e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.ipfs.dweb.link + this.regexFilter = `${commonStaticUrlStart}(.*?)\\.${defaultNSRegexStr}${commonStaticUrlEnd}` + + this.regexSubstitution = this._redirectUrl + .replace(urlParts.reverse().join('.'), '\\1') // replace urlParts or CID. + .replace(`/${subdomainPart}/`, '/\\2/') // replace namespace dynamically. + + const pathWithSearch = this.originURL.pathname + this.originURL.search + if (pathWithSearch !== '/') { + this.regexSubstitution = this.regexSubstitution.replace(pathWithSearch, '/\\3') // replace path + } else { + this.regexSubstitution += '\\3' + } + + // no need to continue, we found a namespace. 
+ break + } + + // till we find a namespace or CID, we keep adding subdomains to the staticUrlParts. + staticUrlParts.unshift(subdomainPart) + } + + if (this.regexFilter !== origRegexFilter) { + // this means we constructed a regexFilter with dynamic parts, instead of the original regexFilter which was + // static. Had it stayed static, canHandle would stay false, as another filter may be better suited. + this.canHandle = true + } + } + } +} diff --git a/test/functional/lib/ipfs-request-gateway-redirect.test.js b/test/functional/lib/ipfs-request-gateway-redirect.test.js index a2f9582d0..ea6f91278 100644 --- a/test/functional/lib/ipfs-request-gateway-redirect.test.js +++ b/test/functional/lib/ipfs-request-gateway-redirect.test.js @@ -327,7 +327,7 @@ describe(`[${manifestVersion}] gateway-redirect:`, function () { modifiedRequestCallResp: await modifyRequest.onBeforeRequest(request), MV2Expectation: `http://localhost:8080/ipfs/${cid}/`, MV3Expectation: { - origin: '^https:\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.dweb\\.link\\/', + origin: '^https?\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.dweb\\.link\\/', destination: 'http://localhost:8080/\\2/\\1/\\3' } }) @@ -340,7 +340,7 @@ describe(`[${manifestVersion}] gateway-redirect:`, function () { modifiedRequestCallResp: await modifyRequest.onBeforeRequest(request), MV2Expectation: `http://localhost:8080/ipfs/${cid}/`, MV3Expectation: { - origin: '^https:\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.cf\\-ipfs\\.com\\/', + origin: '^https?\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.cf\\-ipfs\\.com\\/', destination: 'http://localhost:8080/\\2/\\1/\\3' } }) @@ -353,7 +353,7 @@ describe(`[${manifestVersion}] gateway-redirect:`, function () { modifiedRequestCallResp: await modifyRequest.onBeforeRequest(request), MV2Expectation: 'http://localhost:8080/ipfs/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy/%3Ffilename=test.jpg?arg=val', MV3Expectation: { - origin: '^https:\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.dweb\\.link\\/', + origin: '^https?\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.dweb\\.link\\/', destination: 
'http://localhost:8080/\\2/\\1/\\3' } }) @@ -366,7 +366,7 @@ describe(`[${manifestVersion}] gateway-redirect:`, function () { modifiedRequestCallResp: await modifyRequest.onBeforeRequest(request), MV2Expectation: `http://localhost:8080/ipns/${peerid}/`, MV3Expectation: { - origin: '^https:\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.dweb\\.link\\/', + origin: '^https?\\:\\/\\/(.*?)\\.(ipfs|ipns)\\.dweb\\.link\\/', destination: 'http://localhost:8080/\\2/\\1/\\3' } })