-
Notifications
You must be signed in to change notification settings - Fork 327
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor(regexfilters): Better Structure and Readability #1261
Changes from all commits
43478d6
fe40f59
7a631b6
d5d47d7
05c979f
7f78475
be5a7da
df85418
524b9bf
8261aec
67bbd4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/**
 * Input accepted by every regex filter: the pair of URLs that describe one
 * redirect.
 */
export interface IRegexFilter {
  /** URL of the request that is being redirected. */
  originUrl: string
  /** URL the request is being redirected to. */
  redirectUrl: string
}
|
||
/**
 * Result produced by a regex filter: a regular expression source string and
 * the substitution string that goes with it.
 */
export interface IFilter {
  /** Regular expression source, as a string. */
  regexFilter: string
  /** Substitution string; filters in this module use `\1`, `\2`, ... to refer to capture groups. */
  regexSubstitution: string
}
|
||
/**
 * Base class for all regex filters.
 *
 * The constructor parses both URLs, derives their namespaces, and then calls
 * `computeFilter()` followed by `normalizeRegexFilter()`. Subclasses override
 * `computeFilter()` to set `canHandle`, `regexFilter` and `regexSubstitution`.
 *
 * Because `computeFilter()` runs inside the base constructor, it executes
 * before any field initializers declared on the subclass; overrides should
 * rely only on the members defined here.
 */
export class RegexFilter {
  readonly _redirectUrl: string
  readonly _originUrl: string
  /** Parsed form of the origin URL. */
  readonly originURL: URL
  /** Parsed form of the redirect URL. */
  readonly redirectURL: URL
  /** Lower-cased first path segment of the origin URL, or '' when there is none. */
  readonly originNS: string
  /** Lower-cased first path segment of the redirect URL, or '' when there is none. */
  readonly redirectNS: string
  // by default we cannot handle the request.
  private _canHandle = false
  // Both are assigned by `computeFilter()` in subclasses, hence the definite-assignment assertion.
  regexFilter!: string
  regexSubstitution!: string

  /**
   * @param input the origin and redirect URLs of the redirect.
   * @throws TypeError when either URL cannot be parsed by `new URL()`.
   * @throws Error when `computeFilter()` is not overridden.
   */
  constructor ({ originUrl, redirectUrl }: IRegexFilter) {
    this._originUrl = originUrl
    this._redirectUrl = redirectUrl
    this.originURL = new URL(this._originUrl)
    this.redirectURL = new URL(this._redirectUrl)
    this.redirectNS = this.computeNamespaceFromUrl(this.redirectURL)
    this.originNS = this.computeNamespaceFromUrl(this.originURL)
    this.computeFilter()
    this.normalizeRegexFilter()
  }

  /**
   * Getter for the originUrl provided at construction.
   */
  get originUrl (): string {
    return this._originUrl
  }

  /**
   * Getter for the redirectUrl provided at construction.
   */
  get redirectUrl (): string {
    return this._redirectUrl
  }

  /**
   * Getter for the canHandle flag.
   */
  get canHandle (): boolean {
    return this._canHandle
  }

  /**
   * Setter for the canHandle flag.
   */
  set canHandle (value: boolean) {
    this._canHandle = value
  }

  /**
   * Getter for the filter. This is the regex filter and substitution.
   *
   * @throws Error when `canHandle` is false.
   */
  get filter (): IFilter {
    if (!this.canHandle) {
      throw new Error('Cannot handle this request')
    }

    return {
      regexFilter: this.regexFilter,
      regexSubstitution: this.regexSubstitution
    }
  }

  /**
   * Compute the regex filter and substitution.
   * This is the main method that needs to be implemented by subclasses.
   *
   * @throws Error always, in this base implementation.
   */
  computeFilter (): void {
    throw new Error('Method not implemented.')
  }

  /**
   * Normalize the regex filter so that it matches both `http` and `https`.
   * This is a helper method that can be used by subclasses.
   *
   * The pattern also consumes an existing trailing `?`, which makes the
   * operation idempotent: `https?` stays `https?` when normalized again.
   */
  normalizeRegexFilter (): void {
    // A subclass that declines the request may never assign `regexFilter`.
    // Constructing it must still succeed so that callers can inspect
    // `canHandle` instead of hitting a TypeError here.
    if (typeof this.regexFilter !== 'string') {
      return
    }
    this.regexFilter = this.regexFilter.replace(/https?\??/ig, 'https?')
  }

  /**
   * Compute the namespace from the URL. This finds the first path segment.
   * e.g. http://<gateway>/<namespace>/path/to/file/or/cid
   *
   * The segment must be followed by a `/`; a pathname such as `/ipfs` yields ''.
   *
   * @param url URL
   * @returns the lower-cased first path segment, or '' when there is none.
   */
  computeNamespaceFromUrl ({ pathname }: URL): string {
    // regex to match the first path segment.
    return (/\/([^/]+)\//i.exec(pathname)?.[1] ?? '').toLowerCase()
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,17 @@ | ||
import debug from 'debug' | ||
import browser from 'webextension-polyfill' | ||
import { CompanionState } from '../../types/companion.js' | ||
import isIPFS from 'is-ipfs' | ||
import { IFilter, IRegexFilter, RegexFilter } from './baseRegexFilter.js' | ||
import { CommonPatternRedirectRegexFilter } from './commonPatternRedirectRegexFilter.js' | ||
import { NamespaceRedirectRegexFilter } from './namespaceRedirectRegexFilter.js' | ||
import { SubdomainRedirectRegexFilter } from './subdomainRedirectRegexFilter.js' | ||
|
||
// this won't work in webworker context. Needs to be enabled manually | ||
// https://github.com/debug-js/debug/issues/916 | ||
const log = debug('ipfs-companion:redirect-handler:blockOrObserve') | ||
log.error = debug('ipfs-companion:redirect-handler:blockOrObserve:error') | ||
|
||
const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns']) | ||
export const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns']) | ||
|
||
export const GLOBAL_STATE_CHANGE = 'GLOBAL_STATE_CHANGE' | ||
export const GLOBAL_STATE_OPTION_CHANGE = 'GLOBAL_STATE_OPTION_CHANGE' | ||
|
@@ -35,7 +38,7 @@ interface messageToSelf { | |
value?: string | Record<string, unknown> | ||
} | ||
|
||
const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})` | ||
export const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})` | ||
|
||
// We need to check if the browser supports the declarativeNetRequest API. | ||
// TODO: replace with check for `Blocking` in `chrome.webRequest.OnBeforeRequestOptions` | ||
|
@@ -116,138 +119,38 @@ export function isLocalHost (url: string): boolean { | |
* @param str URL string to escape | ||
* @returns | ||
*/ | ||
export function escapeURLRegex (str: string): string {
  // these characters are allowed in the URL, but not in the regex.
  // Each one is prefixed with a backslash so that it is matched literally.
  // eslint-disable-next-line no-useless-escape
  const ALLOWED_CHARS_URL_REGEX = /([:\/\?#\[\]@!$&'\(\ )\*\+,;=\-_\.~])/g
  // '\\$1' is a literal backslash followed by the matched character.
  return str.replace(ALLOWED_CHARS_URL_REGEX, '\\$1')
}
|
||
/**
 * Compute the namespace from the URL. This finds the first path segment.
 * e.g. http://<gateway>/<namespace>/path/to/file/or/cid
 *
 * The segment must be followed by a `/`; a pathname such as `/ipfs` yields ''.
 *
 * @param url string
 * @returns the lower-cased first path segment, or '' when there is none.
 * @throws TypeError when `url` cannot be parsed by `new URL()`.
 */
function computeNamespaceFromUrl (url: string): string {
  const { pathname } = new URL(url)
  // regex to match the first path segment.
  return (/\/([^/]+)\//i.exec(pathname)?.[1] ?? '').toLowerCase()
}
|
||
/** | ||
* Construct a regex filter and substitution for a redirect. | ||
* | ||
* @param originUrl | ||
* @param redirectUrl | ||
* @returns | ||
*/ | ||
function constructRegexFilter ({ originUrl, redirectUrl }: redirectHandlerInput): { | ||
regexSubstitution: string | ||
regexFilter: string | ||
} { | ||
let regexSubstitution = redirectUrl | ||
let regexFilter = originUrl | ||
const originURL = new URL(originUrl) | ||
const redirectNS = computeNamespaceFromUrl(redirectUrl) | ||
const originNS = computeNamespaceFromUrl(originUrl) | ||
if (!DEFAULT_NAMESPACES.has(originNS) && DEFAULT_NAMESPACES.has(redirectNS)) { | ||
// A redirect like https://github.com/ipfs/ipfs-companion/issues/1255 | ||
regexFilter = `^${escapeURLRegex(regexFilter)}`.replace(/https?/ig, 'https?') | ||
const origRegexFilter = regexFilter | ||
|
||
const [tld, root, ...subdomain] = originURL.hostname.split('.').reverse() | ||
const staticUrl = [root, tld] | ||
while (subdomain.length > 0) { | ||
const subdomainPart = subdomain.shift() | ||
const commonStaticUrlStart = `^${originURL.protocol}\\:\\/\\/` | ||
const commonStaticUrlEnd = `\\.${escapeURLRegex(staticUrl.join('.'))}\\/${RULE_REGEX_ENDING}` | ||
if (isIPFS.cid(subdomainPart as string)) { | ||
// We didn't find a namespace, but we found a CID | ||
// e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.on.fleek.co | ||
regexFilter = `${commonStaticUrlStart}(.*?)${commonStaticUrlEnd}` | ||
regexSubstitution = redirectUrl | ||
.replace(subdomainPart as string, '\\1') // replace CID | ||
.replace(new RegExp(`${originURL.pathname}?$`), '\\2') // replace path | ||
|
||
break | ||
} | ||
if (DEFAULT_NAMESPACES.has(subdomainPart as string)) { | ||
// We found a namespace, this is going to match group 2, i.e. namespace. | ||
// e.g https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.ipfs.dweb.link | ||
regexFilter = `${commonStaticUrlStart}(.*?)\\.${defaultNSRegexStr}${commonStaticUrlEnd}` | ||
|
||
regexSubstitution = redirectUrl | ||
.replace(subdomain.reverse().join('.'), '\\1') // replace subdomain or CID. | ||
.replace(`/${subdomainPart as string}/`, '/\\2/') // replace namespace dynamically. | ||
|
||
const pathWithSearch = originURL.pathname + originURL.search | ||
if (pathWithSearch !== '/') { | ||
regexSubstitution = regexSubstitution.replace(pathWithSearch, '/\\3') // replace path | ||
} else { | ||
regexSubstitution += '\\3' | ||
} | ||
|
||
break | ||
} | ||
// till we find a namespace or CID, we keep adding subdomains to the staticUrl. | ||
staticUrl.unshift(subdomainPart as string) | ||
} | ||
|
||
if (regexFilter !== origRegexFilter) { | ||
// we found a valid regexFilter, so we can return. | ||
return { regexSubstitution, regexFilter } | ||
} else { | ||
// we didn't find a valid regexFilter, so we can return the default. | ||
regexFilter = originUrl | ||
} | ||
} | ||
|
||
// if the namespaces are the same, we can generate simpler regex. | ||
// The only value that needs special handling is the `uri` param. | ||
if ( | ||
DEFAULT_NAMESPACES.has(originNS) && | ||
DEFAULT_NAMESPACES.has(redirectNS) && | ||
originNS === redirectNS && | ||
originURL.searchParams.get('uri') == null | ||
) { | ||
// A redirect like | ||
// https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU | ||
const [originFirst, originLast] = originUrl.split(`/${originNS}/`) | ||
regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}` | ||
.replace(/https?/ig, 'https?') | ||
regexSubstitution = redirectUrl | ||
.replace(`/${redirectNS}/`, '/\\1/') | ||
.replace(originLast, '\\2') | ||
return { regexSubstitution, regexFilter } | ||
} | ||
|
||
// We can traverse the URL from the end, and find the first character that is different. | ||
let commonIdx = 1 | ||
while (commonIdx < Math.min(originUrl.length, redirectUrl.length)) { | ||
if (originUrl[originUrl.length - commonIdx] !== redirectUrl[redirectUrl.length - commonIdx]) { | ||
break | ||
function constructRegexFilter ({ originUrl, redirectUrl }: IRegexFilter): IFilter { | ||
// the order is very important here, because we want to match the best possible filter. | ||
const filtersToTryInOrder: Array<typeof RegexFilter> = [ | ||
SubdomainRedirectRegexFilter, | ||
NamespaceRedirectRegexFilter, | ||
CommonPatternRedirectRegexFilter | ||
] | ||
|
||
for (const Filter of filtersToTryInOrder) { | ||
const filter = new Filter({ originUrl, redirectUrl }) | ||
if (filter.canHandle) { | ||
return filter.filter | ||
Comment on lines
+144
to
+147
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This makes sure we initialize the filter and, if the filter can handle the request, return its values. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is so much cleaner and easier to follow, and with the regex filters abstracted into classes, I think it'll be much easier for all of us. Huge improvement! |
||
} | ||
commonIdx += 1 | ||
} | ||
|
||
// We can now construct the regex filter and substitution. | ||
regexSubstitution = redirectUrl.slice(0, redirectUrl.length - commonIdx + 1) + '\\1' | ||
// We need to escape the characters that are allowed in the URL, but not in the regex. | ||
const regexFilterFirst = escapeURLRegex(originUrl.slice(0, originUrl.length - commonIdx + 1)) | ||
regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}`.replace(/https?/ig, 'https?') | ||
|
||
// This method does not parse: | ||
// originUrl: "https://awesome.ipfs.io/" | ||
// redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/" | ||
// that ends up with capturing all urls which we do not want. | ||
if (regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) { | ||
const subdomain = new URL(originUrl).hostname | ||
regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}` | ||
regexSubstitution = regexSubstitution.replace('\\1', `/${subdomain}\\1`) | ||
} | ||
|
||
return { regexSubstitution, regexFilter } | ||
// this is just to satisfy the compiler, this should never happen. Because CommonPatternRedirectRegexFilter can always | ||
// handle. | ||
return new CommonPatternRedirectRegexFilter({ originUrl, redirectUrl }).filter | ||
} | ||
|
||
// If the browser supports the declarativeNetRequest API, we can block the request. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import { RegexFilter } from './baseRegexFilter.js' | ||
import { RULE_REGEX_ENDING, escapeURLRegex } from './blockOrObserve.js' | ||
|
||
/**
 * Handles redirects like:
 * origin: '^https?\\:\\/\\/awesome\\.ipfs\\.io\\/(.*)'
 * destination: 'http://localhost:8081/ipns/awesome.ipfs.io/$1'
 *
 * (The example writes the capture group as `$1`; the code below emits `\1`.)
 *
 * This is the catch-all filter: it always sets `canHandle` to true. It strips
 * the longest common suffix of the two URLs and turns what is left of the
 * origin URL into the regex prefix.
 */
export class CommonPatternRedirectRegexFilter extends RegexFilter {
  computeFilter (): void {
    // this filter is the worst case scenario, we can handle any redirect.
    this.canHandle = true
    // We can traverse the URL from the end, and find the first character that is different.
    // `commonIdx` is a 1-based offset from the end of both strings.
    let commonIdx = 1
    const leastLength = Math.min(this.originUrl.length, this.redirectUrl.length)
    while (commonIdx < leastLength) {
      if (this.originUrl[this.originUrl.length - commonIdx] !== this.redirectUrl[this.redirectUrl.length - commonIdx]) {
        break
      }
      commonIdx += 1
    }

    // We can now construct the regex filter and substitution.
    // Both slices keep everything up to and including the first differing character.
    this.regexSubstitution = this.redirectUrl.slice(0, this.redirectUrl.length - commonIdx + 1) + '\\1'
    // We need to escape the characters that are allowed in the URL, but not in the regex.
    const regexFilterFirst = escapeURLRegex(this.originUrl.slice(0, this.originUrl.length - commonIdx + 1))
    this.regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}`
    // calling normalize should add the protocol in the regexFilter.
    // It has to run here (not only in the base constructor) because the
    // comparison below is made against the normalized form.
    this.normalizeRegexFilter()

    // This method does not parse:
    // originUrl: "https://awesome.ipfs.io/"
    // redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
    // that ends up with capturing all urls which we do not want.
    if (this.regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) {
      // The base constructor has already parsed the origin URL, so reuse it.
      const subdomain = this.originURL.hostname
      this.regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}`
      this.regexSubstitution = this.regexSubstitution.replace('\\1', `/${subdomain}\\1`)
    }
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { RegexFilter } from './baseRegexFilter.js' | ||
import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js' | ||
|
||
/**
 * Handles namespace redirects like:
 * origin: '^https?\\:\\/\\/ipfs\\.io\\/(ipfs|ipns)\\/(.*)'
 * destination: 'http://localhost:8080/$1/$2'
 *
 * (The example writes capture groups as `$1`/`$2`; the code below emits `\1`/`\2`.)
 */
export class NamespaceRedirectRegexFilter extends RegexFilter {
  computeFilter (): void {
    // if the namespaces are the same, we can generate simpler regex.
    // The only value that needs special handling is the `uri` param.
    this.canHandle = DEFAULT_NAMESPACES.has(this.originNS) &&
      DEFAULT_NAMESPACES.has(this.redirectNS) &&
      this.originNS === this.redirectNS &&
      this.originURL.searchParams.get('uri') == null

    // A redirect like
    // https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU
    //
    // The fields are assigned even when `canHandle` is false, because the
    // base constructor calls `normalizeRegexFilter()` right after this method.
    const [originFirst, originLast] = this.originUrl.split(`/${this.originNS}/`)
    this.regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`

    const withNamespaceGroup = this.redirectUrl.replace(`/${this.redirectNS}/`, '/\\1/')
    if (originLast != null && withNamespaceGroup.endsWith(originLast)) {
      // Replace the trailing occurrence of the path. `String.replace` with a
      // string pattern hits the FIRST occurrence, which is wrong when the path
      // is empty (it would prepend `\2`) or when the same text also appears
      // earlier, e.g. `localhost` in both the host and the path.
      this.regexSubstitution = withNamespaceGroup.slice(0, withNamespaceGroup.length - originLast.length) + '\\2'
    } else {
      // The redirect does not end with the origin's path; keep the previous behaviour.
      this.regexSubstitution = withNamespaceGroup.replace(originLast, '\\2')
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we have a method that filters have to implement called `setCanHandle`, or should we expect `_canHandle` to be set by child classes? I've seen a few different patterns in the child classes, and I think we could normalize them so things don't get out of hand in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I already had the `set canHandle()` setter implemented; I just wasn't using it correctly. It's refactored now, and I think it'll make more sense.