Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
Add scripts to trivialize complex rewrites to CDNs
Browse files Browse the repository at this point in the history
  • Loading branch information
Chan Chak Shing committed Jul 15, 2018
1 parent 2979aae commit 87a9087
Show file tree
Hide file tree
Showing 4 changed files with 826 additions and 0 deletions.
61 changes: 61 additions & 0 deletions utils/trivialize-cdn-rules/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# TypeScript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env

# next.js build output
.next
159 changes: 159 additions & 0 deletions utils/trivialize-cdn-rules/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
'use strict';

const util = require('util');
const path = require('path');

const fs = require('graceful-fs');
const xml2js = require('xml2js');
const request = require('sync-request');
const chalk = require('chalk');

// Promise-returning wrappers around the callback-style APIs used below, so
// they can be consumed with `await` / `.then()`.
const readdir = util.promisify(fs.readdir);
const readFile = util.promisify(fs.readFile);
const parseXML = util.promisify(xml2js.parseString);

// Directory that is scanned for `.xml` ruleset files. The path is relative,
// so it is resolved against the working directory the script is started from.
const rulesDir = 'src/chrome/content/rules/'

/**
 * Print a coloured, level-tagged message about one ruleset file.
 *
 * 'WARN' goes to console.warn in yellow, 'INFO' goes to console.info in
 * green, and every other level (including 'FAIL') goes to console.error in
 * red.
 *
 * @param {string} level - Severity tag shown in square brackets.
 * @param {string} filename - Name of the file the message is about (printed in bold).
 * @param {*} message - Text (or error) to report; it is interpolated into the line.
 */
const log = (level, filename, message) => {
  const line = `[${level}]: ${chalk.bold(filename)}: ${message}`;

  if (level === 'WARN') {
    console.warn(chalk.yellow(line));
  } else if (level === 'INFO') {
    console.info(chalk.green(line));
  } else {
    // 'FAIL' and any unrecognised level are treated as errors.
    console.error(chalk.red(line));
  }
};

// All supported CDNs use the same shape for the rule's `from` attribute:
// `^http://<host>/` or `^https?://<host>/`, where <host> is a sequence of
// labels separated by dots and nothing follows the trailing slash. Backslashes
// are accepted inside a label, so the regex-escaped form `example\.com`
// matches. Capture group 1 is the (still escaped) host.
const simpleHostFromRe = /^\^http(?:s\?)?:\/\/((([\\a-z0-9äö_-]+)\.)*([\\a-z0-9äö-]+))\/$/;

// One entry per supported CDN: `fromRe` is tested against a rule's `from`
// attribute and `toRe` against its `to` attribute; both must match for the
// rule to be considered for trivialization.
const supportedCDNsRegexs = [
  // Cloudfront.net
  /^https:\/\/\w+\.cloudfront\.net\/$/,
  // 2o7.net
  /^https:\/\/[\w-]+\.122\.2o7\.net\/$/,
  // amazonaws.com (not anchored at the end, so anything may follow the slash)
  /^https:\/\/s3\.amazonaws\.com\//,
].map((toRe) => ({ fromRe: simpleHostFromRe, toRe }));

const escapeRegExp = (str) => {
return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
}

/**
 * Check whether `host` can be reached over HTTPS.
 *
 * A synchronous GET is issued against `https://<host>/`. Any response at all
 * counts as success; the status code is deliberately not inspected, because
 * the check is about the secure connection itself, not about what is served
 * at the root path.
 *
 * If the request throws (NOTE(review): sync-request appears to signal
 * connection, TLS and timeout failures this way; confirm against its docs),
 * the failure is logged as a warning and reported as `false`. Without this,
 * the exception would escape the predicate and abort processing of every
 * remaining rule in the file.
 *
 * @param {string} host - Plain host name (no scheme, no path).
 * @returns {boolean} `true` if a response was received, `false` otherwise.
 */
const isSecureConnectionOkay = (host) => {
  // FIXME: terrible performance... (the request blocks the event loop)
  try {
    const response = request('GET', `https://${host}/`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0'
      },
      timeout: 3000,
      socketTimeout: 3000,
      maxRedirects: 5,
    });
    return Boolean(response);
  } catch (error) {
    // An unreachable or misconfigured host is an expected outcome here, not a
    // fatal error: report it and let the caller skip this rule.
    log('WARN', host, `HTTPS check failed: ${error && error.message ? error.message : error}`);
    return false;
  }
};

/**
 * Replace rules that rewrite a plain host to a supported CDN with the trivial
 * rule `^http://<host>/` -> `https://<host>/`, provided the host itself
 * answers over HTTPS.
 *
 * For every rule whose `from`/`to` pair matches an entry of
 * `supportedCDNsRegexs`, the host is extracted from `from`, checked with
 * `isSecureConnectionOkay`, and the matching `<rule ... />` line in the file
 * text is substituted. The file is written back (synchronously) only when the
 * text actually changed.
 *
 * @param {{fullname: string}} fstat - Descriptor of the ruleset file; `fullname` is the path written to.
 * @param {string} content - Current text of the ruleset file.
 * @param {Array<{from: string, to: string}>} rules - Attribute objects of the file's rules.
 * @returns {Promise<boolean>} Resolves to `true` if the file was rewritten,
 *   `false` otherwise; rejects if a step throws (e.g. the write fails).
 */
const trivializeGenericRewrites = async (fstat, content, rules) => {
  const originalContent = content;
  let updatedContent = content;
  let rewrittenAtLeastOnce = false;

  for (const rule of rules) {
    for (const { fromRe, toRe } of supportedCDNsRegexs) {
      if (!fromRe.test(rule.from) || !toRe.test(rule.to)) {
        continue;
      }

      // Group 1 is the host as written in the pattern; undo the `\.` escapes.
      const host = rule.from.replace(fromRe, '$1').replace(/\\\./g, '.');
      if (!isSecureConnectionOkay(host)) {
        continue;
      }

      // Match the whole rule line: $1 keeps its indentation, $2 keeps the
      // whitespace between the `from` and `to` attributes.
      const ruleRegex = new RegExp(
        `\n([\t ]*)<rule\\s*from=\\s*"${escapeRegExp(rule.from)}"(\\s*)to=\\s*"${escapeRegExp(rule.to)}"\\s*?/>[\t ]*\n`,
        'g'
      );

      if (ruleRegex.test(updatedContent)) {
        updatedContent = updatedContent.replace(
          ruleRegex,
          `\n$1<rule from="^http://${host.replace(/\./g, '\\.')}/"$2to="https://${host}/" />\n`
        );
        if (originalContent !== updatedContent) {
          rewrittenAtLeastOnce = true;
        }
      }

      // The first CDN entry that matches and passes the HTTPS check wins.
      break;
    }
  }

  if (rewrittenAtLeastOnce) {
    fs.writeFileSync(fstat.fullname, updatedContent, { encoding: 'utf8' });
  }

  return rewrittenAtLeastOnce;
};

/**
 * Read one ruleset file, parse it and trivialize its CDN rewrites.
 *
 * Failures while reading, parsing or rewriting reject the returned promise
 * exactly once. The function body stops at the first failure instead of
 * carrying on with `undefined` data, which previously caused unhandled
 * rejections and, for rulesets without any `<rule>` element, a promise that
 * never settled.
 *
 * @param {{fullname: string, filename: string}} fstat - Descriptor of the
 *   ruleset file; `fullname` is the path that is read (and possibly rewritten).
 * @returns {Promise<boolean>} Resolves to `true` if the file was rewritten at
 *   least once, `false` otherwise.
 */
const trivializeCDNRewrites = async (fstat) => {
  const content = await readFile(fstat.fullname, { encoding: 'utf8' });
  const parsed = await parseXML(content);

  // A ruleset may have no <rule> children, and an empty document parses to an
  // empty result; both simply mean there is nothing to trivialize.
  const ruleNodes = (parsed && parsed.ruleset && parsed.ruleset.rule) || [];

  // `$` holds an element's attributes; skip rules that carry none.
  const rules = ruleNodes
    .map((rule) => rule.$)
    .filter(Boolean);

  const rewritten = await trivializeGenericRewrites(fstat, content, rules);

  // TODO: Add support for more CDNs
  return Boolean(rewritten);
};

// Entry point: process every `.xml` file directly inside `rulesDir`. Files are
// handled independently, so a failure in one is logged and does not stop the
// others. A failure to list the directory is logged under "::Promise.all::".
(async () => {
  try {
    const entries = await readdir(rulesDir);
    const fstats = entries
      .filter((filename) => filename.endsWith('.xml'))
      .map((filename) => ({
        fullname: path.join(rulesDir, filename),
        filename
      }));

    await Promise.all(fstats.map(async (fstat) => {
      try {
        const rewritten = await trivializeCDNRewrites(fstat);
        if (rewritten) {
          log('INFO', fstat.filename, 'trivialized');
        }
      } catch (error) {
        log('FAIL', fstat.filename, error);
      }
    }));
  } catch (error) {
    log('FAIL', "::Promise.all::", error);
  }
})();
Loading

0 comments on commit 87a9087

Please sign in to comment.