Skip to content

Commit

Permalink
[kbn/babel-register] improve cache performance (#150261)
Browse files Browse the repository at this point in the history
After #146212 the
babel-register cache appears to be invalidated more frequently for some
reason. The current version of the cache stores only a single cache
entry per file path, which shouldn't be too big a problem, but
with these changes several versions of a file will be cached. The
performance seems about equal, but because the cache contains multiple
versions of a single file, we should spend less time transpiling files
when switching branches frequently.

---------

Co-authored-by: kibanamachine <[email protected]>
  • Loading branch information
Spencer and kibanamachine authored Feb 7, 2023
1 parent 55b66e2 commit 1ee97e1
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 210 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,7 @@
"cypress-react-selector": "^3.0.0",
"cypress-real-events": "^1.7.6",
"cypress-recurse": "^1.26.0",
"date-fns": "^2.29.3",
"debug": "^2.6.9",
"delete-empty": "^2.0.0",
"dependency-check": "^4.1.0",
Expand Down
1 change: 1 addition & 0 deletions packages/kbn-babel-register/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ BUNDLER_DEPS = [
"@npm//chalk",
"@npm//pirates",
"@npm//lmdb",
"@npm//date-fns",
"@npm//source-map-support",
"//packages/kbn-repo-packages",
"//packages/kbn-repo-info",
Expand Down
5 changes: 1 addition & 4 deletions packages/kbn-babel-register/cache/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ const Fs = require('fs');
const Path = require('path');
const Crypto = require('crypto');

const { readHashOfPackageMap } = require('@kbn/repo-packages');
const babel = require('@babel/core');
const peggy = require('@kbn/peggy');
const { REPO_ROOT, UPSTREAM_BRANCH } = require('@kbn/repo-info');
Expand All @@ -25,7 +24,6 @@ const { getBabelOptions } = require('@kbn/babel-transform');
*/
function determineCachePrefix() {
const json = JSON.stringify({
synthPkgMapHash: readHashOfPackageMap(),
babelVersion: babel.version,
peggyVersion: peggy.version,
// get a config for a fake js, ts, and tsx file to make sure we
Expand Down Expand Up @@ -63,8 +61,7 @@ function getCache() {
if (lmdbAvailable()) {
log?.write('lmdb is available, using lmdb cache\n');
return new (require('./lmdb_cache').LmdbCache)({
pathRoot: REPO_ROOT,
dir: Path.resolve(REPO_ROOT, 'data/babel_register_cache_v1', UPSTREAM_BRANCH),
dir: Path.resolve(REPO_ROOT, 'data/babel_register_cache', UPSTREAM_BRANCH),
prefix: determineCachePrefix(),
log,
});
Expand Down
212 changes: 72 additions & 140 deletions packages/kbn-babel-register/cache/lmdb_cache.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@
*/

const Path = require('path');
const Crypto = require('crypto');
const startOfDay = /** @type {import('date-fns/startOfDay').default} */ (
/** @type {unknown} */ (require('date-fns/startOfDay'))
);

const chalk = require('chalk');
const LmdbStore = require('lmdb');

const GLOBAL_ATIME = `${Date.now()}`;
const GLOBAL_ATIME = startOfDay(new Date()).valueOf();
const MINUTE = 1000 * 60;
const HOUR = MINUTE * 60;
const DAY = HOUR * 24;

/** @typedef {import('./types').Cache} CacheInterface */
/** @typedef {import('lmdb').Database<string, string>} Db */
/** @typedef {import('lmdb').Database<import('./types').CacheEntry, string>} Db */

/**
* @param {Db} db
Expand All @@ -31,147 +35,90 @@ const dbName = (db) =>
* @implements {CacheInterface}
*/
class LmdbCache {
/** @type {import('lmdb').RootDatabase<string, string>} */
#codes;
/** @type {Db} */
#atimes;
/** @type {Db} */
#mtimes;
/** @type {Db} */
#sourceMaps;
/** @type {string} */
#pathRoot;
/** @type {string} */
#prefix;
/** @type {import('lmdb').RootDatabase<import('./types').CacheEntry, string>} */
#db;
/** @type {import('stream').Writable | undefined} */
#log;
/** @type {ReturnType<typeof setTimeout>} */
#timer;
/** @type {string} */
#prefix;

/**
* @param {import('./types').CacheConfig} config
*/
constructor(config) {
if (!Path.isAbsolute(config.pathRoot)) {
throw new Error('cache requires an absolute path to resolve paths relative to');
}

this.#pathRoot = config.pathRoot;
this.#prefix = config.prefix;
this.#log = config.log;

this.#codes = LmdbStore.open(config.dir, {
name: 'codes',
encoding: 'string',
maxReaders: 500,
});

// TODO: redundant 'name' syntax is necessary because of a bug that I have yet to fix
this.#atimes = this.#codes.openDB('atimes', {
name: 'atimes',
encoding: 'string',
});

this.#mtimes = this.#codes.openDB('mtimes', {
name: 'mtimes',
encoding: 'string',
});

this.#sourceMaps = this.#codes.openDB('sourceMaps', {
name: 'sourceMaps',
encoding: 'string',
this.#prefix = config.prefix;
this.#db = LmdbStore.open(Path.resolve(config.dir, 'v5'), {
name: 'db',
encoding: 'json',
});

// after the process has been running for 30 minutes prune the
// keys which haven't been used in 30 days. We use `unref()` to
// make sure this timer doesn't hold other processes open
// unexpectedly
this.#timer = setTimeout(() => {
this.#pruneOldKeys().catch((error) => {
const lastClean = this.#db.get('@last clean');
if (!lastClean || lastClean[0] < GLOBAL_ATIME - 7 * DAY) {
try {
this.#pruneOldKeys();
} catch (error) {
process.stderr.write(`
Failed to cleanup @kbn/babel-register cache:
${error.stack.split('\n').join('\n ')}
To eliminate this problem you may want to delete the "${Path.relative(process.cwd(), config.dir)}"
directory and report this error to the Operations team.\n`);
});
}, 30 * MINUTE);

// timer.unref is not defined in jest which emulates the dom by default
if (typeof this.#timer.unref === 'function') {
this.#timer.unref();
} finally {
this.#db.putSync('@last clean', [GLOBAL_ATIME, '', {}]);
}
}
}

/**
* Get the cache key of the path and source from disk of a file
* @param {string} path
* @param {string} source
* @returns {string}
*/
getMtime(path) {
return this.#safeGet(this.#mtimes, this.#getKey(path));
getKey(path, source) {
return `${this.#prefix}:${Crypto.createHash('sha1').update(path).update(source).digest('hex')}`;
}

/**
* @param {string} path
* @param {string} key
* @returns {string|undefined}
*/
getCode(path) {
const key = this.#getKey(path);
const code = this.#safeGet(this.#codes, key);
getCode(key) {
const entry = this.#safeGet(this.#db, key);

if (code !== undefined) {
if (entry !== undefined && entry[0] !== GLOBAL_ATIME) {
// when we use a file from the cache set the "atime" of that cache entry
// so that we know which cache items we use and which haven't been
// touched in a long time (currently 30 days)
this.#safePut(this.#atimes, key, GLOBAL_ATIME);
// used in a long time (currently 30 days)
this.#safePut(this.#db, key, [GLOBAL_ATIME, entry[1], entry[2]]);
}

return code;
return entry?.[1];
}

/**
* @param {string} path
*/
getSourceMap(path) {
const map = this.#safeGet(this.#sourceMaps, this.#getKey(path));
if (typeof map === 'string') {
return JSON.parse(map);
}
}

close() {
clearTimeout(this.#timer);
}

/**
* @param {string} path
* @param {{ mtime: string; code: string; map?: any }} file
* @param {string} key
* @returns {object|undefined}
*/
async update(path, file) {
const key = this.#getKey(path);

this.#safePut(this.#atimes, key, GLOBAL_ATIME);
this.#safePut(this.#mtimes, key, file.mtime);
this.#safePut(this.#codes, key, file.code);

if (file.map) {
this.#safePut(this.#sourceMaps, key, JSON.stringify(file.map));
getSourceMap(key) {
const entry = this.#safeGet(this.#db, key);
if (entry) {
return entry[2];
}
}

/**
* @param {string} path
* @param {string} key
* @param {{ code: string, map: object }} entry
*/
#getKey(path) {
const normalizedPath =
Path.sep !== '/'
? Path.relative(this.#pathRoot, path).split(Path.sep).join('/')
: Path.relative(this.#pathRoot, path);

return `${this.#prefix}:${normalizedPath}`;
async update(key, entry) {
this.#safePut(this.#db, key, [GLOBAL_ATIME, entry.code, entry.map]);
}

/**
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
*/
#safeGet(db, key) {
Expand All @@ -190,9 +137,9 @@ directory and report this error to the Operations team.\n`);
}

/**
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
* @param {string} value
* @param {import('./types').CacheEntry} value
*/
#safePut(db, key, value) {
try {
Expand All @@ -205,7 +152,7 @@ directory and report this error to the Operations team.\n`);

/**
* @param {string} type
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
*/
#debug(type, db, key) {
Expand All @@ -214,7 +161,7 @@ directory and report this error to the Operations team.\n`);

/**
* @param {'GET' | 'PUT'} type
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
* @param {Error} error
*/
Expand All @@ -227,51 +174,36 @@ directory and report this error to the Operations team.\n`);
);
}

async #pruneOldKeys() {
try {
const ATIME_LIMIT = Date.now() - 30 * DAY;
const BATCH_SIZE = 1000;
#pruneOldKeys() {
const ATIME_LIMIT = Date.now() - 30 * DAY;

/** @type {string[]} */
const validKeys = [];
/** @type {string[]} */
const invalidKeys = [];
/** @type {string[]} */
const toDelete = [];
const flushDeletes = () => {
if (!toDelete.length) {
return;
}

for (const { key, value } of this.#atimes.getRange()) {
const atime = parseInt(`${value}`, 10);
if (Number.isNaN(atime) || atime < ATIME_LIMIT) {
invalidKeys.push(key);
} else {
validKeys.push(key);
this.#db.transactionSync(() => {
for (const key of toDelete) {
this.#db.removeSync(key);
}
});
};

if (validKeys.length + invalidKeys.length >= BATCH_SIZE) {
const promises = new Set();

if (invalidKeys.length) {
for (const k of invalidKeys) {
// all these promises are the same currently, so Set() will
// optimise this to a single promise, but I wouldn't be shocked
// if a future version starts returning independent promises so
// this is just for some future-proofing
promises.add(this.#atimes.remove(k));
promises.add(this.#mtimes.remove(k));
promises.add(this.#codes.remove(k));
promises.add(this.#sourceMaps.remove(k));
}
} else {
// delay a smidge to allow other things to happen before the next batch of checks
promises.add(new Promise((resolve) => setTimeout(resolve, 1)));
}
for (const { key, value } of this.#db.getRange()) {
if (Number.isNaN(value[0]) || value[0] < ATIME_LIMIT) {
toDelete.push(key);

invalidKeys.length = 0;
validKeys.length = 0;
await Promise.all(Array.from(promises));
// flush deletes early if there are many deleted
if (toDelete.length > 10_000) {
flushDeletes();
}
}
} catch {
// ignore errors, the cache is totally disposable and will rebuild if there is some sort of corruption
}

// delete all the old keys
flushDeletes();
}
}

Expand Down
Loading

0 comments on commit 1ee97e1

Please sign in to comment.