From b739ef68d4d92d7af78b2a91b8581e9f3077df96 Mon Sep 17 00:00:00 2001 From: Rico Hermans Date: Thu, 10 Aug 2023 11:47:33 +0200 Subject: [PATCH] fix(kernel): package cache fails under parallelism (#4215) The package cache mechanism that was turned on by default in #4181 works in theory under parallelism, but not in practice. Typically the CDK CLI will prevent CDK apps from running in parallel, but Python testing frameworks like `tox` use subprocess parallelism to speed up test runs, leading to the jsii imports being executed at the same time. Since jsii is sync, the locking needs to be sync. The sync locking feature of the `lockfile` library doesn't have wait support (for good reason), and so when a lock is already acquired by another process it quickly burns through its 12 retries in a hot loop, and then exits with an error. Two changes to address this: - (Ab)use `Atomics.wait()` to get a synchronous sleeping primitive; since `lockSync` doesn't support synchronous sleep, we build our own retry loop with synchronous sleep around `lockSync`. - Since the extracted directory is immutable: if the marker file in the extracted directory exists, we can treat it as evidence that the directory has been completely written and we can skip trying to vy for exclusive access to write it. This avoids all lock contention after the very first CDK app execution. Fixes #4207. --- By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license]. [Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0 --- .../@jsii/kernel/src/disk-cache/disk-cache.ts | 134 ++++++++++++++++-- packages/@jsii/kernel/src/tar-cache/index.ts | 34 ++--- 2 files changed, 134 insertions(+), 34 deletions(-) diff --git a/packages/@jsii/kernel/src/disk-cache/disk-cache.ts b/packages/@jsii/kernel/src/disk-cache/disk-cache.ts index 821ab3bab8..ee8f7db1c1 100644 --- a/packages/@jsii/kernel/src/disk-cache/disk-cache.ts +++ b/packages/@jsii/kernel/src/disk-cache/disk-cache.ts @@ -10,7 +10,7 @@ import { utimesSync, writeFileSync, } from 'fs'; -import { lockSync, unlockSync } from 'lockfile'; +import { lockSync, Options, unlockSync } from 'lockfile'; import { dirname, join } from 'path'; import { digestFile } from './digest-file'; @@ -152,9 +152,62 @@ export class Entry { return join(this.path, MARKER_FILE_NAME); } + /** + * Whether the directory has been completely written + * + * The presence of the marker file is a signal that we can skip trying to lock the directory. + */ + public get isComplete(): boolean { + return existsSync(this.#markerFile); + } + + /** + * Retrieve an entry from the cache + * + * If the entry doesn't exist yet, use 'cb' to produce the file contents. + */ + public retrieve(cb: (path: string) => void): { + path: string; + cache: 'hit' | 'miss'; + } { + // If the marker file already exists, update its timestamp and immediately return. + // We don't even try to lock. + if (this.isComplete) { + this.#touchMarkerFile(); + return { path: this.path, cache: 'hit' }; + } + + let cache: 'hit' | 'miss' = 'miss'; + this.lock((lock) => { + // While we all fought to acquire the lock, someone else might have completed already. + if (this.isComplete) { + cache = 'hit'; + return; + } + + // !!!IMPORTANT!!! + // Extract directly into the final target directory, as certain antivirus + // software configurations on Windows will make a `renameSync` operation + // fail with EPERM until the files have been fully analyzed. + mkdirSync(this.path, { recursive: true }); + try { + cb(this.path); + } catch (error) { + rmSync(this.path, { force: true, recursive: true }); + throw error; + } + lock.markComplete(); + }); + return { path: this.path, cache }; + } + public lock(cb: (entry: LockedEntry) => T): T { mkdirSync(dirname(this.path), { recursive: true }); - lockSync(this.#lockFile, { retries: 12, stale: 5_000 }); + lockSyncWithWait(this.#lockFile, { + retries: 12, + // Extracting the largest tarball takes ~5s + stale: 10_000, + }); let disposed = false; try { return cb({ @@ -179,20 +232,13 @@ export class Entry { mkdirSync(dirname(join(this.path, name)), { recursive: true }); writeFileSync(join(this.path, name), content); }, - touch: () => { + markComplete: () => { if (disposed) { throw new Error( `Cannot touch ${this.path} once the lock block was returned!`, ); } - if (this.pathExists) { - if (existsSync(this.#markerFile)) { - const now = new Date(); - utimesSync(this.#markerFile, now, now); - } else { - writeFileSync(this.#markerFile, ''); - } - } + this.#touchMarkerFile(); }, }); } finally { @@ -201,6 +247,23 @@ export class Entry { } } + /** + * Update the timestamp on the marker file + */ + #touchMarkerFile() { + if (this.pathExists) { + try { + const now = new Date(); + utimesSync(this.#markerFile, now, now); + } catch (e: any) { + if (e.code !== 'ENOENT') { + throw e; + } + writeFileSync(this.#markerFile, ''); + } + } + } + public read(file: string): Buffer | undefined { try { return readFileSync(join(this.path, file)); @@ -217,7 +280,12 @@ export interface LockedEntry { delete(): void; write(name: string, data: Buffer): void; - touch(): void; + /** + * Mark the entry has having been completed + * + * The modification time of this file is used for cleanup. + */ + markComplete(): void; } function* directoriesUnder( @@ -242,3 +310,45 @@ function* directoriesUnder( } } } + +/** + * We must use 'lockSync', but that doesn't support waiting (because waiting is only supported for async APIs) + * so we have to build our own looping locker with waits + */ +function lockSyncWithWait(path: string, options: Options) { + let retries = options.retries ?? 0; + let sleep = 100; + + // eslint-disable-next-line no-constant-condition + while (true) { + try { + lockSync(path, { + retries: 0, + stale: options.stale, + }); + return; + } catch (e: any) { + if (retries === 0) { + throw e; + } + retries--; + + if (e.code === 'EEXIST') { + // Most common case, needs longest sleep. Randomize the herd. + sleepSync(Math.floor(Math.random() * sleep)); + sleep *= 1.5; + } else { + sleepSync(5); + } + } + } +} + +/** + * Abuse Atomics.wait() to come up with a sync sleep + * + * We must use a sync sleep because all of jsii is sync. + */ +function sleepSync(ms: number) { + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms); +} diff --git a/packages/@jsii/kernel/src/tar-cache/index.ts b/packages/@jsii/kernel/src/tar-cache/index.ts index b94939f835..1bc67cdfa9 100644 --- a/packages/@jsii/kernel/src/tar-cache/index.ts +++ b/packages/@jsii/kernel/src/tar-cache/index.ts @@ -55,6 +55,9 @@ export function extract( } } +/** + * Extract the tarball into a cached directory, symlink that directory into the target location + */ function extractViaCache( file: string, outDir: string, @@ -66,28 +69,12 @@ function extractViaCache( const dirCache = DiskCache.inDirectory(cacheRoot); const entry = dirCache.entryFor(file, ...comments); - const { path, cache } = entry.lock((lock) => { - let cache: 'hit' | 'miss' = 'hit'; - if (!entry.pathExists) { - // !!!IMPORTANT!!! - // Extract directly into the final target directory, as certain antivirus - // software configurations on Windows will make a `renameSync` operation - // fail with EPERM until the files have been fully analyzed. - mkdirSync(entry.path, { recursive: true }); - try { - untarInto({ - ...options, - cwd: entry.path, - file, - }); - } catch (error) { - rmSync(entry.path, { force: true, recursive: true }); - throw error; - } - cache = 'miss'; - } - lock.touch(); - return { path: entry.path, cache }; + const { path, cache } = entry.retrieve((path) => { + untarInto({ + ...options, + cwd: path, + file, + }); }); link(path, outDir); @@ -95,6 +82,9 @@ function extractViaCache( return { cache }; } +/** + * Extract directory into the target location + */ function extractToOutDir( file: string, cwd: string,