Skip to content

Commit

Permalink
feat(kernel): experimental runtime package cache (#3724)
Browse files Browse the repository at this point in the history
Adds an experimental (hence opt-in) feature that caches the contents of
loaded libraries in a directory that persists between executions, in
order to spare the time it takes to extract the tarballs.

When this feature is enabled, packages present in the cache will be used
as-is (i.e., they are not checked for tampering) instead of being
extracted from the tarball. The cache is keyed on:
- The hash of the tarball
- The name of the library
- The version of the library

Objects in the cache will expire if they are not used for 30 days, and
are subsequently removed from disk (this avoids a cache growing
extremely large over time).

In order to enable the feature, the following environment variables are
used:
- `JSII_RUNTIME_PACKAGE_CACHE` must be set to `enabled` in order for the
  package cache to be active at all;
- `JSII_RUNTIME_PACKAGE_CACHE_ROOT` can be used to change which
  directory is used as a cache root. It defaults to:
  * On macOS: `$HOME/Library/Caches/com.amazonaws.jsii`
  * On Linux: `$HOME/.cache/aws/jsii/package-cache`
  * On Windows: `%LOCALAPPDATA%\AWS\jsii\package-cache`
  * On other platforms: `$TMP/aws-jsii-package-cache`
- `JSII_RUNTIME_PACKAGE_CACHE_TTL` can be used to change the default
  time entries will remain in cache before expiring if they are not
  used. This defaults to 30 days, and the value is expressed in days.
  Set to `0` to immediately expire all the cache's content.

When troubleshooting load performance, it is possible to obtain timing
data for some critical parts of the library load process within the jsii
kernel by setting the `JSII_DEBUG_TIMING` environment variable.

Related to #3389



---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
  • Loading branch information
RomainMuller authored Aug 30, 2022
1 parent 4a52d4c commit 4c2dcd5
Show file tree
Hide file tree
Showing 13 changed files with 574 additions and 24 deletions.
2 changes: 2 additions & 0 deletions packages/@jsii/kernel/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@
"dependencies": {
"@jsii/spec": "^0.0.0",
"fs-extra": "^10.1.0",
"lockfile": "^1.0.4",
"tar": "^6.1.11"
},
"devDependencies": {
"@scope/jsii-calc-base": "^0.0.0",
"@scope/jsii-calc-lib": "^0.0.0",
"@types/fs-extra": "^9.0.13",
"@types/lockfile": "^1.0.2",
"@types/tar": "^6.1.2",
"jest-expect-message": "^1.0.2",
"jsii-build-tools": "^0.0.0",
Expand Down
27 changes: 27 additions & 0 deletions packages/@jsii/kernel/src/disk-cache/digest-file.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { createHash } from 'crypto';
import { openSync, readSync, closeSync } from 'fs';

const ALGORITHM = 'sha256';

/**
 * Computes the digest of a file, optionally mixing extra strings into it.
 *
 * The file is fed to the hash in fixed-size chunks. Once the whole file has
 * been consumed, every comment is appended to the hash input, each one
 * preceded by a NUL byte.
 *
 * @param path     the file whose contents are digested.
 * @param comments additional strings hashed after the file contents.
 *
 * @returns the raw digest bytes.
 */
export function digestFile(
  path: string,
  ...comments: readonly string[]
): Buffer {
  const hasher = createHash(ALGORITHM);
  const chunk = Buffer.alloc(16_384);

  const handle = openSync(path, 'r');
  try {
    for (;;) {
      const count = readSync(handle, chunk);
      if (count <= 0) {
        break;
      }
      // Only the bytes actually read in this round are part of the file.
      hasher.update(chunk.subarray(0, count));
    }

    comments.forEach((comment) => {
      hasher.update('\0');
      hasher.update(comment);
    });

    return hasher.digest();
  } finally {
    closeSync(handle);
  }
}
244 changes: 244 additions & 0 deletions packages/@jsii/kernel/src/disk-cache/disk-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
realpathSync,
rmdirSync,
rmSync,
statSync,
utimesSync,
writeFileSync,
} from 'fs';
import { lockSync, unlockSync } from 'lockfile';
import { dirname, join } from 'path';

import { digestFile } from './digest-file';

const MARKER_FILE_NAME = '.jsii-runtime-package-cache';

const ONE_DAY_IN_MS = 86_400_000;
const DEFAULT_TTL_DAYS = 30;

/**
 * Converts the raw value of `JSII_RUNTIME_PACKAGE_CACHE_TTL` (a number of
 * days) into milliseconds.
 *
 * @param raw the environment variable's value, if set.
 *
 * @returns the time-to-live in milliseconds. The 30-day default is used when
 *          the value is unset, empty, or cannot be parsed as an integer (a
 *          NaN time-to-live would otherwise produce an invalid cut-off date
 *          against which no entry ever compares as expired).
 */
function cacheTtlMilliseconds(raw: string | undefined): number {
  if (!raw) {
    return DEFAULT_TTL_DAYS * ONE_DAY_IN_MS;
  }
  const days = parseInt(raw, 10);
  if (Number.isNaN(days)) {
    return DEFAULT_TTL_DAYS * ONE_DAY_IN_MS;
  }
  return days * ONE_DAY_IN_MS;
}

/** How long an entry may remain unused before it is pruned from the cache. */
const PRUNE_AFTER_MILLISECONDS = cacheTtlMilliseconds(
  process.env.JSII_RUNTIME_PACKAGE_CACHE_TTL,
);

/**
 * A cache stored in a directory on disk, whose entries are removed once they
 * have not been accessed for longer than `PRUNE_AFTER_MILLISECONDS`.
 *
 * Instances are obtained through `DiskCache.inDirectory`, which hands out a
 * single instance per (real) root directory.
 */
export class DiskCache {
  /** Instances created so far, indexed by the real path of their root. */
  private static readonly CACHE = new Map<string, DiskCache>();

  /**
   * Returns the cache rooted at the designated directory, creating the
   * directory (and its parents) if necessary.
   *
   * @param path the root directory of the cache.
   *
   * @returns the `DiskCache` instance for the real path of `path`.
   */
  public static inDirectory(path: string): DiskCache {
    // mkdirSync returns the first directory it created, or undefined if none.
    const didCreate = mkdirSync(path, { recursive: true }) != null;
    if (didCreate && process.platform === 'darwin') {
      // Mark the directories for no iCloud sync, no Spotlight indexing, no TimeMachine backup
      // @see https://michaelbach.de/2019/03/19/MacOS-nosync-noindex-nobackup.html
      writeFileSync(join(path, '.nobackup'), '');
      writeFileSync(join(path, '.noindex'), '');
      writeFileSync(join(path, '.nosync'), '');
    }

    // Key on the real path so different spellings of one directory share an instance.
    path = realpathSync(path);
    let cache = this.CACHE.get(path);
    if (cache == null) {
      cache = new DiskCache(path);
      this.CACHE.set(path, cache);
    }
    return cache;
  }

  readonly #root: string;

  /**
   * @param root the real path of the cache's root directory. Expired entries
   *             are pruned when the process emits `beforeExit`.
   */
  private constructor(root: string) {
    this.#root = root;
    process.once('beforeExit', () => this.pruneExpiredEntries());
  }

  /**
   * Obtains the entry designated by the provided key.
   *
   * In each key element, runs of characters other than `@`, `a-z`, `0-9`,
   * `_`, `.`, `-`, `/` and `\` are replaced by `_`; the element is then split
   * on `/` and `\` into path segments below the cache root.
   *
   * @param key the elements of the entry's key (at least one).
   *
   * @returns the corresponding `Entry`.
   *
   * @throws if `key` is empty, or if it contains a `..` path segment.
   */
  public entry(...key: readonly string[]): Entry {
    if (key.length === 0) {
      throw new Error(`Cache entry key must contain at least 1 element!`);
    }

    return new Entry(
      join(
        this.#root,
        ...key.flatMap((s) =>
          s
            .replace(/[^@a-z0-9_.\\/-]+/g, '_')
            .split(/[\\/]+/)
            .map((ss) => {
              // Refuse segments that would let the entry escape the cache root.
              if (ss === '..') {
                throw new Error(
                  `A cache entry key cannot contain a '..' path segment! (${s})`,
                );
              }
              return ss;
            }),
        ),
      ),
    );
  }

  /**
   * Obtains the entry for a file, keyed on the provided comments followed by
   * the hexadecimal digest of the file (the digest also covers the comments).
   *
   * @param path     the file to compute the entry key from.
   * @param comments additional key elements, also mixed into the digest.
   *
   * @returns the corresponding `Entry`.
   */
  public entryFor(path: string, ...comments: readonly string[]): Entry {
    const rawDigest = digestFile(path, ...comments);
    return this.entry(...comments, rawDigest.toString('hex'));
  }

  /**
   * Deletes the entries that were last accessed before the expiry cut-off,
   * then removes the directories left empty under the cache root.
   */
  public pruneExpiredEntries() {
    const cutOff = new Date(Date.now() - PRUNE_AFTER_MILLISECONDS);
    for (const entry of this.entries()) {
      if (entry.atime < cutOff) {
        entry.lock((lockedEntry) => {
          // Check again in case it's been accessed while we waited for the lock...
          if (entry.atime > cutOff) {
            return;
          }
          lockedEntry.delete();
        });
      }
    }

    for (const dir of directoriesUnder(this.#root, true)) {
      if (process.platform === 'darwin') {
        try {
          rmSync(join(dir, '.DS_Store'), { force: true });
        } catch {
          // Ignore errors...
        }
      }
      try {
        // Listing is part of the best-effort block: the directory may have
        // been removed (or made unreadable) since it was enumerated.
        if (readdirSync(dir).length === 0) {
          rmdirSync(dir);
        }
      } catch {
        // Ignore errors, directory may be gone or no longer be empty...
      }
    }
  }

  /**
   * Enumerates the entries present in this cache. A directory that contains
   * the marker file is an entry, and is not searched any further.
   */
  private *entries(): Generator<Entry, void, void> {
    yield* inDirectory(this.#root);

    function* inDirectory(dir: string): Generator<Entry, void, void> {
      if (existsSync(join(dir, MARKER_FILE_NAME))) {
        return yield new Entry(dir);
      }
      for (const file of directoriesUnder(dir)) {
        yield* inDirectory(file);
      }
    }
  }
}

/**
 * A single cache entry, identified by the directory that holds its contents.
 */
export class Entry {
  /**
   * @param path the directory in which this entry's contents are stored.
   */
  public constructor(public readonly path: string) {}

  /**
   * The access time of this entry's marker file, or the epoch (`new Date(0)`)
   * when the marker file does not exist.
   */
  public get atime(): Date {
    try {
      return statSync(this.markerFile).atime;
    } catch (err: any) {
      if (err.code === 'ENOENT') {
        return new Date(0);
      }
      throw err;
    }
  }

  /** Whether this entry's directory currently exists. */
  public get pathExists() {
    return existsSync(this.path);
  }

  /** The lock file guarding this entry, a sibling of the entry's directory. */
  private get lockFile(): string {
    return `${this.path}.lock`;
  }

  /** The marker file stored inside this entry's directory. */
  private get markerFile(): string {
    return join(this.path, MARKER_FILE_NAME);
  }

  /**
   * Runs a callback while holding this entry's lock.
   *
   * The parent directory is created if needed, the lock is acquired, and the
   * callback receives a `LockedEntry` through which the entry can be modified.
   * The lock is released when the callback returns or throws, after which
   * every operation of the `LockedEntry` throws.
   *
   * @param cb the code to run while the lock is held.
   *
   * @returns whatever `cb` returned.
   */
  public lock<T>(cb: (entry: LockedEntry) => T): T {
    mkdirSync(dirname(this.path), { recursive: true });
    lockSync(this.lockFile, { retries: 12, stale: 5_000 });

    let disposed = false;
    // Rejects any operation attempted after the lock block has completed.
    const ensureActive = (action: string): void => {
      if (disposed) {
        throw new Error(`Cannot ${action} once the lock block was returned!`);
      }
    };

    const handle: LockedEntry = {
      delete: () => {
        ensureActive(`delete ${this.path}`);
        rmSync(this.path, { force: true, recursive: true });
      },
      write: (name, content) => {
        const target = join(this.path, name);
        ensureActive(`write ${target}`);

        mkdirSync(dirname(target), { recursive: true });
        writeFileSync(target, content);
      },
      touch: () => {
        ensureActive(`touch ${this.path}`);
        if (!this.pathExists) {
          return;
        }
        if (!existsSync(this.markerFile)) {
          // Creating the marker is what makes the directory a cache entry.
          writeFileSync(this.markerFile, '');
          return;
        }
        const now = new Date();
        utimesSync(this.markerFile, now, now);
      },
    };

    try {
      return cb(handle);
    } finally {
      disposed = true;
      unlockSync(this.lockFile);
    }
  }

  /**
   * Reads a file stored in this entry.
   *
   * @param file the name of the file, relative to this entry's directory.
   *
   * @returns the file's contents, or `undefined` if the file does not exist.
   */
  public read(file: string): Buffer | undefined {
    try {
      return readFileSync(join(this.path, file));
    } catch (error: any) {
      if (error.code !== 'ENOENT') {
        throw error;
      }
      return undefined;
    }
  }
}

/**
 * The operations that can be performed on a cache entry while its lock is
 * held. An implementation is handed to the callback passed to `Entry.lock`;
 * with that implementation, every method throws once the callback has
 * returned.
 */
export interface LockedEntry {
/**
 * Removes the entry. As implemented by `Entry.lock`, this recursively and
 * forcibly deletes the entry's directory.
 */
delete(): void;
/**
 * Stores a file in the entry. As implemented by `Entry.lock`, missing parent
 * directories are created before the file is written.
 *
 * @param name the name of the file, relative to the entry's directory.
 * @param data the contents to write.
 */
write(name: string, data: Buffer): void;

/**
 * Marks the entry as just used. As implemented by `Entry.lock`, this does
 * nothing unless the entry's directory exists; it then sets the marker
 * file's access and modification times to now, or creates the marker file
 * (empty) if it is missing.
 */
touch(): void;
}

/**
 * Enumerates the directories found under a root directory.
 *
 * When `recursive` is set, the contents of a directory are yielded before the
 * directory itself, so that callers may remove directories as they go.
 *
 * @param root         the directory to search.
 * @param recursive    whether to also search sub-directories.
 * @param ignoreErrors whether file-system errors are swallowed. This covers
 *                     listing `root` itself (nothing is yielded if it cannot
 *                     be read) as well as inspecting its children (the
 *                     offending child is skipped).
 *
 * @throws the underlying file-system error, if `ignoreErrors` is `false`.
 */
function* directoriesUnder(
  root: string,
  recursive = false,
  ignoreErrors = true,
): Generator<string, void, void> {
  let names: string[];
  try {
    names = readdirSync(root);
  } catch (error) {
    if (!ignoreErrors) {
      throw error;
    }
    // The root is missing or unreadable: there is nothing to enumerate.
    return;
  }
  yield* directoriesIn(root, names, recursive, ignoreErrors);
}

/** Yields the directories among `names` (children of `parent`), children first. */
function* directoriesIn(
  parent: string,
  names: readonly string[],
  recursive: boolean,
  ignoreErrors: boolean,
): Generator<string, void, void> {
  for (const name of names) {
    const path = join(parent, name);
    try {
      if (!statSync(path).isDirectory()) {
        continue;
      }
      if (recursive) {
        // Listed within the try: a sub-directory that cannot be read is
        // skipped altogether rather than yielded.
        yield* directoriesIn(path, readdirSync(path), recursive, ignoreErrors);
      }
      yield path;
    } catch (error) {
      if (!ignoreErrors) {
        throw error;
      }
    }
  }
}
1 change: 1 addition & 0 deletions packages/@jsii/kernel/src/disk-cache/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './disk-cache';
11 changes: 11 additions & 0 deletions packages/@jsii/kernel/src/kernel.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ import {
WireStruct,
TOKEN_STRUCT,
} from './api';
import { DiskCache } from './disk-cache';
import { Kernel } from './kernel';
import { closeRecording, recordInteraction } from './recording';
import * as tar from './tar-cache';
import { defaultCacheRoot } from './tar-cache/default-cache-root';

/* eslint-disable require-atomic-updates */

Expand Down Expand Up @@ -49,6 +52,11 @@ if (recordingOutput) {
console.error(`JSII_RECORD=${recordingOutput}`);
}

afterAll(() => {
// Jest prevents execution of "beforeExit" events, so the pruning that
// DiskCache registers on "beforeExit" would never run here: trigger it
// explicitly on the default cache root instead.
DiskCache.inDirectory(defaultCacheRoot()).pruneExpiredEntries();
});

function defineTest(
name: string,
method: (sandbox: Kernel) => Promise<any> | any,
Expand Down Expand Up @@ -2147,6 +2155,9 @@ defineTest('invokeBinScript() return output', (sandbox) => {
const testNames: { [name: string]: boolean } = {};

async function createCalculatorSandbox(name: string) {
// Run half the tests with cache, half without cache... so we test both.
tar.setPackageCacheEnabled(!tar.getPackageCacheEnabled());

if (name in testNames) {
throw new Error(`Duplicate test name: ${name}`);
}
Expand Down
Loading

0 comments on commit 4c2dcd5

Please sign in to comment.