Skip to content

Commit

Permalink
feat(cli): add cli for listing, filtering and grouping files in AWS (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
blacha authored Jun 28, 2022
1 parent 0ee4f83 commit b4dec98
Show file tree
Hide file tree
Showing 8 changed files with 249 additions and 68 deletions.
69 changes: 69 additions & 0 deletions packages/cli/src/cli/aws/__tests__/action.aws.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { fsa } from '@chunkd/fs';
import o from 'ospec';
import { asyncFilter, chunkArray } from '../action.aws.list.js';

o.spec('chunkArray', () => {
  /** One table row per test: the input collection, the chunk size and the expected chunks. */
  const cases: { name: string; input: Set<number> | number[]; size: number; expected: number[][] }[] = [
    {
      name: 'should chunk a array',
      input: [1, 2, 3, 4],
      size: 2,
      expected: [
        [1, 2],
        [3, 4],
      ],
    },
    {
      // The duplicate `4` is collapsed by the Set before chunking
      name: 'should chunk a set',
      input: new Set([1, 2, 3, 4, 4]),
      size: 2,
      expected: [
        [1, 2],
        [3, 4],
      ],
    },
    // Fewer values than the chunk size gives a single short chunk
    { name: 'should chunk small set', input: new Set([1]), size: 2, expected: [[1]] },
    // Chunk size larger than the number of unique values gives one chunk holding everything
    { name: 'should chunk large set', input: new Set([1, 2, 3, 4, 4]), size: 5, expected: [[1, 2, 3, 4]] },
    // Chunk size of one gives one chunk per unique value
    { name: 'should chunk into single sets', input: new Set([1, 2, 3, 4, 4]), size: 1, expected: [[1], [2], [3], [4]] },
  ];

  for (const testCase of cases) {
    o(testCase.name, () => {
      o(chunkArray(testCase.input, testCase.size)).deepEquals(testCase.expected);
    });
  }
});

o.spec('asyncFilter', () => {
  const fileList = [
    'a.tiff',
    'B.TIFF',
    '/foo/bar/baz.tiff',
    '/foo/xls.ts',
    'c:\\foo\\bar.txt',
    's3://foo/bar.tiff',
    's3://foo/bar.ts',
    's3://foo/bar/baz.tif',
  ];

  /** Replay `fileList` as an async stream, standing in for a real file listing. */
  async function* generator(): AsyncGenerator<string> {
    yield* fileList;
  }

  /** Run `asyncFilter` over a fresh copy of the stream and collect everything it yields. */
  const runFilter = (filter: string): Promise<string[]> => fsa.toArray(asyncFilter(generator(), filter));

  /** Case-insensitive suffix check used to compute the expected results. */
  const hasSuffix = (file: string, suffix: string): boolean => file.toLowerCase().endsWith(suffix);

  o('should filter all', async () => {
    const found = await runFilter('*');
    o(found).deepEquals(fileList);
  });

  o('should filter exact', async () => {
    // The windows style path is skipped: used as a pattern its backslashes are read as
    // regexp escapes (`\f`, `\b`), so it would not match its own literal text.
    const usableAsPattern = fileList.filter((file) => !file.startsWith('c:\\'));
    for (const file of usableAsPattern) {
      const found = await runFilter(file);
      o(found).deepEquals([file]);
    }
  });

  o('should filter suffix', async () => {
    const expected = fileList.filter((file) => hasSuffix(file, '.tiff'));
    o(await runFilter('.tiff$')).deepEquals(expected);
  });

  o('should filter tif or tiff', async () => {
    const expected = fileList.filter((file) => hasSuffix(file, '.tiff') || hasSuffix(file, '.tif'));
    o(await runFilter('.tiff?$')).deepEquals(expected);
  });
});
101 changes: 101 additions & 0 deletions packages/cli/src/cli/aws/action.aws.list.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { Env, fsa, LogConfig, RoleRegister } from '@basemaps/shared';
import { CommandLineAction, CommandLineIntegerParameter, CommandLineStringParameter } from '@rushstack/ts-command-line';

/**
 * `list` CLI action.
 *
 * Lists every location given as a remainder argument, keeps the files matching `--filter`,
 * de-duplicates them and writes the result as JSON to `--output`. When `--group` is supplied the
 * output is an array of arrays (chunks of at most that many files), otherwise a flat array.
 */
export class CommandList extends CommandLineAction {
  private filter: CommandLineStringParameter;
  private output: CommandLineStringParameter;
  private group: CommandLineIntegerParameter;
  private config: CommandLineStringParameter;

  public constructor() {
    super({
      actionName: 'list',
      summary: 'List a location and return the files in a JSON array',
      documentation: 'Find the correct role to assume, assume it and list a s3 location',
    });
  }

  /** Register the command line flags; the locations to list are taken from the remainder. */
  protected onDefineParameters(): void {
    this.filter = this.defineStringParameter({
      parameterLongName: '--filter',
      argumentName: 'FILTER',
      description: 'filter files eg ".*.tiff"',
    });
    this.group = this.defineIntegerParameter({
      parameterLongName: '--group',
      argumentName: 'GROUP',
      description: 'Group files into this number per group',
    });
    this.output = this.defineStringParameter({
      parameterLongName: '--output',
      argumentName: 'OUTPUT',
      description: 'Output location for the listing',
      required: true,
    });
    this.config = this.defineStringParameter({
      parameterLongName: '--config',
      argumentName: 'CONFIG',
      description: 'Location of a configuration file containing role->bucket mapping information',
    });
    this.defineCommandLineRemainder({
      description: 'List of locations to iterate',
    });
  }

  /**
   * Run the listing.
   *
   * @throws Error when no locations are supplied or `--output` is missing
   */
  async onExecute(): Promise<void> {
    const logger = LogConfig.get();

    // Expose the role configuration through the environment before any role lookup happens
    const roleConfig = this.config.value;
    if (roleConfig) {
      logger.info({ path: roleConfig }, 'Role:Config');
      process.env[Env.AwsRoleConfigBucket] = roleConfig;
    }

    const targets = this.remainder?.values ?? [];
    if (targets.length === 0) throw new Error('No listing paths given');

    const outputPath = this.output.value;
    if (outputPath == null) throw new Error('Missing --output path');

    // '*' is the pass-through filter: keep every file
    const filter = this.filter.value ?? '*';

    // A Set so a file reachable from more than one location is only reported once
    const found = new Set<string>();
    for (const target of targets) {
      logger.debug({ path: target }, 'List');

      // Registers a role for the location when it cannot be read directly
      const role = await RoleRegister.findRole(target);
      if (role) logger.debug({ path: target, roleArn: role.roleArn }, 'List:Role');

      const matched = await fsa.toArray(asyncFilter(fsa.list(target), filter));
      logger.debug({ path: target, fileCount: matched.length }, 'List:Count');

      matched.forEach((file) => found.add(file));
    }

    const groupSize = this.group.value;
    const listing = groupSize == null ? [...found] : chunkArray(found, groupSize);
    await fsa.write(outputPath, JSON.stringify(listing));
  }
}

/**
 * Lazily filter a stream of file paths.
 *
 * @param source async stream of paths to filter
 * @param filter `'*'` passes every path through untouched; any other value is used as the source of
 *   a regular expression that is matched case-insensitively anywhere in the path
 * @returns an async stream of the paths from `source` that match, in their original order and casing
 * @throws SyntaxError once iteration starts, if `filter` is not a valid regular expression
 */
export async function* asyncFilter(source: AsyncGenerator<string>, filter: string): AsyncGenerator<string> {
  if (filter === '*') return yield* source;

  // The `i` flag alone makes the match case-insensitive. The pattern must NOT be lower-cased:
  // that would rewrite escapes such as `\D`, `\W`, `\S` and `\B` into `\d`, `\w`, `\s` and `\b`,
  // which mean the exact opposite.
  const re = new RegExp(filter, 'i');
  for await (const f of source) {
    if (re.test(f)) yield f;
  }
}

/**
 * Split a collection into consecutive chunks, preserving iteration order.
 *
 * @param values array or set of values to split
 * @param size number of values after which a chunk is considered full
 * @returns the chunks in order; only the last one may be shorter than `size`,
 *   and an empty input gives an empty result
 */
export function chunkArray<T>(values: Set<T> | T[], size: number): T[][] {
  const chunks: T[][] = [];
  // The chunk currently being filled, `undefined` when the next value has to start a new one
  let open: T[] | undefined;
  for (const value of values) {
    if (open === undefined) {
      open = [];
      chunks.push(open);
    }
    open.push(value);
    // Close the chunk as soon as it is full
    if (open.length >= size) open = undefined;
  }
  return chunks;
}
3 changes: 3 additions & 0 deletions packages/cli/src/cli/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env node
import { BaseCommandLine } from '@basemaps/shared/build/cli/base.js';
import 'source-map-support/register.js';
import { CommandList } from './aws/action.aws.list.js';
import { CommandCogCreate } from './cogify/action.cog.js';
import { CommandJobCreate } from './cogify/action.job.js';
import { CommandBundle } from './config/action.bundle.js';
Expand All @@ -24,5 +25,7 @@ export class BasemapsConfigCommandLine extends BaseCommandLine {
this.addAction(new CommandScreenShot());

this.addAction(new CommandSprites());

this.addAction(new CommandList());
}
}
2 changes: 1 addition & 1 deletion packages/lambda-cog/src/__tests__/tile.import.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import sinon from 'sinon';
import { LambdaAlbRequest, LambdaHttpRequest } from '@linzjs/lambda';
import { Context } from 'aws-lambda';
import { Import } from '../routes/import.js';
import { RoleConfig } from '../import/imagery.find.js';
import { RoleConfig } from '@basemaps/shared';
import { CogJobFactory } from '@basemaps/cli';
import { ConfigProcessingJob } from '@basemaps/config';

Expand Down
65 changes: 0 additions & 65 deletions packages/lambda-cog/src/import/imagery.find.ts
Original file line number Diff line number Diff line change
@@ -1,69 +1,4 @@
import { Env } from '@basemaps/shared';
import { fsa } from '@chunkd/fs';
import { AwsCredentials } from '@chunkd/source-aws-v2';

/** One role->bucket mapping: the role to assume in order to access a single S3 bucket. */
export interface RoleConfig {
/** Bucket name without the `s3://` prefix; `RoleRegister.findRole` matches paths against `s3://${bucket}` */
bucket: string;
/** NOTE(review): presumably the AWS account owning the bucket/role; not read by the code visible here, confirm */
accountId: string;
/** ARN of the role to assume, handed to `AwsCredentials.fsFromRole` */
roleArn: string;
/** Optional external id, handed to `AwsCredentials.fsFromRole` together with the role ARN */
externalId?: string;
/** Optional session duration handed to `AwsCredentials.fsFromRole`; unit not shown here (presumably seconds, confirm) */
roleSessionDuration?: number;
}

/**
 * Shape of the role configuration JSON document read by `RoleRegister._loadRoles`.
 *
 * NOTE(review): only `buckets` is read by the code visible here; the other fields look like
 * version/build metadata of the document, confirm against whatever produces it.
 */
interface BucketConfig {
v: number;
/** The role->bucket mappings returned by `RoleRegister._loadRoles` */
buckets: RoleConfig[];
version: string;
package: string;
hash: string;
updatedAt: string;
}

/**
 * Attempt to list the target path; if it fails with a 403 Forbidden assume we do not have
 * permission to read it.
 *
 * @param path location to probe
 * @returns `true` when the first listing request succeeds, `false` when it fails with `code === 403`
 * @throws anything else the listing throws, rethrown unchanged
 */
export async function canRead(path: string): Promise<boolean> {
  try {
    // Only the first result is requested, that is enough to prove the location is readable
    await fsa.list(path).next();
    return true;
  } catch (e: unknown) {
    // Permission denied. Narrow from `unknown` so a thrown `null`/primitive is rethrown as-is
    // instead of failing with a TypeError while reading `.code`.
    if (typeof e === 'object' && e !== null && (e as { code?: unknown }).code === 403) return false;
    // Unrelated error
    throw e;
  }
}

/**
 * Looks up, and registers with `fsa`, the AWS role needed to read an S3 location that cannot be
 * read with the current credentials.
 */
export class RoleRegister {
  /**
   * Get all imagery source aws roles.
   *
   * Reads `config.json` from the bucket named by the `Env.AwsRoleConfigBucket` environment
   * variable; when that variable is not set there are no roles.
   */
  static async _loadRoles(): Promise<RoleConfig[]> {
    const configBucket = Env.get(Env.AwsRoleConfigBucket);
    if (configBucket == null) return [];
    const configPath = `s3://${configBucket}/config.json`;
    const config: BucketConfig = await fsa.readJson(configPath);
    return config.buckets;
  }

  /** In-flight or completed role load, shared by every caller of `loadRoles` */
  static _loadRolesPromise: Promise<RoleConfig[]> | undefined;

  /**
   * Load the role configuration once and share the result between callers.
   *
   * A failed load is not kept: the cache is cleared on rejection so the next call retries
   * instead of replaying the same failure forever.
   */
  static loadRoles(): Promise<RoleConfig[]> {
    const cached = RoleRegister._loadRolesPromise;
    if (cached != null) return cached;

    const pending = this._loadRoles();
    RoleRegister._loadRolesPromise = pending;
    pending.catch(() => {
      // Only clear our own entry, a newer load may already have replaced it
      if (RoleRegister._loadRolesPromise === pending) RoleRegister._loadRolesPromise = undefined;
    });
    return pending;
  }

  /**
   * Find the role needed to read `path` and register it with `fsa`.
   *
   * @param path location that is about to be read
   * @returns `undefined` when the path is already readable, otherwise the role that was registered
   * @throws Error when the path is not readable and no configured bucket contains it
   */
  static async findRole(path: string): Promise<RoleConfig | undefined> {
    const isAbleToRead = await canRead(path);
    // If we can directly read/write this path we don't need to register a role for it
    if (isAbleToRead) return;

    const roles = await this.loadRoles();
    const targetRole = roles.find((f) => {
      const prefix = `s3://${f.bucket}`;
      if (!path.startsWith(prefix)) return false;
      if (prefix.endsWith('/')) return true;
      // The bucket name has to end here: without this check bucket "foo" would also claim
      // paths inside "s3://foo-bar/"
      const next = path.charAt(prefix.length);
      return next === '' || next === '/';
    });
    if (targetRole == null) throw new Error(`Failed to read ${path}`);

    fsa.register(
      `s3://${targetRole.bucket}`,
      AwsCredentials.fsFromRole(targetRole.roleArn, targetRole.externalId, targetRole.roleSessionDuration),
    );
    return targetRole;
  }
}

/** Search for the imagery across all of our buckets */
export async function findImagery(path: string): Promise<{ files: string[]; totalSize: number }> {
Expand Down
4 changes: 2 additions & 2 deletions packages/lambda-cog/src/routes/import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import { CogJobFactory } from '@basemaps/cli';
import { JobCreationContext } from '@basemaps/cli/build/cog/cog.stac.job';
import { ConfigProcessingJob, JobStatus } from '@basemaps/config';
import { Nztm2000QuadTms, TileMatrixSets } from '@basemaps/geo';
import { Config, Env, extractYearRangeFromName, fsa } from '@basemaps/shared';
import { Config, Env, extractYearRangeFromName, fsa, RoleRegister } from '@basemaps/shared';
import { HttpHeader, LambdaHttpRequest, LambdaHttpResponse } from '@linzjs/lambda';
import { createHash } from 'crypto';
import { basename } from 'path';
import * as ulid from 'ulid';
import { findImagery, RoleRegister } from '../import/imagery.find.js';
import { findImagery } from '../import/imagery.find.js';

const MaxImagePixelSize = 128000;

Expand Down
2 changes: 2 additions & 0 deletions packages/shared/src/file/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
export * from './file.config.js';
export { RoleRegister, RoleConfig, BucketConfig } from './role.registry.js';

import { parseUri } from '@chunkd/core';
import { fsa as fsaSource } from '@chunkd/fs';
import { FsAwsS3 } from '@chunkd/source-aws';
Expand Down
71 changes: 71 additions & 0 deletions packages/shared/src/file/role.registry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { Env } from '../const.js';
import { fsa } from '@chunkd/fs';
import { AwsCredentials } from '@chunkd/source-aws-v2';

/** One role->bucket mapping: the role to assume in order to access a single S3 bucket. */
export interface RoleConfig {
/** Bucket name without the `s3://` prefix; `RoleRegister.findRole` matches paths against `s3://${bucket}` */
bucket: string;
/** NOTE(review): presumably the AWS account owning the bucket/role; not read by the code visible here, confirm */
accountId: string;
/** ARN of the role to assume, handed to `AwsCredentials.fsFromRole` */
roleArn: string;
/** Optional external id, handed to `AwsCredentials.fsFromRole` together with the role ARN */
externalId?: string;
/** Optional session duration handed to `AwsCredentials.fsFromRole`; unit not shown here (presumably seconds, confirm) */
roleSessionDuration?: number;
}

/**
 * Shape of the role configuration JSON document read by `RoleRegister._loadRoles`.
 *
 * NOTE(review): only `buckets` is read by the code visible here; the other fields look like
 * version/build metadata of the document, confirm against whatever produces it.
 */
export interface BucketConfig {
v: number;
/** The role->bucket mappings returned by `RoleRegister._loadRoles` */
buckets: RoleConfig[];
version: string;
package: string;
hash: string;
updatedAt: string;
}
/**
 * Check whether a caught value is a "403 Forbidden" style error.
 *
 * @param e anything caught from a `try` block
 * @returns `true` only for a non-null object whose `code` property is exactly the number `403`
 */
function isForbidden(e: unknown): boolean {
  const isObject = typeof e === 'object' && e !== null;
  return isObject && (e as { code: unknown }).code === 403;
}

/**
 * Probe whether `path` can be listed with the credentials currently registered.
 *
 * @param path location to probe
 * @returns `true` when the first listing request succeeds, `false` when it is rejected as forbidden
 * @throws any non-forbidden failure of the listing, rethrown unchanged
 */
export async function canRead(path: string): Promise<boolean> {
  try {
    // Asking for the first result is enough to find out if the location is readable
    const listing = fsa.list(path);
    await listing.next();
  } catch (err: unknown) {
    // Anything other than "permission denied" is not ours to handle
    if (!isForbidden(err)) throw err;
    return false;
  }
  return true;
}

/**
 * Looks up, and registers with `fsa`, the AWS role needed to read an S3 location that cannot be
 * read with the current credentials.
 */
export class RoleRegister {
  /**
   * Get all imagery source aws roles.
   *
   * Reads `config.json` from the bucket named by the `Env.AwsRoleConfigBucket` environment
   * variable; when that variable is not set there are no roles.
   */
  static async _loadRoles(): Promise<RoleConfig[]> {
    const configBucket = Env.get(Env.AwsRoleConfigBucket);
    if (configBucket == null) return [];
    const configPath = `s3://${configBucket}/config.json`;
    const config: BucketConfig = await fsa.readJson(configPath);
    return config.buckets;
  }

  /** In-flight or completed role load, shared by every caller of `loadRoles` */
  static _loadRolesPromise: Promise<RoleConfig[]> | undefined;

  /**
   * Load the role configuration once and share the result between callers.
   *
   * A failed load is not kept: the cache is cleared on rejection so the next call retries
   * instead of replaying the same failure forever.
   */
  static loadRoles(): Promise<RoleConfig[]> {
    const cached = RoleRegister._loadRolesPromise;
    if (cached != null) return cached;

    const pending = this._loadRoles();
    RoleRegister._loadRolesPromise = pending;
    pending.catch(() => {
      // Only clear our own entry, a newer load may already have replaced it
      if (RoleRegister._loadRolesPromise === pending) RoleRegister._loadRolesPromise = undefined;
    });
    return pending;
  }

  /**
   * Find the role needed to read `path` and register it with `fsa`.
   *
   * @param path location that is about to be read
   * @returns `undefined` when the path is already readable, otherwise the role that was registered
   * @throws Error when the path is not readable and no configured bucket contains it
   */
  static async findRole(path: string): Promise<RoleConfig | undefined> {
    const isAbleToRead = await canRead(path);
    // If we can directly read/write this path we don't need to register a role for it
    if (isAbleToRead) return;

    const roles = await this.loadRoles();
    const targetRole = roles.find((f) => {
      const prefix = `s3://${f.bucket}`;
      if (!path.startsWith(prefix)) return false;
      if (prefix.endsWith('/')) return true;
      // The bucket name has to end here: without this check bucket "foo" would also claim
      // paths inside "s3://foo-bar/"
      const next = path.charAt(prefix.length);
      return next === '' || next === '/';
    });
    if (targetRole == null) throw new Error(`Failed to read ${path}`);

    fsa.register(
      `s3://${targetRole.bucket}`,
      AwsCredentials.fsFromRole(targetRole.roleArn, targetRole.externalId, targetRole.roleSessionDuration),
    );
    return targetRole;
  }
}

0 comments on commit b4dec98

Please sign in to comment.