-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(cli): add cli for listing, filtering and grouping files in AWS (#…
- Loading branch information
Showing
8 changed files
with
249 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import { fsa } from '@chunkd/fs'; | ||
import o from 'ospec'; | ||
import { asyncFilter, chunkArray } from '../action.aws.list.js'; | ||
|
||
// Specs for chunkArray: arrays and sets are split into groups of at most `size` values.
o.spec('chunkArray', () => {
  // Values containing a repeated 4; a Set built from them iterates only 1, 2, 3, 4
  const withDuplicate = [1, 2, 3, 4, 4];

  o('should chunk a array', () => {
    const chunks = chunkArray([1, 2, 3, 4], 2);
    o(chunks).deepEquals([
      [1, 2],
      [3, 4],
    ]);
  });

  o('should chunk a set', () => {
    const chunks = chunkArray(new Set(withDuplicate), 2);
    o(chunks).deepEquals([
      [1, 2],
      [3, 4],
    ]);
  });

  o('should chunk small set', () => {
    // Fewer values than the chunk size gives a single partial chunk
    const chunks = chunkArray(new Set([1]), 2);
    o(chunks).deepEquals([[1]]);
  });

  o('should chunk large set', () => {
    // Chunk size larger than the number of distinct values
    const chunks = chunkArray(new Set(withDuplicate), 5);
    o(chunks).deepEquals([[1, 2, 3, 4]]);
  });

  o('should chunk into single sets', () => {
    const chunks = chunkArray(new Set(withDuplicate), 1);
    o(chunks).deepEquals([[1], [2], [3], [4]]);
  });
});
|
||
// Specs for asyncFilter: paths are matched case-insensitively against a regexp, `*` passes everything.
o.spec('asyncFilter', () => {
  const fileList = [
    'a.tiff',
    'B.TIFF',
    '/foo/bar/baz.tiff',
    '/foo/xls.ts',
    'c:\\foo\\bar.txt',
    's3://foo/bar.tiff',
    's3://foo/bar.ts',
    's3://foo/bar/baz.tif',
  ];

  /** Stream every fixture path as a fresh async generator */
  async function* listFiles(): AsyncGenerator<string> {
    for (const path of fileList) yield path;
  }

  /** Run the filter over the fixture paths and collect the matches */
  const runFilter = (pattern: string) => fsa.toArray(asyncFilter(listFiles(), pattern));

  o('should filter all', async () => {
    const matched = await runFilter('*');
    o(matched).deepEquals(fileList);
  });

  o('should filter exact', async () => {
    // The windows path is skipped: its backslashes act as regexp escapes, so it cannot match itself
    const usableAsPattern = fileList.filter((f) => !f.startsWith('c:\\'));
    for (const file of usableAsPattern) {
      const matched = await runFilter(file);
      o(matched).deepEquals([file]);
    }
  });

  o('should filter suffix', async () => {
    const expected = fileList.filter((f) => f.toLowerCase().endsWith('.tiff'));
    o(await runFilter('.tiff$')).deepEquals(expected);
  });

  o('should filter tif or tiff', async () => {
    const expected = fileList.filter((f) => {
      const lower = f.toLowerCase();
      return lower.endsWith('.tiff') || lower.endsWith('.tif');
    });
    o(await runFilter('.tiff?$')).deepEquals(expected);
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import { Env, fsa, LogConfig, RoleRegister } from '@basemaps/shared'; | ||
import { CommandLineAction, CommandLineIntegerParameter, CommandLineStringParameter } from '@rushstack/ts-command-line'; | ||
|
||
/**
 * `list` command line action.
 *
 * Lists every location given as a remainder argument, optionally filters the files found,
 * optionally groups them, and writes the result as JSON to the `--output` location.
 */
export class CommandList extends CommandLineAction {
  private filter: CommandLineStringParameter;
  private output: CommandLineStringParameter;
  private group: CommandLineIntegerParameter;
  private config: CommandLineStringParameter;

  public constructor() {
    super({
      actionName: 'list',
      summary: 'List a location and return the files in a JSON array',
      documentation: 'Find the correct role to assume, assume it and list a s3 location',
    });
  }

  /** Register `--filter`, `--group`, `--output`, `--config` and the trailing list of locations */
  protected onDefineParameters(): void {
    this.filter = this.defineStringParameter({
      argumentName: 'FILTER',
      parameterLongName: '--filter',
      description: 'filter files eg ".*.tiff"',
    });
    this.group = this.defineIntegerParameter({
      argumentName: 'GROUP',
      parameterLongName: '--group',
      description: 'Group files into this number per group',
    });
    this.output = this.defineStringParameter({
      argumentName: 'OUTPUT',
      parameterLongName: '--output',
      required: true,
      description: 'Output location for the listing',
    });
    this.config = this.defineStringParameter({
      argumentName: 'CONFIG',
      parameterLongName: '--config',
      description: 'Location of a configuration file containing role->bucket mapping information',
    });
    this.defineCommandLineRemainder({
      description: 'List of locations to iterate',
    });
  }

  /**
   * List every requested location and write the combined, de-duplicated listing to `--output`.
   *
   * @throws Error when no locations are given or `--output` is missing
   */
  async onExecute(): Promise<void> {
    const log = LogConfig.get();

    // A `--config` value is handed to the role lookup through the environment
    const roleConfig = this.config.value;
    if (roleConfig) {
      log.info({ path: roleConfig }, 'Role:Config');
      process.env[Env.AwsRoleConfigBucket] = roleConfig;
    }

    const locations = this.remainder?.values ?? [];
    if (locations.length === 0) throw new Error('No listing paths given');

    const outputPath = this.output.value;
    if (outputPath == null) throw new Error('Missing --output path');

    // Without an explicit filter every file is kept
    const pattern = this.filter.value ?? '*';

    // A Set keeps each file once even when the listed locations overlap
    const found = new Set<string>();
    for (const location of locations) {
      log.debug({ path: location }, 'List');

      // Looking up the role also registers it, so this must happen before listing
      const role = await RoleRegister.findRole(location);
      if (role) log.debug({ path: location, roleArn: role.roleArn }, 'List:Role');

      const matches = await fsa.toArray(asyncFilter(fsa.list(location), pattern));
      log.debug({ path: location, fileCount: matches.length }, 'List:Count');

      for (const match of matches) found.add(match);
    }

    // Flat array by default, array of groups when `--group` is given
    const groupSize = this.group.value;
    const listing = groupSize == null ? [...found.values()] : chunkArray(found, groupSize);
    await fsa.write(outputPath, JSON.stringify(listing));
  }
}
|
||
/**
 * Lazily filter a stream of file paths with a case-insensitive regular expression.
 *
 * @param source stream of paths to filter
 * @param filter regular expression source, or the literal `*` to pass every path through unfiltered
 * @returns the paths from `source` that match `filter`, in their original order and casing
 * @throws SyntaxError once iteration starts if `filter` is not a valid regular expression
 */
export async function* asyncFilter(source: AsyncGenerator<string>, filter: string): AsyncGenerator<string> {
  if (filter === '*') return yield* source;

  // The `i` flag already makes the match case-insensitive. The pattern must not be lower-cased:
  // that would turn escapes such as `\S`, `\D`, `\W` and `\B` into their opposites.
  const re = new RegExp(filter, 'i');
  for await (const f of source) {
    if (re.test(f)) yield f;
  }
}
|
||
/**
 * Split values into consecutive groups, closing a group once it holds at least `size` values.
 *
 * @param values array or set to split, iteration order is preserved
 * @param size number of values after which a group is closed
 * @returns the groups in order; the last group may hold fewer than `size` values
 */
export function chunkArray<T>(values: Set<T> | T[], size: number): T[][] {
  const groups: T[][] = [];
  // Group currently being filled, `null` when the next value has to start a new one
  let open: T[] | null = null;
  for (const item of values) {
    if (open === null) {
      open = [];
      groups.push(open);
    }
    open.push(item);
    if (open.length >= size) open = null;
  }
  return groups;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import { Env } from '../const.js'; | ||
import { fsa } from '@chunkd/fs'; | ||
import { AwsCredentials } from '@chunkd/source-aws-v2'; | ||
|
||
/** A role that can be assumed to access a single bucket */
export interface RoleConfig {
  /** Bucket name without the `s3://` prefix */
  bucket: string;
  /** NOTE(review): presumably the AWS account that owns the role, confirm against the config producer */
  accountId: string;
  /** ARN of the role to assume when accessing the bucket */
  roleArn: string;
  /** Optional external id supplied when assuming the role */
  externalId?: string;
  /** Optional session duration supplied when assuming the role, units not visible here (TODO confirm) */
  roleSessionDuration?: number;
}
|
||
/** Shape of the JSON role configuration file that maps buckets to the roles used to access them */
export interface BucketConfig {
  /** NOTE(review): presumably the config format version, confirm against the config producer */
  v: number;
  /** Roles that can be assumed, one entry per bucket */
  buckets: RoleConfig[];
  /** NOTE(review): `version`, `package` and `hash` presumably identify the build that wrote the file, confirm */
  version: string;
  package: string;
  hash: string;
  /** NOTE(review): presumably a timestamp of the last update, format not visible here */
  updatedAt: string;
}
/** Is the thrown value a non-null object whose `code` is exactly the number 403 */
function isForbidden(e: unknown): boolean {
  return typeof e === 'object' && e !== null && (e as { code: unknown }).code === 403;
}
|
||
/**
 * Attempt to list the target path; a 403 Forbidden failure is taken to mean we do not have permission to read it.
 *
 * @param path location to probe
 * @returns `true` when the first listing request succeeds, `false` when it fails with a 403
 * @throws any error that is not a 403, unchanged
 */
export async function canRead(path: string): Promise<boolean> {
  try {
    // Requesting only the first entry is enough to find out whether listing is permitted
    await fsa.list(path).next();
  } catch (err: unknown) {
    // Unrelated errors are not ours to interpret
    if (!isForbidden(err)) throw err;
    // Permission denied
    return false;
  }
  return true;
}
|
||
/** Finds and registers the role needed to read a path that cannot be read with the current credentials */
export class RoleRegister {
  /**
   * Load all role configuration from `config.json` in the bucket named by `Env.AwsRoleConfigBucket`.
   *
   * @returns the configured roles, or an empty list when no config bucket is set
   */
  static async _loadRoles(): Promise<RoleConfig[]> {
    const configBucket = Env.get(Env.AwsRoleConfigBucket);
    if (configBucket == null) return [];
    const configPath = `s3://${configBucket}/config.json`;
    const config: BucketConfig = await fsa.readJson(configPath);
    return config.buckets;
  }

  /** Cached result of the first `loadRoles()` call */
  static _loadRolesPromise: Promise<RoleConfig[]> | undefined;

  /** Load the role configuration once, every later call shares the same promise */
  static loadRoles(): Promise<RoleConfig[]> {
    if (RoleRegister._loadRolesPromise == null) RoleRegister._loadRolesPromise = this._loadRoles();
    return RoleRegister._loadRolesPromise;
  }

  /**
   * Find the role needed to access a path and register it with the file system.
   *
   * @param path location that is about to be read
   * @returns the role that was registered, or `undefined` when the path is already readable
   * @throws Error when the path is not readable and no configured role covers its bucket
   */
  static async findRole(path: string): Promise<RoleConfig | undefined> {
    const isAbleToRead = await canRead(path);
    // If we can directly read/write this path we don't need to register a role for it
    if (isAbleToRead) return;

    const roles = await this.loadRoles();
    const targetRole = roles.find((role) => {
      const bucketPrefix = `s3://${role.bucket}`;
      // Match on the bucket name boundary: a bare prefix test would let bucket "foo" claim "s3://foo-bar/..."
      return path === bucketPrefix || path.startsWith(`${bucketPrefix}/`);
    });
    if (targetRole == null) throw new Error(`Failed to read ${path}`);

    fsa.register(
      `s3://${targetRole.bucket}`,
      AwsCredentials.fsFromRole(targetRole.roleArn, targetRole.externalId, targetRole.roleSessionDuration),
    );
    return targetRole;
  }
}