diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 86415d5de15dc..cbbea0b1dbf84 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -211,7 +211,7 @@ A Glue table describes a table of data in S3: its structure (column names and ty ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { database: myDatabase, columns: [{ name: 'col1', @@ -230,7 +230,7 @@ By default, a S3 bucket will be created to store the table's data but you can ma ```ts declare const myBucket: s3.Bucket; declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { bucket: myBucket, s3Prefix: 'my-table/', // ... @@ -247,7 +247,7 @@ Glue tables can be configured to contain user-defined properties, to describe th ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { storageParameters: [ glue.StorageParameter.skipHeaderLineCount(1), glue.StorageParameter.compressionType(glue.CompressionType.GZIP), @@ -269,7 +269,7 @@ To improve query performance, a table can specify `partitionKeys` on which data ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { database: myDatabase, columns: [{ name: 'col1', @@ -300,7 +300,7 @@ property: ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { database: myDatabase, columns: [{ name: 'col1', @@ -337,7 +337,7 @@ If you have a table with a large number of partitions that grows over time, cons ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { database: myDatabase, columns: [{ name: 'col1', @@ -355,6 +355,28 @@ new glue.Table(this, 'MyTable', { }); ``` +### Glue Connections + +Glue 
connections allow external data connections to third-party databases and data warehouses. These connections can also be assigned to Glue Tables, allowing you to query external data sources using the Glue Data Catalog. + +Whereas `S3Table` will point to (and if needed, create) a bucket to store the table's data, `ExternalTable` will point to an existing table in a data source. For example, to create a table in Glue that points to a table in Redshift: + +```ts +declare const myConnection: glue.Connection; +declare const myDatabase: glue.Database; +new glue.ExternalTable(this, 'MyTable', { + connection: myConnection, + externalDataLocation: 'default_db_public_example', // A table in Redshift + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + ## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) You can enable encryption on a Table's data: @@ -363,7 +385,7 @@ You can enable encryption on a Table's data: ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { encryption: glue.TableEncryption.S3_MANAGED, // ... database: myDatabase, @@ -380,7 +402,7 @@ new glue.Table(this, 'MyTable', { ```ts declare const myDatabase: glue.Database; // KMS key is created automatically -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { encryption: glue.TableEncryption.KMS, // ... database: myDatabase, @@ -392,7 +414,7 @@ new glue.Table(this, 'MyTable', { }); // with an explicit KMS key -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { encryption: glue.TableEncryption.KMS, encryptionKey: new kms.Key(this, 'MyKey'), // ... @@ -409,7 +431,7 @@ new glue.Table(this, 'MyTable', { ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { encryption: glue.TableEncryption.KMS_MANAGED, // ...
database: myDatabase, @@ -426,7 +448,7 @@ new glue.Table(this, 'MyTable', { ```ts declare const myDatabase: glue.Database; // KMS key is created automatically -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { encryption: glue.TableEncryption.CLIENT_SIDE_KMS, // ... database: myDatabase, @@ -438,7 +460,7 @@ new glue.Table(this, 'MyTable', { }); // with an explicit KMS key -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { encryption: glue.TableEncryption.CLIENT_SIDE_KMS, encryptionKey: new kms.Key(this, 'MyKey'), // ... @@ -451,7 +473,7 @@ new glue.Table(this, 'MyTable', { }); ``` -*Note: you cannot provide a `Bucket` when creating the `Table` if you wish to use server-side encryption (`KMS`, `KMS_MANAGED` or `S3_MANAGED`)*. +*Note: you cannot provide a `Bucket` when creating the `S3Table` if you wish to use server-side encryption (`KMS`, `KMS_MANAGED` or `S3_MANAGED`)*. ## Types @@ -459,7 +481,7 @@ A table's schema is a collection of columns, each of which have a `name` and a ` ```ts declare const myDatabase: glue.Database; -new glue.Table(this, 'MyTable', { +new glue.S3Table(this, 'MyTable', { columns: [{ name: 'primitive_column', type: glue.Schema.STRING, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts new file mode 100644 index 0000000000000..cca62739a9bcc --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts @@ -0,0 +1,171 @@ +import { CfnTable } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Construct } from 'constructs'; +import { IConnection } from './connection'; +import { Column } from './schema'; +import { PartitionIndex, TableBase, TableBaseProps } from './table-base'; + +export interface ExternalTableProps extends TableBaseProps { + /** + * The connection the table will use when performing reads and writes. 
+ * + * Its name is recorded in the table's `connectionName` parameter. + */ + readonly connection: IConnection; + + /** + * The data source location of the Glue table (e.g. `default_db_public_example` for Redshift). + * + * This value is used as the `location` of the table's storage descriptor. + * + * Required; there is no default location. + */ + readonly externalDataLocation: string; +} + +/** + * A Glue table that targets an external data location (e.g. a table in a Redshift cluster). + */ +export class ExternalTable extends TableBase { + /** + * Name of this table. + */ + public readonly tableName: string; + + /** + * ARN of this table. + */ + public readonly tableArn: string; + + /** + * The connection associated with this table. + */ + public readonly connection: IConnection; + + /** + * This table's partition indexes. + */ + public readonly partitionIndexes?: PartitionIndex[]; + + protected readonly tableResource: CfnTable; + + constructor(scope: Construct, id: string, props: ExternalTableProps) { + super(scope, id, props); + this.connection = props.connection; + this.tableResource = new CfnTable(this, 'Table', { + catalogId: props.database.catalogId, + + databaseName: props.database.databaseName, + + tableInput: { + name: this.physicalName, + description: props.description || `${this.physicalName} generated by CDK`, + + partitionKeys: renderColumns(props.partitionKeys), + + parameters: { + 'classification': props.dataFormat.classificationString?.value, + 'has_encrypted_data': true, + 'partition_filtering.enabled': props.enablePartitionFiltering, + 'connectionName': props.connection.connectionName, + }, + storageDescriptor: { + location: props.externalDataLocation, + compressed: this.compressed, + storedAsSubDirectories: props.storedAsSubDirectories ??
false, + columns: renderColumns(props.columns), + inputFormat: props.dataFormat.inputFormat.className, + outputFormat: props.dataFormat.outputFormat.className, + serdeInfo: { + serializationLibrary: props.dataFormat.serializationLibrary.className, + }, + parameters: props.storageParameters ? props.storageParameters.reduce((acc, param) => { + if (param.key in acc) { + throw new Error(`Duplicate storage parameter key: ${param.key}`); + } + const key = param.key; + acc[key] = param.value; + return acc; + }, {} as { [key: string]: string }) : undefined, + }, + + tableType: 'EXTERNAL_TABLE', + }, + }); + + this.tableName = this.getResourceNameAttribute(this.tableResource.ref); + this.tableArn = this.stack.formatArn({ + service: 'glue', + resource: 'table', + resourceName: `${this.database.databaseName}/${this.tableName}`, + }); + this.node.defaultChild = this.tableResource; + + // Partition index creation relies on created table. + if (props.partitionIndexes) { + this.partitionIndexes = props.partitionIndexes; + this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); + } + } + + /** + * Grant read permissions to the table + * + * @param grantee the principal + */ + public grantRead(grantee: iam.IGrantable): iam.Grant { + const ret = this.grant(grantee, readPermissions); + return ret; + } + + /** + * Grant write permissions to the table + * + * @param grantee the principal + */ + public grantWrite(grantee: iam.IGrantable): iam.Grant { + const ret = this.grant(grantee, writePermissions); + return ret; + } + + /** + * Grant read and write permissions to the table + * + * @param grantee the principal + */ + public grantReadWrite(grantee: iam.IGrantable): iam.Grant { + const ret = this.grant(grantee, [...readPermissions, ...writePermissions]); + return ret; + } +} + +const readPermissions = [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', 
+]; + +const writePermissions = [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', +]; + +function renderColumns(columns?: Array<Column>) { + if (columns === undefined) { + return undefined; + } + return columns.map(column => { + return { + name: column.name, + type: column.type.inputString, + comment: column.comment, + }; + }); +} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index c6a611242c925..1b9514c14625e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -5,9 +5,12 @@ export * from './connection'; export * from './data-format'; export * from './data-quality-ruleset'; export * from './database'; +export * from './external-table'; export * from './job'; export * from './job-executable'; +export * from './s3-table'; export * from './schema'; export * from './security-configuration'; export * from './storage-parameter'; -export * from './table'; +export * from './table-base'; +export * from './table-deprecated'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts new file mode 100644 index 0000000000000..ccb8c9d87bd6c --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts @@ -0,0 +1,300 @@ +import { CfnTable } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as kms from 'aws-cdk-lib/aws-kms'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Construct } from 'constructs'; +import { Column } from './schema'; +import { PartitionIndex, TableBase, TableBaseProps } from './table-base'; + +/** + * Encryption options for a Table. + * + * @see https://docs.aws.amazon.com/athena/latest/ug/encryption.html + */ +export enum TableEncryption { + /** + * Server-side encryption (SSE) with an Amazon S3-managed key.
+ * + * @see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html + */ + S3_MANAGED = 'SSE-S3', + + /** + * Server-side encryption (SSE) with an AWS KMS key managed by the account owner. + * + * @see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html + */ + KMS = 'SSE-KMS', + + /** + * Server-side encryption (SSE) with an AWS KMS key managed by the KMS service. + */ + KMS_MANAGED = 'SSE-KMS-MANAGED', + + /** + * Client-side encryption (CSE) with an AWS KMS key managed by the account owner. + * + * @see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingClientSideEncryption.html + */ + CLIENT_SIDE_KMS = 'CSE-KMS' +} + +export interface S3TableProps extends TableBaseProps { + /** + * S3 bucket in which to store data. + * + * @default one is created for you + */ + readonly bucket?: s3.IBucket; + + /** + * S3 prefix under which table objects are stored. + * + * @default - No prefix. The data will be stored under the root of the bucket. + */ + readonly s3Prefix?: string; + + /** + * The kind of encryption to secure the data with. + * + * You can only provide a server-side option (`SSE-S3`, `SSE-KMS`, `SSE-KMS-MANAGED`) if you are not explicitly passing in a bucket. + * + * If you choose `SSE-KMS`, you *can* provide an un-managed KMS key with `encryptionKey`. + * If you choose `CSE-KMS`, you *can* provide an un-managed KMS key with `encryptionKey`; one is created for you if it is omitted. + * + * @default TableEncryption.S3_MANAGED + */ + readonly encryption?: TableEncryption; + + /** + * External KMS key to use for bucket encryption. + * + * The `encryption` property must be `SSE-KMS` or `CSE-KMS`. + * + * @default key is managed by KMS. + */ + readonly encryptionKey?: kms.IKey; +} + +/** + * A Glue table that targets an S3 dataset. + */ +export class S3Table extends TableBase { + /** + * Name of this table. + */ + public readonly tableName: string; + + /** + * ARN of this table. + */ + public readonly tableArn: string; + + /** + * S3 bucket in which the table's data resides.
+ */ + public readonly bucket: s3.IBucket; + + /** + * S3 Key Prefix under which this table's files are stored in S3. + */ + public readonly s3Prefix: string; + + /** + * The type of encryption enabled for the table. + */ + public readonly encryption: TableEncryption; + + /** + * The KMS key used to secure the data if `encryption` is set to `CSE-KMS` or `SSE-KMS`. Otherwise, `undefined`. + */ + public readonly encryptionKey?: kms.IKey; + + /** + * This table's partition indexes. + */ + public readonly partitionIndexes?: PartitionIndex[]; + + protected readonly tableResource: CfnTable; + + constructor(scope: Construct, id: string, props: S3TableProps) { + super(scope, id, props); + this.s3Prefix = props.s3Prefix ?? ''; + const { bucket, encryption, encryptionKey } = createBucket(this, props); + this.bucket = bucket; + this.encryption = encryption; + this.encryptionKey = encryptionKey; + + this.tableResource = new CfnTable(this, 'Table', { + catalogId: props.database.catalogId, + + databaseName: props.database.databaseName, + + tableInput: { + name: this.physicalName, + description: props.description || `${this.physicalName} generated by CDK`, + + partitionKeys: renderColumns(props.partitionKeys), + + parameters: { + 'classification': props.dataFormat.classificationString?.value, + 'has_encrypted_data': true, + 'partition_filtering.enabled': props.enablePartitionFiltering, + }, + storageDescriptor: { + location: `s3://${this.bucket.bucketName}/${this.s3Prefix}`, + compressed: this.compressed, + storedAsSubDirectories: props.storedAsSubDirectories ?? false, + columns: renderColumns(props.columns), + inputFormat: props.dataFormat.inputFormat.className, + outputFormat: props.dataFormat.outputFormat.className, + serdeInfo: { + serializationLibrary: props.dataFormat.serializationLibrary.className, + }, + parameters: props.storageParameters ? 
props.storageParameters.reduce((acc, param) => { + if (param.key in acc) { + throw new Error(`Duplicate storage parameter key: ${param.key}`); + } + const key = param.key; + acc[key] = param.value; + return acc; + }, {} as { [key: string]: string }) : undefined, + }, + + tableType: 'EXTERNAL_TABLE', + }, + }); + + this.tableName = this.getResourceNameAttribute(this.tableResource.ref); + this.tableArn = this.stack.formatArn({ + service: 'glue', + resource: 'table', + resourceName: `${this.database.databaseName}/${this.tableName}`, + }); + this.node.defaultChild = this.tableResource; + + // Partition index creation relies on created table. + if (props.partitionIndexes) { + this.partitionIndexes = props.partitionIndexes; + this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); + } + } + + /** + * Grant read permissions to the table and the underlying data stored in S3 to an IAM principal. + * + * @param grantee the principal + */ + public grantRead(grantee: iam.IGrantable): iam.Grant { + const ret = this.grant(grantee, readPermissions); + if (this.encryptionKey && this.encryption === TableEncryption.CLIENT_SIDE_KMS) { this.encryptionKey.grantDecrypt(grantee); } + this.bucket.grantRead(grantee, this.generateS3PrefixForGrant()); + return ret; + } + + /** + * Grant write permissions to the table and the underlying data stored in S3 to an IAM principal. + * + * @param grantee the principal + */ + public grantWrite(grantee: iam.IGrantable): iam.Grant { + const ret = this.grant(grantee, writePermissions); + if (this.encryptionKey && this.encryption === TableEncryption.CLIENT_SIDE_KMS) { this.encryptionKey.grantEncrypt(grantee); } + this.bucket.grantWrite(grantee, this.generateS3PrefixForGrant()); + return ret; + } + + /** + * Grant read and write permissions to the table and the underlying data stored in S3 to an IAM principal. 
+ * + * @param grantee the principal + */ + public grantReadWrite(grantee: iam.IGrantable): iam.Grant { + const ret = this.grant(grantee, [...readPermissions, ...writePermissions]); + if (this.encryptionKey && this.encryption === TableEncryption.CLIENT_SIDE_KMS) { this.encryptionKey.grantEncryptDecrypt(grantee); } + this.bucket.grantReadWrite(grantee, this.generateS3PrefixForGrant()); + return ret; + } + + protected generateS3PrefixForGrant() { + return this.s3Prefix + '*'; + } +} + +const readPermissions = [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', +]; + +const writePermissions = [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', +]; + +// map TableEncryption to bucket's SSE configuration (s3.BucketEncryption) +const encryptionMappings = { + [TableEncryption.S3_MANAGED]: s3.BucketEncryption.S3_MANAGED, + [TableEncryption.KMS_MANAGED]: s3.BucketEncryption.KMS_MANAGED, + [TableEncryption.KMS]: s3.BucketEncryption.KMS, + [TableEncryption.CLIENT_SIDE_KMS]: s3.BucketEncryption.S3_MANAGED, +}; + +// create the bucket to store a table's data depending on the `encryption` and `encryptionKey` properties. 
+function createBucket(table: S3Table, props: S3TableProps) { + let bucket = props.bucket; + + if (bucket && (props.encryption !== undefined && props.encryption !== TableEncryption.CLIENT_SIDE_KMS)) { + throw new Error('you can not specify encryption settings if you also provide a bucket'); + } + + const encryption = props.encryption || TableEncryption.S3_MANAGED; + + let encryptionKey: kms.IKey | undefined; + if (encryption === TableEncryption.CLIENT_SIDE_KMS && props.encryptionKey === undefined) { + // CSE-KMS should behave the same as SSE-KMS - use the provided key or create one automatically + // Since Bucket only knows about SSE, we repeat the logic for CSE-KMS at the Table level. + encryptionKey = new kms.Key(table, 'Key'); + } else { + encryptionKey = props.encryptionKey; + } + + // create the bucket if none was provided + if (!bucket) { + if (encryption === TableEncryption.CLIENT_SIDE_KMS) { + bucket = new s3.Bucket(table, 'Bucket'); + } else { + bucket = new s3.Bucket(table, 'Bucket', { + encryption: encryptionMappings[encryption], + encryptionKey, + }); + encryptionKey = bucket.encryptionKey; + } + } + + return { + bucket, + encryption, + encryptionKey, + }; +} + +function renderColumns(columns?: Array<Column>) { + if (columns === undefined) { + return undefined; + } + return columns.map(column => { + return { + name: column.name, + type: column.type.inputString, + comment: column.comment, + }; + }); +} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts similarity index 53% rename from packages/@aws-cdk/aws-glue-alpha/lib/table.ts rename to packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts index f62e6af2010a4..e77875c6c75e3 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts @@ -1,7 +1,5 @@ import { CfnTable } from 'aws-cdk-lib/aws-glue'; import * as iam from 'aws-cdk-lib/aws-iam'; -import * as kms from 'aws-cdk-lib/aws-kms'; -import * as
s3 from 'aws-cdk-lib/aws-s3'; import { ArnFormat, Fn, IResource, Lazy, Names, Resource, Stack } from 'aws-cdk-lib/core'; import * as cr from 'aws-cdk-lib/custom-resources'; import { AwsCustomResource } from 'aws-cdk-lib/custom-resources'; @@ -41,45 +39,12 @@ export interface ITable extends IResource { readonly tableName: string; } -/** - * Encryption options for a Table. - * - * @see https://docs.aws.amazon.com/athena/latest/ug/encryption.html - */ -export enum TableEncryption { - /** - * Server side encryption (SSE) with an Amazon S3-managed key. - * - * @see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html - */ - S3_MANAGED = 'SSE-S3', - - /** - * Server-side encryption (SSE) with an AWS KMS key managed by the account owner. - * - * @see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html - */ - KMS = 'SSE-KMS', - - /** - * Server-side encryption (SSE) with an AWS KMS key managed by the KMS service. - */ - KMS_MANAGED = 'SSE-KMS-MANAGED', - - /** - * Client-side encryption (CSE) with an AWS KMS key managed by the account owner. - * - * @see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingClientSideEncryption.html - */ - CLIENT_SIDE_KMS = 'CSE-KMS' -} - export interface TableAttributes { readonly tableArn: string; readonly tableName: string; } -export interface TableProps { +export interface TableBaseProps { /** * Name of the table. * @@ -99,20 +64,6 @@ export interface TableProps { */ readonly database: IDatabase; - /** - * S3 bucket in which to store data. - * - * @default one is created for you - */ - readonly bucket?: s3.IBucket; - - /** - * S3 prefix under which table objects are stored. - * - * @default - No prefix. The data will be stored under the root of the bucket. - */ - readonly s3Prefix?: string; - /** * Columns of the table. */ @@ -146,27 +97,6 @@ export interface TableProps { */ readonly compressed?: boolean; - /** - * The kind of encryption to secure the data with. 
- * - * You can only provide this option if you are not explicitly passing in a bucket. - * - * If you choose `SSE-KMS`, you *can* provide an un-managed KMS key with `encryptionKey`. - * If you choose `CSE-KMS`, you *must* provide an un-managed KMS key with `encryptionKey`. - * - * @default BucketEncryption.S3_MANAGED - */ - readonly encryption?: TableEncryption; - - /** - * External KMS key to use for bucket encryption. - * - * The `encryption` property must be `SSE-KMS` or `CSE-KMS`. - * - * @default key is managed by KMS. - */ - readonly encryptionKey?: kms.IKey; - /** * Indicates whether the table data is stored in subdirectories. * @@ -222,12 +152,12 @@ export interface TableProps { /** * A Glue table. */ -export class Table extends Resource implements ITable { +export abstract class TableBase extends Resource implements ITable { public static fromTableArn(scope: Construct, id: string, tableArn: string): ITable { const tableName = Fn.select(1, Fn.split('/', Stack.of(scope).splitArn(tableArn, ArnFormat.SLASH_RESOURCE_NAME).resourceName!)); - return Table.fromTableAttributes(scope, id, { + return TableBase.fromTableAttributes(scope, id, { tableArn, tableName, }); @@ -249,6 +179,11 @@ export class Table extends Resource implements ITable { return new Import(scope, id); } + protected abstract readonly tableResource: CfnTable; + public abstract readonly tableName: string; + public abstract readonly tableArn: string; + public abstract readonly partitionIndexes?: PartitionIndex[]; + /** * Database this table belongs to. */ @@ -259,36 +194,6 @@ export class Table extends Resource implements ITable { */ public readonly compressed: boolean; - /** - * The type of encryption enabled for the table. - */ - public readonly encryption: TableEncryption; - - /** - * The KMS key used to secure the data if `encryption` is set to `CSE-KMS` or `SSE-KMS`. Otherwise, `undefined`. - */ - public readonly encryptionKey?: kms.IKey; - - /** - * S3 bucket in which the table's data resides. 
- */ - public readonly bucket: s3.IBucket; - - /** - * S3 Key Prefix under which this table's files are stored in S3. - */ - public readonly s3Prefix: string; - - /** - * Name of this table. - */ - public readonly tableName: string; - - /** - * ARN of this table. - */ - public readonly tableArn: string; - /** * Format of this table's data files. */ @@ -304,11 +209,6 @@ export class Table extends Resource implements ITable { */ public readonly partitionKeys?: Column[]; - /** - * This table's partition indexes. - */ - public readonly partitionIndexes?: PartitionIndex[]; - /** * The tables' storage descriptor properties. */ @@ -321,7 +221,7 @@ export class Table extends Resource implements ITable { */ private partitionIndexCustomResources: AwsCustomResource[] = []; - constructor(scope: Construct, id: string, props: TableProps) { + constructor(scope: Construct, id: string, props: TableBaseProps) { super(scope, id, { physicalName: props.tableName ?? Lazy.string({ @@ -331,7 +231,6 @@ export class Table extends Resource implements ITable { this.database = props.database; this.dataFormat = props.dataFormat; - this.s3Prefix = props.s3Prefix ?? ''; validateSchema(props.columns, props.partitionKeys); this.columns = props.columns; @@ -339,66 +238,12 @@ export class Table extends Resource implements ITable { this.storageParameters = props.storageParameters; this.compressed = props.compressed ?? 
false; - const { bucket, encryption, encryptionKey } = createBucket(this, props); - this.bucket = bucket; - this.encryption = encryption; - this.encryptionKey = encryptionKey; - - const tableResource = new CfnTable(this, 'Table', { - catalogId: props.database.catalogId, - - databaseName: props.database.databaseName, - - tableInput: { - name: this.physicalName, - description: props.description || `${this.physicalName} generated by CDK`, - - partitionKeys: renderColumns(props.partitionKeys), - - parameters: { - 'classification': props.dataFormat.classificationString?.value, - 'has_encrypted_data': true, - 'partition_filtering.enabled': props.enablePartitionFiltering, - }, - storageDescriptor: { - location: `s3://${this.bucket.bucketName}/${this.s3Prefix}`, - compressed: this.compressed, - storedAsSubDirectories: props.storedAsSubDirectories ?? false, - columns: renderColumns(props.columns), - inputFormat: props.dataFormat.inputFormat.className, - outputFormat: props.dataFormat.outputFormat.className, - serdeInfo: { - serializationLibrary: props.dataFormat.serializationLibrary.className, - }, - parameters: props.storageParameters ? props.storageParameters.reduce((acc, param) => { - if (param.key in acc) { - throw new Error(`Duplicate storage parameter key: ${param.key}`); - } - const key = param.key; - acc[key] = param.value; - return acc; - }, {} as { [key: string]: string }) : undefined, - }, - - tableType: 'EXTERNAL_TABLE', - }, - }); - - this.tableName = this.getResourceNameAttribute(tableResource.ref); - this.tableArn = this.stack.formatArn({ - service: 'glue', - resource: 'table', - resourceName: `${this.database.databaseName}/${this.tableName}`, - }); - this.node.defaultChild = tableResource; - - // Partition index creation relies on created table. 
- if (props.partitionIndexes) { - this.partitionIndexes = props.partitionIndexes; - this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); - } } + public abstract grantRead(grantee: iam.IGrantable): iam.Grant; + public abstract grantWrite(grantee: iam.IGrantable): iam.Grant; + public abstract grantReadWrite(grantee: iam.IGrantable): iam.Grant; + /** * Add a partition index to the table. You can have a maximum of 3 partition * indexes to a table. Partition index keys must be a subset of the table's @@ -466,42 +311,6 @@ export class Table extends Resource implements ITable { } } - /** - * Grant read permissions to the table and the underlying data stored in S3 to an IAM principal. - * - * @param grantee the principal - */ - public grantRead(grantee: iam.IGrantable): iam.Grant { - const ret = this.grant(grantee, readPermissions); - if (this.encryptionKey && this.encryption === TableEncryption.CLIENT_SIDE_KMS) { this.encryptionKey.grantDecrypt(grantee); } - this.bucket.grantRead(grantee, this.getS3PrefixForGrant()); - return ret; - } - - /** - * Grant write permissions to the table and the underlying data stored in S3 to an IAM principal. - * - * @param grantee the principal - */ - public grantWrite(grantee: iam.IGrantable): iam.Grant { - const ret = this.grant(grantee, writePermissions); - if (this.encryptionKey && this.encryption === TableEncryption.CLIENT_SIDE_KMS) { this.encryptionKey.grantEncrypt(grantee); } - this.bucket.grantWrite(grantee, this.getS3PrefixForGrant()); - return ret; - } - - /** - * Grant read and write permissions to the table and the underlying data stored in S3 to an IAM principal. 
- * - * @param grantee the principal - */ - public grantReadWrite(grantee: iam.IGrantable): iam.Grant { - const ret = this.grant(grantee, [...readPermissions, ...writePermissions]); - if (this.encryptionKey && this.encryption === TableEncryption.CLIENT_SIDE_KMS) { this.encryptionKey.grantEncryptDecrypt(grantee); } - this.bucket.grantReadWrite(grantee, this.getS3PrefixForGrant()); - return ret; - } - /** * Grant the given identity custom permissions. */ @@ -528,10 +337,6 @@ export class Table extends Resource implements ITable { actions, }); } - - private getS3PrefixForGrant() { - return this.s3Prefix + '*'; - } } function validateSchema(columns: Column[], partitionKeys?: Column[]): void { @@ -547,81 +352,3 @@ function validateSchema(columns: Column[], partitionKeys?: Column[]): void { names.add(column.name); }); } - -// map TableEncryption to bucket's SSE configuration (s3.BucketEncryption) -const encryptionMappings = { - [TableEncryption.S3_MANAGED]: s3.BucketEncryption.S3_MANAGED, - [TableEncryption.KMS_MANAGED]: s3.BucketEncryption.KMS_MANAGED, - [TableEncryption.KMS]: s3.BucketEncryption.KMS, - [TableEncryption.CLIENT_SIDE_KMS]: s3.BucketEncryption.S3_MANAGED, -}; - -// create the bucket to store a table's data depending on the `encryption` and `encryptionKey` properties. -function createBucket(table: Table, props: TableProps) { - let bucket = props.bucket; - - if (bucket && (props.encryption !== undefined && props.encryption !== TableEncryption.CLIENT_SIDE_KMS)) { - throw new Error('you can not specify encryption settings if you also provide a bucket'); - } - - const encryption = props.encryption || TableEncryption.S3_MANAGED; - - let encryptionKey: kms.IKey | undefined; - if (encryption === TableEncryption.CLIENT_SIDE_KMS && props.encryptionKey === undefined) { - // CSE-KMS should behave the same as SSE-KMS - use the provided key or create one automatically - // Since Bucket only knows about SSE, we repeat the logic for CSE-KMS at the Table level. 
- encryptionKey = new kms.Key(table, 'Key'); - } else { - encryptionKey = props.encryptionKey; - } - - // create the bucket if none was provided - if (!bucket) { - if (encryption === TableEncryption.CLIENT_SIDE_KMS) { - bucket = new s3.Bucket(table, 'Bucket'); - } else { - bucket = new s3.Bucket(table, 'Bucket', { - encryption: encryptionMappings[encryption], - encryptionKey, - }); - encryptionKey = bucket.encryptionKey; - } - } - - return { - bucket, - encryption, - encryptionKey, - }; -} - -const readPermissions = [ - 'glue:BatchGetPartition', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:GetTable', - 'glue:GetTables', - 'glue:GetTableVersion', - 'glue:GetTableVersions', -]; - -const writePermissions = [ - 'glue:BatchCreatePartition', - 'glue:BatchDeletePartition', - 'glue:CreatePartition', - 'glue:DeletePartition', - 'glue:UpdatePartition', -]; - -function renderColumns(columns?: Array) { - if (columns === undefined) { - return undefined; - } - return columns.map(column => { - return { - name: column.name, - type: column.type.inputString, - comment: column.comment, - }; - }); -} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/table-deprecated.ts b/packages/@aws-cdk/aws-glue-alpha/lib/table-deprecated.ts new file mode 100644 index 0000000000000..cf3ad58b492ae --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/table-deprecated.ts @@ -0,0 +1,10 @@ +import { S3Table, S3TableProps } from './s3-table'; + +export interface TableProps extends S3TableProps {} + +/** + * A Glue table. + * + * @deprecated Use {@link S3Table} instead. 
+ */ +export class Table extends S3Table {} diff --git a/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts new file mode 100644 index 0000000000000..012722e050ebc --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts @@ -0,0 +1,1087 @@ +import * as cdk from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { CfnTable } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as glue from '../lib'; + +const externalDataLocation = 'default_db.public.test'; +const readPermissions = [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', +]; +const writePermissions = [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', +]; + +test('unpartitioned JSON table', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + + const tableStack = new cdk.Stack(app, 'table'); + const connection = new glue.Connection(tableStack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(tableStack, 'Table', { + database, + connection, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + externalDataLocation, + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b 
generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: externalDataLocation, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('partitioned JSON table', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + + const tableStack = new cdk.Stack(app, 'table'); + const connection = new glue.Connection(tableStack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + PartitionKeys: [ + { + Name: 'year', + Type: 'smallint', + }, + ], + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: externalDataLocation, + OutputFormat: 
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('compressed table', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Name: 'table', + Description: 'table generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: true, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: externalDataLocation, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('table.node.defaultChild', () => { + // GIVEN + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 
'password', + }, + }); + + // WHEN + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + // THEN + expect(table.node.defaultChild instanceof CfnTable).toEqual(true); +}); + +describe('add partition index', () => { + test('fails if no partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keyNames: ['part'], + })).toThrowError(/The table must have partition keys to create a partition index/); + }); + + test('fails if partition index does not match partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keyNames: 
['not-part'], + })).toThrowError(/All index keys must also be partition keys/); + }); + + test('fails with index name < 1 character', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + expect(() => table.addPartitionIndex({ + indexName: '', + keyNames: ['part'], + })).toThrowError(/Index name must be between 1 and 255 characters, but got 0/); + }); + + test('fails with > 3 indexes', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const indexes: glue.PartitionIndex[] = [{ + indexName: 'ind1', + keyNames: ['part'], + }, { + indexName: 'ind2', + keyNames: ['part'], + }, { + indexName: 'ind3', + keyNames: ['part'], + }, { + indexName: 'ind4', + keyNames: ['part'], + }]; + + expect(() => new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: indexes, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + })).toThrowError('Maximum number of partition indexes allowed is 3'); + }); +}); 
+ +describe('grants', () => { + test('custom permissions', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + table.grant(user, ['glue:UpdateTable']); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: 'glue:UpdateTable', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); + + test('read only', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + 
table.grantRead(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: readPermissions, + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); + + test('write only', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + table.grantWrite(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: writePermissions, + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); + + test('read and write', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = 
new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + const table = new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + table.grantReadWrite(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); +}); + +describe('validate', () => { + test('at least one', () => { + expect(() => { + createTable({ + columns: [], + }); + }).toThrowError('you must specify at least one column for the table'); + }); + + test('unique column names', () => { + expect(() => { + createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }, { + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + }); + + test('unique partition keys', () => { + expect(() => { + 
createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'p1', + type: glue.Schema.STRING, + }, { + name: 'p1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'p1' is duplicated"); + }); + + test('column names and partition keys are all unique', () => { + expect(() => { + createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + }); + + test('unique storage descriptor parameters', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + expect(() => new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(2), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('foo', 'bar'), + glue.StorageParameter.custom(glue.StorageParameters.COMPRESSION_TYPE, 'true'), + ], + connection, + externalDataLocation, + })).toThrowError('Duplicate storage parameter key: compression_type'); + }); +}); + +describe('Table.fromTableArn', () => { + test('success', () => { + // GIVEN + const stack = new cdk.Stack(); + + // WHEN + const table = glue.ExternalTable.fromTableArn(stack, 'boom', 'arn:aws:glue:us-east-1:123456789012:table/db1/tbl1'); + + // THEN + 
expect(table.tableArn).toEqual('arn:aws:glue:us-east-1:123456789012:table/db1/tbl1'); + expect(table.tableName).toEqual('tbl1'); + }); + + test('throws if no ARN is provided', () => { + // GIVEN + const stack = new cdk.Stack(); + + // THEN + expect(() => glue.ExternalTable.fromTableArn(stack, 'boom', '')).toThrowError(/ARNs must start with \"arn:\" and have at least 6 components: /); + }); +}); + +test.each([ + ['enabled', true], + ['disabled', false], +])('Partition filtering on table %s', (_, enabled) => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + const connection = new glue.Connection(dbStack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const tableStack = new cdk.Stack(app, 'table'); + new glue.ExternalTable(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + enablePartitionFiltering: enabled, + connection, + externalDataLocation, + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + 'classification': 'json', + 'has_encrypted_data': true, + 'partition_filtering.enabled': enabled, + }, + PartitionKeys: Match.anyValue(), + StorageDescriptor: Match.anyValue(), + TableType: Match.anyValue(), + }, + }); +}); + +test('Partition filtering on table is not defined (default behavior)', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + 
const database = new glue.Database(dbStack, 'Database'); + const connection = new glue.Connection(dbStack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + + const tableStack = new cdk.Stack(app, 'table'); + new glue.ExternalTable(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + enablePartitionFiltering: undefined, + connection, + externalDataLocation, + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + PartitionKeys: Match.anyValue(), + StorageDescriptor: Match.anyValue(), + TableType: Match.anyValue(), + }, + }); +}); + +test('can specify a physical name', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'Table', { + database, + tableName: 'my_table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Name: 'my_table', + 
Description: 'my_table generated by CDK', + }, + }); +}); + +test('can specify a description', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'Table', { + database, + tableName: 'my_table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + description: 'This is a test table.', + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Name: 'my_table', + Description: 'This is a test table.', + }, + }); +}); + +test('storage descriptor parameters', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(2), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('foo', 'bar'), + glue.StorageParameter.custom('separatorChar', ','), + ], + connection, + externalDataLocation, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + StorageDescriptor: { + Parameters: { + 'skip.header.line.count': '2', + 
'separatorChar': ',', + 'foo': 'bar', + 'compression_type': 'gzip', + }, + }, + }, + }); +}); + +test('can associate an external location with the glue table', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'Table', { + database, + tableName: 'my_table', + connection, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + externalDataLocation, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + StorageDescriptor: { + Location: externalDataLocation, + }, + Parameters: { + connectionName: { + Ref: 'Connection89AD5CF5', + }, + }, + }, + }); +}); + +function createTable(props: Pick>): void { + const stack = new cdk.Stack(); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'table', { + ...props, + database: new glue.Database(stack, 'db'), + dataFormat: glue.DataFormat.JSON, + connection, + externalDataLocation, + }); +} diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/aws-cdk-glue.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/aws-cdk-glue.assets.json new file mode 100644 index 0000000000000..61f1993482c17 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/aws-cdk-glue.assets.json @@ -0,0 +1,19 @@ +{ + "version": "34.0.0", + "files": 
{ + "84e0753dbcfc1fd4c21499f0bad1d34eee7e6a23678af76661541de677da38e8": { + "source": { + "path": "aws-cdk-glue.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "84e0753dbcfc1fd4c21499f0bad1d34eee7e6a23678af76661541de677da38e8.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/aws-cdk-glue.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/aws-cdk-glue.template.json new file mode 100644 index 0000000000000..c6316d12d1455 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/aws-cdk-glue.template.json @@ -0,0 +1,122 @@ +{ + "Resources": { + "MyDatabase1E2517DB": { + "Type": "AWS::Glue::Database", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseInput": { + "Name": "my_database" + } + } + }, + "MyConnection5621880D": { + "Type": "AWS::Glue::Connection", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "ConnectionInput": { + "ConnectionProperties": { + "JDBC_CONNECTION_URL": "jdbc:mysql://mysql.example.com:3306", + "USERNAME": "username", + "PASSWORD": "password" + }, + "ConnectionType": "JDBC", + "Name": "my_connection" + } + } + }, + "MyTableWithCustomLocationTable43A19D42": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "custom_location_table generated by CDK", + "Name": "custom_location_table", + "Parameters": { + "classification": "json", + "has_encrypted_data": true, + "connectionName": { + "Ref": "MyConnection5621880D" + } + 
}, + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": "default_db.public.test", + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json new file mode 100644 index 0000000000000..711c72d940d8a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json @@ -0,0 +1,19 @@ +{ + "version": "34.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/cdk.out new file mode 100644 index 0000000000000..2313ab5436501 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"34.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/integ.json new file mode 100644 index 0000000000000..71d86f03a0888 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "34.0.0", + "testCases": { + "aws-cdk-glue-table-integ/DefaultTest": { + "stacks": [ + "aws-cdk-glue" + ], + "assertionStack": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert", + "assertionStackName": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/manifest.json new file mode 100644 index 0000000000000..20944a5cf59ef --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/manifest.json @@ -0,0 +1,123 @@ +{ + "version": "34.0.0", + "artifacts": { + "aws-cdk-glue.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-cdk-glue.assets.json", + 
"requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-cdk-glue": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-cdk-glue.template.json", + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/84e0753dbcfc1fd4c21499f0bad1d34eee7e6a23678af76661541de677da38e8.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-cdk-glue.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-cdk-glue.assets" + ], + "metadata": { + "/aws-cdk-glue/MyDatabase/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "MyDatabase1E2517DB" + } + ], + "/aws-cdk-glue/MyConnection/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "MyConnection5621880D" + } + ], + "/aws-cdk-glue/MyTableWithCustomLocation/Table": [ + { + "type": "aws:cdk:logicalId", + "data": "MyTableWithCustomLocationTable43A19D42" + } + ], + "/aws-cdk-glue/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-cdk-glue/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-cdk-glue" + }, + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets": { + 
"type": "cdk:asset-manifest", + "properties": { + "file": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json", + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets" + ], + "metadata": { + "/aws-cdk-glue-table-integ/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-cdk-glue-table-integ/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": 
"tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/tree.json new file mode 100644 index 0000000000000..9b7fae3e761ec --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.js.snapshot/tree.json @@ -0,0 +1,239 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-cdk-glue": { + "id": "aws-cdk-glue", + "path": "aws-cdk-glue", + "children": { + "MyDatabase": { + "id": "MyDatabase", + "path": "aws-cdk-glue/MyDatabase", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue/MyDatabase/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Database", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseInput": { + "name": "my_database" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnDatabase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.Database", + "version": "0.0.0" + } + }, + "MyConnection": { + "id": "MyConnection", + "path": "aws-cdk-glue/MyConnection", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue/MyConnection/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Connection", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "connectionInput": { + "connectionProperties": { + "JDBC_CONNECTION_URL": "jdbc:mysql://mysql.example.com:3306", + "USERNAME": "username", + "PASSWORD": "password" + }, + "connectionType": "JDBC", + "name": "my_connection" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnConnection", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.Connection", + "version": "0.0.0" + } + }, + "MyTableWithCustomLocation": { + "id": 
"MyTableWithCustomLocation", + "path": "aws-cdk-glue/MyTableWithCustomLocation", + "children": { + "Table": { + "id": "Table", + "path": "aws-cdk-glue/MyTableWithCustomLocation/Table", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Table", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "tableInput": { + "name": "custom_location_table", + "description": "custom_location_table generated by CDK", + "parameters": { + "classification": "json", + "has_encrypted_data": true, + "connectionName": { + "Ref": "MyConnection5621880D" + } + }, + "storageDescriptor": { + "location": "default_db.public.test", + "compressed": false, + "storedAsSubDirectories": false, + "columns": [ + { + "name": "col1", + "type": "string" + }, + { + "name": "col2", + "type": "string", + "comment": "col2 comment" + }, + { + "name": "col3", + "type": "array" + }, + { + "name": "col4", + "type": "map" + }, + { + "name": "col5", + "type": "struct" + } + ], + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + } + }, + "tableType": "EXTERNAL_TABLE" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnTable", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ExternalTable", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-cdk-glue/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-cdk-glue/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + 
"aws-cdk-glue-table-integ": { + "id": "aws-cdk-glue-table-integ", + "path": "aws-cdk-glue-table-integ", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-cdk-glue-table-integ/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-cdk-glue-table-integ/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.2.70" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.2.70" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.ts new file mode 100644 index 0000000000000..f451a671fe172 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.external-table.ts @@ -0,0 +1,57 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; + +const app = new cdk.App(); + +const stack = new 
cdk.Stack(app, 'aws-cdk-glue'); + +const database = new glue.Database(stack, 'MyDatabase', { + databaseName: 'my_database', +}); + +const connection = new glue.Connection(stack, 'MyConnection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:mysql://mysql.example.com:3306', + USERNAME: 'username', + PASSWORD: 'password', + }, +}); + +const columns = [{ + name: 'col1', + type: glue.Schema.STRING, +}, { + name: 'col2', + type: glue.Schema.STRING, + comment: 'col2 comment', +}, { + name: 'col3', + type: glue.Schema.array(glue.Schema.STRING), +}, { + name: 'col4', + type: glue.Schema.map(glue.Schema.STRING, glue.Schema.STRING), +}, { + name: 'col5', + type: glue.Schema.struct([{ + name: 'col1', + type: glue.Schema.STRING, + }]), +}]; + +new glue.ExternalTable(stack, 'MyTableWithCustomLocation', { + database, + connection, + tableName: 'custom_location_table', + columns, + dataFormat: glue.DataFormat.JSON, + externalDataLocation: 'default_db.public.test', +}); + +new integ.IntegTest(app, 'aws-cdk-glue-table-integ', { + testCases: [stack], +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.partition-index.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.partition-index.ts index 0982b86fa1a68..5455b6eeab618 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.partition-index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.partition-index.ts @@ -1,6 +1,6 @@ #!/usr/bin/env node -import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib'; +import * as s3 from 'aws-cdk-lib/aws-s3'; import * as glue from '../lib'; /** @@ -38,7 +38,7 @@ const partitionKeys = [{ type: glue.Schema.BIG_INT, }]; -const csvTable = new glue.Table(stack, 'CSVTable', { +const csvTable = new glue.S3Table(stack, 'CSVTable', { database, bucket, tableName: 'csv_table', @@ -56,7 +56,7 @@ csvTable.addPartitionIndex({ keyNames: ['month', 'year'], }); -const jsonTable = new 
glue.Table(stack, 'JSONTable', { +const jsonTable = new glue.S3Table(stack, 'JSONTable', { database, bucket, tableName: 'json_table', diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json index d714c2d54aaba..e08d0476521bf 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json @@ -1,7 +1,7 @@ { - "version": "33.0.0", + "version": "34.0.0", "files": { - "dc6c1c5f05a8e365822e6d61c41b6fc6afd58d20a2784614b906ae1587c68754": { + "7a0cca4ed21fb1c6d9b050e5fd7c8d857b13af8ef7b8cead40cd08d2e25fc892": { "source": { "path": "aws-cdk-glue.template.json", "packaging": "file" @@ -9,7 +9,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "dc6c1c5f05a8e365822e6d61c41b6fc6afd58d20a2784614b906ae1587c68754.json", + "objectKey": "7a0cca4ed21fb1c6d9b050e5fd7c8d857b13af8ef7b8cead40cd08d2e25fc892.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json index f4415b64a1333..75020f0d007ad 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json @@ -488,6 +488,70 @@ } } }, + "MyTableWithConnectionTable4BCA8495": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "connection_table 
generated by CDK", + "Name": "connection_table", + "Parameters": { + "classification": "json", + "has_encrypted_data": true + }, + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, "MyTableWithStorageDescriptorParametersTable1A347345": { "Type": "AWS::Glue::Table", "Properties": { @@ -559,6 +623,70 @@ } } }, + "MyDeprecatedTableAA0364FD": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "deprecated_table generated by CDK", + "Name": "deprecated_table", + "Parameters": { + "classification": "json", + "has_encrypted_data": true + }, + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": 
"org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, "MyUserDC45028B": { "Type": "AWS::IAM::User" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json index 670371bb340e7..711c72d940d8a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json @@ -1,5 +1,5 @@ { - "version": "33.0.0", + "version": "34.0.0", "files": { "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { "source": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out index 560dae10d018f..2313ab5436501 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"33.0.0"} \ No newline at end of file +{"version":"34.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json index ab7e38c81b5c6..71d86f03a0888 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json @@ -1,5 +1,5 @@ { - "version": "33.0.0", + "version": "34.0.0", "testCases": { "aws-cdk-glue-table-integ/DefaultTest": { "stacks": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json index 02bab428fcf54..d511e7b0795fb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json @@ -1,5 +1,5 @@ { - "version": "33.0.0", + "version": "34.0.0", "artifacts": { "aws-cdk-glue.assets": { "type": "cdk:asset-manifest", @@ -17,7 +17,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/dc6c1c5f05a8e365822e6d61c41b6fc6afd58d20a2784614b906ae1587c68754.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/7a0cca4ed21fb1c6d9b050e5fd7c8d857b13af8ef7b8cead40cd08d2e25fc892.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ @@ -93,12 +93,24 @@ "data": "MyPartitionFilteredTable324BA27A" } ], + "/aws-cdk-glue/MyTableWithConnection/Table": [ + { + "type": "aws:cdk:logicalId", + "data": "MyTableWithConnectionTable4BCA8495" + } + ], "/aws-cdk-glue/MyTableWithStorageDescriptorParameters/Table": [ { "type": "aws:cdk:logicalId", "data": "MyTableWithStorageDescriptorParametersTable1A347345" } ], + "/aws-cdk-glue/MyDeprecatedTable/Table": [ + { + "type": "aws:cdk:logicalId", + "data": "MyDeprecatedTableAA0364FD" + } + ], "/aws-cdk-glue/MyUser/Resource": [ { "type": "aws:cdk:logicalId", @@ -135,10 +147,19 @@ "data": "CheckBootstrapVersion" } ], - "MyPartitionFilteredTableBucket6ACAA137": [ + "MyConnection5621880D": [ + { + "type": "aws:cdk:logicalId", + "data": "MyConnection5621880D", + "trace": [ + 
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } + ], + "MyTableWithCustomLocationTable43A19D42": [ { "type": "aws:cdk:logicalId", - "data": "MyPartitionFilteredTableBucket6ACAA137", + "data": "MyTableWithCustomLocationTable43A19D42", "trace": [ "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" ] diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json index 71c545fff8226..ca372333c5ca8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json @@ -143,7 +143,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Table", + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", "version": "0.0.0" } }, @@ -231,7 +231,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Table", + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", "version": "0.0.0" } }, @@ -319,7 +319,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Table", + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", "version": "0.0.0" } }, @@ -407,7 +407,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Table", + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", "version": "0.0.0" } }, @@ -584,7 +584,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Table", + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", "version": "0.0.0" } }, @@ -667,7 +667,89 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Table", + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", + "version": "0.0.0" + } + }, + "MyTableWithConnection": { + "id": "MyTableWithConnection", + "path": "aws-cdk-glue/MyTableWithConnection", + "children": { + "Table": { + "id": "Table", + "path": "aws-cdk-glue/MyTableWithConnection/Table", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Table", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseName": { + "Ref": "MyDatabase1E2517DB" + }, + 
"tableInput": { + "name": "connection_table", + "description": "connection_table generated by CDK", + "parameters": { + "classification": "json", + "has_encrypted_data": true + }, + "storageDescriptor": { + "location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "compressed": false, + "storedAsSubDirectories": false, + "columns": [ + { + "name": "col1", + "type": "string" + }, + { + "name": "col2", + "type": "string", + "comment": "col2 comment" + }, + { + "name": "col3", + "type": "array" + }, + { + "name": "col4", + "type": "map" + }, + { + "name": "col5", + "type": "struct" + } + ], + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + } + }, + "tableType": "EXTERNAL_TABLE" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnTable", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", "version": "0.0.0" } }, @@ -755,6 +837,88 @@ } } }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", + "version": "0.0.0" + } + }, + "MyDeprecatedTable": { + "id": "MyDeprecatedTable", + "path": "aws-cdk-glue/MyDeprecatedTable", + "children": { + "Table": { + "id": "Table", + "path": "aws-cdk-glue/MyDeprecatedTable/Table", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Table", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "tableInput": { + "name": "deprecated_table", + "description": "deprecated_table generated by CDK", + "parameters": { + "classification": "json", + "has_encrypted_data": true + }, + "storageDescriptor": { + "location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "compressed": false, + 
"storedAsSubDirectories": false, + "columns": [ + { + "name": "col1", + "type": "string" + }, + { + "name": "col2", + "type": "string", + "comment": "col2 comment" + }, + { + "name": "col3", + "type": "array" + }, + { + "name": "col4", + "type": "map" + }, + { + "name": "col5", + "type": "struct" + } + ], + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + } + }, + "tableType": "EXTERNAL_TABLE" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnTable", + "version": "0.0.0" + } + } + }, "constructInfo": { "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" @@ -1190,7 +1354,7 @@ "path": "aws-cdk-glue-table-integ/DefaultTest/Default", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.2.69" + "version": "10.2.70" } }, "DeployAssert": { @@ -1236,7 +1400,7 @@ "path": "Tree", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.2.69" + "version": "10.2.70" } } }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts index 5633ccac75c1f..7c175b5bfa3bc 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts @@ -44,7 +44,7 @@ const partitionKeys = [{ type: glue.Schema.SMALL_INT, }]; -const avroTable = new glue.Table(stack, 'AVROTable', { +const avroTable = new glue.S3Table(stack, 'AVROTable', { database, bucket, tableName: 'avro_table', @@ -53,7 +53,7 @@ const avroTable = new glue.Table(stack, 'AVROTable', { dataFormat: glue.DataFormat.AVRO, }); -const csvTable = new glue.Table(stack, 'CSVTable', { +const csvTable = new glue.S3Table(stack, 'CSVTable', { database, bucket, tableName: 'csv_table', @@ -62,7 +62,7 @@ const csvTable = new glue.Table(stack, 'CSVTable', { dataFormat: glue.DataFormat.CSV, }); -const jsonTable = 
new glue.Table(stack, 'JSONTable', { +const jsonTable = new glue.S3Table(stack, 'JSONTable', { database, bucket, tableName: 'json_table', @@ -71,7 +71,7 @@ const jsonTable = new glue.Table(stack, 'JSONTable', { dataFormat: glue.DataFormat.JSON, }); -const parquetTable = new glue.Table(stack, 'ParquetTable', { +const parquetTable = new glue.S3Table(stack, 'ParquetTable', { database, bucket, tableName: 'parquet_table', @@ -80,7 +80,7 @@ const parquetTable = new glue.Table(stack, 'ParquetTable', { dataFormat: glue.DataFormat.PARQUET, }); -const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', { +const encryptedTable = new glue.S3Table(stack, 'MyEncryptedTable', { database, tableName: 'my_encrypted_table', columns, @@ -92,7 +92,7 @@ const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', { }), }); -new glue.Table(stack, 'MyPartitionFilteredTable', { +new glue.S3Table(stack, 'MyPartitionFilteredTable', { database, bucket, tableName: 'partition_filtered_table', @@ -101,7 +101,15 @@ new glue.Table(stack, 'MyPartitionFilteredTable', { enablePartitionFiltering: true, }); -new glue.Table(stack, 'MyTableWithStorageDescriptorParameters', { +new glue.S3Table(stack, 'MyTableWithConnection', { + database, + bucket, + tableName: 'connection_table', + columns, + dataFormat: glue.DataFormat.JSON, +}); + +new glue.S3Table(stack, 'MyTableWithStorageDescriptorParameters', { database, bucket, tableName: 'table_with_storage_descriptor_parameters', @@ -116,6 +124,14 @@ new glue.Table(stack, 'MyTableWithStorageDescriptorParameters', { ], }); +new glue.Table(stack, 'MyDeprecatedTable', { + database, + bucket, + tableName: 'deprecated_table', + columns, + dataFormat: glue.DataFormat.JSON, +}); + const user = new iam.User(stack, 'MyUser'); csvTable.grantReadWrite(user); encryptedTable.grantReadWrite(user); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts new file mode 100644 index 
0000000000000..c5e498ad61c41 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts @@ -0,0 +1,1105 @@ +import * as cdk from 'aws-cdk-lib'; +import { Template } from 'aws-cdk-lib/assertions'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as kms from 'aws-cdk-lib/aws-kms'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import * as glue from '../lib'; + +test('encrypted table: SSE-S3', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + encryption: glue.TableEncryption.S3_MANAGED, + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.S3_MANAGED); + expect(table.encryptionKey).toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Name: 'table', + Description: 'table generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::S3::Bucket', { + BucketEncryption: { + ServerSideEncryptionConfiguration: [ + { + ServerSideEncryptionByDefault: { + SSEAlgorithm: 'AES256', + }, + }, + ], + }, + }); +}); + +test('encrypted table: 
SSE-KMS (implicitly created key)', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + encryption: glue.TableEncryption.KMS, + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.KMS); + expect(table.encryptionKey).toEqual(table.bucket?.encryptionKey); + + Template.fromStack(stack).hasResourceProperties('AWS::KMS::Key', { + Description: 'Created by Default/Table/Bucket', + }); + + Template.fromStack(stack).hasResourceProperties('AWS::S3::Bucket', { + BucketEncryption: { + ServerSideEncryptionConfiguration: [ + { + ServerSideEncryptionByDefault: { + KMSMasterKeyID: { + 'Fn::GetAtt': [ + 'TableBucketKey3E9F984A', + 'Arn', + ], + }, + SSEAlgorithm: 'aws:kms', + }, + }, + ], + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Name: 'table', + Description: 'table generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('encrypted table: SSE-KMS (explicitly created key)', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const encryptionKey = new kms.Key(stack, 'MyKey', { + description: 'OurKey', + }); + + 
const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + encryption: glue.TableEncryption.KMS, + encryptionKey, + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.KMS); + expect(table.encryptionKey).toEqual(table.bucket?.encryptionKey); + expect(table.encryptionKey).not.toEqual(undefined); + + Template.fromStack(stack).hasResourceProperties('AWS::KMS::Key', { + Description: 'OurKey', + }); + + Template.fromStack(stack).hasResourceProperties('AWS::S3::Bucket', { + BucketEncryption: { + ServerSideEncryptionConfiguration: [ + { + ServerSideEncryptionByDefault: { + KMSMasterKeyID: { + 'Fn::GetAtt': [ + 'MyKey6AB29FA6', + 'Arn', + ], + }, + SSEAlgorithm: 'aws:kms', + }, + }, + ], + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Description: 'table generated by CDK', + Name: 'table', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('encrypted table: SSE-KMS_MANAGED', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + encryption: glue.TableEncryption.KMS_MANAGED, + dataFormat: 
glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.KMS_MANAGED); + expect(table.encryptionKey).toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(stack).hasResourceProperties('AWS::S3::Bucket', { + BucketEncryption: { + ServerSideEncryptionConfiguration: [ + { + ServerSideEncryptionByDefault: { + SSEAlgorithm: 'aws:kms', + }, + }, + ], + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Name: 'table', + Description: 'table generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('encrypted table: CSE-KMS (implicitly created key)', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.CLIENT_SIDE_KMS); + expect(table.encryptionKey).not.toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(stack).resourceCountIs('AWS::KMS::Key', 1); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 
'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Description: 'table generated by CDK', + Name: 'table', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('encrypted table: CSE-KMS (explicitly created key)', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const encryptionKey = new kms.Key(stack, 'MyKey', { + description: 'MyKey', + }); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + encryptionKey, + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.CLIENT_SIDE_KMS); + expect(table.encryptionKey).not.toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(stack).hasResourceProperties('AWS::KMS::Key', { + Description: 'MyKey', + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Description: 'table generated by CDK', + Name: 'table', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + 
Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('encrypted table: CSE-KMS (explicitly passed bucket and key)', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + const bucket = new s3.Bucket(stack, 'Bucket'); + const encryptionKey = new kms.Key(stack, 'MyKey', { + description: 'MyKey', + }); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + bucket, + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + encryptionKey, + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.CLIENT_SIDE_KMS); + expect(table.encryptionKey).not.toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(stack).hasResourceProperties('AWS::KMS::Key', { + Description: 'MyKey', + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Description: 'table generated by CDK', + Name: 'table', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'Bucket83908E77', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 
'EXTERNAL_TABLE', + }, + }); +}); + +test('explicit s3 bucket and prefix', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const stack = new cdk.Stack(app, 'app'); + const bucket = new s3.Bucket(stack, 'ExplicitBucket'); + const database = new glue.Database(dbStack, 'Database'); + + new glue.S3Table(stack, 'Table', { + database, + bucket, + s3Prefix: 'prefix/', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Description: 'apptablecb9c398f generated by CDK', + Name: 'apptablecb9c398f', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'ExplicitBucket0AA51A3F', + }, + '/prefix/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('explicit s3 bucket and with empty prefix', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const stack = new cdk.Stack(app, 'app'); + const bucket = new s3.Bucket(stack, 'ExplicitBucket'); + const database = new glue.Database(dbStack, 'Database'); + + new glue.S3Table(stack, 'Table', { + database, + bucket, + s3Prefix: '', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 
'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Description: 'apptablecb9c398f generated by CDK', + Name: 'apptablecb9c398f', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'ExplicitBucket0AA51A3F', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +describe('grants', () => { + test('custom permissions', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + + table.grant(user, ['glue:UpdateTable']); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: 'glue:UpdateTable', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); + + test('read only', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + + const 
table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + + table.grantRead(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); + + test('write only', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + + table.grantWrite(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + 
':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + { + Action: [ + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); + + test('read and write', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + + table.grantReadWrite(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 
's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); +}); + +describe('validate', () => { + test('can not specify an explicit bucket and encryption', () => { + expect(() => { + createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: glue.TableEncryption.KMS, + }); + }).toThrowError('you can not specify encryption settings if you also provide a bucket'); + }); + + test('can explicitly pass bucket if Encryption undefined', () => { + expect(() => createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: undefined, + })).not.toThrow(); + }); + + test('can explicitly pass bucket if encryption is not set', () => { + expect(() => createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: undefined, + })).not.toThrow(); + }); + + test('can explicitly pass bucket if ClientSideKms', () => { + expect(() => createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + })).not.toThrow(); + }); +}); + +function createTable(props: Pick<glue.S3TableProps, Exclude<keyof glue.S3TableProps, 'database' | 'dataFormat'>>): void { + const stack = new cdk.Stack(); + new glue.S3Table(stack, 'table', { + ...props, + database: new glue.Database(stack, 'db'), + dataFormat: glue.DataFormat.JSON, + }); +} diff --git
a/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts new file mode 100644 index 0000000000000..d39345c288f24 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts @@ -0,0 +1,684 @@ +import * as cdk from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { CfnTable } from 'aws-cdk-lib/aws-glue'; +import * as glue from '../lib'; + +test('unpartitioned JSON table', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + + const tableStack = new cdk.Stack(app, 'table'); + const table = new glue.S3Table(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.S3_MANAGED); + + Template.fromStack(tableStack).hasResource('AWS::S3::Bucket', { + Type: 'AWS::S3::Bucket', + DeletionPolicy: 'Retain', + UpdateReplacePolicy: 'Retain', + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + 
+test('partitioned JSON table', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + + const tableStack = new cdk.Stack(app, 'table'); + const table = new glue.S3Table(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + expect(table.encryption).toEqual(glue.TableEncryption.S3_MANAGED); + expect(table.encryptionKey).toEqual(undefined); + expect(table.bucket).not.toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + PartitionKeys: [ + { + Name: 'year', + Type: 'smallint', + }, + ], + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: false, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('compressed table', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + 
expect(table.encryptionKey).toEqual(undefined); + expect(table.bucket?.encryptionKey).toEqual(undefined); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + Ref: 'DatabaseB269D8BB', + }, + TableInput: { + Name: 'table', + Description: 'table generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + StorageDescriptor: { + Columns: [ + { + Name: 'col', + Type: 'string', + }, + ], + Compressed: true, + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + Location: { + 'Fn::Join': [ + '', + [ + 's3://', + { + Ref: 'TableBucketDA42407C', + }, + '/', + ], + ], + }, + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + StoredAsSubDirectories: false, + }, + TableType: 'EXTERNAL_TABLE', + }, + }); +}); + +test('table.node.defaultChild', () => { + // GIVEN + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + // WHEN + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + + // THEN + expect(table.node.defaultChild instanceof CfnTable).toEqual(true); +}); + +describe('parition indexes', () => { + test('fails with > 3 indexes', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const indexes: glue.PartitionIndex[] = [{ + indexName: 'ind1', + keyNames: ['part'], + }, { + indexName: 'ind2', + keyNames: ['part'], + }, { + indexName: 'ind3', + keyNames: ['part'], + }, { + indexName: 'ind4', + keyNames: ['part'], + }]; + + expect(() => new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + 
partitionIndexes: indexes, + dataFormat: glue.DataFormat.JSON, + })).toThrowError('Maximum number of partition indexes allowed is 3'); + }); + + test('no indexName', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const indexes: glue.PartitionIndex[] = [{ + keyNames: ['part'], + }]; + + new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: indexes, + dataFormat: glue.DataFormat.JSON, + }); + }); + + describe('add partition index', () => { + test('fails if no partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keyNames: ['part'], + })).toThrowError(/The table must have partition keys to create a partition index/); + }); + + test('fails if partition index does not match partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keyNames: ['not-part'], + })).toThrowError(/All index keys must also be partition keys/); + }); + + test('fails with index name < 1 character', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database'); + + const table = new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ 
+ name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: '', + keyNames: ['part'], + })).toThrowError(/Index name must be between 1 and 255 characters, but got 0/); + }); + }); +}); + +describe('validate', () => { + test('at least one column', () => { + expect(() => { + createTable({ + columns: [], + }); + }).toThrowError('you must specify at least one column for the table'); + }); + + test('unique column names', () => { + expect(() => { + createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }, { + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + }); + + test('unique partition keys', () => { + expect(() => { + createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'p1', + type: glue.Schema.STRING, + }, { + name: 'p1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'p1' is duplicated"); + }); + + test('column names and partition keys are all unique', () => { + expect(() => { + createTable({ + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + }); + + test('unique storage descriptor parameters', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + + expect(() => new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(2), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + 
glue.StorageParameter.custom('foo', 'bar'), + glue.StorageParameter.custom(glue.StorageParameters.COMPRESSION_TYPE, 'true'), + ], + })).toThrowError('Duplicate storage parameter key: compression_type'); + }); +}); + +describe('Table.fromTableArn', () => { + test('success', () => { + // GIVEN + const stack = new cdk.Stack(); + + // WHEN + const table = glue.ExternalTable.fromTableArn(stack, 'boom', 'arn:aws:glue:us-east-1:123456789012:table/db1/tbl1'); + + // THEN + expect(table.tableArn).toEqual('arn:aws:glue:us-east-1:123456789012:table/db1/tbl1'); + expect(table.tableName).toEqual('tbl1'); + }); + + test('throws if no ARN is provided', () => { + // GIVEN + const stack = new cdk.Stack(); + + // THEN + expect(() => glue.ExternalTable.fromTableArn(stack, 'boom', '')).toThrowError(/ARNs must start with \"arn:\" and have at least 6 components: /); + }); +}); + +test.each([ + ['enabled', true], + ['disabled', false], +])('Partition filtering on table %s', (_, enabled) => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + + const tableStack = new cdk.Stack(app, 'table'); + new glue.S3Table(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + enablePartitionFiltering: enabled, + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + 'classification': 'json', + 'has_encrypted_data': true, + 'partition_filtering.enabled': enabled, + }, + PartitionKeys: Match.anyValue(), + StorageDescriptor: Match.anyValue(), + TableType: Match.anyValue(), + }, + }); +}); + 
+test('Partition filtering on table is not defined (default behavior)', () => { + const app = new cdk.App(); + const dbStack = new cdk.Stack(app, 'db'); + const database = new glue.Database(dbStack, 'Database'); + + const tableStack = new cdk.Stack(app, 'table'); + new glue.S3Table(tableStack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + enablePartitionFiltering: undefined, + }); + + Template.fromStack(tableStack).hasResourceProperties('AWS::Glue::Table', { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: { + 'Fn::ImportValue': 'db:ExportsOutputRefDatabaseB269D8BB88F4B1C4', + }, + TableInput: { + Name: 'tabletable8fff2c2b', + Description: 'tabletable8fff2c2b generated by CDK', + Parameters: { + classification: 'json', + has_encrypted_data: true, + }, + PartitionKeys: Match.anyValue(), + StorageDescriptor: Match.anyValue(), + TableType: Match.anyValue(), + }, + }); +}); + +test('can specify a physical name', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + new glue.S3Table(stack, 'Table', { + database, + tableName: 'my_table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Name: 'my_table', + Description: 'my_table generated by CDK', + }, + }); +}); + +test('can specify a description', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + new glue.S3Table(stack, 'Table', { + database, + tableName: 'my_table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + description: 'This is a test table.', + dataFormat: glue.DataFormat.JSON, + }); + + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Name: 'my_table', + Description: 'This is a test table.', + }, + }); +}); + +test('storage descriptor parameters', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(2), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('foo', 'bar'), + glue.StorageParameter.custom('separatorChar', ','), + ], + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + StorageDescriptor: { + Parameters: { + 'skip.header.line.count': '2', + 'separatorChar': ',', + 'foo': 'bar', + 'compression_type': 'gzip', + }, + }, + }, + }); +}); + +test('can specify there are subdirectories', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storedAsSubDirectories: true, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + StorageDescriptor: { + StoredAsSubDirectories: true, + }, + }, + }); +}); + +test('data format without classification string', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const dataFormat = new glue.DataFormat({ + inputFormat: glue.InputFormat.TEXT, + outputFormat: glue.OutputFormat.HIVE_IGNORE_KEY_TEXT, + serializationLibrary: glue.SerializationLibrary.OPENX_JSON, + }); + new glue.S3Table(stack, 'Table', { + database, + columns: [{ + 
name: 'col', + type: glue.Schema.STRING, + }], + dataFormat, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Parameters: { + classification: Match.absent(), + }, + StorageDescriptor: { + InputFormat: 'org.apache.hadoop.mapred.TextInputFormat', + OutputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + SerdeInfo: { + SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', + }, + }, + }, + }); +}); + +function createTable(props: Pick<glue.S3TableProps, Exclude<keyof glue.S3TableProps, 'database' | 'dataFormat'>>): void { + const stack = new cdk.Stack(); + new glue.S3Table(stack, 'table', { + ...props, + database: new glue.Database(stack, 'db'), + dataFormat: glue.DataFormat.JSON, + }); +} diff --git a/packages/@aws-cdk/aws-glue-alpha/test/table.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/table-deprecated.test.ts similarity index 99% rename from packages/@aws-cdk/aws-glue-alpha/test/table.test.ts rename to packages/@aws-cdk/aws-glue-alpha/test/table-deprecated.test.ts index 487a5c5343e14..9803d72a36240 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/table-deprecated.test.ts @@ -1654,7 +1654,7 @@ test('storage descriptor parameters', () => { }); }); -function createTable(props: Pick<glue.TableProps, Exclude<keyof glue.TableProps, 'database' | 'dataFormat'>>): void { +function createTable(props: Pick<glue.S3TableProps, Exclude<keyof glue.S3TableProps, 'database' | 'dataFormat'>>): void { const stack = new cdk.Stack(); new glue.Table(stack, 'table', { ...props,