From 6118d80862cbcbd6dca7a83fc3971a37022353dc Mon Sep 17 00:00:00 2001 From: Prashanna B Date: Fri, 12 Apr 2024 08:09:57 +0000 Subject: [PATCH] PySpark and Scala Flex ETL jobs L2 constructs --- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 7 + packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 2 + .../lib/jobs/pysparkflex-etl-job.ts | 180 +++++++++ .../lib/jobs/scala-spark-flex-etl-job.ts | 203 ++++++++++ ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + .../aws-glue-job-pysparkflex-etl.assets.json | 32 ++ ...aws-glue-job-pysparkflex-etl.template.json | 204 ++++++++++ ...efaultTestDeployAssert3F3EC951.assets.json | 19 + ...aultTestDeployAssert3F3EC951.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 ++++++ .../tree.json | 373 +++++++++++++++++ .../test/integ.job-pysparkflex-etl.ts | 66 +++ ...3f8703573eb6b69528c5d52190d72579c91602.jar | Bin 0 -> 782 bytes ...ws-glue-job-scalasparkflex-etl.assets.json | 32 ++ ...-glue-job-scalasparkflex-etl.template.json | 206 ++++++++++ ...efaultTestDeployAssert8009E6FC.assets.json | 19 + ...aultTestDeployAssert8009E6FC.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 ++++++ .../tree.json | 375 ++++++++++++++++++ .../test/integ.job-scalasparkflex-etl.ts | 63 +++ .../test/job-jar/helloworld.jar | Bin 0 -> 782 bytes .../test/pysparkflex-etl-jobs.test.ts | 54 +++ .../test/scalasparkflex-etl-jobs.test.ts | 57 +++ 27 files changed, 2253 insertions(+) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 7b1cfd7896fdf..5cae06b9a8006 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -51,6 +51,12 @@ export enum WorkerType { Z_2X = 'Z.2X', } +/** + * The number of workers of a defined workerType that are allocated when a job runs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html + */ + /** * Job states emitted by Glue to CloudWatch Events. 
* @@ -196,6 +202,7 @@ export enum PythonVersion { * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) */ THREE_NINE = '3.9', + } /** diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..c63140567e53a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -16,9 +16,11 @@ export * from './constants'; export * from './jobs/job'; // export * from './jobs/flex-job'; export * from './jobs/pyspark-etl-job'; +export * from './jobs/pysparkflex-etl-job'; // export * from './jobs/python-shell-job'; // export * from './jobs/ray-job'; // export * from './jobs/scala-spark-etl-job'; +export * from './jobs/scala-spark-flex-etl-job'; export * from './jobs/spark-ui-utils'; // export * from './jobs/spark-etl-job'; //export * from './jobs/streaming-job'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts new file mode 100644 index 0000000000000..296d1947524e1 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts @@ -0,0 +1,180 @@ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType, ExecutionClass } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; + +/** + * Flex Jobs class + * + * Flex jobs supports Python and Scala language. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads. 
+ * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) + * + * Similar to ETL, we’ll enable these features: --enable-metrics, --enable-spark-ui, + * --enable-continuous-cloudwatch-log + * + */ + +export interface PySparkFlexEtlJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + * @default - no extra files + */ + readonly extraPythonFiles?: string[]; + +} + +/** + * A Python Spark ETL Glue Job + */ +export class PySparkFlexEtlJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * PySparkFlexEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: PySparkFlexEtlJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + // Combine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + /*if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } */ + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? 
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + executionClass: ExecutionClass.FLEX, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: PySparkFlexEtlJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts new file mode 100644 index 0000000000000..dc46dec4aeaae --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -0,0 +1,203 @@ +/** + * Spark ETL Jobs class + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. + */ + +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType, ExecutionClass } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; + +/** + * Flex Jobs class + * + * Flex jobs supports Python and Scala language. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads. 
+ * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) + * + * Similar to ETL, we’ll enable these features: --enable-metrics, --enable-spark-ui, + * --enable-continuous-cloudwatch-log + * + */ + +export interface ScalaSparkFlexEtlJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + * @default - no extra files + */ + readonly extraPythonFiles?: string[]; + + /** + * Scala class to be passed as the `--class` default argument of the ETL job + * @default - none; the constructor throws if className is not set + */ + readonly className?: string; + +} + +/** + * A Scala Spark Flex ETL Glue Job + */ +export class ScalaSparkFlexEtlJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * ScalaSparkFlexEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: ScalaSparkFlexEtlJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + if (props.className === undefined) { + throw new Error('className must be set for Scala ETL Jobs'); + } + + // Combine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + /*if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } */ + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? 
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + executionClass: ExecutionClass.FLEX, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: ScalaSparkFlexEtlJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.SCALA; + args['--class'] = props.className!; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json new file mode 100644 index 0000000000000..b5ad08acc9ab4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + "6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea": { + "source": { + "path": "aws-glue-job-pysparkflex-etl.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json new file mode 100644 index 0000000000000..af52f7c3eca39 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json @@ -0,0 +1,204 @@ +{ + "Resources": { + "IAMServiceRole61C662C4": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "IAMServiceRoleDefaultPolicy379D1A0E": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { 
+ "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "Roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "BasicPySparkFlexEtlJobC50DC250": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "ExecutionClass": "FLEX", + "GlueVersion": "3.0", + "NumberOfWorkers": 10, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "WorkerType": "G.2X" + } + }, + "OverridePySparkFlexEtlJob8EE4CFA1": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "Description": "Optional Override PySpark Flex Etl Job", + "ExecutionClass": "FLEX", + "GlueVersion": "3.0", + "Name": "Optional Override PySpark Flex Etl Job", + "NumberOfWorkers": 20, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "Tags": { + "key": "value" + }, + "Timeout": 15, + "WorkerType": "G.1X" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": 
"AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json new file mode 100644 index 0000000000000..d77fab393274a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..b837700f2ba0b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-pysparkflex-etl" + ], + "assertionStack": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..56ea621a7e015 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-pysparkflex-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-pysparkflex-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-pysparkflex-etl": { + "type": "aws:cloudformation:stack", + "environment": 
"aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-pysparkflex-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-pysparkflex-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-pysparkflex-etl.assets" + ], + "metadata": { + "/aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicPySparkFlexEtlJobC50DC250" + } + ], + "/aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverridePySparkFlexEtlJob8EE4CFA1" + } + ], + "/aws-glue-job-pysparkflex-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-pysparkflex-etl/CheckBootstrapVersion": [ + { + "type": 
"aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pysparkflex-etl" + }, + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" + ], + "metadata": { + "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": 
"BootstrapVersion" + } + ], + "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..c28d10218218d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json @@ -0,0 +1,373 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-pysparkflex-etl": { + "id": "aws-glue-job-pysparkflex-etl", + "path": "aws-glue-job-pysparkflex-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": 
"0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicPySparkFlexEtlJob": { + "id": "BasicPySparkFlexEtlJob", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": 
"aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "executionClass": "FLEX", + "glueVersion": "3.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", + "version": "0.0.0" + } + }, + "OverridePySparkFlexEtlJob": { + "id": "OverridePySparkFlexEtlJob", + "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + 
"--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override PySpark Flex Etl Job", + "executionClass": "FLEX", + "glueVersion": "3.0", + "name": "Optional Override PySpark Flex Etl Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-pysparkflex-etl-integ-test": { + "id": "aws-glue-job-pysparkflex-etl-integ-test", + "path": "aws-glue-job-pysparkflex-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": 
"CheckBootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts new file mode 100644 index 0000000000000..ce3145dc0ff2b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts @@ -0,0 +1,66 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ShellJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 
'aws-glue-job-pysparkflex-etl'); + +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.PySparkFlexEtlJob(stack, 'BasicPySparkFlexEtlJob', { + script: script, + role: iam_role, +}); + +/*new glue.PySparkFlexEtlJob(stack, 'BasicPySparkFlexEtlJobv3', { + script: script, + role: iam_role, + glueVersion: glue.GlueVersion.V3_0, +}); */ + +new glue.PySparkFlexEtlJob(stack, 'OverridePySparkFlexEtlJob', { + script: script, + role: iam_role, + description: 'Optional Override PySpark Flex Etl Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_1X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override PySpark Flex Etl Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-pysparkflex-etl-integ-test', { + testCases: [stack], +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? 
z>M2", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json new file mode 100644 index 0000000000000..22bd76fefdc70 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..694662c13ef3a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-scalasparkflex-etl" + ], + "assertionStack": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..62e439eab23c2 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-scalasparkflex-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-scalasparkflex-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-scalasparkflex-etl": { + "type": 
"aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-scalasparkflex-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/5a817debdb277ddb49716d89986520ce01e14c36661ccf39e5457466dccbf687.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-scalasparkflex-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-scalasparkflex-etl.assets" + ], + "metadata": { + "/aws-glue-job-scalasparkflex-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicScalaSparkFlexEtlJobF8FD9EFB" + } + ], + "/aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverrideScalaSparkFlexEtlJob843D93B4" + } + ], + "/aws-glue-job-scalasparkflex-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + 
"/aws-glue-job-scalasparkflex-etl/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalasparkflex-etl" + }, + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets" + ], + "metadata": { + 
"/aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..194df8aef60b3 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json @@ -0,0 +1,375 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-scalasparkflex-etl": { + "id": "aws-glue-job-scalasparkflex-etl", + "path": "aws-glue-job-scalasparkflex-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": 
"AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicScalaSparkFlexEtlJob": { + "id": "BasicScalaSparkFlexEtlJob", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob", + "children": { + "Codeb58a68516710fd95a65c427a7e567405": { + "id": "Codeb58a68516710fd95a65c427a7e567405", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405", + "children": { + "Stage": { + "id": "Stage", + "path": 
"aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "" + }, + "executionClass": "FLEX", + "glueVersion": "3.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkFlexEtlJob", + "version": "0.0.0" + } + }, + "OverrideScalaSparkFlexEtlJob": { + "id": "OverrideScalaSparkFlexEtlJob", + "path": "aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + 
"scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override ScalaSpark Flex Etl Job", + "executionClass": "FLEX", + "glueVersion": "3.0", + "name": "Optional Override ScalaSpark Flex Etl Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkFlexEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-scalasparkflex-etl-integ-test": { + "id": "aws-glue-job-scalasparkflex-etl-integ-test", + "path": "aws-glue-job-scalasparkflex-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": 
"DeployAssert", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts new file mode 100644 index 0000000000000..0a69d3f5a517d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts @@ -0,0 +1,63 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ShellJob + * - 
Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-glue-job-scalasparkflex-etl'); + +const jar_file = glue.Code.fromAsset(path.join(__dirname, 'job-jar', 'helloworld.jar')); +const job_class ='com.example.HelloWorld'; + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.ScalaSparkFlexEtlJob(stack, 'BasicScalaSparkFlexEtlJob', { + script: jar_file, + role: iam_role, + className: job_class, +}); + +new glue.ScalaSparkFlexEtlJob(stack, 'OverrideScalaSparkFlexEtlJob', { + script: jar_file, + className: job_class, + role: iam_role, + description: 'Optional Override ScalaSpark Flex Etl Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_1X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override ScalaSpark Flex Etl Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-scalasparkflex-etl-integ-test', { + testCases: [stack], +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar b/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV 
zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? z>M2 { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + }); + + describe('Create new PySpark ETL Flex Job with default parameters', () => { + + beforeEach(() => { + job = new glue.PySparkFlexEtlJob(stack, 'ImportedJob', { role, script }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + + test('ExecutionClass should be Flex', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionClass: 'FLEX', + }); + }); + }); +}); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts new file mode 100644 index 0000000000000..f9e85e9b72e4b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts @@ -0,0 +1,57 @@ +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let 
stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + let className: string; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + className = 'com.example.HelloWorld'; + }); + + describe('Create new Scala Spark ETL Flex Job with default parameters', () => { + + beforeEach(() => { + job = new glue.ScalaSparkFlexEtlJob(stack, 'ImportedJob', { role, script, className }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + + test('ExecutionClass should be Flex', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionClass: 'FLEX', + }); + }); + + }); +}); \ No newline at end of file