From ee1e78feb72c78b27c636b5566831e1aadc45d09 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Thu, 31 Aug 2023 05:48:11 +0000 Subject: [PATCH] Refactor legacy Job framework to enforce parameters with interface contracts instead of validations. Left legacy job and job-executable files intact (except for constants locations and refactors from classes to enums) for teams' educatoin; will remove later. Need to create pyspark-etl-job tests but pushing for PR to unblock the rest of the team. --- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 212 ++++++- packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 4 + .../aws-glue-alpha/lib/job-executable.ts | 183 +----- packages/@aws-cdk/aws-glue-alpha/lib/job.ts | 286 ++------- .../aws-glue-alpha/lib/jobs/flex-job.ts | 12 +- .../@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 546 ++++++++++++++++++ .../lib/jobs/pyspark-etl-job.ts | 179 ++++++ .../lib/jobs/python-shell-job.ts | 6 +- .../aws-glue-alpha/lib/jobs/ray-job.ts | 2 +- .../lib/jobs/scala-spark-etl-job | 20 + .../aws-glue-alpha/lib/jobs/spark-etl-job.ts | 10 - .../aws-glue-alpha/lib/jobs/spark-ui.ts | 72 +++ .../aws-glue-alpha/lib/jobs/streaming-job.ts | 11 +- .../lib/triggers/conditional-trigger.ts | 2 +- .../lib/triggers/notify-event-trigger.ts | 8 +- .../lib/triggers/on-demand-trigger.ts | 4 +- .../lib/triggers/scheduled-trigger.ts | 10 +- .../lib/{ => triggers}/trigger.ts | 2 +- .../@aws-cdk/aws-glue-alpha/test/code.test.ts | 12 +- .../test/integ.job-python-shell.ts | 4 +- .../@aws-cdk/aws-glue-alpha/test/integ.job.ts | 19 +- .../test/job-executable.test.ts | 64 +- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 115 ++-- 23 files changed, 1225 insertions(+), 558 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui.ts rename packages/@aws-cdk/aws-glue-alpha/lib/{ => triggers}/trigger.ts (89%) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 36ac8bef38776..b48f6b8f8aab4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -1,4 +1,214 @@ /** - * Enums used by Job Types/Triggers + * The type of predefined worker that is allocated when a job runs. * + * If you need to use a WorkerType that doesn't exist as a static member, you + * can instantiate a `WorkerType` object, e.g: `WorkerType.of('other type')` */ +export enum WorkerType { + // 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. + STANDARD = 'Standard', + + // 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. + G_1X = 'G.1X', + + // 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. + G_2X = 'G.2X', + + // 4 DPU (16 vCPU, 64 GB of memory, 256 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. + G_4X = 'G.4X', + + // 8 DPU (32 vCPU, 128 GB of memory, 512 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. + G_8X = 'G.8X', + + // 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs. + G_025X = 'G.025X', + + Z_2X = 'Z.2X', +} + +/** + * Job states emitted by Glue to CloudWatch Events. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types for more information. + */ +export enum JobState { + /** + * State indicating job run succeeded + */ + SUCCEEDED = 'SUCCEEDED', + + /** + * State indicating job run failed + */ + FAILED = 'FAILED', + + /** + * State indicating job run timed out + */ + TIMEOUT = 'TIMEOUT', + + /** + * State indicating job is starting + */ + STARTING = 'STARTING', + + /** + * State indicating job is running + */ + RUNNING = 'RUNNING', + + /** + * State indicating job is stopping + */ + STOPPING = 'STOPPING', + + /** + * State indicating job stopped + */ + STOPPED = 'STOPPED', +} + +/** + * The Glue CloudWatch metric type. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html + */ +export enum MetricType { + /** + * A value at a point in time. + */ + GAUGE = 'gauge', + + /** + * An aggregate number. + */ + COUNT = 'count', +} + +/** + * The ExecutionClass whether the job is run with a standard or flexible execution class. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job + * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html + */ +export enum ExecutionClass { + /** + * The flexible execution class is appropriate for time-insensitive jobs whose start + * and completion times may vary. + */ + FLEX = 'FLEX', + + /** + * The standard execution class is ideal for time-sensitive workloads that require fast job + * startup and dedicated resources. + */ + STANDARD = 'STANDARD', +} + +/** + * AWS Glue version determines the versions of Apache Spark and Python that are available to the job. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html. + */ +export enum GlueVersion { + /** + * Glue version using Spark 2.2.1 and Python 2.7 + */ + V0_9 = '0.9', + + /** + * Glue version using Spark 2.4.3, Python 2.7 and Python 3.6 + */ + V1_0 = '1.0', + + /** + * Glue version using Spark 2.4.3 and Python 3.7 + */ + V2_0 = '2.0', + + /** + * Glue version using Spark 3.1.1 and Python 3.7 + */ + V3_0 = '3.0', + + /** + * Glue version using Spark 3.3.0 and Python 3.10 + */ + V4_0 = '4.0', + +} + +/** + * Runtime language of the Glue job + */ +export enum JobLanguage { + /** + * Scala + */ + SCALA = 'scala', + + /** + * Python + */ + PYTHON = 'python', +} + +/** + * Python version + */ +export enum PythonVersion { + /** + * Python 2 (the exact version depends on GlueVersion and JobCommand used) + */ + TWO = '2', + + /** + * Python 3 (the exact version depends on GlueVersion and JobCommand used) + */ + THREE = '3', + + /** + * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) + */ + THREE_NINE = '3.9', +} + +/** + * AWS Glue runtime determines the runtime engine of the job. + * + */ +export enum Runtime { + /** + * Runtime for a Glue for Ray 2.4. + */ + RAY_TWO_FOUR = 'Ray2.4', +} + +/** + * The job type. + * + * If you need to use a JobType that doesn't exist as a static member, you + * can instantiate a `JobType` object, e.g: `JobType.of('other name')`. + */ +export enum JobType { + /** + * Command for running a Glue Spark job. + */ + ETL = 'glueetl', + + /** + * Command for running a Glue Spark streaming job. + */ + STREAMING = 'gluestreaming', + + /** + * Command for running a Glue python shell job. + */ + PYTHON_SHELL = 'pythonshell', + + /** + * Command for running a Glue Ray job. + */ + RAY = 'glueray', + +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index c6a611242c925..15afbf3a32589 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -11,3 +11,7 @@ export * from './schema'; export * from './security-configuration'; export * from './storage-parameter'; export * from './table'; +export * from './constants'; +export * from './jobs/job'; +export * from './jobs/spark-ui'; +export * from './jobs/pyspark-etl-job'; \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts index 15cb5757e88b3..da73b2e17136e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts @@ -1,164 +1,5 @@ import { Code } from './code'; - -/** - * AWS Glue version determines the versions of Apache Spark and Python that are available to the job. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html. - * - * If you need to use a GlueVersion that doesn't exist as a static member, you - * can instantiate a `GlueVersion` object, e.g: `GlueVersion.of('1.5')`. - */ -export class GlueVersion { - /** - * Glue version using Spark 2.2.1 and Python 2.7 - */ - public static readonly V0_9 = new GlueVersion('0.9'); - - /** - * Glue version using Spark 2.4.3, Python 2.7 and Python 3.6 - */ - public static readonly V1_0 = new GlueVersion('1.0'); - - /** - * Glue version using Spark 2.4.3 and Python 3.7 - */ - public static readonly V2_0 = new GlueVersion('2.0'); - - /** - * Glue version using Spark 3.1.1 and Python 3.7 - */ - public static readonly V3_0 = new GlueVersion('3.0'); - - /** - * Glue version using Spark 3.3.0 and Python 3.10 - */ - public static readonly V4_0 = new GlueVersion('4.0'); - - /** - * Custom Glue version - * @param version custom version - */ - public static of(version: string): GlueVersion { - return new GlueVersion(version); - } - - /** - * The name of this GlueVersion, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * Runtime language of the Glue job - */ -export enum JobLanguage { - /** - * Scala - */ - SCALA = 'scala', - - /** - * Python - */ - PYTHON = 'python', -} - -/** - * Python version - */ -export enum PythonVersion { - /** - * Python 2 (the exact version depends on GlueVersion and JobCommand used) - */ - TWO = '2', - - /** - * Python 3 (the exact version depends on GlueVersion and JobCommand used) - */ - THREE = '3', - - /** - * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) - */ - THREE_NINE = '3.9', -} - -/** - * AWS Glue runtime determines the runtime engine of the job. - * - */ -export class Runtime { - /** - * Runtime for a Glue for Ray 2.4. - */ - public static readonly RAY_TWO_FOUR = new Runtime('Ray2.4'); - - /** - * Custom runtime - * @param runtime custom runtime - */ - public static of(runtime: string): Runtime { - return new Runtime(runtime); - } - - /** - * The name of this Runtime. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * The job type. - * - * If you need to use a JobType that doesn't exist as a static member, you - * can instantiate a `JobType` object, e.g: `JobType.of('other name')`. - */ -export class JobType { - /** - * Command for running a Glue Spark job. - */ - public static readonly ETL = new JobType('glueetl'); - - /** - * Command for running a Glue Spark streaming job. - */ - public static readonly STREAMING = new JobType('gluestreaming'); - - /** - * Command for running a Glue python shell job. - */ - public static readonly PYTHON_SHELL = new JobType('pythonshell'); - - /** - * Command for running a Glue Ray job. - */ - public static readonly RAY = new JobType('glueray'); - - /** - * Custom type name - * @param name type name - */ - public static of(name: string): JobType { - return new JobType(name); - } - - /** - * The name of this JobType, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} +import { GlueVersion, JobType, PythonVersion, Runtime, JobLanguage } from './constants'; interface PythonExecutableProps { /** @@ -350,40 +191,40 @@ export class JobExecutable { private config: JobExecutableConfig; private constructor(config: JobExecutableConfig) { - const glueVersion = config.glueVersion.name; - const type = config.type.name; - if (JobType.PYTHON_SHELL.name === type) { + const glueVersion = config.glueVersion; + const type = config.type; + if (JobType.PYTHON_SHELL === type) { if (config.language !== JobLanguage.PYTHON) { throw new Error('Python shell requires the language to be set to Python'); } - if ([GlueVersion.V0_9.name, GlueVersion.V4_0.name].includes(glueVersion)) { + if ([GlueVersion.V0_9, GlueVersion.V4_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support Python Shell`); } } - if (JobType.RAY.name === type) { + if (JobType.RAY === type) { if (config.language !== JobLanguage.PYTHON) { throw new Error('Ray requires the language to be set to Python'); } - if ([GlueVersion.V0_9.name, GlueVersion.V1_0.name, GlueVersion.V2_0.name, GlueVersion.V3_0.name].includes(glueVersion)) { + if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0, GlueVersion.V3_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support Ray`); } } - if (config.extraJarsFirst && [GlueVersion.V0_9.name, GlueVersion.V1_0.name].includes(glueVersion)) { + if (config.extraJarsFirst && [GlueVersion.V0_9, GlueVersion.V1_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); } - if (config.pythonVersion === PythonVersion.TWO && ![GlueVersion.V0_9.name, GlueVersion.V1_0.name].includes(glueVersion)) { + if (config.pythonVersion === PythonVersion.TWO && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support PythonVersion ${config.pythonVersion}`); } if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) { throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON'); } - if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL.name && type !== JobType.RAY.name) { + if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL && type !== JobType.RAY) { throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray'); } - if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY.name) { + if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY) { throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray'); } - if (config.runtime === undefined && type === JobType.RAY.name) { + if (config.runtime === undefined && type === JobType.RAY) { throw new Error('Runtime is required for Ray jobs.'); } this.config = config; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts index 825511c1fdfab..5858055bdbcc9 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts @@ -5,155 +5,19 @@ import * as iam from 'aws-cdk-lib/aws-iam'; import * as logs from 'aws-cdk-lib/aws-logs'; import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib/core'; -import * as constructs from 'constructs'; -import { Code, GlueVersion, JobExecutable, JobExecutableConfig, JobType } from '.'; +import { Code } from '.'; +import { JobExecutable, JobExecutableConfig } from './job-executable'; import { IConnection } from './connection'; import { CfnJob } from 'aws-cdk-lib/aws-glue'; import { ISecurityConfiguration } from './security-configuration'; - -/** - * The type of predefined worker that is allocated when a job runs. - * - * If you need to use a WorkerType that doesn't exist as a static member, you - * can instantiate a `WorkerType` object, e.g: `WorkerType.of('other type')`. - */ -export class WorkerType { - /** - * Each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. - */ - public static readonly STANDARD = new WorkerType('Standard'); - - /** - * Each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. - */ - public static readonly G_1X = new WorkerType('G.1X'); - - /** - * Each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. - */ - public static readonly G_2X = new WorkerType('G.2X'); - - /** - * Each worker maps to 4 DPU (16 vCPU, 64 GB of memory, 256 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. - */ - public static readonly G_4X = new WorkerType('G.4X'); - - /** - * Each worker maps to 8 DPU (32 vCPU, 128 GB of memory, 512 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. - */ - public static readonly G_8X = new WorkerType('G.8X'); - - /** - * Each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs. - */ - public static readonly G_025X = new WorkerType('G.025X'); - - /** - * Each worker maps to 2 high-memory DPU [M-DPU] (8 vCPU, 64 GB of memory, 128 GB disk). Supported in Ray jobs. - */ - public static readonly Z_2X = new WorkerType('Z.2X'); - - /** - * Custom worker type - * @param workerType custom worker type - */ - public static of(workerType: string): WorkerType { - return new WorkerType(workerType); - } - - /** - * The name of this WorkerType, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * Job states emitted by Glue to CloudWatch Events. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types for more information. - */ -export enum JobState { - /** - * State indicating job run succeeded - */ - SUCCEEDED = 'SUCCEEDED', - - /** - * State indicating job run failed - */ - FAILED = 'FAILED', - - /** - * State indicating job run timed out - */ - TIMEOUT = 'TIMEOUT', - - /** - * State indicating job is starting - */ - STARTING = 'STARTING', - - /** - * State indicating job is running - */ - RUNNING = 'RUNNING', - - /** - * State indicating job is stopping - */ - STOPPING = 'STOPPING', - - /** - * State indicating job stopped - */ - STOPPED = 'STOPPED', -} - -/** - * The Glue CloudWatch metric type. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ -export enum MetricType { - /** - * A value at a point in time. - */ - GAUGE = 'gauge', - - /** - * An aggregate number. - */ - COUNT = 'count', -} - -/** - * The ExecutionClass whether the job is run with a standard or flexible execution class. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html - */ -export enum ExecutionClass { - /** - * The flexible execution class is appropriate for time-insensitive jobs whose start - * and completion times may vary. - */ - FLEX = 'FLEX', - - /** - * The standard execution class is ideal for time-sensitive workloads that require fast job - * startup and dedicated resources. - */ - STANDARD = 'STANDARD', -} +import { JobType, JobState, MetricType, ExecutionClass, WorkerType } from './constants'; +import { Construct } from 'constructs'; +import { SparkUIProps, SparkUILoggingLocation } from './jobs/spark-ui'; /** * Interface representing a created or an imported `Job`. */ -export interface IJob extends cdk.IResource, iam.IGrantable { +export interface IJobLegacy extends cdk.IResource, iam.IGrantable { /** * The name of the job. * @attribute @@ -228,7 +92,7 @@ export interface IJob extends cdk.IResource, iam.IGrantable { metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric; } -abstract class JobBase extends cdk.Resource implements IJob { +abstract class JobBaseLegacy extends cdk.Resource implements IJobLegacy { public abstract readonly jobArn: string; public abstract readonly jobName: string; @@ -369,61 +233,13 @@ abstract class JobBase extends cdk.Resource implements IJob { } } -/** - * Properties for enabling Spark UI monitoring feature for Spark-based Glue jobs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface SparkUIProps { - /** - * Enable Spark UI. - */ - readonly enabled: boolean - - /** - * The bucket where the Glue job stores the logs. - * - * @default a new bucket will be created. - */ - readonly bucket?: s3.IBucket; - - /** - * The path inside the bucket (objects prefix) where the Glue job stores the logs. - * Use format `'/foo/bar'` - * - * @default - the logs will be written at the root of the bucket - */ - readonly prefix?: string; -} - -/** - * The Spark UI logging location. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface SparkUILoggingLocation { - /** - * The bucket where the Glue job stores the logs. - */ - readonly bucket: s3.IBucket; - - /** - * The path inside the bucket (objects prefix) where the Glue job stores the logs. - * - * @default '/' - the logs will be written at the root of the bucket - */ - readonly prefix?: string; -} - /** * Properties for enabling Continuous Logging for Glue Jobs. * * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ -export interface ContinuousLoggingProps { +export interface ContinuousLoggingPropsLegacy { /** * Enable continouous logging. */ @@ -621,7 +437,7 @@ export interface JobProps { * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ - readonly continuousLogging?: ContinuousLoggingProps, + readonly continuousLogging?: ContinuousLoggingPropsLegacy, /** * The ExecutionClass whether the job is run with a standard or flexible execution class. @@ -637,7 +453,7 @@ export interface JobProps { /** * A Glue Job. */ -export class Job extends JobBase { +export class JobLegacy extends JobBaseLegacy { /** * Creates a Glue Job * @@ -645,8 +461,8 @@ export class Job extends JobBase { * @param id The construct's id. * @param attrs Import attributes */ - public static fromJobAttributes(scope: constructs.Construct, id: string, attrs: JobAttributes): IJob { - class Import extends JobBase { + public static fromJobAttributes(scope: Construct, id: string, attrs: JobAttributes): IJobLegacy { + class Import extends JobBaseLegacy { public readonly jobName = attrs.jobName; public readonly jobArn = jobArn(scope, attrs.jobName); public readonly grantPrincipal = attrs.role ?? new iam.UnknownPrincipal({ resource: this }); @@ -683,7 +499,7 @@ export class Job extends JobBase { */ public readonly sparkUILoggingLocation?: SparkUILoggingLocation; - constructor(scope: constructs.Construct, id: string, props: JobProps) { + constructor(scope: Construct, id: string, props: JobProps) { super(scope, id, { physicalName: props.jobName, }); @@ -696,7 +512,7 @@ export class Job extends JobBase { }); this.grantPrincipal = this.role; - const sparkUI = props.sparkUI?.enabled ? this.setupSparkUI(executable, this.role, props.sparkUI) : undefined; + const sparkUI = props.sparkUI ? this.setupSparkUI(executable, this.role, props.sparkUI) : undefined;; this.sparkUILoggingLocation = sparkUI?.location; const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = props.enableProfilingMetrics ? { '--enable-metrics': '' } : {}; @@ -709,49 +525,51 @@ export class Job extends JobBase { ...this.checkNoReservedArgs(props.defaultArguments), }; - if (props.executionClass === ExecutionClass.FLEX) { - if (executable.type !== JobType.ETL) { - throw new Error('FLEX ExecutionClass is only available for JobType.ETL jobs'); - } - if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0].includes(executable.glueVersion)) { - throw new Error('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - } - if (props.workerType && (props.workerType !== WorkerType.G_1X && props.workerType !== WorkerType.G_2X)) { - throw new Error('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - } - } - - let maxCapacity = props.maxCapacity; - if (maxCapacity !== undefined && (props.workerType && props.workerCount !== undefined)) { - throw new Error('maxCapacity cannot be used when setting workerType and workerCount'); - } - if (executable.type !== JobType.PYTHON_SHELL) { - if (maxCapacity !== undefined && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(executable.glueVersion)) { - throw new Error('maxCapacity cannot be used when GlueVersion 2.0 or later'); - } - } else { - // max capacity validation for python shell jobs (defaults to 0.0625) - maxCapacity = maxCapacity ?? 0.0625; - if (maxCapacity !== 0.0625 && maxCapacity !== 1) { - throw new Error(`maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs, received ${maxCapacity}`); - } - } - if ((!props.workerType && props.workerCount !== undefined) || (props.workerType && props.workerCount === undefined)) { - throw new Error('Both workerType and workerCount must be set'); - } + // TODO: Implement these validations as interface contracts + + // if (props.executionClass === ExecutionClass.FLEX) { + // if (executable.type !== JobType.ETL) { + // throw new Error('FLEX ExecutionClass is only available for JobType.ETL jobs'); + // } + // if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0].includes(executable.glueVersion)) { + // throw new Error('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); + // } + // if (props.workerType && (props.workerType !== WorkerType.G_1X && props.workerType !== WorkerType.G_2X)) { + // throw new Error('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); + // } + // } + + // let maxCapacity = props.maxCapacity; + // if (maxCapacity !== undefined && (props.workerType && props.workerCount !== undefined)) { + // throw new Error('maxCapacity cannot be used when setting workerType and workerCount'); + // } + // if (executable.type !== JobType.PYTHON_SHELL) { + // if (maxCapacity !== undefined && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(executable.glueVersion)) { + // throw new Error('maxCapacity cannot be used when GlueVersion 2.0 or later'); + // } + // } else { + // // max capacity validation for python shell jobs (defaults to 0.0625) + // maxCapacity = maxCapacity ?? 0.0625; + // if (maxCapacity !== 0.0625 && maxCapacity !== 1) { + // throw new Error(`maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs, received ${maxCapacity}`); + // } + // } + // if ((!props.workerType && props.workerCount !== undefined) || (props.workerType && props.workerCount === undefined)) { + // throw new Error('Both workerType and workerCount must be set'); + // } const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, role: this.role.roleArn, command: { - name: executable.type.name, + name: executable.type, scriptLocation: this.codeS3ObjectUrl(executable.script), pythonVersion: executable.pythonVersion, - runtime: executable.runtime ? executable.runtime.name : undefined, + runtime: executable.runtime ? executable.runtime : undefined, }, - glueVersion: executable.glueVersion.name, - workerType: props.workerType?.name, + glueVersion: executable.glueVersion, + workerType: props.workerType, numberOfWorkers: props.workerCount, maxCapacity: props.maxCapacity, maxRetries: props.maxRetries, @@ -857,7 +675,7 @@ export class Job extends JobBase { return prefix !== undefined ? prefix.slice(1) + '/*' : undefined; } - private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { + private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingPropsLegacy) { const args: {[key: string]: string} = { '--enable-continuous-cloudwatch-log': 'true', '--enable-continuous-log-filter': (props.quiet ?? true).toString(), @@ -906,7 +724,7 @@ function metricRule(rule: events.IRule, props?: cloudwatch.MetricOptions): cloud * @param scope * @param jobName */ -function jobArn(scope: constructs.Construct, jobName: string) : string { +function jobArn(scope: Construct, jobName: string) : string { return cdk.Stack.of(scope).formatArn({ service: 'glue', resource: 'job', diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts index f9c25e74c64ff..04cc4c7c2403f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts @@ -1,13 +1,13 @@ /** * Flex Jobs class * - * Flex jobs supports Python and Scala language. - * The flexible execution class is appropriate for non-urgent jobs such as - * pre-production jobs, testing, and one-time data loads. - * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or - * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) + * Flex jobs supports Python and Scala language. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads. + * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) * - * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, + * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, * —enable-continuous-cloudwatch-log * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts new file mode 100644 index 0000000000000..2f324c08290fc --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -0,0 +1,546 @@ +import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; +import * as events from 'aws-cdk-lib/aws-events'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as logs from 'aws-cdk-lib/aws-logs'; +import * as cdk from 'aws-cdk-lib/core'; +import * as constructs from 'constructs'; +import { Code } from '..'; +import { MetricType, JobState, WorkerType, GlueVersion } from '../constants'; +import { IConnection } from '../connection'; +import { ISecurityConfiguration } from '../security-configuration'; + +/** + * Interface representing a new or an imported Glue Job + */ +export interface IJob extends cdk.IResource, iam.IGrantable { + /** + * The name of the job. + * @attribute + */ + readonly jobName: string; + + /** + * The ARN of the job. + * @attribute + */ + readonly jobArn: string; + + /** + * Defines a CloudWatch event rule triggered when something happens with this job. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onEvent(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the input jobState. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onStateChange(id: string, jobState: JobState, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the SUCCEEDED state. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onSuccess(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the FAILED state. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onFailure(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the TIMEOUT state. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onTimeout(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Create a CloudWatch metric. + * + * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. + * @param type the metric type. + * @param props metric options. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html + */ + metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric; + + /** + * Create a CloudWatch Metric indicating job success. + */ + metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric; + + /** + * Create a CloudWatch Metric indicating job failure. + */ + metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric; + + /** + * Create a CloudWatch Metric indicating job timeout. + */ + metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric; +} + +/** + * Properties for enabling Continuous Logging for Glue Jobs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ +export interface ContinuousLoggingProps { + /** + * Enable continouous logging. + */ + readonly enabled: boolean; + + /** + * Specify a custom CloudWatch log group name. + * + * @default - a log group is created with name `/aws-glue/jobs/logs-v2/`. + */ + readonly logGroup?: logs.ILogGroup; + + /** + * Specify a custom CloudWatch log stream prefix. + * + * @default - the job run ID. + */ + readonly logStreamPrefix?: string; + + /** + * Filter out non-useful Apache Spark driver/executor and Apache Hadoop YARN heartbeat log messages. + * + * @default true + */ + readonly quiet?: boolean; + + /** + * Apply the provided conversion pattern. + * + * This is a Log4j Conversion Pattern to customize driver and executor logs. + * + * @default `%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n` + */ + readonly conversionPattern?: string; +} + +/** + * A base class is needed to be able to import existing Jobs into a CDK app to + * reference as part of a larger stack or construct. JobBase has the subset + * of attribtues required to idenitfy and reference an existing Glue Job, + * as well as some CloudWatch metric conveneince functions to configure an + * event-driven flow using the job. + */ +export abstract class JobBase extends cdk.Resource implements IJob { + + public abstract readonly jobArn: string; + public abstract readonly jobName: string; + public abstract readonly grantPrincipal: iam.IPrincipal; + + /** + * Create a CloudWatch Event Rule for this Glue Job when it's in a given state + * + * @param id construct id + * @param options event options. Note that some values are overridden if provided, these are + * - eventPattern.source = ['aws.glue'] + * - eventPattern.detailType = ['Glue Job State Change', 'Glue Job Run Status'] + * - eventPattern.detail.jobName = [this.jobName] + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + public onEvent(id: string, options: events.OnEventOptions = {}): events.Rule { + const rule = new events.Rule(this, id, options); + rule.addTarget(options.target); + rule.addEventPattern({ + source: ['aws.glue'], + detailType: ['Glue Job State Change', 'Glue Job Run Status'], + detail: { + jobName: [this.jobName], + }, + }); + return rule; + } + + /** + * Create a CloudWatch Event Rule for the transition into the input jobState. + * + * @param id construct id. + * @param jobState the job state. + * @param options optional event options. + */ + public onStateChange(id: string, jobState: JobState, options: events.OnEventOptions = {}): events.Rule { + const rule = this.onEvent(id, { + description: `Rule triggered when Glue job ${this.jobName} is in ${jobState} state`, + ...options, + }); + rule.addEventPattern({ + detail: { + state: [jobState], + }, + }); + return rule; + } + + /** + * Create a CloudWatch Event Rule matching JobState.SUCCEEDED. + * + * @param id construct id. + * @param options optional event options. default is {}. + */ + public onSuccess(id: string, options: events.OnEventOptions = {}): events.Rule { + return this.onStateChange(id, JobState.SUCCEEDED, options); + } + + /** + * Return a CloudWatch Event Rule matching FAILED state. + * + * @param id construct id. + * @param options optional event options. default is {}. + */ + public onFailure(id: string, options: events.OnEventOptions = {}): events.Rule { + return this.onStateChange(id, JobState.FAILED, options); + } + + /** + * Return a CloudWatch Event Rule matching TIMEOUT state. + * + * @param id construct id. + * @param options optional event options. default is {}. + */ + public onTimeout(id: string, options: events.OnEventOptions = {}): events.Rule { + return this.onStateChange(id, JobState.TIMEOUT, options); + } + + /** + * Create a CloudWatch metric. + * + * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. + * @param type the metric type. + * @param props metric options. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html + */ + public metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return new cloudwatch.Metric({ + metricName, + namespace: 'Glue', + dimensionsMap: { + JobName: this.jobName, + JobRunId: 'ALL', + Type: type, + }, + ...props, + }).attachTo(this); + } + + /** + * Return a CloudWatch Metric indicating job success. + * + * This metric is based on the Rule returned by no-args onSuccess() call. + */ + public metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return metricRule(this.metricJobStateRule('SuccessMetricRule', JobState.SUCCEEDED), props); + } + + /** + * Return a CloudWatch Metric indicating job failure. + * + * This metric is based on the Rule returned by no-args onFailure() call. + */ + public metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return metricRule(this.metricJobStateRule('FailureMetricRule', JobState.FAILED), props); + } + + /** + * Return a CloudWatch Metric indicating job timeout. + * + * This metric is based on the Rule returned by no-args onTimeout() call. + */ + public metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return metricRule(this.metricJobStateRule('TimeoutMetricRule', JobState.TIMEOUT), props); + } + + /** + * Creates or retrieves a singleton event rule for the input job state for use with the metric JobState methods. + * + * @param id construct id. + * @param jobState the job state. + * @private + */ + private metricJobStateRule(id: string, jobState: JobState): events.Rule { + return this.node.tryFindChild(id) as events.Rule ?? this.onStateChange(id, jobState); + } + + /** + * Returns the job arn + * @param scope + * @param jobName + */ + protected buildJobArn(scope: constructs.Construct, jobName: string) : string { + return cdk.Stack.of(scope).formatArn({ + service: 'glue', + resource: 'job', + resourceName: jobName, + }); + } +} + +/** + * A subset of Job attributes are required for importing an existing job + * into a CDK project. This is ionly used when using fromJobAttributes + * to identify and reference the existing job. + */ +export interface JobImportAttributes { + /** + * The name of the job. + */ + readonly jobName: string; + + /** + * The IAM role assumed by Glue to run this job. + * + * @default - undefined + */ + readonly role?: iam.IRole; + +} + +/** + * JobProperties will be used to create new Glue Jobs using this L2 Construct. + */ +export interface JobProperties { + + /** + * Script Code Location (required) + * Script to run when the Glue job executes. Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + **/ + readonly script: Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) takes during the job execution + * The role must trust the Glue service principal (glue.amazonaws.com) + * and be granted sufficient permissions. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/getting-started-access.html + **/ + readonly role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * @default - a name is automatically generated + **/ + readonly jobName?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * @default - no value + **/ + readonly description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + */ + readonly numberOrWorkers?: number; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + **/ + readonly workerType?: WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * + * An error is returned when this threshold is reached. The maximum value + * you can specify is controlled by a service limit. + * + * @default 1 + **/ + readonly maxConcurrentRuns?: number; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + * for a list of reserved parameters + * @default - no arguments + **/ + readonly defaultArguments?: { [key: string]: string }; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * Connections are used to connect to other AWS Service or resources within a VPC. + * + * @default [] - no connections are added to the job + **/ + readonly connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue performs if the job fails + * @default 0 + **/ + readonly maxRetries?: number; + + /** + * Timeout (optional) + * The maximum time that a job run can consume resources before it is + * terminated and enters TIMEOUT status. Specified in minutes. + * @default 2880 (2 days for non-streaming) + * + **/ + readonly timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * @default - no security configuration. + **/ + readonly securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resourcex + * @default {} - no tags + **/ + readonly tags?: { [key: string]: string }; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 for ETL + **/ + readonly glueVersion?: GlueVersion; + + /** + * Enables the collection of metrics for job profiling. + * + * @default - no profiling metrics emitted. + * + * @see `--enable-metrics` at https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + **/ + readonly enableProfilingMetrics? :boolean; + + /** + * Enables continuous logging with the specified props. + * + * @default - continuous logging is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + **/ + readonly continuousLogging?: ContinuousLoggingProps, + +} + +/** + * A Glue Job. + */ +export abstract class Job extends JobBase { + + /** + * Identifies an existing Glue Job from a subset of attributes that can + * be referenced from within another Stack or Construct. + * + * @param scope The scope creating construct (usually `this`) + * @param id The construct's id. + * @param attrs Attributes for the Glue Job we want to import + */ + public static fromJobAttributes(scope: constructs.Construct, id: string, attrs: JobImportAttributes): IJob { + class Import extends JobBase { + public readonly jobName = attrs.jobName; + public readonly jobArn = this.buildJobArn(scope, attrs.jobName); + public readonly grantPrincipal = attrs.role ?? new iam.UnknownPrincipal({ resource: this }); + } + + return new Import(scope, id); + } + + /** + * The IAM role Glue assumes to run this job. + */ + public readonly abstract role: iam.IRole; + + /** + * Check no usage of reserved arguments. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + protected checkNoReservedArgs(defaultArguments?: { [key: string]: string }) { + if (defaultArguments) { + const reservedArgs = new Set(['--debug', '--mode', '--JOB_NAME']); + Object.keys(defaultArguments).forEach((arg) => { + if (reservedArgs.has(arg)) { + throw new Error(`The ${arg} argument is reserved by Glue. Don't set it`); + } + }); + } + return defaultArguments; + } + + public setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { + const args: {[key: string]: string} = { + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': (props.quiet ?? true).toString(), + }; + + if (props.logGroup) { + args['--continuous-log-logGroup'] = props.logGroup.logGroupName; + props.logGroup.grantWrite(role); + } + + if (props.logStreamPrefix) { + args['--continuous-log-logStreamPrefix'] = props.logStreamPrefix; + } + if (props.conversionPattern) { + args['--continuous-log-conversionPattern'] = props.conversionPattern; + } + return args; + } + + protected codeS3ObjectUrl(code: Code) { + const s3Location = code.bind(this, this.role).s3Location; + return `s3://${s3Location.bucketName}/${s3Location.objectKey}`; + } + +} + +/** + * Create a CloudWatch Metric that's based on Glue Job events + * {@see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types} + * The metric has namespace = 'AWS/Events', metricName = 'TriggeredRules' and RuleName = rule.ruleName dimension. + * + * @param rule for use in setting RuleName dimension value + * @param props metric properties + */ +function metricRule(rule: events.IRule, props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return new cloudwatch.Metric({ + namespace: 'AWS/Events', + metricName: 'TriggeredRules', + dimensionsMap: { RuleName: rule.ruleName }, + statistic: cloudwatch.Statistic.SUM, + ...props, + }).attachTo(rule); +} + diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts new file mode 100644 index 0000000000000..5f67ff7139b52 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -0,0 +1,179 @@ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui'; + +/** + * Spark ETL Jobs class + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. + */ + +/** + * Properties for creating a Python Spark ETL job + */ +export interface PySSparkEtlJobProperties extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + */ + readonly extraPythonFiles?: string[]; + +} + +/** + * A Python Spark ETL Glue Job + */ +export class PySparkEtlJub extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * PySparkEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: PySSparkEtlJobProperties) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + // Conbine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE_NINE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: PySSparkEtlJobProperties) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts index ff3d74a63ff9a..27d85dec7a204 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -1,8 +1,8 @@ /** * Python Shell Jobs class * - * A Python shell job runs Python scripts as a shell and supports a Python version that - * depends on the AWS Glue version you are using. + * A Python shell job runs Python scripts as a shell and supports a Python version that + * depends on the AWS Glue version you are using. * This can be used to schedule and run tasks that don't require an Apache Spark environment. * - */ + */ \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 3bcff4328ba74..fff73ebde2732 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -1,7 +1,7 @@ /** * Ray Jobs class * - * Glue ray only supports worker type Z.2X and Glue version 4.0. + * Glue ray only supports worker type Z.2X and Glue version 4.0. * Runtime will default to Ray2.3 and min workers will default to 3. * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job new file mode 100644 index 0000000000000..b0eff73d8cbc5 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job @@ -0,0 +1,20 @@ +import { Code } from '../code'; +import { JobBase, JobProperties } from './job' + +/** + * Spark ETL Jobs class + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. + */ + + /** + * Properties for creating a Python Spark ETL job + */ +export interface ScalaETLJobProps extends JobProps {} + diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts deleted file mode 100644 index 9f2d02e6d6677..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts +++ /dev/null @@ -1,10 +0,0 @@ -/** - * Spark ETL Jobs class - * - * ETL jobs supports Python and Scala language. - * ETL job type supports G1, G2, G4 and G8 worker type default as G2, which customer can override. - * It wil default to the best practice version of ETL 4.0, but allow developers to override to 3.0. - * We will also default to best practice enablement the following ETL features: - * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui.ts new file mode 100644 index 0000000000000..89af210af7f1d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui.ts @@ -0,0 +1,72 @@ +import { IBucket } from 'aws-cdk-lib/aws-s3'; +import { Token } from 'aws-cdk-lib'; +import { EOL } from 'os'; + +/** + * Properties for enabling Spark UI monitoring feature for Spark-based Glue jobs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ +export interface SparkUIProps { + + /** + * The bucket where the Glue job stores the logs. + * + * @default a new bucket will be created. + */ + readonly bucket?: IBucket; + + /** + * The path inside the bucket (objects prefix) where the Glue job stores the logs. + * Use format `'/foo/bar'` + * + * @default - the logs will be written at the root of the bucket + */ + readonly prefix?: string; +} + +/** + * The Spark UI logging location. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ +export interface SparkUILoggingLocation { + /** + * The bucket where the Glue job stores the logs. + */ + readonly bucket: IBucket; + + /** + * The path inside the bucket (objects prefix) where the Glue job stores the logs. + * + * @default '/' - the logs will be written at the root of the bucket + */ + readonly prefix?: string; +} + +export function validateSparkUiPrefix(prefix?: string): void { + if (!prefix || Token.isUnresolved(prefix)) { + // skip validation if prefix is not specified or is a token + return; + } + + const errors: string[] = []; + + if (!prefix.startsWith('/')) { + errors.push('Prefix must begin with \'/\''); + } + + if (prefix.endsWith('/')) { + errors.push('Prefix must not end with \'/\''); + } + + if (errors.length > 0) { + throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`); + } +} + +export function cleanSparkUiPrefixForGrant(prefix?: string): string | undefined { + return prefix !== undefined ? prefix.slice(1) + '/*' : undefined; +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts index 5ff520e088eb4..4ec326dacf5ad 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts @@ -1,13 +1,14 @@ /** * Streaming Jobs class * - * A Streaming job is similar to an ETL job, except that it performs ETL on data streams - * using the Apache Spark Structured Streaming framework. + * A Streaming job is similar to an ETL job, except that it performs ETL on data streams + * using the Apache Spark Structured Streaming framework. * These jobs will default to use Python 3.9. * - * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, - * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker - * and 4.0 version for streaming jobs which developers can override. + * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, + * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker + * and 4.0 version for streaming jobs which developers can override. * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. * */ + diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-trigger.ts index f19c954f1c479..487bf3b1ed291 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-trigger.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-trigger.ts @@ -1,7 +1,7 @@ /** * Conditional Trigger Class * - * Conditional triggers have a predicate and actions associated with them. + * Conditional triggers have a predicate and actions associated with them. * When the predicateCondition is true, the trigger actions will be executed. * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts index 5fa916cf4ed19..aaed7b7b623c8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts @@ -1,10 +1,10 @@ /** * Notify Event Trigger Class * - * Workflows are mandatory for this trigger type. There are two types of notify event triggers, - * batching and non-batching trigger. - * For batching triggers, developers must specify BatchSize but for non-batching BatchSize will - * be set to 1. + * Workflows are mandatory for this trigger type. There are two types of notify event triggers, + * batching and non-batching trigger. + * For batching triggers, developers must specify BatchSize but for non-batching BatchSize will + * be set to 1. * For both triggers, BatchWindow will be default to 900 seconds. * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts index b4209538a9a97..f9aa131a1f7d2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts @@ -1,8 +1,8 @@ /** * On Demand Trigger Class * - * On demand triggers can start glue jobs or crawlers. - * The trigger method will take an optional description but abstract the requirement of an + * On demand triggers can start glue jobs or crawlers. + * The trigger method will take an optional description but abstract the requirement of an * actions list using the job or crawler objects using conditional types. * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts index 1abcaf030ecd7..c34e61330a519 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts @@ -1,12 +1,12 @@ /** * Scheduled Trigger Base Class * - * Schedule triggers are a way for developers to create jobs using cron expressions. - * We’ll provide daily, weekly, and monthly convenience functions, as well as a custom function - * that will allow developers to create their own custom timing using the existing - * event Schedule object without having to build their own cron expressions. + * Schedule triggers are a way for developers to create jobs using cron expressions. + * We’ll provide daily, weekly, and monthly convenience functions, as well as a custom function + * that will allow developers to create their own custom timing using the existing + * event Schedule object without having to build their own cron expressions. * - * The trigger method will take an optional description and list of Actions + * The trigger method will take an optional description and list of Actions * which can refer to Jobs or crawlers via conditional types. * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts similarity index 89% rename from packages/@aws-cdk/aws-glue-alpha/lib/trigger.ts rename to packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts index d2dd5e22c02b3..c40c3d0efe805 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/trigger.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts @@ -1,7 +1,7 @@ /** * Workflow Trigger Base Class * - * In AWS Glue, developers can use workflows to create and visualize complex extract, + * In AWS Glue, developers can use workflows to create and visualize complex extract, * transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. * */ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts index f67d6f71526b4..ff4950c50d797 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts @@ -21,7 +21,7 @@ describe('Code', () => { test('with valid bucket name and key and bound by job sets the right path and grants the job permissions to read from it', () => { bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); script = glue.Code.fromBucket(bucket, key); - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -93,7 +93,7 @@ describe('Code', () => { }); test("with valid and existing file path and bound to job sets job's script location and permissions stack metadata", () => { - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -205,14 +205,14 @@ describe('Code', () => { }); test('used in more than 1 job in the same stack should be reused', () => { - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, script, }), }); - new glue.Job(stack, 'Job2', { + new glue.JobLegacy(stack, 'Job2', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -285,7 +285,7 @@ describe('Code', () => { }); test('throws if trying to rebind in another stack', () => { - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -294,7 +294,7 @@ describe('Code', () => { }); const differentStack = new cdk.Stack(); - expect(() => new glue.Job(differentStack, 'Job2', { + expect(() => new glue.JobLegacy(differentStack, 'Job2', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts index 4e283327f0ad4..1859c3bc85673 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts @@ -24,7 +24,7 @@ const stack = new cdk.Stack(app, 'aws-glue-job-python-shell'); const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')); -new glue.Job(stack, 'ShellJob', { +new glue.JobLegacy(stack, 'ShellJob', { jobName: 'ShellJob', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, @@ -41,7 +41,7 @@ new glue.Job(stack, 'ShellJob', { maxCapacity: 0.0625, }); -new glue.Job(stack, 'ShellJob39', { +new glue.JobLegacy(stack, 'ShellJob39', { jobName: 'ShellJob39', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V3_0, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts index aa420c52eccf4..1e09f9c6ca892 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts @@ -24,8 +24,8 @@ const stack = new cdk.Stack(app, 'aws-glue-job'); const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')); [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - const etlJob = new glue.Job(stack, 'EtlJob' + glueVersion.name, { - jobName: 'EtlJob' + glueVersion.name, + const etlJob = new glue.JobLegacy(stack, 'EtlJob' + glueVersion, { + jobName: 'EtlJob' + glueVersion, executable: glue.JobExecutable.pythonEtl({ pythonVersion: glue.PythonVersion.THREE, glueVersion, @@ -42,9 +42,6 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world. 'arg2': 'value2', '--conf': 'valueConf', }, - sparkUI: { - enabled: true, - }, continuousLogging: { enabled: true, quiet: true, @@ -56,8 +53,8 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world. }, }); etlJob.metricSuccess(); - new glue.Job(stack, 'StreamingJob' + glueVersion.name, { - jobName: 'StreamingJob' + glueVersion.name, + new glue.JobLegacy(stack, 'StreamingJob' + glueVersion, { + jobName: 'StreamingJob' + glueVersion, executable: glue.JobExecutable.pythonStreaming({ pythonVersion: glue.PythonVersion.THREE, glueVersion, @@ -75,7 +72,7 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world. }); }); -new glue.Job(stack, 'ShellJob', { +new glue.JobLegacy(stack, 'ShellJob', { jobName: 'ShellJob', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, @@ -91,7 +88,7 @@ new glue.Job(stack, 'ShellJob', { }, }); -new glue.Job(stack, 'ShellJob39', { +new glue.JobLegacy(stack, 'ShellJob39', { jobName: 'ShellJob39', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, @@ -107,7 +104,7 @@ new glue.Job(stack, 'ShellJob39', { }, }); -new glue.Job(stack, 'RayJob', { +new glue.JobLegacy(stack, 'RayJob', { jobName: 'RayJob', executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V4_0, @@ -126,7 +123,7 @@ new glue.Job(stack, 'RayJob', { }, }); -new glue.Job(stack, 'EtlJobWithFLEX', { +new glue.JobLegacy(stack, 'EtlJobWithFLEX', { jobName: 'EtlJobWithFLEX', executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts index 28c6225c542b6..cc31bb9de0785 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts @@ -3,17 +3,15 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; describe('GlueVersion', () => { - test('.V0_9 should set the name correctly', () => expect(glue.GlueVersion.V0_9.name).toEqual('0.9')); + test('.V0_9 should set the name correctly', () => expect(glue.GlueVersion.V0_9).toEqual('0.9')); - test('.V1_0 should set the name correctly', () => expect(glue.GlueVersion.V1_0.name).toEqual('1.0')); + test('.V1_0 should set the name correctly', () => expect(glue.GlueVersion.V1_0).toEqual('1.0')); - test('.V2_0 should set the name correctly', () => expect(glue.GlueVersion.V2_0.name).toEqual('2.0')); + test('.V2_0 should set the name correctly', () => expect(glue.GlueVersion.V2_0).toEqual('2.0')); - test('.V3_0 should set the name correctly', () => expect(glue.GlueVersion.V3_0.name).toEqual('3.0')); + test('.V3_0 should set the name correctly', () => expect(glue.GlueVersion.V3_0).toEqual('3.0')); - test('.V4_0 should set the name correctly', () => expect(glue.GlueVersion.V4_0.name).toEqual('4.0')); - - test('of(customVersion) should set the name correctly', () => expect(glue.GlueVersion.of('CustomVersion').name).toEqual('CustomVersion')); + test('.V4_0 should set the name correctly', () => expect(glue.GlueVersion.V4_0).toEqual('4.0')); }); describe('PythonVersion', () => { @@ -25,15 +23,13 @@ describe('PythonVersion', () => { }); describe('JobType', () => { - test('.ETL should set the name correctly', () => expect(glue.JobType.ETL.name).toEqual('glueetl')); - - test('.STREAMING should set the name correctly', () => expect(glue.JobType.STREAMING.name).toEqual('gluestreaming')); + test('.ETL should set the name correctly', () => expect(glue.JobType.ETL).toEqual('glueetl')); - test('.PYTHON_SHELL should set the name correctly', () => expect(glue.JobType.PYTHON_SHELL.name).toEqual('pythonshell')); + test('.STREAMING should set the name correctly', () => expect(glue.JobType.STREAMING).toEqual('gluestreaming')); - test('.RAY should set the name correctly', () => expect(glue.JobType.RAY.name).toEqual('glueray')); + test('.PYTHON_SHELL should set the name correctly', () => expect(glue.JobType.PYTHON_SHELL).toEqual('pythonshell')); - test('of(customName) should set the name correctly', () => expect(glue.JobType.of('CustomName').name).toEqual('CustomName')); + test('.RAY should set the name correctly', () => expect(glue.JobType.RAY).toEqual('glueray')); }); describe('JobExecutable', () => { @@ -70,7 +66,7 @@ describe('JobExecutable', () => { test('with JobType.of("pythonshell") and a language other than JobLanguage.PYTHON should throw', () => { expect(() => glue.JobExecutable.of({ glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.of('pythonshell'), + type: glue.JobType.PYTHON_SHELL, language: glue.JobLanguage.SCALA, script, })).toThrow(/Python shell requires the language to be set to Python/); @@ -79,7 +75,7 @@ describe('JobExecutable', () => { test('with JobType.of("glueray") and a language other than JobLanguage.PYTHON should throw', () => { expect(() => glue.JobExecutable.of({ glueVersion: glue.GlueVersion.V4_0, - type: glue.JobType.of('glueray'), + type: glue.JobType.RAY, language: glue.JobLanguage.SCALA, script, })).toThrow(/Ray requires the language to be set to Python/); @@ -113,19 +109,19 @@ describe('JobExecutable', () => { pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Python Shell`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Python Shell`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with JobType.PYTHON_SHELL and GlueVersion.of("${glueVersion.name}") should throw`, () => { + test(`with JobType.PYTHON_SHELL and GlueVersion.of("${glueVersion}") should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.PYTHON_SHELL, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Python Shell`); + glueVersion: glueVersion, + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Python Shell`); }); }); @@ -137,24 +133,24 @@ describe('JobExecutable', () => { pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Ray`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Ray`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0, glue.GlueVersion.V2_0, glue.GlueVersion.V3_0].forEach((glueVersion) => { - test(`with JobType.of("glueray") and GlueVersion ${glueVersion} should throw`, () => { + test(`with JobType.RAY and GlueVersion ${glueVersion} should throw`, () => { expect(() => glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), + type: glue.JobType.RAY, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Ray`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Ray`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion ${glueVersion.name} should throw`, () => { + test(`with extraJarsFirst set and GlueVersion ${glueVersion} should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.ETL, language: glue.JobLanguage.PYTHON, @@ -162,20 +158,20 @@ describe('JobExecutable', () => { extraJarsFirst: true, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support extraJarsFirst`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion.of("${glueVersion.name}") should throw`, () => { + test(`with extraJarsFirst set and GlueVersion.of("${glueVersion}") should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.ETL, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, extraJarsFirst: true, script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support extraJarsFirst`); + glueVersion: glueVersion, + })).toThrow(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); }); }); @@ -187,19 +183,19 @@ describe('JobExecutable', () => { pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support PythonVersion 2`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support PythonVersion 2`); }); }); [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with PythonVersion.TWO and GlueVersion.of("${glueVersion.name}") should throw`, () => { + test(`with PythonVersion.TWO and GlueVersion.of("${glueVersion}") should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.ETL, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support PythonVersion 2`); + glueVersion: glueVersion, + })).toThrow(`Specified GlueVersion ${glueVersion} does not support PythonVersion 2`); }); }); @@ -225,7 +221,7 @@ describe('JobExecutable', () => { test('with PythonVersion PythonVersion.THREE_NINE and JobType.of("pythonshell") should succeed', () => { expect(glue.JobExecutable.of({ - type: glue.JobType.of('pythonshell'), + type: glue.JobType.PYTHON_SHELL, glueVersion: glue.GlueVersion.V1_0, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -246,7 +242,7 @@ describe('JobExecutable', () => { test('with PythonVersion PythonVersion.THREE_NINE and JobTypeof("glueray") should succeed', () => { expect(glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), + type: glue.JobType.RAY, glueVersion: glue.GlueVersion.V4_0, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.THREE_NINE, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index cfea34c396147..16f4a60acbb7a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -9,21 +9,19 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; describe('WorkerType', () => { - test('.STANDARD should set the name correctly', () => expect(glue.WorkerType.STANDARD.name).toEqual('Standard')); + test('.STANDARD should set the name correctly', () => expect(glue.WorkerType.STANDARD).toEqual('Standard')); - test('.G_1X should set the name correctly', () => expect(glue.WorkerType.G_1X.name).toEqual('G.1X')); + test('.G_1X should set the name correctly', () => expect(glue.WorkerType.G_1X).toEqual('G.1X')); - test('.G_2X should set the name correctly', () => expect(glue.WorkerType.G_2X.name).toEqual('G.2X')); + test('.G_2X should set the name correctly', () => expect(glue.WorkerType.G_2X).toEqual('G.2X')); - test('.G_4X should set the name correctly', () => expect(glue.WorkerType.G_4X.name).toEqual('G.4X')); + test('.G_4X should set the name correctly', () => expect(glue.WorkerType.G_4X).toEqual('G.4X')); - test('.G_8X should set the name correctly', () => expect(glue.WorkerType.G_8X.name).toEqual('G.8X')); + test('.G_8X should set the name correctly', () => expect(glue.WorkerType.G_8X).toEqual('G.8X')); - test('.G_025X should set the name correctly', () => expect(glue.WorkerType.G_025X.name).toEqual('G.025X')); + test('.G_025X should set the name correctly', () => expect(glue.WorkerType.G_025X).toEqual('G.025X')); - test('.Z_2X should set the name correctly', () => expect(glue.WorkerType.Z_2X.name).toEqual('Z.2X')); - - test('of(customType) should set name correctly', () => expect(glue.WorkerType.of('CustomType').name).toEqual('CustomType')); + test('.Z_2X should set the name correctly', () => expect(glue.WorkerType.Z_2X).toEqual('Z.2X')); }); describe('Job', () => { @@ -36,7 +34,7 @@ describe('Job', () => { describe('.fromJobAttributes()', () => { test('with required attrs only', () => { - const job = glue.Job.fromJobAttributes(stack, 'ImportedJob', { jobName }); + const job = glue.JobLegacy.fromJobAttributes(stack, 'ImportedJob', { jobName }); expect(job.jobName).toEqual(jobName); expect(job.jobArn).toEqual(stack.formatArn({ @@ -49,7 +47,7 @@ describe('Job', () => { test('with all attrs', () => { const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - const job = glue.Job.fromJobAttributes(stack, 'ImportedJob', { jobName, role }); + const job = glue.JobLegacy.fromJobAttributes(stack, 'ImportedJob', { jobName, role }); expect(job.jobName).toEqual(jobName); expect(job.jobArn).toEqual(stack.formatArn({ @@ -103,7 +101,7 @@ describe('Job', () => { let extraJars: glue.Code[]; let extraFiles: glue.Code[]; let extraPythonFiles: glue.Code[]; - let job: glue.Job; + let job: glue.JobLegacy; let defaultProps: glue.JobProps; beforeEach(() => { @@ -123,7 +121,7 @@ describe('Job', () => { describe('with necessary props only', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', defaultProps); + job = new glue.JobLegacy(stack, 'Job', defaultProps); }); test('should create a role and use it with the job', () => { @@ -194,7 +192,7 @@ describe('Job', () => { test('with a custom role should use it and set it in CloudFormation', () => { const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - job = new glue.Job(stack, 'JobWithRole', { + job = new glue.JobLegacy(stack, 'JobWithRole', { ...defaultProps, role, }); @@ -206,7 +204,7 @@ describe('Job', () => { }); test('with a custom jobName should set it in CloudFormation', () => { - job = new glue.Job(stack, 'JobWithName', { + job = new glue.JobLegacy(stack, 'JobWithName', { ...defaultProps, jobName, }); @@ -219,7 +217,7 @@ describe('Job', () => { describe('enabling continuous logging with defaults', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, continuousLogging: { enabled: true }, }); @@ -240,7 +238,7 @@ describe('Job', () => { beforeEach(() => { logGroup = logs.LogGroup.fromLogGroupName(stack, 'LogGroup', 'LogGroupName'); - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, continuousLogging: { enabled: true, @@ -310,7 +308,7 @@ describe('Job', () => { describe('enabling execution class', () => { describe('enabling execution class with FLEX', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -329,7 +327,7 @@ describe('Job', () => { describe('enabling execution class with FLEX and WorkerType G_1X', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -351,7 +349,7 @@ describe('Job', () => { describe('enabling execution class with FLEX and WorkerType G_2X', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -373,7 +371,7 @@ describe('Job', () => { describe('enabling execution class with STANDARD', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -392,7 +390,7 @@ describe('Job', () => { describe('errors for execution class with FLEX', () => { test('job type except JobType.ETL should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -403,7 +401,7 @@ describe('Job', () => { }); test('with glue version 0.9 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V0_9, pythonVersion: glue.PythonVersion.THREE, @@ -414,7 +412,7 @@ describe('Job', () => { }); test('with glue version 1.0 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -425,7 +423,7 @@ describe('Job', () => { }); test('with glue version 2.0 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -436,7 +434,7 @@ describe('Job', () => { }); test('with G_025X as worker type that is neither G_1X nor G_2X should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -449,7 +447,7 @@ describe('Job', () => { }); test('with G_4X as worker type that is neither G_1X nor G_2X should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -463,12 +461,12 @@ describe('Job', () => { }); }); - describe('enabling spark ui', () => { + describe('enabling spark ui by default', () => { describe('with no bucket or path provided', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, - sparkUI: { enabled: true }, + //sparkUI: { }, }); }); @@ -556,10 +554,9 @@ describe('Job', () => { beforeEach(() => { sparkUIBucket = s3.Bucket.fromBucketName(stack, 'SparkBucketId', sparkUIBucketName); - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, sparkUI: { - enabled: true, bucket: sparkUIBucket, }, }); @@ -642,10 +639,9 @@ describe('Job', () => { 'Prefix must not end with \'/\'', ].join(EOL); it('fails if path is mis-formatted', () => { - expect(() => new glue.Job(stack, 'BadPrefixJob', { + expect(() => new glue.JobLegacy(stack, 'BadPrefixJob', { ...defaultProps, sparkUI: { - enabled: true, bucket: sparkUIBucket, prefix: badPrefix, }, @@ -654,10 +650,9 @@ describe('Job', () => { beforeEach(() => { sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, sparkUI: { - enabled: true, bucket: sparkUIBucket, prefix: prefix, }, @@ -727,7 +722,7 @@ describe('Job', () => { describe('with extended props', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, jobName, description: 'test job', @@ -800,7 +795,7 @@ describe('Job', () => { const defaultArguments: {[key: string]: string} = {}; defaultArguments[arg] = 'random value'; - expect(() => new glue.Job(stack, `Job${index}`, { + expect(() => new glue.JobLegacy(stack, `Job${index}`, { executable: glue.JobExecutable.scalaEtl({ glueVersion: glue.GlueVersion.V2_0, className, @@ -813,7 +808,7 @@ describe('Job', () => { describe('shell job', () => { test('with unsupported glue version should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V0_9, pythonVersion: glue.PythonVersion.TWO, @@ -823,20 +818,19 @@ describe('Job', () => { }); test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, script, }), - sparkUI: { enabled: true }, })).toThrow('Spark UI is not available for JobType.PYTHON_SHELL'); }); }); describe('ray job', () => { test('with unsupported glue version should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -849,7 +843,7 @@ describe('Job', () => { }); test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V4_0, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -858,12 +852,11 @@ describe('Job', () => { }), workerType: glue.WorkerType.Z_2X, workerCount: 2, - sparkUI: { enabled: true }, })).toThrow('Spark UI is not available for JobType.RAY'); }); test('without runtime should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V4_0, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -876,7 +869,7 @@ describe('Job', () => { }); test('etl job with all props should synthesize correctly', () => { - new glue.Job(stack, 'Job', { + new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -912,7 +905,7 @@ describe('Job', () => { }); test('streaming job with all props should synthesize correctly', () => { - new glue.Job(stack, 'Job', { + new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.scalaStreaming({ glueVersion: glue.GlueVersion.V2_0, extraJarsFirst: true, @@ -947,7 +940,7 @@ describe('Job', () => { describe('event rules and rule-based metrics', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.scalaEtl({ glueVersion: glue.GlueVersion.V2_0, className, @@ -981,11 +974,11 @@ describe('Job', () => { }); [ - { name: 'onSuccess()', invoke: (testJob: glue.Job) => testJob.onSuccess('SuccessRule'), state: 'SUCCEEDED' }, - { name: 'onFailure()', invoke: (testJob: glue.Job) => testJob.onFailure('FailureRule'), state: 'FAILED' }, - { name: 'onTimeout()', invoke: (testJob: glue.Job) => testJob.onTimeout('TimeoutRule'), state: 'TIMEOUT' }, + { name: 'onSuccess()', invoke: (testJob: glue.JobLegacy) => testJob.onSuccess('SuccessRule'), state: 'SUCCEEDED' }, + { name: 'onFailure()', invoke: (testJob: glue.JobLegacy) => testJob.onFailure('FailureRule'), state: 'FAILED' }, + { name: 'onTimeout()', invoke: (testJob: glue.JobLegacy) => testJob.onTimeout('TimeoutRule'), state: 'TIMEOUT' }, ].forEach((testCase) => { - test(`${testCase.name} should create a rule with correct properties`, () => { + test(`${testCase} should create a rule with correct properties`, () => { testCase.invoke(job); Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { @@ -1026,11 +1019,11 @@ describe('Job', () => { }); [ - { name: '.metricSuccess()', invoke: (testJob: glue.Job) => testJob.metricSuccess(), state: 'SUCCEEDED', ruleId: 'SuccessMetricRule' }, - { name: '.metricFailure()', invoke: (testJob: glue.Job) => testJob.metricFailure(), state: 'FAILED', ruleId: 'FailureMetricRule' }, - { name: '.metricTimeout()', invoke: (testJob: glue.Job) => testJob.metricTimeout(), state: 'TIMEOUT', ruleId: 'TimeoutMetricRule' }, + { name: '.metricSuccess()', invoke: (testJob: glue.JobLegacy) => testJob.metricSuccess(), state: 'SUCCEEDED', ruleId: 'SuccessMetricRule' }, + { name: '.metricFailure()', invoke: (testJob: glue.JobLegacy) => testJob.metricFailure(), state: 'FAILED', ruleId: 'FailureMetricRule' }, + { name: '.metricTimeout()', invoke: (testJob: glue.JobLegacy) => testJob.metricTimeout(), state: 'TIMEOUT', ruleId: 'TimeoutMetricRule' }, ].forEach((testCase) => { - test(`${testCase.name} should create the expected singleton event rule and corresponding metric`, () => { + test(`${testCase} should create the expected singleton event rule and corresponding metric`, () => { const metric = testCase.invoke(job); testCase.invoke(job); @@ -1119,7 +1112,7 @@ describe('Job', () => { describe('validation for maxCapacity and workerType', () => { test('maxCapacity with workerType and workerCount should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -1132,7 +1125,7 @@ describe('Job', () => { }); test('maxCapacity with GlueVersion 2.0 or later should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -1143,7 +1136,7 @@ describe('Job', () => { }); test('maxCapacity with Python Shell jobs validation', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -1154,7 +1147,7 @@ describe('Job', () => { }); test('workerType without workerCount should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -1165,7 +1158,7 @@ describe('Job', () => { }); test('workerCount without workerType should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE,