Skip to content

Commit

Permalink
Merge branch 'master' into yaml-generic-constants
Browse files Browse the repository at this point in the history
  • Loading branch information
robertwb authored Jun 4, 2024
2 parents 54f1be1 + b4b1cc0 commit 5c16329
Show file tree
Hide file tree
Showing 155 changed files with 3,582 additions and 2,081 deletions.
3 changes: 3 additions & 0 deletions .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run"
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 1
}
3 changes: 2 additions & 1 deletion .github/trigger_files/beam_PostCommit_XVR_Flink.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run"
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
}
7 changes: 0 additions & 7 deletions .github/workflows/beam_PostCommit_Go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,6 @@ jobs:
uses: ./.github/actions/setup-environment-action
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker us.gcr.io
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,6 @@ jobs:
go-version: default
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker us.gcr.io
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,6 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: run PostCommit Java BigQueryEarlyRollout script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,6 @@ jobs:
java-version: ${{ matrix.java_version }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Authenticate on GCP
id: auth
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker us.gcr.io
Expand Down
8 changes: 0 additions & 8 deletions .github/workflows/beam_PreCommit_CommunityMetrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,6 @@ jobs:
run: |
sudo curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
- name: Install gcloud Kubectl
run: gcloud components install kubectl
- name: run Community Metrics PreCommit script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,6 @@ jobs:
uses: ./.github/actions/setup-environment-action
with:
java-version: default
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: run javaExamplesDataflowPrecommit script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,6 @@ jobs:
comment_phrase: ${{ matrix.job_phrase }}
github_token: ${{ secrets.GITHUB_TOKEN }}
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: Setup environment
uses: ./.github/actions/setup-environment-action
with:
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_PreCommit_Website_Stage_GCS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,6 @@ jobs:
with:
python-version: default
java-version: default
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: Run website_stageWebsite script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand Down
8 changes: 0 additions & 8 deletions .github/workflows/beam_Publish_BeamMetrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,9 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
- name: Setup credential helper
run: |
gcloud auth configure-docker
- name: Install gcloud Kubectl
run: gcloud components install kubectl
- name: run Beam Metrics deployment script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_Publish_Beam_SDK_Snapshots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,6 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.container_task }})
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker ${{ env.docker_registry }}
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_Publish_Docker_Snapshots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,6 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker ${{ env.docker_registry }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,6 @@ jobs:
python-version: ${{ matrix.python_version }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: GCloud Docker credential helper
run: |
gcloud auth configure-docker us.gcr.io
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/beam_Release_Python_NightlySnapshot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,6 @@ jobs:
with:
java-version: default
python-version: default
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: run Cleanup script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand Down
10 changes: 10 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
## I/Os

* Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
* Ensure that BigtableIO closes the reader streams ([#31477](https://github.com/apache/beam/issues/31477)).

## New Features / Improvements

Expand All @@ -87,6 +88,15 @@
This new implementation still supports all (immutable) List methods as before,
but some of the random access methods like get() and size() will be slower.
To use the old implementation one can use View.asList().withRandomAccess().
* SchemaTransforms implemented with TypedSchemaTransformProvider now produce a
configuration Schema with snake_case naming convention
([#31374](https://github.com/apache/beam/pull/31374)). This will make the following
cases problematic:
* Running a pre-2.57.0 remote SDK pipeline containing a 2.57.0+ Java SchemaTransform,
and vice versa:
* Running a 2.57.0+ remote SDK pipeline containing a pre-2.57.0 Java SchemaTransform
* All direct uses of Python's [SchemaAwareExternalTransform](https://github.com/apache/beam/blob/a998107a1f5c3050821eef6a5ad5843d8adb8aec/sdks/python/apache_beam/transforms/external.py#L381)
should be updated to use new snake_case parameter names.

## Deprecations

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ class BeamModulePlugin implements Plugin<Project> {
def influxdb_version = "2.19"
def httpclient_version = "4.5.13"
def httpcore_version = "4.4.14"
def jackson_version = "2.14.1"
def jackson_version = "2.15.4"
def jaxb_api_version = "2.3.3"
def jsr305_version = "3.0.2"
def everit_json_version = "1.14.2"
Expand All @@ -635,6 +635,7 @@ class BeamModulePlugin implements Plugin<Project> {
def sbe_tool_version = "1.25.1"
def singlestore_jdbc_version = "1.1.4"
def slf4j_version = "1.7.30"
def snakeyaml_version = "2.2"
def spark2_version = "2.4.8"
def spark3_version = "3.2.2"
def spotbugs_version = "4.0.6"
Expand Down Expand Up @@ -692,7 +693,9 @@ class BeamModulePlugin implements Plugin<Project> {
aws_java_sdk2_profiles : "software.amazon.awssdk:profiles:$aws_java_sdk2_version",
azure_sdk_bom : "com.azure:azure-sdk-bom:1.2.14",
bigdataoss_gcsio : "com.google.cloud.bigdataoss:gcsio:$google_cloud_bigdataoss_version",
bigdataoss_gcs_connector : "com.google.cloud.bigdataoss:gcs-connector:hadoop2-$google_cloud_bigdataoss_version",
bigdataoss_util : "com.google.cloud.bigdataoss:util:$google_cloud_bigdataoss_version",
bigdataoss_util_hadoop : "com.google.cloud.bigdataoss:util-hadoop:hadoop2-$google_cloud_bigdataoss_version",
byte_buddy : "net.bytebuddy:byte-buddy:1.14.12",
cassandra_driver_core : "com.datastax.cassandra:cassandra-driver-core:$cassandra_driver_version",
cassandra_driver_mapping : "com.datastax.cassandra:cassandra-driver-mapping:$cassandra_driver_version",
Expand All @@ -708,12 +711,12 @@ class BeamModulePlugin implements Plugin<Project> {
cdap_plugin_zendesk : "io.cdap.plugin:zendesk-plugins:1.0.0",
checker_qual : "org.checkerframework:checker-qual:$checkerframework_version",
classgraph : "io.github.classgraph:classgraph:$classgraph_version",
commons_codec : "commons-codec:commons-codec:1.15",
commons_codec : "commons-codec:commons-codec:1.17.0",
commons_collections : "commons-collections:commons-collections:3.2.2",
commons_compress : "org.apache.commons:commons-compress:1.21",
commons_compress : "org.apache.commons:commons-compress:1.26.2",
commons_csv : "org.apache.commons:commons-csv:1.8",
commons_io : "commons-io:commons-io:2.13.0",
commons_lang3 : "org.apache.commons:commons-lang3:3.9",
commons_io : "commons-io:commons-io:2.16.1",
commons_lang3 : "org.apache.commons:commons-lang3:3.14.0",
commons_logging : "commons-logging:commons-logging:1.2",
commons_math3 : "org.apache.commons:commons-math3:3.6.1",
dbcp2 : "org.apache.commons:commons-dbcp2:$dbcp2_version",
Expand Down Expand Up @@ -864,6 +867,7 @@ class BeamModulePlugin implements Plugin<Project> {
sbe_tool : "uk.co.real-logic:sbe-tool:$sbe_tool_version",
singlestore_jdbc : "com.singlestore:singlestore-jdbc-client:$singlestore_jdbc_version",
slf4j_api : "org.slf4j:slf4j-api:$slf4j_version",
snake_yaml : "org.yaml:snakeyaml:$snakeyaml_version",
slf4j_android : "org.slf4j:slf4j-android:$slf4j_version",
slf4j_ext : "org.slf4j:slf4j-ext:$slf4j_version",
slf4j_jdk14 : "org.slf4j:slf4j-jdk14:$slf4j_version",
Expand Down Expand Up @@ -903,7 +907,7 @@ class BeamModulePlugin implements Plugin<Project> {
vendored_guava_32_1_2_jre : "org.apache.beam:beam-vendor-guava-32_1_2-jre:0.1",
vendored_calcite_1_28_0 : "org.apache.beam:beam-vendor-calcite-1_28_0:0.2",
woodstox_core_asl : "org.codehaus.woodstox:woodstox-core-asl:4.4.1",
zstd_jni : "com.github.luben:zstd-jni:1.5.2-5",
zstd_jni : "com.github.luben:zstd-jni:1.5.6-3",
quickcheck_core : "com.pholser:junit-quickcheck-core:$quickcheck_version",
quickcheck_generators : "com.pholser:junit-quickcheck-generators:$quickcheck_version",
arrow_vector : "org.apache.arrow:arrow-vector:$arrow_version",
Expand Down
2 changes: 1 addition & 1 deletion examples/java/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,5 @@ task wordCount(type:JavaExec) {
mainClass = "org.apache.beam.examples.WordCount"
classpath = sourceSets.main.runtimeClasspath
systemProperties = System.getProperties()
args = ["--output=/tmp/ouput.txt"]
args = ["--output=/tmp/output.txt"]
}
2 changes: 1 addition & 1 deletion examples/notebooks/get-started/try-apache-beam-yaml.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Beam YAML has EXPERIMENTAL ability to do aggregations to group and combine values across records. The is accomplished via the `Combine` transform type.\n",
"Beam YAML has the ability to do aggregations to group and combine values across records. The is accomplished via the `Combine` transform type.\n",
"\n",
"In this example we'll aggregate our records based on the `is_adult` classification. We'll calculate an average age for each of the groups."
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public static void tearDownClass() {
Configuration.class),
"large",
Configuration.fromJsonString(
"{\"numColumns\":10,\"rowsPerSecond\":50000,\"minutes\":60,\"numRecords\":5000000,\"valueSizeBytes\":1000,\"pipelineTimeout\":120,\"runner\":\"DataflowRunner\"}",
"{\"numColumns\":10,\"rowsPerSecond\":50000,\"minutes\":60,\"numRecords\":10000000,\"valueSizeBytes\":1000,\"pipelineTimeout\":120,\"runner\":\"DataflowRunner\"}",
Configuration.class));
} catch (IOException e) {
throw new RuntimeException(e);
Expand Down Expand Up @@ -234,6 +234,9 @@ private void runTest() throws IOException {
writeIO =
BigQueryIO.<byte[]>write()
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
.withSuccessfulInsertsPropagation(false)
.withoutValidation()
.optimizedWrites()
.withAvroFormatFunction(
new AvroFormatFn(
configuration.numColumns,
Expand All @@ -246,6 +249,8 @@ private void runTest() throws IOException {
BigQueryIO.<byte[]>write()
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
.withSuccessfulInsertsPropagation(false)
.withoutValidation()
.optimizedWrites()
.withFormatFunction(new JsonFormatFn(configuration.numColumns));
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public void teardown() {
Configuration.class),
"large",
Configuration.fromJsonString(
"{\"rowsPerSecond\":50000,\"minutes\":60,\"pipelineTimeout\":120,\"numRecords\":5000000,\"valueSizeBytes\":1000,\"runner\":\"DataflowRunner\"}",
"{\"rowsPerSecond\":50000,\"minutes\":60,\"pipelineTimeout\":120,\"numRecords\":10000000,\"valueSizeBytes\":1000,\"runner\":\"DataflowRunner\"}",
Configuration.class));
} catch (IOException e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ public void teardown() {
Configuration.class),
"large",
Configuration.fromJsonString(
"{\"numRecords\":10000000,\"rowsPerSecond\":25000,\"minutes\":30,\"valueSizeBytes\":1000,\"pipelineTimeout\":300,\"runner\":\"DataflowRunner\"}",
"{\"numRecords\":20000000,\"rowsPerSecond\":50000,\"minutes\":60,\"valueSizeBytes\":1000,\"pipelineTimeout\":180,\"runner\":\"DataflowRunner\"}",
Configuration.class));
} catch (IOException e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,19 @@ message BuilderMethod {
bytes payload = 3;
}

message Annotations {
enum Enum {
// The annotation key for the encoded configuration Row used to build a transform
CONFIG_ROW_KEY = 0 [(org.apache.beam.model.pipeline.v1.beam_constant) = "config_row"];
// The annotation key for the configuration Schema used to decode the configuration Row
CONFIG_ROW_SCHEMA_KEY = 1 [(org.apache.beam.model.pipeline.v1.beam_constant) = "config_row_schema"];
// If ths transform is a SchemaTransform, this is the annotation key for the SchemaTransform's URN
SCHEMATRANSFORM_URN_KEY = 2 [(org.apache.beam.model.pipeline.v1.beam_constant) = "schematransform_urn"];
// If the transform is a ManagedSchemaTransform, this is the annotation key for the underlying SchemaTransform's URN
MANAGED_UNDERLYING_TRANSFORM_URN_KEY = 3 [(org.apache.beam.model.pipeline.v1.beam_constant) = "managed_underlying_transform_urn"];
}
}

// Payload for a Schema-aware PTransform.
// This is a transform that is aware of its input and output PCollection schemas
// and is configured using Beam Schema-compatible parameters.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public NativeReader<?> create(

@Override
public NativeReaderIterator<WindowedValue<T>> iterator() throws IOException {
return new PubsubReaderIterator(context.getWork());
return new PubsubReaderIterator(context.getWorkItem());
}

class PubsubReaderIterator extends WindmillReaderIteratorBase<T> {
Expand Down
Loading

0 comments on commit 5c16329

Please sign in to comment.