diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..c6f2b2c7 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,33 @@ +name: CI Build +on: + push: + branches: [ '**' ] + pull_request: + branches: [ '**' ] +jobs: + build: + name: Maven Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup Java 8 + uses: actions/setup-java@v3 + with: + java-version: 8 + distribution: 'temurin' + cache: 'maven' + + - name: Run the Maven verify phase + run: mvn clean verify -DappId=${{ secrets.APP_ID }} -Dcluster=${{ secrets.CLUSTER }} -DappKey=${{ secrets.APP_SECRET }} -Dauthority=${{ secrets.TENANT_ID }} -Ddatabase=${{ secrets.DATABASE }} -Dingest=${{ secrets.INGEST }} + env: + appId: ${{ secrets.APP_ID }} + authority: ${{ secrets.TENANT_ID }} + appKey: ${{ secrets.APP_SECRET }} + database: ${{ secrets.DATABASE }} + cluster: ${{ secrets.CLUSTER }} + ingest: ${{ secrets.INGEST }} + + - name: Publish Test Report + if: success() || failure() + uses: scacap/action-surefire-report@v1 + diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..2319ea0b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,49 @@ +name: Release +on: + workflow_dispatch: + release: + types: [released] +jobs: + release: + runs-on: ubuntu-latest + strategy: + matrix: + java: [ '8' ] + name: Java ${{ matrix.java }} + steps: + - uses: actions/checkout@v3 + - name: Setup Java ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + cache: 'maven' + - name: Run the Maven package phase + run: mvn clean package -DappId=${{ secrets.APP_ID }} -Dcluster=${{ secrets.CLUSTER }} -DappKey=${{ secrets.APP_SECRET }} -Dauthority=${{ secrets.TENANT_ID }} -Ddatabase=${{ secrets.DATABASE }} -Dingest=${{ secrets.INGEST }} + env: + appId: ${{ secrets.APP_ID }} + authority: ${{ secrets.TENANT_ID }} + appKey: ${{ secrets.APP_SECRET }} + database: ${{ secrets.DATABASE }} + cluster: ${{ secrets.CLUSTER }} + ingest: ${{ secrets.INGEST }} + + - name: Get versions + id: get_version + run: | + echo ::set-output name=VERSION::$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + - name: Move artifacts to staging + run: | + version=${{ steps.get_version.outputs.VERSION }} + mkdir staging + cp target/*.jar staging + cp target/components/packages/microsoftcorporation-kafka-sink-azure-kusto-${{ steps.get_version.outputs.VERSION }}.zip staging + - name: Github Release + uses: anton-yurchenko/git-release@v5.0.1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + args: | + staging/kafka-sink-azure-kusto-${{ steps.get_version.outputs.VERSION }}-jar-with-dependencies.jar + staging/microsoftcorporation-kafka-sink-azure-kusto-${{ steps.get_version.outputs.VERSION }}.zip + continue-on-error: true \ No newline at end of file diff --git a/README.md b/README.md index 0d42f128..3f5c0912 100644 --- a/README.md +++ b/README.md @@ -6,23 +6,67 @@ database PaaS offering. ## Topics covered -[1. Overview](README.md#1-overview)
-[2. Integration design](README.md#2-integration-design)
-[3. Features supported](README.md#3-features-supported)
-[4. Connect worker properties](README.md#4-connect-worker-properties)
-[5. Sink properties](README.md#5-sink-properties)
-[6. Roadmap](README.md#6-roadmap)
-[7. Deployment overview](README.md#7-deployment-overview)
-[8. Connector download/build from source](README.md#8-connector-downloadbuild-from-source)
-[9. Test drive the connector - standalone mode](README.md#9-test-drive-the-connector---standalone-mode)
-[10. Distributed deployment details](README.md#10-distributed-deployment-details)
-[11. Test drive the connector - distributed mode](README.md#11-test-drive-the-connector---distributed-mode)
-[12. Apache Kafka version - Docker Kafka Connect base image version - Confluent Helm chart version related](README.md#12-apache-kafka-version---docker-kafka-connect-base-image-version---confluent-helm-chart-version-related)
-[13. Other](README.md#13-other)
-[14. Need Support?](README.md#14-need-support)
-[15. Major version specifics](README.md#15-major-version-specifics)
-[16. Release History](README.md#16-release-history)
-[17. Contributing](README.md#17-contributing)
+ +* [Azure Data Explorer Kafka Connect Kusto Sink Connector](#azure-data-explorer-kafka-connect-kusto-sink-connector) + * [Topics covered](#topics-covered) + * [1. Overview](#1-overview) + * [2. Integration design](#2-integration-design) + * [3. Features supported](#3-features-supported) + * [3.1. Validation of required properties on start-up and fail-fast](#31-validation-of-required-properties-on-start-up-and-fail-fast) + * [3.2. Configurable behavior on errors](#32-configurable-behavior-on-errors) + * [3.3. Configurable retries](#33-configurable-retries) + * [3.4. Serialization formats](#34-serialization-formats) + * [3.5. Schema registry](#35-schema-registry) + * [3.6. Schema evolution](#36-schema-evolution) + * [3.7. Kafka Connect converters](#37-kafka-connect-converters) + * [3.8. Kafka Connect transformers](#38-kafka-connect-transformers) + * [3.9. Topics to tables mapping](#39-topics-to-tables-mapping) + * [3.10. Kafka Connect Dead Letter Queue](#310-kafka-connect-dead-letter-queue) + * [3.11. Miscellaneous Dead Letter Queue](#311-miscellaneous-dead-letter-queue) + * [3.12. Delivery semantics](#312-delivery-semantics) + * [3.13. Overrides](#313-overrides) + * [3.14. Parallelism](#314-parallelism) + * [3.15. Authentication & Authorization to Azure Data Explorer](#315-authentication--authorization-to-azure-data-explorer) + * [3.16. Security related](#316-security-related) + * [4. Connect worker properties](#4-connect-worker-properties) + * [4.1. Confluent Cloud](#41-confluent-cloud) + * [4.2. HDInsight Kafka with Enterprise Security Package](#42-hdinsight-kafka-with-enterprise-security-package) + * [5. Sink properties](#5-sink-properties) + * [6. Streaming ingestion](#6-streaming-ingestion) + * [7. Roadmap](#7-roadmap) + * [8. Deployment overview](#8-deployment-overview) + * [8.1. Standalone Kafka Connect deployment mode](#81-standalone-kafka-connect-deployment-mode) + * [8.2. Distributed Kafka Connect deployment mode](#82-distributed-kafka-connect-deployment-mode) + * [9. Connector download/build from source](#9-connector-downloadbuild-from-source) + * [9.1. Download a ready-to-use uber jar from our Github repo releases listing](#91-download-a-ready-to-use-uber-jar-from-our-github-repo-releases-listing) + * [9.2. Download the connector from Confluent Connect Hub](#92-download-the-connector-from-confluent-connect-hub) + * [9.3. Build uber jar from source](#93-build-uber-jar-from-source) + * [10. Test drive the connector - standalone mode](#10-test-drive-the-connector---standalone-mode) + * [10.1. Self-contained Dockerized setup](#101-self-contained-dockerized-setup) + * [10.2. HDInsight Kafka, on an edge node](#102-hdinsight-kafka-on-an-edge-node) + * [11. Distributed deployment details](#11-distributed-deployment-details) + * [11.1. Docker image creation](#111-docker-image-creation) + * [11.2. Provision Kafka Connect workers on an Azure Kubernetes Service cluster](#112-provision-kafka-connect-workers-on-an-azure-kubernetes-service-cluster) + * [11.3. Postman for Kafka Connect REST APIs or REST calls from your CLI](#113-postman-for-kafka-connect-rest-apis-or-rest-calls-from-your-cli) + * [11.4. Launch the connector tasks using the Kafka Connect REST API](#114-launch-the-connector-tasks-using-the-kafka-connect-rest-api) + * [12. Test drive the connector - distributed mode](#12-test-drive-the-connector---distributed-mode) + * [12.1. HDInsight Kafka](#121-hdinsight-kafka) + * [12.2. Confluent Cloud](#122-confluent-cloud) + * [12.3. 
Confluent IaaS (operator based)](#123-confluent-iaas-operator-based) + * [13. Apache Kafka version - Docker Kafka Connect base image version - Confluent Helm chart version related](#13-apache-kafka-version---docker-kafka-connect-base-image-version---confluent-helm-chart-version-related) + * [13.1. Docker image](#131-docker-image) + * [13.2. Helm chart](#132-helm-chart) + * [14. Other](#14-other) + * [14.1. Feedback, issues and contribution](#141-feedback-issues-and-contribution) + * [14.2. Scaling out/in](#142-scaling-outin) + * [14.3. Sizing](#143-sizing) + * [14.4. Performance tuning](#144-performance-tuning) + * [14.5. Upgrading to version 1.x from prior versions](#145-upgrading-to-version-1x-from-prior-versions) + * [15. Need Support?](#15-need-support) + * [16. Major version specifics](#16-major-version-specifics) + * [17. Release History](#17-release-history) + * [17. Contributing](#17-contributing) +
@@ -98,14 +142,14 @@ true' in 'kusto.tables.topics.mapping', by default streaming is set as false. - The connector supports the following converters: -| # | Converter | Details | -|:----| :--- |:---------------------------------------------------------| -| 1 | org.apache.kafka.connect.storage.StringConverter | Use with csv/json | -| 2 | org.apache.kafka.connect.json.JsonConverter | Use with schemaless json | -| 3 | io.confluent.connect.avro.AvroConverter | Use with avro | -| 4 | io.confluent.connect.json.JsonSchemaConverter | Use with json with schema registry | -| 5 | org.apache.kafka.connect.converters.ByteArrayConverter | Use with ORC, Parquet files written as messages to Kafka | -| 6 | io.confluent.connect.protobuf.ProtobufConverter | Use with protobuf format with schema registry | +| # | Converter | Details | +|:--|:-------------------------------------------------------|:---------------------------------------------------------| +| 1 | org.apache.kafka.connect.storage.StringConverter | Use with csv/json | +| 2 | org.apache.kafka.connect.json.JsonConverter | Use with schemaless json | +| 3 | io.confluent.connect.avro.AvroConverter | Use with avro | +| 4 | io.confluent.connect.json.JsonSchemaConverter | Use with json with schema registry | +| 5 | org.apache.kafka.connect.converters.ByteArrayConverter | Use with ORC, Parquet files written as messages to Kafka | +| 6 | io.confluent.connect.protobuf.ProtobufConverter | Use with protobuf format with schema registry | ### 3.8. Kafka Connect transformers @@ -284,48 +328,48 @@ ENV CONNECT_SASL_JAAS_CONFIG="com.sun.security.auth.module.Krb5LoginModule requi The following is complete set of connector sink properties- -| # | Property | Purpose | Details | -|:----|:---------------------------------------------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 1 | connector.class | Classname of the Kusto sink | Hard code to ```com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector```
*Required* | -| 2 | topics | Kafka topic specification | List of topics separated by commas
*Required* | -| 3 | kusto.ingestion.url | Kusto ingestion endpoint URL | Provide the ingest URL of your ADX cluster
Use the following construct for the private URL - https://ingest-private-[cluster].kusto.windows.net
*Required* | -| 4 | kusto.query.url | Kusto query endpoint URL | Provide the engine URL of your ADX cluster
*Optional* | -| 5 | aad.auth.strategy | Credentials for Kusto | Strategy to authenticate against Azure Active Directory, either ``application`` (default) or ``managed_identity``.
*Optional, `application` by default* | -| 6 | aad.auth.authority | Credentials for Kusto | Provide the tenant ID of your Azure Active Directory
*Required when authentication is done with an `application` or when `kusto.validation.table.enable` is set to `true`* | -| 7 | aad.auth.appid | Credentials for Kusto | Provide Azure Active Directory Service Principal Name
*Required when authentication is done with an `application` or when `kusto.validation.table.enable` is set to `true`* | -| 8 | aad.auth.appkey | Credentials for Kusto | Provide Azure Active Directory Service Principal secret
*Required when authentication is done with an `application`* | -| 9 | kusto.tables.topics.mapping | Mapping of topics to tables | Provide 1..many topic-table comma-separated mappings as follows-
[{'topic': '\','db': '\', 'table': '\','format': '', 'mapping':'\','streaming':'false'},{'topic': '\','db': '\', 'table': '\','format': '', 'mapping':'\','streaming':'false'}]
*Required*
Note : The attribute mapping (Ex:'mapping':''tableMappingName1') is an optional attribute. During ingestion, Azure Data Explorer automatically maps column according to the ingestion format | -| 10 | key.converter | Deserialization | One of the below supported-
org.apache.kafka.connect.storage.StringConverter
org.apache.kafka.connect.json.JsonConverter
io.confluent.connect.avro.AvroConverter
io.confluent.connect.json.JsonSchemaConverter
org.apache.kafka.connect.converters.ByteArrayConverter

*Required* | -| 11 | value.converter | Deserialization | One of the below supported-
org.apache.kafka.connect.storage.StringConverter
org.apache.kafka.connect.json.JsonConverter
io.confluent.connect.avro.AvroConverter
io.confluent.connect.json.JsonSchemaConverter
org.apache.kafka.connect.converters.ByteArrayConverter

*Required* | -| 12 | value.converter.schema.registry.url | Schema validation | URI of the Kafka schema registry
*Optional* | -| 13 | value.converter.schemas.enable | Schema validation | Set to true if you have embedded schema with payload but are not leveraging the schema registry
Applicable for avro and json

*Optional* | -| 14 | tasks.max | connector parallelism | Specify the number of connector copy/sink tasks
*Required* | -| 15 | flush.size.bytes | Performance knob for batching | Maximum bufer byte size per topic+partition combination that in combination with flush.interval.ms (whichever is reached first) should result in sinking to Kusto
*Default - 1 MB*
*Required* | -| 16 | flush.interval.ms | Performance knob for batching | Minimum time interval per topic+partition combo that in combination with flush.size.bytes (whichever is reached first) should result in sinking to Kusto
*Default - 30 seconds*
*Required* | -| 17 | tempdir.path | Local directory path on Kafka Connect worker to buffer files to before shipping to Kusto | Default is value returned by ```System.getProperty("java.io.tmpdir")``` with a GUID attached to it

*Optional* | -| 18 | behavior.on.error | Configurable behavior in response to errors encountered | Possible values - log, ignore, fail

log - log the error, send record to dead letter queue, and continue processing
ignore - log the error, send record to dead letter queue, proceed with processing despite errors encountered
fail - shut down connector task upon encountering

*Default - fail*
*Optional* | -| 19 | errors.retry.max.time.ms | Configurable retries for transient errors | Period of time in milliseconds to retry for transient errors

*Default - 300 ms*
*Optional* | -| 20 | errors.retry.backoff.time.ms | Configurable retries for transient errors | Period of time in milliseconds to backoff before retry for transient errors

*Default - 10 ms*
*Optional* | -| 21 | errors.deadletterqueue.bootstrap.servers | Channel to write records that failed deserialization | CSV or kafkaBroker:port
*Optional* | -| 22 | errors.deadletterqueue.topic.name | Channel to write records that failed deserialization | Pre-created topic name
*Optional* | -| 23 | errors.deadletterqueue.security.protocol | Channel to write records that failed deserialization | Securitry protocol of secure Kafka cluster
*Optional but when feature is used with secure cluster, is required* | -| 24 | errors.deadletterqueue.sasl.mechanism | Channel to write records that failed deserialization | SASL mechanism of secure Kafka cluster
*Optional but when feature is used with secure cluster, is required* | -| 25 | errors.deadletterqueue.sasl.jaas.config | Channel to write records that failed deserialization | JAAS config of secure Kafka cluster
*Optional but when feature is used with secure cluster, is required* | -| 26 | misc.deadletterqueue.bootstrap.servers | Channel to write records that due to reasons other than deserialization | CSV of kafkaBroker:port
*Optional* | -| 27 | misc.deadletterqueue.topic.name | Channel to write records that due to reasons other than deserialization | Pre-created topic name
*Optional* | -| 28 | misc.deadletterqueue.security.protocol | Channel to write records that due to reasons other than deserialization | Securitry protocol of secure Kafka cluster
*Optional but when feature is used with secure cluster, is required* | -| 29 | misc.deadletterqueue.sasl.mechanism | Channel to write records that due to reasons other than deserialization | SASL mechanism of secure Kafka cluster
*Optional but when feature is used with secure cluster, is required* | -| 30 | misc.deadletterqueue.sasl.jaas.config | Channel to write records that due to reasons other than deserialization | JAAS config of secure Kafka cluster
*Optional but when feature is used with secure cluster, is required* | -| 31 | consumer.override.bootstrap.servers | Security details explicitly required for secure Kafka clusters | Bootstrap server:port CSV of secure Kafka cluster
*Required for secure Kafka clusters* | -| 32 | consumer.override.security.protocol | Security details explicitly required for secure Kafka clusters | Security protocol of secure Kafka cluster
*Required for secure Kafka clusters* | -| 33 | consumer.override.sasl.mechanism | Security details explicitly required for secure Kafka clusters | SASL mechanism of secure Kafka cluster
*Required for secure Kafka clusters* | -| 34 | consumer.override.sasl.jaas.config | Security details explicitly required for secure Kafka clusters | JAAS config of secure Kafka cluster
*Required for secure Kafka clusters* | -| 35 | consumer.override.sasl.kerberos.service.name | Security details explicitly required for secure Kafka clusters, specifically kerberized Kafka | Kerberos service name of kerberized Kafka cluster
*Required for kerberized Kafka clusters* | -| 36 | consumer.override.auto.offset.reset | Configurable consuming from offset | Possible values are - earliest or latest
*Optional* | -| 37 | consumer.override.max.poll.interval.ms | Config to prevent duplication | Set to a value to avoid consumer leaving the group while the Connector is retrying
*Optional* | -| 38 | kusto.validation.table.enable | Validation config to verify the target table exists & the role of user has ingestion privileges | If true , validates existence of table & the princpal has ingestor role. Defaults to false, has to be explicitly set to true to enable this check
*Optional* | -| 38 | proxy.host | Host details of proxy server | Host details of proxy server configuration
*Optional* | -| 38 | proxy.port | Port details of proxy server | Port details of proxy server configuration
*Optional* | +| # | Property | Purpose | Details | +|:---|:---------------------------------------------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1 | connector.class | Classname of the Kusto sink | Hard code to ```com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector```
*Required* | +| 2 | topics | Kafka topic specification | List of topics separated by commas
*Required* | +| 3 | kusto.ingestion.url | Kusto ingestion endpoint URL | Provide the ingest URL of your ADX cluster
Use the following construct for the private URL - https://ingest-private-[cluster].kusto.windows.net
*Required* | +| 4 | kusto.query.url | Kusto query endpoint URL | Provide the engine URL of your ADX cluster
*Optional* | +| 5 | aad.auth.strategy | Credentials for Kusto | Strategy to authenticate against Azure Active Directory, either ``application`` (default) or ``managed_identity``.
*Optional, `application` by default* | +| 6 | aad.auth.authority | Credentials for Kusto | Provide the tenant ID of your Azure Active Directory
*Required when authentication is done with an `application` or when `kusto.validation.table.enable` is set to `true`* | +| 7 | aad.auth.appid | Credentials for Kusto | Provide Azure Active Directory Service Principal Name
*Required when authentication is done with an `application` or when `kusto.validation.table.enable` is set to `true`* | +| 8 | aad.auth.appkey | Credentials for Kusto | Provide Azure Active Directory Service Principal secret
*Required when authentication is done with an `application`* | +| 9 | kusto.tables.topics.mapping | Mapping of topics to tables | Provide 1..many topic-table comma-separated mappings as follows-
[{'topic': '\<topicName1\>','db': '\<dbName1\>', 'table': '\<tableName1\>','format': '', 'mapping':'\<tableMappingName1\>','streaming':'false'},{'topic': '\<topicName2\>','db': '\<dbName2\>', 'table': '\<tableName2\>','format': '', 'mapping':'\<tableMappingName2\>','streaming':'false'}]<br>
*Required*
Note: The mapping attribute (e.g. 'mapping':'tableMappingName1') is optional. During ingestion, Azure Data Explorer automatically maps columns according to the ingestion format; a sample connector configuration is shown after this table | +| 10 | key.converter | Deserialization | One of the below supported-<br>
org.apache.kafka.connect.storage.StringConverter
org.apache.kafka.connect.json.JsonConverter
io.confluent.connect.avro.AvroConverter
io.confluent.connect.json.JsonSchemaConverter
org.apache.kafka.connect.converters.ByteArrayConverter

*Required* | +| 11 | value.converter | Deserialization | One of the below supported-
org.apache.kafka.connect.storage.StringConverter
org.apache.kafka.connect.json.JsonConverter
io.confluent.connect.avro.AvroConverter
io.confluent.connect.json.JsonSchemaConverter
org.apache.kafka.connect.converters.ByteArrayConverter

*Required* | +| 12 | value.converter.schema.registry.url | Schema validation | URI of the Kafka schema registry
*Optional* | +| 13 | value.converter.schemas.enable | Schema validation | Set to true if the schema is embedded in the payload but you are not leveraging the schema registry<br>
Applicable for avro and json

*Optional* | +| 14 | tasks.max | connector parallelism | Specify the number of connector copy/sink tasks
*Required* | +| 15 | flush.size.bytes | Performance knob for batching | Maximum buffer size in bytes per topic+partition combination; together with flush.interval.ms (whichever is reached first), this triggers sinking to Kusto<br>
*Default - 1 MB*
*Required* | +| 16 | flush.interval.ms | Performance knob for batching | Minimum time interval per topic+partition combination; together with flush.size.bytes (whichever is reached first), this triggers sinking to Kusto<br>
*Default - 30 seconds*
*Required* | +| 17 | tempdir.path | Local directory path on the Kafka Connect worker used to buffer files before shipping to Kusto | Default is the value returned by ```System.getProperty("java.io.tmpdir")``` with a GUID attached to it<br>

*Optional* | +| 18 | behavior.on.error | Configurable behavior in response to errors encountered | Possible values - log, ignore, fail

log - log the error, send record to dead letter queue, and continue processing
ignore - log the error, send record to dead letter queue, proceed with processing despite errors encountered
fail - shut down the connector task upon encountering an error<br>

*Default - fail*
*Optional* | +| 19 | errors.retry.max.time.ms | Configurable retries for transient errors | Period of time in milliseconds to retry for transient errors

*Default - 300 ms*
*Optional* | +| 20 | errors.retry.backoff.time.ms | Configurable retries for transient errors | Period of time in milliseconds to backoff before retry for transient errors

*Default - 10 ms*
*Optional* | +| 21 | errors.deadletterqueue.bootstrap.servers | Channel to write records that failed deserialization | CSV of kafkaBroker:port<br>
*Optional* | +| 22 | errors.deadletterqueue.topic.name | Channel to write records that failed deserialization | Pre-created topic name
*Optional* | +| 23 | errors.deadletterqueue.security.protocol | Channel to write records that failed deserialization | Security protocol of secure Kafka cluster<br>
*Optional, but required when the feature is used with a secure cluster* | +| 24 | errors.deadletterqueue.sasl.mechanism | Channel to write records that failed deserialization | SASL mechanism of secure Kafka cluster<br>
*Optional, but required when the feature is used with a secure cluster* | +| 25 | errors.deadletterqueue.sasl.jaas.config | Channel to write records that failed deserialization | JAAS config of secure Kafka cluster<br>
*Optional, but required when the feature is used with a secure cluster* | +| 26 | misc.deadletterqueue.bootstrap.servers | Channel to write records that failed due to reasons other than deserialization | CSV of kafkaBroker:port<br>
*Optional* | +| 27 | misc.deadletterqueue.topic.name | Channel to write records that failed due to reasons other than deserialization | Pre-created topic name<br>
*Optional* | +| 28 | misc.deadletterqueue.security.protocol | Channel to write records that failed due to reasons other than deserialization | Security protocol of secure Kafka cluster<br>
*Optional, but required when the feature is used with a secure cluster* | +| 29 | misc.deadletterqueue.sasl.mechanism | Channel to write records that failed due to reasons other than deserialization | SASL mechanism of secure Kafka cluster<br>
*Optional, but required when the feature is used with a secure cluster* | +| 30 | misc.deadletterqueue.sasl.jaas.config | Channel to write records that failed due to reasons other than deserialization | JAAS config of secure Kafka cluster<br>
*Optional, but required when the feature is used with a secure cluster* | +| 31 | consumer.override.bootstrap.servers | Security details explicitly required for secure Kafka clusters | Bootstrap server:port CSV of secure Kafka cluster<br>
*Required for secure Kafka clusters* | +| 32 | consumer.override.security.protocol | Security details explicitly required for secure Kafka clusters | Security protocol of secure Kafka cluster
*Required for secure Kafka clusters* | +| 33 | consumer.override.sasl.mechanism | Security details explicitly required for secure Kafka clusters | SASL mechanism of secure Kafka cluster
*Required for secure Kafka clusters* | +| 34 | consumer.override.sasl.jaas.config | Security details explicitly required for secure Kafka clusters | JAAS config of secure Kafka cluster
*Required for secure Kafka clusters* | +| 35 | consumer.override.sasl.kerberos.service.name | Security details explicitly required for secure Kafka clusters, specifically kerberized Kafka | Kerberos service name of kerberized Kafka cluster
*Required for kerberized Kafka clusters* | +| 36 | consumer.override.auto.offset.reset | Configurable consuming from offset | Possible values are - earliest or latest
*Optional* | +| 37 | consumer.override.max.poll.interval.ms | Config to prevent duplication | Set to a value high enough to avoid the consumer leaving the group while the connector is retrying<br>
*Optional* | +| 38 | kusto.validation.table.enable | Validation config to verify that the target table exists & the user has ingestion privileges | If true, validates that the table exists & the principal has the ingestor role. Defaults to false; must be explicitly set to true to enable this check<br>
*Optional* | +| 39 | proxy.host | Host details of proxy server | Host of the proxy server used by the connector<br>
*Optional* | +| 40 | proxy.port | Port details of proxy server | Port of the proxy server used by the connector<br>
*Optional* |
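The table above can be distilled into a small end-to-end example. The sketch below is an illustrative distributed-mode connector definition as it would be submitted to the Kafka Connect REST API; the cluster URLs, credentials, topic, database, and table names are placeholders to replace with your own values, and DLQ or consumer.override.* properties can be added as your cluster requires.

```json
{
  "name": "kusto-sink-example",
  "config": {
    "connector.class": "com.microsoft.azure.kusto.kafka.connect.sink.KustoSinkConnector",
    "topics": "storm-events",
    "tasks.max": "2",
    "kusto.ingestion.url": "https://ingest-[cluster].kusto.windows.net",
    "kusto.query.url": "https://[cluster].kusto.windows.net",
    "aad.auth.strategy": "application",
    "aad.auth.authority": "[tenant-id]",
    "aad.auth.appid": "[application-id]",
    "aad.auth.appkey": "[application-secret]",
    "kusto.tables.topics.mapping": "[{'topic': 'storm-events','db': 'storms-db','table': 'storm_events','format': 'json','streaming': 'false'}]",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.storage.StringConverter",
    "flush.size.bytes": "1048576",
    "flush.interval.ms": "30000",
    "behavior.on.error": "fail"
  }
}
```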
@@ -358,8 +402,8 @@ queued ingestion in case of failure and retry-exhaustion. The following is the roadmap-
-| # | Roadmap item| -| :--- | :--- | +| # | Roadmap item | +|:--|:-------------------------| | 1 | Schema evolution support |
@@ -686,19 +730,19 @@ the [Release History](README.md#16-release-history) section of this document. | 3.0.0 | 2022-06-06 |
  • New feature: Internal default batch set to 30 seconds
  • New feature: Update kusto sdk to latest version 3.1.1
  • Bug fix: Flush timer close / fix NPE
| | 3.0.1 | 2022-06-13 |
  • Bug fix:Close could ingest a file after offsets commit - causing duplications
| | | 3.0.2 | 2022-07-20 |
  • New feature: Changes to support protobuf data ingestion
| -| 3.0.3 | 2022-08-09 |
  • Bug fix: Library upgrade to fix CVE-2020-36518 Out-of-bounds Write
-| 3.0.4 | 2022-09-05 |
  • New feature: Make mapping optional , fixes Issue#76
  • New feature: Make table validation optional when the connector starts up (Refer: kusto.validation.table.enable)
  • Bug fix: Stop collecting messages when DLQ is not enabled. Provides better scaling & reduces GC pressure
-| 3.0.5 | 2022-09-07 |
  • New feature: Support authentication with Managed Identities
-| 3.0.6 | 2022-11-28 |
  • Upgrade Kusto Java SDK to 3.2.1 and fix failing unit test case (mitigate text4shell RCE vulnerability)
-| 3.0.7 | 2022-12-06 |
  • Upgrade Jackson version to the latest security version
  • Filter tombstone records & records that fail JSON serialization
-| 3.0.8 | 2022-12-15 |
  • New feature: Added Proxy support to KustoSinkTask
-| 3.0.9 | 2022-12-19 |
  • Bugfix: Restrict file permissions on file created for ingestion
  • Canonicalize file names
  • Refactor tests
-| 4.0.0 | 2023-03-07 |
  • Upgrade Kusto Java SDK to 4.0.3 and Kafka Clients to 3.4.0
  • Disable table access validation at start up by default
-| 4.0.1 | 2023-03-26 |
  • Upgrade Kusto Java SDK to 4.0.4
-| 4.0.2 | 2023-06-28 |
  • Upgrade Kusto Java SDK to 5.0.0
  • Fix vulnerabilities in libs
-| 4.0.3 | 2023-07-23 |
  • Upgrade Kusto Java SDK to 5.0.1
  • Fix vulnerabilities in libs
-| 4.0.4 | 2023-09-27 |
  • Upgrade Kusto Java SDK to 5.0.2
  • Fix vulnerabilities in snappy-java
-| 4.0.5 | 2023-10-27 |
  • Fix vulnerabilities by upgrading io.netty
+| 3.0.3 | 2022-08-09 |
  • Bug fix: Library upgrade to fix CVE-2020-36518 Out-of-bounds Write
| +| 3.0.4 | 2022-09-05 |
  • New feature: Make mapping optional , fixes Issue#76
  • New feature: Make table validation optional when the connector starts up (Refer: kusto.validation.table.enable)
  • Bug fix: Stop collecting messages when DLQ is not enabled. Provides better scaling & reduces GC pressure
| +| 3.0.5 | 2022-09-07 |
  • New feature: Support authentication with Managed Identities
| +| 3.0.6 | 2022-11-28 |
  • Upgrade Kusto Java SDK to 3.2.1 and fix failing unit test case (mitigate text4shell RCE vulnerability)
| +| 3.0.7 | 2022-12-06 |
  • Upgrade Jackson version to the latest security version
  • Filter tombstone records & records that fail JSON serialization
| +| 3.0.8 | 2022-12-15 |
  • New feature: Added Proxy support to KustoSinkTask
| +| 3.0.9 | 2022-12-19 |
  • Bugfix: Restrict file permissions on file created for ingestion
  • Canonicalize file names
  • Refactor tests
| +| 4.0.0 | 2023-03-07 |
  • Upgrade Kusto Java SDK to 4.0.3 and Kafka Clients to 3.4.0
  • Disable table access validation at start up by default
| +| 4.0.1 | 2023-03-26 |
  • Upgrade Kusto Java SDK to 4.0.4
| +| 4.0.2 | 2023-06-28 |
  • Upgrade Kusto Java SDK to 5.0.0
  • Fix vulnerabilities in libs
| +| 4.0.3 | 2023-07-23 |
  • Upgrade Kusto Java SDK to 5.0.1
  • Fix vulnerabilities in libs
| +| 4.0.4 | 2023-09-27 |
  • Upgrade Kusto Java SDK to 5.0.2
  • Fix vulnerabilities in snappy-java
| +| 4.0.5 | 2023-10-27 |
  • Fix vulnerabilities by upgrading io.netty
| ## 17. Contributing diff --git a/pom.xml b/pom.xml index 87f362a5..7a6363b1 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ kafka-sink-azure-kusto jar A Kafka Connect plugin for Azure Data Explorer (Kusto) Database - 4.0.5 + 4.0.6 0.4.1 4.2.0 @@ -23,9 +23,9 @@ 2.4.10 5.10.0 - 7.4.1 + 7.5.1 3.5.1 - 5.0.2 + 5.0.3 3.2.0 3.11.0 3.0.0 @@ -139,6 +139,15 @@ net.revelc.code impsort-maven-plugin ${impsort.version} + + + + org.codehaus.plexus + plexus-utils + 3.5.1 + + java.,javax.,org.,com. true @@ -207,8 +216,36 @@ net.minidev json-smart + + com.azure + azure-core + + + com.azure + azure-core-http-netty + + + com.azure + azure-identity + + + + com.azure + azure-core + 1.45.1 + + + com.azure + azure-core-http-netty + 1.13.11 + + + com.azure + azure-identity + 1.11.1 + com.microsoft.azure.kusto kusto-ingest
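Since the pom now pins azure-core, azure-core-http-netty, and azure-identity explicitly, a quick local sanity check (illustrative only, not part of this change) is to print the resolved dependency tree filtered to the com.azure group and confirm the pinned versions are the ones selected:

```sh
mvn dependency:tree -Dincludes=com.azure
```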