From ee61d06b5fd856b7a8271346aa2141f065ab1bf5 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Wed, 13 Mar 2024 23:14:22 +0200 Subject: [PATCH 01/20] stream source info size --- .../azure/kusto/data/http/HttpPostUtils.java | 3 +- .../azure/kusto/ingest/IngestClientBase.java | 3 +- .../ingest/ManagedStreamingIngestClient.java | 28 +++++++++++-------- .../kusto/ingest/QueuedIngestClientImpl.java | 2 +- .../ingest/source/AbstractSourceInfo.java | 13 +++++++-- .../kusto/ingest/source/BlobSourceInfo.java | 17 ++--------- .../kusto/ingest/source/FileSourceInfo.java | 18 ++---------- .../kusto/ingest/source/StreamSourceInfo.java | 14 ++++++---- .../kusto/ingest/utils/IngestionUtils.java | 2 +- 9 files changed, 46 insertions(+), 54 deletions(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java index dad69c99..ca1d302d 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java @@ -74,7 +74,8 @@ public static String post(CloseableHttpClient httpClient, String urlStr, Abstrac } } } catch (IOException e) { - throw new DataServiceException(urlStr, "IOException in post request:" + e.getMessage(), !Utils.isRetriableIOException(e)); + String message = e.getMessage() == null ? 
e.getCause().getMessage() : e.getMessage(); + throw new DataServiceException(urlStr, "IOException in post request:" + message, !Utils.isRetriableIOException(e)); } return null; diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java index 89502216..b512ccd7 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java @@ -136,7 +136,8 @@ public IngestionResult ingestFromStream(StreamSourceInfo streamSourceInfo, Inges throws IngestionClientException, IngestionServiceException { // trace ingestFromStream return MonitoredActivity.invoke( - (SupplierTwoExceptions) () -> ingestFromStreamImpl(streamSourceInfo, + (SupplierTwoExceptions) + () -> ingestFromStreamImpl(streamSourceInfo, ingestionProperties), getClientType().concat(".ingestFromStream")); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index 62961d5b..f9da0fd1 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -24,10 +24,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.SequenceInputStream; +import java.io.*; import java.lang.invoke.MethodHandles; import java.net.URISyntaxException; import java.util.UUID; @@ -393,18 +390,25 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo sourceId = UUID.randomUUID(); } + InputStream byteArrayStream = streamSourceInfo.getStream(); byte[] streamingBytes; - try { - streamingBytes = 
IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), MAX_STREAMING_SIZE_BYTES + 1); - } catch (IOException e) { - throw new IngestionClientException("Failed to read from stream.", e); + long size = streamSourceInfo.getRawSizeInBytes(); + + // Trust ByteArrayInputStream to be resetable + if (streamSourceInfo.getRawSizeInBytes() <= 0 || (streamSourceInfo.getStream() instanceof ByteArrayInputStream)) { + try { + streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), MAX_STREAMING_SIZE_BYTES + 1); + } catch (IOException e) { + throw new IngestionClientException("Failed to read from stream.", e); + } + // ByteArrayInputStream's close method is a no-op, so we don't need to close it. + byteArrayStream = new ByteArrayInputStream(streamingBytes); + size = streamingBytes.length; } - // ByteArrayInputStream's close method is a no-op, so we don't need to close it. - ByteArrayInputStream byteArrayStream = new ByteArrayInputStream(streamingBytes); - if (streamingBytes.length > MAX_STREAMING_SIZE_BYTES) { - log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", streamingBytes.length); + if (size > MAX_STREAMING_SIZE_BYTES) { + log.info("Stream size is greater than max streaming size ({} bytes). 
Falling back to queued.", size); StreamSourceInfo managedSourceInfo = new StreamSourceInfo(new SequenceInputStream(byteArrayStream, streamSourceInfo.getStream()), streamSourceInfo.isLeaveOpen(), sourceId, streamSourceInfo.getCompressionType()); return queuedIngestClient.ingestFromStream(managedSourceInfo, ingestionProperties); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java index dad93748..55c4535c 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java @@ -220,7 +220,7 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo blobName, shouldCompress); - BlobSourceInfo blobSourceInfo = new BlobSourceInfo(blobPath, 0, streamSourceInfo.getSourceId()); // TODO: check if we can get the rawDataSize + BlobSourceInfo blobSourceInfo = new BlobSourceInfo(blobPath, streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getSourceId()); // TODO: check if we can get the rawDataSize // locally - maybe add a countingStream ingestionResult = ingestFromBlob(blobSourceInfo, ingestionProperties); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java index 2f05839a..d935503a 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java @@ -4,7 +4,6 @@ package com.microsoft.azure.kusto.ingest.source; import com.microsoft.azure.kusto.data.instrumentation.TraceableAttributes; -import org.jetbrains.annotations.NotNull; import java.util.HashMap; import java.util.Map; @@ -22,9 +21,19 @@ public void setSourceId(UUID sourceId) { this.sourceId = sourceId; } + 
// An estimation of the raw (uncompressed, un-indexed) size of the data + private long rawSizeInBytes; + + public long getRawSizeInBytes() { + return rawSizeInBytes; + } + + public void setRawSizeInBytes(long rawSizeInBytes) { + this.rawSizeInBytes = rawSizeInBytes; + } + @Override public Map getTracingAttributes() { return new HashMap<>(); } - } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java index 84d52561..66913a22 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java @@ -24,28 +24,17 @@ public void setBlobPath(String blobPath) { this.blobPath = blobPath; } - private long rawSizeInBytes; - - public long getRawSizeInBytes() { - return rawSizeInBytes; - } - - public void setRawSizeInBytes(long rawSizeInBytes) { - this.rawSizeInBytes = rawSizeInBytes; - } - public BlobSourceInfo(String blobPath) { this.blobPath = blobPath; } public BlobSourceInfo(String blobPath, long rawSizeInBytes) { - this.blobPath = blobPath; - this.rawSizeInBytes = rawSizeInBytes; + this(blobPath); + this.setRawSizeInBytes(rawSizeInBytes); } public BlobSourceInfo(String blobPath, long rawSizeInBytes, UUID sourceId) { - this.blobPath = blobPath; - this.rawSizeInBytes = rawSizeInBytes; + this(blobPath, rawSizeInBytes); this.setSourceId(sourceId); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java index 79ccf4bb..bad317c4 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java @@ -3,9 +3,6 @@ package com.microsoft.azure.kusto.ingest.source; -import 
com.microsoft.azure.kusto.data.instrumentation.TraceableAttributes; -import org.jetbrains.annotations.NotNull; - import java.util.Map; import java.util.UUID; @@ -23,24 +20,13 @@ public void setFilePath(String filePath) { this.filePath = filePath; } - private long rawSizeInBytes; - - public long getRawSizeInBytes() { - return rawSizeInBytes; - } - - public void setRawSizeInBytes(long rawSizeInBytes) { - this.rawSizeInBytes = rawSizeInBytes; - } - public FileSourceInfo(String filePath, long rawSizeInBytes) { this.filePath = filePath; - this.rawSizeInBytes = rawSizeInBytes; + this.setRawSizeInBytes(rawSizeInBytes); } public FileSourceInfo(String filePath, long rawSizeInBytes, UUID sourceId) { - this.filePath = filePath; - this.rawSizeInBytes = rawSizeInBytes; + this(filePath, rawSizeInBytes); this.setSourceId(sourceId); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java index 839ab9b8..bdac8d9a 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java @@ -51,23 +51,25 @@ public StreamSourceInfo(InputStream stream) { } public StreamSourceInfo(InputStream stream, boolean leaveOpen) { + this(stream); setLeaveOpen(leaveOpen); - setStream(stream); } public StreamSourceInfo(InputStream stream, boolean leaveOpen, UUID sourceId) { - setLeaveOpen(leaveOpen); - setStream(stream); + this(stream, leaveOpen); setSourceId(sourceId); } public StreamSourceInfo(InputStream stream, boolean leaveOpen, UUID sourceId, CompressionType compressionType) { - setLeaveOpen(leaveOpen); - setStream(stream); - setSourceId(sourceId); + this(stream, leaveOpen, sourceId); setCompressionType(compressionType); } + public StreamSourceInfo(InputStream stream, boolean leaveOpen, UUID sourceId, CompressionType compressionType, long size) { + 
this(stream, leaveOpen, sourceId, compressionType); + setRawSizeInBytes(size); + } + public void validate() { Ensure.argIsNotNull(stream, "stream"); Ensure.isTrue(compressionType != CompressionType.zip, "streaming ingest is not working with zip compression"); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java index 80c1506e..6208233a 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java @@ -34,7 +34,7 @@ public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boole stream = new ResettableFileInputStream((FileInputStream) stream); } - return new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath)); + return new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath), fileSourceInfo.getRawSizeInBytes()); } @NotNull From 91a61e457efcc01c05eed98534b8497c32f1b728 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Thu, 15 Feb 2024 17:39:33 +0200 Subject: [PATCH 02/20] some good code, unfished --- .../azure/kusto/data/ClientFactory.java | 6 +- .../kusto/data/HttpClientProperties.java | 18 ++++++ .../kusto/data/http/HttpClientFactory.java | 6 +- .../azure/kusto/data/http/HttpPostUtils.java | 2 +- .../ingest/ManagedStreamingIngestClient.java | 55 +++++++++++++------ 5 files changed, 67 insertions(+), 20 deletions(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java index 9aaf6ba9..2908df01 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java @@ -4,9 +4,11 @@ package com.microsoft.azure.kusto.data; import 
com.microsoft.azure.kusto.data.auth.ConnectionStringBuilder; +import com.microsoft.azure.kusto.data.http.HttpClientFactory; import org.apache.http.impl.client.CloseableHttpClient; import java.net.URISyntaxException; +import java.util.Optional; public class ClientFactory { private ClientFactory() { @@ -73,7 +75,9 @@ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb) * @throws URISyntaxException if the cluster URL is invalid */ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb, HttpClientProperties properties) throws URISyntaxException { - return new ClientImpl(csb, properties); + HttpClientProperties httpClientProperties = Optional.ofNullable(properties) + .orElse(HttpClientProperties.builder().disableRetries().build()); + return new ClientImpl(csb, HttpClientFactory.create(httpClientProperties),false); } /** diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java b/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java index 5c327dea..f41511ac 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java @@ -14,6 +14,8 @@ public class HttpClientProperties { private final Integer maxConnectionRoute; private final HttpHost proxy; + private final boolean disableRetries; + private HttpClientProperties(HttpClientPropertiesBuilder builder) { this.maxIdleTime = builder.maxIdleTime; this.keepAlive = builder.keepAlive; @@ -21,6 +23,7 @@ private HttpClientProperties(HttpClientPropertiesBuilder builder) { this.maxConnectionTotal = builder.maxConnectionsTotal; this.maxConnectionRoute = builder.maxConnectionsPerRoute; this.proxy = builder.proxy; + this.disableRetries = builder.disableRetries; } /** @@ -98,6 +101,10 @@ public HttpHost getProxy() { return proxy; } + public boolean isDisableRetries() { + return disableRetries; + } + public static class 
HttpClientPropertiesBuilder { private Integer maxIdleTime = 120; @@ -106,6 +113,7 @@ public static class HttpClientPropertiesBuilder { private Integer maxConnectionsTotal = 40; private Integer maxConnectionsPerRoute = 40; private HttpHost proxy = null; + private boolean disableRetries; private HttpClientPropertiesBuilder() { } @@ -190,6 +198,16 @@ public HttpClientPropertiesBuilder proxy(HttpHost proxy) { return this; } + /** + * Disable all http client internal retries. + * + * @return the builder instance + */ + public HttpClientPropertiesBuilder disableRetries() { + this.disableRetries = true; + return this; + } + public HttpClientProperties build() { return new HttpClientProperties(this); } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java index dcba5d41..260beb4e 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java @@ -38,9 +38,11 @@ public static CloseableHttpClient create(HttpClientProperties providedProperties .setMaxConnTotal(properties.maxConnectionTotal()) .setMaxConnPerRoute(properties.maxConnectionRoute()) .evictExpiredConnections() - .evictIdleConnections(properties.maxIdleTime(), TimeUnit.SECONDS) - .disableRedirectHandling(); + .evictIdleConnections(properties.maxIdleTime(), TimeUnit.SECONDS); + if (properties.isDisableRetries()){ + httpClientBuilder.disableAutomaticRetries(); + } if (properties.isKeepAlive()) { final ConnectionKeepAliveStrategy keepAliveStrategy = new CustomConnectionKeepAliveStrategy(properties.maxKeepAliveTime()); httpClientBuilder.setKeepAliveStrategy(keepAliveStrategy); diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java index ca1d302d..ef24a8f4 100644 --- 
a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java @@ -75,7 +75,7 @@ public static String post(CloseableHttpClient httpClient, String urlStr, Abstrac } } catch (IOException e) { String message = e.getMessage() == null ? e.getCause().getMessage() : e.getMessage(); - throw new DataServiceException(urlStr, "IOException in post request:" + message, !Utils.isRetriableIOException(e)); + throw new DataServiceException(urlStr, "IOException in post request:" + message, e, !Utils.isRetriableIOException(e)); } return null; diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index f9da0fd1..6db0f5a6 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -376,6 +376,13 @@ protected IngestionResult ingestFromResultSetImpl(ResultSetSourceInfo resultSetS } } + private IngestionResult sendStreamToQueuedIngestion(InputStream inputStream, StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties, int size) throws IngestionClientException, IngestionServiceException { + log.info("Stream size is greater than max streaming size ({} bytes). 
Falling back to queued.", size); + StreamSourceInfo managedSourceInfo = new StreamSourceInfo(inputStream, + streamSourceInfo.isLeaveOpen(), streamSourceInfo.getSourceId(), streamSourceInfo.getCompressionType()); + return queuedIngestClient.ingestFromStream(managedSourceInfo, ingestionProperties); + } + @Override protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties) throws IngestionClientException, IngestionServiceException { @@ -390,28 +397,26 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo sourceId = UUID.randomUUID(); } - InputStream byteArrayStream = streamSourceInfo.getStream(); + streamSourceInfo.setSourceId(sourceId); byte[] streamingBytes; - long size = streamSourceInfo.getRawSizeInBytes(); + ByteArrayInputStream byteArrayStream; - // Trust ByteArrayInputStream to be resetable - if (streamSourceInfo.getRawSizeInBytes() <= 0 || (streamSourceInfo.getStream() instanceof ByteArrayInputStream)) { - try { + try{ + if (streamSourceInfo.getStream() instanceof ByteArrayInputStream){ + // We can't rely on other InputStream implementations of available() + byteArrayStream = (ByteArrayInputStream)streamSourceInfo.getStream(); + } else { streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), MAX_STREAMING_SIZE_BYTES + 1); - } catch (IOException e) { - throw new IngestionClientException("Failed to read from stream.", e); + byteArrayStream = new ByteArrayInputStream(streamingBytes); + // ByteArrayInputStream's close method is a no-op, so we don't need to close it. } - // ByteArrayInputStream's close method is a no-op, so we don't need to close it. 
- byteArrayStream = new ByteArrayInputStream(streamingBytes); - size = streamingBytes.length; + } catch (IOException e) { + throw new IngestionClientException("Failed to read from stream.", e); } - - if (size > MAX_STREAMING_SIZE_BYTES) { - log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", size); - StreamSourceInfo managedSourceInfo = new StreamSourceInfo(new SequenceInputStream(byteArrayStream, streamSourceInfo.getStream()), - streamSourceInfo.isLeaveOpen(), sourceId, streamSourceInfo.getCompressionType()); - return queuedIngestClient.ingestFromStream(managedSourceInfo, ingestionProperties); + if (shouldUseQueuedIngestion(streamSourceInfo, ingestionProperties.getDataFormat())) { + return sendStreamToQueuedIngestion( + new SequenceInputStream(byteArrayStream, streamSourceInfo.getStream()), streamSourceInfo,ingestionProperties,byteArrayStream.available()); } if (!streamSourceInfo.isLeaveOpen()) { @@ -440,6 +445,24 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo } } + private boolean shouldUseQueuedIngestion(StreamSourceInfo streamSourceInfo, IngestionProperties.DataFormat dataFormat) throws IOException { + if (streamSourceInfo.getRawSizeInBytes() > 0){ + return streamSourceInfo.getRawSizeInBytes() > MAX_STREAMING_SIZE_BYTES; + } + + long size = streamSourceInfo.getStream().available(); + if (dataFormat.isCompressible()){ + // Binary format + return (size * 1.5) > MAX_STREAMING_SIZE_BYTES; + } + + + // if size is given - use it, else use available, according to format and compression + return streamSourceInfo.getStream().available() > 0 + ? 
streamSourceInfo.getStream().available() + : byteArrayStream.available(); + } + @Override protected String getClientType() { return CLASS_NAME; From 06c529d96cd25fde20a28805cbb896aa7d1a97ef Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Thu, 20 Jun 2024 17:57:42 +0300 Subject: [PATCH 03/20] manaed streaming queuing policy per format --- .../azure/kusto/data/ClientImpl.java | 5 +- .../data/auth/CallbackTokenProvider.java | 3 +- .../azure/kusto/data/auth/CloudInfo.java | 3 +- .../data/exceptions/ExceptionsUtils.java | 8 + .../azure/kusto/data/http/HttpPostUtils.java | 3 +- .../azure/kusto/ingest/IngestClientBase.java | 16 +- .../kusto/ingest/IngestClientFactory.java | 16 +- .../kusto/ingest/IngestionProperties.java | 4 + .../ingest/ManagedStreamingIngestClient.java | 111 +++++++------- .../ingest/ManagedStreamingQueuingPolicy.java | 64 ++++++++ .../kusto/ingest/QueuedIngestClientImpl.java | 6 - .../kusto/ingest/ResourceAlgorithms.java | 9 +- .../kusto/ingest/StreamingIngestClient.java | 6 +- .../kusto/ingest/source/BlobSourceInfo.java | 21 ++- .../utils/ShouldUseQueueingPredicate.java | 8 + .../microsoft/azure/kusto/ingest/E2ETest.java | 40 ++--- .../kusto/ingest/ManagedStreamingTest.java | 142 ++++++++++++++++++ 17 files changed, 349 insertions(+), 116 deletions(-) create mode 100644 data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java create mode 100644 ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java create mode 100644 ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java create mode 100644 ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientImpl.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientImpl.java index 9a73ce32..86d36515 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientImpl.java +++ 
b/data/src/main/java/com/microsoft/azure/kusto/data/ClientImpl.java @@ -14,6 +14,7 @@ import com.microsoft.azure.kusto.data.exceptions.DataServiceException; import com.microsoft.azure.kusto.data.exceptions.KustoClientInvalidConnectionStringException; import com.microsoft.azure.kusto.data.exceptions.KustoServiceQueryError; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.data.http.HttpClientFactory; import com.microsoft.azure.kusto.data.http.HttpPostUtils; import com.microsoft.azure.kusto.data.http.UncloseableStream; @@ -176,9 +177,9 @@ private KustoOperationResult executeImpl(String database, String command, Client return new KustoOperationResult(response, clusterEndpoint.endsWith("v2/rest/query") ? "v2" : "v1"); } catch (KustoServiceQueryError e) { throw new DataServiceException(clusterEndpoint, - "Error found while parsing json response as KustoOperationResult:" + e.getMessage(), e, e.isPermanent()); + "Error found while parsing json response as KustoOperationResult:" + e, e, e.isPermanent()); } catch (Exception e) { - throw new DataClientException(clusterEndpoint, e.getMessage(), e); + throw new DataClientException(clusterEndpoint, ExceptionsUtils.getMessageEx(e), e); } } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/auth/CallbackTokenProvider.java b/data/src/main/java/com/microsoft/azure/kusto/data/auth/CallbackTokenProvider.java index aa9e7816..5f160dc4 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/auth/CallbackTokenProvider.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/auth/CallbackTokenProvider.java @@ -5,6 +5,7 @@ import com.microsoft.azure.kusto.data.exceptions.DataClientException; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import org.apache.http.client.HttpClient; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -32,7 +33,7 @@ protected String acquireAccessTokenImpl() throws DataClientException 
{ try { return tokenProvider.apply(httpClient); } catch (Exception e) { - throw new DataClientException(clusterUrl, e.getMessage(), e); + throw new DataClientException(clusterUrl, ExceptionsUtils.getMessageEx(e), e); } } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java b/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java index f50cfa4f..8ffd302c 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java @@ -6,6 +6,7 @@ import com.microsoft.azure.kusto.data.ExponentialRetry; import com.microsoft.azure.kusto.data.Utils; import com.microsoft.azure.kusto.data.exceptions.DataClientException; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.data.http.HttpClientFactory; import com.microsoft.azure.kusto.data.instrumentation.SupplierOneException; import com.microsoft.azure.kusto.data.UriUtils; @@ -104,7 +105,7 @@ public static CloudInfo retrieveCloudInfoForCluster(String clusterUrl, throw new DataServiceException(clusterUrl, "URISyntaxException when trying to retrieve cluster metadata:" + e.getMessage(), e, true); } catch (IOException ex) { if (!Utils.isRetriableIOException(ex)) { - throw new DataServiceException(clusterUrl, "IOException when trying to retrieve cluster metadata:" + ex.getMessage(), ex, + throw new DataServiceException(clusterUrl, "IOException when trying to retrieve cluster metadata:" + ExceptionsUtils.getMessageEx(ex), ex, Utils.isRetriableIOException(ex)); } } catch (DataServiceException e) { diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java new file mode 100644 index 00000000..7fcaa7c6 --- /dev/null +++ b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java @@ -0,0 +1,8 @@ +package 
com.microsoft.azure.kusto.data.exceptions; + +public class ExceptionsUtils { + // Useful in IOException, where message might not propagate to the base IOException + public static String getMessageEx(Exception e){ + return e.getMessage() == null ? e.getCause().getMessage() : e.getMessage(); + } +} \ No newline at end of file diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java index ef24a8f4..048fc124 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpPostUtils.java @@ -74,8 +74,7 @@ public static String post(CloseableHttpClient httpClient, String urlStr, Abstrac } } } catch (IOException e) { - String message = e.getMessage() == null ? e.getCause().getMessage() : e.getMessage(); - throw new DataServiceException(urlStr, "IOException in post request:" + message, e, !Utils.isRetriableIOException(e)); + throw new DataServiceException(urlStr, "IOException in post request:" + ExceptionsUtils.getMessageEx(e), e, !Utils.isRetriableIOException(e)); } return null; diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java index b512ccd7..401a0210 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java @@ -1,11 +1,11 @@ package com.microsoft.azure.kusto.ingest; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.ingest.source.CompressionType; import org.apache.http.conn.util.InetAddressUtils; -import java.net.InetAddress; +import java.io.IOException; import java.net.URI; -import java.net.UnknownHostException; import com.microsoft.azure.kusto.data.instrumentation.SupplierTwoExceptions; import 
com.microsoft.azure.kusto.data.instrumentation.TraceableAttributes; import com.microsoft.azure.kusto.data.instrumentation.MonitoredActivity; @@ -130,15 +130,21 @@ public IngestionResult ingestFromResultSet(ResultSetSourceInfo resultSetSourceIn * @see IngestionProperties */ protected abstract IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties) - throws IngestionClientException, IngestionServiceException; + throws IngestionClientException, IngestionServiceException, IOException; public IngestionResult ingestFromStream(StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties) throws IngestionClientException, IngestionServiceException { // trace ingestFromStream return MonitoredActivity.invoke( (SupplierTwoExceptions) - () -> ingestFromStreamImpl(streamSourceInfo, - ingestionProperties), + () -> { + try { + return ingestFromStreamImpl(streamSourceInfo, + ingestionProperties); + } catch (IOException e) { + throw new IngestionServiceException(ExceptionsUtils.getMessageEx(e), e); + } + }, getClientType().concat(".ingestFromStream")); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java index 9662fe1f..0b0626a1 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java @@ -92,7 +92,7 @@ public static StreamingIngestClient createStreamingIngestClient(ConnectionString * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder) + ConnectionStringBuilder queryEndpointConnectionStringBuilder) throws URISyntaxException { return 
createManagedStreamingIngestClient(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, null, true); } @@ -108,7 +108,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(Co * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) + ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { return new ManagedStreamingIngestClient(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, properties, autoCorrectEndpoint); @@ -125,7 +125,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(Co * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties) + ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties) throws URISyntaxException { return new ManagedStreamingIngestClient(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, properties, true); } @@ -151,7 +151,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(Co * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) + @Nullable HttpClientProperties 
properties, boolean autoCorrectEndpoint) throws URISyntaxException { return new ManagedStreamingIngestClient(connectionStringBuilder, properties, autoCorrectEndpoint); } @@ -181,7 +181,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromEngineCsb(ConnectionStringBuilder engineConnectionStringBuilder, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { return ManagedStreamingIngestClient.fromEngineConnectionString(engineConnectionStringBuilder, properties); } @@ -211,7 +211,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromDmCsb(ConnectionStringBuilder dmConnectionStringBuilder, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { return ManagedStreamingIngestClient.fromDmConnectionString(dmConnectionStringBuilder, properties); } @@ -226,7 +226,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromDmCsb(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) + @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) throws URISyntaxException { return new ManagedStreamingIngestClient(connectionStringBuilder, httpClient, autoCorrectEndpoint); } @@ -241,7 +241,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public 
static ManagedStreamingIngestClient createManagedStreamingIngestClientFromDmCsb(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient) + @Nullable CloseableHttpClient httpClient) throws URISyntaxException { return new ManagedStreamingIngestClient(connectionStringBuilder, httpClient, true); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java index cebe0f1c..6a94dbaf 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java @@ -431,6 +431,10 @@ public IngestionMapping.IngestionMappingKind getIngestionMappingKind() { public boolean isCompressible() { return compressible; } + + public boolean isJsonFormat(){ + return this.equals(JSON) || this.equals(MULTIJSON) || this.equals(SINGLEJSON); + } } public enum IngestionReportLevel { diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index 6db0f5a6..776f4358 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -12,6 +12,7 @@ import com.microsoft.azure.kusto.data.exceptions.DataServiceException; import com.microsoft.azure.kusto.data.exceptions.DataWebException; import com.microsoft.azure.kusto.data.exceptions.OneApiError; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.ingest.exceptions.IngestionClientException; import com.microsoft.azure.kusto.ingest.exceptions.IngestionServiceException; import com.microsoft.azure.kusto.ingest.result.IngestionResult; @@ -36,19 +37,21 @@ * Since the streaming client communicates directly 
with the engine, it's more prone to failure, so this class * holds both a streaming client and a queued client. * It tries {@value ATTEMPT_COUNT} times using the streaming client, after which it falls back to the queued streaming client in case of failure. - * If the size of the stream is bigger than {@value MAX_STREAMING_SIZE_BYTES}, it will fall back to the queued streaming client. + * By default, the policy for choosing queued ingestion on the first try checks whether the estimated + * raw stream size (a conversion to compressed CSV) is bigger than 4MB; if so, it will fall back to the queued streaming client. + * Use SourceInfo.size to override size estimations; alternatively, use setQueuingPolicy to override the predicate heuristics. *

*/ public class ManagedStreamingIngestClient extends IngestClientBase implements QueuedIngestClient { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final int ATTEMPT_COUNT = 3; - public static final int MAX_STREAMING_SIZE_BYTES = 4 * 1024 * 1024; public static final String CLASS_NAME = ManagedStreamingIngestClient.class.getSimpleName(); final QueuedIngestClientImpl queuedIngestClient; final StreamingIngestClient streamingIngestClient; private final ExponentialRetry exponentialRetryTemplate; private CloseableHttpClient httpClient = null; + private ManagedStreamingQueuingPolicy queuingPolicy = ManagedStreamingQueuingPolicy.Default; /** * @param dmConnectionString dm connection string @@ -74,7 +77,7 @@ public static ManagedStreamingIngestClient fromDmConnectionString(ConnectionStri * For advanced usage, use {@link ManagedStreamingIngestClient#ManagedStreamingIngestClient(ConnectionStringBuilder, ConnectionStringBuilder)} */ public static ManagedStreamingIngestClient fromDmConnectionString(ConnectionStringBuilder dmConnectionString, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { ConnectionStringBuilder engineConnectionString = new ConnectionStringBuilder(dmConnectionString); engineConnectionString.setClusterUrl(IngestClientBase.getQueryEndpoint(engineConnectionString.getClusterUrl())); @@ -105,7 +108,7 @@ public static ManagedStreamingIngestClient fromEngineConnectionString(Connection * For advanced usage, use {@link ManagedStreamingIngestClient#ManagedStreamingIngestClient(ConnectionStringBuilder, ConnectionStringBuilder)} */ public static ManagedStreamingIngestClient fromEngineConnectionString(ConnectionStringBuilder engineConnectionString, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { ConnectionStringBuilder dmConnectionString = new 
ConnectionStringBuilder(engineConnectionString); dmConnectionString.setClusterUrl(IngestClientBase.getIngestionEndpoint(engineConnectionString.getClusterUrl())); @@ -121,18 +124,18 @@ public static ManagedStreamingIngestClient fromEngineConnectionString(Connection * instead. */ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder) throws URISyntaxException { this(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, null); } /** * @param ingestionEndpointConnectionStringBuilder - Endpoint for ingesting data, usually starts with "https://ingest-" * @param queryEndpointConnectionStringBuilder - Endpoint for querying data, does not include "ingest-" - * @param autoCorrectEndpoint - Flag to indicate whether to correct the endpoint URI or not + * @param autoCorrectEndpoint - Flag to indicate whether to correct the endpoint URI or not * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, boolean autoCorrectEndpoint) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder, boolean autoCorrectEndpoint) throws URISyntaxException { this(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, null, autoCorrectEndpoint); } @@ -147,8 +150,8 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon * {@link #ManagedStreamingIngestClient(ConnectionStringBuilder, HttpClientProperties)})} instead. 
*/ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, - @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder, + @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(ingestionEndpointConnectionStringBuilder, properties, autoCorrectEndpoint); streamingIngestClient = new StreamingIngestClient(queryEndpointConnectionStringBuilder, properties, autoCorrectEndpoint); @@ -157,12 +160,12 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon /** * @param connectionStringBuilder - Client connection string - * @param properties - Additional properties to configure the http client - * @param autoCorrectEndpoint - Flag to indicate whether to correct the endpoint URI or not + * @param properties - Additional properties to configure the http client + * @param autoCorrectEndpoint - Flag to indicate whether to correct the endpoint URI or not * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { + @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, properties, autoCorrectEndpoint); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, properties, autoCorrectEndpoint); @@ -171,12 +174,12 @@ public 
ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil /** * @param connectionStringBuilder - Client connection string - * @param httpClient - HTTP client - * @param autoCorrectEndpoint - Flag to indicate whether to correct the endpoint URI or not + * @param httpClient - HTTP client + * @param autoCorrectEndpoint - Flag to indicate whether to correct the endpoint URI or not * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) throws URISyntaxException { + @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, httpClient, autoCorrectEndpoint); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, httpClient, autoCorrectEndpoint); @@ -186,13 +189,13 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil /** * @param ingestionEndpointConnectionStringBuilder - Endpoint for ingesting data, usually starts with "https://ingest-" - * @param queryEndpointConnectionStringBuilder - Endpoint for querying data, does not include "ingest-" + * @param queryEndpointConnectionStringBuilder - Endpoint for querying data, does not include "ingest-" * @param properties - Additional properties to configure the http client * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, - @Nullable HttpClientProperties properties) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder, + @Nullable HttpClientProperties properties) throws URISyntaxException 
{ log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(ingestionEndpointConnectionStringBuilder, properties, true); streamingIngestClient = new StreamingIngestClient(queryEndpointConnectionStringBuilder, properties, true); @@ -205,7 +208,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable HttpClientProperties properties) throws URISyntaxException { + @Nullable HttpClientProperties properties) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, properties, true); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, properties, true); @@ -218,7 +221,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient) throws URISyntaxException { + @Nullable CloseableHttpClient httpClient) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, httpClient, true); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, httpClient, true); @@ -234,8 +237,8 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil * {@link IngestClientFactory#createManagedStreamingIngestClient(ConnectionStringBuilder)} instead. 
*/ public ManagedStreamingIngestClient(ResourceManager resourceManager, - AzureStorageClient storageClient, - StreamingClient streamingClient) { + AzureStorageClient storageClient, + StreamingClient streamingClient) { log.info("Creating a new ManagedStreamingIngestClient from raw parts"); queuedIngestClient = new QueuedIngestClientImpl(resourceManager, storageClient); streamingIngestClient = new StreamingIngestClient(streamingClient); @@ -244,14 +247,14 @@ public ManagedStreamingIngestClient(ResourceManager resourceManager, /** * @param resourceManager ingestion resources manager - * @param storageClient - storage utilities + * @param storageClient - storage utilities * @param streamingClient - the streaming client - * @param retryTemplate - retry template + * @param retryTemplate - retry template */ public ManagedStreamingIngestClient(ResourceManager resourceManager, - AzureStorageClient storageClient, - StreamingClient streamingClient, - ExponentialRetry retryTemplate) { + AzureStorageClient storageClient, + StreamingClient streamingClient, + ExponentialRetry retryTemplate) { log.info("Creating a new ManagedStreamingIngestClient from raw parts"); queuedIngestClient = new QueuedIngestClientImpl(resourceManager, storageClient); streamingIngestClient = new StreamingIngestClient(streamingClient); @@ -294,18 +297,20 @@ protected IngestionResult ingestFromBlobImpl(BlobSourceInfo blobSourceInfo, Inge } BlobClient blobClient = blobClientBuilder.buildClient(); + long blobSize = 0; if (blobSourceInfo.getRawSizeInBytes() <= 0) { try { - blobSourceInfo.setRawSizeInBytes(blobClient.getProperties().getBlobSize()); + blobSize = blobClient.getProperties().getBlobSize(); } catch (BlobStorageException e) { throw new IngestionServiceException( blobSourceInfo.getBlobPath(), - "Failed getting blob properties: " + e.getMessage(), + "Failed getting blob properties: " + ExceptionsUtils.getMessageEx(e), e); } } - if (blobSourceInfo.getRawSizeInBytes() > MAX_STREAMING_SIZE_BYTES) { + if 
(queuingPolicy.shouldUseQueuedIngestion(blobSize, blobSourceInfo.getRawSizeInBytes(), + blobSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { log.info("Blob size is greater than max streaming size ({} bytes). Falling back to queued.", blobSourceInfo.getRawSizeInBytes()); return queuedIngestClient.ingestFromBlob(blobSourceInfo, ingestionProperties); } @@ -378,14 +383,17 @@ protected IngestionResult ingestFromResultSetImpl(ResultSetSourceInfo resultSetS private IngestionResult sendStreamToQueuedIngestion(InputStream inputStream, StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties, int size) throws IngestionClientException, IngestionServiceException { log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", size); - StreamSourceInfo managedSourceInfo = new StreamSourceInfo(inputStream, - streamSourceInfo.isLeaveOpen(), streamSourceInfo.getSourceId(), streamSourceInfo.getCompressionType()); + StreamSourceInfo managedSourceInfo = new StreamSourceInfo( + inputStream, + streamSourceInfo.isLeaveOpen(), + streamSourceInfo.getSourceId(), + streamSourceInfo.getCompressionType()); return queuedIngestClient.ingestFromStream(managedSourceInfo, ingestionProperties); } @Override protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties) - throws IngestionClientException, IngestionServiceException { + throws IngestionClientException, IngestionServiceException, IOException { Ensure.argIsNotNull(streamSourceInfo, "streamSourceInfo"); Ensure.argIsNotNull(ingestionProperties, "ingestionProperties"); @@ -401,12 +409,19 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo byte[] streamingBytes; ByteArrayInputStream byteArrayStream; - try{ - if (streamSourceInfo.getStream() instanceof ByteArrayInputStream){ + if (queuingPolicy.shouldUseQueuedIngestion(streamSourceInfo.getStream().available(), 
streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) + { + log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", + streamSourceInfo.getRawSizeInBytes() > 0 ? streamSourceInfo.getRawSizeInBytes() : streamSourceInfo.getStream().available()); + return queuedIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties); + } + + try { + if (streamSourceInfo.getStream() instanceof ByteArrayInputStream) { // We can't rely on other InputStream implementations of available() - byteArrayStream = (ByteArrayInputStream)streamSourceInfo.getStream(); + byteArrayStream = (ByteArrayInputStream) streamSourceInfo.getStream(); } else { - streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), MAX_STREAMING_SIZE_BYTES + 1); + streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 1); byteArrayStream = new ByteArrayInputStream(streamingBytes); // ByteArrayInputStream's close method is a no-op, so we don't need to close it. 
} @@ -414,10 +429,6 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo throw new IngestionClientException("Failed to read from stream.", e); } - if (shouldUseQueuedIngestion(streamSourceInfo, ingestionProperties.getDataFormat())) { - return sendStreamToQueuedIngestion( - new SequenceInputStream(byteArrayStream, streamSourceInfo.getStream()), streamSourceInfo,ingestionProperties,byteArrayStream.available()); - } if (!streamSourceInfo.isLeaveOpen()) { // From this point we don't need the original stream anymore, we cached it @@ -445,22 +456,8 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo } } - private boolean shouldUseQueuedIngestion(StreamSourceInfo streamSourceInfo, IngestionProperties.DataFormat dataFormat) throws IOException { - if (streamSourceInfo.getRawSizeInBytes() > 0){ - return streamSourceInfo.getRawSizeInBytes() > MAX_STREAMING_SIZE_BYTES; - } - - long size = streamSourceInfo.getStream().available(); - if (dataFormat.isCompressible()){ - // Binary format - return (size * 1.5) > MAX_STREAMING_SIZE_BYTES; - } - - - // if size is given - use it, else use available, according to format and compression - return streamSourceInfo.getStream().available() > 0 - ? 
streamSourceInfo.getStream().available() - : byteArrayStream.available(); + public void setQueuingPolicy(ManagedStreamingQueuingPolicy queuingPolicy) { + this.queuingPolicy = queuingPolicy; } @Override diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java new file mode 100644 index 00000000..bd781b5a --- /dev/null +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java @@ -0,0 +1,64 @@ +package com.microsoft.azure.kusto.ingest; + +import com.microsoft.azure.kusto.ingest.utils.ShouldUseQueueingPredicate; + +public class ManagedStreamingQueuingPolicy { + static final int MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES = 4 * 1024 * 1024; + static final int MAX_STREAMING_STREAM_SIZE_BYTES = 10 * 1024 * 1024; + static final int MAX_STREAMING_RAW_SIZE_BYTES = 7 * 1024 * 1024; + static final double JSON_UNCOMPRESSED_FACTOR = 1.5d; + static final int NON_BINARY_FACTOR = 2; + static final double BINARY_COMPRESSED_FACTOR = 2d; + static final double BINARY_UNCOMPRESSED_FACTOR = 1.5d; + final ShouldUseQueueingPredicate predicate; + + public ManagedStreamingQueuingPolicy(ShouldUseQueueingPredicate defaultShouldUseQueuedIngestion) { + predicate = defaultShouldUseQueuedIngestion; + } + + // Return true if streaming ingestion should not be tried, according to stream size, compression and format + private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { + // if size is given - use the 7mb limit. 
+ if (rawDataSize > 0){ + return rawDataSize > MAX_STREAMING_RAW_SIZE_BYTES; + } + + // In case available() was implemented wrong, do streaming + if (dataSize <= 0) { + return false; + } + + // In any case - don't stream more than 10mb + if (dataSize > MAX_STREAMING_STREAM_SIZE_BYTES){ + return true; + } + + if (!dataFormat.isCompressible()){ + // Binary format + if (compressed) { + return (dataSize * BINARY_COMPRESSED_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; + } + + return (dataSize * BINARY_UNCOMPRESSED_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; + } + + if (compressed) { + // Compressed + non-binary + return (dataSize * NON_BINARY_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; + } + + if (dataFormat.isJsonFormat()){ + // JSON uncompressed format + return (dataSize / JSON_UNCOMPRESSED_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; + } + + // Uncompressed + non-binary + return (dataSize / NON_BINARY_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; + } + + static ManagedStreamingQueuingPolicy Default = new ManagedStreamingQueuingPolicy(ManagedStreamingQueuingPolicy::defaultShouldUseQueuedIngestion); + + public boolean shouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed ,IngestionProperties.DataFormat dataFormat) { + return predicate.apply(dataSize, rawDataSize, compressed, dataFormat); + } +} diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java index 55c4535c..a32a1a21 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java @@ -140,8 +140,6 @@ protected IngestionResult ingestFromBlobImpl(BlobSourceInfo blobSourceInfo, Inge throw new IngestionServiceException("Failed to ingest from blob", e); } catch (IOException | URISyntaxException e) { throw new 
IngestionClientException("Failed to ingest from blob", e); - } catch (IngestionServiceException e) { - throw e; } } @@ -182,8 +180,6 @@ protected IngestionResult ingestFromFileImpl(FileSourceInfo fileSourceInfo, Inge throw new IngestionServiceException("Failed to ingest from file", e); } catch (IOException e) { throw new IngestionClientException("Failed to ingest from file", e); - } catch (IngestionServiceException e) { - throw e; } } @@ -232,8 +228,6 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo throw new IngestionServiceException("Failed to ingest from stream", e); } catch (IOException e) { throw new IngestionClientException("Failed to ingest from stream", e); - } catch (IngestionServiceException e) { - throw e; } } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java index 4fed48d5..c153976e 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java @@ -1,5 +1,6 @@ package com.microsoft.azure.kusto.ingest; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.microsoft.azure.kusto.data.Utils; import com.microsoft.azure.kusto.data.instrumentation.FunctionOneException; @@ -69,11 +70,11 @@ private static , TOut> TOut res } public static void postToQueueWithRetries(ResourceManager resourceManager, AzureStorageClient azureStorageClient, IngestionBlobInfo blob) - throws IngestionClientException, IngestionServiceException { + throws IngestionClientException, IngestionServiceException, JsonProcessingException { + ObjectMapper objectMapper = Utils.getObjectMapper(); + String message = objectMapper.writeValueAsString(blob); resourceActionWithRetries(resourceManager, resourceManager.getShuffledQueues(), queue -> { - ObjectMapper 
objectMapper = Utils.getObjectMapper(); - - azureStorageClient.postMessageToQueue(queue.getQueue(), objectMapper.writeValueAsString(blob)); + azureStorageClient.postMessageToQueue(queue.getQueue(), message); return null; }, "ResourceAlgorithms.postToQueueWithRetries", Collections.singletonMap("blob", SecurityUtils.removeSecretsFromUrl(blob.getBlobPath()))); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java index eee028eb..014597fb 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java @@ -11,6 +11,7 @@ import com.microsoft.azure.kusto.data.exceptions.DataClientException; import com.microsoft.azure.kusto.data.exceptions.DataServiceException; import com.microsoft.azure.kusto.data.HttpClientProperties; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.data.instrumentation.MonitoredActivity; import com.microsoft.azure.kusto.data.instrumentation.SupplierTwoExceptions; import com.microsoft.azure.kusto.ingest.exceptions.IngestionClientException; @@ -185,8 +186,9 @@ private IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo, ingestionProperties.getIngestionMapping().getIngestionMappingReference(), !(streamSourceInfo.getCompressionType() == null || !streamSourceInfo.isLeaveOpen())); } catch (DataClientException | IOException e) { - log.error(e.getMessage(), e); - throw new IngestionClientException(e.getMessage(), e); + String msg = ExceptionsUtils.getMessageEx(e); + log.error(msg, e); + throw new IngestionClientException(msg, e); } catch (DataServiceException e) { log.error(e.getMessage(), e); throw new IngestionServiceException(e.getMessage(), e); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java 
b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java index 66913a22..6db149c0 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java @@ -3,10 +3,6 @@ package com.microsoft.azure.kusto.ingest.source; -import com.microsoft.azure.kusto.data.instrumentation.TraceableAttributes; -import org.jetbrains.annotations.NotNull; - -import java.util.HashMap; import java.util.Map; import java.util.UUID; @@ -15,6 +11,7 @@ public class BlobSourceInfo extends AbstractSourceInfo { private String blobPath; + private CompressionType compressionType; public String getBlobPath() { return blobPath; @@ -33,11 +30,27 @@ public BlobSourceInfo(String blobPath, long rawSizeInBytes) { this.setRawSizeInBytes(rawSizeInBytes); } + public BlobSourceInfo(String blobPath, long rawSizeInBytes, CompressionType compressionType) { + this(blobPath); + this.compressionType = compressionType; + this.setRawSizeInBytes(rawSizeInBytes); + } + public BlobSourceInfo(String blobPath, long rawSizeInBytes, UUID sourceId) { this(blobPath, rawSizeInBytes); this.setSourceId(sourceId); } + + public CompressionType getCompressionType() { + return compressionType; + } + + public void setCompressionType(CompressionType compressionType) { + this.compressionType = compressionType; + } + + public void validate() { stringIsNotBlank(blobPath, "blobPath"); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java new file mode 100644 index 00000000..3f525e2b --- /dev/null +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java @@ -0,0 +1,8 @@ +package com.microsoft.azure.kusto.ingest.utils; + +import com.microsoft.azure.kusto.ingest.IngestionProperties; + +@FunctionalInterface +public 
interface ShouldUseQueueingPredicate { + Boolean apply(long size, long rawDataSize, boolean compressed, IngestionProperties.DataFormat format); +} diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java index 35a36c2b..c7e536df 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java @@ -75,21 +75,12 @@ class E2ETest { @BeforeAll public static void setUp() throws IOException { - appKey = System.getenv("APP_KEY"); - if (appKey == null) { - String secretPath = System.getProperty("SecretPath"); - if (secretPath == null) { - throw new IllegalArgumentException("SecretPath is not set"); - } - appKey = Files.readAllLines(Paths.get(secretPath)).get(0); - } tableName = "JavaTest_" + new SimpleDateFormat("yyyy_MM_dd_hh_mm_ss_SSS").format(Calendar.getInstance().getTime()) + "_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE); principalFqn = String.format("aadapp=%s;%s", appId, tenantId); - ConnectionStringBuilder dmCsb = ConnectionStringBuilder.createWithAadApplicationCredentials(System.getenv("DM_CONNECTION_STRING"), appId, appKey, - tenantId); + ConnectionStringBuilder dmCsb = ConnectionStringBuilder.createWithUserPrompt(System.getenv("DM_CONNECTION_STRING")); dmCsb.setUserNameForTracing("testUser"); try { dmCslClient = ClientFactory.createClient(dmCsb); @@ -104,12 +95,12 @@ public static void setUp() throws IOException { Assertions.fail("Failed to create ingest client", ex); } - ConnectionStringBuilder engineCsb = ConnectionStringBuilder.createWithAadApplicationCredentials(System.getenv("ENGINE_CONNECTION_STRING"), appId, - appKey, tenantId); + ConnectionStringBuilder engineCsb = ConnectionStringBuilder.createWithUserPrompt(System.getenv("ENGINE_CONNECTION_STRING")); engineCsb.setUserNameForTracing("Java_E2ETest_ø"); try { streamingIngestClient = 
IngestClientFactory.createStreamingIngestClient(engineCsb); queryClient = ClientFactory.createClient(engineCsb); +// queryClient.execute("e2e",".add database e2e monitors (\"aadapp=063bba78-b986-4bdc-882c-2ceacc859282;72f988bf-86f1-41af-91ab-2d7cd011db47\")"); streamingClient = ClientFactory.createStreamingClient(engineCsb); managedStreamingIngestClient = IngestClientFactory.createManagedStreamingIngestClient(dmCsb, engineCsb); } catch (URISyntaxException ex) { @@ -182,7 +173,7 @@ private static void createTestData() { first.setPath("$.rownumber"); ColumnMapping second = new ColumnMapping("rowguid", "string"); second.setPath("$.rowguid"); - ColumnMapping[] columnMapping = new ColumnMapping[] {first, second}; + ColumnMapping[] columnMapping = new ColumnMapping[]{first, second}; ingestionPropertiesWithColumnMapping.setIngestionMapping(columnMapping, IngestionMappingKind.JSON); ingestionPropertiesWithColumnMapping.setDataFormat(DataFormat.JSON); @@ -349,20 +340,21 @@ void testIngestFromFileWithTable() { @ValueSource(booleans = {true, false}) void testIngestFromStream(boolean isManaged) throws IOException { for (TestDataItem item : dataForTests) { + InputStream stream = Files.newInputStream(item.file.toPath()); + StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream); if (item.file.getPath().endsWith(".gz")) { - InputStream stream = Files.newInputStream(item.file.toPath()); - StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream); - streamSourceInfo.setCompressionType(CompressionType.gz); - try { - ((isManaged && item.testOnManaged) ? managedStreamingIngestClient : ingestClient).ingestFromStream(streamSourceInfo, - item.ingestionProperties); - } catch (Exception ex) { - Assertions.fail(ex); - } - assertRowCount(item.rows, true); } + + try { + ((isManaged && item.testOnManaged) ? 
managedStreamingIngestClient : ingestClient).ingestFromStream(streamSourceInfo, + item.ingestionProperties); + } catch (Exception ex) { + Assertions.fail(ex); + } + assertRowCount(item.rows, true); } + } @Test @@ -578,7 +570,7 @@ void testPerformanceKustoOperationResultVsJsonVsStreamingQuery() throws DataClie stopWatch.start(); // The InputStream *must* be closed by the caller to prevent memory leaks try (InputStream is = streamingClient.executeStreamingQuery(databaseName, query, clientRequestProperties); - BufferedReader br = new BufferedReader(new InputStreamReader(is))) { + BufferedReader br = new BufferedReader(new InputStreamReader(is))) { StringBuilder streamedResult = new StringBuilder(); char[] buffer = new char[65536]; String streamedLine; diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java new file mode 100644 index 00000000..0f3ba4e0 --- /dev/null +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java @@ -0,0 +1,142 @@ +package com.microsoft.azure.kusto.ingest; + +import com.azure.data.tables.models.TableEntity; +import com.microsoft.azure.kusto.data.StreamingClient; +import com.microsoft.azure.kusto.ingest.result.IngestionStatus; +import com.microsoft.azure.kusto.ingest.result.OperationStatus; +import com.microsoft.azure.kusto.ingest.source.StreamSourceInfo; +import org.apache.commons.lang3.function.BooleanConsumer; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import javax.xml.crypto.Data; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; + +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; 
+import static org.mockito.Mockito.*; +import static org.mockito.Mockito.verify; + +public class ManagedStreamingTest { + private static final ResourceManager resourceManagerMock = mock(ResourceManager.class); + private static final AzureStorageClient azureStorageClientMock = mock(AzureStorageClient.class); + public static final String ACCOUNT_NAME = "someaccount"; + private static QueuedIngestClient queuedIngestClient; + private static IngestionProperties ingestionProperties; + private static StreamingClient streamingClientMock; + + @BeforeAll + static void setUp() throws Exception { + when(resourceManagerMock.getShuffledContainers()) + .thenReturn(Collections.singletonList(TestUtils.containerWithSasFromAccountNameAndContainerName(ACCOUNT_NAME, "someStorage"))); + when(resourceManagerMock.getShuffledQueues()) + .thenReturn(Collections.singletonList(TestUtils.queueWithSasFromAccountNameAndQueueName(ACCOUNT_NAME, "someQueue"))); + + when(resourceManagerMock.getStatusTable()) + .thenReturn(TestUtils.tableWithSasFromTableName("http://statusTable.com")); + + when(resourceManagerMock.getIdentityToken()).thenReturn("identityToken"); + + doNothing().when(azureStorageClientMock).azureTableInsertEntity(any(), any(TableEntity.class)); + + doNothing().when(azureStorageClientMock).postMessageToQueue(any(), anyString()); + streamingClientMock = mock(StreamingClient.class); + when(streamingClientMock.executeStreamingIngest(any(String.class), any(String.class), any(InputStream.class), + isNull(), any(String.class), any(String.class), any(boolean.class))).thenReturn(null); + + ingestionProperties = new IngestionProperties("dbName", "tableName"); + } + + static InputStream createStreamOfSize(int size) { + char[] charArray = new char[size]; + Arrays.fill(charArray, 'a'); + String str = new String(charArray); + return new ByteArrayInputStream(StandardCharsets.UTF_8.encode(str).array()); + } + + @Test + void IngestFromStream_CsvStream() throws Exception { + + InputStream inputStream = 
createStreamOfSize(1); + StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); + + ManagedStreamingIngestClient managedStreamingIngestClient = new ManagedStreamingIngestClient(resourceManagerMock, azureStorageClientMock, streamingClientMock); + + // Expect to work and also choose no queuing + OperationStatus status = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection().get(0).status; + assertEquals(OperationStatus.Succeeded, status); + + BooleanConsumer assertPolicyWorked = (boolean wasExpectedToUseQueuing) -> { + try { + inputStream.reset(); + IngestionStatus ingestionStatus = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection().get(0); + if (wasExpectedToUseQueuing) { + assertEquals(OperationStatus.Queued, ingestionStatus.status); + } else { + assertEquals(OperationStatus.Succeeded, ingestionStatus.status); + } + System.out.println(ingestionStatus.status); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + + // if size was given - it should be used against MAX_STREAMING_RAW_SIZE_BYTES + streamSourceInfo.setRawSizeInBytes(ManagedStreamingQueuingPolicy.MAX_STREAMING_RAW_SIZE_BYTES + 1); + assertPolicyWorked.accept(true); + + streamSourceInfo.setRawSizeInBytes(ManagedStreamingQueuingPolicy.MAX_STREAMING_RAW_SIZE_BYTES - 1); + assertPolicyWorked.accept(false); + } + + @Test + void shouldUseQueueingPredicate_DefaultBehavior() { + // Raw data size is set - choose queuing although data is small + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion( + 1, ManagedStreamingQueuingPolicy.MAX_STREAMING_RAW_SIZE_BYTES + 1, false, IngestionProperties.DataFormat.CSV)); + + // CSV uncompressed - allow big file + int bigFile = 7 * 1024 * 1024; + assertFalse(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(bigFile, + 0, false, IngestionProperties.DataFormat.CSV)); + + // CSV compressed - 
don't allow big files + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(bigFile, + 0, true, IngestionProperties.DataFormat.CSV)); + int mediumSizeCompressed = 3 * 1024 * 1024; + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(mediumSizeCompressed, + 0, true, IngestionProperties.DataFormat.CSV)); + + int smallCompressed = 2 * 1024 * 1024; + assertFalse(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(smallCompressed, + 0, true, IngestionProperties.DataFormat.CSV)); + + // JSON uncompress- allow big file + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(bigFile, + 0, false, IngestionProperties.DataFormat.JSON)); + + // JSON compressed + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(mediumSizeCompressed, + 0, true, IngestionProperties.DataFormat.JSON)); + assertFalse(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(smallCompressed, + 0, true, IngestionProperties.DataFormat.JSON)); + + // AVRO - either compressed or not do not allow medium + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(mediumSizeCompressed, + 0, true, IngestionProperties.DataFormat.AVRO)); + assertTrue(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(mediumSizeCompressed, + 0, false, IngestionProperties.DataFormat.AVRO)); + + // AVRO - either compressed or not allow small + assertFalse(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(smallCompressed, + 0, true, IngestionProperties.DataFormat.AVRO)); + assertFalse(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(smallCompressed, + 0, false, IngestionProperties.DataFormat.AVRO)); + } +} From b8d6d3ec1ad9972c75cfea87ea9affda007c279e Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Fri, 28 Jun 2024 13:38:30 +0300 Subject: [PATCH 04/20] ser raw size for stream --- .../ingest/ManagedStreamingIngestClient.java | 4 +-- 
.../kusto/ingest/StreamingIngestClient.java | 2 +- .../ingest/source/AbstractSourceInfo.java | 4 +-- .../kusto/ingest/source/BlobSourceInfo.java | 3 ++ .../kusto/ingest/source/FileSourceInfo.java | 2 +- .../kusto/ingest/source/StreamSourceInfo.java | 5 ++-- .../kusto/ingest/utils/IngestionUtils.java | 17 +++++++++-- .../microsoft/azure/kusto/ingest/tests.java | 28 +++++++++++++++++++ 8 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index 776f4358..11e4a119 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -270,7 +270,7 @@ protected IngestionResult ingestFromFileImpl(FileSourceInfo fileSourceInfo, Inge fileSourceInfo.validate(); ingestionProperties.validate(); try { - StreamSourceInfo streamSourceInfo = IngestionUtils.fileToStream(fileSourceInfo, true); + StreamSourceInfo streamSourceInfo = IngestionUtils.fileToStream(fileSourceInfo, true, ingestionProperties.getDataFormat()); return ingestFromStream(streamSourceInfo, ingestionProperties); } catch (FileNotFoundException e) { log.error("File not found when ingesting a file.", e); @@ -411,7 +411,7 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo if (queuingPolicy.shouldUseQueuedIngestion(streamSourceInfo.getStream().available(), streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { - log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", + log.info("Stream size ({} bytes) is greater than max streaming size according to policy. 
Falling back to queued.", streamSourceInfo.getRawSizeInBytes() > 0 ? streamSourceInfo.getRawSizeInBytes() : streamSourceInfo.getStream().available()); return queuedIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java index 014597fb..14395307 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java @@ -85,7 +85,7 @@ protected IngestionResult ingestFromFileImpl(FileSourceInfo fileSourceInfo, Inge ingestionProperties.validate(); try { - StreamSourceInfo streamSourceInfo = IngestionUtils.fileToStream(fileSourceInfo, false); + StreamSourceInfo streamSourceInfo = IngestionUtils.fileToStream(fileSourceInfo, false, ingestionProperties.getDataFormat()); return ingestFromStream(streamSourceInfo, ingestionProperties); } catch (FileNotFoundException e) { log.error("File not found when ingesting a file.", e); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java index d935503a..96c79b14 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/AbstractSourceInfo.java @@ -10,7 +10,6 @@ import java.util.UUID; abstract class AbstractSourceInfo implements SourceInfo, TraceableAttributes { - private UUID sourceId; public UUID getSourceId() { @@ -21,13 +20,14 @@ public void setSourceId(UUID sourceId) { this.sourceId = sourceId; } - // An estimation of the raw (uncompressed, un-indexed) size of the data + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known private long 
rawSizeInBytes; public long getRawSizeInBytes() { return rawSizeInBytes; } + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known public void setRawSizeInBytes(long rawSizeInBytes) { this.rawSizeInBytes = rawSizeInBytes; } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java index 6db149c0..af950029 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java @@ -25,17 +25,20 @@ public BlobSourceInfo(String blobPath) { this.blobPath = blobPath; } + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known public BlobSourceInfo(String blobPath, long rawSizeInBytes) { this(blobPath); this.setRawSizeInBytes(rawSizeInBytes); } + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known public BlobSourceInfo(String blobPath, long rawSizeInBytes, CompressionType compressionType) { this(blobPath); this.compressionType = compressionType; this.setRawSizeInBytes(rawSizeInBytes); } + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known public BlobSourceInfo(String blobPath, long rawSizeInBytes, UUID sourceId) { this(blobPath, rawSizeInBytes); this.setSourceId(sourceId); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java index bad317c4..4ecde1d9 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java @@ -9,7 +9,6 @@ import static 
com.microsoft.azure.kusto.data.Ensure.stringIsNotBlank; public class FileSourceInfo extends AbstractSourceInfo { - private String filePath; public String getFilePath() { @@ -20,6 +19,7 @@ public void setFilePath(String filePath) { this.filePath = filePath; } + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known public FileSourceInfo(String filePath, long rawSizeInBytes) { this.filePath = filePath; this.setRawSizeInBytes(rawSizeInBytes); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java index bdac8d9a..31ac4817 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/StreamSourceInfo.java @@ -65,9 +65,10 @@ public StreamSourceInfo(InputStream stream, boolean leaveOpen, UUID sourceId, Co setCompressionType(compressionType); } - public StreamSourceInfo(InputStream stream, boolean leaveOpen, UUID sourceId, CompressionType compressionType, long size) { + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known + public StreamSourceInfo(InputStream stream, boolean leaveOpen, UUID sourceId, CompressionType compressionType, long rawDataSize) { this(stream, leaveOpen, sourceId, compressionType); - setRawSizeInBytes(size); + setRawSizeInBytes(rawDataSize); } public void validate() { diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java index 6208233a..362dbae3 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java @@ -1,5 +1,7 @@ package 
com.microsoft.azure.kusto.ingest.utils; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; +import com.microsoft.azure.kusto.ingest.IngestionProperties; import com.microsoft.azure.kusto.ingest.ResettableFileInputStream; import com.microsoft.azure.kusto.ingest.exceptions.IngestionClientException; import com.microsoft.azure.kusto.ingest.source.CompressionType; @@ -21,7 +23,7 @@ private IngestionUtils() { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @NotNull - public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boolean resettable) throws IngestionClientException, FileNotFoundException { + public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boolean resettable, IngestionProperties.DataFormat format) throws IngestionClientException, FileNotFoundException { String filePath = fileSourceInfo.getFilePath(); File file = new File(filePath); if (file.length() == 0) { @@ -34,7 +36,15 @@ public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boole stream = new ResettableFileInputStream((FileInputStream) stream); } - return new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath), fileSourceInfo.getRawSizeInBytes()); + StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath)); + try { + streamSourceInfo.setRawSizeInBytes(fileSourceInfo.getRawSizeInBytes() > 0 ? fileSourceInfo.getRawSizeInBytes() : + format.isCompressible() ? 
stream.available() : 0); + } catch (IOException e) { + throw new IngestionClientException(ExceptionsUtils.getMessageEx(e), e); + } + + return streamSourceInfo; } @NotNull @@ -47,8 +57,9 @@ public static StreamSourceInfo resultSetToStream(ResultSetSourceInfo resultSetSo log.error(message); throw new IngestionClientException(message); } + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); - return new StreamSourceInfo(byteArrayInputStream, false, resultSetSourceInfo.getSourceId()); + return new StreamSourceInfo(byteArrayInputStream, false, resultSetSourceInfo.getSourceId(), null, byteArrayInputStream.available()); } public static byte[] readBytesFromInputStream(InputStream inputStream, int bytesToRead) throws IOException { diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java new file mode 100644 index 00000000..5954b891 --- /dev/null +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java @@ -0,0 +1,28 @@ +package com.microsoft.azure.kusto.ingest; + +import com.microsoft.azure.kusto.ingest.IngestionProperties; +import com.microsoft.azure.kusto.ingest.ManagedStreamingQueuingPolicy; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +class ManagedStreamingQueuingPolicyTest { + + @Test + void shouldUseQueuedIngestion() { + ManagedStreamingQueuingPolicy policy = ManagedStreamingQueuingPolicy.Default; + + // Test with dataSize, rawDataSize, compressed and dataFormat parameters + // Adjust these values according to your needs + long dataSize = 0; + long rawDataSize = 0; + boolean compressed = false; + IngestionProperties.DataFormat dataFormat = IngestionProperties.DataFormat.CSV; + + boolean result = policy.shouldUseQueuedIngestion(dataSize, rawDataSize, compressed, dataFormat); + + // Assert the result + // Adjust the expected result according to your needs + boolean 
expectedResult = false; + assertEquals(expectedResult, result); + } +} \ No newline at end of file From 83c1f8749cd75ab86b6c65cfca1576f4f1c50a50 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Mon, 1 Jul 2024 10:46:44 +0300 Subject: [PATCH 05/20] remove todo --- .../microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java index cf1e1c61..f7b78bba 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/QueuedIngestClientImpl.java @@ -222,8 +222,7 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo blobName, shouldCompress); - BlobSourceInfo blobSourceInfo = new BlobSourceInfo(blobPath, streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getSourceId()); // TODO: check if we can get the rawDataSize - // locally - maybe add a countingStream + BlobSourceInfo blobSourceInfo = new BlobSourceInfo(blobPath, streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getSourceId()); ingestionResult = ingestFromBlob(blobSourceInfo, ingestionProperties); if (!streamSourceInfo.isLeaveOpen()) { From 6883b3664cef70cdb360a9cc6094068c872b5308 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Mon, 1 Jul 2024 11:45:42 +0300 Subject: [PATCH 06/20] format --- .../azure/kusto/data/ClientFactory.java | 2 +- .../azure/kusto/data/auth/CloudInfo.java | 3 +- .../data/exceptions/ExceptionsUtils.java | 6 +-- .../kusto/data/http/HttpClientFactory.java | 2 +- .../azure/kusto/ingest/IngestClientBase.java | 15 +++---- .../kusto/ingest/IngestClientFactory.java | 16 +++---- .../kusto/ingest/IngestionProperties.java | 2 +- .../ingest/ManagedStreamingIngestClient.java | 45 ++++++++++--------- .../ingest/ManagedStreamingQueuingPolicy.java | 12 
++--- .../kusto/ingest/source/BlobSourceInfo.java | 2 - .../kusto/ingest/utils/IngestionUtils.java | 7 +-- .../microsoft/azure/kusto/ingest/E2ETest.java | 4 +- .../kusto/ingest/ManagedStreamingTest.java | 9 ++-- .../microsoft/azure/kusto/ingest/tests.java | 2 +- 14 files changed, 65 insertions(+), 62 deletions(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java index 2908df01..f71cf7e1 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java @@ -77,7 +77,7 @@ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb) public static StreamingClient createStreamingClient(ConnectionStringBuilder csb, HttpClientProperties properties) throws URISyntaxException { HttpClientProperties httpClientProperties = Optional.ofNullable(properties) .orElse(HttpClientProperties.builder().disableRetries().build()); - return new ClientImpl(csb, HttpClientFactory.create(httpClientProperties),false); + return new ClientImpl(csb, HttpClientFactory.create(httpClientProperties), false); } /** diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java b/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java index 8ffd302c..7c21e274 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/auth/CloudInfo.java @@ -105,7 +105,8 @@ public static CloudInfo retrieveCloudInfoForCluster(String clusterUrl, throw new DataServiceException(clusterUrl, "URISyntaxException when trying to retrieve cluster metadata:" + e.getMessage(), e, true); } catch (IOException ex) { if (!Utils.isRetriableIOException(ex)) { - throw new DataServiceException(clusterUrl, "IOException when trying to retrieve cluster metadata:" + ExceptionsUtils.getMessageEx(ex), ex, + throw new 
DataServiceException(clusterUrl, "IOException when trying to retrieve cluster metadata:" + ExceptionsUtils.getMessageEx(ex), + ex, Utils.isRetriableIOException(ex)); } } catch (DataServiceException e) { diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java index 7fcaa7c6..12a8c4d8 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java @@ -2,7 +2,7 @@ public class ExceptionsUtils { // Useful in IOException, where message might not propagate to the base IOException - public static String getMessageEx(Exception e){ - return e.getMessage() == null ? e.getCause().getMessage() : e.getMessage(); + public static String getMessageEx(Exception e) { + return e.getMessage() == null ? e.getCause().getMessage() : e.getMessage(); } -} \ No newline at end of file +} diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java index 260beb4e..820c4b30 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java @@ -40,7 +40,7 @@ public static CloseableHttpClient create(HttpClientProperties providedProperties .evictExpiredConnections() .evictIdleConnections(properties.maxIdleTime(), TimeUnit.SECONDS); - if (properties.isDisableRetries()){ + if (properties.isDisableRetries()) { httpClientBuilder.disableAutomaticRetries(); } if (properties.isKeepAlive()) { diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java index 401a0210..0a9630c6 100644 --- 
a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientBase.java @@ -136,15 +136,14 @@ public IngestionResult ingestFromStream(StreamSourceInfo streamSourceInfo, Inges throws IngestionClientException, IngestionServiceException { // trace ingestFromStream return MonitoredActivity.invoke( - (SupplierTwoExceptions) - () -> { - try { - return ingestFromStreamImpl(streamSourceInfo, + (SupplierTwoExceptions) () -> { + try { + return ingestFromStreamImpl(streamSourceInfo, ingestionProperties); - } catch (IOException e) { - throw new IngestionServiceException(ExceptionsUtils.getMessageEx(e), e); - } - }, + } catch (IOException e) { + throw new IngestionServiceException(ExceptionsUtils.getMessageEx(e), e); + } + }, getClientType().concat(".ingestFromStream")); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java index 0b0626a1..9662fe1f 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestClientFactory.java @@ -92,7 +92,7 @@ public static StreamingIngestClient createStreamingIngestClient(ConnectionString * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder) + ConnectionStringBuilder queryEndpointConnectionStringBuilder) throws URISyntaxException { return createManagedStreamingIngestClient(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, null, true); } @@ -108,7 +108,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(Co * @throws URISyntaxException if the connection string is 
invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) + ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { return new ManagedStreamingIngestClient(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, properties, autoCorrectEndpoint); @@ -125,7 +125,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(Co * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties) + ConnectionStringBuilder queryEndpointConnectionStringBuilder, @Nullable HttpClientProperties properties) throws URISyntaxException { return new ManagedStreamingIngestClient(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, properties, true); } @@ -151,7 +151,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(Co * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) + @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { return new ManagedStreamingIngestClient(connectionStringBuilder, properties, autoCorrectEndpoint); } @@ -181,7 +181,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws 
URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromEngineCsb(ConnectionStringBuilder engineConnectionStringBuilder, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { return ManagedStreamingIngestClient.fromEngineConnectionString(engineConnectionStringBuilder, properties); } @@ -211,7 +211,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromDmCsb(ConnectionStringBuilder dmConnectionStringBuilder, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { return ManagedStreamingIngestClient.fromDmConnectionString(dmConnectionStringBuilder, properties); } @@ -226,7 +226,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromDmCsb(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) + @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) throws URISyntaxException { return new ManagedStreamingIngestClient(connectionStringBuilder, httpClient, autoCorrectEndpoint); } @@ -241,7 +241,7 @@ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFro * @throws URISyntaxException if the connection string is invalid */ public static ManagedStreamingIngestClient createManagedStreamingIngestClientFromDmCsb(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient) + @Nullable CloseableHttpClient httpClient) throws URISyntaxException { return new 
ManagedStreamingIngestClient(connectionStringBuilder, httpClient, true); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java index 6a94dbaf..d7431989 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/IngestionProperties.java @@ -432,7 +432,7 @@ public boolean isCompressible() { return compressible; } - public boolean isJsonFormat(){ + public boolean isJsonFormat() { return this.equals(JSON) || this.equals(MULTIJSON) || this.equals(SINGLEJSON); } } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index 11e4a119..ef703e51 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -77,7 +77,7 @@ public static ManagedStreamingIngestClient fromDmConnectionString(ConnectionStri * For advanced usage, use {@link ManagedStreamingIngestClient#ManagedStreamingIngestClient(ConnectionStringBuilder, ConnectionStringBuilder)} */ public static ManagedStreamingIngestClient fromDmConnectionString(ConnectionStringBuilder dmConnectionString, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { ConnectionStringBuilder engineConnectionString = new ConnectionStringBuilder(dmConnectionString); engineConnectionString.setClusterUrl(IngestClientBase.getQueryEndpoint(engineConnectionString.getClusterUrl())); @@ -108,7 +108,7 @@ public static ManagedStreamingIngestClient fromEngineConnectionString(Connection * For advanced usage, use {@link ManagedStreamingIngestClient#ManagedStreamingIngestClient(ConnectionStringBuilder, 
ConnectionStringBuilder)} */ public static ManagedStreamingIngestClient fromEngineConnectionString(ConnectionStringBuilder engineConnectionString, - @Nullable HttpClientProperties properties) + @Nullable HttpClientProperties properties) throws URISyntaxException { ConnectionStringBuilder dmConnectionString = new ConnectionStringBuilder(engineConnectionString); dmConnectionString.setClusterUrl(IngestClientBase.getIngestionEndpoint(engineConnectionString.getClusterUrl())); @@ -124,7 +124,7 @@ public static ManagedStreamingIngestClient fromEngineConnectionString(Connection * instead. */ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder) throws URISyntaxException { this(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, null); } @@ -135,7 +135,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, boolean autoCorrectEndpoint) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder, boolean autoCorrectEndpoint) throws URISyntaxException { this(ingestionEndpointConnectionStringBuilder, queryEndpointConnectionStringBuilder, null, autoCorrectEndpoint); } @@ -150,8 +150,8 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon * {@link #ManagedStreamingIngestClient(ConnectionStringBuilder, HttpClientProperties)})} instead. 
*/ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, - @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder, + @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(ingestionEndpointConnectionStringBuilder, properties, autoCorrectEndpoint); streamingIngestClient = new StreamingIngestClient(queryEndpointConnectionStringBuilder, properties, autoCorrectEndpoint); @@ -165,7 +165,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { + @Nullable HttpClientProperties properties, boolean autoCorrectEndpoint) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, properties, autoCorrectEndpoint); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, properties, autoCorrectEndpoint); @@ -179,7 +179,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) throws URISyntaxException { + @Nullable CloseableHttpClient httpClient, boolean autoCorrectEndpoint) throws URISyntaxException { log.info("Creating a new 
ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, httpClient, autoCorrectEndpoint); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, httpClient, autoCorrectEndpoint); @@ -194,8 +194,8 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointConnectionStringBuilder, - ConnectionStringBuilder queryEndpointConnectionStringBuilder, - @Nullable HttpClientProperties properties) throws URISyntaxException { + ConnectionStringBuilder queryEndpointConnectionStringBuilder, + @Nullable HttpClientProperties properties) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(ingestionEndpointConnectionStringBuilder, properties, true); streamingIngestClient = new StreamingIngestClient(queryEndpointConnectionStringBuilder, properties, true); @@ -208,7 +208,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder ingestionEndpointCon * @throws URISyntaxException if the connection string is invalid */ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable HttpClientProperties properties) throws URISyntaxException { + @Nullable HttpClientProperties properties) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, properties, true); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, properties, true); @@ -221,7 +221,7 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil * @throws URISyntaxException if the connection string is invalid */ public 
ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuilder, - @Nullable CloseableHttpClient httpClient) throws URISyntaxException { + @Nullable CloseableHttpClient httpClient) throws URISyntaxException { log.info("Creating a new ManagedStreamingIngestClient from connection strings"); queuedIngestClient = new QueuedIngestClientImpl(connectionStringBuilder, httpClient, true); streamingIngestClient = new StreamingIngestClient(connectionStringBuilder, httpClient, true); @@ -237,8 +237,8 @@ public ManagedStreamingIngestClient(ConnectionStringBuilder connectionStringBuil * {@link IngestClientFactory#createManagedStreamingIngestClient(ConnectionStringBuilder)} instead. */ public ManagedStreamingIngestClient(ResourceManager resourceManager, - AzureStorageClient storageClient, - StreamingClient streamingClient) { + AzureStorageClient storageClient, + StreamingClient streamingClient) { log.info("Creating a new ManagedStreamingIngestClient from raw parts"); queuedIngestClient = new QueuedIngestClientImpl(resourceManager, storageClient); streamingIngestClient = new StreamingIngestClient(streamingClient); @@ -252,9 +252,9 @@ public ManagedStreamingIngestClient(ResourceManager resourceManager, * @param retryTemplate - retry template */ public ManagedStreamingIngestClient(ResourceManager resourceManager, - AzureStorageClient storageClient, - StreamingClient streamingClient, - ExponentialRetry retryTemplate) { + AzureStorageClient storageClient, + StreamingClient streamingClient, + ExponentialRetry retryTemplate) { log.info("Creating a new ManagedStreamingIngestClient from raw parts"); queuedIngestClient = new QueuedIngestClientImpl(resourceManager, storageClient); streamingIngestClient = new StreamingIngestClient(streamingClient); @@ -381,7 +381,8 @@ protected IngestionResult ingestFromResultSetImpl(ResultSetSourceInfo resultSetS } } - private IngestionResult sendStreamToQueuedIngestion(InputStream inputStream, StreamSourceInfo streamSourceInfo, 
IngestionProperties ingestionProperties, int size) throws IngestionClientException, IngestionServiceException { + private IngestionResult sendStreamToQueuedIngestion(InputStream inputStream, StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties, + int size) throws IngestionClientException, IngestionServiceException { log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", size); StreamSourceInfo managedSourceInfo = new StreamSourceInfo( inputStream, @@ -409,8 +410,8 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo byte[] streamingBytes; ByteArrayInputStream byteArrayStream; - if (queuingPolicy.shouldUseQueuedIngestion(streamSourceInfo.getStream().available(), streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) - { + if (queuingPolicy.shouldUseQueuedIngestion(streamSourceInfo.getStream().available(), streamSourceInfo.getRawSizeInBytes(), + streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { log.info("Stream size ({} bytes) is greater than max streaming size according to policy. Falling back to queued.", streamSourceInfo.getRawSizeInBytes() > 0 ? 
streamSourceInfo.getRawSizeInBytes() : streamSourceInfo.getStream().available()); return queuedIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties); @@ -421,7 +422,8 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo // We can't rely on other InputStream implementations of available() byteArrayStream = (ByteArrayInputStream) streamSourceInfo.getStream(); } else { - streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 1); + streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), + ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 1); byteArrayStream = new ByteArrayInputStream(streamingBytes); // ByteArrayInputStream's close method is a no-op, so we don't need to close it. } @@ -429,7 +431,6 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo throw new IngestionClientException("Failed to read from stream.", e); } - if (!streamSourceInfo.isLeaveOpen()) { // From this point we don't need the original stream anymore, we cached it try { diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java index bd781b5a..c8f54fd2 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java @@ -19,7 +19,7 @@ public ManagedStreamingQueuingPolicy(ShouldUseQueueingPredicate defaultShouldUse // Return true if streaming ingestion should not be tried, according to stream size, compression and format private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { // if size is given - use the 7mb limit. 
- if (rawDataSize > 0){ + if (rawDataSize > 0) { return rawDataSize > MAX_STREAMING_RAW_SIZE_BYTES; } @@ -29,11 +29,11 @@ private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDa } // In any case - don't stream more than 10mb - if (dataSize > MAX_STREAMING_STREAM_SIZE_BYTES){ + if (dataSize > MAX_STREAMING_STREAM_SIZE_BYTES) { return true; } - if (!dataFormat.isCompressible()){ + if (!dataFormat.isCompressible()) { // Binary format if (compressed) { return (dataSize * BINARY_COMPRESSED_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; @@ -47,7 +47,7 @@ private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDa return (dataSize * NON_BINARY_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; } - if (dataFormat.isJsonFormat()){ + if (dataFormat.isJsonFormat()) { // JSON uncompressed format return (dataSize / JSON_UNCOMPRESSED_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; } @@ -58,7 +58,7 @@ private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDa static ManagedStreamingQueuingPolicy Default = new ManagedStreamingQueuingPolicy(ManagedStreamingQueuingPolicy::defaultShouldUseQueuedIngestion); - public boolean shouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed ,IngestionProperties.DataFormat dataFormat) { - return predicate.apply(dataSize, rawDataSize, compressed, dataFormat); + public boolean shouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { + return predicate.apply(dataSize, rawDataSize, compressed, dataFormat); } } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java index af950029..15af901f 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/BlobSourceInfo.java 
@@ -44,7 +44,6 @@ public BlobSourceInfo(String blobPath, long rawSizeInBytes, UUID sourceId) { this.setSourceId(sourceId); } - public CompressionType getCompressionType() { return compressionType; } @@ -53,7 +52,6 @@ public void setCompressionType(CompressionType compressionType) { this.compressionType = compressionType; } - public void validate() { stringIsNotBlank(blobPath, "blobPath"); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java index 362dbae3..0cece951 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java @@ -23,7 +23,8 @@ private IngestionUtils() { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @NotNull - public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boolean resettable, IngestionProperties.DataFormat format) throws IngestionClientException, FileNotFoundException { + public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boolean resettable, IngestionProperties.DataFormat format) + throws IngestionClientException, FileNotFoundException { String filePath = fileSourceInfo.getFilePath(); File file = new File(filePath); if (file.length() == 0) { @@ -38,8 +39,8 @@ public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boole StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath)); try { - streamSourceInfo.setRawSizeInBytes(fileSourceInfo.getRawSizeInBytes() > 0 ? fileSourceInfo.getRawSizeInBytes() : - format.isCompressible() ? stream.available() : 0); + streamSourceInfo.setRawSizeInBytes( + fileSourceInfo.getRawSizeInBytes() > 0 ? fileSourceInfo.getRawSizeInBytes() : format.isCompressible() ? 
stream.available() : 0); } catch (IOException e) { throw new IngestionClientException(ExceptionsUtils.getMessageEx(e), e); } diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java index abb2ab07..4086a51e 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java @@ -181,7 +181,7 @@ private static void createTestData() { first.setPath("$.rownumber"); ColumnMapping second = new ColumnMapping("rowguid", "string"); second.setPath("$.rowguid"); - ColumnMapping[] columnMapping = new ColumnMapping[]{first, second}; + ColumnMapping[] columnMapping = new ColumnMapping[] {first, second}; ingestionPropertiesWithColumnMapping.setIngestionMapping(columnMapping, IngestionMappingKind.JSON); ingestionPropertiesWithColumnMapping.setDataFormat(DataFormat.JSON); @@ -578,7 +578,7 @@ void testPerformanceKustoOperationResultVsJsonVsStreamingQuery() throws DataClie stopWatch.start(); // The InputStream *must* be closed by the caller to prevent memory leaks try (InputStream is = streamingClient.executeStreamingQuery(databaseName, query, clientRequestProperties); - BufferedReader br = new BufferedReader(new InputStreamReader(is))) { + BufferedReader br = new BufferedReader(new InputStreamReader(is))) { StringBuilder streamedResult = new StringBuilder(); char[] buffer = new char[65536]; String streamedLine; diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java index 0f3ba4e0..210c018a 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java @@ -66,16 +66,19 @@ void IngestFromStream_CsvStream() throws Exception { InputStream inputStream = createStreamOfSize(1); 
StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); - ManagedStreamingIngestClient managedStreamingIngestClient = new ManagedStreamingIngestClient(resourceManagerMock, azureStorageClientMock, streamingClientMock); + ManagedStreamingIngestClient managedStreamingIngestClient = new ManagedStreamingIngestClient(resourceManagerMock, azureStorageClientMock, + streamingClientMock); // Expect to work and also choose no queuing - OperationStatus status = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection().get(0).status; + OperationStatus status = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection() + .get(0).status; assertEquals(OperationStatus.Succeeded, status); BooleanConsumer assertPolicyWorked = (boolean wasExpectedToUseQueuing) -> { try { inputStream.reset(); - IngestionStatus ingestionStatus = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection().get(0); + IngestionStatus ingestionStatus = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties) + .getIngestionStatusCollection().get(0); if (wasExpectedToUseQueuing) { assertEquals(OperationStatus.Queued, ingestionStatus.status); } else { diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java index 5954b891..e583ef92 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java @@ -25,4 +25,4 @@ void shouldUseQueuedIngestion() { boolean expectedResult = false; assertEquals(expectedResult, result); } -} \ No newline at end of file +} From 7c9dd44092c0ba12b7800af0ce2c76f7444f2f92 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Mon, 1 Jul 2024 16:10:28 +0300 Subject: [PATCH 07/20] fix test --- 
.../microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java index 12a8c4d8..28b275e1 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java @@ -3,6 +3,6 @@ public class ExceptionsUtils { // Useful in IOException, where message might not propagate to the base IOException public static String getMessageEx(Exception e) { - return e.getMessage() != null ? e.getCause().getMessage() : e.getMessage(); + return e.getCause() != null ? e.getCause().getMessage() : e.getMessage(); } } From 89bddb2303a02b0ee22a8879381d5e6dae6e7622 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Tue, 2 Jul 2024 13:42:34 +0300 Subject: [PATCH 08/20] print exception message on last retry --- .../data/auth/endpoints/WellKnownKustoEndpointsData.java | 3 +-- .../azure/kusto/data/exceptions/ExceptionsUtils.java | 2 +- .../microsoft/azure/kusto/ingest/ResourceAlgorithms.java | 9 ++++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java b/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java index 10bd2f78..cc8a6f05 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java @@ -44,8 +44,7 @@ private static WellKnownKustoEndpointsData readInstance() { return objectMapper.readValue(resourceAsStream, WellKnownKustoEndpointsData.class); } } catch (Exception ex) { - ex.printStackTrace(); + throw new RuntimeException("Failed to read 
WellKnownKustoEndpoints.json"); } - return null; } } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java index 28b275e1..fdba923d 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/ExceptionsUtils.java @@ -3,6 +3,6 @@ public class ExceptionsUtils { // Useful in IOException, where message might not propagate to the base IOException public static String getMessageEx(Exception e) { - return e.getCause() != null ? e.getCause().getMessage() : e.getMessage(); + return (e.getMessage() == null && e.getCause() != null) ? e.getCause().getMessage() : e.getMessage(); } } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java index c153976e..5189c473 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.microsoft.azure.kusto.data.Utils; +import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.data.instrumentation.FunctionOneException; import com.microsoft.azure.kusto.data.instrumentation.MonitoredActivity; import com.microsoft.azure.kusto.data.instrumentation.Tracer; @@ -38,7 +39,7 @@ private static , TOut> TOut res } List> totalAttributes = new ArrayList<>(); - + Exception ex = null; for (int i = 0; i < RETRY_COUNT; i++) { TWrapper resource = resources.get(i % resources.size()); try { @@ -62,11 +63,13 @@ private static , TOut> TOut res } }, actionName, attributes); } catch (Exception e) { + ex = e; log.warn(String.format("Error 
during retry %d of %d for %s", i + 1, RETRY_COUNT, actionName), e); } } - throw new IngestionClientException(String.format("%s: All %d retries failed - used resources: %s", actionName, RETRY_COUNT, - totalAttributes.stream().map(x -> String.format("%s (%s)", x.get("resource"), x.get("account"))).collect(Collectors.joining(", ")))); + throw new IngestionClientException(String.format("%s: All %d retries failed with last error: %s\n. Used resources: %s", actionName, RETRY_COUNT, + totalAttributes.stream().map(x -> String.format("%s (%s)", x.get("resource"), x.get("account"))).collect(Collectors.joining(", ")), + ExceptionsUtils.getMessageEx(ex))); } public static void postToQueueWithRetries(ResourceManager resourceManager, AzureStorageClient azureStorageClient, IngestionBlobInfo blob) From c50d56f8138bfddfb8bebc67e84da4bba878dee7 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Tue, 2 Jul 2024 13:57:51 +0300 Subject: [PATCH 09/20] used undeclared jackson dep --- .../com/microsoft/azure/kusto/data/IngestionSourceStorage.java | 1 - ingest/pom.xml | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/IngestionSourceStorage.java b/data/src/main/java/com/microsoft/azure/kusto/data/IngestionSourceStorage.java index 660d7698..b9523757 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/IngestionSourceStorage.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/IngestionSourceStorage.java @@ -2,7 +2,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.microsoft.azure.kusto.data.http.HttpPostUtils; class IngestionSourceStorage { public String sourceUri; diff --git a/ingest/pom.xml b/ingest/pom.xml index 23d70319..6d491ffa 100644 --- a/ingest/pom.xml +++ b/ingest/pom.xml @@ -107,6 +107,7 @@ com.microsoft.azure:msal4j:jar io.projectreactor:reactor-core:jar + com.fasterxml.jackson.core:jackson-core:jar true From 
5a5ad90f820b3a4881a3ee2821e961f754d1b230 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Tue, 2 Jul 2024 13:58:16 +0300 Subject: [PATCH 10/20] format --- .../com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java index 5189c473..486e78ea 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ResourceAlgorithms.java @@ -39,7 +39,7 @@ private static , TOut> TOut res } List> totalAttributes = new ArrayList<>(); - Exception ex = null; + Exception ex = null; for (int i = 0; i < RETRY_COUNT; i++) { TWrapper resource = resources.get(i % resources.size()); try { From b1b2e0a2ec47d66316856b32318beda82f14d619 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Sun, 21 Jul 2024 15:20:21 +0300 Subject: [PATCH 11/20] add comment --- .../azure/kusto/ingest/ManagedStreamingQueuingPolicy.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java index c8f54fd2..c99427d2 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java @@ -4,8 +4,10 @@ public class ManagedStreamingQueuingPolicy { static final int MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES = 4 * 1024 * 1024; + // Regardless of the format, we don't want to stream more than 10mb static final int MAX_STREAMING_STREAM_SIZE_BYTES = 10 * 1024 * 1024; - static final int MAX_STREAMING_RAW_SIZE_BYTES = 7 * 1024 * 1024; + // Used against the users input of raw data size + static final int 
MAX_STREAMING_RAW_SIZE_BYTES = 6 * 1024 * 1024; static final double JSON_UNCOMPRESSED_FACTOR = 1.5d; static final int NON_BINARY_FACTOR = 2; static final double BINARY_COMPRESSED_FACTOR = 2d; @@ -18,7 +20,7 @@ public ManagedStreamingQueuingPolicy(ShouldUseQueueingPredicate defaultShouldUse // Return true if streaming ingestion should not be tried, according to stream size, compression and format private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { - // if size is given - use the 7mb limit. + // if size is given - use the 6mb limit. if (rawDataSize > 0) { return rawDataSize > MAX_STREAMING_RAW_SIZE_BYTES; } From f9df56d81e7040c87fe8802ace47035abbaf33b9 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Thu, 25 Jul 2024 15:41:01 +0300 Subject: [PATCH 12/20] good comments --- .../WellKnownKustoEndpointsData.java | 2 +- .../kusto/data/http/HttpClientFactory.java | 3 +- .../ingest/ManagedStreamingIngestClient.java | 63 +++++++++-------- .../kusto/ingest/utils/IngestionUtils.java | 11 ++- .../microsoft/azure/kusto/ingest/E2ETest.java | 2 +- ...=> ManagedStreamingQueuingPolicyTest.java} | 5 +- .../kusto/ingest/ManagedStreamingTest.java | 68 ++++++++++++++++--- 7 files changed, 110 insertions(+), 44 deletions(-) rename ingest/src/test/java/com/microsoft/azure/kusto/ingest/{tests.java => ManagedStreamingQueuingPolicyTest.java} (82%) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java b/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java index cc8a6f05..4a207f77 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/auth/endpoints/WellKnownKustoEndpointsData.java @@ -44,7 +44,7 @@ private static WellKnownKustoEndpointsData readInstance() { return 
objectMapper.readValue(resourceAsStream, WellKnownKustoEndpointsData.class); } } catch (Exception ex) { - throw new RuntimeException("Failed to read WellKnownKustoEndpoints.json"); + throw new RuntimeException("Failed to read WellKnownKustoEndpoints.json", ex); } } } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java index 820c4b30..dcf0c470 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java @@ -38,7 +38,8 @@ public static CloseableHttpClient create(HttpClientProperties providedProperties .setMaxConnTotal(properties.maxConnectionTotal()) .setMaxConnPerRoute(properties.maxConnectionRoute()) .evictExpiredConnections() - .evictIdleConnections(properties.maxIdleTime(), TimeUnit.SECONDS); + .evictIdleConnections(properties.maxIdleTime(), TimeUnit.SECONDS) + .disableRedirectHandling(); if (properties.isDisableRetries()) { httpClientBuilder.disableAutomaticRetries(); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index ef703e51..f40f4c3b 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -47,12 +47,12 @@ public class ManagedStreamingIngestClient extends IngestClientBase implements Qu private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final int ATTEMPT_COUNT = 3; public static final String CLASS_NAME = ManagedStreamingIngestClient.class.getSimpleName(); - final QueuedIngestClientImpl queuedIngestClient; + final QueuedIngestClient queuedIngestClient; final StreamingIngestClient 
streamingIngestClient; private final ExponentialRetry exponentialRetryTemplate; private CloseableHttpClient httpClient = null; private ManagedStreamingQueuingPolicy queuingPolicy = ManagedStreamingQueuingPolicy.Default; - + private static final String fallbackLogString ="Data size is greater than max streaming size according to the policy. Falling back to queued."; /** * @param dmConnectionString dm connection string * @return a new ManagedStreamingIngestClient @@ -261,6 +261,12 @@ public ManagedStreamingIngestClient(ResourceManager resourceManager, exponentialRetryTemplate = retryTemplate; } + ManagedStreamingIngestClient(StreamingIngestClient streamingIngestClient, QueuedIngestClient queuedIngestClient, ExponentialRetry exponentialRetry) { + this.streamingIngestClient = streamingIngestClient; + this.queuedIngestClient = queuedIngestClient; + exponentialRetryTemplate = exponentialRetry; + } + @Override protected IngestionResult ingestFromFileImpl(FileSourceInfo fileSourceInfo, IngestionProperties ingestionProperties) throws IngestionClientException, IngestionServiceException { @@ -311,7 +317,7 @@ protected IngestionResult ingestFromBlobImpl(BlobSourceInfo blobSourceInfo, Inge if (queuingPolicy.shouldUseQueuedIngestion(blobSize, blobSourceInfo.getRawSizeInBytes(), blobSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { - log.info("Blob size is greater than max streaming size ({} bytes). 
Falling back to queued.", blobSourceInfo.getRawSizeInBytes()); + log.info(fallbackLogString); return queuedIngestClient.ingestFromBlob(blobSourceInfo, ingestionProperties); } @@ -381,17 +387,6 @@ protected IngestionResult ingestFromResultSetImpl(ResultSetSourceInfo resultSetS } } - private IngestionResult sendStreamToQueuedIngestion(InputStream inputStream, StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties, - int size) throws IngestionClientException, IngestionServiceException { - log.info("Stream size is greater than max streaming size ({} bytes). Falling back to queued.", size); - StreamSourceInfo managedSourceInfo = new StreamSourceInfo( - inputStream, - streamSourceInfo.isLeaveOpen(), - streamSourceInfo.getSourceId(), - streamSourceInfo.getCompressionType()); - return queuedIngestClient.ingestFromStream(managedSourceInfo, ingestionProperties); - } - @Override protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo, IngestionProperties ingestionProperties) throws IngestionClientException, IngestionServiceException, IOException { @@ -412,34 +407,42 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo if (queuingPolicy.shouldUseQueuedIngestion(streamSourceInfo.getStream().available(), streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { - log.info("Stream size ({} bytes) is greater than max streaming size according to policy. Falling back to queued.", - streamSourceInfo.getRawSizeInBytes() > 0 ? 
streamSourceInfo.getRawSizeInBytes() : streamSourceInfo.getStream().available()); + log.info(fallbackLogString); return queuedIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties); } try { - if (streamSourceInfo.getStream() instanceof ByteArrayInputStream) { - // We can't rely on other InputStream implementations of available() + if (streamSourceInfo.getStream().available() > 0) { byteArrayStream = (ByteArrayInputStream) streamSourceInfo.getStream(); } else { + // If its not a ByteArrayInputStream: + // Read 10mb (max streaming size), decide with that if we should stream streamingBytes = IngestionUtils.readBytesFromInputStream(streamSourceInfo.getStream(), ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 1); byteArrayStream = new ByteArrayInputStream(streamingBytes); - // ByteArrayInputStream's close method is a no-op, so we don't need to close it. + int size = streamingBytes.length; + if (queuingPolicy.shouldUseQueuedIngestion(size, streamSourceInfo.getRawSizeInBytes(), + streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { + log.info(fallbackLogString); + StreamSourceInfo managedSourceInfo = new StreamSourceInfo(new SequenceInputStream(byteArrayStream, streamSourceInfo.getStream()), + streamSourceInfo.isLeaveOpen(), sourceId, streamSourceInfo.getCompressionType()); + + return queuedIngestClient.ingestFromStream(managedSourceInfo, ingestionProperties); + } + + if (!streamSourceInfo.isLeaveOpen()) { + // From this point we don't need the original stream anymore, we cached it + try { + streamSourceInfo.getStream().close(); + } catch (IOException e) { + log.warn("Failed to close stream", e); + } + } } } catch (IOException e) { throw new IngestionClientException("Failed to read from stream.", e); } - if (!streamSourceInfo.isLeaveOpen()) { - // From this point we don't need the original stream anymore, we cached it - try { - streamSourceInfo.getStream().close(); - } catch (IOException e) { - 
log.warn("Failed to close stream", e); - } - } - StreamSourceInfo managedSourceInfo = new StreamSourceInfo(byteArrayStream, true, sourceId, streamSourceInfo.getCompressionType()); try { IngestionResult result = streamWithRetries(managedSourceInfo, ingestionProperties, null); @@ -457,6 +460,10 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo } } + /* + Set the policy that handles the logic over which data size would the client choose to directly use queued ingestion + instead of trying streaming ingestion first. + */ public void setQueuingPolicy(ManagedStreamingQueuingPolicy queuingPolicy) { this.queuingPolicy = queuingPolicy; } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java index 0cece951..f25fd500 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java @@ -12,6 +12,7 @@ import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import java.io.*; import java.lang.invoke.MethodHandles; @@ -39,8 +40,14 @@ public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boole StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath)); try { - streamSourceInfo.setRawSizeInBytes( - fileSourceInfo.getRawSizeInBytes() > 0 ? fileSourceInfo.getRawSizeInBytes() : format.isCompressible() ? stream.available() : 0); + + if (fileSourceInfo.getRawSizeInBytes() > 0) { + streamSourceInfo.setRawSizeInBytes( + fileSourceInfo.getRawSizeInBytes()); + } else { + streamSourceInfo.setRawSizeInBytes( + format.isCompressible() ? 
stream.available() : 0); + } } catch (IOException e) { throw new IngestionClientException(ExceptionsUtils.getMessageEx(e), e); } diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java index 4086a51e..44fa8042 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java @@ -122,9 +122,9 @@ public static void setUp() { @AfterAll public static void tearDown() { try { + queryClient.executeToJsonResult(databaseName, String.format(".drop table %s ifexists", tableName)); ingestClient.close(); managedStreamingIngestClient.close(); - queryClient.executeToJsonResult(databaseName, String.format(".drop table %s ifexists", tableName)); } catch (Exception ex) { Assertions.fail("Failed to drop table", ex); } diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicyTest.java similarity index 82% rename from ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java rename to ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicyTest.java index e583ef92..60e6aa2d 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/tests.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicyTest.java @@ -1,9 +1,8 @@ package com.microsoft.azure.kusto.ingest; -import com.microsoft.azure.kusto.ingest.IngestionProperties; -import com.microsoft.azure.kusto.ingest.ManagedStreamingQueuingPolicy; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.*; + +import static org.junit.jupiter.api.Assertions.assertEquals; class ManagedStreamingQueuingPolicyTest { diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java 
b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java index 210c018a..a3de5cde 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java @@ -1,35 +1,41 @@ package com.microsoft.azure.kusto.ingest; import com.azure.data.tables.models.TableEntity; +import com.microsoft.azure.kusto.data.ExponentialRetry; import com.microsoft.azure.kusto.data.StreamingClient; +import com.microsoft.azure.kusto.ingest.exceptions.IngestionClientException; +import com.microsoft.azure.kusto.ingest.exceptions.IngestionServiceException; import com.microsoft.azure.kusto.ingest.result.IngestionStatus; import com.microsoft.azure.kusto.ingest.result.OperationStatus; import com.microsoft.azure.kusto.ingest.source.StreamSourceInfo; import org.apache.commons.lang3.function.BooleanConsumer; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; -import javax.xml.crypto.Data; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.*; -import static org.mockito.Mockito.verify; public class ManagedStreamingTest { private static final ResourceManager resourceManagerMock = mock(ResourceManager.class); private static final AzureStorageClient azureStorageClientMock = mock(AzureStorageClient.class); public static final String ACCOUNT_NAME = "someaccount"; - private static QueuedIngestClient queuedIngestClient; + private static QueuedIngestClient 
queuedIngestClientMock; private static IngestionProperties ingestionProperties; private static StreamingClient streamingClientMock; + private static ManagedStreamingIngestClient managedStreamingIngestClient; + private static ManagedStreamingIngestClient managedStreamingIngestClientSpy; @BeforeAll static void setUp() throws Exception { @@ -51,23 +57,35 @@ static void setUp() throws Exception { isNull(), any(String.class), any(String.class), any(boolean.class))).thenReturn(null); ingestionProperties = new IngestionProperties("dbName", "tableName"); + managedStreamingIngestClient = new ManagedStreamingIngestClient(resourceManagerMock, azureStorageClientMock, + streamingClientMock); + queuedIngestClientMock = mock(QueuedIngestClientImpl.class); + managedStreamingIngestClientSpy = spy(new ManagedStreamingIngestClient(mock(StreamingIngestClient.class), queuedIngestClientMock, new ExponentialRetry(1))); } - static InputStream createStreamOfSize(int size) { + static ByteArrayInputStream createStreamOfSize(int size) throws UnsupportedEncodingException { char[] charArray = new char[size]; Arrays.fill(charArray, 'a'); String str = new String(charArray); - return new ByteArrayInputStream(StandardCharsets.UTF_8.encode(str).array()); + byte[] byteArray = str.getBytes("UTF-8"); + return new ByteArrayInputStream(byteArray); } + static int getStreamSize(InputStream inputStream) throws IOException { + int size = 0; + byte[] buffer = new byte[1024]; + int bytesRead; + while ((bytesRead = inputStream.read(buffer)) != -1) { + size += bytesRead; + } + return size; + } @Test void IngestFromStream_CsvStream() throws Exception { InputStream inputStream = createStreamOfSize(1); StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); - ManagedStreamingIngestClient managedStreamingIngestClient = new ManagedStreamingIngestClient(resourceManagerMock, azureStorageClientMock, - streamingClientMock); // Expect to work and also choose no queuing OperationStatus status = 
managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection() @@ -142,4 +160,38 @@ void shouldUseQueueingPredicate_DefaultBehavior() { assertFalse(ManagedStreamingQueuingPolicy.Default.shouldUseQueuedIngestion(smallCompressed, 0, false, IngestionProperties.DataFormat.AVRO)); } + + @Test + void ManagedStreaming_BigFile_ShouldQueueTheFullStream() throws IOException, IngestionClientException, IngestionServiceException{ + EmptyAvailableByteArrayOutputStream inputStream = new EmptyAvailableByteArrayOutputStream(createStreamOfSize(ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 10)); + int size = inputStream.bb.available(); + StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); + ArgumentCaptor streamSourceInfoCaptor = ArgumentCaptor.forClass(StreamSourceInfo.class); + + managedStreamingIngestClientSpy.ingestFromStream(streamSourceInfo, ingestionProperties); + verify(queuedIngestClientMock, times(1)).ingestFromStream(streamSourceInfoCaptor.capture(), any()); + + StreamSourceInfo value = streamSourceInfoCaptor.getValue(); + int queuedStreamSize = getStreamSize(value.getStream()); + Assertions.assertEquals(queuedStreamSize, size); + } + + static class EmptyAvailableByteArrayOutputStream extends InputStream + { + private ByteArrayInputStream bb; + + EmptyAvailableByteArrayOutputStream(ByteArrayInputStream bb){ + this.bb = bb; + } + + @Override + public int read() { + return bb.read(); + } + + @Override + public synchronized int available() { + return 0; + } + } } From 3c37d43356c32394f9303a7513dc3844689e4f81 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Sun, 28 Jul 2024 14:48:33 +0300 Subject: [PATCH 13/20] better --- .../azure/kusto/ingest/ManagedStreamingIngestClient.java | 8 ++++---- .../azure/kusto/ingest/source/FileSourceInfo.java | 6 +++++- .../azure/kusto/ingest/utils/IngestionUtils.java | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git 
a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index f40f4c3b..236c09f0 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -403,7 +403,7 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo streamSourceInfo.setSourceId(sourceId); byte[] streamingBytes; - ByteArrayInputStream byteArrayStream; + InputStream byteArrayStream; if (queuingPolicy.shouldUseQueuedIngestion(streamSourceInfo.getStream().available(), streamSourceInfo.getRawSizeInBytes(), streamSourceInfo.getCompressionType() != null, ingestionProperties.getDataFormat())) { @@ -412,8 +412,8 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo } try { - if (streamSourceInfo.getStream().available() > 0) { - byteArrayStream = (ByteArrayInputStream) streamSourceInfo.getStream(); + if (streamSourceInfo.getStream() instanceof ByteArrayInputStream || streamSourceInfo.getStream() instanceof ResettableFileInputStream) { + byteArrayStream = streamSourceInfo.getStream(); } else { // If its not a ByteArrayInputStream: // Read 10mb (max streaming size), decide with that if we should stream @@ -443,7 +443,7 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo throw new IngestionClientException("Failed to read from stream.", e); } - StreamSourceInfo managedSourceInfo = new StreamSourceInfo(byteArrayStream, true, sourceId, streamSourceInfo.getCompressionType()); + StreamSourceInfo managedSourceInfo = new StreamSourceInfo(byteArrayStream, true, sourceId, streamSourceInfo.getCompressionType(), streamSourceInfo.getRawSizeInBytes()); try { IngestionResult result = streamWithRetries(managedSourceInfo, ingestionProperties, null); if (result != null) { diff --git 
a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java index 4ecde1d9..6ca4021f 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/FileSourceInfo.java @@ -19,9 +19,13 @@ public void setFilePath(String filePath) { this.filePath = filePath; } + public FileSourceInfo(String filePath) { + this.filePath = filePath; + } + // An estimation of the raw (uncompressed, un-indexed) size of the data, for binary formatted files - use only if known public FileSourceInfo(String filePath, long rawSizeInBytes) { - this.filePath = filePath; + this(filePath); this.setRawSizeInBytes(rawSizeInBytes); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java index f25fd500..eef1f28c 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java @@ -38,15 +38,17 @@ public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boole stream = new ResettableFileInputStream((FileInputStream) stream); } - StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), getCompression(filePath)); + CompressionType compression = getCompression(filePath); + StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream, false, fileSourceInfo.getSourceId(), compression); try { if (fileSourceInfo.getRawSizeInBytes() > 0) { streamSourceInfo.setRawSizeInBytes( fileSourceInfo.getRawSizeInBytes()); } else { + // Raw streamSourceInfo.setRawSizeInBytes( - format.isCompressible() ? stream.available() : 0); + (compression != null && format.isCompressible()) ? 
stream.available() : 0); } } catch (IOException e) { throw new IngestionClientException(ExceptionsUtils.getMessageEx(e), e); From b6f60ccd338d00f56a59d8578ba9b2fcf78ac9e3 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Mon, 29 Jul 2024 14:14:34 +0300 Subject: [PATCH 14/20] changelog --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f5a8efe..b0c602a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [5.1.1] - 2024-07-25 +### Added +- A new heuristic for choosing between queuing and streaming in Managed streaming client, the default behavior is using + an estimation against the 4mb limit after dividing or multiplying by some factor described by the consts: + + This will also allow users to stream bigger than 4mb non-compressed data + - disableRetries option to client options - default is true only for streaming clients as if stream is not repeatable it + will simply fail - better to let the user deal with it (alternative considered was to always create repeatable streams + like in managed streaming, but this PR also tries to avoid that) +### Fixed +- Some better error messages ## [5.1.1] - 2024-07-25 ### Fixed - Fix population of application and client version for tracing From 4d3ae60e28a43d558a9970b9b72d5052b913b2a0 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Mon, 29 Jul 2024 14:15:29 +0300 Subject: [PATCH 15/20] format --- .../ingest/ManagedStreamingIngestClient.java | 12 +++++++----- .../kusto/ingest/utils/IngestionUtils.java | 2 +- .../kusto/ingest/ManagedStreamingTest.java | 17 +++++++++-------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git 
a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index 236c09f0..35fc8268 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -52,7 +52,8 @@ public class ManagedStreamingIngestClient extends IngestClientBase implements Qu private final ExponentialRetry exponentialRetryTemplate; private CloseableHttpClient httpClient = null; private ManagedStreamingQueuingPolicy queuingPolicy = ManagedStreamingQueuingPolicy.Default; - private static final String fallbackLogString ="Data size is greater than max streaming size according to the policy. Falling back to queued."; + private static final String fallbackLogString = "Data size is greater than max streaming size according to the policy. Falling back to queued."; + /** * @param dmConnectionString dm connection string * @return a new ManagedStreamingIngestClient @@ -262,7 +263,7 @@ public ManagedStreamingIngestClient(ResourceManager resourceManager, } ManagedStreamingIngestClient(StreamingIngestClient streamingIngestClient, QueuedIngestClient queuedIngestClient, ExponentialRetry exponentialRetry) { - this.streamingIngestClient = streamingIngestClient; + this.streamingIngestClient = streamingIngestClient; this.queuedIngestClient = queuedIngestClient; exponentialRetryTemplate = exponentialRetry; } @@ -443,7 +444,8 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo throw new IngestionClientException("Failed to read from stream.", e); } - StreamSourceInfo managedSourceInfo = new StreamSourceInfo(byteArrayStream, true, sourceId, streamSourceInfo.getCompressionType(), streamSourceInfo.getRawSizeInBytes()); + StreamSourceInfo managedSourceInfo = new StreamSourceInfo(byteArrayStream, true, sourceId, 
streamSourceInfo.getCompressionType(), + streamSourceInfo.getRawSizeInBytes()); try { IngestionResult result = streamWithRetries(managedSourceInfo, ingestionProperties, null); if (result != null) { @@ -461,8 +463,8 @@ protected IngestionResult ingestFromStreamImpl(StreamSourceInfo streamSourceInfo } /* - Set the policy that handles the logic over which data size would the client choose to directly use queued ingestion - instead of trying streaming ingestion first. + * Set the policy that handles the logic over which data size would the client choose to directly use queued ingestion instead of trying streaming ingestion + * first. */ public void setQueuingPolicy(ManagedStreamingQueuingPolicy queuingPolicy) { this.queuingPolicy = queuingPolicy; diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java index eef1f28c..b8c45f9d 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/IngestionUtils.java @@ -48,7 +48,7 @@ public static StreamSourceInfo fileToStream(FileSourceInfo fileSourceInfo, boole } else { // Raw streamSourceInfo.setRawSizeInBytes( - (compression != null && format.isCompressible()) ? stream.available() : 0); + (compression != null && format.isCompressible()) ? 
stream.available() : 0); } } catch (IOException e) { throw new IngestionClientException(ExceptionsUtils.getMessageEx(e), e); diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java index a3de5cde..13a1e50f 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/ManagedStreamingTest.java @@ -60,7 +60,8 @@ static void setUp() throws Exception { managedStreamingIngestClient = new ManagedStreamingIngestClient(resourceManagerMock, azureStorageClientMock, streamingClientMock); queuedIngestClientMock = mock(QueuedIngestClientImpl.class); - managedStreamingIngestClientSpy = spy(new ManagedStreamingIngestClient(mock(StreamingIngestClient.class), queuedIngestClientMock, new ExponentialRetry(1))); + managedStreamingIngestClientSpy = spy( + new ManagedStreamingIngestClient(mock(StreamingIngestClient.class), queuedIngestClientMock, new ExponentialRetry(1))); } static ByteArrayInputStream createStreamOfSize(int size) throws UnsupportedEncodingException { @@ -80,13 +81,13 @@ static int getStreamSize(InputStream inputStream) throws IOException { } return size; } + @Test void IngestFromStream_CsvStream() throws Exception { InputStream inputStream = createStreamOfSize(1); StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); - // Expect to work and also choose no queuing OperationStatus status = managedStreamingIngestClient.ingestFromStream(streamSourceInfo, ingestionProperties).getIngestionStatusCollection() .get(0).status; @@ -162,9 +163,10 @@ void shouldUseQueueingPredicate_DefaultBehavior() { } @Test - void ManagedStreaming_BigFile_ShouldQueueTheFullStream() throws IOException, IngestionClientException, IngestionServiceException{ - EmptyAvailableByteArrayOutputStream inputStream = new 
EmptyAvailableByteArrayOutputStream(createStreamOfSize(ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 10)); - int size = inputStream.bb.available(); + void ManagedStreaming_BigFile_ShouldQueueTheFullStream() throws IOException, IngestionClientException, IngestionServiceException { + EmptyAvailableByteArrayOutputStream inputStream = new EmptyAvailableByteArrayOutputStream( + createStreamOfSize(ManagedStreamingQueuingPolicy.MAX_STREAMING_STREAM_SIZE_BYTES + 10)); + int size = inputStream.bb.available(); StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); ArgumentCaptor streamSourceInfoCaptor = ArgumentCaptor.forClass(StreamSourceInfo.class); @@ -176,11 +178,10 @@ void ManagedStreaming_BigFile_ShouldQueueTheFullStream() throws IOException, Ing Assertions.assertEquals(queuedStreamSize, size); } - static class EmptyAvailableByteArrayOutputStream extends InputStream - { + static class EmptyAvailableByteArrayOutputStream extends InputStream { private ByteArrayInputStream bb; - EmptyAvailableByteArrayOutputStream(ByteArrayInputStream bb){ + EmptyAvailableByteArrayOutputStream(ByteArrayInputStream bb) { this.bb = bb; } From a2c4a763084803345a29eefe8d21e27c7c316021 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Tue, 30 Jul 2024 14:05:47 +0300 Subject: [PATCH 16/20] comments - E2E --- .../ingest/ManagedStreamingQueuingPolicy.java | 18 +++++++----------- .../utils/ShouldUseQueueingPredicate.java | 8 -------- 2 files changed, 7 insertions(+), 19 deletions(-) delete mode 100644 ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java index c99427d2..51a3d7eb 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java +++ 
b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingQueuingPolicy.java @@ -1,8 +1,10 @@ package com.microsoft.azure.kusto.ingest; -import com.microsoft.azure.kusto.ingest.utils.ShouldUseQueueingPredicate; +interface ManagedStreamingQueuingPolicyPredicator { + boolean shouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat); +} -public class ManagedStreamingQueuingPolicy { +public class ManagedStreamingQueuingPolicy implements ManagedStreamingQueuingPolicyPredicator { static final int MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES = 4 * 1024 * 1024; // Regardless of the format, we don't want to stream more than 10mb static final int MAX_STREAMING_STREAM_SIZE_BYTES = 10 * 1024 * 1024; @@ -12,14 +14,12 @@ public class ManagedStreamingQueuingPolicy { static final int NON_BINARY_FACTOR = 2; static final double BINARY_COMPRESSED_FACTOR = 2d; static final double BINARY_UNCOMPRESSED_FACTOR = 1.5d; - final ShouldUseQueueingPredicate predicate; - public ManagedStreamingQueuingPolicy(ShouldUseQueueingPredicate defaultShouldUseQueuedIngestion) { - predicate = defaultShouldUseQueuedIngestion; + public ManagedStreamingQueuingPolicy() { } // Return true if streaming ingestion should not be tried, according to stream size, compression and format - private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { + public boolean shouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { // if size is given - use the 6mb limit. 
if (rawDataSize > 0) { return rawDataSize > MAX_STREAMING_RAW_SIZE_BYTES; @@ -58,9 +58,5 @@ private static boolean defaultShouldUseQueuedIngestion(long dataSize, long rawDa return (dataSize / NON_BINARY_FACTOR) > MAX_STREAMING_UNCOMPRESSED_RAW_SIZE_BYTES; } - static ManagedStreamingQueuingPolicy Default = new ManagedStreamingQueuingPolicy(ManagedStreamingQueuingPolicy::defaultShouldUseQueuedIngestion); - - public boolean shouldUseQueuedIngestion(long dataSize, long rawDataSize, boolean compressed, IngestionProperties.DataFormat dataFormat) { - return predicate.apply(dataSize, rawDataSize, compressed, dataFormat); - } + static ManagedStreamingQueuingPolicy Default = new ManagedStreamingQueuingPolicy(); } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java deleted file mode 100644 index 3f525e2b..00000000 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/utils/ShouldUseQueueingPredicate.java +++ /dev/null @@ -1,8 +0,0 @@ -package com.microsoft.azure.kusto.ingest.utils; - -import com.microsoft.azure.kusto.ingest.IngestionProperties; - -@FunctionalInterface -public interface ShouldUseQueueingPredicate { - Boolean apply(long size, long rawDataSize, boolean compressed, IngestionProperties.DataFormat format); -} From 2b8a0cfbfb5e582df9284ac20cc24314d5dd7f26 Mon Sep 17 00:00:00 2001 From: Ohad Bitton Date: Tue, 30 Jul 2024 15:08:40 +0300 Subject: [PATCH 17/20] changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0c602a1..af1a65d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,8 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [5.1.1] - 2024-07-25 + +## Unknown ### Added - A new heuristic for choosing between queuing and streaming in Managed streaming client, the default behavior is using an estimation against the 4mb limit after dividing or multiplying by some factor described by the consts: @@ -15,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 like in managed streaming, but this PR also tries to avoid that) ### Fixed - Some better error messages +- ## [5.1.1] - 2024-07-25 ### Fixed - Fix population of application and client version for tracing From 81052076c109810ed77a46b864b005ee9a3b1ef7 Mon Sep 17 00:00:00 2001 From: ohbitton Date: Tue, 8 Oct 2024 15:47:09 +0300 Subject: [PATCH 18/20] comments --- CHANGELOG.md | 5 +---- .../azure/kusto/data/ClientFactory.java | 2 +- .../kusto/data/HttpClientProperties.java | 17 ---------------- .../kusto/data/http/HttpClientFactory.java | 4 ---- .../ingest/ManagedStreamingIngestClient.java | 3 ++- .../azure/kusto/ingest/source/SourceInfo.java | 2 ++ .../microsoft/azure/kusto/ingest/E2ETest.java | 20 +++++++++++++++++-- samples/src/main/java/Query.java | 17 +++++----------- 8 files changed, 29 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c320e3f5..554a6007 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,15 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## Unknown +## [Unreleased] ### Added - A new heuristic for choosing between queuing and streaming in Managed streaming client, the default behavior is using an estimation against the 4mb limit after dividing or multiplying by some factor described by the consts: This will also allow users to stream bigger than 4mb non-compressed data - - disableRetries option to client options - default is true only for streaming clients as if stream is not repeatable it - will simply fail - better to let the user deal with it (alternative considered was to always create repeatable streams - like in managed streaming, but this PR also tries to avoid that) ### Fixed - Some better error messages diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java index f71cf7e1..e1e9c71c 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java @@ -76,7 +76,7 @@ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb) */ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb, HttpClientProperties properties) throws URISyntaxException { HttpClientProperties httpClientProperties = Optional.ofNullable(properties) - .orElse(HttpClientProperties.builder().disableRetries().build()); + .orElse(HttpClientProperties.builder().build()); return new ClientImpl(csb, HttpClientFactory.create(httpClientProperties), false); } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java b/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java index 76aba222..cce1d28b 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/HttpClientProperties.java @@ -16,7 +16,6 @@ public class HttpClientProperties { private final HttpHost proxy; 
private final HttpRoutePlanner routePlanner; private final String[] supportedProtocols; - private final boolean disableRetries; private HttpClientProperties(HttpClientPropertiesBuilder builder) { this.maxIdleTime = builder.maxIdleTime; @@ -27,7 +26,6 @@ private HttpClientProperties(HttpClientPropertiesBuilder builder) { this.proxy = builder.proxy; this.routePlanner = builder.routePlanner; this.supportedProtocols = builder.supportedProtocols; - this.disableRetries = builder.disableRetries; } /** @@ -113,10 +111,6 @@ public String[] supportedProtocols() { return supportedProtocols; } - public boolean isDisableRetries() { - return disableRetries; - } - public static class HttpClientPropertiesBuilder { private Integer maxIdleTime = 120; @@ -127,7 +121,6 @@ public static class HttpClientPropertiesBuilder { private HttpHost proxy = null; private HttpRoutePlanner routePlanner = null; private String[] supportedProtocols = null; - private boolean disableRetries; private HttpClientPropertiesBuilder() { } @@ -233,16 +226,6 @@ public HttpClientPropertiesBuilder supportedProtocols(String[] tlsProtocols) { return this; } - /** - * Disable all http client internal retries. 
- * - * @return the builder instance - */ - public HttpClientPropertiesBuilder disableRetries() { - this.disableRetries = true; - return this; - } - public HttpClientProperties build() { return new HttpClientProperties(this); } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java index 208244c6..3c77a507 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java @@ -44,9 +44,6 @@ public static CloseableHttpClient create(HttpClientProperties providedProperties .evictIdleConnections(properties.maxIdleTime(), TimeUnit.SECONDS) .disableRedirectHandling(); - if (properties.isDisableRetries()) { - httpClientBuilder.disableAutomaticRetries(); - } if (properties.isKeepAlive()) { final ConnectionKeepAliveStrategy keepAliveStrategy = new CustomConnectionKeepAliveStrategy(properties.maxKeepAliveTime()); httpClientBuilder.setKeepAliveStrategy(keepAliveStrategy); @@ -107,5 +104,4 @@ public long getKeepAliveDuration(HttpResponse httpResponse, HttpContext httpCont return defaultKeepAlive * 1000L; } } - } diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java index 35fc8268..12d23cdd 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -39,7 +39,8 @@ * It tries {@value ATTEMPT_COUNT} times using the streaming client, after which it falls back to the queued streaming client in case of failure. 
* By default the policy for choosing a queued ingestion on the first try is the checking of weather the size of the estimated * raw stream size (a conversion to compressed CSV) is bigger than 4MB, it will fall back to the queued streaming client. - * Use SourceInfo.size to override size estimations, alternatively - use setQueuingPolicy to override the predicate heuristics. + * Use {@link #setQueuingPolicy(ManagedStreamingQueuingPolicy)} to override the predicate heuristics. + * Use SourceInfo.setRawSizeInBytes to set the raw size of the data. *

*/ public class ManagedStreamingIngestClient extends IngestClientBase implements QueuedIngestClient { diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/SourceInfo.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/SourceInfo.java index 5aa91a97..930003c8 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/SourceInfo.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/source/SourceInfo.java @@ -14,4 +14,6 @@ public interface SourceInfo { UUID getSourceId(); void setSourceId(UUID sourceId); + + long getRawSizeInBytes(); } diff --git a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java index 7d51c9a5..7c2bf776 100644 --- a/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java +++ b/ingest/src/test/java/com/microsoft/azure/kusto/ingest/E2ETest.java @@ -358,8 +358,24 @@ void testIngestFromFileWithTable() { @ValueSource(booleans = {true, false}) void testIngestFromStream(boolean isManaged) throws IOException { for (TestDataItem item : dataForTests) { - InputStream stream = Files.newInputStream(item.file.toPath()); - StreamSourceInfo streamSourceInfo = new StreamSourceInfo(stream); + InputStream inputStream = Files.newInputStream(item.file.toPath()); + int nRead; + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + + byte[] data = new byte[48000]; + + while ((nRead = inputStream.read(data, 0, data.length)) != -1) { + buffer.write(data, 0, nRead); + } + + buffer.flush(); + byte[] byteArray = buffer.toByteArray(); + + // Use the byte array as needed + + inputStream.close(); + inputStream = new ByteArrayInputStream(byteArray); + StreamSourceInfo streamSourceInfo = new StreamSourceInfo(inputStream); if (item.file.getPath().endsWith(".gz")) { streamSourceInfo.setCompressionType(CompressionType.gz); } diff --git a/samples/src/main/java/Query.java b/samples/src/main/java/Query.java index 
b9c7553d..77864178 100644 --- a/samples/src/main/java/Query.java +++ b/samples/src/main/java/Query.java @@ -13,19 +13,13 @@ import java.util.concurrent.TimeUnit; public class Query { - static void printVersion( Class clazz) { - Package p = clazz.getPackage(); - System.out.printf("%s%n Title: %s%n Version: %s%n Vendor: %s%n", - clazz.getName(), - p.getImplementationTitle(), - p.getImplementationVersion(), - p.getImplementationVendor()); - } public static void main(String[] args) { - printVersion(com.microsoft.aad.msal4j.IAccount.class); try { - ConnectionStringBuilder csb = ConnectionStringBuilder.createWithUserPrompt( - System.getenv("ENGINE_CONNECTION_STRING")); + ConnectionStringBuilder csb = ConnectionStringBuilder.createWithAadApplicationCredentials( + System.getProperty("clusterPath"), + System.getProperty("appId"), + System.getProperty("appKey"), + System.getProperty("appTenant")); HttpClientProperties properties = HttpClientProperties.builder() .keepAlive(true) @@ -37,7 +31,6 @@ public static void main(String[] args) { Client client = ClientFactory.createClient(csb, properties); KustoOperationResult results = client.execute(".show version"); - KustoOperationResult results2 = client.execute(".show version"); KustoResultSetTable mainTableResult = results.getPrimaryResults(); System.out.printf("Kusto sent back %s rows.%n", mainTableResult.count()); From 39d1558fcb89f338557aba6819877a9f5322267d Mon Sep 17 00:00:00 2001 From: ohbitton Date: Sun, 27 Oct 2024 12:32:46 +0200 Subject: [PATCH 19/20] comment --- .../com/microsoft/azure/kusto/data/ClientFactory.java | 8 +++++--- .../azure/kusto/data/http/HttpClientFactory.java | 10 ++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java index e1e9c71c..0f874541 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java +++ 
b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java @@ -75,9 +75,11 @@ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb) * @throws URISyntaxException if the cluster URL is invalid */ public static StreamingClient createStreamingClient(ConnectionStringBuilder csb, HttpClientProperties properties) throws URISyntaxException { - HttpClientProperties httpClientProperties = Optional.ofNullable(properties) - .orElse(HttpClientProperties.builder().build()); - return new ClientImpl(csb, HttpClientFactory.create(httpClientProperties), false); + if (properties == null) { + HttpClientProperties.builder().build(); + } + + return new ClientImpl(csb, HttpClientFactory.create(properties), false); } /** diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java index 3c77a507..baad008e 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/http/HttpClientFactory.java @@ -29,13 +29,15 @@ public class HttpClientFactory { /** * Creates a new Apache HTTP client. 
* - * @param providedProperties custom HTTP client properties + * @param properties custom HTTP client properties * @return a new Apache HTTP client */ - public static CloseableHttpClient create(HttpClientProperties providedProperties) { + public static CloseableHttpClient create(HttpClientProperties properties) { LOGGER.info("Creating new CloseableHttpClient client"); - final HttpClientProperties properties = Optional.ofNullable(providedProperties) - .orElse(HttpClientProperties.builder().build()); + if (properties == null) { + properties = HttpClientProperties.builder().build(); + } + final HttpClientBuilder httpClientBuilder = HttpClientBuilder.create() .useSystemProperties() .setMaxConnTotal(properties.maxConnectionTotal()) From 290fae3701736918d8b3c90dc8482c9a05ebb9a6 Mon Sep 17 00:00:00 2001 From: ohbitton Date: Sun, 27 Oct 2024 16:05:03 +0200 Subject: [PATCH 20/20] fix --- .../java/com/microsoft/azure/kusto/data/BaseClient.java | 6 +----- .../com/microsoft/azure/kusto/data/ClientFactory.java | 3 --- .../azure/kusto/data/ClientRequestProperties.java | 2 +- .../azure/kusto/data/exceptions/DataWebException.java | 3 ++- .../kusto/data/instrumentation/MonitoredActivity.java | 1 - .../azure/kusto/ingest/ManagedStreamingIngestClient.java | 8 +++++--- .../azure/kusto/ingest/StreamingIngestClient.java | 1 - 7 files changed, 9 insertions(+), 15 deletions(-) diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/BaseClient.java b/data/src/main/java/com/microsoft/azure/kusto/data/BaseClient.java index 27bb149e..9b264ece 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/BaseClient.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/BaseClient.java @@ -55,10 +55,6 @@ private String processResponseBody(HttpResponse response) throws DataServiceExce } } - protected InputStream postToStreamingOutput(HttpRequest request) throws DataServiceException { - return postToStreamingOutput(request, 0); - } - // Todo: Implement async version of this method 
protected InputStream postToStreamingOutput(HttpRequest request, int redirectCount) throws DataServiceException { @@ -143,7 +139,7 @@ public static DataServiceException createExceptionFromResponse(String url, HttpR } private static void closeResourcesIfNeeded(boolean returnInputStream, HttpResponse httpResponse) { - // If we close the resources after returning the InputStream to the user, he won't be able to read from it - used in streaming query + // If we close the resources after returning the InputStream to the user, he won't be able to read from it - used in streaming query if (!returnInputStream) { if (httpResponse != null) { httpResponse.close(); diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java index a44b87a6..b2377507 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/ClientFactory.java @@ -6,11 +6,8 @@ import com.azure.core.http.HttpClient; import com.microsoft.azure.kusto.data.auth.ConnectionStringBuilder; import com.microsoft.azure.kusto.data.http.HttpClientProperties; -import com.microsoft.azure.kusto.data.http.HttpClientFactory; -import org.apache.http.impl.client.CloseableHttpClient; import java.net.URISyntaxException; -import java.util.Optional; public class ClientFactory { diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/ClientRequestProperties.java b/data/src/main/java/com/microsoft/azure/kusto/data/ClientRequestProperties.java index 7032f1ec..cd322653 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/ClientRequestProperties.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/ClientRequestProperties.java @@ -40,7 +40,7 @@ public class ClientRequestProperties implements Serializable, TraceableAttributes { public static final String OPTION_SERVER_TIMEOUT = "servertimeout"; - // If set and positive, indicates the maximum number 
of HTTP redirects that the client will process. [Integer] + // If set and positive, indicates the maximum number of HTTP redirects that the client will process. [Integer] public static final String OPTION_CLIENT_MAX_REDIRECT_COUNT = "client_max_redirect_count"; /* * Matches valid Kusto Timespans: Optionally negative, optional number of days followed by a period, optionally up to 24 as hours followed by a colon, diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/DataWebException.java b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/DataWebException.java index d29eedf9..0e42409c 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/DataWebException.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/exceptions/DataWebException.java @@ -35,9 +35,10 @@ public OneApiError getApiError() { try { apiError = OneApiError.fromJsonObject(objectMapper.readTree(getMessage()).get("error")); } catch (JsonProcessingException e) { - log.error("failed to parse error from message {} {} ", e.getMessage(), e); + log.error(String.format("failed to parse error from message: '%s' ", e.getMessage()), e); } } + return apiError; } } diff --git a/data/src/main/java/com/microsoft/azure/kusto/data/instrumentation/MonitoredActivity.java b/data/src/main/java/com/microsoft/azure/kusto/data/instrumentation/MonitoredActivity.java index 3be4f55c..43ad5cd4 100644 --- a/data/src/main/java/com/microsoft/azure/kusto/data/instrumentation/MonitoredActivity.java +++ b/data/src/main/java/com/microsoft/azure/kusto/data/instrumentation/MonitoredActivity.java @@ -2,7 +2,6 @@ import java.util.HashMap; import java.util.Map; -import java.util.concurrent.Callable; public class MonitoredActivity { public static void invoke(Runnable runnable, String nameOfSpan) { diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java 
index 50244975..af37c23c 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/ManagedStreamingIngestClient.java @@ -36,8 +36,10 @@ * Since the streaming client communicates directly with the engine, it's more prone to failure, so this class * holds both a streaming client and a queued client. * It tries {@value ATTEMPT_COUNT} times using the streaming client, after which it falls back to the queued streaming client in case of failure. - * If the size of the stream is bigger than {@value MAX_STREAMING_SIZE_BYTES}, it will fall back to the queued streaming client. - *

+ * By default, the policy chooses queued ingestion on the first try by checking whether the estimated + * raw stream size (a conversion to compressed CSV) is bigger than 4MB; if so, it falls back to the queued streaming client. + * Use {@link #setQueuingPolicy(ManagedStreamingQueuingPolicy)} to override the predicate heuristics. + * Use SourceInfo.setRawSizeInBytes to set the raw size of the data. *

*/ public class ManagedStreamingIngestClient extends IngestClientBase implements QueuedIngestClient { @@ -297,7 +299,7 @@ protected IngestionResult ingestFromBlobImpl(BlobSourceInfo blobSourceInfo, Inge BlobClientBuilder blobClientBuilder = new BlobClientBuilder().endpoint(blobSourceInfo.getBlobPath()); if (httpClient != null) { - blobClientBuilder.httpClient((HttpClient) httpClient); + blobClientBuilder.httpClient(httpClient); } BlobClient blobClient = blobClientBuilder.buildClient(); diff --git a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java index 5d92c35d..816c49cb 100644 --- a/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java +++ b/ingest/src/main/java/com/microsoft/azure/kusto/ingest/StreamingIngestClient.java @@ -11,7 +11,6 @@ import com.microsoft.azure.kusto.data.auth.ConnectionStringBuilder; import com.microsoft.azure.kusto.data.exceptions.DataClientException; import com.microsoft.azure.kusto.data.exceptions.DataServiceException; -import com.microsoft.azure.kusto.data.HttpClientProperties; import com.microsoft.azure.kusto.data.exceptions.ExceptionsUtils; import com.microsoft.azure.kusto.data.http.HttpClientProperties; import com.microsoft.azure.kusto.data.instrumentation.MonitoredActivity;