diff --git a/FIPS/scripts/check_content.sh b/FIPS/scripts/check_content.sh index 8b818b1b4..565159073 100755 --- a/FIPS/scripts/check_content.sh +++ b/FIPS/scripts/check_content.sh @@ -1,12 +1,12 @@ #!/bin/bash -e -# scripts used to check if all dependency is shaded into snowflake internal path +# scripts used to check if all dependencies are shaded into snowflake internal path set -o pipefail DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" -if jar tvf $DIR/../target/snowflake-jdbc-fips.jar | awk '{print $8}' | grep -v -E "^(net|com)/snowflake" | grep -v -E "(com|net)/\$" | grep -v -E "^META-INF" | grep -v -E "^mozilla" | grep -v -E "^com/sun/jna" | grep -v com/sun/ | grep -v mime.types; then +if jar tvf $DIR/../target/snowflake-jdbc-fips.jar | awk '{print $8}' | grep -v -E "^(net|com)/snowflake" | grep -v -E "(com|net)/\$" | grep -v -E "^META-INF" | grep -v -E "^mozilla" | grep -v -E "^com/sun/jna" | grep -v com/sun/ | grep -v mime.types | grep -v -E "^com/github/" | grep -v -E "^aix/" | grep -v -E "^darwin/" | grep -v -E "^freebsd/" | grep -v -E "^linux/" | grep -v -E "^win/"; then echo "[ERROR] JDBC jar includes class not under the snowflake namespace" exit 1 fi diff --git a/ci/scripts/check_content.sh b/ci/scripts/check_content.sh index a9c0768b6..3a0747be2 100755 --- a/ci/scripts/check_content.sh +++ b/ci/scripts/check_content.sh @@ -8,12 +8,12 @@ set -o pipefail DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" -if jar tvf $DIR/../../target/snowflake-jdbc${package_modifier}.jar | awk '{print $8}' | grep -v -E "^(net|com)/snowflake" | grep -v -E "(com|net)/\$" | grep -v -E "^META-INF" | grep -v -E "^mozilla" | grep -v -E "^com/sun/jna" | grep -v com/sun/ | grep -v mime.types; then +if jar tvf $DIR/../../target/snowflake-jdbc${package_modifier}.jar | awk '{print $8}' | grep -v -E "^(net|com)/snowflake" | grep -v -E "(com|net)/\$" | grep -v -E "^META-INF" | grep -v -E "^mozilla" | grep -v -E "^com/sun/jna" | grep -v com/sun/ | 
grep -v mime.types | grep -v -E "^com/github/" | grep -v -E "^aix/" | grep -v -E "^darwin/" | grep -v -E "^freebsd/" | grep -v -E "^linux/" | grep -v -E "^win/"; then echo "[ERROR] JDBC jar includes class not under the snowflake namespace" exit 1 fi -if jar tvf $DIR/../../target/snowflake-jdbc${package_modifier}.jar | awk '{print $8}' | grep -E "^META-INF/versions/.*.class" | grep -v -E "^META-INF/versions/.*/(net|com)/snowflake"; then - echo "[ERROR] JDBC jar includes multi release classes not under the snowflake namespace" +if jar tvf $DIR/../../target/snowflake-jdbc${package_modifier}.jar | awk '{print $8}' | grep -E "^META-INF/versions/.*.class" | grep -v -E "^META-INF/versions/.*/(net|com)/snowflake" | grep -v -E "^META-INF/versions/.*/com/github" | grep -v -E "^aix/" | grep -v -E "^darwin/" | grep -v -E "^freebsd/" | grep -v -E "^linux/" | grep -v -E "^win/"; then + echo "[ERROR] JDBC jar includes multi-release classes not under the snowflake namespace" exit 1 fi diff --git a/linkage-checker-exclusion-rules.xml b/linkage-checker-exclusion-rules.xml index 65affa44a..64b5860c2 100644 --- a/linkage-checker-exclusion-rules.xml +++ b/linkage-checker-exclusion-rules.xml @@ -19,11 +19,6 @@ Optional - - - - Optional - diff --git a/parent-pom.xml b/parent-pom.xml index b8f46b00f..855d171db 100644 --- a/parent-pom.xml +++ b/parent-pom.xml @@ -19,6 +19,7 @@ 1.10.0 4.5.14 4.4.16 + 1.5.6-5 17.0.0 9.3 1.8.1 @@ -327,6 +328,11 @@ httpcore ${apache.httpcore.version} + + com.github.luben + zstd-jni + ${zstd-jni.version} + org.apache.tika tika-core @@ -650,6 +656,10 @@ org.apache.httpcomponents httpcore + + com.github.luben + zstd-jni + org.apache.tika tika-core diff --git a/src/main/java/net/snowflake/client/jdbc/CompressedStreamFactory.java b/src/main/java/net/snowflake/client/jdbc/CompressedStreamFactory.java new file mode 100644 index 000000000..ebb376db9 --- /dev/null +++ b/src/main/java/net/snowflake/client/jdbc/CompressedStreamFactory.java @@ -0,0 +1,38 @@ +package 
net.snowflake.client.jdbc; + +import static net.snowflake.client.core.Constants.MB; +import static net.snowflake.common.core.FileCompressionType.GZIP; +import static net.snowflake.common.core.FileCompressionType.ZSTD; + +import com.github.luben.zstd.ZstdInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.GZIPInputStream; +import net.snowflake.common.core.SqlState; +import org.apache.http.Header; + +class CompressedStreamFactory { + + private static final int STREAM_BUFFER_SIZE = MB; + + /** + * Determine the format of the response; if it is not plain text, gzip, or zstd, raise an error. + */ + public InputStream createBasedOnEncodingHeader(InputStream is, Header encoding) + throws IOException, SnowflakeSQLException { + if (encoding != null) { + if (GZIP.name().equalsIgnoreCase(encoding.getValue())) { + return new GZIPInputStream(is, STREAM_BUFFER_SIZE); + } else if (ZSTD.name().equalsIgnoreCase(encoding.getValue())) { + return new ZstdInputStream(is); + } else { + throw new SnowflakeSQLException( + SqlState.INTERNAL_ERROR, + ErrorCode.INTERNAL_ERROR.getMessageCode(), + "Exception: unexpected compression got " + encoding.getValue()); + } + } else { + return DefaultResultStreamProvider.detectGzipAndGetStream(is); + } + } +} diff --git a/src/main/java/net/snowflake/client/jdbc/DefaultResultStreamProvider.java b/src/main/java/net/snowflake/client/jdbc/DefaultResultStreamProvider.java index 3ee556bb4..e7a1e8a0c 100644 --- a/src/main/java/net/snowflake/client/jdbc/DefaultResultStreamProvider.java +++ b/src/main/java/net/snowflake/client/jdbc/DefaultResultStreamProvider.java @@ -1,7 +1,5 @@ package net.snowflake.client.jdbc; -import static net.snowflake.client.core.Constants.MB; - import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; @@ -34,7 +32,11 @@ public class DefaultResultStreamProvider implements ResultStreamProvider { // SSE-C algorithm value private static final String SSE_C_AES = 
"AES256"; - private static final int STREAM_BUFFER_SIZE = MB; + private CompressedStreamFactory compressedStreamFactory; + + public DefaultResultStreamProvider() { + this.compressedStreamFactory = new CompressedStreamFactory(); + } @Override public InputStream getInputStream(ChunkDownloadContext context) throws Exception { @@ -71,9 +73,11 @@ public InputStream getInputStream(ChunkDownloadContext context) throws Exception InputStream inputStream; final HttpEntity entity = response.getEntity(); + Header encoding = response.getFirstHeader("Content-Encoding"); try { - // read the chunk data - inputStream = detectContentEncodingAndGetInputStream(response, entity.getContent()); + // create stream based on compression type + inputStream = + compressedStreamFactory.createBasedOnEncodingHeader(entity.getContent(), encoding); } catch (Exception ex) { logger.error("Failed to decompress data: {}", response); @@ -144,28 +148,6 @@ else if (context.getQrmk() != null) { return response; } - private InputStream detectContentEncodingAndGetInputStream(HttpResponse response, InputStream is) - throws IOException, SnowflakeSQLException { - InputStream inputStream = is; // Determine the format of the response, if it is not - // either plain text or gzip, raise an error. 
- Header encoding = response.getFirstHeader("Content-Encoding"); - if (encoding != null) { - if ("gzip".equalsIgnoreCase(encoding.getValue())) { - /* specify buffer size for GZIPInputStream */ - inputStream = new GZIPInputStream(is, STREAM_BUFFER_SIZE); - } else { - throw new SnowflakeSQLException( - SqlState.INTERNAL_ERROR, - ErrorCode.INTERNAL_ERROR.getMessageCode(), - "Exception: unexpected compression got " + encoding.getValue()); - } - } else { - inputStream = detectGzipAndGetStream(is); - } - - return inputStream; - } - public static InputStream detectGzipAndGetStream(InputStream is) throws IOException { PushbackInputStream pb = new PushbackInputStream(is, 2); byte[] signature = new byte[2]; diff --git a/src/test/java/net/snowflake/client/jdbc/CompressedStreamFactoryTest.java b/src/test/java/net/snowflake/client/jdbc/CompressedStreamFactoryTest.java new file mode 100644 index 000000000..86eb5764a --- /dev/null +++ b/src/test/java/net/snowflake/client/jdbc/CompressedStreamFactoryTest.java @@ -0,0 +1,80 @@ +package net.snowflake.client.jdbc; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.github.luben.zstd.ZstdInputStream; +import com.github.luben.zstd.ZstdOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.http.Header; +import org.apache.http.message.BasicHeader; +import org.junit.Test; + +public class CompressedStreamFactoryTest { + + private final CompressedStreamFactory factory = new CompressedStreamFactory(); + + @Test + public void testDetectContentEncodingAndGetInputStream_Gzip() throws Exception { + // Original data to compress and validate + String originalData = "Some data in GZIP"; + + // Creating encoding header + Header encodingHeader = new 
BasicHeader("Content-Encoding", "gzip"); + + // Creating a gzip byte array using GZIPOutputStream + byte[] gzipData; + try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) { + gzipOutputStream.write(originalData.getBytes(StandardCharsets.UTF_8)); + gzipOutputStream.close(); // close to flush and finish the compression + gzipData = byteArrayOutputStream.toByteArray(); + } + + // Wrapping the gzip data in an in-memory input stream + InputStream gzipStream = new ByteArrayInputStream(gzipData); + + // Call the factory method under test + InputStream resultStream = factory.createBasedOnEncodingHeader(gzipStream, encodingHeader); + + // Decompress and validate the data matches original + assertTrue(resultStream instanceof GZIPInputStream); + String decompressedData = IOUtils.toString(resultStream, StandardCharsets.UTF_8); + assertEquals(originalData, decompressedData); + } + + @Test + public void testDetectContentEncodingAndGetInputStream_Zstd() throws Exception { + // Original data to compress and validate + String originalData = "Some data in ZSTD"; + + // Creating encoding header + Header encodingHeader = new BasicHeader("Content-Encoding", "zstd"); + + // Creating a zstd byte array using ZstdOutputStream + byte[] zstdData; + try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + ZstdOutputStream zstdOutputStream = new ZstdOutputStream(byteArrayOutputStream)) { + zstdOutputStream.write(originalData.getBytes(StandardCharsets.UTF_8)); + zstdOutputStream.close(); // close to flush and finish the compression + zstdData = byteArrayOutputStream.toByteArray(); + } + + // Wrapping the zstd data in an in-memory input stream + InputStream zstdStream = new ByteArrayInputStream(zstdData); + + // Call the factory method under test + InputStream resultStream = factory.createBasedOnEncodingHeader(zstdStream, encodingHeader); + + // Decompress and validate the 
data matches original + assertTrue(resultStream instanceof ZstdInputStream); + String decompressedData = IOUtils.toString(resultStream, StandardCharsets.UTF_8); + assertEquals(originalData, decompressedData); + } +} diff --git a/thin_public_pom.xml b/thin_public_pom.xml index 08ab73da6..61118214c 100644 --- a/thin_public_pom.xml +++ b/thin_public_pom.xml @@ -64,6 +64,7 @@ UTF-8 2.0.13 1.6.9 + 1.5.6-5 @@ -267,6 +268,11 @@ jsoup ${jsoup.version} + + com.github.luben + zstd-jni + ${zstd-jni.version} + org.slf4j slf4j-api