From c84c8d669d92dee6e44ab44c05b865808702b48f Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Wed, 2 Aug 2023 13:54:07 +0530 Subject: [PATCH] Disallowing compression level for lz4 and best_compression codec (#8737) * Disallowing compression level for lz4 and best_compression codec * setting up codec settings interface for validation Signed-off-by: Sarthak Aggarwal Signed-off-by: Shivansh Arora --- CHANGELOG.md | 1 + .../index/codec/CodecCompressionLevelIT.java | 178 ++++++++++++++++++ .../opensearch/index/codec/CodecSettings.java | 21 +++ .../index/codec/customcodecs/ZstdCodec.java | 10 +- .../codec/customcodecs/ZstdNoDictCodec.java | 10 +- .../opensearch/index/engine/EngineConfig.java | 51 ++++- .../opensearch/index/codec/CodecTests.java | 71 ++++++- 7 files changed, 332 insertions(+), 10 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java create mode 100644 server/src/main/java/org/opensearch/index/codec/CodecSettings.java diff --git a/CHANGELOG.md b/CHANGELOG.md index a37976462b38e..e29bbd2da4db5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Start replication checkpointTimers on primary before segments upload to remote store. ([#8221]()https://github.com/opensearch-project/OpenSearch/pull/8221) - [distribution/archives] [Linux] [x64] Provide the variant of the distributions bundled with JRE ([#8195]()https://github.com/opensearch-project/OpenSearch/pull/8195) - Add configuration for file cache size to max remote data ratio to prevent oversubscription of file cache ([#8606](https://github.com/opensearch-project/OpenSearch/pull/8606)) +- Disallow compression level to be set for default and best_compression index codecs ([#8737]()https://github.com/opensearch-project/OpenSearch/pull/8737) ### Dependencies - Bump `org.apache.logging.log4j:log4j-core` from 2.17.1 to 2.20.0 ([#8307](https://github.com/opensearch-project/OpenSearch/pull/8307)) diff --git a/server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java b/server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java new file mode 100644 index 0000000000000..5f3e53f1454fc --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.apache.logging.log4j.core.util.Throwables; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.concurrent.ExecutionException; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST) +public class CodecCompressionLevelIT extends OpenSearchIntegTestCase { + + public void testLuceneCodecsCreateIndexWithCompressionLevel() { + + internalCluster().ensureAtLeastNumDataNodes(1); + final String index = "test-index"; + + // creating index + assertThrows( + IllegalArgumentException.class, + () -> createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + .put("index.codec.compression_level", randomIntBetween(1, 6)) + .build() + ) + ); + + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + .build() + ); + ensureGreen(index); + } + + public void testZStandardCodecsCreateIndexWithCompressionLevel() { + + internalCluster().ensureAtLeastNumDataNodes(1); + final String index = "test-index"; + + // creating index + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .put("index.codec.compression_level", randomIntBetween(1, 6)) + .build() + ); + + ensureGreen(index); + } + + public void testZStandardToLuceneCodecsWithCompressionLevel() throws ExecutionException, InterruptedException { + + internalCluster().ensureAtLeastNumDataNodes(1); + final String index = "test-index"; + + // creating index + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .put("index.codec.compression_level", randomIntBetween(1, 6)) + .build() + ); + ensureGreen(index); + + assertAcked(client().admin().indices().prepareClose(index)); + + Throwable executionException = expectThrows( + ExecutionException.class, + () -> client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(index).settings( + Settings.builder().put("index.codec", randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + ) + ) + .get() + ); + + Throwable rootCause = Throwables.getRootCause(executionException); + assertEquals(IllegalArgumentException.class, rootCause.getClass()); + assertTrue(rootCause.getMessage().startsWith("Compression level cannot be set")); + + assertAcked( + client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(index).settings( + Settings.builder() + .put("index.codec", randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + .put("index.codec.compression_level", (String) null) + ) + ) + .get() + ); + + assertAcked(client().admin().indices().prepareOpen(index)); + ensureGreen(index); + } + + public void testLuceneToZStandardCodecsWithCompressionLevel() throws ExecutionException, InterruptedException { + + internalCluster().ensureAtLeastNumDataNodes(1); + final String index = "test-index"; + + // creating index + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + .build() + ); + ensureGreen(index); + + assertAcked(client().admin().indices().prepareClose(index)); + + Throwable executionException = expectThrows( + ExecutionException.class, + () -> client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(index).settings( + Settings.builder() + .put("index.codec", randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + .put("index.codec.compression_level", randomIntBetween(1, 6)) + ) + ) + .get() + ); + + Throwable rootCause = Throwables.getRootCause(executionException); + assertEquals(IllegalArgumentException.class, rootCause.getClass()); + assertTrue(rootCause.getMessage().startsWith("Compression level cannot be set")); + + assertAcked( + client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(index).settings( + Settings.builder() + .put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .put("index.codec.compression_level", randomIntBetween(1, 6)) + ) + ) + .get() + ); + + assertAcked(client().admin().indices().prepareOpen(index)); + ensureGreen(index); + } + +} diff --git a/server/src/main/java/org/opensearch/index/codec/CodecSettings.java b/server/src/main/java/org/opensearch/index/codec/CodecSettings.java new file mode 100644 index 0000000000000..2d371dfc190db --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/CodecSettings.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.apache.lucene.codecs.Codec; +import org.opensearch.common.settings.Setting; + +/** + * This {@link CodecSettings} allows us to manage the settings with {@link Codec}. + * + * @opensearch.internal + */ +public interface CodecSettings { + boolean supports(Setting setting); +} diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java index 04c110fceacdf..042f7eaa29e53 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java @@ -9,12 +9,15 @@ package org.opensearch.index.codec.customcodecs; import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.Setting; +import org.opensearch.index.codec.CodecSettings; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; /** * ZstdCodec provides ZSTD compressor using the zstd-jni library. */ -public class ZstdCodec extends Lucene95CustomCodec { +public class ZstdCodec extends Lucene95CustomCodec implements CodecSettings { /** * Creates a new ZstdCodec instance with the default compression level. @@ -41,4 +44,9 @@ public ZstdCodec(MapperService mapperService, Logger logger, int compressionLeve public String toString() { return getClass().getSimpleName(); } + + @Override + public boolean supports(Setting setting) { + return setting.equals(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING); + } } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java index 134f9a14422ad..a7e8e0e42ee68 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java @@ -9,12 +9,15 @@ package org.opensearch.index.codec.customcodecs; import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.Setting; +import org.opensearch.index.codec.CodecSettings; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; /** * ZstdNoDictCodec provides ZSTD compressor without a dictionary support. */ -public class ZstdNoDictCodec extends Lucene95CustomCodec { +public class ZstdNoDictCodec extends Lucene95CustomCodec implements CodecSettings { /** * Creates a new ZstdNoDictCodec instance with the default compression level. @@ -41,4 +44,9 @@ public ZstdNoDictCodec(MapperService mapperService, Logger logger, int compressi public String toString() { return getClass().getSimpleName(); } + + @Override + public boolean supports(Setting setting) { + return setting.equals(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING); + } } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 7900e63a95c39..03669eaac0070 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -48,6 +48,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.index.IndexSettings; import org.opensearch.index.codec.CodecService; +import org.opensearch.index.codec.CodecSettings; import org.opensearch.index.mapper.ParsedDocument; import org.opensearch.index.seqno.RetentionLeases; import org.opensearch.core.index.shard.ShardId; @@ -63,6 +64,7 @@ import java.util.Comparator; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.function.BooleanSupplier; import java.util.function.LongSupplier; import java.util.function.Supplier; @@ -148,13 +150,52 @@ public Supplier retentionLeasesSupplier() { * Compression Level gives a trade-off between compression ratio and speed. The higher compression level results in higher compression ratio but slower compression and decompression speeds. * This setting is not realtime updateable. */ - public static final Setting INDEX_CODEC_COMPRESSION_LEVEL_SETTING = Setting.intSetting( + + public static final Setting INDEX_CODEC_COMPRESSION_LEVEL_SETTING = new Setting<>( "index.codec.compression_level", - 3, - 1, - 6, + Integer.toString(3), + new Setting.IntegerParser(1, 6, "index.codec.compression_level", false), Property.IndexScope - ); + ) { + @Override + public Set getSettingsDependencies(String key) { + return Set.of(new SettingDependency() { + @Override + public Setting getSetting() { + return INDEX_CODEC_SETTING; + } + + @Override + public void validate(String key, Object value, Object dependency) { + if (!(dependency instanceof String)) { + throw new IllegalArgumentException("Codec should be of string type."); + } + doValidateCodecSettings((String) dependency); + } + }); + } + }; + + private static void doValidateCodecSettings(final String codec) { + switch (codec) { + case "zstd": + case "zstd_no_dict": + return; + case "best_compression": + case "lucene_default": + case "default": + break; + default: + if (Codec.availableCodecs().contains(codec)) { + Codec luceneCodec = Codec.forName(codec); + if (luceneCodec instanceof CodecSettings + && ((CodecSettings) luceneCodec).supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)) { + return; + } + } + } + throw new IllegalArgumentException("Compression level cannot be set for the " + codec + " codec."); + } /** * Configures an index to optimize documents with auto generated ids for append only. If this setting is updated from false diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index b0d904392407c..0eeeae9e8e59e 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -43,12 +43,14 @@ import org.apache.lucene.index.SegmentReader; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.util.LuceneTestCase.SuppressCodecs; +import org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Settings; import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec; import org.opensearch.index.codec.customcodecs.Lucene95CustomStoredFieldsFormat; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.similarity.SimilarityService; import org.opensearch.indices.mapper.MapperRegistry; @@ -60,6 +62,7 @@ import java.util.Collections; import static org.hamcrest.Matchers.instanceOf; +import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING; @SuppressCodecs("*") // we test against default codec so never get a random one here! public class CodecTests extends OpenSearchTestCase { @@ -96,7 +99,7 @@ public void testZstdNoDict() throws Exception { public void testZstdWithCompressionLevel() throws Exception { int randomCompressionLevel = randomIntBetween(1, 6); - Codec codec = createCodecService(randomCompressionLevel).codec("zstd"); + Codec codec = createCodecService(randomCompressionLevel, "zstd").codec("zstd"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); @@ -104,12 +107,73 @@ public void testZstdWithCompressionLevel() throws Exception { public void testZstdNoDictWithCompressionLevel() throws Exception { int randomCompressionLevel = randomIntBetween(1, 6); - Codec codec = createCodecService(randomCompressionLevel).codec("zstd_no_dict"); + Codec codec = createCodecService(randomCompressionLevel, "zstd_no_dict").codec("zstd_no_dict"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); } + public void testBestCompressionWithCompressionLevel() { + final Settings zstdSettings = Settings.builder() + .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) + .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC)) + .build(); + + // able to validate zstd + final IndexScopedSettings zstdIndexScopedSettings = new IndexScopedSettings( + zstdSettings, + IndexScopedSettings.BUILT_IN_INDEX_SETTINGS + ); + zstdIndexScopedSettings.validate(zstdSettings, true); + + final Settings settings = Settings.builder() + .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) + .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC)) + .build(); + final IndexScopedSettings indexScopedSettings = new IndexScopedSettings(settings, IndexScopedSettings.BUILT_IN_INDEX_SETTINGS); + + final IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> indexScopedSettings.validate(settings, true)); + assertTrue(e.getMessage().startsWith("Compression level cannot be set")); + } + + public void testLuceneCodecsWithCompressionLevel() { + String codecName = randomFrom(Codec.availableCodecs()); + Codec codec = Codec.forName(codecName); + + final Settings customCodecSettings = Settings.builder() + .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) + .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), "Lucene95CustomCodec") + .build(); + + final IndexScopedSettings customCodecIndexScopedSettings = new IndexScopedSettings( + customCodecSettings, + IndexScopedSettings.BUILT_IN_INDEX_SETTINGS + ); + customCodecIndexScopedSettings.validate(customCodecSettings, true); + + final Settings settings = Settings.builder() + .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) + .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), codecName) + .build(); + final IndexScopedSettings indexScopedSettings = new IndexScopedSettings(settings, IndexScopedSettings.BUILT_IN_INDEX_SETTINGS); + + if (!(codec instanceof CodecSettings && ((CodecSettings) codec).supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING))) { + final IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> indexScopedSettings.validate(settings, true) + ); + assertTrue(e.getMessage().startsWith("Compression level cannot be set")); + } + } + + public void testZstandardCompressionLevelSupport() throws Exception { + CodecService codecService = createCodecService(false); + CodecSettings zstdCodec = (CodecSettings) codecService.codec("zstd"); + CodecSettings zstdNoDictCodec = (CodecSettings) codecService.codec("zstd_no_dict"); + assertTrue(zstdCodec.supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)); + assertTrue(zstdNoDictCodec.supports(INDEX_CODEC_COMPRESSION_LEVEL_SETTING)); + } + public void testDefaultMapperServiceNull() throws Exception { Codec codec = createCodecService(true).codec("default"); assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec); @@ -165,9 +229,10 @@ private CodecService createCodecService(boolean isMapperServiceNull) throws IOEx return buildCodecService(nodeSettings); } - private CodecService createCodecService(int randomCompressionLevel) throws IOException { + private CodecService createCodecService(int randomCompressionLevel, String codec) throws IOException { Settings nodeSettings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put("index.codec", codec) .put("index.codec.compression_level", randomCompressionLevel) .build(); return buildCodecService(nodeSettings);