Skip to content

Commit

Permalink
Make HybridDirectory MMAP Extensions Configurable (#3837) (#3970)
Browse files Browse the repository at this point in the history
* Make HybridDirectory MMAP Extensions Configurable

Make the HybridDirectory's list of mmap extensions configurable
via index settings instead of being hard-coded to a specfic set.
Set defaults to the list of currently hard-coded values for
backwards compatibility.

Signed-off-by: Matt Weber <matt@mattweber.org>

* Add javadoc to INDEX_STORE_HYBRID_MMAP_EXTENSIONS.

Signed-off-by: Matt Weber <matt@mattweber.org>
(cherry picked from commit b08a2b8)

Co-authored-by: Matt Weber <matt@mattweber.org>
  • Loading branch information
opensearch-trigger-bot[bot] and mattweber authored Jul 21, 2022
1 parent 6fe35d9 commit 0a42e6d
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
IndexModule.INDEX_STORE_TYPE_SETTING,
IndexModule.INDEX_STORE_PRE_LOAD_SETTING,
IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS,
IndexModule.INDEX_RECOVERY_TYPE_SETTING,
IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING,
FsDirectoryFactory.INDEX_LOCK_FACTOR_SETTING,
Expand Down
12 changes: 12 additions & 0 deletions server/src/main/java/org/opensearch/index/IndexModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@ public final class IndexModule {
Property.NodeScope
);

/** Which lucene file extensions to load with the mmap directory when using hybridfs store.
* This is an expert setting.
* @see <a href="https://lucene.apache.org/core/9_2_0/core/org/apache/lucene/codecs/lucene92/package-summary.html#file-names">Lucene File Extensions</a>.
*/
public static final Setting<List<String>> INDEX_STORE_HYBRID_MMAP_EXTENSIONS = Setting.listSetting(
"index.store.hybrid.mmap.extensions",
List.of("nvd", "dvd", "tim", "tip", "dim", "kdd", "kdi", "cfs", "doc"),
Function.identity(),
Property.IndexScope,
Property.NodeScope
);

public static final String SIMILARITY_SETTINGS_PREFIX = "index.similarity";

// whether to use the query cache
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index
case HYBRIDFS:
// Use Lucene defaults
final FSDirectory primaryDirectory = FSDirectory.open(location, lockFactory);
final Set<String> mmapExtensions = new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS));
if (primaryDirectory instanceof MMapDirectory) {
MMapDirectory mMapDirectory = (MMapDirectory) primaryDirectory;
return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions));
return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions), mmapExtensions);
} else {
return primaryDirectory;
}
Expand Down Expand Up @@ -142,10 +143,12 @@ public static boolean isHybridFs(Directory directory) {
*/
static final class HybridDirectory extends NIOFSDirectory {
private final MMapDirectory delegate;
private final Set<String> mmapExtensions;

HybridDirectory(LockFactory lockFactory, MMapDirectory delegate) throws IOException {
HybridDirectory(LockFactory lockFactory, MMapDirectory delegate, Set<String> mmapExtensions) throws IOException {
super(delegate.getDirectory(), lockFactory);
this.delegate = delegate;
this.mmapExtensions = mmapExtensions;
}

@Override
Expand All @@ -164,43 +167,16 @@ public IndexInput openInput(String name, IOContext context) throws IOException {
}
}

boolean useDelegate(String name) {
final String extension = FileSwitchDirectory.getExtension(name);
return mmapExtensions.contains(extension);
}

@Override
public void close() throws IOException {
IOUtils.close(super::close, delegate);
}

boolean useDelegate(String name) {
String extension = FileSwitchDirectory.getExtension(name);
switch (extension) {
// Norms, doc values and term dictionaries are typically performance-sensitive and hot in the page
// cache, so we use mmap, which provides better performance.
case "nvd":
case "dvd":
case "tim":
// We want to open the terms index and KD-tree index off-heap to save memory, but this only performs
// well if using mmap.
case "tip":
// dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10
case "dim":
case "kdd":
case "kdi":
// Compound files are tricky because they store all the information for the segment. Benchmarks
// suggested that not mapping them hurts performance.
case "cfs":
// MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed
// up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we
// are not mmaping them as queries that leverage positions are more costly and the decoding of postings
// tends to be less a bottleneck.
case "doc":
return true;
// Other files are either less performance-sensitive (e.g. stored field index, norms metadata)
// or are large and have a random access pattern and mmap leads to page cache trashing
// (e.g. stored fields and term vectors).
default:
return false;
}
}

MMapDirectory getDelegate() {
return delegate;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,52 @@ public void testPreload() throws IOException {
try (Directory directory = newDirectory(build)) {
assertTrue(FsDirectoryFactory.isHybridFs(directory));
FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory;
assertTrue(hybridDirectory.useDelegate("foo.dvd"));
// test default hybrid mmap extensions
assertTrue(hybridDirectory.useDelegate("foo.nvd"));
assertTrue(hybridDirectory.useDelegate("foo.dvd"));
assertTrue(hybridDirectory.useDelegate("foo.tim"));
assertTrue(hybridDirectory.useDelegate("foo.tip"));
assertTrue(hybridDirectory.useDelegate("foo.cfs"));
assertTrue(hybridDirectory.useDelegate("foo.dim"));
assertTrue(hybridDirectory.useDelegate("foo.kdd"));
assertTrue(hybridDirectory.useDelegate("foo.kdi"));
assertFalse(hybridDirectory.useDelegate("foo.bar"));
assertTrue(hybridDirectory.useDelegate("foo.cfs"));
assertTrue(hybridDirectory.useDelegate("foo.doc"));
assertFalse(hybridDirectory.useDelegate("foo.pos"));
assertFalse(hybridDirectory.useDelegate("foo.pay"));
MMapDirectory delegate = hybridDirectory.getDelegate();
assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class));
FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate;
assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd"));
assertTrue(preLoadMMapDirectory.useDelegate("foo.bar"));
assertFalse(preLoadMMapDirectory.useDelegate("foo.cfs"));
}
build = Settings.builder()
.put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.HYBRIDFS.name().toLowerCase(Locale.ROOT))
.putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "nvd", "dvd", "cfs")
.putList(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getKey(), "nvd", "dvd", "tim", "pos", "pay")
.build();
try (Directory directory = newDirectory(build)) {
assertTrue(FsDirectoryFactory.isHybridFs(directory));
FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory;
// test custom hybrid mmap extensions
assertTrue(hybridDirectory.useDelegate("foo.nvd"));
assertTrue(hybridDirectory.useDelegate("foo.dvd"));
assertTrue(hybridDirectory.useDelegate("foo.tim"));
assertFalse(hybridDirectory.useDelegate("foo.tip"));
assertFalse(hybridDirectory.useDelegate("foo.dim"));
assertFalse(hybridDirectory.useDelegate("foo.kdd"));
assertFalse(hybridDirectory.useDelegate("foo.kdi"));
assertFalse(hybridDirectory.useDelegate("foo.cfs"));
assertFalse(hybridDirectory.useDelegate("foo.doc"));
assertTrue(hybridDirectory.useDelegate("foo.pos"));
assertTrue(hybridDirectory.useDelegate("foo.pay"));
MMapDirectory delegate = hybridDirectory.getDelegate();
assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class));
FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate;
assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd"));
assertFalse(preLoadMMapDirectory.useDelegate("foo.bar"));
assertTrue(preLoadMMapDirectory.useDelegate("foo.cfs"));
assertTrue(preLoadMMapDirectory.useDelegate("foo.nvd"));
}
}

Expand Down

0 comments on commit 0a42e6d

Please sign in to comment.