Skip to content

Commit

Permalink
Lucene 9.12 Upgrade (#14361)
Browse files (browse the repository at this point in the history)
  • Loading branch information
abhioncbr authored Nov 4, 2024
1 parent af87e4f commit 3ae9c4c
Show file tree
Hide file tree
Showing 24 changed files with 91 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public MutableVectorIndex(String segmentName, String vectorColumn, VectorIndexCo
// to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig.
_indexDir = new File(FileUtils.getTempDirectory(), segmentName);
_indexDirectory = FSDirectory.open(
new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).toPath());
new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION).toPath());
LOGGER.info("Creating mutable HNSW index for segment: {}, column: {} at path: {} with {}", segmentName,
vectorColumn, _indexDir.getAbsolutePath(), vectorIndexConfig.getProperties());
_indexWriter = new IndexWriter(_indexDirectory, VectorIndexUtils.getIndexWriterConfig(vectorIndexConfig));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class LuceneFSTIndexCreator implements FSTIndexCreator {
*/
public LuceneFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries)
throws IOException {
_fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION);
_fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V912_FST_INDEX_FILE_EXTENSION);

_fstBuilder = new FSTBuilder();
_dictId = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,14 +353,14 @@ public void close()
}

/**
 * Builds the location of the Lucene text index entry for {@code _textColumn} inside the given directory.
 *
 * @param indexDir directory under which the text index entry is resolved
 * @return {@code indexDir} joined with the text column name plus the Lucene text index file extension
 */
private File getV1TextIndexFile(File indexDir) {
// Diff view: pre-change line, which used the V99 text index extension constant.
String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION;
// Diff view: replacement line, which switches to the V912 text index extension constant.
String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION;
return new File(indexDir, luceneIndexDirectory);
}

/**
 * Builds the path {@code consumerDir/<segmentName>/<_textColumn + text index extension>}, where the segment name
 * is obtained from {@code getSegmentName(indexDir)}.
 *
 * @param indexDir directory passed to {@code getSegmentName} to derive the segment name
 * @param consumerDir parent directory under which the per-segment directory is resolved
 * @return the text index location for the column under the per-segment directory
 */
private File getMutableIndexDir(File indexDir, File consumerDir) {
String segmentName = getSegmentName(indexDir);
return new File(new File(consumerDir, segmentName),
// Diff view: pre-change line, which used the V99 text index extension constant.
_textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
// Diff view: replacement line, which switches to the V912 text index extension constant.
_textColumn + V1Constants.Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION);
}

private String getSegmentName(File indexDir) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public HnswVectorIndexCreator(String column, File segmentIndexDir, VectorIndexCo
// segment generation is always in V1 and later we convert (as part of post creation processing)
// to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig.
File indexFile = new File(segmentIndexDir, _vectorColumn
+ V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
+ V1Constants.Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION);
_indexDirectory = FSDirectory.open(indexFile.toPath());
LOGGER.info("Creating HNSW index for column: {} at path: {} with {} for segment: {}", column,
indexFile.getAbsolutePath(), vectorIndexConfig.getProperties(), segmentIndexDir.getAbsolutePath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
Expand Down Expand Up @@ -73,8 +73,8 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
*
* @param mode stored fields compression mode to use for newly flushed/merged segments.
*/
public HnswCodec(Lucene99Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) {
super("Lucene99", new Lucene99Codec(mode));
public HnswCodec(Lucene912Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) {
super("Lucene99", new Lucene912Codec(mode));
_defaultKnnVectorsFormat = defaultKnnVectorsFormat;
_defaultPostingsFormat = new Lucene90PostingsFormat();
_defaultDVFormat = new Lucene90DocValuesFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ private void deleteV2Files(File v2SegmentDirectory)
if (file.isFile() && file.exists()) {
FileUtils.deleteQuietly(file);
}
if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION)) {
if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION)) {
FileUtils.deleteDirectory(file);
}
if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION)) {
if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION)) {
FileUtils.deleteDirectory(file);
}
}
Expand Down Expand Up @@ -227,7 +227,7 @@ private void copyCreationMetadataIfExists(File currentDir, File v3Dir)
private void copyLuceneTextIndexIfExists(File segmentDirectory, File v3Dir)
throws IOException {
// TODO: see if this can be done by reusing some existing methods
String suffix = V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION;
String suffix = V1Constants.Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION;
File[] textIndexFiles = segmentDirectory.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
Expand Down Expand Up @@ -264,7 +264,7 @@ public boolean accept(File dir, String name) {
private void copyVectorIndexIfExists(File segmentDirectory, File v3Dir)
throws IOException {
// TODO: see if this can be done by reusing some existing methods
String suffix = V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION;
String suffix = V1Constants.Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION;
File[] vectorIndexFiles = segmentDirectory.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ public class FstIndexType extends AbstractIndexType<FstIndexConfig, TextIndexRea
private static final List<String> EXTENSIONS =
ImmutableList.of(V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION,
V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION,
V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION);
V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION,
V1Constants.Indexes.LUCENE_V912_FST_INDEX_FILE_EXTENSION);

protected FstIndexType() {
super(StandardIndexes.FST_ID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V912_FST_INDEX_FILE_EXTENSION;


/**
Expand Down Expand Up @@ -157,7 +157,7 @@ private void createFSTIndexForColumn(SegmentDirectory.Writer segmentWriter, Colu
String segmentName = _segmentDirectory.getSegmentMetadata().getName();
String columnName = columnMetadata.getColumnName();
File inProgress = new File(indexDir, columnName + ".fst.inprogress");
File fstIndexFile = new File(indexDir, columnName + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
File fstIndexFile = new File(indexDir, columnName + LUCENE_V912_FST_INDEX_FILE_EXTENSION);

if (!inProgress.exists()) {
// Create a marker file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ private void createVectorIndexForColumn(SegmentDirectory.Writer segmentWriter, C
String columnName = columnMetadata.getColumnName();
File inProgress =
new File(segmentDirectory, columnName
+ V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION + ".inprogress");
+ V1Constants.Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION + ".inprogress");
File vectorIndexFile =
new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION);

if (!inProgress.exists()) {
// Marker file does not exist, which means last run ended normally.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.io.IOException;
import java.util.List;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.OffHeapFSTStore;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.pinot.segment.local.utils.fst.PinotBufferIndexInput;
import org.apache.pinot.segment.local.utils.fst.RegexpMatcher;
Expand Down Expand Up @@ -52,8 +51,7 @@ public LuceneFSTIndexReader(PinotDataBuffer pinotDataBuffer)
_dataBufferIndexInput = new PinotBufferIndexInput(_dataBuffer, 0L, _dataBuffer.size());

_readFST =
new FST<>(FST.readMetadata(_dataBufferIndexInput, PositiveIntOutputs.getSingleton()),
_dataBufferIndexInput, new OffHeapFSTStore());
new FST<>(FST.readMetadata(_dataBufferIndexInput, PositiveIntOutputs.getSingleton()), _dataBufferIndexInput);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ public class TextIndexType extends AbstractIndexType<TextIndexConfig, TextIndexR
V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION,
V1Constants.Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION,
V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION,
V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION
V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION,
V1Constants.Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION
);

protected TextIndexType() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ public IndexHandler createIndexHandler(SegmentDirectory segmentDirectory, Map<St
// Returns the vector index file extension constants as a fixed list; the columnMetadata argument is not consulted.
@Override
public List<String> getFileExtensions(@Nullable ColumnMetadata columnMetadata) {
return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION,
// Diff view: pre-change line, which ended the list at the V99 extension.
V1Constants.Indexes.VECTOR_V99_INDEX_FILE_EXTENSION);
// Diff view: replacement lines, which keep the V99 extension and append the V912 extension.
V1Constants.Indexes.VECTOR_V99_INDEX_FILE_EXTENSION,
V1Constants.Indexes.VECTOR_V912_INDEX_FILE_EXTENSION);
}

private static class ReaderFactory implements IndexReaderFactory<VectorIndexReader> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ static void cleanupTextIndex(File segDir, String column) {
FileUtils.deleteQuietly(luceneV9IndexFile);
File luceneV99IndexFile = new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
FileUtils.deleteQuietly(luceneV99IndexFile);
File luceneV912IndexFile = new File(segDir, column + Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION);
FileUtils.deleteQuietly(luceneV912IndexFile);
File luceneV9MappingFile = new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
FileUtils.deleteQuietly(luceneV9MappingFile);

Expand All @@ -76,7 +78,8 @@ static boolean hasTextIndex(File segDir, String column) {
return new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION).exists()
|| new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()
|| new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists()
|| new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists();
|| new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()
|| new File(segDir, column + Indexes.LUCENE_V912_TEXT_INDEX_FILE_EXTENSION).exists();
//@formatter:on
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import java.io.File;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.VectorSimilarityFunction;
Expand All @@ -40,6 +40,8 @@ static void cleanupVectorIndex(File segDir, String column) {
FileUtils.deleteQuietly(luceneIndexFile);
File luceneV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
FileUtils.deleteQuietly(luceneV99IndexFile);
File luceneV912IndexFile = new File(segDir, column + Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION);
FileUtils.deleteQuietly(luceneV912IndexFile);
File luceneMappingFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION);
FileUtils.deleteQuietly(luceneMappingFile);

Expand All @@ -48,11 +50,13 @@ static void cleanupVectorIndex(File segDir, String column) {
FileUtils.deleteQuietly(nativeIndexFile);
File nativeV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION);
FileUtils.deleteQuietly(nativeV99IndexFile);
File nativeV912IndexFile = new File(segDir, column + Indexes.VECTOR_V912_INDEX_FILE_EXTENSION);
FileUtils.deleteQuietly(nativeV912IndexFile);
}

/**
 * Reports whether a vector index entry for the column exists in the segment directory, by probing for
 * {@code column + extension} under {@code segDir}.
 *
 * @param segDir segment directory to look in
 * @param column column name used as the file name prefix
 * @return {@code true} if either probed path exists
 */
static boolean hasVectorIndex(File segDir, String column) {
// Diff view: pre-change statement, which probed the V99 HNSW and V99 vector index extensions.
return new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir,
column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION).exists();
// Diff view: replacement statement, which probes only the V912 HNSW and V912 vector index extensions.
// NOTE(review): unlike hasTextIndex, which keeps its V99 check and adds V912, this no longer probes the V99
// extensions, although cleanupVectorIndex still deletes V99 files — confirm that segments written with the
// V99 extensions are meant to report no vector index here.
return new File(segDir, column + Indexes.VECTOR_V912_HNSW_INDEX_FILE_EXTENSION).exists()
|| new File(segDir, column + Indexes.VECTOR_V912_INDEX_FILE_EXTENSION).exists();
}

public static VectorSimilarityFunction toSimilarityFunction(
Expand Down Expand Up @@ -94,8 +98,8 @@ public static IndexWriterConfig getIndexWriterConfig(VectorIndexConfig vectorInd
HnswVectorsFormat knnVectorsFormat =
new HnswVectorsFormat(maxCon, beamWidth, maxDimensions);

Lucene99Codec.Mode mode = Lucene99Codec.Mode.valueOf(vectorIndexConfig.getProperties()
.getOrDefault("mode", Lucene99Codec.Mode.BEST_SPEED.name()));
Lucene912Codec.Mode mode = Lucene912Codec.Mode.valueOf(vectorIndexConfig.getProperties()
.getOrDefault("mode", Lucene912Codec.Mode.BEST_SPEED.name()));
indexWriterConfig.setCodec(new HnswCodec(mode, knnVectorsFormat));
return indexWriterConfig;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class NativeFSTIndexCreator implements FSTIndexCreator {
* @param sortedEntries Sorted entries of the unique values of the column.
*/
public NativeFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) {
_fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION);
_fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V912_FST_INDEX_FILE_EXTENSION);

_fstBuilder = new FSTBuilder();
_dictId = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V912_FST_INDEX_FILE_EXTENSION;


public class LuceneFSTIndexCreatorTest {
Expand Down Expand Up @@ -62,7 +62,7 @@ public void testIndexWriterReader()
LuceneFSTIndexCreator creator = new LuceneFSTIndexCreator(
INDEX_DIR, "testFSTColumn", uniqueValues);
creator.seal();
File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V912_FST_INDEX_FILE_EXTENSION);
PinotDataBuffer pinotDataBuffer =
PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, "fstIndexFile");
LuceneFSTIndexReader reader = new LuceneFSTIndexReader(pinotDataBuffer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V912_FST_INDEX_FILE_EXTENSION;


public class NativeFSTIndexCreatorTest {
Expand Down Expand Up @@ -59,7 +59,7 @@ public void testIndexWriterReader()
creator.seal();
}

File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V912_FST_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(fstFile);
NativeFSTIndexReader reader = new NativeFSTIndexReader(dataBuffer)) {

Expand Down
Loading

0 comments on commit 3ae9c4c

Please sign in to comment.