Skip to content

Commit

Permalink
LUCENE-9669: Add an expert API to allow opening indices created < N-1 (
Browse files Browse the repository at this point in the history
…#2212)

Today we force indices that were created with N-2 and older versions of Lucene
to fail on open. This check doesn't even consider whether the codecs are available. In order
to allow users to open older indices, and for us to support N-2 versions, this change
adds an API on DirectoryReader to specify a minimum index version on a per-reader basis.
This doesn't apply to IndexWriter, which will still fail when opening older indices.
  • Loading branch information
s1monw authored Jan 19, 2021
1 parent 426c902 commit c1ae6dc
Show file tree
Hide file tree
Showing 11 changed files with 153 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
Expand All @@ -87,6 +88,7 @@
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
Expand Down Expand Up @@ -863,7 +865,12 @@ public void testUnsupportedOldIndexes() throws Exception {
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
CheckIndex.Status indexStatus = checker.checkIndex();
assertFalse(indexStatus.clean);
assertTrue(bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName()));
if (unsupportedNames[i].startsWith("7.")) {
assertTrue(bos.toString(IOUtils.UTF_8).contains("Could not load codec 'Lucene70'"));
} else {
assertTrue(
bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName()));
}
checker.close();

dir.close();
Expand Down Expand Up @@ -1986,4 +1993,45 @@ static BytesRef toBytes(long value) {
bytes.bytes[bytes.length++] = (byte) value;
return bytes;
}

/**
 * For every index listed in {@code oldNames}: opening its first commit with the current major
 * version as the minimum must fail with {@link IndexFormatTooOldException}, while lowering the
 * minimum by one major version must succeed.
 */
public void testFailOpenOldIndex() throws IOException {
  final int currentMajor = Version.LATEST.major;
  final String expectedFragment =
      "only supports reading from version " + currentMajor + " upwards.";
  for (String indexName : oldNames) {
    Directory oldIndexDir = oldIndexDirs.get(indexName);
    IndexCommit firstCommit = DirectoryReader.listCommits(oldIndexDir).get(0);
    IndexFormatTooOldException tooOld =
        expectThrows(
            IndexFormatTooOldException.class,
            () -> StandardDirectoryReader.open(firstCommit, currentMajor));
    String message = tooOld.getMessage();
    assertTrue(message.contains(expectedFragment));
    // now open with allowed min version
    DirectoryReader reader = StandardDirectoryReader.open(firstCommit, currentMajor - 1);
    reader.close();
  }
}

/**
 * For every unsupported index whose name starts with the N-2 major version (one below {@link
 * Version#MIN_SUPPORTED_MAJOR}), listing commits must get past the index-version check and fail
 * only because the codec cannot be loaded.
 */
public void testReadNMinusTwoCommit() throws IOException {
  // Parenthesized so the subtraction is clearly done before the string concatenation.
  final String nMinusTwoPrefix = (Version.MIN_SUPPORTED_MAJOR - 1) + ".";
  for (String name : unsupportedNames) {
    if (name.startsWith(nMinusTwoPrefix) == false) {
      continue;
    }
    Path oldIndexDir = createTempDir(name);
    TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
    try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
      // don't checkindex, we don't have the codecs yet
      dir.setCheckIndexOnClose(false);
      // TODO fix this once we have the codec for 7.0 recreated
      IllegalArgumentException iae =
          expectThrows(IllegalArgumentException.class, () -> DirectoryReader.listCommits(dir));
      assertEquals(
          "Could not load codec 'Lucene70'. Did you forget to add lucene-backward-codecs.jar?",
          iae.getMessage());
    }
  }
}
}
4 changes: 3 additions & 1 deletion lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,9 @@ public Status checkIndex(List<String> onlySegments) throws IOException {
try {
// Do not use SegmentInfos.read(Directory) since the spooky
// retrying it does is not necessary here (we hold the write lock):
sis = SegmentInfos.readCommit(dir, lastSegmentsFile);
sis =
SegmentInfos.readCommit(
dir, lastSegmentsFile, 0 /* always open old indices if codecs are around */);
} catch (Throwable t) {
if (failFast) {
throw IOUtils.rethrowAlways(t);
Expand Down
21 changes: 19 additions & 2 deletions lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,23 @@ public static DirectoryReader open(final IndexCommit commit) throws IOException
return StandardDirectoryReader.open(commit.getDirectory(), commit);
}

/**
* Expert: returns an IndexReader reading the index on the given {@link IndexCommit}. This method
* allows opening indices that were created with a Lucene version older than N-1 provided that all
* codecs for this index are available in the classpath and the segment file format used was
* created with Lucene 7 or newer. Users of this API must be aware that Lucene doesn't guarantee
* semantic compatibility for indices created with versions older than N-1. All backwards
* compatibility aside from the file format is optional and applied on a best effort basis.
*
* @param commit the commit point to open
* @param minSupportedMajorVersion the minimum supported major index version; must be between 0
*     and the current major version (inclusive)
* @return a reader over the given commit
* @throws IllegalArgumentException if {@code minSupportedMajorVersion} is negative or greater
*     than the current major version
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion)
throws IOException {
return StandardDirectoryReader.open(commit.getDirectory(), minSupportedMajorVersion, commit);
}

/**
* If the index has changed since the provided reader was opened, open and return a new reader;
* else, return null. The new reader, if not null, will be the same type of reader as the previous
Expand Down Expand Up @@ -221,7 +238,7 @@ public static List<IndexCommit> listCommits(Directory dir) throws IOException {

List<IndexCommit> commits = new ArrayList<>();

SegmentInfos latest = SegmentInfos.readLatestCommit(dir);
SegmentInfos latest = SegmentInfos.readLatestCommit(dir, 0);
final long currentGen = latest.getGeneration();

commits.add(new StandardDirectoryReader.ReaderCommit(null, latest, dir));
Expand All @@ -237,7 +254,7 @@ public static List<IndexCommit> listCommits(Directory dir) throws IOException {
try {
// IOException allowed to throw there, in case
// segments_N is corrupt
sis = SegmentInfos.readCommit(dir, fileName);
sis = SegmentInfos.readCommit(dir, fileName, 0);
} catch (FileNotFoundException | NoSuchFileException fnfe) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Version;

/** This exception is thrown when Lucene detects an index that is too old for this Lucene version */
public class IndexFormatTooOldException extends IOException {
Expand All @@ -42,7 +43,9 @@ public IndexFormatTooOldException(String resourceDescription, String reason) {
+ resourceDescription
+ "): "
+ reason
+ ". This version of Lucene only supports indexes created with release 8.0 and later.");
+ ". This version of Lucene only supports indexes created with release "
+ Version.MIN_SUPPORTED_MAJOR
+ ".0 and later by default.");
this.resourceDescription = resourceDescription;
this.reason = reason;
this.version = null;
Expand Down Expand Up @@ -81,7 +84,9 @@ public IndexFormatTooOldException(
+ minVersion
+ " and "
+ maxVersion
+ "). This version of Lucene only supports indexes created with release 8.0 and later.");
+ "). This version of Lucene only supports indexes created with release "
+ Version.MIN_SUPPORTED_MAJOR
+ ".0 and later.");
this.resourceDescription = resourceDescription;
this.version = version;
this.minVersion = minVersion;
Expand Down
8 changes: 8 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,14 @@ public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException {
changed();

} else if (reader != null) {
if (reader.segmentInfos.getIndexCreatedVersionMajor() < Version.MIN_SUPPORTED_MAJOR) {
// second line of defence in the case somebody tries to trick us.
throw new IllegalArgumentException(
"createdVersionMajor must be >= "
+ Version.MIN_SUPPORTED_MAJOR
+ ", got: "
+ reader.segmentInfos.getIndexCreatedVersionMajor());
}
// Init from an existing already opened NRT or non-NRT reader:

if (reader.directory() != commit.getDirectory()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ public ParallelLeafReader(
Version minVersion = Version.LATEST;
for (final LeafReader reader : this.parallelReaders) {
Version leafVersion = reader.getMetaData().getMinVersion();

if (leafVersion == null) {
minVersion = null;
break;
Expand Down
31 changes: 26 additions & 5 deletions lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
Original file line number Diff line number Diff line change
Expand Up @@ -285,12 +285,18 @@ public byte[] getId() {
*/
public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
throws IOException {
// Public entry point: apply the default lower bound on the index-created major version.
return readCommit(directory, segmentFileName, Version.MIN_SUPPORTED_MAJOR);
}

static final SegmentInfos readCommit(
Directory directory, String segmentFileName, int minSupportedMajorVersion)
throws IOException {

long generation = generationFromSegmentsFileName(segmentFileName);
// System.out.println(Thread.currentThread() + ": SegmentInfos.readCommit " + segmentFileName);
try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
try {
return readCommit(directory, input, generation);
return readCommit(directory, input, generation, minSupportedMajorVersion);
} catch (EOFException | NoSuchFileException | FileNotFoundException e) {
throw new CorruptIndexException(
"Unexpected file read error while reading index.", input, e);
Expand All @@ -301,6 +307,13 @@ public static final SegmentInfos readCommit(Directory directory, String segmentF
/**
* Read the commit from the provided {@link ChecksumIndexInput}, using {@link
* Version#MIN_SUPPORTED_MAJOR} as the minimum accepted index-created major version.
*/
public static final SegmentInfos readCommit(
Directory directory, ChecksumIndexInput input, long generation) throws IOException {
return readCommit(directory, input, generation, Version.MIN_SUPPORTED_MAJOR);
}

/** Read the commit from the provided {@link ChecksumIndexInput}. */
static final SegmentInfos readCommit(
Directory directory, ChecksumIndexInput input, long generation, int minSupportedMajorVersion)
throws IOException {
Throwable priorE = null;
int format = -1;
try {
Expand Down Expand Up @@ -329,14 +342,17 @@ public static final SegmentInfos readCommit(
input);
}

if (indexCreatedVersion < Version.LATEST.major - 1) {
if (indexCreatedVersion < minSupportedMajorVersion) {
throw new IndexFormatTooOldException(
input,
"This index was initially created with Lucene "
+ indexCreatedVersion
+ ".x while the current version is "
+ Version.LATEST
+ " and Lucene only supports reading the current and previous major versions.");
+ " and Lucene only supports reading"
+ (minSupportedMajorVersion == Version.MIN_SUPPORTED_MAJOR
? " the current and previous major versions"
: " from version " + minSupportedMajorVersion + " upwards"));
}

SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
Expand Down Expand Up @@ -499,7 +515,7 @@ private static Codec readCodec(DataInput input) throws IOException {
throw new IllegalArgumentException(
"Could not load codec '"
+ name
+ "'. Did you forget to add lucene-backward-codecs.jar?",
+ "'. Did you forget to add lucene-backward-codecs.jar?",
e);
}
throw e;
Expand All @@ -508,10 +524,15 @@ private static Codec readCodec(DataInput input) throws IOException {

/**
* Find the latest commit ({@code segments_N file}) and load all {@link SegmentCommitInfo}s. Uses
* {@link Version#MIN_SUPPORTED_MAJOR} as the minimum accepted index-created major version.
*/
public static final SegmentInfos readLatestCommit(Directory directory) throws IOException {
return readLatestCommit(directory, Version.MIN_SUPPORTED_MAJOR);
}

/**
 * Find the latest commit ({@code segments_N file}) and read it, passing the given minimum
 * supported major version through to {@code readCommit}.
 */
static final SegmentInfos readLatestCommit(Directory directory, int minSupportedMajorVersion)
    throws IOException {
  final FindSegmentsFile<SegmentInfos> latestCommitFinder =
      new FindSegmentsFile<SegmentInfos>(directory) {
        @Override
        protected SegmentInfos doBody(String segmentFileName) throws IOException {
          return readCommit(directory, segmentFileName, minSupportedMajorVersion);
        }
      };
  return latestCommitFinder.run();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;

/** Default implementation of {@link DirectoryReader}. */
public final class StandardDirectoryReader extends DirectoryReader {
Expand All @@ -57,13 +58,27 @@ public final class StandardDirectoryReader extends DirectoryReader {
this.writeAllDeletes = writeAllDeletes;
}

/**
* Called from DirectoryReader.open(...) methods. Opens the commit using {@link
* Version#MIN_SUPPORTED_MAJOR} as the minimum supported major version.
*/
static DirectoryReader open(final Directory directory, final IndexCommit commit)
throws IOException {
return open(directory, Version.MIN_SUPPORTED_MAJOR, commit);
}

/** called from DirectoryReader.open(...) methods */
static DirectoryReader open(
final Directory directory, int minSupportedMajorVersion, final IndexCommit commit)
throws IOException {
return new SegmentInfos.FindSegmentsFile<DirectoryReader>(directory) {
@Override
protected DirectoryReader doBody(String segmentFileName) throws IOException {
SegmentInfos sis = SegmentInfos.readCommit(directory, segmentFileName);
if (minSupportedMajorVersion > Version.LATEST.major || minSupportedMajorVersion < 0) {
throw new IllegalArgumentException(
"minSupportedMajorVersion must be positive and <= "
+ Version.LATEST.major
+ " but was: "
+ minSupportedMajorVersion);
}
SegmentInfos sis =
SegmentInfos.readCommit(directory, segmentFileName, minSupportedMajorVersion);
final SegmentReader[] readers = new SegmentReader[sis.size()];
boolean success = false;
try {
Expand Down
6 changes: 6 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/Version.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,12 @@ public final class Version {
*/
@Deprecated public static final Version LUCENE_CURRENT = LATEST;

/**
* Constant for the minimal supported major version of an index: one major version before {@link
* #LATEST}. The version of an index, for this purpose, is the major version that initially
* created the index.
*/
public static final int MIN_SUPPORTED_MAJOR = Version.LATEST.major - 1;

/**
* Parse a version number of the form {@code "major.minor.bugfix.prerelease"}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
import org.junit.Assume;

@LuceneTestCase.SuppressCodecs("SimpleText")
Expand Down Expand Up @@ -1096,4 +1097,18 @@ public void testIndexExistsOnNonExistentDirectory() throws Exception {
assertFalse(DirectoryReader.indexExists(dir));
dir.close();
}

/**
 * Opening a commit with a negative minimum major version must be rejected, while any value from
 * 0 up to and including the current major version must open successfully.
 */
public void testOpenWithInvalidMinCompatVersion() throws IOException {
  try (Directory directory = newDirectory();
      IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig())) {
    Document document = new Document();
    document.add(newStringField("field1", "foobar", Field.Store.YES));
    document.add(newStringField("field2", "foobaz", Field.Store.YES));
    indexWriter.addDocument(document);
    indexWriter.commit();

    IndexCommit firstCommit = DirectoryReader.listCommits(directory).get(0);
    expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(firstCommit, -1));

    // Any minimum in [0, LATEST.major] is valid for an index written by the current version.
    int validMinVersion = random().nextInt(Version.LATEST.major + 1);
    DirectoryReader reader = DirectoryReader.open(firstCommit, validMinVersion);
    reader.close();
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ public static LeafReader wrap(IndexReader reader) throws IOException {
minVersion = leafVersion;
}
}
metaData = new LeafMetaData(reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(), minVersion, null);
int createdVersionMajor = reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor();
metaData = new LeafMetaData(createdVersionMajor, minVersion, null);
}
fieldInfos = FieldInfos.getMergedFieldInfos(in);
}
Expand Down

0 comments on commit c1ae6dc

Please sign in to comment.