diff --git a/gradle/generation/forUtil.gradle b/gradle/generation/forUtil.gradle index 9ada40c8dde0..96f5bfdf9740 100644 --- a/gradle/generation/forUtil.gradle +++ b/gradle/generation/forUtil.gradle @@ -23,7 +23,7 @@ configure(project(":lucene:core")) { description "Regenerate gen_ForUtil.py" group "generation" - def genDir = file("src/java/org/apache/lucene/codecs/lucene90") + def genDir = file("src/java/org/apache/lucene/codecs/lucene99") def genScript = file("${genDir}/gen_ForUtil.py") def genOutput = file("${genDir}/ForUtil.java") @@ -44,7 +44,7 @@ configure(project(":lucene:core")) { configure(project(":lucene:backward-codecs")) { - task generateForUtilInternal() { + task generateForUtil84Internal() { description "Regenerate gen_ForUtil.py" group "generation" @@ -64,6 +64,28 @@ configure(project(":lucene:backward-codecs")) { } } - regenerate.dependsOn wrapWithPersistentChecksums(generateForUtilInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ]) + regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil84Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ]) + + task generateForUtil90Internal() { + description "Regenerate gen_ForUtil.py" + group "generation" + + def genDir = file("src/java/org/apache/lucene/backward_codecs/lucene90") + def genScript = file("${genDir}/gen_ForUtil.py") + def genOutput = file("${genDir}/ForUtil.java") + + inputs.file genScript + outputs.file genOutput + + doLast { + quietExec { + workingDir genDir + executable project.externalTool("python3") + args = [ '-B', genScript ] + } + } + } + + regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil90Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ]) } diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0ee491bbec56..12b058fe14e7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -256,11 +256,12 @@ Optimizations * GITHUB#12719: Top-level conjunctions that are not sorted by score now have a specialized bulk scorer. (Adrien Grand) +* GITHUB#12696: Change Postings back to using FOR in Lucene99PostingsFormat. Freqs, positions and offset keep using PFOR. (Jakub Slowinski) + * GITHUB#1052: Faster merging of terms enums. (Adrien Grand) * GITHUB#11903: Faster sort on high-cardinality string fields. 
(Adrien Grand) - Changes in runtime behavior --------------------- diff --git a/lucene/backward-codecs/src/generated/checksums/generateForUtil.json b/lucene/backward-codecs/src/generated/checksums/generateForUtil84.json similarity index 100% rename from lucene/backward-codecs/src/generated/checksums/generateForUtil.json rename to lucene/backward-codecs/src/generated/checksums/generateForUtil84.json diff --git a/lucene/backward-codecs/src/generated/checksums/generateForUtil90.json b/lucene/backward-codecs/src/generated/checksums/generateForUtil90.json new file mode 100644 index 000000000000..15fd411b1cba --- /dev/null +++ b/lucene/backward-codecs/src/generated/checksums/generateForUtil90.json @@ -0,0 +1,4 @@ +{ + "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java": "861cab516c7424e6323831c16f0f521499391a90", + "lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py": "b66e2f8012759d6d5ce0d73fabb329ae4a391aa0" +} \ No newline at end of file diff --git a/lucene/backward-codecs/src/java/module-info.java b/lucene/backward-codecs/src/java/module-info.java index 689e0a7d5b4b..992ad22a773d 100644 --- a/lucene/backward-codecs/src/java/module-info.java +++ b/lucene/backward-codecs/src/java/module-info.java @@ -42,7 +42,8 @@ org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; provides org.apache.lucene.codecs.PostingsFormat with org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat, - org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat; + org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat, + org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; provides org.apache.lucene.codecs.KnnVectorsFormat with org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat, org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java index a58c84e314b0..c889c8b86269 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java @@ -16,7 +16,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import org.apache.lucene.store.DataInput; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90Codec.java index b9465985063a..77280dcff639 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90Codec.java @@ -35,7 +35,6 @@ import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat; import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat; import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat; import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsFormat.java similarity index 94% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsFormat.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsFormat.java index de0d9c381a8e..0fceeb47296a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsFormat.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import org.apache.lucene.codecs.BlockTermState; @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader; import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; import org.apache.lucene.index.IndexOptions; @@ -36,7 +35,9 @@ import org.apache.lucene.util.packed.PackedInts; /** - * Lucene 5.0 postings format, which encodes postings in packed integer blocks for fast decode. + * Lucene 9.0 postings format, which encodes postings in packed integer blocks for fast decode. + * + *
Note: Lucene90PostingsFormat is now READ ONLY. * *
Basic idea: * @@ -371,30 +372,11 @@ public final class Lucene90PostingsFormat extends PostingsFormat { // Increment version to change it static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; - - private final int minTermBlockSize; - private final int maxTermBlockSize; + static final int VERSION_CURRENT = 1; - /** Creates {@code Lucene90PostingsFormat} with default settings. */ + /** Creates read-only {@code Lucene90PostingsFormat}. */ public Lucene90PostingsFormat() { - this( - Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, - Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); - } - - /** - * Creates {@code Lucene90PostingsFormat} with custom values for {@code minBlockSize} and {@code - * maxBlockSize} passed to block terms dictionary. - * - * @see - * Lucene90BlockTreeTermsWriter#Lucene90BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) - */ - public Lucene90PostingsFormat(int minTermBlockSize, int maxTermBlockSize) { super("Lucene90"); - Lucene90BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize); - this.minTermBlockSize = minTermBlockSize; - this.maxTermBlockSize = maxTermBlockSize; } @Override @@ -403,20 +385,8 @@ public String toString() { } @Override - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state); - boolean success = false; - try { - FieldsConsumer ret = - new Lucene90BlockTreeTermsWriter( - state, postingsWriter, minTermBlockSize, maxTermBlockSize); - success = true; - return ret; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(postingsWriter); - } - } + public FieldsConsumer fieldsConsumer(SegmentWriteState state) { + throw new UnsupportedOperationException("Old codecs may only be used for reading"); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java index 56f7bafd4fe4..190fe5b1d1d5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java @@ -14,23 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; -import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_START; +import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_START; import java.io.IOException; import java.util.Arrays; +import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Impacts; import org.apache.lucene.index.ImpactsEnum; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsWriter.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsWriter.java similarity index 95% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsWriter.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsWriter.java index f640d8d2c573..b745d4e1217f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsWriter.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsWriter.java @@ -14,22 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; -import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT; +import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC; +import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT; import java.io.IOException; +import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CompetitiveImpactAccumulator; import org.apache.lucene.codecs.PushPostingsWriterBase; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90ScoreSkipReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90ScoreSkipReader.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90ScoreSkipReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90ScoreSkipReader.java index 44789a983344..504fd5294e16 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90ScoreSkipReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90ScoreSkipReader.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import java.util.AbstractList; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipReader.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java index da31bd75a80a..cd2febadf8f0 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import java.util.Arrays; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipWriter.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipWriter.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipWriter.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipWriter.java index 0743de226c8a..72319eff6a50 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipWriter.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipWriter.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import java.util.Arrays; @@ -92,8 +92,7 @@ public Lucene90SkipWriter( } } - public void setField( - boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) { + void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) { this.fieldHasPositions = fieldHasPositions; this.fieldHasOffsets = fieldHasOffsets; this.fieldHasPayloads = fieldHasPayloads; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/PForUtil.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/PForUtil.java index 211912142a45..69b91660d10c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/PForUtil.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import java.util.Arrays; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py rename to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py index a468c590cf9f..6c58ce1a9a0a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py @@ -40,7 +40,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene90; +package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import org.apache.lucene.store.DataInput; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java index 9393d0fa5a10..a354dc0827c0 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java @@ -18,6 +18,7 @@ import java.util.Objects; import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat; +import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompoundFormat; @@ -37,7 +38,6 @@ import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat; import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat; import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat; import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java index 21bc835b61e9..6dc0e4edb59e 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java @@ -151,17 +151,17 @@ * field names. These are used to store auxiliary information about the document, such as its * title, url, or an identifier to access a database. The set of stored fields are what is * returned for each hit when searching. This is keyed by document number. - *
One might think to use total term frequency to track how many positions are left to read + * as we decode the blocks, and decode the last block differently when num_left_positions < + * BLOCK_SIZE. Unfortunately this won't work since the tracking will be messed up when we skip + * blocks as the skipper will only tell us new position offset (start of block) and number of + * positions to skip for that block, without telling us how many positions it has skipped. + */ + public long lastPosBlockOffset; + + /** + * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly + * totalTermFreq in this case. + */ + public int singletonDocID; + + /** Sole constructor. */ + public IntBlockTermState() { + skipOffset = -1; + lastPosBlockOffset = -1; + singletonDocID = -1; + } + + @Override + public IntBlockTermState clone() { + IntBlockTermState other = new IntBlockTermState(); + other.copyFrom(this); + return other; + } + + @Override + public void copyFrom(TermState _other) { + super.copyFrom(_other); + IntBlockTermState other = (IntBlockTermState) _other; + docStartFP = other.docStartFP; + posStartFP = other.posStartFP; + payStartFP = other.payStartFP; + lastPosBlockOffset = other.lastPosBlockOffset; + skipOffset = other.skipOffset; + singletonDocID = other.singletonDocID; + } + + @Override + public String toString() { + return super.toString() + + " docStartFP=" + + docStartFP + + " posStartFP=" + + posStartFP + + " payStartFP=" + + payStartFP + + " lastPosBlockOffset=" + + lastPosBlockOffset + + " singletonDocID=" + + singletonDocID; + } + } +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java index f9bac90b906d..ec775b513cd6 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java @@ -16,8 +16,10 @@ */ package org.apache.lucene.backward_codecs.lucene91; +import org.apache.lucene.backward_codecs.lucene90.Lucene90RWPostingsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90RWSegmentInfoFormat; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; @@ -32,6 +34,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { } }; private final SegmentInfoFormat segmentInfosFormat = new Lucene90RWSegmentInfoFormat(); + private final PostingsFormat postingsFormat = new Lucene90RWPostingsFormat(); public Lucene91RWCodec() { this.defaultKnnVectorsFormat = @@ -49,4 +52,9 @@ public KnnVectorsFormat knnVectorsFormat() { public SegmentInfoFormat segmentInfoFormat() { return segmentInfosFormat; } + + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return postingsFormat; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92RWCodec.java index 6008fa5df399..8d0867874130 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92RWCodec.java @@ -16,8 +16,10 @@ */ package 
org.apache.lucene.backward_codecs.lucene92; +import org.apache.lucene.backward_codecs.lucene90.Lucene90RWPostingsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90RWSegmentInfoFormat; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; @@ -33,6 +35,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { } }; private final SegmentInfoFormat segmentInfosFormat = new Lucene90RWSegmentInfoFormat(); + private final PostingsFormat postingsFormat = new Lucene90RWPostingsFormat(); /** Instantiates a new codec. */ public Lucene92RWCodec() { @@ -51,4 +54,9 @@ public final KnnVectorsFormat knnVectorsFormat() { public SegmentInfoFormat segmentInfoFormat() { return segmentInfosFormat; } + + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return postingsFormat; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/Lucene94RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/Lucene94RWCodec.java index 88edd2431f87..678dec4ad190 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/Lucene94RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/Lucene94RWCodec.java @@ -16,8 +16,10 @@ */ package org.apache.lucene.backward_codecs.lucene94; +import org.apache.lucene.backward_codecs.lucene90.Lucene90RWPostingsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90RWSegmentInfoFormat; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; @@ -32,8 +34,8 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { return defaultKnnVectorsFormat; } }; - private final SegmentInfoFormat segmentInfosFormat = new Lucene90RWSegmentInfoFormat(); + private final PostingsFormat postingsFormat = new Lucene90RWPostingsFormat(); /** Instantiates a new codec. 
*/ public Lucene94RWCodec() { @@ -52,4 +54,9 @@ public final KnnVectorsFormat knnVectorsFormat() { public SegmentInfoFormat segmentInfoFormat() { return segmentInfosFormat; } + + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return postingsFormat; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/Lucene95RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/Lucene95RWCodec.java index c8311dfd1158..e51a2a391ddf 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/Lucene95RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/Lucene95RWCodec.java @@ -16,9 +16,11 @@ */ package org.apache.lucene.backward_codecs.lucene95; +import org.apache.lucene.backward_codecs.lucene90.Lucene90RWPostingsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90RWSegmentInfoFormat; import org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsFormat; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; @@ -33,6 +35,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { return defaultKnnVectorsFormat; } }; + private final PostingsFormat postingsFormat = new Lucene90RWPostingsFormat(); private final SegmentInfoFormat segmentInfosFormat = new Lucene90RWSegmentInfoFormat(); /** Instantiates a new codec. */ @@ -52,4 +55,9 @@ public final KnnVectorsFormat knnVectorsFormat() { public SegmentInfoFormat segmentInfoFormat() { return segmentInfosFormat; } + + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return postingsFormat; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java index 5ee3451c9e89..d389372d8f85 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java @@ -22,14 +22,14 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsReader; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsWriter; import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsReader; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; -/** Uses {@link OrdsBlockTreeTermsWriter} with {@link Lucene90PostingsWriter}. */ +/** Uses {@link OrdsBlockTreeTermsWriter} with {@link Lucene99PostingsWriter}. 
*/ public class BlockTreeOrdsPostingsFormat extends PostingsFormat { private final int minTermBlockSize; @@ -67,7 +67,7 @@ public String toString() { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state); + PostingsWriterBase postingsWriter = new Lucene99PostingsWriter(state); boolean success = false; try { @@ -84,7 +84,7 @@ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new Lucene90PostingsReader(state); + PostingsReaderBase postingsReader = new Lucene99PostingsReader(state); boolean success = false; try { FieldsProducer ret = new OrdsBlockTreeTermsReader(postingsReader, state); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index c243dd0c0eea..4c355f314f13 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -24,7 +24,7 @@ import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat; import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; @@ -54,7 +54,7 @@ // - or: longer dense skip lists than just next byte? /** - * Wraps {@link Lucene90PostingsFormat} format for on-disk storage, but then at read time loads and + * Wraps {@link Lucene99PostingsFormat} format for on-disk storage, but then at read time loads and * stores all terms and postings directly in RAM as byte[], int[]. * *
WARNING: This is exceptionally RAM intensive: it makes no effort to compress the @@ -97,12 +97,12 @@ public DirectPostingsFormat(int minSkipCount, int lowFreqCutoff) { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - return PostingsFormat.forName("Lucene90").fieldsConsumer(state); + return PostingsFormat.forName("Lucene99").fieldsConsumer(state); } @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - FieldsProducer postings = PostingsFormat.forName("Lucene90").fieldsProducer(state); + FieldsProducer postings = PostingsFormat.forName("Lucene99").fieldsProducer(state); if (state.context.context != IOContext.Context.MERGE) { FieldsProducer loadedPostings; try { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java index 4e0fec1587f4..0a18b75ac003 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java @@ -22,8 +22,8 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsReader; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsReader; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; @@ -41,7 +41,7 @@ public String toString() { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state); + PostingsWriterBase postingsWriter = new Lucene99PostingsWriter(state); boolean success = false; try { @@ -57,7 +57,7 @@ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new Lucene90PostingsReader(state); + PostingsReaderBase postingsReader = new Lucene99PostingsReader(state); boolean success = false; try { FieldsProducer ret = new FSTTermsReader(state, postingsReader); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java index 6881aedbd50f..ae01216ccf69 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java @@ -17,13 +17,13 @@ package org.apache.lucene.codecs.uniformsplit; -import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.BLOCK_SIZE; import java.io.IOException; import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsReader; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.IntBlockTermState; +import 
org.apache.lucene.codecs.lucene99.Lucene99PostingsReader; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsWriter; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.TermState; @@ -34,7 +34,7 @@ /** * {@link TermState} serializer which encodes each file pointer as a delta relative to a base file - * pointer. It differs from {@link Lucene90PostingsWriter#encodeTerm} which encodes each file + * pointer. It differs from {@link Lucene99PostingsWriter#encodeTerm} which encodes each file * pointer as a delta relative to the previous file pointer. * *
It automatically sets the base file pointer to the first valid file pointer for doc start FP, @@ -95,7 +95,7 @@ public long getBasePayStartFP() { /** * Writes a {@link BlockTermState} to the provided {@link DataOutput}. * - *
Simpler variant of {@link Lucene90PostingsWriter#encodeTerm(DataOutput, FieldInfo, + *
Simpler variant of {@link Lucene99PostingsWriter#encodeTerm(DataOutput, FieldInfo, * BlockTermState, boolean)}. */ public void writeTermState( @@ -148,7 +148,7 @@ public void writeTermState( /** * Reads a {@link BlockTermState} from the provided {@link DataInput}. * - *
Simpler variant of {@link Lucene90PostingsReader#decodeTerm(DataInput, FieldInfo, + *
Simpler variant of {@link Lucene99PostingsReader#decodeTerm(DataInput, FieldInfo, * BlockTermState, boolean)}. * * @param reuse {@link BlockTermState} to reuse; or null to create a new one. diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitPostingsFormat.java index 5ac38e78b8f6..8e2b7bb7d540 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitPostingsFormat.java @@ -23,8 +23,8 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsReader; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsReader; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; @@ -113,7 +113,7 @@ protected UniformSplitPostingsFormat( @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state); + PostingsWriterBase postingsWriter = new Lucene99PostingsWriter(state); boolean success = false; try { FieldsConsumer termsWriter = @@ -130,7 +130,7 @@ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new Lucene90PostingsReader(state); + PostingsReaderBase postingsReader = new Lucene99PostingsReader(state); boolean success = false; try { FieldsProducer termsReader = diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/package-info.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/package-info.java index f1df01d09b2a..a0fa67508f39 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/package-info.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/package-info.java @@ -28,7 +28,7 @@ * org.apache.lucene.search.PhraseQuery}) *
field.
 *
 - * The default implementation always returns "Lucene90".
 + * The default implementation always returns "Lucene99".
 *
WARNING: if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation,
@@ -164,7 +164,7 @@ public PostingsFormat getPostingsFormatForField(String field) {
* Returns the docvalues format that should be used for writing new segments of field.
 *
 - * The default implementation always returns "Lucene90".
 + * The default implementation always returns "Lucene99".
 *
WARNING: if you subclass, you are responsible for index backwards compatibility: * future version of Lucene are only guaranteed to be able to read the default implementation. diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java new file mode 100644 index 000000000000..f233276c6c53 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java @@ -0,0 +1,518 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene99; + +import java.io.IOException; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.MultiLevelSkipListWriter; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader; +import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.TermState; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.packed.PackedInts; + +/** + * Lucene 9.9 postings format, which encodes postings in packed integer blocks for fast decode. + * + *
Basic idea: + * + *
In packed blocks, integers are encoded with the same bit width ({@link PackedInts packed + * format}): the block size (i.e. number of integers inside block) is fixed (currently 128). + * Additionally blocks that are all the same value are encoded in an optimized way. + *
In VInt blocks, integers are encoded as {@link DataOutput#writeVInt VInt}: the block + * size is variable. + *
When the postings are long enough, Lucene99PostingsFormat will try to encode most + * integer data as a packed block. + *
Take a term with 259 documents as an example, the first 256 document ids are encoded as + * two packed blocks, while the remaining 3 are encoded as one VInt block. + *
Different kinds of data are always encoded separately into different packed blocks, but + * may possibly be interleaved into the same VInt block. + *
This strategy is applied to pairs: <document number, frequency>, <position, + * payload length>, <position, offset start, offset length>, and <position, + * payload length, offset start, offset length>.
The structure of skip table is quite similar to previous version of Lucene. Skip + * interval is the same as block size, and each skip entry points to the beginning of each + * block. However, for the first block, skip data is omitted. + *
A position is an integer indicating where the term occurs within one document. A payload + * is a blob of metadata associated with current position. An offset is a pair of integers + * indicating the tokenized start/end offsets for given term in current position: it is + * essentially a specialized payload. + *
When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets + * (assuming a null payload contributes one count). As mentioned in block structure, it is + * possible to encode these three either combined or separately. + *
In all cases, payloads and offsets are stored together. When encoded as a packed block, + * position data is separated out as .pos, while payloads and offsets are encoded in .pay + * (payload metadata will also be stored directly in .pay). When encoded as VInt blocks, all + * these three are stored interleaved into the .pos (so is payload metadata). + *
With this strategy, the majority of payload and offset data will be outside .pos file. + * So for queries that require only position data, running on a full index with payloads and + * offsets, this reduces disk pre-fetches. + *
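To make the packed-block idea above concrete, here is a small, self-contained Java sketch with hypothetical names (it is not Lucene's ForUtil/PForUtil API) of how a FOR-style encoder picks one bit width for a whole 128-value block from its largest value; PFOR instead keeps a smaller width and patches the few outliers as exceptions, which is why this change (GITHUB#12696) keeps PFOR for freqs, positions and offsets while moving doc deltas back to FOR.

// Hypothetical sketch, not Lucene's ForUtil/PForUtil: pick the bit width for one
// 128-value block of deltas the way FOR does, i.e. from the largest value in the block.
public class ForWidthSketch {
  static final int BLOCK_SIZE = 128;

  /** Bits per value a FOR-encoded block would need: enough for the largest delta. */
  static int forBitsPerValue(long[] block) {
    long or = 0;
    for (int i = 0; i < BLOCK_SIZE; i++) {
      or |= block[i]; // the OR has the same highest set bit as the maximum value
    }
    return or == 0 ? 0 : 64 - Long.numberOfLeadingZeros(or);
  }

  public static void main(String[] args) {
    long[] deltas = new long[BLOCK_SIZE];
    java.util.Arrays.fill(deltas, 3); // most doc gaps are tiny (2 bits) ...
    deltas[100] = 1 << 20;            // ... but a single outlier is large (21 bits)
    System.out.println("FOR bits/value: " + forBitsPerValue(deltas)); // prints 21
    // PFOR would keep the 2-bit width and store the outlier separately as an exception,
    // trading a little extra decode work for smaller blocks; per GITHUB#12696, doc deltas
    // go back to plain FOR while freqs, positions and offsets keep PFOR.
  }
}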
Files and detailed format: + * + *
.tim: Term Dictionary
 + * .tip: Term Index
 + * .doc: Frequencies and Skip Data
 + * .pos: Positions
 + * .pay: Payloads and Offsets
+ * The .tim file contains the list of terms in each field along with per-term statistics + * (such as docfreq) and pointers to the frequencies, positions, payload and skip data in the + * .doc, .pos, and .pay files. See {@link Lucene90BlockTreeTermsWriter} for more details on + * the format. + *
NOTE: The term dictionary can plug into different postings implementations: the postings + * writer/reader are actually responsible for encoding and decoding the PostingsHeader and + * TermMetadata sections described here: + *
Notes: + *
The .tip file contains an index into the term dictionary, so that it can be accessed + * randomly. See {@link Lucene90BlockTreeTermsWriter} for more details on the format. + *
The .doc file contains the lists of documents which contain each term, along with the + * frequency of the term in that document (except when frequencies are omitted: {@link + * IndexOptions#DOCS}). It also saves skip data to the beginning of each packed or VInt block, + * when the length of document list is larger than packed block size. + *
Notes: + *
DocDelta: if frequencies are indexed, this determines both the document number and + * the frequency. In particular, DocDelta/2 is the difference between this document + * number and the previous document number (or zero when this is the first document in a + * TermFreqs). When DocDelta is odd, the frequency is one. When DocDelta is even, the + * frequency is read as another VInt. If frequencies are omitted, DocDelta contains the + * gap (not multiplied by 2) between document numbers and no frequency information is + * stored. + *
For example, the TermFreqs for a term which occurs once in document seven and + * three times in document eleven, with frequencies indexed, would be the following + * sequence of VInts: + *
15, 8, 3 + *
If frequencies were omitted ({@link IndexOptions#DOCS}) it would be this sequence + * of VInts instead: + *
7,4 + *
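A minimal sketch of the DocDelta/frequency folding described above (a hypothetical helper, not Lucene code); it reproduces the 15, 8, 3 example, and is the inverse of what readVIntBlock() in Lucene99PostingsReader does when decoding a VInt tail block.

// Hypothetical sketch of the TermFreqs VInt encoding described above; Lucene's
// readVIntBlock() in Lucene99PostingsReader undoes exactly this shift/parity trick.
import java.util.ArrayList;
import java.util.List;

public class DocDeltaSketch {

  /** Encode (doc, freq) pairs: DocDelta = gap << 1, with the low bit set when freq == 1. */
  static List<Integer> encode(int[] docs, int[] freqs) {
    List<Integer> vints = new ArrayList<>();
    int prevDoc = 0;
    for (int i = 0; i < docs.length; i++) {
      int delta = docs[i] - prevDoc;
      if (freqs[i] == 1) {
        vints.add((delta << 1) | 1); // odd DocDelta: frequency is implicitly one
      } else {
        vints.add(delta << 1);       // even DocDelta: frequency follows as its own VInt
        vints.add(freqs[i]);
      }
      prevDoc = docs[i];
    }
    return vints;
  }

  public static void main(String[] args) {
    // Once in document seven, three times in document eleven, as in the example above:
    System.out.println(encode(new int[] {7, 11}, new int[] {1, 3})); // [15, 8, 3]
  }
}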
The .pos file contains the lists of positions that each term occurs at within documents. + * It also sometimes stores part of payloads and offsets for speedup. + *
Notes: + *
4, 5, 4 + *
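The 4, 5, 4 sequence above reads as per-document position deltas: each position is stored as the gap from the previous position in the same document, and the gap resets at a document boundary. A hypothetical sketch (not Lucene code), assuming a term at position 4 in one document and positions 5 and 9 in the next:

// Hypothetical sketch, not Lucene code: position deltas restart at each document,
// so positions {4} and {5, 9} produce the deltas 4, 5, 4 shown above.
import java.util.ArrayList;
import java.util.List;

public class PositionDeltaSketch {

  static List<Integer> positionDeltas(int[][] positionsPerDoc) {
    List<Integer> deltas = new ArrayList<>();
    for (int[] positions : positionsPerDoc) {
      int prev = 0; // the delta base resets at the start of every document
      for (int pos : positions) {
        deltas.add(pos - prev);
        prev = pos;
      }
    }
    return deltas;
  }

  public static void main(String[] args) {
    System.out.println(positionDeltas(new int[][] {{4}, {5, 9}})); // [4, 5, 4]
  }
}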
The .pay file will store payloads and offsets associated with certain term-document + * positions. Some payloads and offsets will be separated out into .pos file, for performance + * reasons. + *
Notes: + *
One might think to use total term frequency to track how many positions are left to read + * as we decode the blocks, and decode the last block differently when num_left_positions < + * BLOCK_SIZE. Unfortunately this won't work since the tracking will be messed up when we skip + * blocks as the skipper will only tell us new position offset (start of block) and number of + * positions to skip for that block, without telling us how many positions it has skipped. + */ + public long lastPosBlockOffset; + + /** + * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly + * totalTermFreq in this case. + */ + public int singletonDocID; + + /** Sole constructor. */ + public IntBlockTermState() { + skipOffset = -1; + lastPosBlockOffset = -1; + singletonDocID = -1; + } + + @Override + public IntBlockTermState clone() { + IntBlockTermState other = new IntBlockTermState(); + other.copyFrom(this); + return other; + } + + @Override + public void copyFrom(TermState _other) { + super.copyFrom(_other); + IntBlockTermState other = (IntBlockTermState) _other; + docStartFP = other.docStartFP; + posStartFP = other.posStartFP; + payStartFP = other.payStartFP; + lastPosBlockOffset = other.lastPosBlockOffset; + skipOffset = other.skipOffset; + singletonDocID = other.singletonDocID; + } + + @Override + public String toString() { + return super.toString() + + " docStartFP=" + + docStartFP + + " posStartFP=" + + posStartFP + + " payStartFP=" + + payStartFP + + " lastPosBlockOffset=" + + lastPosBlockOffset + + " singletonDocID=" + + singletonDocID; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java new file mode 100644 index 000000000000..77e50e23ce86 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java @@ -0,0 +1,2087 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene99; + +import static org.apache.lucene.codecs.lucene99.ForUtil.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.DOC_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.MAX_SKIP_LEVELS; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.PAY_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.POS_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.TERMS_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.VERSION_CURRENT; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.VERSION_START; + +import java.io.IOException; +import java.util.Arrays; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.IntBlockTermState; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.Impacts; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SlowImpactsEnum; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format. + * + * @lucene.experimental + */ +public final class Lucene99PostingsReader extends PostingsReaderBase { + + private final IndexInput docIn; + private final IndexInput posIn; + private final IndexInput payIn; + + private final int version; + + /** Sole constructor. */ + public Lucene99PostingsReader(SegmentReadState state) throws IOException { + boolean success = false; + IndexInput docIn = null; + IndexInput posIn = null; + IndexInput payIn = null; + + // NOTE: these data files are too costly to verify checksum against all the bytes on open, + // but for now we at least verify proper structure of the checksum footer: which looks + // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption + // such as file truncation. 
+ + String docName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.DOC_EXTENSION); + try { + docIn = state.directory.openInput(docName, state.context); + version = + CodecUtil.checkIndexHeader( + docIn, + DOC_CODEC, + VERSION_START, + VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); + CodecUtil.retrieveChecksum(docIn); + + if (state.fieldInfos.hasProx()) { + String proxName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.POS_EXTENSION); + posIn = state.directory.openInput(proxName, state.context); + CodecUtil.checkIndexHeader( + posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix); + CodecUtil.retrieveChecksum(posIn); + + if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { + String payName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + Lucene99PostingsFormat.PAY_EXTENSION); + payIn = state.directory.openInput(payName, state.context); + CodecUtil.checkIndexHeader( + payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix); + CodecUtil.retrieveChecksum(payIn); + } + } + + this.docIn = docIn; + this.posIn = posIn; + this.payIn = payIn; + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(docIn, posIn, payIn); + } + } + } + + @Override + public void init(IndexInput termsIn, SegmentReadState state) throws IOException { + // Make sure we are talking to the matching postings writer + CodecUtil.checkIndexHeader( + termsIn, + TERMS_CODEC, + VERSION_START, + VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); + final int indexBlockSize = termsIn.readVInt(); + if (indexBlockSize != BLOCK_SIZE) { + throw new IllegalStateException( + "index-time BLOCK_SIZE (" + + indexBlockSize + + ") != read-time BLOCK_SIZE (" + + BLOCK_SIZE + + ")"); + } + } + + /** Read values that have been written using variable-length encoding instead of bit-packing. 
*/ + static void readVIntBlock( + IndexInput docIn, long[] docBuffer, long[] freqBuffer, int num, boolean indexHasFreq) + throws IOException { + if (indexHasFreq) { + for (int i = 0; i < num; i++) { + final int code = docIn.readVInt(); + docBuffer[i] = code >>> 1; + if ((code & 1) != 0) { + freqBuffer[i] = 1; + } else { + freqBuffer[i] = docIn.readVInt(); + } + } + } else { + for (int i = 0; i < num; i++) { + docBuffer[i] = docIn.readVInt(); + } + } + } + + static void prefixSum(long[] buffer, int count, long base) { + buffer[0] += base; + for (int i = 1; i < count; ++i) { + buffer[i] += buffer[i - 1]; + } + } + + static int findFirstGreater(long[] buffer, int target, int from) { + for (int i = from; i < BLOCK_SIZE; ++i) { + if (buffer[i] >= target) { + return i; + } + } + return BLOCK_SIZE; + } + + @Override + public BlockTermState newTermState() { + return new IntBlockTermState(); + } + + @Override + public void close() throws IOException { + IOUtils.close(docIn, posIn, payIn); + } + + @Override + public void decodeTerm( + DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) + throws IOException { + final IntBlockTermState termState = (IntBlockTermState) _termState; + final boolean fieldHasPositions = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + final boolean fieldHasOffsets = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + final boolean fieldHasPayloads = fieldInfo.hasPayloads(); + + if (absolute) { + termState.docStartFP = 0; + termState.posStartFP = 0; + termState.payStartFP = 0; + } + + final long l = in.readVLong(); + if ((l & 0x01) == 0) { + termState.docStartFP += l >>> 1; + if (termState.docFreq == 1) { + termState.singletonDocID = in.readVInt(); + } else { + termState.singletonDocID = -1; + } + } else { + assert absolute == false; + assert termState.singletonDocID != -1; + termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1); + } + + if (fieldHasPositions) { + termState.posStartFP += in.readVLong(); + if (fieldHasOffsets || fieldHasPayloads) { + termState.payStartFP += in.readVLong(); + } + if (termState.totalTermFreq > BLOCK_SIZE) { + termState.lastPosBlockOffset = in.readVLong(); + } else { + termState.lastPosBlockOffset = -1; + } + } + + if (termState.docFreq > BLOCK_SIZE) { + termState.skipOffset = in.readVLong(); + } else { + termState.skipOffset = -1; + } + } + + @Override + public PostingsEnum postings( + FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) + throws IOException { + + boolean indexHasPositions = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + + if (indexHasPositions == false + || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) { + BlockDocsEnum docsEnum; + if (reuse instanceof BlockDocsEnum) { + docsEnum = (BlockDocsEnum) reuse; + if (!docsEnum.canReuse(docIn, fieldInfo)) { + docsEnum = new BlockDocsEnum(fieldInfo); + } + } else { + docsEnum = new BlockDocsEnum(fieldInfo); + } + return docsEnum.reset((IntBlockTermState) termState, flags); + } else { + EverythingEnum everythingEnum; + if (reuse instanceof EverythingEnum) { + everythingEnum = (EverythingEnum) reuse; + if (!everythingEnum.canReuse(docIn, fieldInfo)) { + everythingEnum = new EverythingEnum(fieldInfo); + } + } else { + everythingEnum = new EverythingEnum(fieldInfo); + } + return everythingEnum.reset((IntBlockTermState) termState, flags); + } + } + + @Override + public 
ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) + throws IOException { + if (state.docFreq <= BLOCK_SIZE) { + // no skip data + return new SlowImpactsEnum(postings(fieldInfo, state, null, flags)); + } + + final boolean indexHasPositions = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + final boolean indexHasOffsets = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + final boolean indexHasPayloads = fieldInfo.hasPayloads(); + + if (indexHasPositions == false + || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) { + return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state); + } + + if (indexHasPositions + && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) + && (indexHasOffsets == false + || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false) + && (indexHasPayloads == false + || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) { + return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state); + } + + return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags); + } + + final class BlockDocsEnum extends PostingsEnum { + + final ForUtil forUtil = new ForUtil(); + final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil); + final PForUtil pforUtil = new PForUtil(forUtil); + + private final long[] docBuffer = new long[BLOCK_SIZE + 1]; + private final long[] freqBuffer = new long[BLOCK_SIZE]; + + private int docBufferUpto; + + private Lucene99SkipReader skipper; + private boolean skipped; + + final IndexInput startDocIn; + + IndexInput docIn; + final boolean indexHasFreq; + final boolean indexHasPos; + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted) + private int blockUpto; // number of docs in or before the current block + private int doc; // doc we last read + private long accum; // accumulator for doc deltas + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's skip data starts (after + // docTermStartFP) in the .doc file (or -1 if there is + // no skip data for this term): + private long skipOffset; + + // docID for next skip point, we won't use skipper if + // target docID is not larger than this + private int nextSkipDoc; + + private boolean needsFreq; // true if the caller actually needs frequencies + // as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block + // always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer + // (needsFreq=false) + private boolean isFreqsRead; + private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 + + public BlockDocsEnum(FieldInfo fieldInfo) throws IOException { + this.startDocIn = Lucene99PostingsReader.this.docIn; + this.docIn = null; + indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + indexHasPos = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + indexHasOffsets = + fieldInfo + .getIndexOptions() + .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + // We set the last element of docBuffer to NO_MORE_DOCS, it helps save 
conditionals in + // advance() + docBuffer[BLOCK_SIZE] = NO_MORE_DOCS; + } + + public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { + return docIn == startDocIn + && indexHasFreq + == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) + && indexHasPos + == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) + >= 0) + && indexHasPayloads == fieldInfo.hasPayloads(); + } + + public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException { + docFreq = termState.docFreq; + totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq; + docTermStartFP = termState.docStartFP; + skipOffset = termState.skipOffset; + singletonDocID = termState.singletonDocID; + if (docFreq > 1) { + if (docIn == null) { + // lazy init + docIn = startDocIn.clone(); + } + docIn.seek(docTermStartFP); + } + + doc = -1; + this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS); + this.isFreqsRead = true; + if (indexHasFreq == false || needsFreq == false) { + for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) { + freqBuffer[i] = 1; + } + } + accum = 0; + blockUpto = 0; + nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block + docBufferUpto = BLOCK_SIZE; + skipped = false; + return this; + } + + @Override + public int freq() throws IOException { + if (isFreqsRead == false) { + pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block + isFreqsRead = true; + } + return (int) freqBuffer[docBufferUpto - 1]; + } + + @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override + public int startOffset() throws IOException { + return -1; + } + + @Override + public int endOffset() throws IOException { + return -1; + } + + @Override + public BytesRef getPayload() throws IOException { + return null; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + // Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn + // after it + if (isFreqsRead == false) { + pforUtil.skip(docIn); + isFreqsRead = true; + } + + final int left = docFreq - blockUpto; + assert left >= 0; + + if (left >= BLOCK_SIZE) { + forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); + + if (indexHasFreq) { + if (needsFreq) { + isFreqsRead = false; + } else { + pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all + } + } + blockUpto += BLOCK_SIZE; + } else if (docFreq == 1) { + docBuffer[0] = singletonDocID; + freqBuffer[0] = totalTermFreq; + docBuffer[1] = NO_MORE_DOCS; + blockUpto++; + } else { + // Read vInts: + readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq); + prefixSum(docBuffer, left, accum); + docBuffer[left] = NO_MORE_DOCS; + blockUpto += left; + } + accum = docBuffer[BLOCK_SIZE - 1]; + docBufferUpto = 0; + assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; + } + + @Override + public int nextDoc() throws IOException { + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); // we don't need to load freqBuffer for now (will be loaded later if + // necessary) + } + + doc = (int) docBuffer[docBufferUpto]; + docBufferUpto++; + return doc; + } + + @Override + public int advance(int target) throws IOException { + // current skip docID < docIDs generated from current buffer <= next skip docID + // we don't need to skip if target is buffered already + if (docFreq > BLOCK_SIZE && target > nextSkipDoc) { + + if (skipper == null) { + // Lazy init: first time this enum has ever been 
used for skipping + skipper = + new Lucene99SkipReader( + docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads); + } + + if (!skipped) { + assert skipOffset != -1; + // This is the first time this enum has skipped + // since reset() was called; load the skip data: + skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq); + skipped = true; + } + + // always plus one to fix the result, since skip position in Lucene99SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto >= blockUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + blockUpto = newDocUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + accum = skipper.getDoc(); // actually, this is just lastSkipEntry + docIn.seek(skipper.getDocPointer()); // now point to the block we want to search + // even if freqBuffer were not read from the previous block, we will mark them as read, + // as we don't need to skip the previous block freqBuffer in refillDocs, + // as we have already positioned docIn where in needs to be. + isFreqsRead = true; + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } + + // Now scan... this is an inlined/pared down version + // of nextDoc(): + long doc; + while (true) { + doc = docBuffer[docBufferUpto]; + + if (doc >= target) { + break; + } + ++docBufferUpto; + } + + docBufferUpto++; + return this.doc = (int) doc; + } + + @Override + public long cost() { + return docFreq; + } + } + + // Also handles payloads + offsets + final class EverythingEnum extends PostingsEnum { + + final ForUtil forUtil = new ForUtil(); + final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil); + final PForUtil pforUtil = new PForUtil(forUtil); + + private final long[] docBuffer = new long[BLOCK_SIZE + 1]; + private final long[] freqBuffer = new long[BLOCK_SIZE + 1]; + private final long[] posDeltaBuffer = new long[BLOCK_SIZE]; + + private final long[] payloadLengthBuffer; + private final long[] offsetStartDeltaBuffer; + private final long[] offsetLengthBuffer; + + private byte[] payloadBytes; + private int payloadByteUpto; + private int payloadLength; + + private int lastStartOffset; + private int startOffset; + private int endOffset; + + private int docBufferUpto; + private int posBufferUpto; + + private Lucene99SkipReader skipper; + private boolean skipped; + + final IndexInput startDocIn; + + IndexInput docIn; + final IndexInput posIn; + final IndexInput payIn; + final BytesRef payload; + + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // number of positions in this posting list + private int blockUpto; // number of docs in or before the current block + private int doc; // doc we last read + private long accum; // accumulator for doc deltas + private int freq; // freq we last read + private int position; // current position + + // how many positions "behind" we are; nextPosition must + // skip these to "catch up": + private int posPendingCount; + + // Lazy pos seek: if != -1 then we must seek to this FP + // before reading positions: + private long posPendingFP; + + // Lazy pay seek: if != -1 then we must seek to this FP + // before reading payloads/offsets: + private long payPendingFP; + + // Where 
this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's postings start in the .pos file: + private long posTermStartFP; + + // Where this term's payloads/offsets start in the .pay + // file: + private long payTermStartFP; + + // File pointer where the last (vInt encoded) pos delta + // block is. We need this to know whether to bulk + // decode vs vInt decode the block: + private long lastPosBlockFP; + + // Where this term's skip data starts (after + // docTermStartFP) in the .doc file (or -1 if there is + // no skip data for this term): + private long skipOffset; + + private int nextSkipDoc; + + private boolean needsOffsets; // true if we actually need offsets + private boolean needsPayloads; // true if we actually need payloads + private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 + + public EverythingEnum(FieldInfo fieldInfo) throws IOException { + indexHasOffsets = + fieldInfo + .getIndexOptions() + .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + + this.startDocIn = Lucene99PostingsReader.this.docIn; + this.docIn = null; + this.posIn = Lucene99PostingsReader.this.posIn.clone(); + if (indexHasOffsets || indexHasPayloads) { + this.payIn = Lucene99PostingsReader.this.payIn.clone(); + } else { + this.payIn = null; + } + if (indexHasOffsets) { + offsetStartDeltaBuffer = new long[BLOCK_SIZE]; + offsetLengthBuffer = new long[BLOCK_SIZE]; + } else { + offsetStartDeltaBuffer = null; + offsetLengthBuffer = null; + startOffset = -1; + endOffset = -1; + } + + if (indexHasPayloads) { + payloadLengthBuffer = new long[BLOCK_SIZE]; + payloadBytes = new byte[128]; + payload = new BytesRef(); + } else { + payloadLengthBuffer = null; + payloadBytes = null; + payload = null; + } + + // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in + // advance() + docBuffer[BLOCK_SIZE] = NO_MORE_DOCS; + } + + public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { + return docIn == startDocIn + && indexHasOffsets + == (fieldInfo + .getIndexOptions() + .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0) + && indexHasPayloads == fieldInfo.hasPayloads(); + } + + public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOException { + docFreq = termState.docFreq; + docTermStartFP = termState.docStartFP; + posTermStartFP = termState.posStartFP; + payTermStartFP = termState.payStartFP; + skipOffset = termState.skipOffset; + totalTermFreq = termState.totalTermFreq; + singletonDocID = termState.singletonDocID; + if (docFreq > 1) { + if (docIn == null) { + // lazy init + docIn = startDocIn.clone(); + } + docIn.seek(docTermStartFP); + } + posPendingFP = posTermStartFP; + payPendingFP = payTermStartFP; + posPendingCount = 0; + if (termState.totalTermFreq < BLOCK_SIZE) { + lastPosBlockFP = posTermStartFP; + } else if (termState.totalTermFreq == BLOCK_SIZE) { + lastPosBlockFP = -1; + } else { + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; + } + + this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); + this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); + + doc = -1; + accum = 0; + blockUpto = 0; + if (docFreq > BLOCK_SIZE) { + nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block + } else { + nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping + } + docBufferUpto = BLOCK_SIZE; + skipped = 
false; + return this; + } + + @Override + public int freq() throws IOException { + return freq; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + final int left = docFreq - blockUpto; + assert left >= 0; + + if (left >= BLOCK_SIZE) { + forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); + pforUtil.decode(docIn, freqBuffer); + blockUpto += BLOCK_SIZE; + } else if (docFreq == 1) { + docBuffer[0] = singletonDocID; + freqBuffer[0] = totalTermFreq; + docBuffer[1] = NO_MORE_DOCS; + blockUpto++; + } else { + readVIntBlock(docIn, docBuffer, freqBuffer, left, true); + prefixSum(docBuffer, left, accum); + docBuffer[left] = NO_MORE_DOCS; + blockUpto += left; + } + accum = docBuffer[BLOCK_SIZE - 1]; + docBufferUpto = 0; + assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + final int count = (int) (totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + int offsetLength = 0; + payloadByteUpto = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + payloadLengthBuffer[i] = payloadLength; + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + if (payloadByteUpto + payloadLength > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); + } + posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); + payloadByteUpto += payloadLength; + } + } else { + posDeltaBuffer[i] = code; + } + + if (indexHasOffsets) { + int deltaCode = posIn.readVInt(); + if ((deltaCode & 1) != 0) { + offsetLength = posIn.readVInt(); + } + offsetStartDeltaBuffer[i] = deltaCode >>> 1; + offsetLengthBuffer[i] = offsetLength; + } + } + payloadByteUpto = 0; + } else { + pforUtil.decode(posIn, posDeltaBuffer); + + if (indexHasPayloads) { + if (needsPayloads) { + pforUtil.decode(payIn, payloadLengthBuffer); + int numBytes = payIn.readVInt(); + + if (numBytes > payloadBytes.length) { + payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes); + } + payIn.readBytes(payloadBytes, 0, numBytes); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + pforUtil.skip(payIn); // skip over lengths + int numBytes = payIn.readVInt(); // read length of payloadBytes + payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes + } + payloadByteUpto = 0; + } + + if (indexHasOffsets) { + if (needsOffsets) { + pforUtil.decode(payIn, offsetStartDeltaBuffer); + pforUtil.decode(payIn, offsetLengthBuffer); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + pforUtil.skip(payIn); // skip over starts + pforUtil.skip(payIn); // skip over lengths + } + } + } + } + + @Override + public int nextDoc() throws IOException { + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } + + doc = (int) docBuffer[docBufferUpto]; + freq = (int) freqBuffer[docBufferUpto]; + posPendingCount += freq; + docBufferUpto++; + + position = 0; + lastStartOffset = 0; + return doc; + } + + @Override + public int advance(int target) throws IOException { + if (target > nextSkipDoc) { + if (skipper == null) { + // Lazy init: first time this enum has ever been used for skipping + skipper = + new Lucene99SkipReader( + docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads); + } + + if (!skipped) { + 
assert skipOffset != -1; + // This is the first time this enum has skipped + // since reset() was called; load the skip data: + skipper.init( + docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); + skipped = true; + } + + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + blockUpto = newDocUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + accum = skipper.getDoc(); + docIn.seek(skipper.getDocPointer()); + posPendingFP = skipper.getPosPointer(); + payPendingFP = skipper.getPayPointer(); + posPendingCount = skipper.getPosBufferUpto(); + lastStartOffset = 0; // new document + payloadByteUpto = skipper.getPayloadByteUpto(); + } + nextSkipDoc = skipper.getNextSkipDoc(); + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } + + // Now scan: + long doc; + while (true) { + doc = docBuffer[docBufferUpto]; + freq = (int) freqBuffer[docBufferUpto]; + posPendingCount += freq; + docBufferUpto++; + + if (doc >= target) { + break; + } + } + + position = 0; + lastStartOffset = 0; + return this.doc = (int) doc; + } + + // TODO: in theory we could avoid loading frq block + // when not needed, ie, use skip data to load how far to + // seek the pos pointer ... instead of having to load frq + // blocks only to sum up how many positions to skip + private void skipPositions() throws IOException { + // Skip positions now: + int toSkip = posPendingCount - freq; + // if (DEBUG) { + // System.out.println(" FPR.skipPositions: toSkip=" + toSkip); + // } + + final int leftInBlock = BLOCK_SIZE - posBufferUpto; + if (toSkip < leftInBlock) { + int end = posBufferUpto + toSkip; + while (posBufferUpto < end) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } else { + toSkip -= leftInBlock; + while (toSkip >= BLOCK_SIZE) { + assert posIn.getFilePointer() != lastPosBlockFP; + pforUtil.skip(posIn); + + if (indexHasPayloads) { + // Skip payloadLength block: + pforUtil.skip(payIn); + + // Skip payloadBytes block: + int numBytes = payIn.readVInt(); + payIn.seek(payIn.getFilePointer() + numBytes); + } + + if (indexHasOffsets) { + pforUtil.skip(payIn); + pforUtil.skip(payIn); + } + toSkip -= BLOCK_SIZE; + } + refillPositions(); + payloadByteUpto = 0; + posBufferUpto = 0; + while (posBufferUpto < toSkip) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } + + position = 0; + lastStartOffset = 0; + } + + @Override + public int nextPosition() throws IOException { + assert posPendingCount > 0; + + if (posPendingFP != -1) { + posIn.seek(posPendingFP); + posPendingFP = -1; + + if (payPendingFP != -1 && payIn != null) { + payIn.seek(payPendingFP); + payPendingFP = -1; + } + + // Force buffer refill: + posBufferUpto = BLOCK_SIZE; + } + + if (posPendingCount > freq) { + skipPositions(); + posPendingCount = freq; + } + + if (posBufferUpto == BLOCK_SIZE) { + refillPositions(); + posBufferUpto = 0; + } + position += posDeltaBuffer[posBufferUpto]; + + if (indexHasPayloads) { + payloadLength = (int) payloadLengthBuffer[posBufferUpto]; + payload.bytes = payloadBytes; + payload.offset = payloadByteUpto; + payload.length = payloadLength; + payloadByteUpto += payloadLength; + } + + if (indexHasOffsets) { + startOffset = lastStartOffset + (int) offsetStartDeltaBuffer[posBufferUpto]; + endOffset = startOffset + (int) 
offsetLengthBuffer[posBufferUpto]; + lastStartOffset = startOffset; + } + + posBufferUpto++; + posPendingCount--; + return position; + } + + @Override + public int startOffset() { + return startOffset; + } + + @Override + public int endOffset() { + return endOffset; + } + + @Override + public BytesRef getPayload() { + if (payloadLength == 0) { + return null; + } else { + return payload; + } + } + + @Override + public long cost() { + return docFreq; + } + } + + final class BlockImpactsDocsEnum extends ImpactsEnum { + + final ForUtil forUtil = new ForUtil(); + final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil); + final PForUtil pforUtil = new PForUtil(forUtil); + + private final long[] docBuffer = new long[BLOCK_SIZE + 1]; + private final long[] freqBuffer = new long[BLOCK_SIZE]; + + private int docBufferUpto; + + private final Lucene99ScoreSkipReader skipper; + + final IndexInput docIn; + + final boolean indexHasFreqs; + + private int docFreq; // number of docs in this posting list + private int blockUpto; // number of documents in or before the current block + private int doc; // doc we last read + private long accum; // accumulator for doc deltas + + private int nextSkipDoc = -1; + + private long seekTo = -1; + + // as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block + // always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer + // (needsFreq=false) + private boolean isFreqsRead; + + public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState) + throws IOException { + indexHasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + final boolean indexHasPositions = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + final boolean indexHasOffsets = + fieldInfo + .getIndexOptions() + .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + final boolean indexHasPayloads = fieldInfo.hasPayloads(); + + this.docIn = Lucene99PostingsReader.this.docIn.clone(); + + docFreq = termState.docFreq; + docIn.seek(termState.docStartFP); + + doc = -1; + accum = 0; + blockUpto = 0; + docBufferUpto = BLOCK_SIZE; + + skipper = + new Lucene99ScoreSkipReader( + docIn.clone(), MAX_SKIP_LEVELS, indexHasPositions, indexHasOffsets, indexHasPayloads); + skipper.init( + termState.docStartFP + termState.skipOffset, + termState.docStartFP, + termState.posStartFP, + termState.payStartFP, + docFreq); + + // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in + // advance() + docBuffer[BLOCK_SIZE] = NO_MORE_DOCS; + this.isFreqsRead = true; + if (indexHasFreqs == false) { + Arrays.fill(freqBuffer, 1L); + } + } + + @Override + public int freq() throws IOException { + if (isFreqsRead == false) { + pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block + isFreqsRead = true; + } + return (int) freqBuffer[docBufferUpto - 1]; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + // Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn + // after it + if (isFreqsRead == false) { + pforUtil.skip(docIn); + isFreqsRead = true; + } + + final int left = docFreq - blockUpto; + assert left >= 0; + + if (left >= BLOCK_SIZE) { + forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); + if (indexHasFreqs) { + isFreqsRead = false; + } + blockUpto += BLOCK_SIZE; + } else { + readVIntBlock(docIn, docBuffer, freqBuffer, 
left, indexHasFreqs); + prefixSum(docBuffer, left, accum); + docBuffer[left] = NO_MORE_DOCS; + blockUpto += left; + } + accum = docBuffer[BLOCK_SIZE - 1]; + docBufferUpto = 0; + assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; + } + + @Override + public void advanceShallow(int target) throws IOException { + if (target > nextSkipDoc) { + // always plus one to fix the result, since skip position in Lucene99SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto >= blockUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + blockUpto = newDocUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + accum = skipper.getDoc(); + seekTo = skipper.getDocPointer(); // delay the seek + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + assert nextSkipDoc >= target; + } + + @Override + public Impacts getImpacts() throws IOException { + // nextDoc() doesn't advance skip lists, so it's important to do it here to make sure we're + // not returning impacts over a bigger range of doc IDs than necessary. + advanceShallow(doc); + return skipper.getImpacts(); + } + + @Override + public int nextDoc() throws IOException { + if (docBufferUpto == BLOCK_SIZE) { + if (seekTo >= 0) { + docIn.seek(seekTo); + isFreqsRead = true; // reset isFreqsRead + seekTo = -1; + } + refillDocs(); + } + return this.doc = (int) docBuffer[docBufferUpto++]; + } + + @Override + public int advance(int target) throws IOException { + if (target > nextSkipDoc) { + advanceShallow(target); + } + if (docBufferUpto == BLOCK_SIZE) { + if (seekTo >= 0) { + docIn.seek(seekTo); + isFreqsRead = true; // reset isFreqsRead + seekTo = -1; + } + refillDocs(); + } + + int next = findFirstGreater(docBuffer, target, docBufferUpto); + this.doc = (int) docBuffer[next]; + docBufferUpto = next + 1; + return doc; + } + + @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override + public int startOffset() { + return -1; + } + + @Override + public int endOffset() { + return -1; + } + + @Override + public BytesRef getPayload() { + return null; + } + + @Override + public long cost() { + return docFreq; + } + } + + final class BlockImpactsPostingsEnum extends ImpactsEnum { + final ForUtil forUtil = new ForUtil(); + final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil); + final PForUtil pforUtil = new PForUtil(forUtil); + + private final long[] docBuffer = new long[BLOCK_SIZE]; + private final long[] freqBuffer = new long[BLOCK_SIZE]; + private final long[] posDeltaBuffer = new long[BLOCK_SIZE]; + + private int docBufferUpto; + private int posBufferUpto; + + private final Lucene99ScoreSkipReader skipper; + + final IndexInput docIn; + final IndexInput posIn; + + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // number of positions in this posting list + private int docUpto; // how many docs we've read + private int doc; // doc we last read + private long accum; // accumulator for doc deltas + private int freq; // freq we last read + private int position; // current position + + // how many positions "behind" we are; nextPosition must + // skip these to "catch up": + private int posPendingCount; + + // Lazy pos seek: if != -1 then we must seek to this FP + // before reading positions: + private long 
posPendingFP; + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's postings start in the .pos file: + private long posTermStartFP; + + // Where this term's payloads/offsets start in the .pay + // file: + private long payTermStartFP; + + // File pointer where the last (vInt encoded) pos delta + // block is. We need this to know whether to bulk + // decode vs vInt decode the block: + private long lastPosBlockFP; + + private int nextSkipDoc = -1; + + private long seekTo = -1; + + public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState) + throws IOException { + indexHasOffsets = + fieldInfo + .getIndexOptions() + .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + + this.docIn = Lucene99PostingsReader.this.docIn.clone(); + + this.posIn = Lucene99PostingsReader.this.posIn.clone(); + + docFreq = termState.docFreq; + docTermStartFP = termState.docStartFP; + posTermStartFP = termState.posStartFP; + payTermStartFP = termState.payStartFP; + totalTermFreq = termState.totalTermFreq; + docIn.seek(docTermStartFP); + posPendingFP = posTermStartFP; + posPendingCount = 0; + if (termState.totalTermFreq < BLOCK_SIZE) { + lastPosBlockFP = posTermStartFP; + } else if (termState.totalTermFreq == BLOCK_SIZE) { + lastPosBlockFP = -1; + } else { + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; + } + + doc = -1; + accum = 0; + docUpto = 0; + docBufferUpto = BLOCK_SIZE; + + skipper = + new Lucene99ScoreSkipReader( + docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads); + skipper.init( + docTermStartFP + termState.skipOffset, + docTermStartFP, + posTermStartFP, + payTermStartFP, + docFreq); + } + + @Override + public int freq() throws IOException { + return freq; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + final int left = docFreq - docUpto; + assert left >= 0; + + if (left >= BLOCK_SIZE) { + forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); + pforUtil.decode(docIn, freqBuffer); + } else { + readVIntBlock(docIn, docBuffer, freqBuffer, left, true); + prefixSum(docBuffer, left, accum); + docBuffer[left] = NO_MORE_DOCS; + } + accum = docBuffer[BLOCK_SIZE - 1]; + docBufferUpto = 0; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + final int count = (int) (totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + posIn.seek(posIn.getFilePointer() + payloadLength); + } + } else { + posDeltaBuffer[i] = code; + } + if (indexHasOffsets) { + if ((posIn.readVInt() & 1) != 0) { + // offset length changed + posIn.readVInt(); + } + } + } + } else { + pforUtil.decode(posIn, posDeltaBuffer); + } + } + + @Override + public void advanceShallow(int target) throws IOException { + if (target > nextSkipDoc) { + // always plus one to fix the result, since skip position in Lucene99SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > docUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + accum 
= skipper.getDoc(); + posPendingFP = skipper.getPosPointer(); + posPendingCount = skipper.getPosBufferUpto(); + seekTo = skipper.getDocPointer(); // delay the seek + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + assert nextSkipDoc >= target; + } + + @Override + public Impacts getImpacts() throws IOException { + advanceShallow(doc); + return skipper.getImpacts(); + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target > nextSkipDoc) { + advanceShallow(target); + } + if (docBufferUpto == BLOCK_SIZE) { + if (seekTo >= 0) { + docIn.seek(seekTo); + seekTo = -1; + } + refillDocs(); + } + + int next = findFirstGreater(docBuffer, target, docBufferUpto); + if (next == BLOCK_SIZE) { + return doc = NO_MORE_DOCS; + } + this.doc = (int) docBuffer[next]; + this.freq = (int) freqBuffer[next]; + for (int i = docBufferUpto; i <= next; ++i) { + posPendingCount += freqBuffer[i]; + } + docUpto += next - docBufferUpto + 1; + docBufferUpto = next + 1; + position = 0; + return doc; + } + + // TODO: in theory we could avoid loading frq block + // when not needed, ie, use skip data to load how far to + // seek the pos pointer ... instead of having to load frq + // blocks only to sum up how many positions to skip + private void skipPositions() throws IOException { + // Skip positions now: + int toSkip = posPendingCount - freq; + + final int leftInBlock = BLOCK_SIZE - posBufferUpto; + if (toSkip < leftInBlock) { + posBufferUpto += toSkip; + } else { + toSkip -= leftInBlock; + while (toSkip >= BLOCK_SIZE) { + assert posIn.getFilePointer() != lastPosBlockFP; + pforUtil.skip(posIn); + toSkip -= BLOCK_SIZE; + } + refillPositions(); + posBufferUpto = toSkip; + } + + position = 0; + } + + @Override + public int nextPosition() throws IOException { + assert posPendingCount > 0; + + if (posPendingFP != -1) { + posIn.seek(posPendingFP); + posPendingFP = -1; + + // Force buffer refill: + posBufferUpto = BLOCK_SIZE; + } + + if (posPendingCount > freq) { + skipPositions(); + posPendingCount = freq; + } + + if (posBufferUpto == BLOCK_SIZE) { + refillPositions(); + posBufferUpto = 0; + } + position += posDeltaBuffer[posBufferUpto++]; + + posPendingCount--; + return position; + } + + @Override + public int startOffset() { + return -1; + } + + @Override + public int endOffset() { + return -1; + } + + @Override + public BytesRef getPayload() { + return null; + } + + @Override + public long cost() { + return docFreq; + } + } + + final class BlockImpactsEverythingEnum extends ImpactsEnum { + + final ForUtil forUtil = new ForUtil(); + final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil); + final PForUtil pforUtil = new PForUtil(forUtil); + + private final long[] docBuffer = new long[BLOCK_SIZE]; + private final long[] freqBuffer = new long[BLOCK_SIZE]; + private final long[] posDeltaBuffer = new long[BLOCK_SIZE]; + + private final long[] payloadLengthBuffer; + private final long[] offsetStartDeltaBuffer; + private final long[] offsetLengthBuffer; + + private byte[] payloadBytes; + private int payloadByteUpto; + private int payloadLength; + + private int lastStartOffset; + private int startOffset = -1; + private int endOffset = -1; + + private int docBufferUpto; + private int posBufferUpto; + + private final Lucene99ScoreSkipReader skipper; + + final IndexInput docIn; + final IndexInput posIn; + final IndexInput payIn; + 
final BytesRef payload; + + final boolean indexHasFreq; + final boolean indexHasPos; + final boolean indexHasOffsets; + final boolean indexHasPayloads; + + private int docFreq; // number of docs in this posting list + private long totalTermFreq; // number of positions in this posting list + private int docUpto; // how many docs we've read + private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads + private int doc; // doc we last read + private long accum; // accumulator for doc deltas + private int position; // current position + + // how many positions "behind" we are; nextPosition must + // skip these to "catch up": + private int posPendingCount; + + // Lazy pos seek: if != -1 then we must seek to this FP + // before reading positions: + private long posPendingFP; + + // Lazy pay seek: if != -1 then we must seek to this FP + // before reading payloads/offsets: + private long payPendingFP; + + // Where this term's postings start in the .doc file: + private long docTermStartFP; + + // Where this term's postings start in the .pos file: + private long posTermStartFP; + + // Where this term's payloads/offsets start in the .pay + // file: + private long payTermStartFP; + + // File pointer where the last (vInt encoded) pos delta + // block is. We need this to know whether to bulk + // decode vs vInt decode the block: + private long lastPosBlockFP; + + private int nextSkipDoc = -1; + + private final boolean needsPositions; + private final boolean needsOffsets; // true if we actually need offsets + private final boolean needsPayloads; // true if we actually need payloads + + private boolean + isFreqsRead; // shows if freqBuffer for the current doc block are read into freqBuffer + + private long seekTo = -1; + + public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags) + throws IOException { + indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + indexHasPos = + fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + indexHasOffsets = + fieldInfo + .getIndexOptions() + .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) + >= 0; + indexHasPayloads = fieldInfo.hasPayloads(); + + needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS); + needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); + needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); + + this.docIn = Lucene99PostingsReader.this.docIn.clone(); + + if (indexHasPos && needsPositions) { + this.posIn = Lucene99PostingsReader.this.posIn.clone(); + } else { + this.posIn = null; + } + + if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) { + this.payIn = Lucene99PostingsReader.this.payIn.clone(); + } else { + this.payIn = null; + } + + if (indexHasOffsets) { + offsetStartDeltaBuffer = new long[BLOCK_SIZE]; + offsetLengthBuffer = new long[BLOCK_SIZE]; + } else { + offsetStartDeltaBuffer = null; + offsetLengthBuffer = null; + startOffset = -1; + endOffset = -1; + } + + if (indexHasPayloads) { + payloadLengthBuffer = new long[BLOCK_SIZE]; + payloadBytes = new byte[128]; + payload = new BytesRef(); + } else { + payloadLengthBuffer = null; + payloadBytes = null; + payload = null; + } + + docFreq = termState.docFreq; + docTermStartFP = termState.docStartFP; + posTermStartFP = termState.posStartFP; + payTermStartFP = termState.payStartFP; + totalTermFreq = termState.totalTermFreq; + docIn.seek(docTermStartFP); + 
posPendingFP = posTermStartFP; + payPendingFP = payTermStartFP; + posPendingCount = 0; + if (termState.totalTermFreq < BLOCK_SIZE) { + lastPosBlockFP = posTermStartFP; + } else if (termState.totalTermFreq == BLOCK_SIZE) { + lastPosBlockFP = -1; + } else { + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; + } + + doc = -1; + accum = 0; + docUpto = 0; + posDocUpTo = 0; + isFreqsRead = true; + docBufferUpto = BLOCK_SIZE; + + skipper = + new Lucene99ScoreSkipReader( + docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads); + skipper.init( + docTermStartFP + termState.skipOffset, + docTermStartFP, + posTermStartFP, + payTermStartFP, + docFreq); + + if (indexHasFreq == false) { + for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) { + freqBuffer[i] = 1; + } + } + } + + @Override + public int freq() throws IOException { + if (indexHasFreq && (isFreqsRead == false)) { + pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block + isFreqsRead = true; + } + return (int) freqBuffer[docBufferUpto - 1]; + } + + @Override + public int docID() { + return doc; + } + + private void refillDocs() throws IOException { + if (indexHasFreq) { + if (isFreqsRead == false) { // previous freq block was not read + // check if we need to load the previous freq block to catch up on positions or we can + // skip it + if (indexHasPos && needsPositions && (posDocUpTo < docUpto)) { + pforUtil.decode(docIn, freqBuffer); // load the previous freq block + } else { + pforUtil.skip(docIn); // skip it + } + isFreqsRead = true; + } + if (indexHasPos && needsPositions) { + while (posDocUpTo + < docUpto) { // catch on positions, bring posPendingCount upto the current doc + posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)]; + posDocUpTo++; + } + } + } + + final int left = docFreq - docUpto; + assert left >= 0; + + if (left >= BLOCK_SIZE) { + forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); + if (indexHasFreq) { + isFreqsRead = + false; // freq block will be loaded lazily when necessary, we don't load it here + } + } else { + readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq); + prefixSum(docBuffer, left, accum); + docBuffer[left] = NO_MORE_DOCS; + } + accum = docBuffer[BLOCK_SIZE - 1]; + docBufferUpto = 0; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + final int count = (int) (totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + int offsetLength = 0; + payloadByteUpto = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + payloadLengthBuffer[i] = payloadLength; + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + if (payloadByteUpto + payloadLength > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); + } + posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); + payloadByteUpto += payloadLength; + } + } else { + posDeltaBuffer[i] = code; + } + + if (indexHasOffsets) { + int deltaCode = posIn.readVInt(); + if ((deltaCode & 1) != 0) { + offsetLength = posIn.readVInt(); + } + offsetStartDeltaBuffer[i] = deltaCode >>> 1; + offsetLengthBuffer[i] = offsetLength; + } + } + payloadByteUpto = 0; + } else { + pforUtil.decode(posIn, posDeltaBuffer); + + if (indexHasPayloads && payIn != null) { + if (needsPayloads) { + pforUtil.decode(payIn, payloadLengthBuffer); + int numBytes = payIn.readVInt(); + + if (numBytes 
> payloadBytes.length) { + payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes); + } + payIn.readBytes(payloadBytes, 0, numBytes); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + pforUtil.skip(payIn); // skip over lengths + int numBytes = payIn.readVInt(); // read length of payloadBytes + payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes + } + payloadByteUpto = 0; + } + + if (indexHasOffsets && payIn != null) { + if (needsOffsets) { + pforUtil.decode(payIn, offsetStartDeltaBuffer); + pforUtil.decode(payIn, offsetLengthBuffer); + } else { + // this works, because when writing a vint block we always force the first length to be + // written + pforUtil.skip(payIn); // skip over starts + pforUtil.skip(payIn); // skip over lengths + } + } + } + } + + @Override + public void advanceShallow(int target) throws IOException { + if (target > nextSkipDoc) { + // always plus one to fix the result, since skip position in Lucene99SkipReader + // is a little different from MultiLevelSkipListReader + final int newDocUpto = skipper.skipTo(target) + 1; + + if (newDocUpto > docUpto) { + // Skipper moved + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; + posDocUpTo = docUpto; + + // Force to read next block + docBufferUpto = BLOCK_SIZE; + accum = skipper.getDoc(); + posPendingFP = skipper.getPosPointer(); + payPendingFP = skipper.getPayPointer(); + posPendingCount = skipper.getPosBufferUpto(); + lastStartOffset = 0; // new document + payloadByteUpto = skipper.getPayloadByteUpto(); // actually, this is just lastSkipEntry + seekTo = skipper.getDocPointer(); // delay the seek + } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); + } + assert nextSkipDoc >= target; + } + + @Override + public Impacts getImpacts() throws IOException { + advanceShallow(doc); + return skipper.getImpacts(); + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target > nextSkipDoc) { + advanceShallow(target); + } + if (docBufferUpto == BLOCK_SIZE) { + if (seekTo >= 0) { + docIn.seek(seekTo); + seekTo = -1; + isFreqsRead = true; // reset isFreqsRead + } + refillDocs(); + } + + // Now scan: + long doc; + while (true) { + doc = docBuffer[docBufferUpto]; + docBufferUpto++; + docUpto++; + + if (doc >= target) { + break; + } + + if (docBufferUpto == BLOCK_SIZE) { + return this.doc = NO_MORE_DOCS; + } + } + position = 0; + lastStartOffset = 0; + + return this.doc = (int) doc; + } + + // TODO: in theory we could avoid loading frq block + // when not needed, ie, use skip data to load how far to + // seek the pos pointer ... 
instead of having to load frq + // blocks only to sum up how many positions to skip + private void skipPositions() throws IOException { + // Skip positions now: + int toSkip = posPendingCount - (int) freqBuffer[docBufferUpto - 1]; + // if (DEBUG) { + // System.out.println(" FPR.skipPositions: toSkip=" + toSkip); + // } + + final int leftInBlock = BLOCK_SIZE - posBufferUpto; + if (toSkip < leftInBlock) { + int end = posBufferUpto + toSkip; + while (posBufferUpto < end) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } else { + toSkip -= leftInBlock; + while (toSkip >= BLOCK_SIZE) { + assert posIn.getFilePointer() != lastPosBlockFP; + pforUtil.skip(posIn); + + if (indexHasPayloads && payIn != null) { + // Skip payloadLength block: + pforUtil.skip(payIn); + + // Skip payloadBytes block: + int numBytes = payIn.readVInt(); + payIn.seek(payIn.getFilePointer() + numBytes); + } + + if (indexHasOffsets && payIn != null) { + pforUtil.skip(payIn); + pforUtil.skip(payIn); + } + toSkip -= BLOCK_SIZE; + } + refillPositions(); + payloadByteUpto = 0; + posBufferUpto = 0; + while (posBufferUpto < toSkip) { + if (indexHasPayloads) { + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; + } + posBufferUpto++; + } + } + + position = 0; + lastStartOffset = 0; + } + + @Override + public int nextPosition() throws IOException { + if (indexHasPos == false || needsPositions == false) { + return -1; + } + + if (isFreqsRead == false) { + pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this docs block + isFreqsRead = true; + } + while (posDocUpTo < docUpto) { // bring posPendingCount upto the current doc + posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)]; + posDocUpTo++; + } + + assert posPendingCount > 0; + + if (posPendingFP != -1) { + posIn.seek(posPendingFP); + posPendingFP = -1; + + if (payPendingFP != -1 && payIn != null) { + payIn.seek(payPendingFP); + payPendingFP = -1; + } + + // Force buffer refill: + posBufferUpto = BLOCK_SIZE; + } + + if (posPendingCount > freqBuffer[docBufferUpto - 1]) { + skipPositions(); + posPendingCount = (int) freqBuffer[docBufferUpto - 1]; + } + + if (posBufferUpto == BLOCK_SIZE) { + refillPositions(); + posBufferUpto = 0; + } + position += posDeltaBuffer[posBufferUpto]; + + if (indexHasPayloads) { + payloadLength = (int) payloadLengthBuffer[posBufferUpto]; + payload.bytes = payloadBytes; + payload.offset = payloadByteUpto; + payload.length = payloadLength; + payloadByteUpto += payloadLength; + } + + if (indexHasOffsets && needsOffsets) { + startOffset = lastStartOffset + (int) offsetStartDeltaBuffer[posBufferUpto]; + endOffset = startOffset + (int) offsetLengthBuffer[posBufferUpto]; + lastStartOffset = startOffset; + } + + posBufferUpto++; + posPendingCount--; + return position; + } + + @Override + public int startOffset() { + return startOffset; + } + + @Override + public int endOffset() { + return endOffset; + } + + @Override + public BytesRef getPayload() { + if (payloadLength == 0) { + return null; + } else { + return payload; + } + } + + @Override + public long cost() { + return docFreq; + } + } + + @Override + public void checkIntegrity() throws IOException { + if (docIn != null) { + CodecUtil.checksumEntireFile(docIn); + } + if (posIn != null) { + CodecUtil.checksumEntireFile(posIn); + } + if (payIn != null) { + CodecUtil.checksumEntireFile(payIn); + } + } + + @Override + public String toString() { + return getClass().getSimpleName() + + "(positions=" + + (posIn != null) + + 
",payloads=" + + (payIn != null) + + ")"; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java new file mode 100644 index 000000000000..34b4c9c62a3b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java @@ -0,0 +1,539 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene99; + +import static org.apache.lucene.codecs.lucene99.ForUtil.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.DOC_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.MAX_SKIP_LEVELS; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.PAY_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.POS_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.TERMS_CODEC; +import static org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.VERSION_CURRENT; + +import java.io.IOException; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.CompetitiveImpactAccumulator; +import org.apache.lucene.codecs.PushPostingsWriterBase; +import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat.IntBlockTermState; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * Concrete class that writes docId(maybe frq,pos,offset,payloads) list with postings format. + * + *
+ * <p>Postings list for each term will be stored separately.
+ *
+ * @see Lucene99SkipWriter for details about skipping settings and postings layout.
+ * @lucene.experimental
+ */
+public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
+
+ IndexOutput docOut;
+ IndexOutput posOut;
+ IndexOutput payOut;
+
+ static final IntBlockTermState emptyState = new IntBlockTermState();
+ IntBlockTermState lastState;
+
+ // Holds starting file pointers for current term:
+ private long docStartFP;
+ private long posStartFP;
+ private long payStartFP;
+
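+ // Per-block write buffers: doc deltas and freqs accumulate here and are flushed once BLOCK_SIZE docs are buffered.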
+ final long[] docDeltaBuffer;
+ final long[] freqBuffer;
+ private int docBufferUpto;
+
+ final long[] posDeltaBuffer;
+ final long[] payloadLengthBuffer;
+ final long[] offsetStartDeltaBuffer;
+ final long[] offsetLengthBuffer;
+ private int posBufferUpto;
+
+ private byte[] payloadBytes;
+ private int payloadByteUpto;
+
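+ // State captured when the previous block was completed; buffered as that block's skip entry in startDoc.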
+ private int lastBlockDocID;
+ private long lastBlockPosFP;
+ private long lastBlockPayFP;
+ private int lastBlockPosBufferUpto;
+ private int lastBlockPayloadByteUpto;
+
+ private int lastDocID;
+ private int lastPosition;
+ private int lastStartOffset;
+ private int docCount;
+
+ private final PForUtil pforUtil;
+ private final ForDeltaUtil forDeltaUtil;
+ private final Lucene99SkipWriter skipWriter;
+
+ private boolean fieldHasNorms;
+ private NumericDocValues norms;
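+ // Accumulates the most competitive (freq, norm) pairs of the current block; flushed into the skip data as impacts.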
+ private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator =
+ new CompetitiveImpactAccumulator();
+
+ /** Creates a postings writer */
+ public Lucene99PostingsWriter(SegmentWriteState state) throws IOException {
+
+ String docFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.DOC_EXTENSION);
+ docOut = state.directory.createOutput(docFileName, state.context);
+ IndexOutput posOut = null;
+ IndexOutput payOut = null;
+ boolean success = false;
+ try {
+ CodecUtil.writeIndexHeader(
+ docOut, DOC_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ final ForUtil forUtil = new ForUtil();
+ forDeltaUtil = new ForDeltaUtil(forUtil);
+ pforUtil = new PForUtil(forUtil);
+ if (state.fieldInfos.hasProx()) {
+ posDeltaBuffer = new long[BLOCK_SIZE];
+ String posFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.POS_EXTENSION);
+ posOut = state.directory.createOutput(posFileName, state.context);
+ CodecUtil.writeIndexHeader(
+ posOut, POS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+
+ if (state.fieldInfos.hasPayloads()) {
+ payloadBytes = new byte[128];
+ payloadLengthBuffer = new long[BLOCK_SIZE];
+ } else {
+ payloadBytes = null;
+ payloadLengthBuffer = null;
+ }
+
+ if (state.fieldInfos.hasOffsets()) {
+ offsetStartDeltaBuffer = new long[BLOCK_SIZE];
+ offsetLengthBuffer = new long[BLOCK_SIZE];
+ } else {
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ }
+
+ if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
+ String payFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name,
+ state.segmentSuffix,
+ Lucene99PostingsFormat.PAY_EXTENSION);
+ payOut = state.directory.createOutput(payFileName, state.context);
+ CodecUtil.writeIndexHeader(
+ payOut, PAY_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ }
+ } else {
+ posDeltaBuffer = null;
+ payloadLengthBuffer = null;
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ payloadBytes = null;
+ }
+ this.payOut = payOut;
+ this.posOut = posOut;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+ }
+ }
+
+ docDeltaBuffer = new long[BLOCK_SIZE];
+ freqBuffer = new long[BLOCK_SIZE];
+
+ // TODO: should we try skipping every 2/4 blocks...?
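+ // One skip entry is buffered per completed block of BLOCK_SIZE docs (see startDoc).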
+ skipWriter =
+ new Lucene99SkipWriter(
+ MAX_SKIP_LEVELS, BLOCK_SIZE, state.segmentInfo.maxDoc(), docOut, posOut, payOut);
+ }
+
+ @Override
+ public IntBlockTermState newTermState() {
+ return new IntBlockTermState();
+ }
+
+ @Override
+ public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
+ CodecUtil.writeIndexHeader(
+ termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ termsOut.writeVInt(BLOCK_SIZE);
+ }
+
+ @Override
+ public void setField(FieldInfo fieldInfo) {
+ super.setField(fieldInfo);
+ skipWriter.setField(writePositions, writeOffsets, writePayloads);
+ lastState = emptyState;
+ fieldHasNorms = fieldInfo.hasNorms();
+ }
+
+ @Override
+ public void startTerm(NumericDocValues norms) {
+ docStartFP = docOut.getFilePointer();
+ if (writePositions) {
+ posStartFP = posOut.getFilePointer();
+ if (writePayloads || writeOffsets) {
+ payStartFP = payOut.getFilePointer();
+ }
+ }
+ lastDocID = 0;
+ lastBlockDocID = -1;
+ skipWriter.resetSkip();
+ this.norms = norms;
+ competitiveFreqNormAccumulator.clear();
+ }
+
+ @Override
+ public void startDoc(int docID, int termDocFreq) throws IOException {
+ // We have collected a block of docs and a new doc has arrived;
+ // write the skip data as well as the postings list for the
+ // current block.
+ if (lastBlockDocID != -1 && docBufferUpto == 0) {
+ skipWriter.bufferSkip(
+ lastBlockDocID,
+ competitiveFreqNormAccumulator,
+ docCount,
+ lastBlockPosFP,
+ lastBlockPayFP,
+ lastBlockPosBufferUpto,
+ lastBlockPayloadByteUpto);
+ competitiveFreqNormAccumulator.clear();
+ }
+
+ final int docDelta = docID - lastDocID;
+
+ if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
+ throw new CorruptIndexException(
+ "docs out of order (" + docID + " <= " + lastDocID + " )", docOut);
+ }
+
+ docDeltaBuffer[docBufferUpto] = docDelta;
+ if (writeFreqs) {
+ freqBuffer[docBufferUpto] = termDocFreq;
+ }
+
+ docBufferUpto++;
+ docCount++;
+
+ if (docBufferUpto == BLOCK_SIZE) {
+ forDeltaUtil.encodeDeltas(docDeltaBuffer, docOut);
+ if (writeFreqs) {
+ pforUtil.encode(freqBuffer, docOut);
+ }
+ // NOTE: don't set docBufferUpto back to 0 here;
+ // finishDoc will do so (because it needs to see that
+ // the block was filled so it can save skip data)
+ }
+
+ lastDocID = docID;
+ lastPosition = 0;
+ lastStartOffset = 0;
+
+ long norm;
+ if (fieldHasNorms) {
+ boolean found = norms.advanceExact(docID);
+ if (found == false) {
+ // This can happen if indexing hits a problem after adding a doc to the
+ // postings but before buffering the norm. Such documents are written
+ // deleted and will go away on the first merge.
+ norm = 1L;
+ } else {
+ norm = norms.longValue();
+ assert norm != 0 : docID;
+ }
+ } else {
+ norm = 1L;
+ }
+
+ competitiveFreqNormAccumulator.add(writeFreqs ? termDocFreq : 1, norm);
+ }
+
+ @Override
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset)
+ throws IOException {
+ if (position > IndexWriter.MAX_POSITION) {
+ throw new CorruptIndexException(
+ "position="
+ + position
+ + " is too large (> IndexWriter.MAX_POSITION="
+ + IndexWriter.MAX_POSITION
+ + ")",
+ docOut);
+ }
+ if (position < 0) {
+ throw new CorruptIndexException("position=" + position + " is < 0", docOut);
+ }
+ posDeltaBuffer[posBufferUpto] = position - lastPosition;
+ if (writePayloads) {
+ if (payload == null || payload.length == 0) {
+ // no payload
+ payloadLengthBuffer[posBufferUpto] = 0;
+ } else {
+ payloadLengthBuffer[posBufferUpto] = payload.length;
+ if (payloadByteUpto + payload.length > payloadBytes.length) {
+ payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payload.length);
+ }
+ System.arraycopy(
+ payload.bytes, payload.offset, payloadBytes, payloadByteUpto, payload.length);
+ payloadByteUpto += payload.length;
+ }
+ }
+
+ if (writeOffsets) {
+ assert startOffset >= lastStartOffset;
+ assert endOffset >= startOffset;
+ offsetStartDeltaBuffer[posBufferUpto] = startOffset - lastStartOffset;
+ offsetLengthBuffer[posBufferUpto] = endOffset - startOffset;
+ lastStartOffset = startOffset;
+ }
+
+ posBufferUpto++;
+ lastPosition = position;
+ if (posBufferUpto == BLOCK_SIZE) {
+ pforUtil.encode(posDeltaBuffer, posOut);
+
+ if (writePayloads) {
+ pforUtil.encode(payloadLengthBuffer, payOut);
+ payOut.writeVInt(payloadByteUpto);
+ payOut.writeBytes(payloadBytes, 0, payloadByteUpto);
+ payloadByteUpto = 0;
+ }
+ if (writeOffsets) {
+ pforUtil.encode(offsetStartDeltaBuffer, payOut);
+ pforUtil.encode(offsetLengthBuffer, payOut);
+ }
+ posBufferUpto = 0;
+ }
+ }
+
+ @Override
+ public void finishDoc() throws IOException {
+ // Since we don't know df for current term, we had to buffer
+ // those skip data for each block, and when a new doc comes,
+ // write them to skip file.
+ if (docBufferUpto == BLOCK_SIZE) {
+ lastBlockDocID = lastDocID;
+ if (posOut != null) {
+ if (payOut != null) {
+ lastBlockPayFP = payOut.getFilePointer();
+ }
+ lastBlockPosFP = posOut.getFilePointer();
+ lastBlockPosBufferUpto = posBufferUpto;
+ lastBlockPayloadByteUpto = payloadByteUpto;
+ }
+ docBufferUpto = 0;
+ }
+ }
+
+ /** Called when we are done adding docs to this term */
+ @Override
+ public void finishTerm(BlockTermState _state) throws IOException {
+ IntBlockTermState state = (IntBlockTermState) _state;
+ assert state.docFreq > 0;
+
+ // TODO: wasteful we are counting this (counting # docs
+ // for this term) in two places?
+ assert state.docFreq == docCount : state.docFreq + " vs " + docCount;
+
+ // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to
+ // it.
+ final int singletonDocID;
+ if (state.docFreq == 1) {
+ // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
+ singletonDocID = (int) docDeltaBuffer[0];
+ } else {
+ singletonDocID = -1;
+ // vInt encode the remaining doc deltas and freqs:
+ for (int i = 0; i < docBufferUpto; i++) {
+ final int docDelta = (int) docDeltaBuffer[i];
+ final int freq = (int) freqBuffer[i];
+ if (!writeFreqs) {
+ docOut.writeVInt(docDelta);
+ } else if (freq == 1) {
+ docOut.writeVInt((docDelta << 1) | 1);
+ } else {
+ docOut.writeVInt(docDelta << 1);
+ docOut.writeVInt(freq);
+ }
+ }
+ }
+
+ final long lastPosBlockOffset;
+
+ if (writePositions) {
+ // totalTermFreq is just total number of positions(or payloads, or offsets)
+ // associated with current term.
+ assert state.totalTermFreq != -1;
+ if (state.totalTermFreq > BLOCK_SIZE) {
+ // record file offset for last pos in last block
+ lastPosBlockOffset = posOut.getFilePointer() - posStartFP;
+ } else {
+ lastPosBlockOffset = -1;
+ }
+ if (posBufferUpto > 0) {
+ // TODO: should we send offsets/payloads to
+ // .pay...? seems wasteful (have to store extra
+ // vLong for low (< BLOCK_SIZE) DF terms = vast vast
+ // majority)
+
+ // vInt encode the remaining positions/payloads/offsets:
+ int lastPayloadLength = -1; // force first payload length to be written
+ int lastOffsetLength = -1; // force first offset length to be written
+ int payloadBytesReadUpto = 0;
+ for (int i = 0; i < posBufferUpto; i++) {
+ final int posDelta = (int) posDeltaBuffer[i];
+ if (writePayloads) {
+ final int payloadLength = (int) payloadLengthBuffer[i];
+ if (payloadLength != lastPayloadLength) {
+ lastPayloadLength = payloadLength;
+ posOut.writeVInt((posDelta << 1) | 1);
+ posOut.writeVInt(payloadLength);
+ } else {
+ posOut.writeVInt(posDelta << 1);
+ }
+
+ if (payloadLength != 0) {
+ posOut.writeBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
+ payloadBytesReadUpto += payloadLength;
+ }
+ } else {
+ posOut.writeVInt(posDelta);
+ }
+
+ if (writeOffsets) {
+ int delta = (int) offsetStartDeltaBuffer[i];
+ int length = (int) offsetLengthBuffer[i];
+ if (length == lastOffsetLength) {
+ posOut.writeVInt(delta << 1);
+ } else {
+ posOut.writeVInt(delta << 1 | 1);
+ posOut.writeVInt(length);
+ lastOffsetLength = length;
+ }
+ }
+ }
+
+ if (writePayloads) {
+ assert payloadBytesReadUpto == payloadByteUpto;
+ payloadByteUpto = 0;
+ }
+ }
+ } else {
+ lastPosBlockOffset = -1;
+ }
+
+ long skipOffset;
+ if (docCount > BLOCK_SIZE) {
+ skipOffset = skipWriter.writeSkip(docOut) - docStartFP;
+ } else {
+ skipOffset = -1;
+ }
+
+ state.docStartFP = docStartFP;
+ state.posStartFP = posStartFP;
+ state.payStartFP = payStartFP;
+ state.singletonDocID = singletonDocID;
+ state.skipOffset = skipOffset;
+ state.lastPosBlockOffset = lastPosBlockOffset;
+ docBufferUpto = 0;
+ posBufferUpto = 0;
+ lastDocID = 0;
+ docCount = 0;
+ }
+
+ @Override
+ public void encodeTerm(
+ DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute)
+ throws IOException {
+ IntBlockTermState state = (IntBlockTermState) _state;
+ if (absolute) {
+ lastState = emptyState;
+ assert lastState.docStartFP == 0;
+ }
+
+ if (lastState.singletonDocID != -1
+ && state.singletonDocID != -1
+ && state.docStartFP == lastState.docStartFP) {
+ // With runs of rare values such as ID fields, the increment of pointers in the docs file is
+ // often 0.
+ // Furthermore some ID schemes like auto-increment IDs or Flake IDs are monotonic, so we
+ // encode the delta
+ // between consecutive doc IDs to save space.
+ final long delta = (long) state.singletonDocID - lastState.singletonDocID;
+ out.writeVLong((BitUtil.zigZagEncode(delta) << 1) | 0x01);
+ } else {
+ out.writeVLong((state.docStartFP - lastState.docStartFP) << 1);
+ if (state.singletonDocID != -1) {
+ out.writeVInt(state.singletonDocID);
+ }
+ }
+
+ if (writePositions) {
+ out.writeVLong(state.posStartFP - lastState.posStartFP);
+ if (writePayloads || writeOffsets) {
+ out.writeVLong(state.payStartFP - lastState.payStartFP);
+ }
+ }
+ if (writePositions) {
+ if (state.lastPosBlockOffset != -1) {
+ out.writeVLong(state.lastPosBlockOffset);
+ }
+ }
+ if (state.skipOffset != -1) {
+ out.writeVLong(state.skipOffset);
+ }
+ lastState = state;
+ }
+
+ @Override
+ public void close() throws IOException {
+ // TODO: add a finish() at least to PushBase? DV too...?
+ boolean success = false;
+ try {
+ if (docOut != null) {
+ CodecUtil.writeFooter(docOut);
+ }
+ if (posOut != null) {
+ CodecUtil.writeFooter(posOut);
+ }
+ if (payOut != null) {
+ CodecUtil.writeFooter(payOut);
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(docOut, posOut, payOut);
+ } else {
+ IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+ }
+ docOut = posOut = payOut = null;
+ }
+ }
+}
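
Note on the encodeTerm() metadata in the writer above: when two consecutive terms are singletons (docFreq == 1) that share the same .doc file pointer, only a zig-zag-encoded delta between their doc IDs is written, with the low bit of the vLong acting as a flag. A minimal standalone sketch of that decision, not part of the patch; zigZagEncode is re-implemented inline so the snippet compiles on its own (Lucene's org.apache.lucene.util.BitUtil provides the real one), and all names are illustrative:

    // Sketch of the term-metadata encoding choice made in Lucene99PostingsWriter#encodeTerm.
    public class TermMetaEncodingSketch {

      // Same transform as org.apache.lucene.util.BitUtil.zigZagEncode: maps small negative
      // and positive longs to small unsigned longs.
      static long zigZagEncode(long l) {
        return (l >> 63) ^ (l << 1);
      }

      static void encode(long docStartFPDelta, int prevSingletonDocID, int singletonDocID,
                         java.util.List<Long> out) {
        if (prevSingletonDocID != -1 && singletonDocID != -1 && docStartFPDelta == 0) {
          long delta = (long) singletonDocID - prevSingletonDocID;
          out.add((zigZagEncode(delta) << 1) | 0x01); // low bit set: delta of singleton doc IDs
        } else {
          out.add(docStartFPDelta << 1);              // low bit clear: delta of file pointers
          if (singletonDocID != -1) {
            out.add((long) singletonDocID);
          }
        }
      }

      public static void main(String[] args) {
        java.util.List<Long> out = new java.util.ArrayList<>();
        encode(0, 41, 43, out);   // run of singleton terms: zigZag(2) << 1 | 1 = 9
        encode(128, -1, -1, out); // normal term: file-pointer delta 128 << 1 = 256
        System.out.println(out);  // [9, 256]
      }
    }
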
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScoreSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScoreSkipReader.java
new file mode 100644
index 000000000000..f2882c085c40
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScoreSkipReader.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene99;
+
+import java.io.IOException;
+import java.util.AbstractList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.RandomAccess;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+
+final class Lucene99ScoreSkipReader extends Lucene99SkipReader {
+
+ private final byte[][] impactData;
+ private final int[] impactDataLength;
+ private final ByteArrayDataInput badi = new ByteArrayDataInput();
+ private final Impacts impacts;
+ private int numLevels = 1;
+ private final MutableImpactList[] perLevelImpacts;
+
+ public Lucene99ScoreSkipReader(
+ IndexInput skipStream,
+ int maxSkipLevels,
+ boolean hasPos,
+ boolean hasOffsets,
+ boolean hasPayloads) {
+ super(skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
+ this.impactData = new byte[maxSkipLevels][];
+ Arrays.fill(impactData, new byte[0]);
+ this.impactDataLength = new int[maxSkipLevels];
+ this.perLevelImpacts = new MutableImpactList[maxSkipLevels];
+ for (int i = 0; i < perLevelImpacts.length; ++i) {
+ perLevelImpacts[i] = new MutableImpactList();
+ }
+ impacts =
+ new Impacts() {
+
+ @Override
+ public int numLevels() {
+ return numLevels;
+ }
+
+ @Override
+ public int getDocIdUpTo(int level) {
+ return skipDoc[level];
+ }
+
+/**
+ * Although this skipper uses MultiLevelSkipListReader as an interface, its definition of skip
+ * position will be a little different.
+ *
+ * For example, when skipInterval = blockSize = 3, df = 2*skipInterval = 6,
+ *
+ * 0 1 2 3 4 5
+ * d d d d d d    (posting list)
+ *     ^     ^    (skip point in MultiLeveSkipWriter)
+ *       ^        (skip point in Lucene99SkipWriter)
+ *
+ * In this case, MultiLevelSkipListReader will use the last document as a skip point, while
+ * Lucene99SkipReader should assume no skip point will come.
+ *
+ * If we used the interface directly in Lucene99SkipReader, it might naively try to read more
+ * skip data after the only skip point is loaded.
+ *
+ * To illustrate this, consider a call to skipTo(d[5]): since skip point d[3] has a smaller docId
+ * and numSkipped + blockSize == df, the MultiLevelSkipListReader will assume the skip list isn't
+ * exhausted yet and try to load a non-existent skip point.
+ *
+ * Therefore, we'll trim df before passing it to the interface. See trim(int).
+ */
+public class Lucene99SkipReader extends MultiLevelSkipListReader {
+ private long[] docPointer;
+ private long[] posPointer;
+ private long[] payPointer;
+ private int[] posBufferUpto;
+ private int[] payloadByteUpto;
+
+ private long lastPosPointer;
+ private long lastPayPointer;
+ private int lastPayloadByteUpto;
+ private long lastDocPointer;
+ private int lastPosBufferUpto;
+
+ public Lucene99SkipReader(
+ IndexInput skipStream,
+ int maxSkipLevels,
+ boolean hasPos,
+ boolean hasOffsets,
+ boolean hasPayloads) {
+ super(skipStream, maxSkipLevels, ForUtil.BLOCK_SIZE, 8);
+ docPointer = new long[maxSkipLevels];
+ if (hasPos) {
+ posPointer = new long[maxSkipLevels];
+ posBufferUpto = new int[maxSkipLevels];
+ if (hasPayloads) {
+ payloadByteUpto = new int[maxSkipLevels];
+ } else {
+ payloadByteUpto = null;
+ }
+ if (hasOffsets || hasPayloads) {
+ payPointer = new long[maxSkipLevels];
+ } else {
+ payPointer = null;
+ }
+ } else {
+ posPointer = null;
+ }
+ }
+
+ /**
+ * Trim original docFreq to tell skipReader read proper number of skip points.
+ *
+ * Since our definition in Lucene99Skip* is a little different from MultiLevelSkip*, this
+ * trimmed docFreq will prevent the skip reader from: 1. reading a non-existent skip point after
+ * the last block boundary, and 2. moving into the vInt block.
+ */
+ protected int trim(int df) {
+ return df % ForUtil.BLOCK_SIZE == 0 ? df - 1 : df;
+ }
+
+ public void init(
+ long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df)
+ throws IOException {
+ super.init(skipPointer, trim(df));
+ lastDocPointer = docBasePointer;
+ lastPosPointer = posBasePointer;
+ lastPayPointer = payBasePointer;
+
+ Arrays.fill(docPointer, docBasePointer);
+ if (posPointer != null) {
+ Arrays.fill(posPointer, posBasePointer);
+ if (payPointer != null) {
+ Arrays.fill(payPointer, payBasePointer);
+ }
+ } else {
+ assert posBasePointer == 0;
+ }
+ }
+
+ /**
+ * Returns the doc pointer of the doc to which the last call of {@link
+ * MultiLevelSkipListReader#skipTo(int)} has skipped.
+ */
+ public long getDocPointer() {
+ return lastDocPointer;
+ }
+
+ public long getPosPointer() {
+ return lastPosPointer;
+ }
+
+ public int getPosBufferUpto() {
+ return lastPosBufferUpto;
+ }
+
+ public long getPayPointer() {
+ return lastPayPointer;
+ }
+
+ public int getPayloadByteUpto() {
+ return lastPayloadByteUpto;
+ }
+
+ public int getNextSkipDoc() {
+ return skipDoc[0];
+ }
+
+ @Override
+ protected void seekChild(int level) throws IOException {
+ super.seekChild(level);
+ docPointer[level] = lastDocPointer;
+ if (posPointer != null) {
+ posPointer[level] = lastPosPointer;
+ posBufferUpto[level] = lastPosBufferUpto;
+ if (payloadByteUpto != null) {
+ payloadByteUpto[level] = lastPayloadByteUpto;
+ }
+ if (payPointer != null) {
+ payPointer[level] = lastPayPointer;
+ }
+ }
+ }
+
+ @Override
+ protected void setLastSkipData(int level) {
+ super.setLastSkipData(level);
+ lastDocPointer = docPointer[level];
+
+ if (posPointer != null) {
+ lastPosPointer = posPointer[level];
+ lastPosBufferUpto = posBufferUpto[level];
+ if (payPointer != null) {
+ lastPayPointer = payPointer[level];
+ }
+ if (payloadByteUpto != null) {
+ lastPayloadByteUpto = payloadByteUpto[level];
+ }
+ }
+ }
+
+ @Override
+ protected int readSkipData(int level, IndexInput skipStream) throws IOException {
+ int delta = skipStream.readVInt();
+ docPointer[level] += skipStream.readVLong();
+
+ if (posPointer != null) {
+ posPointer[level] += skipStream.readVLong();
+ posBufferUpto[level] = skipStream.readVInt();
+
+ if (payloadByteUpto != null) {
+ payloadByteUpto[level] = skipStream.readVInt();
+ }
+
+ if (payPointer != null) {
+ payPointer[level] += skipStream.readVLong();
+ }
+ }
+ readImpacts(level, skipStream);
+ return delta;
+ }
+
+ // The default impl skips impacts
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ skipStream.skipBytes(skipStream.readVInt());
+ }
+}
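
The trim(int) method above exists because a docFreq that is an exact multiple of the block size would otherwise look like it carries one more skip point than was written. A small standalone sketch, assuming BLOCK_SIZE = 128 as in ForUtil:

    // Standalone sketch of Lucene99SkipReader#trim: when df is a multiple of the block size,
    // the last doc ends a block and would otherwise look like one more skip point to
    // MultiLevelSkipListReader, so one doc is shaved off before calling super.init().
    public class TrimSketch {
      static final int BLOCK_SIZE = 128; // matches ForUtil.BLOCK_SIZE

      static int trim(int df) {
        return df % BLOCK_SIZE == 0 ? df - 1 : df;
      }

      public static void main(String[] args) {
        System.out.println(trim(256)); // 255: exactly two blocks, drop the phantom skip point
        System.out.println(trim(300)); // 300: tail docs are vInt-encoded, nothing to trim
      }
    }
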
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java
new file mode 100644
index 000000000000..ec50e5e5c168
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene99;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
+import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Write skip lists with multiple levels, and support skip within block ints.
+ *
+ * Assume that docFreq = 28, skipInterval = blockSize = 12
+ *
+ *  | block#0 | | block#1 | |vInts|
+ *  d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
+ *                          ^                       ^       (level 0 skip point)
+ *
+ * Note that skipWriter will ignore the first document in block#0, since it is useless as a skip
+ * point. Also, we'll never skip into the vInts block; we only record skip data at its start
+ * point (if it exists).
+ *
+ * For each skip point, we will record: 1. the docID in the former position, i.e. for position 12,
+ * record docID[11], etc.; 2. its related file pointers (position, payload); 3. related numbers or
+ * uptos (position, payload); 4. the start offset.
+ */
+public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
+ private int[] lastSkipDoc;
+ private long[] lastSkipDocPointer;
+ private long[] lastSkipPosPointer;
+ private long[] lastSkipPayPointer;
+
+ private final IndexOutput docOut;
+ private final IndexOutput posOut;
+ private final IndexOutput payOut;
+
+ private int curDoc;
+ private long curDocPointer;
+ private long curPosPointer;
+ private long curPayPointer;
+ private int curPosBufferUpto;
+ private int curPayloadByteUpto;
+ private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
+ private boolean fieldHasPositions;
+ private boolean fieldHasOffsets;
+ private boolean fieldHasPayloads;
+
+ public Lucene99SkipWriter(
+ int maxSkipLevels,
+ int blockSize,
+ int docCount,
+ IndexOutput docOut,
+ IndexOutput posOut,
+ IndexOutput payOut) {
+ super(blockSize, 8, maxSkipLevels, docCount);
+ this.docOut = docOut;
+ this.posOut = posOut;
+ this.payOut = payOut;
+
+ lastSkipDoc = new int[maxSkipLevels];
+ lastSkipDocPointer = new long[maxSkipLevels];
+ if (posOut != null) {
+ lastSkipPosPointer = new long[maxSkipLevels];
+ if (payOut != null) {
+ lastSkipPayPointer = new long[maxSkipLevels];
+ }
+ }
+ curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
+ for (int i = 0; i < maxSkipLevels; ++i) {
+ curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator();
+ }
+ }
+
+ public void setField(
+ boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
+ this.fieldHasPositions = fieldHasPositions;
+ this.fieldHasOffsets = fieldHasOffsets;
+ this.fieldHasPayloads = fieldHasPayloads;
+ }
+
+ // tricky: we only skip data for blocks (terms with more than 128 docs), but re-init'ing the
+ // skipper is pretty slow for rare terms in large segments as we have to fill O(log #docs in
+ // segment) of junk. this is the vast majority of terms (worst case: ID field or similar). so
+ // in resetSkip() we save away the previous pointers, and lazy-init only if we need to buffer
+ // skip data for the term.
+ private boolean initialized;
+ long lastDocFP;
+ long lastPosFP;
+ long lastPayFP;
+
+ @Override
+ public void resetSkip() {
+ lastDocFP = docOut.getFilePointer();
+ if (fieldHasPositions) {
+ lastPosFP = posOut.getFilePointer();
+ if (fieldHasOffsets || fieldHasPayloads) {
+ lastPayFP = payOut.getFilePointer();
+ }
+ }
+ if (initialized) {
+ for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) {
+ acc.clear();
+ }
+ }
+ initialized = false;
+ }
+
+ private void initSkip() {
+ if (!initialized) {
+ super.resetSkip();
+ Arrays.fill(lastSkipDoc, 0);
+ Arrays.fill(lastSkipDocPointer, lastDocFP);
+ if (fieldHasPositions) {
+ Arrays.fill(lastSkipPosPointer, lastPosFP);
+ if (fieldHasOffsets || fieldHasPayloads) {
+ Arrays.fill(lastSkipPayPointer, lastPayFP);
+ }
+ }
+ // sets of competitive freq,norm pairs should be empty at this point
+ assert Arrays.stream(curCompetitiveFreqNorms)
+ .map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs)
+ .mapToInt(Collection::size)
+ .sum()
+ == 0;
+ initialized = true;
+ }
+ }
+
+ /** Sets the values for the current skip data. */
+ public void bufferSkip(
+ int doc,
+ CompetitiveImpactAccumulator competitiveFreqNorms,
+ int numDocs,
+ long posFP,
+ long payFP,
+ int posBufferUpto,
+ int payloadByteUpto)
+ throws IOException {
+ initSkip();
+ this.curDoc = doc;
+ this.curDocPointer = docOut.getFilePointer();
+ this.curPosPointer = posFP;
+ this.curPayPointer = payFP;
+ this.curPosBufferUpto = posBufferUpto;
+ this.curPayloadByteUpto = payloadByteUpto;
+ this.curCompetitiveFreqNorms[0].addAll(competitiveFreqNorms);
+ bufferSkip(numDocs);
+ }
+
+ private final ByteBuffersDataOutput freqNormOut = ByteBuffersDataOutput.newResettableInstance();
+
+ @Override
+ protected void writeSkipData(int level, DataOutput skipBuffer) throws IOException {
+
+ int delta = curDoc - lastSkipDoc[level];
+
+ skipBuffer.writeVInt(delta);
+ lastSkipDoc[level] = curDoc;
+
+ skipBuffer.writeVLong(curDocPointer - lastSkipDocPointer[level]);
+ lastSkipDocPointer[level] = curDocPointer;
+
+ if (fieldHasPositions) {
+
+ skipBuffer.writeVLong(curPosPointer - lastSkipPosPointer[level]);
+ lastSkipPosPointer[level] = curPosPointer;
+ skipBuffer.writeVInt(curPosBufferUpto);
+
+ if (fieldHasPayloads) {
+ skipBuffer.writeVInt(curPayloadByteUpto);
+ }
+
+ if (fieldHasOffsets || fieldHasPayloads) {
+ skipBuffer.writeVLong(curPayPointer - lastSkipPayPointer[level]);
+ lastSkipPayPointer[level] = curPayPointer;
+ }
+ }
+
+ CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
+ assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
+ if (level + 1 < numberOfSkipLevels) {
+ curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
+ }
+ writeImpacts(competitiveFreqNorms, freqNormOut);
+ skipBuffer.writeVInt(Math.toIntExact(freqNormOut.size()));
+ freqNormOut.copyTo(skipBuffer);
+ freqNormOut.reset();
+ competitiveFreqNorms.clear();
+ }
+
+ public static void writeImpacts(CompetitiveImpactAccumulator acc, DataOutput out)
+ throws IOException {
+ Collection<Impact> impacts = acc.getCompetitiveFreqNormPairs();
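
For context on the skip structure wired up above: Lucene99SkipWriter passes (BLOCK_SIZE, 8, maxSkipLevels, docCount) to MultiLevelSkipListWriter, so level 0 records a skip entry roughly every 128 docs and each higher level is roughly 8x sparser. A rough sketch of that scaling, an approximation for illustration rather than code taken from the patch:

    // Hedged sketch: approximate number of skip entries per level for Lucene99SkipWriter's
    // parameters (skipInterval = BLOCK_SIZE = 128, skipMultiplier = 8).
    public class SkipLevelSketch {
      public static void main(String[] args) {
        int skipInterval = 128;
        int skipMultiplier = 8;
        int docFreq = 1_000_000;
        long docsPerEntry = skipInterval;
        for (int level = 0; docFreq / docsPerEntry > 0 && level < 10; level++) {
          System.out.println("level " + level + ": ~" + (docFreq / docsPerEntry) + " skip entries");
          docsPerEntry *= skipMultiplier;
        }
      }
    }
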
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
index 15681e8a2e54..0fd87daff7d9 100644
--- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
+++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat
+org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat
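
The service entry swap above is what the name-based SPI lookup resolves; a tiny sketch of that lookup, assuming lucene-core is on the classpath:

    // Hedged sketch: the META-INF/services entry above is what makes this lookup succeed.
    import org.apache.lucene.codecs.PostingsFormat;

    public class LookupSketch {
      public static void main(String[] args) {
        PostingsFormat pf = PostingsFormat.forName("Lucene99");
        System.out.println(pf.getName()); // "Lucene99"
      }
    }
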
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestForDeltaUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestForDeltaUtil.java
new file mode 100644
index 000000000000..29448ff1e41a
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestForDeltaUtil.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene99;
+
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.packed.PackedInts;
+
+public class TestForDeltaUtil extends LuceneTestCase {
+
+ public void testEncodeDecode() throws IOException {
+ final int iterations = RandomNumbers.randomIntBetween(random(), 50, 1000);
+ final int[] values = new int[iterations * ForUtil.BLOCK_SIZE];
+
+ for (int i = 0; i < iterations; ++i) {
+ final int bpv = TestUtil.nextInt(random(), 1, 31 - 7);
+ for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
+ values[i * ForUtil.BLOCK_SIZE + j] =
+ RandomNumbers.randomIntBetween(random(), 1, (int) PackedInts.maxValue(bpv));
+ }
+ }
+
+ final Directory d = new ByteBuffersDirectory();
+ final long endPointer;
+
+ {
+ // encode
+ IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
+ final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(new ForUtil());
+
+ for (int i = 0; i < iterations; ++i) {
+ long[] source = new long[ForUtil.BLOCK_SIZE];
+ for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
+ source[j] = values[i * ForUtil.BLOCK_SIZE + j];
+ }
+ forDeltaUtil.encodeDeltas(source, out);
+ }
+ endPointer = out.getFilePointer();
+ out.close();
+ }
+
+ {
+ // decode
+ IndexInput in = d.openInput("test.bin", IOContext.READONCE);
+ final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(new ForUtil());
+ for (int i = 0; i < iterations; ++i) {
+ long base = 0;
+ final long[] restored = new long[ForUtil.BLOCK_SIZE];
+ forDeltaUtil.decodeAndPrefixSum(in, base, restored);
+ final long[] expected = new long[ForUtil.BLOCK_SIZE];
+ for (int j = 0; j < ForUtil.BLOCK_SIZE; ++j) {
+ expected[j] = values[i * ForUtil.BLOCK_SIZE + j];
+ if (j > 0) {
+ expected[j] += expected[j - 1];
+ } else {
+ expected[j] += base;
+ }
+ }
+ assertArrayEquals(Arrays.toString(restored), expected, restored);
+ }
+ assertEquals(endPointer, in.getFilePointer());
+ in.close();
+ }
+
+ d.close();
+ }
+}
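
TestForDeltaUtil above asserts that decodeAndPrefixSum() restores absolute values as the base plus a running sum of the encoded deltas; the arithmetic it checks boils down to this small sketch (the values are illustrative):

    // Hedged sketch of the invariant TestForDeltaUtil asserts: absolute values are the base
    // plus a prefix sum of the deltas (deltas are >= 1 because doc IDs strictly increase).
    public class PrefixSumSketch {
      public static void main(String[] args) {
        long base = 41;
        long[] deltas = {1, 3, 2, 5};
        long[] restored = new long[deltas.length];
        long acc = base;
        for (int i = 0; i < deltas.length; i++) {
          acc += deltas[i];
          restored[i] = acc;
        }
        System.out.println(java.util.Arrays.toString(restored)); // [42, 45, 47, 52]
      }
    }
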
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestForUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestForUtil.java
similarity index 98%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestForUtil.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestForUtil.java
index 363226423ee2..2f179c743afb 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestForUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestForUtil.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene90;
+package org.apache.lucene.codecs.lucene99;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90PostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99PostingsFormat.java
similarity index 91%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90PostingsFormat.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99PostingsFormat.java
index 90a9ebe5369b..99c0e0a6ae28 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90PostingsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99PostingsFormat.java
@@ -14,7 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene90;
+package org.apache.lucene.codecs.lucene99;
+
+import static org.apache.lucene.codecs.lucene99.Lucene99ScoreSkipReader.readImpacts;
import java.io.IOException;
import java.util.Arrays;
@@ -22,9 +24,9 @@
import java.util.List;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
-import org.apache.lucene.codecs.lucene90.Lucene90ScoreSkipReader.MutableImpactList;
import org.apache.lucene.codecs.lucene90.blocktree.FieldReader;
import org.apache.lucene.codecs.lucene90.blocktree.Stats;
+import org.apache.lucene.codecs.lucene99.Lucene99ScoreSkipReader.MutableImpactList;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
@@ -40,8 +42,8 @@
import org.apache.lucene.tests.index.BasePostingsFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;
-public class TestLucene90PostingsFormat extends BasePostingsFormatTestCase {
- private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene90PostingsFormat());
+public class TestLucene99PostingsFormat extends BasePostingsFormatTestCase {
+ private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene99PostingsFormat());
@Override
protected Codec getCodec() {
@@ -77,7 +79,7 @@ private void shouldFail(int minItemsInBlock, int maxItemsInBlock) {
expectThrows(
IllegalArgumentException.class,
() -> {
- new Lucene90PostingsFormat(minItemsInBlock, maxItemsInBlock);
+ new Lucene99PostingsFormat(minItemsInBlock, maxItemsInBlock);
});
}
@@ -131,13 +133,12 @@ private void doTestImpactSerialization(List<Impact> impacts)
 * Aside: Instead of being constant this could depend among others on {@link
- * Lucene90PostingsFormat#BLOCK_SIZE}, {@link TermsEnum#docFreq()}, {@link
+ * Lucene99PostingsFormat#BLOCK_SIZE}, {@link TermsEnum#docFreq()}, {@link
* TermsEnum#totalTermFreq()}, {@link DocIdSetIterator#cost()} (expected number of matching docs),
* {@link LeafReader#maxDoc()} (total number of docs in the segment), and the seek time and block
* size of the device storing the index.
@@ -272,7 +272,7 @@ public void visit(String field, QueryVisitor visitor) {
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
/**
- * Number of simple operations in {@link Lucene90PostingsReader.EverythingEnum#nextPosition()}
+ * Number of simple operations in {@link Lucene99PostingsReader.EverythingEnum#nextPosition()}
* when no seek or buffer refill is done.
*/
private static final int TERM_OPS_PER_POS = 7;
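
The doTestImpactSerialization hunk above exercises the writeImpacts/readImpacts pair added in this patch; a hedged sketch of that round trip (it must live in the org.apache.lucene.codecs.lucene99 package because the reader and MutableImpactList are package-private, and the freq/norm values are illustrative):

    // Hedged sketch of the impact round trip: accumulate competitive (freq, norm) pairs,
    // serialize them with the skip writer, then decode them with the score skip reader.
    package org.apache.lucene.codecs.lucene99;

    import java.util.List;
    import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
    import org.apache.lucene.index.Impact;
    import org.apache.lucene.store.ByteArrayDataInput;
    import org.apache.lucene.store.ByteBuffersDataOutput;

    public class ImpactRoundTripSketch {
      public static void main(String[] args) throws Exception {
        CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
        acc.add(3, 7L);   // freq=3, norm=7
        acc.add(10, 12L); // freq=10, norm=12

        ByteBuffersDataOutput out = new ByteBuffersDataOutput();
        Lucene99SkipWriter.writeImpacts(acc, out);

        byte[] bytes = out.toArrayCopy();
        List<Impact> impacts =
            Lucene99ScoreSkipReader.readImpacts(
                new ByteArrayDataInput(bytes), new Lucene99ScoreSkipReader.MutableImpactList());
        System.out.println(impacts);
      }
    }
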
diff --git a/lucene/suggest/src/java/module-info.java b/lucene/suggest/src/java/module-info.java
index 4444a9dd2f11..673fa3912138 100644
--- a/lucene/suggest/src/java/module-info.java
+++ b/lucene/suggest/src/java/module-info.java
@@ -30,7 +30,8 @@
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.search.suggest.document.Completion50PostingsFormat,
org.apache.lucene.search.suggest.document.Completion84PostingsFormat,
- org.apache.lucene.search.suggest.document.Completion90PostingsFormat;
+ org.apache.lucene.search.suggest.document.Completion90PostingsFormat,
+ org.apache.lucene.search.suggest.document.Completion99PostingsFormat;
provides org.apache.lucene.analysis.TokenFilterFactory with
org.apache.lucene.search.suggest.analyzing.SuggestStopFilterFactory;
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion50PostingsFormat.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion50PostingsFormat.java
index 02536cc7f26b..e53e991db229 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion50PostingsFormat.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion50PostingsFormat.java
@@ -22,7 +22,7 @@
* {@link org.apache.lucene.search.suggest.document.CompletionPostingsFormat} for {@code
* org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat}. This format is only used for
* backward-compatibility of the index format and cannot be used to write data, use {@link
- * Completion90PostingsFormat} on new indices.
+ * Completion99PostingsFormat} on new indices.
*
* @lucene.experimental
*/
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion84PostingsFormat.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion84PostingsFormat.java
index 8551fc3a80ad..76e298118348 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion84PostingsFormat.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion84PostingsFormat.java
@@ -22,7 +22,7 @@
* {@link org.apache.lucene.search.suggest.document.CompletionPostingsFormat} for {@code
* org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat}. This format is only used for
* backward-compatibility of the index format and cannot be used to write data, use {@link
- * Completion90PostingsFormat} on new indices.
+ * Completion99PostingsFormat} on new indices.
*
* @lucene.experimental
*/
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion90PostingsFormat.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion90PostingsFormat.java
index cf2a9103aa7d..db45303bcd74 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion90PostingsFormat.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion90PostingsFormat.java
@@ -19,8 +19,10 @@
import org.apache.lucene.codecs.PostingsFormat;
/**
- * {@link CompletionPostingsFormat} for {@link
- * org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat}
+ * {@link org.apache.lucene.search.suggest.document.CompletionPostingsFormat} for {@code
+ * org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat}. This format is only used for
+ * backward-compatibility of the index format and cannot be used to write data, use {@link
+ * Completion90PostingsFormat} on new indices.
*
* @lucene.experimental
*/
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion99PostingsFormat.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion99PostingsFormat.java
new file mode 100644
index 000000000000..4449d754e290
--- /dev/null
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion99PostingsFormat.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.suggest.document;
+
+import org.apache.lucene.codecs.PostingsFormat;
+
+/**
+ * {@link CompletionPostingsFormat} for {@link
+ * org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat}
+ *
+ * @lucene.experimental
+ */
+public class Completion99PostingsFormat extends CompletionPostingsFormat {
+ /** Creates a {@link Completion99PostingsFormat} that will load the completion FST on-heap. */
+ public Completion99PostingsFormat() {
+ this(FSTLoadMode.ON_HEAP);
+ }
+
+ /**
+ * Creates a {@link Completion99PostingsFormat} that will use the provided fstLoadMode
+ * to determine if the completion FST should be loaded on or off heap.
+ */
+ public Completion99PostingsFormat(FSTLoadMode fstLoadMode) {
+ super("Completion99", fstLoadMode);
+ }
+
+ @Override
+ protected PostingsFormat delegatePostingsFormat() {
+ return PostingsFormat.forName("Lucene99");
+ }
+}
 *
 * The stored fields for documents
 *
- * {@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Dictionary}
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat Term Dictionary}
 * .tim
 * The term dictionary, stores term info
 *
- * {@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Index}
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat Term Index}
 * .tip
 * The index into the Term Dictionary
 *
- * {@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Frequencies}
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat Frequencies}
 * .doc
 * Contains the list of docs which contain each term along with frequency
 *
- * {@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Positions}
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat Positions}
 * .pos
 * Stores position information about where a term occurs in the index
 *
- * {@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Payloads}
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat Payloads}
 * .pay
 * Stores additional per-position metadata information such as character offsets and user payloads
 *
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index 853e8f9b0c72..ea9df1934138 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -21,8 +21,8 @@
 import java.util.Arrays;
 import java.util.List;
 import java.util.Objects;
-import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
-import org.apache.lucene.codecs.lucene90.Lucene90PostingsReader;
+import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99PostingsReader;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
@@ -401,10 +401,10 @@ public boolean equals(Object obj) {
 /**
 * A guess of the average number of simple operations for the initial seek and buffer refill per
 * document for the positions of a term. See also {@link
- * Lucene90PostingsReader.BlockImpactsPostingsEnum#nextPosition()}.
+ * Lucene99PostingsReader.BlockImpactsPostingsEnum#nextPosition()}.
 *
 * Aside: Instead of being constant this could depend among others on {@link
- * Lucene90PostingsFormat#BLOCK_SIZE}, {@link TermsEnum#docFreq()}, {@link
+ * Lucene99PostingsFormat#BLOCK_SIZE}, {@link TermsEnum#docFreq()}, {@link
 * TermsEnum#totalTermFreq()}, {@link DocIdSetIterator#cost()} (expected number of matching docs),
 * {@link LeafReader#maxDoc()} (total number of docs in the segment), and the seek time and block
 * size of the device storing the index.
@@ -413,7 +413,7 @@ public boolean equals(Object obj) {
 /**
 * Number of simple operations in {@link
- * Lucene90PostingsReader.BlockImpactsPostingsEnum#nextPosition()} when no seek or buffer refill
+ * Lucene99PostingsReader.BlockImpactsPostingsEnum#nextPosition()} when no seek or buffer refill
 * is done.
 */
 private static final int TERM_OPS_PER_POS = 7;
diff --git a/lucene/suggest/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/suggest/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
index a40704710475..81d1ed9465d7 100644
--- a/lucene/suggest/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
+++ b/lucene/suggest/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
@@ -33,3 +33,4 @@
org.apache.lucene.search.suggest.document.Completion50PostingsFormat
org.apache.lucene.search.suggest.document.Completion84PostingsFormat
org.apache.lucene.search.suggest.document.Completion90PostingsFormat
+org.apache.lucene.search.suggest.document.Completion99PostingsFormat
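
With Completion99PostingsFormat registered above, suggest fields can be routed to it through a per-field codec override, which is the pattern the suggest tests set up; a hedged sketch only, where Lucene99Codec and the field name are assumptions for illustration:

    // Hedged sketch: route a suggest field to Completion99PostingsFormat via a per-field
    // codec override. "suggest_field" is an illustrative field name.
    import org.apache.lucene.codecs.PostingsFormat;
    import org.apache.lucene.codecs.lucene99.Lucene99Codec;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.suggest.document.Completion99PostingsFormat;

    public class SuggestCodecSketch {
      public static IndexWriterConfig configure(IndexWriterConfig iwc) {
        iwc.setCodec(
            new Lucene99Codec() {
              final PostingsFormat completions = new Completion99PostingsFormat();

              @Override
              public PostingsFormat getPostingsFormatForField(String field) {
                return "suggest_field".equals(field)
                    ? completions
                    : super.getPostingsFormatForField(field);
              }
            });
        return iwc;
      }
    }
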
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index 682363f29cb6..30201a8eac80 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -965,7 +965,7 @@ static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields)