Skip to content

Commit

Permalink
Star Tree File Formats (#14809)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
(cherry picked from commit 6bae704)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] committed Sep 2, 2024
1 parent 2a2fcbd commit a9eae9d
Show file tree
Hide file tree
Showing 38 changed files with 3,479 additions and 823 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.index.codec.composite;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat {
private final DocValuesFormat delegate;
private final MapperService mapperService;

/** Data codec name for Composite Doc Values Format */
public static final String DATA_CODEC_NAME = "Composite99FormatData";

/** Meta codec name for Composite Doc Values Format */
public static final String META_CODEC_NAME = "Composite99FormatMeta";

/** Filename extension for the composite index data */
public static final String DATA_EXTENSION = "cid";

/** Filename extension for the composite index meta */
public static final String META_EXTENSION = "cim";

/** Data doc values codec name for Composite Doc Values Format */
public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData";

/** Meta doc values codec name for Composite Doc Values Format */
public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata";

/** Filename extension for the composite index data doc values */
public static final String DATA_DOC_VALUES_EXTENSION = "cidvd";

/** Filename extension for the composite index meta doc values */
public static final String META_DOC_VALUES_EXTENSION = "cidvm";

/** Initial version for the Composite99DocValuesFormat */
public static final int VERSION_START = 0;

/** Current version for the Composite99DocValuesFormat */
public static final int VERSION_CURRENT = VERSION_START;

// needed for SPI
public Composite99DocValuesFormat() {
this(new Lucene90DocValuesFormat(), null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,47 @@

package org.opensearch.index.codec.composite.composite99;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.CompositeIndexValues;
import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory;
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
import org.opensearch.index.compositeindex.datacube.Metric;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER;
import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfoList;

/**
* Reader for star tree index and star tree doc values from the segments
Expand All @@ -32,11 +57,158 @@
*/
@ExperimentalApi
public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader {
// NOTE(review): the non-final "delegate" declaration on the next line looks like the pre-change line
// kept by this diff view (a final declaration of the same field follows) - confirm.
private DocValuesProducer delegate;
private static final Logger logger = LogManager.getLogger(Composite99DocValuesReader.class);

// producer handed to the constructor; checkIntegrity() and close() forward to it
private final DocValuesProducer delegate;
// composite index data file, opened in the constructor
private IndexInput dataIn;
// composite index meta file, opened in the constructor as a checksum input
private ChecksumIndexInput metaIn;
// composite field name -> slice of dataIn holding that field's star tree data
private final Map<String, IndexInput> compositeIndexInputMap = new LinkedHashMap<>();
// composite field name -> metadata read from the meta file
private final Map<String, CompositeIndexMetadata> compositeIndexMetadataMap = new LinkedHashMap<>();
// fully qualified doc values field names of the star tree dimensions and metrics
private final List<String> fields;
// producer for the star tree doc values, created at the end of the constructor
private DocValuesProducer compositeDocValuesProducer;
// one entry per composite field read from the meta file, in read order
private final List<CompositeIndexFieldInfo> compositeFieldInfos = new ArrayList<>();
// read state rebuilt in the constructor around the field infos derived from "fields"
private SegmentReadState readState;

/**
* Opens the composite index meta and data files of a segment and loads the metadata of every
* composite field recorded in the meta file.
* <p>
* For each star tree entry the constructor slices that field's region out of the data file, records
* its metadata, and collects the fully qualified doc values field names of its dimensions and metrics.
* Those names back the field infos of the read state that is handed to the composite doc values producer.
* If anything fails, this reader is closed before the exception propagates.
*
* @param producer doc values producer stored as the delegate
* @param readState segment read state supplying the directory, segment info, segment suffix and IO context
* @throws IOException if a file cannot be opened or read, or fails header, marker, version or footer validation
*/
// NOTE(review): the first signature line below (parameter named "state") and the "TODO : read star tree files"
// comment look like pre-change lines kept by this diff view - confirm against the committed file.
public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException {
public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException {
this.delegate = producer;
// TODO : read star tree files
this.fields = new ArrayList<>();

String metaFileName = IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Composite99DocValuesFormat.META_EXTENSION
);

String dataFileName = IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Composite99DocValuesFormat.DATA_EXTENSION
);

// stays false unless every step below completes, so the outer finally can release what was opened
boolean success = false;
try {

// initialize data input: open the composite index data file and validate its index header
dataIn = readState.directory.openInput(dataFileName, readState.context);
CodecUtil.checkIndexHeader(
dataIn,
Composite99DocValuesFormat.DATA_CODEC_NAME,
Composite99DocValuesFormat.VERSION_START,
Composite99DocValuesFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix
);

// initialize meta input: opened as a checksum input so its footer can be verified once reading is done
metaIn = readState.directory.openChecksumInput(metaFileName, readState.context);
// remembers any failure raised while reading the meta file so it can be passed to checkFooter
Throwable priorE = null;
try {
CodecUtil.checkIndexHeader(
metaIn,
Composite99DocValuesFormat.META_CODEC_NAME,
Composite99DocValuesFormat.VERSION_START,
Composite99DocValuesFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix
);

// one iteration per composite field entry; the loop ends when the end marker (-1) is read
while (true) {

// validate magic marker: -1 ends the entries, any other negative value is treated as corruption,
// and every remaining value must equal COMPOSITE_FIELD_MARKER
long magicMarker = metaIn.readLong();
if (magicMarker == -1) {
break;
} else if (magicMarker < 0) {
throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn);

Check warning on line 122 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L122

Added line #L122 was not covered by tests
} else if (COMPOSITE_FIELD_MARKER != magicMarker) {
logger.error("Invalid composite field magic marker");
throw new IOException("Invalid composite field magic marker");

Check warning on line 125 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L124-L125

Added lines #L124 - L125 were not covered by tests
}

// the entry's version must match StarTreeWriter.VERSION_CURRENT exactly
int version = metaIn.readVInt();
if (VERSION_CURRENT != version) {
logger.error("Invalid composite field version");
throw new IOException("Invalid composite field version");

Check warning on line 131 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L130-L131

Added lines #L130 - L131 were not covered by tests
}

// construct composite index metadata: the field name is read first, then the field type name
String compositeFieldName = metaIn.readString();
CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName(
metaIn.readString()
);

switch (compositeFieldType) {
case STAR_TREE:
StarTreeMetadata starTreeMetadata = new StarTreeMetadata(
metaIn,
compositeFieldName,
compositeFieldType,
version
);
compositeFieldInfos.add(new CompositeIndexFieldInfo(compositeFieldName, compositeFieldType));

// slice out only this field's region of the data file, using the offset and length from its metadata
IndexInput starTreeIndexInput = dataIn.slice(
"star-tree data slice for respective star-tree fields",
starTreeMetadata.getDataStartFilePointer(),
starTreeMetadata.getDataLength()
);
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
}

// adding metric fields: one name per (metric field, metric stat) pair
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getMetrics()) {
fields.add(
fullyQualifiedFieldNameForStarTreeMetricsDocValues(
compositeFieldName,
metric.getField(),
metricStat.getTypeName()
)
);

}
}

break;
default:
// NOTE(review): the type was read from metaIn but dataIn is the input passed to the exception - confirm
throw new CorruptIndexException("Invalid composite field type found in the file", dataIn);

Check warning on line 181 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L181

Added line #L181 was not covered by tests
}
}

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);

// initialize star-tree doc values producer

compositeDocValuesProducer = LuceneDocValuesProducerFactory.getDocValuesProducerForCompositeCodec(
Composite99Codec.COMPOSITE_INDEX_CODEC_NAME,
this.readState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION,
Composite99DocValuesFormat.META_DOC_VALUES_CODEC,
Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION
);

} catch (Throwable t) {
// not rethrown here: the failure is handed to checkFooter in the finally block below
priorE = t;

Check warning on line 202 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L201-L202

Added lines #L201 - L202 were not covered by tests
} finally {
// per Lucene's CodecUtil documentation this validates the meta footer and, when priorE is set,
// rethrows it with the checksum status attached - confirm against the Lucene version in use
CodecUtil.checkFooter(metaIn, priorE);
}
success = true;
} finally {
if (success == false) {
// release whatever was opened so far without masking the original exception
IOUtils.closeWhileHandlingException(this);

Check warning on line 209 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L209

Added line #L209 was not covered by tests
}
}
}

@Override
Expand Down Expand Up @@ -67,24 +239,63 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
/**
* Runs the delegate producer's integrity check and then checksums the composite index data file.
*
* @throws IOException if the delegate check fails or the data file checksum verification fails
*/
@Override
public void checkIntegrity() throws IOException {
delegate.checkIntegrity();
// checksums the whole composite index data file; the meta file's footer is checked in the constructor.
// NOTE(review): compositeDocValuesProducer is not integrity-checked here - confirm whether that is intended
CodecUtil.checksumEntireFile(dataIn);
}

@Override
public void close() throws IOException {
delegate.close();
// Todo: close composite index related files [star tree] files
boolean success = false;
try {
IOUtils.close(metaIn, dataIn);
IOUtils.close(compositeDocValuesProducer);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(metaIn, dataIn);

Check warning on line 255 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L255

Added line #L255 was not covered by tests
}
compositeIndexInputMap.clear();
compositeIndexMetadataMap.clear();
fields.clear();
metaIn = null;
dataIn = null;
}
}

/**
* Returns the composite index fields of this segment.
*
* @return the reader's own list of composite field infos (not a copy), which the constructor fills while reading the meta file
*/
@Override
public List<CompositeIndexFieldInfo> getCompositeIndexFields() {
// todo : read from file formats and get the field names.
// NOTE(review): the todo above and the empty-list return directly below look like pre-change lines kept by
// this diff view; the return of compositeFieldInfos after it matches the list the constructor populates - confirm.
return new ArrayList<>();
return compositeFieldInfos;
}

/**
* Builds the values accessor for one composite index field.
* <p>
* For {@code STAR_TREE} fields a new {@link StarTreeValues} is created from the metadata and the data
* slice registered under the field's name, the composite doc values producer and this reader's read state.
* Any other field type is rejected with a {@link CorruptIndexException}.
*
* @param compositeIndexFieldInfo name and type of the composite field to look up
* @return the composite index values for the given field
* @throws IOException if the field type is not supported (raised as {@link CorruptIndexException})
*/
@Override
public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException {
// TODO : read compositeIndexValues [starTreeValues] from star tree files
// NOTE(review): the TODO above and the unconditional throw directly below look like pre-change lines kept by
// this diff view; the switch after them is the implementation that uses the maps filled by the constructor - confirm.
throw new UnsupportedOperationException();

switch (compositeIndexFieldInfo.getType()) {
case STAR_TREE:
// both lookups are keyed by the composite field name
return new StarTreeValues(
compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()),
compositeIndexInputMap.get(compositeIndexFieldInfo.getField()),
compositeDocValuesProducer,
this.readState
);

default:
// NOTE(review): the type name is passed as the exception's second argument rather than appended to the message - confirm intent
throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName());

Check warning on line 283 in server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java#L283

Added line #L283 was not covered by tests
}

}

/**
 * Null-safe accessor for sorted numeric doc values.
 * <p>
 * A segment may hold no values for a particular field, in which case the caller passes
 * {@code null}; an empty instance is handed back instead so callers can iterate without a null check.
 *
 * @param sortedNumeric the sorted numeric doc values of a field, possibly {@code null}
 * @return {@code sortedNumeric} itself when it is non-null, otherwise empty sorted numeric doc values
 */
public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) {
    if (sortedNumeric != null) {
        return sortedNumeric;
    }
    return DocValues.emptySortedNumeric();
}

}
Loading

0 comments on commit a9eae9d

Please sign in to comment.