Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] handle new model metadata stream from native process #59725

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ public final class InferenceIndexConstants {
* version: 7.8.0:
* - adds inference_config definition to trained model config
*
* version: 7.10.0: 000003
* - adds trained_model_metadata object
*/
public static final String INDEX_VERSION = "000002";
public static final String INDEX_VERSION = "000003";
public static final String INDEX_NAME_PREFIX = ".ml-inference-";
public static final String INDEX_PATTERN = INDEX_NAME_PREFIX + "*";
public static final String LATEST_INDEX_NAME = INDEX_NAME_PREFIX + INDEX_VERSION;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.core.ml.inference.trainedmodel.metadata;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

/**
 * Holds the importance value recorded for a single named field, optionally
 * broken down into one {@link ClassImportance} entry per class.
 *
 * Instances can be parsed from and rendered to XContent, and can be
 * read from and written to the transport stream format.
 */
public class TotalFeatureImportance implements ToXContentObject, Writeable {

    private static final String NAME = "total_feature_importance";
    public static final ParseField FIELD_NAME = new ParseField("field_name");
    public static final ParseField IMPORTANCE = new ParseField("importance");
    public static final ParseField CLASS_IMPORTANCE = new ParseField("class_importance");

    // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
    public static final ConstructingObjectParser<TotalFeatureImportance, Void> LENIENT_PARSER = createParser(true);
    public static final ConstructingObjectParser<TotalFeatureImportance, Void> STRICT_PARSER = createParser(false);

    /**
     * Builds a parser for this object.
     *
     * The cast of {@code a[2]} to {@code List<ClassImportance>} is unchecked; the slot is only ever
     * filled by the {@code class_importance} object-array declaration below, hence the suppression.
     *
     * @param ignoreUnknownFields passed to the parser, and also selects the lenient (true) or
     *                            strict (false) {@link ClassImportance} parser for nested entries
     * @return the configured parser
     */
    @SuppressWarnings("unchecked")
    private static ConstructingObjectParser<TotalFeatureImportance, Void> createParser(boolean ignoreUnknownFields) {
        ConstructingObjectParser<TotalFeatureImportance, Void> parser = new ConstructingObjectParser<>(NAME,
            ignoreUnknownFields,
            a -> new TotalFeatureImportance((String)a[0], (Double)a[1], (List<ClassImportance>)a[2]));
        parser.declareString(ConstructingObjectParser.constructorArg(), FIELD_NAME);
        parser.declareDouble(ConstructingObjectParser.constructorArg(), IMPORTANCE);
        // class_importance is the only optional constructor argument
        parser.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(),
            ignoreUnknownFields ? ClassImportance.LENIENT_PARSER : ClassImportance.STRICT_PARSER,
            CLASS_IMPORTANCE);
        return parser;
    }

    /**
     * Parses an instance from XContent.
     *
     * @param parser  the XContent parser to read from
     * @param lenient {@code true} to use {@link #LENIENT_PARSER}, {@code false} to use {@link #STRICT_PARSER}
     * @return the parsed instance
     * @throws IOException propagated from the underlying parser
     */
    public static TotalFeatureImportance fromXContent(XContentParser parser, boolean lenient) throws IOException {
        return lenient ? LENIENT_PARSER.parse(parser, null) : STRICT_PARSER.parse(parser, null);
    }

    public final String fieldName;
    public final double importance;
    /** Per-class entries; an empty list (never {@code null}) when none were supplied to the constructor. */
    public final List<ClassImportance> classImportances;

    /**
     * Reads an instance from a stream: field name, importance, then the list of class importances,
     * mirroring {@link #writeTo(StreamOutput)}.
     *
     * @param in the stream to read from
     * @throws IOException propagated from the stream
     */
    public TotalFeatureImportance(StreamInput in) throws IOException {
        this.fieldName = in.readString();
        this.importance = in.readDouble();
        this.classImportances = in.readList(ClassImportance::new);
    }

    /**
     * @param fieldName        the field name
     * @param importance       the importance value for the field
     * @param classImportances per-class entries; {@code null} is replaced by an empty list
     */
    TotalFeatureImportance(String fieldName, double importance, @Nullable List<ClassImportance> classImportances) {
        this.fieldName = fieldName;
        this.importance = importance;
        this.classImportances = classImportances == null ? Collections.emptyList() : classImportances;
    }

    /**
     * Writes field name, importance, then the list of class importances.
     *
     * @param out the stream to write to
     * @throws IOException propagated from the stream
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(fieldName);
        out.writeDouble(importance);
        out.writeList(classImportances);
    }

    /**
     * Renders this object as {@code field_name}, {@code importance} and, only when there is at least
     * one entry, {@code class_importance}.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(FIELD_NAME.getPreferredName(), fieldName);
        builder.field(IMPORTANCE.getPreferredName(), importance);
        // An empty list is omitted entirely rather than written as an empty array
        if (classImportances.isEmpty() == false) {
            builder.field(CLASS_IMPORTANCE.getPreferredName(), classImportances);
        }
        builder.endObject();
        return builder;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        TotalFeatureImportance that = (TotalFeatureImportance) o;
        return Double.compare(that.importance, importance) == 0
            && Objects.equals(fieldName, that.fieldName)
            && Objects.equals(classImportances, that.classImportances);
    }

    @Override
    public int hashCode() {
        return Objects.hash(fieldName, importance, classImportances);
    }

    /**
     * A class name paired with an importance value; used as the element type of
     * {@link TotalFeatureImportance#classImportances}.
     */
    public static class ClassImportance implements ToXContentObject, Writeable {
        private static final String NAME = "total_class_importance";

        public static final ParseField CLASS_NAME = new ParseField("class_name");
        public static final ParseField IMPORTANCE = new ParseField("importance");

        // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
        public static final ConstructingObjectParser<ClassImportance, Void> LENIENT_PARSER = createParser(true);
        public static final ConstructingObjectParser<ClassImportance, Void> STRICT_PARSER = createParser(false);

        /**
         * Builds a parser requiring both {@code class_name} and {@code importance}.
         *
         * @param ignoreUnknownFields passed to the parser
         * @return the configured parser
         */
        private static ConstructingObjectParser<ClassImportance, Void> createParser(boolean ignoreUnknownFields) {
            ConstructingObjectParser<ClassImportance, Void> parser = new ConstructingObjectParser<>(NAME,
                ignoreUnknownFields,
                a -> new ClassImportance((String)a[0], (Double)a[1]));
            parser.declareString(ConstructingObjectParser.constructorArg(), CLASS_NAME);
            parser.declareDouble(ConstructingObjectParser.constructorArg(), IMPORTANCE);
            return parser;
        }

        /**
         * Parses an instance from XContent.
         *
         * @param parser  the XContent parser to read from
         * @param lenient {@code true} to use {@link #LENIENT_PARSER}, {@code false} to use {@link #STRICT_PARSER}
         * @return the parsed instance
         * @throws IOException propagated from the underlying parser
         */
        public static ClassImportance fromXContent(XContentParser parser, boolean lenient) throws IOException {
            return lenient ? LENIENT_PARSER.parse(parser, null) : STRICT_PARSER.parse(parser, null);
        }

        public final String className;
        public final double importance;

        /**
         * Reads class name then importance, mirroring {@link #writeTo(StreamOutput)}.
         *
         * @param in the stream to read from
         * @throws IOException propagated from the stream
         */
        public ClassImportance(StreamInput in) throws IOException {
            this.className = in.readString();
            this.importance = in.readDouble();
        }

        /**
         * @param className  the class name
         * @param importance the importance value for the class
         */
        ClassImportance(String className, double importance) {
            this.className = className;
            this.importance = importance;
        }

        /**
         * Writes class name then importance.
         *
         * @param out the stream to write to
         * @throws IOException propagated from the stream
         */
        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeString(className);
            out.writeDouble(importance);
        }

        /** Renders this object as {@code class_name} and {@code importance}. */
        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            builder.field(CLASS_NAME.getPreferredName(), className);
            builder.field(IMPORTANCE.getPreferredName(), importance);
            builder.endObject();
            return builder;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            ClassImportance that = (ClassImportance) o;
            return Double.compare(that.importance, importance) == 0 &&
                Objects.equals(className, that.className);
        }

        @Override
        public int hashCode() {
            return Objects.hash(className, importance);
        }

    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.core.ml.inference.trainedmodel.metadata;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.core.ml.inference.persistence.InferenceIndexConstants;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.core.ml.utils.ToXContentParams;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

/**
 * Metadata for a trained model: the model id together with a list of
 * {@link TotalFeatureImportance} entries.
 *
 * Instances can be parsed from and rendered to XContent, and can be
 * read from and written to the transport stream format.
 */
public class TrainedModelMetadata implements ToXContentObject, Writeable {

    public static final String NAME = "trained_model_metadata";
    public static final ParseField TOTAL_FEATURE_IMPORTANCE = new ParseField("total_feature_importance");
    public static final ParseField MODEL_ID = new ParseField("model_id");

    // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
    public static final ConstructingObjectParser<TrainedModelMetadata, Void> LENIENT_PARSER = createParser(true);
    public static final ConstructingObjectParser<TrainedModelMetadata, Void> STRICT_PARSER = createParser(false);

    /**
     * Builds a parser requiring both {@code model_id} and {@code total_feature_importance}.
     *
     * @param ignoreUnknownFields passed to the parser, and also selects the lenient (true) or
     *                            strict (false) {@link TotalFeatureImportance} parser for nested entries
     * @return the configured parser
     */
    @SuppressWarnings("unchecked")
    private static ConstructingObjectParser<TrainedModelMetadata, Void> createParser(boolean ignoreUnknownFields) {
        ConstructingObjectParser<TrainedModelMetadata, Void> parser = new ConstructingObjectParser<>(NAME,
            ignoreUnknownFields,
            a -> new TrainedModelMetadata((String)a[0], (List<TotalFeatureImportance>)a[1]));
        parser.declareString(ConstructingObjectParser.constructorArg(), MODEL_ID);
        parser.declareObjectArray(ConstructingObjectParser.constructorArg(),
            ignoreUnknownFields ? TotalFeatureImportance.LENIENT_PARSER : TotalFeatureImportance.STRICT_PARSER,
            TOTAL_FEATURE_IMPORTANCE);
        return parser;
    }

    /**
     * Parses an instance from XContent.
     *
     * @param parser  the XContent parser to read from
     * @param lenient {@code true} to use {@link #LENIENT_PARSER}, {@code false} to use {@link #STRICT_PARSER}
     * @return the parsed instance
     * @throws IOException propagated from the underlying parser
     */
    public static TrainedModelMetadata fromXContent(XContentParser parser, boolean lenient) throws IOException {
        return lenient ? LENIENT_PARSER.parse(parser, null) : STRICT_PARSER.parse(parser, null);
    }

    /**
     * @param modelId the model id
     * @return {@link #NAME}, a hyphen, and the model id
     */
    public static String docId(String modelId) {
        return NAME + "-" + modelId;
    }

    private final List<TotalFeatureImportance> totalFeatureImportances;
    private final String modelId;

    /**
     * Reads the model id then the list of feature importances, mirroring {@link #writeTo(StreamOutput)}.
     *
     * @param in the stream to read from
     * @throws IOException propagated from the stream
     */
    public TrainedModelMetadata(StreamInput in) throws IOException {
        this.modelId = in.readString();
        // Wrapped so the getter hands out an unmodifiable view no matter which constructor was used
        this.totalFeatureImportances = Collections.unmodifiableList(in.readList(TotalFeatureImportance::new));
    }

    /**
     * @param modelId                 the model id; must not be {@code null}
     * @param totalFeatureImportances the feature importance entries; must not be {@code null}.
     *                                The list is wrapped in an unmodifiable view, not copied.
     */
    public TrainedModelMetadata(String modelId, List<TotalFeatureImportance> totalFeatureImportances) {
        this.modelId = ExceptionsHelper.requireNonNull(modelId, MODEL_ID);
        // Validate by name, as for modelId, rather than failing anonymously inside unmodifiableList
        this.totalFeatureImportances = Collections.unmodifiableList(
            ExceptionsHelper.requireNonNull(totalFeatureImportances, TOTAL_FEATURE_IMPORTANCE));
    }

    public String getModelId() {
        return modelId;
    }

    /** @return {@link #docId(String)} applied to this instance's model id */
    public String getDocId() {
        return docId(modelId);
    }

    /** @return an unmodifiable view of the feature importance entries */
    public List<TotalFeatureImportance> getTotalFeatureImportances() {
        return totalFeatureImportances;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        TrainedModelMetadata that = (TrainedModelMetadata) o;
        return Objects.equals(totalFeatureImportances, that.totalFeatureImportances) &&
            Objects.equals(modelId, that.modelId);
    }

    @Override
    public int hashCode() {
        return Objects.hash(totalFeatureImportances, modelId);
    }

    /**
     * Writes the model id then the list of feature importances.
     *
     * @param out the stream to write to
     * @throws IOException propagated from the stream
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(modelId);
        out.writeList(totalFeatureImportances);
    }

    /**
     * Renders this object as {@code model_id} and {@code total_feature_importance}. When the
     * {@link ToXContentParams#FOR_INTERNAL_STORAGE} param is true, the doc type field is written
     * first with {@link #NAME} as its value.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (params.paramAsBoolean(ToXContentParams.FOR_INTERNAL_STORAGE, false)) {
            builder.field(InferenceIndexConstants.DOC_TYPE.getPreferredName(), NAME);
        }
        builder.field(MODEL_ID.getPreferredName(), modelId);
        builder.field(TOTAL_FEATURE_IMPORTANCE.getPreferredName(), totalFeatureImportances);
        builder.endObject();
        return builder;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,15 @@ public final class Messages {

// Messages about trained models. The {0}, {1}, ... tokens are positional placeholders;
// presumably substituted by the caller when the message is formatted - confirm against usage.
public static final String INFERENCE_TRAINED_MODEL_EXISTS = "Trained machine learning model [{0}] already exists";
public static final String INFERENCE_TRAINED_MODEL_DOC_EXISTS = "Trained machine learning model chunked doc [{0}][{1}] already exists";
public static final String INFERENCE_TRAINED_MODEL_METADATA_EXISTS = "Trained machine learning model metadata [{0}] already exists";
public static final String INFERENCE_FAILED_TO_STORE_MODEL = "Failed to store trained machine learning model [{0}]";
public static final String INFERENCE_FAILED_TO_STORE_MODEL_METADATA = "Failed to store trained machine learning model metadata [{0}]";
public static final String INFERENCE_NOT_FOUND = "Could not find trained model [{0}]";
public static final String INFERENCE_NOT_FOUND_MULTIPLE = "Could not find trained models {0}";
// The parenthesis opened before "current minimum node version" is closed at the end of the message
public static final String INFERENCE_CONFIG_NOT_SUPPORTED_ON_VERSION =
    "Configuration [{0}] requires minimum node version [{1}] (current minimum node version [{2}])";
public static final String MODEL_DEFINITION_NOT_FOUND = "Could not find trained model definition [{0}]";
public static final String MODEL_METADATA_NOT_FOUND = "Could not find trained model metadata [{0}]";
public static final String INFERENCE_CANNOT_DELETE_MODEL =
    "Unable to delete model [{0}]";
public static final String MODEL_DEFINITION_TRUNCATED =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"order" : 0,
"version" : ${xpack.ml.version.id},
"index_patterns" : [
".ml-inference-000002"
".ml-inference-000003"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these templates automatically installed when any new node comes online? Or does the new node have to be a master node?

@dimitris-athanasiou do you know?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aside from the question, I believe the index pattern here should be .ml-inference-* and we shouldn't have to change this each time.

Back to the question, all nodes get the templates installed regardless of whether they're master or not.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dimitris-athanasiou I am not sure. What if an older node has a specific mapping in mind and tries to write to the index?

A new index would be created with the new mapping (if it didn't already exist) and the older node does not have code to support the new mapping.

Keeping it versioned allows only NEW nodes to reference the new index mapping.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, it makes sense.

],
"settings" : {
"index" : {
Expand Down Expand Up @@ -70,6 +70,30 @@
},
"inference_config": {
"enabled": false
},
"total_feature_importance": {
"type": "nested",
"dynamic": "false",
"properties": {
"importance": {
"type": "double"
},
"field_name": {
"type": "keyword"
},
"class_importance": {
"type": "nested",
"dynamic": "false",
"properties": {
"importance": {
"type": "double"
},
"class_name": {
"type": "keyword"
}
}
}
}
}
}
}
Expand Down
Loading