Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCRIPTING: Move Aggregation Scripts to their own context #32068

Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2ee4b46
SCRIPTING: Move Aggregation Scripts to their own context
original-brownbear Jul 15, 2018
5088757
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Jul 18, 2018
f924c07
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Jul 18, 2018
de9b238
CR: Make returns typed
original-brownbear Jul 18, 2018
2ac53ab
CR: Make returns typed
original-brownbear Jul 18, 2018
0386ec8
CR: Make returns typed
original-brownbear Jul 18, 2018
574780d
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Jul 18, 2018
63963f6
CR: Make returns typed
original-brownbear Jul 18, 2018
ba8fd94
CR: Make returns typed
original-brownbear Jul 18, 2018
678d388
Merge branch 'master' into replace-agg-script-context
original-brownbear Jul 19, 2018
145e11f
Renamings from CR
original-brownbear Jul 19, 2018
05e6559
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Jul 21, 2018
bc27466
CR: Make BucketSelectorPipelineAggregator agnositc to lang
original-brownbear Jul 21, 2018
0f13633
CR: Make BucketSelectorPipelineAggregator agnositic to language
original-brownbear Jul 21, 2018
a1bd389
CR: Add separate context for scriptheuristic
original-brownbear Jul 21, 2018
af67738
CR: Add separate context for scriptheuristic
original-brownbear Jul 21, 2018
b0a4487
CR: Add separate context for scriptheuristic
original-brownbear Jul 21, 2018
2662c87
CR: Add separate context for scriptheuristic
original-brownbear Jul 21, 2018
f31660c
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Jul 24, 2018
d506e97
CR: Move params to script field
original-brownbear Jul 25, 2018
5a55338
CR: Move params to script field
original-brownbear Jul 25, 2018
956480e
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Jul 25, 2018
b095f3d
CR: Move params to script field
original-brownbear Jul 25, 2018
5bf4573
Merge remote-tracking branch 'elastic/master' into replace-agg-script…
original-brownbear Aug 3, 2018
6509e85
CR: Rename heuristic score script
original-brownbear Aug 4, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.BucketAggregationScript;
import org.elasticsearch.script.BucketAggregationSelectorScript;
import org.elasticsearch.script.ClassPermission;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.FilterScript;
Expand All @@ -54,6 +56,7 @@
import java.security.PrivilegedAction;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -112,6 +115,20 @@ protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundE
} else if (context.instanceClazz.equals(ExecutableScript.class)) {
ExecutableScript.Factory factory = (p) -> new ExpressionExecutableScript(expr, p);
return context.factoryClazz.cast(factory);
} else if (context.instanceClazz.equals(BucketAggregationScript.class)) {
return context.factoryClazz.cast(newBucketAggregationScriptFactory(expr));
} else if (context.instanceClazz.equals(BucketAggregationSelectorScript.class)) {
BucketAggregationScript.Factory factory = newBucketAggregationScriptFactory(expr);
BucketAggregationSelectorScript.Factory wrappedFactory = () -> {
BucketAggregationScript script = factory.newInstance();
return new BucketAggregationSelectorScript() {
@Override
public boolean execute(Map<String, Object> params) {
return script.execute(params) == 1.0;
}
};
};
return context.factoryClazz.cast(wrappedFactory);
} else if (context.instanceClazz.equals(FilterScript.class)) {
FilterScript.Factory factory = (p, lookup) -> newFilterScript(expr, lookup, p);
return context.factoryClazz.cast(factory);
Expand All @@ -122,6 +139,37 @@ protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundE
throw new IllegalArgumentException("expression engine does not know how to handle script context [" + context.name + "]");
}

private static BucketAggregationScript.Factory newBucketAggregationScriptFactory(Expression expr) {
return () -> {
ReplaceableConstDoubleValues[] functionValuesArray =
new ReplaceableConstDoubleValues[expr.variables.length];
Map<String, ReplaceableConstDoubleValues> functionValuesMap = new HashMap<>();
for (int i = 0; i < expr.variables.length; ++i) {
functionValuesArray[i] = new ReplaceableConstDoubleValues();
functionValuesMap.put(expr.variables[i], functionValuesArray[i]);
}
return new BucketAggregationScript() {
@Override
public double execute(Map<String, Object> params) {
params.forEach((name, value) -> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In expressions, we want these types of decisions made up front, not at execution time. Take a look at how params are passed to the factory in other script examples, and exposed as getParams() on the script class. We want to avoid costly things like hash lookups per document for something that won't change (eg the params that exist).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjernst but these params aren't really params like in the other scripts. They aren't constant but change in a loop here https://github.com/elastic/elasticsearch/pull/32068/files#diff-15179b62bf7bf2f829791af279ea380aR90.

Previously those were part of a params field passed to the factory which made the script stateful and I guess led to this todo https://github.com/elastic/elasticsearch/pull/32068/files#diff-15179b62bf7bf2f829791af279ea380aL98 <= that I resolved here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, that isn't really resolving that comment. While only one script object is created with your change, the performance is the same because we are still doing the binding at execution time for each document. I would rather keep that original code with that comment, deprecate passing things in via params (by adding the parameters directly as arguments), and then fix the TODO in master.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjernst ah now I see. It still looks to me like the change here gets us to a better spot for existing scripts that still use the params here since:

  • We're saving the whole setting up of the array and hash map for function values in org.elasticsearch.script.expression.ExpressionExecutableScript#ExpressionExecutableScript
  • We get rid of the state in the script and the instantiating the script multiple times

Wouldn't it make more sense to keep this to improve the backwards compatible case and add a todo to avoid the redundant binding for the unchanged vars (and creating the map over and over) instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or put differently :)
If we don't move to passing the params as the execute argument, then we'll be forced to recreate the script over and over for as long as we support passing variables to scripts via params, won't we? => better to move to updating this map more efficiently (and ideally avoid looping over it in expressions) than to add the params field back and be forced to recreate the script?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then again, the correct solution will have params in this still -> will revert to using params as on the script tomorrow :)

ReplaceableConstDoubleValues placeholder = functionValuesMap.get(name);
if (placeholder == null) {
throw new IllegalArgumentException("Error using " + expr + ". " +
"The variable [" + name + "] does not exist in the executable expressions script.");
} else if (value instanceof Number == false) {
throw new IllegalArgumentException("Error using " + expr + ". " +
"Executable expressions scripts can only process numbers." +
" The variable [" + name + "] is not a number.");
} else {
placeholder.setValue(((Number) value).doubleValue());
}
});
return expr.evaluate(functionValuesArray);
}
};
};
}

private SearchScript.LeafFactory newSearchScript(Expression expr, SearchLookup lookup, @Nullable Map<String, Object> vars) {
MapperService mapper = lookup.doc().mapperService();
// NOTE: if we need to do anything complicated with bindings in the future, we can just extend Bindings,
Expand Down Expand Up @@ -267,7 +315,7 @@ public void setDocument(int docid) {
};
};
}

private ScoreScript.LeafFactory newScoreScript(Expression expr, SearchLookup lookup, @Nullable Map<String, Object> vars) {
SearchScript.LeafFactory searchLeafFactory = newSearchScript(expr, lookup, vars);
return new ScoreScript.LeafFactory() {
Expand All @@ -284,17 +332,17 @@ public ScoreScript newInstance(LeafReaderContext ctx) throws IOException {
public double execute() {
return script.runAsDouble();
}

@Override
public void setDocument(int docid) {
script.setDocument(docid);
}

@Override
public void setScorer(Scorer scorer) {
script.setScorer(scorer);
}

@Override
public double get_score() {
return script.getScore();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.script;

import java.util.Map;

/**
* A script used in bucket aggregations that returns a {@code double} value.
* <p>
* A script instance is created through a {@link Factory} and receives the values it
* should operate on as the {@code params} argument of each {@link #execute(Map)} call.
*/
public abstract class BucketAggregationScript {

/**
* Argument names of {@link #execute(Map)}; the only argument is {@code params}.
* NOTE(review): presumably read by script engines to bind argument names; confirm before renaming.
*/
public static final String[] PARAMETERS = { "params" };

/**
* The script context for this script type, named {@code bucket_aggregation} and
* using {@link Factory} as its factory class.
*/
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("bucket_aggregation", Factory.class);

/**
* Runs the script.
*
* @param params the variables made available to the script, keyed by name
* @return the {@code double} result computed by the script
*/
public abstract double execute(Map<String, Object> params);

/**
* Factory compiled for {@link #CONTEXT}; it takes no arguments because all
* inputs are supplied to {@link BucketAggregationScript#execute(Map)}.
*/
public interface Factory {
/**
* @return a script instance
*/
BucketAggregationScript newInstance();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.script;

import java.util.Map;

/**
* A script used in bucket aggregations that returns a {@code boolean} value.
* <p>
* A script instance is created through a {@link Factory} and receives the values it
* should operate on as the {@code params} argument of each {@link #execute(Map)} call.
*/
public abstract class BucketAggregationSelectorScript {

/**
* Argument names of {@link #execute(Map)}; the only argument is {@code params}.
* NOTE(review): presumably read by script engines to bind argument names; confirm before renaming.
*/
public static final String[] PARAMETERS = { "params" };

/**
* The script context for this script type, named {@code aggregation_selector} and
* using {@link Factory} as its factory class.
*/
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("aggregation_selector", Factory.class);

/**
* Runs the script.
*
* @param params the variables made available to the script, keyed by name
* @return the {@code boolean} result computed by the script
*/
public abstract boolean execute(Map<String, Object> params);

/**
* Factory compiled for {@link #CONTEXT}; it takes no arguments because all
* inputs are supplied to {@link BucketAggregationSelectorScript#execute(Map)}.
*/
public interface Factory {
/**
* @return a script instance
*/
BucketAggregationSelectorScript newInstance();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,5 @@ interface Factory {
ScriptContext<Factory> CONTEXT = new ScriptContext<>("executable", Factory.class);

// TODO: remove these once each has its own script interface
ScriptContext<Factory> AGGS_CONTEXT = new ScriptContext<>("aggs_executable", Factory.class);
ScriptContext<Factory> UPDATE_CONTEXT = new ScriptContext<>("update", Factory.class);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.script;

import java.util.Map;

/**
* A script used in script heuristics.
*/
public abstract class ScriptHeuristicScript {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name is confusing having "script" twice. Can we call it HeuristicScoreScript? Or better yet, something specific to the agg it is used in (significant terms).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SignificantTermsHeuristicScoreScript or too long? :)

Copy link
Member

@rjernst rjernst Aug 3, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's long, but probably better in this case to not have any possible confusion with regular scoring scripts.


public static final String[] PARAMETERS = { "params" };

public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("script_heuristic", Factory.class);

public abstract double execute(Map<String, Object> params);

public interface Factory {
ScriptHeuristicScript newInstance();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ public class ScriptModule {
SearchScript.SCRIPT_SORT_CONTEXT,
SearchScript.TERMS_SET_QUERY_CONTEXT,
ExecutableScript.CONTEXT,
ExecutableScript.AGGS_CONTEXT,
BucketAggregationScript.CONTEXT,
BucketAggregationSelectorScript.CONTEXT,
ScriptHeuristicScript.CONTEXT,
ExecutableScript.UPDATE_CONTEXT,
IngestScript.CONTEXT,
FilterScript.CONTEXT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptHeuristicScript;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

public class ScriptHeuristic extends SignificanceHeuristic {
Expand All @@ -48,19 +50,21 @@ static class ExecutableScriptHeuristic extends ScriptHeuristic {
private final LongAccessor supersetSizeHolder;
private final LongAccessor subsetDfHolder;
private final LongAccessor supersetDfHolder;
private final ExecutableScript executableScript;
private final ScriptHeuristicScript executableScript;
private final Map<String, Object> params = new HashMap<>();

ExecutableScriptHeuristic(Script script, ExecutableScript executableScript){
ExecutableScriptHeuristic(Script script, ScriptHeuristicScript executableScript) {
super(script);
subsetSizeHolder = new LongAccessor();
supersetSizeHolder = new LongAccessor();
subsetDfHolder = new LongAccessor();
supersetDfHolder = new LongAccessor();
this.executableScript = executableScript;
executableScript.setNextVar("_subset_freq", subsetDfHolder);
executableScript.setNextVar("_subset_size", subsetSizeHolder);
executableScript.setNextVar("_superset_freq", supersetDfHolder);
executableScript.setNextVar("_superset_size", supersetSizeHolder);
params.putAll(script.getParams());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be its own context. Putting these into params would be a breaking change, and also not utilize the intent of having contexts (different variables for different uses).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjernst but currently these are documented as params aren't they? See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html#_scripted

Also under the hood org.elasticsearch.painless.ScriptImpl simply puts these under params, these aren't available as top level params are they?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, I was confused because of what I saw in ScriptImpl for painless (naming implying it was in variables for the script, but that is actually the params). I still think this needs to be its own context. We can eventually move these to direct arguments of the execute method (again, so params can be read-only in the future).

Copy link
Member Author

@original-brownbear original-brownbear Jul 21, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjernst Made this a separate context now in a1bd389

Didn't add any further logic for making these direct parameters yet though or did we want to add that here already?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They can be added as direct arguments in a separate PR. There should also be deprecation messages along with that so we can remove inserting them into params.

params.put("_subset_freq", subsetDfHolder);
params.put("_subset_size", subsetSizeHolder);
params.put("_superset_freq", supersetDfHolder);
params.put("_superset_size", supersetSizeHolder);
}

@Override
Expand All @@ -69,7 +73,7 @@ public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long
supersetSizeHolder.value = supersetSize;
subsetDfHolder.value = subsetFreq;
supersetDfHolder.value = supersetFreq;
return ((Number) executableScript.run()).doubleValue();
return executableScript.execute(params);
}
}

Expand All @@ -91,15 +95,15 @@ public void writeTo(StreamOutput out) throws IOException {

@Override
public SignificanceHeuristic rewrite(InternalAggregation.ReduceContext context) {
ExecutableScript.Factory factory = context.scriptService().compile(script, ExecutableScript.AGGS_CONTEXT);
return new ExecutableScriptHeuristic(script, factory.newInstance(script.getParams()));
ScriptHeuristicScript.Factory factory = context.scriptService().compile(script, ScriptHeuristicScript.CONTEXT);
return new ExecutableScriptHeuristic(script, factory.newInstance());
}

@Override
public SignificanceHeuristic rewrite(SearchContext context) {
QueryShardContext shardContext = context.getQueryShardContext();
ExecutableScript.Factory compiledScript = shardContext.getScriptService().compile(script, ExecutableScript.AGGS_CONTEXT);
return new ExecutableScriptHeuristic(script, compiledScript.newInstance(script.getParams()));
ScriptHeuristicScript.Factory compiledScript = shardContext.getScriptService().compile(script, ScriptHeuristicScript.CONTEXT);
return new ExecutableScriptHeuristic(script, compiledScript.newInstance());
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.BucketAggregationScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregations;
Expand Down Expand Up @@ -89,7 +88,9 @@ public InternalAggregation reduce(InternalAggregation aggregation, ReduceContext
(InternalMultiBucketAggregation<InternalMultiBucketAggregation, InternalMultiBucketAggregation.InternalBucket>) aggregation;
List<? extends InternalMultiBucketAggregation.InternalBucket> buckets = originalAgg.getBuckets();

ExecutableScript.Factory factory = reduceContext.scriptService().compile(script, ExecutableScript.AGGS_CONTEXT);
BucketAggregationScript.Factory factory =
reduceContext.scriptService().compile(script, BucketAggregationScript.CONTEXT);
BucketAggregationScript executableScript = factory.newInstance();
List<InternalMultiBucketAggregation.InternalBucket> newBuckets = new ArrayList<>();
for (InternalMultiBucketAggregation.InternalBucket bucket : buckets) {
Map<String, Object> vars = new HashMap<>();
Expand All @@ -110,24 +111,13 @@ public InternalAggregation reduce(InternalAggregation aggregation, ReduceContext
if (skipBucket) {
newBuckets.add(bucket);
} else {
ExecutableScript executableScript = factory.newInstance(vars);
Object returned = executableScript.run();
// no need to check for self references since only numbers are valid
if (returned == null) {
newBuckets.add(bucket);
} else {
if ((returned instanceof Number) == false) {
throw new AggregationExecutionException("series_arithmetic script for reducer [" + name()
+ "] must return a Number");
}
final List<InternalAggregation> aggs = StreamSupport.stream(bucket.getAggregations().spliterator(), false).map(
(p) -> (InternalAggregation) p).collect(Collectors.toList());
aggs.add(new InternalSimpleValue(name(), ((Number) returned).doubleValue(), formatter,
new ArrayList<>(), metaData()));
InternalMultiBucketAggregation.InternalBucket newBucket = originalAgg.createBucket(new InternalAggregations(aggs),
bucket);
newBuckets.add(newBucket);
}
double returned = executableScript.execute(vars);
final List<InternalAggregation> aggs = StreamSupport.stream(bucket.getAggregations().spliterator(), false).map(
(p) -> (InternalAggregation) p).collect(Collectors.toList());
aggs.add(new InternalSimpleValue(name(), returned, formatter, new ArrayList<>(), metaData()));
InternalMultiBucketAggregation.InternalBucket newBucket = originalAgg.createBucket(new InternalAggregations(aggs),
bucket);
newBuckets.add(newBucket);
}
}
return originalAgg.create(newBuckets);
Expand Down
Loading