Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Highlight In SQL #96

Merged
merged 27 commits into from
Aug 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1097d9f
Add support for highlight to parser and AstExpressionBuilder
Jul 1, 2022
1a453e1
Add unit test for highlight in AstExpressionBuilder
Jul 4, 2022
0d5c87b
Add unit test for highlight in AstBuilderTest
Jul 5, 2022
26d0b7e
Support highlight as an Unresolved expression.
Jul 5, 2022
3f10b8b
Represent highlight as UnresolvedExpression.
Jul 5, 2022
543d0d7
Support highlight in Analyzer.
Jul 5, 2022
f47ffe7
Treat highlight as a proper function in AST
Jul 6, 2022
5fdb939
Add support for highlight in Analyzer
Jul 6, 2022
5c8db0a
Add a simple IT test for highlight.
Jul 6, 2022
ac9f080
Register highlight function in the BuiltInFunctionRepository
Jul 6, 2022
b526132
Partial support for highlight in physical plan.
Jul 6, 2022
807c475
Add HighlightOperator.
Jul 6, 2022
74b6492
Highlight alpha complete.
Jul 7, 2022
ad7affc
Initial implementation supporting highlight('*')
forestmvey Jul 25, 2022
a192a21
Add support for multiple highlight calls in select statement.
forestmvey Jul 25, 2022
092d054
Cleaning up code, adding copyright, and adding javadocs
forestmvey Jul 26, 2022
9bdbb86
Removed OpenSearchLogicalIndexScan highlightFields and dependencies.
forestmvey Jul 26, 2022
9b463cf
Adding tests and touching up code in prep for PR.
forestmvey Jul 26, 2022
2648e0c
Fixing checkstyle errors.
forestmvey Jul 26, 2022
7290e90
Added HighlightOperatorTest and additional testing.
forestmvey Jul 26, 2022
2feb2c4
Added HighlightExpressionTest
forestmvey Jul 27, 2022
cd6f911
Improving test coverage, fixing checkstyle errors, fixing jacoco errors.
forestmvey Jul 28, 2022
3db8788
Added javadocs, minor PR revisions, and fixed jacoco errors by improv…
forestmvey Jul 29, 2022
3ee3491
Code cleanup, adding parsing failure tests, and adding tests for high…
forestmvey Jul 29, 2022
e63b7b9
Removing HighlightOperator functionality and minor revisions based on…
forestmvey Jul 29, 2022
11d834b
Removed unnecessary visitHighlight call in PhysicalPlanNodeVisitor
forestmvey Jul 29, 2022
373353b
Fixing formatting errors
forestmvey Jul 30, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) {
child = windowAnalyzer.analyze(expr, context);
}

for (UnresolvedExpression expr : node.getProjectList()) {
HighlightAnalyzer highlightAnalyzer = new HighlightAnalyzer(expressionAnalyzer, child);
child = highlightAnalyzer.analyze(expr, context);

}

List<NamedExpression> namedExpressions =
selectExpressionAnalyzer.analyze(node.getProjectList(), context,
new ExpressionReferenceOptimizer(expressionAnalyzer.getRepository(), child));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import lombok.Getter;
Expand All @@ -29,6 +27,7 @@
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Literal;
Expand All @@ -44,12 +43,12 @@
import org.opensearch.sql.ast.expression.WindowFunction;
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.LiteralExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.NamedExpression;
Expand Down Expand Up @@ -191,6 +190,12 @@ public Expression visitWindowFunction(WindowFunction node, AnalysisContext conte
return expr;
}

/**
 * Build a {@link HighlightExpression} from an unresolved highlight call.
 * The highlight field of the node is analyzed by this same visitor and the
 * resulting expression becomes the argument of the highlight expression.
 *
 * @param node    unresolved highlight function
 * @param context analysis context passed on to the field's visitor
 * @return highlight expression wrapping the analyzed field
 */
@Override
public Expression visitHighlight(HighlightFunction node, AnalysisContext context) {
  return new HighlightExpression(node.getHighlightField().accept(this, context));
}

@Override
public Expression visitIn(In node, AnalysisContext context) {
return visitIn(node.getField(), node.getValueList(), context);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.analysis;

import lombok.RequiredArgsConstructor;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.planner.logical.LogicalHighlight;
import org.opensearch.sql.planner.logical.LogicalPlan;

/**
* Analyze the highlight in the {@link AnalysisContext} to construct the {@link
* LogicalPlan}.
*/
@RequiredArgsConstructor
public class HighlightAnalyzer extends AbstractNodeVisitor<LogicalPlan, AnalysisContext> {
private final ExpressionAnalyzer expressionAnalyzer;
private final LogicalPlan child;

public LogicalPlan analyze(UnresolvedExpression projectItem, AnalysisContext context) {
LogicalPlan highlight = projectItem.accept(this, context);
return (highlight == null) ? child : highlight;
}

@Override
public LogicalPlan visitAlias(Alias node, AnalysisContext context) {
if (!(node.getDelegated() instanceof HighlightFunction)) {
return null;
}

HighlightFunction unresolved = (HighlightFunction) node.getDelegated();
Expression field = expressionAnalyzer.analyze(unresolved.getHighlightField(), context);
return new LogicalHighlight(child, field);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Let;
Expand Down Expand Up @@ -254,4 +255,8 @@ public T visitKmeans(Kmeans node, C context) {
/**
 * Visit an {@link AD} node. This default implementation delegates to
 * {@code visitChildren}.
 */
public T visitAD(AD node, C context) {
return visitChildren(node, context);
}

/**
 * Visit a {@link HighlightFunction} node. This default implementation delegates to
 * {@code visitChildren}.
 */
public T visitHighlight(HighlightFunction node, C context) {
return visitChildren(node, context);
}
}
5 changes: 5 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Let;
Expand Down Expand Up @@ -261,6 +262,10 @@ public When when(UnresolvedExpression condition, UnresolvedExpression result) {
return new When(condition, result);
}

/**
 * Create an unresolved highlight function node.
 *
 * @param fieldName unresolved expression passed as the highlight field
 * @return a new {@link HighlightFunction} wrapping {@code fieldName}
 */
public UnresolvedExpression highlight(UnresolvedExpression fieldName) {
return new HighlightFunction(fieldName);
}

public UnresolvedExpression window(UnresolvedExpression function,
List<UnresolvedExpression> partitionByList,
List<Pair<SortOption, UnresolvedExpression>> sortList) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.expression;

import java.util.List;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;

/**
* Expression node of Highlight function.
*/
@AllArgsConstructor
@EqualsAndHashCode(callSuper = false)
@Getter
@ToString
public class HighlightFunction extends UnresolvedExpression {
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
private final UnresolvedExpression highlightField;

@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitHighlight(this, context);
}

@Override
public List<UnresolvedExpression> getChild() {
return List.of(highlightField);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ public T visitNamed(NamedExpression node, C context) {
return node.getDelegated().accept(this, context);
}

/**
 * Visit a {@link HighlightExpression}. This default implementation delegates to
 * {@code visitNode}.
 */
public T visitHighlight(HighlightExpression node, C context) {
return visitNode(node, context);
}

/**
 * Visit a {@link ReferenceExpression}. This default implementation delegates to
 * {@code visitNode}.
 */
public T visitReference(ReferenceExpression node, C context) {
return visitNode(node, context);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression;

import com.google.common.collect.ImmutableMap;
import java.util.List;
import lombok.Getter;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.env.Environment;
import org.opensearch.sql.expression.function.BuiltinFunctionName;

/**
* Highlight Expression.
*/
@Getter
public class HighlightExpression extends FunctionExpression {
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
private final Expression highlightField;

/**
* HighlightExpression Constructor.
* @param highlightField : Highlight field for expression.
*/
public HighlightExpression(Expression highlightField) {
super(BuiltinFunctionName.HIGHLIGHT.getName(), List.of(highlightField));
this.highlightField = highlightField;
}

/**
* Return String or Map value matching highlight field.
* @param valueEnv : Dataset to parse value from.
* @return : String or Map value of highlight fields.
*/
@Override
public ExprValue valueOf(Environment<Expression, ExprValue> valueEnv) {
String refName = "_highlight";
if (!getHighlightField().toString().contains("*")) {
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
refName += "." + StringUtils.unquoteText(getHighlightField().toString());
}
ExprValue retVal = valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING));

// If only one highlight returned, or no highlights can be parsed.
if (retVal.isMissing() || retVal.type() != ExprCoreType.STRUCT) {
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
return retVal;
}

var highlightMapBuilder = ImmutableMap.<String, ExprValue>builder();
highlightMapBuilder.putAll(retVal.tupleValue());
ImmutableMap.Builder<String, ExprValue> builder = new ImmutableMap.Builder<>();
for (var entry : retVal.tupleValue().entrySet()) {
String entryKey = "highlight(" + getHighlightField() + ")." + entry.getKey();
builder.put(entryKey, ExprValueUtils.stringValue(entry.getValue().toString()));
}

return ExprTupleValue.fromExprValueMap(builder.build());
}

/**
* Get type for HighlightExpression.
* @return : String type.
*/
@Override
public ExprType type() {
return ExprCoreType.STRING;
}

@Override
public <T, C> T accept(ExpressionNodeVisitor<T, C> visitor, C context) {
return visitor.visitHighlight(this, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ public enum BuiltinFunctionName {
MATCHPHRASE(FunctionName.of("matchphrase")),
QUERY_STRING(FunctionName.of("query_string")),
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),
HIGHLIGHT(FunctionName.of("highlight")),
MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")),
/**
* Legacy Relevance Function.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.experimental.UtilityClass;
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.env.Environment;

Expand Down Expand Up @@ -50,6 +51,14 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
repository.register(highlight());
}

private static FunctionResolver highlight() {
FunctionName functionName = BuiltinFunctionName.HIGHLIGHT.getName();
FunctionSignature functionSignature = new FunctionSignature(functionName, List.of(STRING));
FunctionBuilder functionBuilder = arguments -> new HighlightExpression(arguments.get(0));
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
return new FunctionResolver(functionName, ImmutableMap.of(functionSignature, functionBuilder));
}

private static FunctionResolver match_bool_prefix() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.planner.logical;

import java.util.Collections;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.expression.Expression;

/**
 * Logical plan node that carries the highlight field expression on top of a single
 * child plan.
 */
@Getter
@ToString
@EqualsAndHashCode(callSuper = true)
public class LogicalHighlight extends LogicalPlan {
  /** Expression identifying the field to highlight. */
  private final Expression highlightField;

  /**
   * Constructor of LogicalHighlight.
   *
   * @param childPlan the only child of this node
   * @param field     expression identifying the field to highlight
   */
  public LogicalHighlight(LogicalPlan childPlan, Expression field) {
    super(Collections.singletonList(childPlan));
    this.highlightField = field;
  }

  /**
   * Dispatch to {@link LogicalPlanNodeVisitor#visitHighlight}.
   *
   * @param visitor visitor to dispatch to
   * @param context visitor context
   * @return whatever the visitor returns for this node
   */
  @Override
  public <R, C> R accept(LogicalPlanNodeVisitor<R, C> visitor, C context) {
    return visitor.visitHighlight(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ public LogicalPlan window(LogicalPlan input,
return new LogicalWindow(input, windowFunction, windowDefinition);
}

/**
 * Create a {@link LogicalHighlight} over the given input plan.
 *
 * @param input child plan
 * @param field highlight field expression
 * @return new logical highlight node
 */
public LogicalPlan highlight(LogicalPlan input, Expression field) {
return new LogicalHighlight(input, field);
}

/**
 * Create a {@link LogicalRemove} over the given input plan; the fields are copied
 * into an immutable set.
 *
 * @param input  child plan
 * @param fields reference expressions handed to the remove node
 * @return new logical remove node
 */
public static LogicalPlan remove(LogicalPlan input, ReferenceExpression... fields) {
return new LogicalRemove(input, ImmutableSet.copyOf(fields));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public R visitFilter(LogicalFilter plan, C context) {
return visitNode(plan, context);
}

/**
 * Visit a {@link LogicalHighlight} node. This default implementation delegates to
 * {@code visitNode}.
 */
public R visitHighlight(LogicalHighlight plan, C context) {
return visitNode(plan, context);
}

/**
 * Visit a {@link LogicalAggregation} node. This default implementation delegates to
 * {@code visitNode}.
 */
public R visitAggregation(LogicalAggregation plan, C context) {
return visitNode(plan, context);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,4 @@ public ValuesOperator values(List<LiteralExpression>... values) {
/**
 * Create a {@link LimitOperator} over the given input plan.
 *
 * @param input  child physical plan
 * @param limit  limit value passed to the operator
 * @param offset offset value passed to the operator
 * @return new limit operator
 */
public static LimitOperator limit(PhysicalPlan input, Integer limit, Integer offset) {
return new LimitOperator(input, limit, offset);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,4 @@ public R visitMLCommons(PhysicalPlan node, C context) {
/**
 * Visit the AD physical plan node. This default implementation delegates to
 * {@code visitNode}.
 */
public R visitAD(PhysicalPlan node, C context) {
return visitNode(node, context);
}


}
Loading