Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Coral-Trino] Migrate FROM_UNIXTIME and FROM_UTC_TIMESTAMP #426

Merged
merged 4 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ protected SqlRexConvertletTable getConvertletTable() {
}

@Override
protected SqlValidator getSqlValidator() {
public SqlValidator getSqlValidator() {
return sqlValidator;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
*/
package com.linkedin.coral.trino.rel2trino;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand All @@ -28,7 +27,6 @@
import org.apache.calcite.rel.logical.LogicalSort;
import org.apache.calcite.rel.logical.LogicalUnion;
import org.apache.calcite.rel.logical.LogicalValues;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
Expand All @@ -42,12 +40,10 @@
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;

import com.linkedin.coral.com.google.common.collect.ImmutableList;
import com.linkedin.coral.common.functions.FunctionReturnTypes;
import com.linkedin.coral.common.functions.GenericProjectFunction;
import com.linkedin.coral.trino.rel2trino.functions.GenericProjectToTrinoConverter;

import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.MULTIPLY;
import static org.apache.calcite.sql.type.ReturnTypes.explicit;
import static org.apache.calcite.sql.type.SqlTypeName.*;

Expand Down Expand Up @@ -175,20 +171,6 @@ public RexNode visitCall(RexCall call) {

final String operatorName = call.getOperator().getName();

if (operatorName.equalsIgnoreCase("from_utc_timestamp")) {
Optional<RexNode> modifiedCall = visitFromUtcTimestampCall(call);
if (modifiedCall.isPresent()) {
return modifiedCall.get();
}
}

if (operatorName.equalsIgnoreCase("from_unixtime")) {
Optional<RexNode> modifiedCall = visitFromUnixtime(call);
if (modifiedCall.isPresent()) {
return modifiedCall.get();
}
}

if (operatorName.equalsIgnoreCase("cast")) {
Optional<RexNode> modifiedCall = visitCast(call);
if (modifiedCall.isPresent()) {
Expand Down Expand Up @@ -231,80 +213,6 @@ private Optional<RexNode> visitConcat(RexCall call) {
return Optional.of(rexBuilder.makeCall(op, castOperands));
}

private Optional<RexNode> visitFromUnixtime(RexCall call) {
List<RexNode> convertedOperands = visitList(call.getOperands(), (boolean[]) null);
SqlOperator formatDatetime = createSqlOperatorOfFunction("format_datetime", FunctionReturnTypes.STRING);
SqlOperator fromUnixtime = createSqlOperatorOfFunction("from_unixtime", explicit(TIMESTAMP));
if (convertedOperands.size() == 1) {
return Optional
.of(rexBuilder.makeCall(formatDatetime, rexBuilder.makeCall(fromUnixtime, call.getOperands().get(0)),
rexBuilder.makeLiteral("yyyy-MM-dd HH:mm:ss")));
} else if (convertedOperands.size() == 2) {
return Optional.of(rexBuilder.makeCall(formatDatetime,
rexBuilder.makeCall(fromUnixtime, call.getOperands().get(0)), call.getOperands().get(1)));
}
return Optional.empty();
}

private Optional<RexNode> visitFromUtcTimestampCall(RexCall call) {
RelDataType inputType = call.getOperands().get(0).getType();
// TODO(trinodb/trino#6295) support high-precision timestamp
RelDataType targetType = typeFactory.createSqlType(TIMESTAMP, 3);

List<RexNode> convertedOperands = visitList(call.getOperands(), (boolean[]) null);
RexNode sourceValue = convertedOperands.get(0);
RexNode timezone = convertedOperands.get(1);

// In below definitions we should use `TIMESTATMP WITH TIME ZONE`. As calcite is lacking
// this type we use `TIMESTAMP` instead. It does not have any practical implications as result syntax tree
// is not type-checked, and only used for generating output SQL for a view query.
SqlOperator trinoAtTimeZone =
createSqlOperatorOfFunction("at_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoWithTimeZone =
createSqlOperatorOfFunction("with_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoToUnixTime = createSqlOperatorOfFunction("to_unixtime", explicit(DOUBLE));
SqlOperator trinoFromUnixtimeNanos =
createSqlOperatorOfFunction("from_unixtime_nanos", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoFromUnixTime =
createSqlOperatorOfFunction("from_unixtime", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoCanonicalizeHiveTimezoneId =
createSqlOperatorOfFunction("$canonicalize_hive_timezone_id", explicit(VARCHAR));

RelDataType bigintType = typeFactory.createSqlType(BIGINT);
RelDataType doubleType = typeFactory.createSqlType(DOUBLE);

if (inputType.getSqlTypeName() == BIGINT || inputType.getSqlTypeName() == INTEGER
|| inputType.getSqlTypeName() == SMALLINT || inputType.getSqlTypeName() == TINYINT) {

return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixtimeNanos,
rexBuilder.makeCall(MULTIPLY, rexBuilder.makeCast(bigintType, sourceValue),
rexBuilder.makeBigintLiteral(BigDecimal.valueOf(1000000)))),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

if (inputType.getSqlTypeName() == DOUBLE || inputType.getSqlTypeName() == FLOAT
|| inputType.getSqlTypeName() == DECIMAL) {

return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixTime, rexBuilder.makeCast(doubleType, sourceValue)),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

if (inputType.getSqlTypeName() == TIMESTAMP || inputType.getSqlTypeName() == DATE) {
return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixTime,
rexBuilder.makeCall(trinoToUnixTime,
rexBuilder.makeCall(trinoWithTimeZone, sourceValue, rexBuilder.makeLiteral("UTC")))),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

return Optional.empty();
}

// Hive allows passing in a byte array or String to substr/substring, so we can make an effort to emulate the
// behavior by casting non-String input to String
// https://cwiki.apache.org/confluence/display/hive/languagemanual+udf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import com.linkedin.coral.trino.rel2trino.transformers.CollectListOrSetFunctionTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CoralRegistryOperatorRenameSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CurrentTimestampTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.FromUnixtimeOperatorTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.GenericCoralRegistryOperatorRenameSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.JoinSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.MapValueConstructorTransformer;
Expand Down Expand Up @@ -104,7 +105,7 @@ protected SqlCall transform(SqlCall sqlCall) {
+ "{\"op\": \"date\", \"operands\":[{\"op\": \"timestamp\", \"operands\":[{\"input\": 1}]}]}]",
null, null),
new ToDateOperatorTransformer(configs.getOrDefault(AVOID_TRANSFORM_TO_DATE_UDF, false)),
new CurrentTimestampTransformer(),
new CurrentTimestampTransformer(), new FromUnixtimeOperatorTransformer(),
yiqiangin marked this conversation as resolved.
Show resolved Hide resolved

// LinkedIn specific functions
new CoralRegistryOperatorRenameSqlCallTransformer(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* Copyright 2022-2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino;

import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.util.SqlShuttle;
import org.apache.calcite.sql.validate.SqlValidator;

import com.linkedin.coral.common.HiveMetastoreClient;
import com.linkedin.coral.common.transformers.SqlCallTransformers;
import com.linkedin.coral.common.utils.TypeDerivationUtil;
import com.linkedin.coral.hive.hive2rel.HiveToRelConverter;
import com.linkedin.coral.trino.rel2trino.transformers.FromUtcTimestampOperatorTransformer;


/**
* DataTypeDerivedSqlCallConverter transforms the sqlCalls
* in the input SqlNode representation to be compatible with Trino engine.
* The transformation may involve change in operator, reordering the operands
* or even re-constructing the SqlNode.
*
* All the transformations performed as part of this shuttle require RelDataType derivation.
*/
public class DataTypeDerivedSqlCallConverter extends SqlShuttle {
private final SqlCallTransformers operatorTransformerList;

public DataTypeDerivedSqlCallConverter(HiveMetastoreClient mscClient, SqlNode topSqlNode) {
SqlValidator sqlValidator = new HiveToRelConverter(mscClient).getSqlValidator();
TypeDerivationUtil typeDerivationUtil = new TypeDerivationUtil(sqlValidator, topSqlNode);
operatorTransformerList = SqlCallTransformers.of(new FromUtcTimestampOperatorTransformer(typeDerivationUtil));
}

@Override
public SqlNode visit(final SqlCall call) {
return operatorTransformerList.apply((SqlCall) super.visit(call));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@
import static com.google.common.base.Preconditions.*;
import static com.linkedin.coral.trino.rel2trino.Calcite2TrinoUDFConverter.convertRel;
import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.*;
import static org.apache.calcite.sql.parser.SqlParserPos.*;


public class RelToTrinoConverter extends RelToSqlConverter {
Expand Down Expand Up @@ -88,7 +86,12 @@ public RelToTrinoConverter(HiveMetastoreClient mscClient, Map<String, Boolean> c
public String convert(RelNode relNode) {
RelNode rel = convertRel(relNode, configs);
SqlNode sqlNode = convertToSqlNode(rel);
SqlNode sqlNodeWithUDFOperatorConverted = sqlNode.accept(new CoralToTrinoSqlCallConverter(configs));

SqlNode sqlNodeWithRelDataTypeDerivedConversions =
sqlNode.accept(new DataTypeDerivedSqlCallConverter(_hiveMetastoreClient, sqlNode));

SqlNode sqlNodeWithUDFOperatorConverted =
sqlNodeWithRelDataTypeDerivedConversions.accept(new CoralToTrinoSqlCallConverter(configs));
return sqlNodeWithUDFOperatorConverted.accept(new TrinoSqlRewriter()).toSqlString(TrinoSqlDialect.INSTANCE)
.toString();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino.transformers;

import java.util.List;

import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.parser.SqlParserPos;

import com.linkedin.coral.common.functions.FunctionReturnTypes;
import com.linkedin.coral.common.transformers.SqlCallTransformer;

import static org.apache.calcite.sql.type.ReturnTypes.*;
import static org.apache.calcite.sql.type.SqlTypeName.*;


/**
* This transformer operates on SqlCalls with 'FROM_UNIXTIME(x)' Coral IR function
* and transforms it to trino engine compatible function - FORMAT_DATETIME(FROM_UNIXTIME(x)).
* For Example:
* A SqlCall of the form: "FROM_UNIXTIME(10000)" is transformed to
* "FORMAT_DATETIME(FROM_UNIXTIME(10000), 'yyyy-MM-dd HH:mm:ss')"
*/
public class FromUnixtimeOperatorTransformer extends SqlCallTransformer {

private static final String FORMAT_DATETIME = "format_datetime";
private static final String FROM_UNIXTIME = "from_unixtime";

@Override
protected boolean condition(SqlCall sqlCall) {
return sqlCall.getOperator().getName().equalsIgnoreCase(FROM_UNIXTIME);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@findinpath If we make this check not simply based on the name so we make sure it only matches the Hive operator and not the Trino one, would we still need TimestampFromUnixtimeTransformer from #464? Also, can the operator in this case live only in coral-trino?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from_unixtime takes for both Trino and Hive a numeric value, I don't know how to distinguish it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed offline. We can use timestamp_from_unixtime as a different name to disambiguate. @findinpath will try it.

Copy link
Contributor

@findinpath findinpath Oct 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#467

Thank you @wmoustafa for the hint and help in putting together the PR.

}

@Override
protected SqlCall transform(SqlCall sqlCall) {
SqlOperator formatDatetimeOperator = createSqlOperator(FORMAT_DATETIME, FunctionReturnTypes.STRING);
SqlOperator fromUnixtimeOperator = createSqlOperator(FROM_UNIXTIME, explicit(TIMESTAMP));

List<SqlNode> operands = sqlCall.getOperandList();
if (operands.size() == 1) {
return formatDatetimeOperator.createCall(SqlParserPos.ZERO,
fromUnixtimeOperator.createCall(SqlParserPos.ZERO, operands.get(0)),
SqlLiteral.createCharString("yyyy-MM-dd HH:mm:ss", SqlParserPos.ZERO));
} else if (operands.size() == 2) {
return formatDatetimeOperator.createCall(SqlParserPos.ZERO,
fromUnixtimeOperator.createCall(SqlParserPos.ZERO, operands.get(0)), operands.get(1));
yiqiangin marked this conversation as resolved.
Show resolved Hide resolved
}
return sqlCall;
}
}
Loading