Skip to content

Commit

Permalink
Support shaded UDF resolution for static registration
Browse files Browse the repository at this point in the history
  • Loading branch information
ljfgem committed Jun 5, 2024
1 parent a83b648 commit b61b0f3
Show file tree
Hide file tree
Showing 13 changed files with 215 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -23,6 +23,7 @@
import org.apache.calcite.sql.SqlSpecialOperator;
import org.apache.calcite.sql.SqlUnresolvedFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlOperandTypeChecker;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
Expand Down Expand Up @@ -143,9 +144,22 @@ public Function tryResolve(@Nonnull String functionName, @Nullable Table hiveTab
* @return list of matching Functions or empty list if there is no match
*/
public Collection<Function> resolve(String functionName) {
Collection<Function> staticLookup = registry.lookup(functionName);
Collection<Function> staticLookup = registry.lookup(removeShadingPrefix(functionName));
if (!staticLookup.isEmpty()) {
return staticLookup;
if (isClassShaded(functionName)) {
// If the UDF class name is shaded, we need to return the function
// in registry with shaded class name to let Calcite know that the
// shaded UDF is legitimate.
return staticLookup.stream().map(f -> {
SqlOperator sqlOperator = f.getSqlOperator();
SqlUserDefinedFunction sqlUserDefinedFunction =
new SqlUserDefinedFunction(new SqlIdentifier(functionName, SqlParserPos.ZERO),
sqlOperator.getReturnTypeInference(), null, sqlOperator.getOperandTypeChecker(), null, null);
return new Function(functionName, sqlUserDefinedFunction);
}).collect(Collectors.toList());
} else {
return staticLookup;
}
} else {
Collection<Function> dynamicLookup = ImmutableList.of();
Function Function = dynamicFunctionRegistry.get(functionName);
Expand Down Expand Up @@ -182,8 +196,10 @@ public Collection<Function> tryResolveAsDaliFunction(String functionName, @Nonnu
if (funcClassName == null) {
return ImmutableList.of();
}
final Collection<Function> Functions = registry.lookup(funcClassName);
if (Functions.size() == 0) {
// If the UDF class name is shaded, remove the shading prefix, which allows user to
// register the unshaded UDF once and use different shading prefix in the view
final Collection<Function> functions = registry.lookup(removeShadingPrefix(funcClassName));
if (functions.size() == 0) {
Collection<Function> dynamicResolvedFunctions =
resolveDaliFunctionDynamically(functionName, funcClassName, hiveTable, numOfOperands);

Expand All @@ -194,12 +210,25 @@ public Collection<Function> tryResolveAsDaliFunction(String functionName, @Nonnu
}

return dynamicResolvedFunctions;
} else {
if (isClassShaded(funcClassName)) {
// If the UDF class name is shaded, we need to return the function in registry
// with shaded class name. Note that the original function in registry is unshaded.
return functions.stream().map(f -> {
SqlUserDefinedFunction sqlUserDefinedFunction = (SqlUserDefinedFunction) f.getSqlOperator();
VersionedSqlUserDefinedFunction versionedSqlUserDefinedFunction =
new VersionedSqlUserDefinedFunction(funcClassName, sqlUserDefinedFunction.getReturnTypeInference(),
sqlUserDefinedFunction.getOperandTypeChecker(), sqlUserDefinedFunction.getParamTypes(),
sqlUserDefinedFunction.getFunction(), hiveTable.getDaliUdfDependencies(), functionName);
return new Function(functionName, versionedSqlUserDefinedFunction);
}).collect(Collectors.toList());
} else {
return functions.stream()
.map(f -> new Function(f.getFunctionName(), new VersionedSqlUserDefinedFunction(
(SqlUserDefinedFunction) f.getSqlOperator(), hiveTable.getDaliUdfDependencies(), functionName)))
.collect(Collectors.toList());
}
}

return Functions.stream()
.map(f -> new Function(f.getFunctionName(), new VersionedSqlUserDefinedFunction(
(SqlUserDefinedFunction) f.getSqlOperator(), hiveTable.getDaliUdfDependencies(), functionName)))
.collect(Collectors.toList());
}

public void addDynamicFunctionToTheRegistry(String funcClassName, Function function) {
Expand Down Expand Up @@ -238,4 +267,33 @@ public void addDynamicFunctionToTheRegistry(String funcClassName, Function funct

return sqlOperandTypeChecker;
}

/**
* Checks if the given class name has a shading prefix.
* A class name is considered shaded if the prefix before the first dot
* contains underscore, e.g., "abc_0_1.com.linkedin.x.y.z".
*/
private boolean isClassShaded(String className) {
if (className != null && !className.isEmpty()) {
int firstDotIndex = className.indexOf('.');
if (firstDotIndex != -1) {
String prefix = className.substring(0, firstDotIndex);
return prefix.matches(".+_.+");
}
}
return false;
}

/**
* Removes the shading prefix from a given UDF class name if it is present.
* A class name is considered to have a shading prefix if the prefix contains
* underscore, e.g., "abc_0_1.com.linkedin.x.y.z".
*/
private String removeShadingPrefix(String className) {
if (isClassShaded(className)) {
return className.substring(className.indexOf('.') + 1);
} else {
return className;
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -53,8 +53,13 @@
* a new version of library.
* TODO: Provide function registry catalog
*
* Note that Coral maintains a copy of StaticHiveFunctionRegistry for read only at run time.
* Notes:
* 1. Coral maintains a copy of StaticHiveFunctionRegistry for read only at run time.
* For individual query, we create a copy of function registry in a RelConTextProvider object.
* 2. Coral supports registering an unshaded UDF once and allows the use of different shaded UDFs.
* For example, user can register "com.linkedin.abc" once and then use variations like "abc_0_1.com.linkedin.abc"
* or "x_y_z.com.linkedin.abc" in Hive view definitions. A UDF is considered shaded if the prefix before the
* first dot contains an underscore.
*/
public class StaticHiveFunctionRegistry implements FunctionRegistry {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -356,6 +356,14 @@ public void testDaliUDFCall() {
assertEquals(RelOptUtil.toString(rel), expectedPlan);
}

@Test
public void testUDFWithShadedClassName() {
RelNode rel = converter.convertView("test", "tableOneViewShadePrefixUDF");
String expectedPlan = "LogicalProject(EXPR$0=[shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF($0)])\n"
+ " LogicalTableScan(table=[[hive, test, tableone]])\n";
assertEquals(RelOptUtil.toString(rel), expectedPlan);
}

@Test(expectedExceptions = UnknownSqlFunctionException.class)
public void testUnresolvedUdfError() {
final String sql = "SELECT default_foo_IsTestMemberId(a) from foo";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,14 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException {
throw new RuntimeException("Failed to setup view");
}

driver.run(
"create function lessThanHundred_with_shade_prefix as 'shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF'");
response = driver.run(
"CREATE VIEW IF NOT EXISTS test.tableOneViewShadePrefixUDF as SELECT lessThanHundred_with_shade_prefix(a) from test.tableOne");
if (response.getResponseCode() != 0) {
throw new RuntimeException("Failed to setup view");
}

driver.run(
"CREATE TABLE IF NOT EXISTS union_table(foo uniontype<int, double, array<string>, struct<a:int,b:string>>)");

Expand Down Expand Up @@ -232,8 +240,12 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException {
setOrUpdateDaliFunction(tableOneView, "LessThanHundred", "com.linkedin.coral.hive.hive2rel.CoralTestUDF");
Table tableOneViewLateralUDTF = msc.getTable("test", "tableOneViewLateralUDTF");
setOrUpdateDaliFunction(tableOneViewLateralUDTF, "CountOfRow", "com.linkedin.coral.hive.hive2rel.CoralTestUDTF");
Table tableOneViewShadePrefixUDF = msc.getTable("test", "tableOneViewShadePrefixUDF");
setOrUpdateDaliFunction(tableOneViewShadePrefixUDF, "lessThanHundred_with_shade_prefix",
"shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF");
msc.alter_table("test", "tableOneView", tableOneView);
msc.alter_table("test", "tableOneViewLateralUDTF", tableOneViewLateralUDTF);
msc.alter_table("test", "tableOneViewShadePrefixUDF", tableOneViewShadePrefixUDF);
hive = testHive;
return hive;
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package shade_prefix.com.linkedin.coral.hive.hive2rel;

import org.apache.hadoop.hive.ql.exec.UDF;


// This is used in TestUtils to set up as dali function
// This needs in a separate file for Hive to correctly load for setup
public class CoralTestUDF extends UDF {
public boolean evaluate(int input) {
return input < 100;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2022-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -164,6 +164,38 @@ public void testFallBackToLinkedInHiveUDFTransformer() {
assertEquals(sparkSqlStmt, targetSqlStmt);
}

@Test
public void testUDFWithShadedClassName() {
// After registering unshaded UDF "com.linkedin.coral.hive.hive2rel.CoralTestUDF",
// Coral should be able to translate the view with the corresponding shaded UDF class
// "shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF"
RelNode relNode = TestUtils.toRelNode("default", "foo_udf_with_shade_prefix");
CoralSpark coralSpark = createCoralSpark(relNode);
List<SparkUDFInfo> udfJars = coralSpark.getSparkUDFInfoList();

// Shaded UDF class name should be returned for UDF registration, otherwise
// Spark can't find the unshaded UDF name in the shaded Jar.
String udfClassName = udfJars.get(0).getClassName();
String targetClassName = "shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF";
assertEquals(udfClassName, targetClassName);

String udfFunctionName = udfJars.get(0).getFunctionName();
String targetFunctionName = "LessThanHundred_0_1";
assertEquals(udfFunctionName, targetFunctionName);

List<String> listOfUriStrings = convertToListOfUriStrings(udfJars.get(0).getArtifactoryUrls());
String targetArtifactoryUrl = "ivy://com.linkedin:udf-shaded:1.0";
assertTrue(listOfUriStrings.contains(targetArtifactoryUrl));

SparkUDFInfo.UDFTYPE testUdfType = udfJars.get(0).getUdfType();
SparkUDFInfo.UDFTYPE targetUdfType = SparkUDFInfo.UDFTYPE.HIVE_CUSTOM_UDF;
assertEquals(testUdfType, targetUdfType);

String sparkSqlStmt = coralSpark.getSparkSql();
String targetSqlStmt = "SELECT LessThanHundred_0_1(foo.a)\n" + "FROM default.foo foo";
assertEquals(sparkSqlStmt, targetSqlStmt);
}

@Test(expectedExceptions = UnsupportedUDFException.class)
public void testUnsupportedUdf() {
RelNode relNode = TestUtils.toRelNode("default", "foo_dali_udf5");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -83,6 +83,8 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce
"create function default_foo_duplicate_udf_LessThanHundred as 'com.linkedin.coral.hive.hive2rel.CoralTestUDF'");
run(driver, "CREATE FUNCTION LessThanHundred as 'com.linkedin.coral.hive.hive2rel.CoralTestUDF'");

run(driver, "CREATE FUNCTION LessThanHundred_0_1 as 'shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF'");

run(driver, String.join("\n", "", "CREATE VIEW IF NOT EXISTS foo_view", "AS", "SELECT b AS bcol, sum(c) AS sum_c",
"FROM foo", "GROUP BY b"));
run(driver, "DROP VIEW IF EXISTS foo_v1");
Expand Down Expand Up @@ -235,6 +237,11 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce
" 'dependencies' = 'ivy://com.linkedin:udf:1.0')", "AS",
"SELECT default_foo_duplicate_udf_LessThanHundred(a), default_foo_duplicate_udf_LessThanHundred(a)",
"FROM foo"));

run(driver, String.join("\n", "", "CREATE VIEW IF NOT EXISTS foo_udf_with_shade_prefix",
"tblproperties('functions' = 'LessThanHundred_0_1:shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUDF',",
" 'dependencies' = 'ivy://com.linkedin:udf-shaded:1.0')", "AS", "SELECT LessThanHundred_0_1(a)",
"FROM foo"));
}

private static void executeCreateTableQuery(Driver driver, String dbName, String tableName, String schema) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package shade_prefix.com.linkedin.coral.hive.hive2rel;

import org.apache.hadoop.hive.ql.exec.UDF;


// This is used in TestUtils to set up as dali function
// This needs in a separate file for Hive to correctly load for setup
public class CoralTestUDF extends UDF {
public boolean evaluate(int input) {
return input < 100;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Copyright 2023-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -27,7 +27,7 @@ public class GenericCoralRegistryOperatorRenameSqlCallTransformer extends SqlCal

@Override
protected boolean condition(SqlCall sqlCall) {
return sqlCall.getOperator().getName().startsWith("com.linkedin");
return sqlCall.getOperator().getName().contains("com.linkedin");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import com.google.common.collect.ImmutableMap;

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.MetaException;
Expand All @@ -20,7 +22,10 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry;

import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static org.apache.calcite.sql.type.OperandTypes.*;
import static org.testng.Assert.assertEquals;


Expand All @@ -32,6 +37,8 @@ public class HiveToTrinoConverterTest {
public void beforeClass() throws IOException, HiveException, MetaException {
conf = TestUtils.loadResourceHiveConf();
TestUtils.initializeTablesAndViews(conf);
StaticHiveFunctionRegistry.createAddUserDefinedFunction("com.linkedin.coral.hive.hive2rel.CoralTestUdf",
ReturnTypes.BOOLEAN, family(SqlTypeFamily.INTEGER));
}

@AfterTest
Expand Down Expand Up @@ -994,4 +1001,14 @@ public void testSqlSelectAliasAppenderTransformerWithoutTableAliasPrefix() {
+ "WHERE \"tablea\".\"a\" > 5";
assertEquals(expandedSql, expected);
}

@Test
public void testUDFWithShadedClassName() {
RelNode relNode = TestUtils.getHiveToRelConverter().convertView("test", "udf_with_shade_prefix");
RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter();
String expandedSql = relToTrinoConverter.convert(relNode);

String expected = "SELECT \"coral_test_udf\"(\"tablea\".\"a\")\n" + "FROM \"test\".\"tablea\" AS \"tablea\"";
assertEquals(expandedSql, expected);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,13 @@ public static void initializeTablesAndViews(HiveConf conf) throws HiveException,
+ "UNION ALL\n"
+ "SELECT a_tinyint, a_smallint, a_integer, a_bigint, a_float FROM test.table_with_mixed_columns");

run(driver, "CREATE FUNCTION LessThanHundred_0_1 AS 'shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUdf'");

run(driver, String.join("\n", "CREATE VIEW IF NOT EXISTS test.udf_with_shade_prefix",
"tblproperties('functions' = 'LessThanHundred_0_1:shade_prefix.com.linkedin.coral.hive.hive2rel.CoralTestUdf',",
" 'dependencies' = 'ivy://com.linkedin:udf-shaded:1.0')", "AS", "SELECT LessThanHundred_0_1(a)",
"FROM test.tableA"));

// Tables used in RelToTrinoConverterTest
run(driver,
"CREATE TABLE IF NOT EXISTS test.tableOne(icol int, dcol double, scol string, tcol timestamp, acol array<string>)");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package shade_prefix.com.linkedin.coral.hive.hive2rel;

import org.apache.hadoop.hive.ql.exec.UDF;


// This is used in TestUtils to set up as dali function
// This needs in a separate file for Hive to correctly load for setup
public class CoralTestUdf extends UDF {
public boolean evaluate(int input) {
return input < 100;
}
}

0 comments on commit b61b0f3

Please sign in to comment.