Skip to content

Commit

Permalink
Add FunctionRegistry APIs to retrieve function metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
pramodsatya committed Mar 29, 2024
1 parent 3aa020d commit c2011b1
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 82 deletions.
7 changes: 7 additions & 0 deletions velox/expression/VectorFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
});
}

std::optional<VectorFunctionMetadata> getVectorFunctionMetadata(
    const std::string& name) {
  // The callback ignores the name it is handed and yields a copy of the
  // metadata stored in the registry entry.
  const auto extractMetadata = [](const auto& /*functionName*/,
                                  const auto& functionEntry) {
    return functionEntry.metadata;
  };
  return applyToVectorFunctionEntry<VectorFunctionMetadata>(
      name, extractMetadata);
}

TypePtr resolveVectorFunction(
const std::string& functionName,
const std::vector<TypePtr>& argTypes) {
Expand Down
5 changes: 5 additions & 0 deletions velox/expression/VectorFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ class SimpleFunctionAdapterFactory {
std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
const std::string& name);

/// Returns the metadata registered for the vector function with the specified
/// name, or std::nullopt if there is no function with the specified name.
std::optional<VectorFunctionMetadata> getVectorFunctionMetadata(
const std::string& name);

/// Given the name of a vector function and argument types, returns the
/// return type if the function exists and has a signature that binds to the
/// input types; otherwise returns nullptr.
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ add_library(velox_function_registry FunctionRegistry.cpp)
add_library(velox_coverage_util CoverageUtil.cpp)

target_link_libraries(velox_function_registry velox_expression velox_type
velox_core velox_exception)
velox_core velox_exception velox_exec)
target_link_libraries(velox_coverage_util velox_function_registry)
add_subdirectory(lib)
if(${VELOX_ENABLE_PRESTO_FUNCTIONS})
Expand Down
80 changes: 0 additions & 80 deletions velox/functions/CoverageUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,86 +274,6 @@ void printCoverageMap(
std::cout << out.str() << std::endl;
}

// Tells whether 'name' is a companion function name, i.e. the name of an
// existing aggregation function followed by one of the companion markers.
bool isCompanionFunctionName(
    const std::string& name,
    const std::unordered_map<std::string, exec::AggregateFunctionEntry>&
        aggregateFunctions) {
  // Markers are tried in exactly this order. The first marker that occurs in
  // 'name' decides where the base name ends (at the marker's last occurrence).
  auto baseLength = std::string::npos;
  for (const char* marker :
       {"_partial", "_merge_extract", "_merge", "_extract"}) {
    baseLength = name.rfind(marker);
    if (baseLength != std::string::npos) {
      break;
    }
  }
  if (baseLength == std::string::npos) {
    return false;
  }

  const auto baseName = name.substr(0, baseLength);
  return aggregateFunctions.find(baseName) != aggregateFunctions.end();
}

/// Collects the names of the scalar functions available in Velox, leaving out
/// companion functions and "internal" functions, and returns them sorted in
/// ascending order.
std::vector<std::string> getSortedScalarNames() {
  // "Internal" functions that must not be printed.
  static const std::unordered_set<std::string> kBlockList = {"row_constructor"};

  auto functions = getFunctionSignatures();

  std::vector<std::string> sortedNames;
  sortedNames.reserve(functions.size());
  exec::aggregateFunctions().withRLock([&](const auto& aggregates) {
    for (const auto& signatureEntry : functions) {
      const auto& candidate = signatureEntry.first;
      if (isCompanionFunctionName(candidate, aggregates)) {
        continue;
      }
      if (kBlockList.find(candidate) != kBlockList.end()) {
        continue;
      }
      sortedNames.push_back(candidate);
    }
  });
  std::sort(sortedNames.begin(), sortedNames.end());
  return sortedNames;
}

/// Collects the names of the aggregate functions available in Velox, leaving
/// out companion functions, and returns them sorted in ascending order.
std::vector<std::string> getSortedAggregateNames() {
  std::vector<std::string> sortedNames;
  exec::aggregateFunctions().withRLock([&sortedNames](const auto& registry) {
    sortedNames.reserve(registry.size());
    for (auto it = registry.begin(); it != registry.end(); ++it) {
      if (isCompanionFunctionName(it->first, registry)) {
        continue;
      }
      sortedNames.push_back(it->first);
    }
  });
  std::sort(sortedNames.begin(), sortedNames.end());
  return sortedNames;
}

/// Collects the names of the window functions available in Velox and returns
/// them sorted in ascending order. Companion functions and names that are
/// also registered as aggregate functions are left out.
std::vector<std::string> getSortedWindowNames() {
  const auto& windowRegistry = exec::windowFunctions();

  std::vector<std::string> sortedNames;
  sortedNames.reserve(windowRegistry.size());
  exec::aggregateFunctions().withRLock([&](const auto& aggregates) {
    for (const auto& windowEntry : windowRegistry) {
      const auto& candidate = windowEntry.first;
      if (isCompanionFunctionName(candidate, aggregates) ||
          aggregates.count(candidate) != 0) {
        continue;
      }
      sortedNames.emplace_back(candidate);
    }
  });
  std::sort(sortedNames.begin(), sortedNames.end());
  return sortedNames;
}

/// Takes a super-set of simple, vector and aggregate function names and prints
/// coverage map showing which of these functions are available in Velox.
/// Companion functions are excluded.
Expand Down
92 changes: 92 additions & 0 deletions velox/functions/FunctionRegistry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,96 @@ resolveVectorFunctionWithMetadata(
return exec::resolveVectorFunctionWithMetadata(functionName, argTypes);
}

bool isCompanionFunctionName(
    const std::string& name,
    const std::unordered_map<std::string, exec::AggregateFunctionEntry>&
        aggregateFunctions) {
  // Companion markers in priority order: the first marker found in 'name'
  // wins, and its last occurrence marks the end of the base function name.
  constexpr const char* kCompanionMarkers[] = {
      "_partial", "_merge_extract", "_merge", "_extract"};

  for (const char* marker : kCompanionMarkers) {
    const auto markerPos = name.rfind(marker);
    if (markerPos == std::string::npos) {
      continue;
    }
    // A companion name requires the base name to be a known aggregate.
    const auto baseName = name.substr(0, markerPos);
    return aggregateFunctions.find(baseName) != aggregateFunctions.end();
  }
  return false;
}

std::vector<std::string> getSortedScalarNames() {
  // "Internal" functions that are never reported.
  static const std::unordered_set<std::string> kBlockList = {
      "row_constructor", "in", "is_null"};

  auto signatureMap = getFunctionSignatures();

  std::vector<std::string> result;
  result.reserve(signatureMap.size());
  exec::aggregateFunctions().withRLock([&](const auto& registeredAggregates) {
    for (const auto& signatureEntry : signatureMap) {
      const auto& scalarName = signatureEntry.first;
      // Hide companion functions of aggregates and block-listed names.
      const bool hidden =
          isCompanionFunctionName(scalarName, registeredAggregates) ||
          kBlockList.count(scalarName) != 0;
      if (!hidden) {
        result.push_back(scalarName);
      }
    }
  });
  std::sort(result.begin(), result.end());
  return result;
}

std::vector<std::string> getSortedAggregateNames() {
  std::vector<std::string> result;
  exec::aggregateFunctions().withRLock([&](const auto& registeredAggregates) {
    result.reserve(registeredAggregates.size());
    for (const auto& aggregateEntry : registeredAggregates) {
      const auto& aggregateName = aggregateEntry.first;
      // Companion functions live in the same registry; leave them out.
      const bool isCompanion =
          isCompanionFunctionName(aggregateName, registeredAggregates);
      if (!isCompanion) {
        result.emplace_back(aggregateName);
      }
    }
  });
  std::sort(result.begin(), result.end());
  return result;
}

std::vector<std::string> getSortedWindowNames() {
  const auto& windowRegistry = exec::windowFunctions();

  std::vector<std::string> result;
  result.reserve(windowRegistry.size());
  exec::aggregateFunctions().withRLock([&](const auto& registeredAggregates) {
    for (const auto& windowEntry : windowRegistry) {
      const auto& windowName = windowEntry.first;
      // Skip companion functions as well as names that are also registered
      // as aggregate functions.
      if (isCompanionFunctionName(windowName, registeredAggregates)) {
        continue;
      }
      if (registeredAggregates.find(windowName) !=
          registeredAggregates.end()) {
        continue;
      }
      result.push_back(windowName);
    }
  });
  std::sort(result.begin(), result.end());
  return result;
}

std::optional<exec::VectorFunctionMetadata> getFunctionMetadata(
    const std::string& functionName) {
  // The simple function registry is consulted first; when it has entries for
  // this name, the metadata of the last entry is reported.
  const auto simpleEntries =
      exec::simpleFunctions().getFunctionSignaturesAndMetadata(functionName);
  if (!simpleEntries.empty()) {
    return simpleEntries.back().first;
  }

  // Functions like abs are registered as simple functions for primitive
  // types, and as a vector function for complex types like DECIMAL. A miss in
  // the simple function signature map is therefore not an error: fall back to
  // the vector function registry, which yields std::nullopt if the name is
  // unknown there as well.
  return exec::getVectorFunctionMetadata(functionName);
}

} // namespace facebook::velox
22 changes: 22 additions & 0 deletions velox/functions/FunctionRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <string>
#include <vector>

#include "velox/exec/Aggregate.h"
#include "velox/exec/WindowFunction.h"
#include "velox/expression/FunctionMetadata.h"
#include "velox/expression/FunctionSignature.h"
#include "velox/type/Type.h"
Expand Down Expand Up @@ -76,4 +78,24 @@ resolveVectorFunctionWithMetadata(
/// Clears the function registry.
void clearFunctionRegistry();

/// Returns true if 'name' is a companion function name: the last occurrence
/// of '_partial', '_merge_extract', '_merge' or '_extract' (checked in that
/// order, first match wins) is located in 'name', and the text preceding it
/// must be a key of 'aggregateFunctions'. Returns false if none of these
/// markers occurs in 'name'.
bool isCompanionFunctionName(
const std::string& name,
const std::unordered_map<std::string, exec::AggregateFunctionEntry>&
aggregateFunctions);

/// Returns the names of scalar functions available in Velox, sorted in
/// ascending order. Companion functions of aggregates and the internal
/// functions 'row_constructor', 'in' and 'is_null' are excluded.
std::vector<std::string> getSortedScalarNames();

/// Returns the names of aggregate functions available in Velox, sorted in
/// ascending order. Companion functions are excluded.
std::vector<std::string> getSortedAggregateNames();

/// Returns the names of window functions available in Velox, sorted in
/// ascending order. Companion functions and names that are also registered as
/// aggregate functions are excluded.
std::vector<std::string> getSortedWindowNames();

/// Returns the function metadata corresponding to functionName. The simple
/// function registry is consulted first; if it has entries for functionName,
/// the metadata of the last entry is returned. Otherwise the vector function
/// registry is consulted. Returns std::nullopt if neither registry knows the
/// function.
std::optional<exec::VectorFunctionMetadata> getFunctionMetadata(
const std::string& functionName);

} // namespace facebook::velox
33 changes: 32 additions & 1 deletion velox/functions/tests/FunctionRegistryTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ struct FuncOne {
const arg_type<velox::Varchar>& /* arg1 */) {
return true;
}

// Nullable-argument overload: takes the Varchar argument by pointer, ignores
// both parameters and always returns true.
// NOTE(review): presumably this overload is what makes func_one report
// defaultNullBehavior == false in the functionMetadata test; confirm.
FOLLY_ALWAYS_INLINE bool callNullable(
out_type<velox::Varchar>& /* result */,
const arg_type<velox::Varchar>* /* arg1 */) {
return true;
}
};

template <typename T>
Expand Down Expand Up @@ -191,7 +197,10 @@ VELOX_DECLARE_VECTOR_FUNCTION(
VELOX_DECLARE_VECTOR_FUNCTION_WITH_METADATA(
udf_vector_func_four,
VectorFuncFour::signatures(),
exec::VectorFunctionMetadataBuilder().deterministic(false).build(),
exec::VectorFunctionMetadataBuilder()
.deterministic(false)
.defaultNullBehavior(false)
.build(),
std::make_unique<VectorFuncFour>());

inline void registerTestFunctions() {
Expand Down Expand Up @@ -560,4 +569,26 @@ TEST_F(FunctionRegistryTest, resolveCast) {
velox::VeloxRuntimeError);
}

// Verifies that getFunctionMetadata() reports the expected 'deterministic' and
// 'defaultNullBehavior' flags for both simple and vector functions.
TEST_F(FunctionRegistryTest, functionMetadata) {
  // Uses gtest assertions rather than VELOX_USER_CHECK so that a mismatch is
  // reported as a regular test failure (with the offending function name in
  // the trace) instead of an exception escaping the test body. The name is
  // taken as std::string, which is what getFunctionMetadata() expects.
  auto checkMetadata = [](const std::string& functionName,
                          bool expectedDeterminism,
                          bool expectedDefaultNullBehavior) {
    SCOPED_TRACE(functionName);
    const auto metadata = getFunctionMetadata(functionName);
    ASSERT_TRUE(metadata.has_value())
        << "Metadata for " << functionName << " not found";
    EXPECT_EQ(metadata->deterministic, expectedDeterminism);
    EXPECT_EQ(metadata->defaultNullBehavior, expectedDefaultNullBehavior);
  };

  // Validate VectorFunctionMetadata for simple functions func_one and func_two.
  checkMetadata("func_one", false, false);
  checkMetadata("func_two", true, true);

  // Validate VectorFunctionMetadata for vector functions vector_func_three and
  // vector_func_four.
  checkMetadata("vector_func_three", true, true);
  checkMetadata("vector_func_four", false, false);
}

} // namespace facebook::velox

0 comments on commit c2011b1

Please sign in to comment.