Skip to content

Commit

Permalink
feat: support 'bin' function push down (#5246)
Browse files Browse the repository at this point in the history
close #5103
  • Loading branch information
An-DJ authored Jul 25, 2022
1 parent e289119 commit da32955
Show file tree
Hide file tree
Showing 6 changed files with 296 additions and 1 deletion.
35 changes: 35 additions & 0 deletions dbms/src/Common/hex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,38 @@ const char * const hex_char_to_digit_table = "\xff\xff\xff\xff\xff\xff\xff\xff\x
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";

/// Lookup table holding the binary text of every byte value, 8 characters per value,
/// most significant bit first. The digits for byte `b` start at index `b * 8`.
/// Each source line below covers 8 consecutive byte values (64 characters).
const char * const bin_byte_to_char_table = "0000000000000001000000100000001100000100000001010000011000000111"
"0000100000001001000010100000101100001100000011010000111000001111"
"0001000000010001000100100001001100010100000101010001011000010111"
"0001100000011001000110100001101100011100000111010001111000011111"
"0010000000100001001000100010001100100100001001010010011000100111"
"0010100000101001001010100010101100101100001011010010111000101111"
"0011000000110001001100100011001100110100001101010011011000110111"
"0011100000111001001110100011101100111100001111010011111000111111"
"0100000001000001010000100100001101000100010001010100011001000111"
"0100100001001001010010100100101101001100010011010100111001001111"
"0101000001010001010100100101001101010100010101010101011001010111"
"0101100001011001010110100101101101011100010111010101111001011111"
"0110000001100001011000100110001101100100011001010110011001100111"
"0110100001101001011010100110101101101100011011010110111001101111"
"0111000001110001011100100111001101110100011101010111011001110111"
"0111100001111001011110100111101101111100011111010111111001111111"
"1000000010000001100000101000001110000100100001011000011010000111"
"1000100010001001100010101000101110001100100011011000111010001111"
"1001000010010001100100101001001110010100100101011001011010010111"
"1001100010011001100110101001101110011100100111011001111010011111"
"1010000010100001101000101010001110100100101001011010011010100111"
"1010100010101001101010101010101110101100101011011010111010101111"
"1011000010110001101100101011001110110100101101011011011010110111"
"1011100010111001101110101011101110111100101111011011111010111111"
"1100000011000001110000101100001111000100110001011100011011000111"
"1100100011001001110010101100101111001100110011011100111011001111"
"1101000011010001110100101101001111010100110101011101011011010111"
"1101100011011001110110101101101111011100110111011101111011011111"
"1110000011100001111000101110001111100100111001011110011011100111"
"1110100011101001111010101110101111101100111011011110111011101111"
"1111000011110001111100101111001111110100111101011111011011110111"
"1111100011111001111110101111101111111100111111011111111011111111";

/// For every byte value, the number of binary digits left after dropping leading zeros.
/// The value 0 maps to 1 so that it is still printed as a single digit.
const size_t bin_byte_no_zero_prefix_len[256] = {
    // 0..1: one digit
    1, 1,
    // 2..3: two digits
    2, 2,
    // 4..7: three digits
    3, 3, 3, 3,
    // 8..15: four digits
    4, 4, 4, 4, 4, 4, 4, 4,
    // 16..31: five digits
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    // 32..63: six digits
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    // 64..127: seven digits
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    // 128..255: all eight digits
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};
15 changes: 15 additions & 0 deletions dbms/src/Common/hex.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ inline void writeHexByteLowercase(UInt8 byte, void * out)
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}

/// Binary digit text for each byte value; read below in 8-character slices starting at `byte * 8`.
extern const char * const bin_byte_to_char_table;
/// Per-byte digit count used below to skip the leading zeros of a byte; indexed by the byte value.
extern const size_t bin_byte_no_zero_prefix_len[];

/// Copies the full 8-character binary text of `byte` into `out`.
/// Exactly 8 bytes are written; no terminator is appended.
inline void writeBinByte(UInt8 byte, void * out)
{
    const char * digits = bin_byte_to_char_table + static_cast<size_t>(byte) * 8;
    memcpy(out, digits, 8);
}

/// Copies the binary text of `byte` into `out` without its leading zeros and
/// returns the number of characters written, as given by `bin_byte_no_zero_prefix_len`.
/// No terminator is appended.
inline size_t writeNoZeroPrefixBinByte(UInt8 byte, void * out)
{
    const size_t index = static_cast<size_t>(byte);
    const size_t digit_count = bin_byte_no_zero_prefix_len[index];
    /// The kept digits are the tail of the byte's 8-character slice.
    const char * first_digit = bin_byte_to_char_table + index * 8 + (8 - digit_count);
    memcpy(out, first_digit, digit_count);
    return digit_count;
}

/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table)
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
{tipb::ScalarFuncSig::TimestampDiff, "tidbTimestampDiff"},

//{tipb::ScalarFuncSig::BitLength, "cast"},
//{tipb::ScalarFuncSig::Bin, "cast"},
{tipb::ScalarFuncSig::Bin, "bin"},
{tipb::ScalarFuncSig::ASCII, "ascii"},
//{tipb::ScalarFuncSig::Char, "cast"},
{tipb::ScalarFuncSig::CharLengthUTF8, "lengthUTF8"},
Expand Down
122 changes: 122 additions & 0 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <Common/TargetSpecific.h>
#include <Common/UTF8Helpers.h>
#include <Common/Volnitsky.h>
#include <Common/hex.h>
#include <Core/AccurateComparison.h>
#include <DataTypes/DataTypeArray.h>
#include <Flash/Coprocessor/DAGContext.h>
Expand Down Expand Up @@ -5299,6 +5300,126 @@ class FunctionHexInt : public IFunction
}
};

class FunctionBin : public IFunction
{
public:
static constexpr auto name = "bin";
static constexpr size_t word_size = 8;
FunctionBin() = default;

static FunctionPtr create(const Context & /*context*/)
{
return std::make_shared<FunctionBin>();
}

std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
throw Exception(
fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

auto first_argument = removeNullable(arguments[0]);
if (!first_argument->isInteger())
throw Exception(
fmt::format("Illegal type {} of first argument of function {}", first_argument->getName(), getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

return std::make_shared<DataTypeString>();
}

template <typename T>
static void executeOneUIntOrInt(T data, char *& out)
{
auto x = static_cast<Int64>(data); // NOLINT
bool was_nonzero = false;
bool was_first_nonzero_byte = true;
for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8)
{
UInt8 byte = x >> offset;
/// Skip leading zeros
if (byte == 0 && !was_nonzero && offset)
continue;
was_nonzero = true;
if (was_first_nonzero_byte)
{
out += writeNoZeroPrefixBinByte(byte, out);
was_first_nonzero_byte = false;
}
else
{
writeBinByte(byte, out);
out += word_size;
}
}
*out = '\0';
++out;
}

template <typename T>
bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const
{
auto * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte.
if (col_vec)
{
auto col_str = ColumnString::create();
ColumnString::Chars_t & out_vec = col_str->getChars();
ColumnString::Offsets & out_offsets = col_str->getOffsets();
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
size_t size = in_vec.size();
out_offsets.resize(size);
out_vec.resize(size * (word_size + 1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte.
size_t pos = 0;
for (size_t i = 0; i < size; ++i)
{
/// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it).
if (pos + MAX_LENGTH > out_vec.size())
out_vec.resize(out_vec.size() * word_size + MAX_LENGTH);
char * begin = reinterpret_cast<char *>(&out_vec[pos]);
char * end = begin;
executeOneUIntOrInt(in_vec[i], end);
pos += end - begin;
out_offsets[i] = pos;
}
out_vec.resize(pos);
col_res = std::move(col_str);
return true;
}
else
{
return false;
}
}

void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * column = block.getByPosition(arguments[0]).column.get();
ColumnPtr res_column;
if (tryExecuteUIntOrInt<UInt8>(column, res_column)
|| tryExecuteUIntOrInt<UInt16>(column, res_column)
|| tryExecuteUIntOrInt<UInt32>(column, res_column)
|| tryExecuteUIntOrInt<UInt64>(column, res_column)
|| tryExecuteUIntOrInt<Int8>(column, res_column)
|| tryExecuteUIntOrInt<Int16>(column, res_column)
|| tryExecuteUIntOrInt<Int32>(column, res_column)
|| tryExecuteUIntOrInt<Int64>(column, res_column))
{
block.getByPosition(result).column = std::move(res_column);
return;
}
else
{
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}

private:
};

// clang-format off
struct NameEmpty { static constexpr auto name = "empty"; };
struct NameNotEmpty { static constexpr auto name = "notEmpty"; };
Expand Down Expand Up @@ -5386,5 +5507,6 @@ void registerFunctionsString(FunctionFactory & factory)
factory.registerFunction<FunctionHexStr>();
factory.registerFunction<FunctionHexInt>();
factory.registerFunction<FunctionRepeat>();
factory.registerFunction<FunctionBin>();
}
} // namespace DB
62 changes: 62 additions & 0 deletions dbms/src/Functions/tests/gtest_bin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>
namespace DB::tests
{
/// Fixture for the `bin` function tests; adds nothing on top of FunctionTest.
class TestBin : public DB::tests::FunctionTest
{
};

/// A single small positive value is printed without leading zeros.
TEST_F(TestBin, Simple)
try
{
    const auto input = createColumn<Int64>({100});
    const auto expected = createColumn<String>({"1100100"});
    ASSERT_COLUMN_EQ(expected, executeFunction("bin", input));
}
CATCH

/// Zero, minimum and maximum of every supported integer type.
/// The expected strings are written one byte (8 digits) per literal; adjacent literals are
/// concatenated by the compiler. Signed minimums are expected as 64-digit strings.
TEST_F(TestBin, Boundary)
try
{
    // Signed types.
    const auto int8_input = createColumn<Int8>({0, INT8_MIN, INT8_MAX});
    const auto int8_expected = createColumn<String>(
        {"0",
         "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "10000000",
         "1111111"});
    ASSERT_COLUMN_EQ(int8_expected, executeFunction("bin", int8_input));

    const auto int16_input = createColumn<Int16>({0, INT16_MIN, INT16_MAX});
    const auto int16_expected = createColumn<String>(
        {"0",
         "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "10000000" "00000000",
         "1111111" "11111111"});
    ASSERT_COLUMN_EQ(int16_expected, executeFunction("bin", int16_input));

    const auto int32_input = createColumn<Int32>({0, INT32_MIN, INT32_MAX});
    const auto int32_expected = createColumn<String>(
        {"0",
         "11111111" "11111111" "11111111" "11111111" "10000000" "00000000" "00000000" "00000000",
         "1111111" "11111111" "11111111" "11111111"});
    ASSERT_COLUMN_EQ(int32_expected, executeFunction("bin", int32_input));

    const auto int64_input = createColumn<Int64>({0, INT64_MIN, INT64_MAX});
    const auto int64_expected = createColumn<String>(
        {"0",
         "10000000" "00000000" "00000000" "00000000" "00000000" "00000000" "00000000" "00000000",
         "1111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111"});
    ASSERT_COLUMN_EQ(int64_expected, executeFunction("bin", int64_input));

    // Unsigned types.
    const auto uint8_input = createColumn<UInt8>({0, 255});
    const auto uint8_expected = createColumn<String>(
        {"0",
         "11111111"});
    ASSERT_COLUMN_EQ(uint8_expected, executeFunction("bin", uint8_input));

    const auto uint16_input = createColumn<UInt16>({0, 65535});
    const auto uint16_expected = createColumn<String>(
        {"0",
         "11111111" "11111111"});
    ASSERT_COLUMN_EQ(uint16_expected, executeFunction("bin", uint16_input));

    const auto uint32_input = createColumn<UInt32>({0, 4294967295});
    const auto uint32_expected = createColumn<String>(
        {"0",
         "11111111" "11111111" "11111111" "11111111"});
    ASSERT_COLUMN_EQ(uint32_expected, executeFunction("bin", uint32_input));

    const auto uint64_input = createColumn<UInt64>({0, ULLONG_MAX});
    const auto uint64_expected = createColumn<String>(
        {"0",
         "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111"});
    ASSERT_COLUMN_EQ(uint64_expected, executeFunction("bin", uint64_input));
}
CATCH

} // namespace DB::tests
61 changes: 61 additions & 0 deletions tests/fullstack-test/expr/bin.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fixture t1: signed bigint column with 0, small positives, both bigint extremes and NULL.
mysql> drop table if exists test.t1;
mysql> create table test.t1(c1 bigint);
mysql> insert into test.t1 values(0);
mysql> insert into test.t1 values(1);
mysql> insert into test.t1 values(44);
mysql> insert into test.t1 values(100);
mysql> insert into test.t1 values(-9223372036854775808);
mysql> insert into test.t1 values(9223372036854775807);
mysql> insert into test.t1 values(NULL);
mysql> alter table test.t1 set tiflash replica 1;
# Fixture t2: unsigned bigint column with 0, small positives, the unsigned maximum and NULL.
mysql> drop table if exists test.t2;
mysql> create table test.t2(c1 bigint unsigned);
mysql> insert into test.t2 values(0);
mysql> insert into test.t2 values(1);
mysql> insert into test.t2 values(44);
mysql> insert into test.t2 values(100);
mysql> insert into test.t2 values(18446744073709551615);
mysql> insert into test.t2 values(NULL);
mysql> alter table test.t2 set tiflash replica 1;

# Wait for both tables before querying them.
func> wait_table test t1
func> wait_table test t2

# bin() over the signed column, read from TiFlash with MPP enforced; the expected result table follows.
mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t1;
+------------------------------------------------------------------+
| bin(c1) |
+------------------------------------------------------------------+
| 0 |
| 1 |
| 101100 |
| 1100100 |
| 1000000000000000000000000000000000000000000000000000000000000000 |
| 111111111111111111111111111111111111111111111111111111111111111 |
| NULL |
+------------------------------------------------------------------+
# bin() over the unsigned column, read from TiFlash with MPP enforced; the expected result table follows.
mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t2;
+------------------------------------------------------------------+
| bin(c1) |
+------------------------------------------------------------------+
| 0 |
| 1 |
| 101100 |
| 1100100 |
| 1111111111111111111111111111111111111111111111111111111111111111 |
| NULL |
+------------------------------------------------------------------+

0 comments on commit da32955

Please sign in to comment.