From 772c064c353ee7f51b1fabf67567b56e1d0945e7 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 7 Feb 2023 23:10:06 +0800 Subject: [PATCH 01/37] feat(expr):support is_ipv4 and is_ipv6 expr to pushdown to tiflash --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 4 +- dbms/src/Functions/FunctionsIsIPAddr.cpp | 28 ++++ dbms/src/Functions/FunctionsIsIPAddr.h | 136 ++++++++++++++++++ dbms/src/Functions/registerFunctions.cpp | 2 + dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 100 +++++++++++++ tests/fullstack-test/expr/is_ip_addr.test | 72 ++++++++++ 6 files changed, 340 insertions(+), 2 deletions(-) create mode 100644 dbms/src/Functions/FunctionsIsIPAddr.cpp create mode 100644 dbms/src/Functions/FunctionsIsIPAddr.h create mode 100644 dbms/src/Functions/tests/gtest_is_ip_addr.cpp create mode 100644 tests/fullstack-test/expr/is_ip_addr.test diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index d57aa58333f..b62480e7c5a 100755 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -424,10 +424,10 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::InetNtoa, "IPv4NumToString"}, {tipb::ScalarFuncSig::Inet6Aton, "tiDBIPv6StringToNum"}, {tipb::ScalarFuncSig::Inet6Ntoa, "tiDBIPv6NumToString"}, - //{tipb::ScalarFuncSig::IsIPv4, "cast"}, + {tipb::ScalarFuncSig::IsIPv4, "tiDBIsIPv4"}, //{tipb::ScalarFuncSig::IsIPv4Compat, "cast"}, //{tipb::ScalarFuncSig::IsIPv4Mapped, "cast"}, - //{tipb::ScalarFuncSig::IsIPv6, "cast"}, + {tipb::ScalarFuncSig::IsIPv6, "tiDBIsIPv6"}, //{tipb::ScalarFuncSig::UUID, "cast"}, {tipb::ScalarFuncSig::LikeSig, "like3Args"}, diff --git a/dbms/src/Functions/FunctionsIsIPAddr.cpp b/dbms/src/Functions/FunctionsIsIPAddr.cpp new file mode 100644 index 00000000000..fe8ae1db562 --- /dev/null +++ b/dbms/src/Functions/FunctionsIsIPAddr.cpp @@ -0,0 +1,28 @@ +// Copyright 2023 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ + + +void registerFunctionsIsIPAddr(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} // namespace DB diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h new file mode 100644 index 00000000000..24ab670fea9 --- /dev/null +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -0,0 +1,136 @@ +// Copyright 2023 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} // namespace ErrorCodes + +/** Helper functions + * + * isIPV4(x) - Judge whether the input string is an IPv4 address. + * + * isIPV6(x) - Judge whether the input string is an IPv6 address + * + */ + +static inline UInt8 isIPv4(String input_address) +{ + char str[INET_ADDRSTRLEN]; + if(input_address.empty()) return 0; + if(inet_pton(AF_INET, input_address.c_str(), str) == 1) return 1; + return 0; +} + +static inline UInt8 isIPv6(String input_address) +{ + char str[INET6_ADDRSTRLEN]; + if(input_address.empty()) return 0; + if(inet_pton(AF_INET6, input_address.c_str(), str) == 1) return 1; + return 0; +} + +class FunctionIsIPv4 : public IFunction +{ +public: + static constexpr auto name = "tiDBIsIPv4"; + FunctionIsIPv4() = default; + + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + std::string getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 1) + throw Exception( + fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return std::make_shared(); + } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); + + Field res_field; + int val_num = c0_col->size(); + auto col_res = ColumnUInt8::create(); + col_res->reserve(val_num); + + for (int i = 0; i < val_num; i++) + { + c0_col->get(i, res_field); + String handled_str = res_field.get(); + col_res->insert(static_cast(isIPv4(handled_str))); + } + + block.getByPosition(result).column = std::move(col_res); + } +private: +}; + +class FunctionIsIPv6 : public IFunction +{ +public: + static constexpr auto name = "tiDBIsIPv6"; + FunctionIsIPv6() = default; + + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + std::string getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 1) + throw Exception( + fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return std::make_shared(); + } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); + + Field res_field; + int val_num = c0_col->size(); + auto col_res = ColumnUInt8::create(); + col_res->reserve(val_num); + + for (int i = 0; i < val_num; i++) + { + c0_col->get(i, res_field); + String handled_str = res_field.get(); + col_res->insert(static_cast(isIPv6(handled_str))); + } + + block.getByPosition(result).column = std::move(col_res); + } +private: +}; +} // namespace DB diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 34587a74a55..c27768803b5 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -44,6 +44,7 @@ void registerFunctionsStringMath(FunctionFactory &); void registerFunctionsDuration(FunctionFactory &); void registerFunctionsRegexp(FunctionFactory &); void registerFunctionsJson(FunctionFactory &); +void registerFunctionsIsIPAddr(FunctionFactory &); void registerFunctions() @@ -73,6 +74,7 @@ void registerFunctions() registerFunctionsDuration(factory); registerFunctionsRegexp(factory); registerFunctionsJson(factory); + registerFunctionsIsIPAddr(factory); } } // namespace DB diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp new file mode 100644 index 00000000000..274b73ff855 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -0,0 +1,100 @@ +// Copyright 2023 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#include + +#pragma GCC diagnostic pop + +namespace DB +{ +namespace tests +{ +class TestIsIPAddr : public DB::tests::FunctionTest +{ +}; +TEST_F(TestIsIPAddr, isIPv4) +try +{ + // column with column + ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn({"123.123.123.123", + "0.0.0.0", + "127.0.0.1", + "192.168.0.0/10", + "192.168.99.22.123", + "999.999.999.999", + "3.2.1.", + "3..2.1", + "...", + "4556456", + "ajdjioa", + ""})})); + + // column with nullable + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", + "aidjio", + "1236.461.841.312", + "99.99.99.99", + std::nullopt})})); + +} +CATCH + + +TEST_F(TestIsIPAddr, isIPv6) +try +{ + // column with column + ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", + "0000:0000:0000:0000:0000:0000:0000:0000", + "2001:0:2851:b9f0:6d:2326:9036:f37a", + "fe80::2dc3:25a5:49a1:6002%24", + "4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00:0D00", + "4207:A33A:58D3:F2C3:8EDC:A548::0D00", + "4207::::8EDC:A548:3EC7:0D00", + "4207:::::A548:3EC7:0D00", + "::::::", + "4556456", + "ajdjioa", + ""})})); + + // column with nullable + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", + "aidjio", + "1236.461.841.312", + "99.99.99.99", + std::nullopt})})); + +} +CATCH + + +} // namespace tests +} // namespace DB diff --git a/tests/fullstack-test/expr/is_ip_addr.test b/tests/fullstack-test/expr/is_ip_addr.test new file mode 100644 index 00000000000..2decab65e88 --- /dev/null +++ b/tests/fullstack-test/expr/is_ip_addr.test @@ -0,0 +1,72 @@ +# Copyright 2023 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +mysql> drop table if exists test.t; +mysql> create table test.t(v4 varchar(100), v6 varchar(100)); +mysql> insert into test.t values('123.123.123.123', 'F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286'); +mysql> insert into test.t values('0.0.0.0', '0000:0000:0000:0000:0000:0000:0000:0000'); +mysql> insert into test.t values('127.0.0.1', '2001:0:2851:b9f0:6d:2326:9036:f37a'); +mysql> insert into test.t values('192.168.0.0/10', 'fe80::2dc3:25a5:49a1:6002%24'); +mysql> insert into test.t values('192.168.99.22.123', '4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00'); +mysql> insert into test.t values('999.999.999.999', '4207:A33A:58D3:F2C3:8EDC:A548::0D00'); +mysql> insert into test.t values('3.2.1.', '4207::::8EDC:A548:3EC7:0D00'); +mysql> insert into test.t values('3..2.1', '4207:::::A548:3EC7:0D00'); +mysql> insert into test.t values('...', '::::::'); +mysql> insert into test.t values('4556456', '4556456'); +mysql> insert into test.t values('ajdjioa', 'ajdjioa'); +mysql> insert into test.t values('', ''); +mysql> insert into test.t values(null,null); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select is_ipv4(v4) from test.t; ++-------------+ +| is_ipv4(v4) | ++-------------+ +| 1 | +| 1 | +| 1 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| NULL | ++-------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select is_ipv6(v6) from test.t; ++-------------+ +| is_ipv6(v6) | ++-------------+ +| 1 | +| 1 | +| 1 | +| 0 | +| 0 | +| 1 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| 0 | +| NULL | ++-------------+ +mysql> drop table if exists test.t; From fae3bbf4fbccd11fc41e15a56a7a39ea5d4d928d Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 7 Feb 2023 23:53:03 +0800 Subject: [PATCH 02/37] style: format code --- dbms/src/Functions/FunctionsIsIPAddr.h | 16 ++++--- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 44 +++---------------- 2 files changed, 17 insertions(+), 43 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 24ab670fea9..48ee272fb67 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -15,8 +15,8 @@ #pragma once #include -#include #include +#include #include #include #include @@ -39,16 +39,20 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; static inline UInt8 isIPv4(String input_address) { char str[INET_ADDRSTRLEN]; - if(input_address.empty()) return 0; - if(inet_pton(AF_INET, input_address.c_str(), str) == 1) return 1; + if (input_address.empty()) + return 0; + if (inet_pton(AF_INET, input_address.c_str(), str) == 1) + return 1; return 0; } static inline UInt8 isIPv6(String input_address) { char str[INET6_ADDRSTRLEN]; - if(input_address.empty()) return 0; - if(inet_pton(AF_INET6, input_address.c_str(), str) == 1) return 1; + if (input_address.empty()) + return 0; + if (inet_pton(AF_INET6, input_address.c_str(), str) == 1) + return 1; return 0; } @@ -90,6 +94,7 @@ class FunctionIsIPv4 : public IFunction block.getByPosition(result).column = std::move(col_res); } + private: }; @@ -131,6 +136,7 @@ class FunctionIsIPv6 : public IFunction block.getByPosition(result).column = std::move(col_res); } + private: }; } // namespace DB diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index 274b73ff855..fc69ae49226 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -44,26 +44,10 @@ TEST_F(TestIsIPAddr, isIPv4) try { // column with column - ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn({"123.123.123.123", - "0.0.0.0", - "127.0.0.1", - "192.168.0.0/10", - "192.168.99.22.123", - "999.999.999.999", - "3.2.1.", - "3..2.1", - "...", - "4556456", - "ajdjioa", - ""})})); + ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn({"123.123.123.123", "0.0.0.0", "127.0.0.1", "192.168.0.0/10", "192.168.99.22.123", "999.999.999.999", "3.2.1.", "3..2.1", "...", "4556456", "ajdjioa", ""})})); - // column with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", - "aidjio", - "1236.461.841.312", - "99.99.99.99", - std::nullopt})})); - + // column with nullable + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); } CATCH @@ -72,26 +56,10 @@ TEST_F(TestIsIPAddr, isIPv6) try { // column with column - ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", - "0000:0000:0000:0000:0000:0000:0000:0000", - "2001:0:2851:b9f0:6d:2326:9036:f37a", - "fe80::2dc3:25a5:49a1:6002%24", - "4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00:0D00", - "4207:A33A:58D3:F2C3:8EDC:A548::0D00", - "4207::::8EDC:A548:3EC7:0D00", - "4207:::::A548:3EC7:0D00", - "::::::", - "4556456", - "ajdjioa", - ""})})); + ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "0000:0000:0000:0000:0000:0000:0000:0000", "2001:0:2851:b9f0:6d:2326:9036:f37a", "fe80::2dc3:25a5:49a1:6002%24", "4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00:0D00", "4207:A33A:58D3:F2C3:8EDC:A548::0D00", "4207::::8EDC:A548:3EC7:0D00", "4207:::::A548:3EC7:0D00", "::::::", "4556456", "ajdjioa", ""})})); - // column with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", - "aidjio", - "1236.461.841.312", - "99.99.99.99", - std::nullopt})})); - + // column with nullable + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); } CATCH From 9748c3ed8c897ba550aa3b954835c1ea405aedd1 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 08:02:21 +0800 Subject: [PATCH 03/37] Update is_ip_addr.test --- tests/fullstack-test/expr/is_ip_addr.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fullstack-test/expr/is_ip_addr.test b/tests/fullstack-test/expr/is_ip_addr.test index 2decab65e88..b25c7124275 100644 --- a/tests/fullstack-test/expr/is_ip_addr.test +++ b/tests/fullstack-test/expr/is_ip_addr.test @@ -49,7 +49,7 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; | 0 | | 0 | | 0 | -| NULL | +| 0 | +-------------+ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select is_ipv6(v6) from test.t; +-------------+ @@ -67,6 +67,6 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; | 0 | | 0 | | 0 | -| NULL | +| 0 | +-------------+ mysql> drop table if exists test.t; From 97208698c4eba358bba0fd5a6a69eacb04d3a1ff Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 16:43:04 +0800 Subject: [PATCH 04/37] style:Revise according to review comments for IsIPAddr func Revise according to review comments Co-authored-by: yanweiqi <592838129@qq.com> --- dbms/src/Functions/FunctionsIsIPAddr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 48ee272fb67..d178ede701a 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -30,9 +30,9 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; /** Helper functions * - * isIPV4(x) - Judge whether the input string is an IPv4 address. + * isIPv4(x) - Judge whether the input string is an IPv4 address. * - * isIPV6(x) - Judge whether the input string is an IPv6 address + * isIPv6(x) - Judge whether the input string is an IPv6 address * */ From b587f198278f306d98b94b86ae07d47616cb91b8 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 16:43:16 +0800 Subject: [PATCH 05/37] style:Revise according to review comments for IsIPAddr func Co-authored-by: yanweiqi <592838129@qq.com> --- dbms/src/Functions/FunctionsIsIPAddr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index d178ede701a..55658d73a27 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -85,7 +85,7 @@ class FunctionIsIPv4 : public IFunction auto col_res = ColumnUInt8::create(); col_res->reserve(val_num); - for (int i = 0; i < val_num; i++) + for (int i = 0; i < val_num; ++i) { c0_col->get(i, res_field); String handled_str = res_field.get(); From 47e68f75c980ab60dfe69d12ce4492886da61d4d Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 16:43:23 +0800 Subject: [PATCH 06/37] style:Revise according to review comments for IsIPAddr Co-authored-by: yanweiqi <592838129@qq.com> --- dbms/src/Functions/FunctionsIsIPAddr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 55658d73a27..0e2eca57fc5 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -127,7 +127,7 @@ class FunctionIsIPv6 : public IFunction auto col_res = ColumnUInt8::create(); col_res->reserve(val_num); - for (int i = 0; i < val_num; i++) + for (int i = 0; i < val_num; ++i) { c0_col->get(i, res_field); String handled_str = res_field.get(); From d2287a5dce219ab9d8c04a2ff03993953af7e301 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 16:43:33 +0800 Subject: [PATCH 07/37] style:Revise according to review comments for IsIPAddr Co-authored-by: yanweiqi <592838129@qq.com> --- dbms/src/Functions/FunctionsIsIPAddr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 0e2eca57fc5..3191764683b 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -38,9 +38,9 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; static inline UInt8 isIPv4(String input_address) { - char str[INET_ADDRSTRLEN]; if (input_address.empty()) return 0; + char str[INET_ADDRSTRLEN]; if (inet_pton(AF_INET, input_address.c_str(), str) == 1) return 1; return 0; From 3d7a92b2168b33fe208c1957816cc1950636c9d7 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 16:43:44 +0800 Subject: [PATCH 08/37] style:Revise according to review comments for IsIPAddr func Co-authored-by: yanweiqi <592838129@qq.com> --- dbms/src/Functions/FunctionsIsIPAddr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 3191764683b..9947e14d248 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -48,9 +48,9 @@ static inline UInt8 isIPv4(String input_address) static inline UInt8 isIPv6(String input_address) { - char str[INET6_ADDRSTRLEN]; if (input_address.empty()) return 0; + char str[INET6_ADDRSTRLEN]; if (inet_pton(AF_INET6, input_address.c_str(), str) == 1) return 1; return 0; From 8c70e8ed897a56e56b5b77a56863045241c394b7 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 12:00:08 +0000 Subject: [PATCH 09/37] Revise according to review comments: 1. Modify the style and format of the code 2. Optimize the performance of isIPv4 and isIPv6 functions and remove invalid memset --- dbms/src/Functions/FunctionsIsIPAddr.cpp | 1 - dbms/src/Functions/FunctionsIsIPAddr.h | 168 ++++++++++++++++++++--- 2 files changed, 146 insertions(+), 23 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.cpp b/dbms/src/Functions/FunctionsIsIPAddr.cpp index fe8ae1db562..97bad2cac4a 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.cpp +++ b/dbms/src/Functions/FunctionsIsIPAddr.cpp @@ -18,7 +18,6 @@ namespace DB { - void registerFunctionsIsIPAddr(FunctionFactory & factory) { factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 9947e14d248..a987a42cdc4 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -36,24 +36,146 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; * */ -static inline UInt8 isIPv4(String input_address) +/* Description: + * This function is used to determine whether the input string is an IPv4 address, + * and the code comes from the inet_pton4 function of "arpa/inet.h". + * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + */ +static inline UInt8 isIPv4(const char * src) { - if (input_address.empty()) + if (NULL == src) return 0; - char str[INET_ADDRSTRLEN]; - if (inet_pton(AF_INET, input_address.c_str(), str) == 1) - return 1; - return 0; + + static const char digits[] = "0123456789"; + int saw_digit, octets, ch; + unsigned char tmp[4], *tp; + + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while ((ch = *src++) != '\0') + { + const char * pch; + + if ((pch = strchr(digits, ch)) != NULL) + { + unsigned int new = *tp * 10 + (unsigned int)(pch - digits); + + if (new > 255) + return 0; + *tp = new; + if (!saw_digit) + { + if (++octets > 4) + return 0; + saw_digit = 1; + } + } + else if (ch == '.' && saw_digit) + { + if (octets == 4) + return 0; + *++tp = 0; + saw_digit = 0; + } + else + return 0; + } + if (octets < 4) + return 0; + + return 1; } -static inline UInt8 isIPv6(String input_address) +/* Description: + * This function is used to determine whether the input string is an IPv6 address, + * and the code comes from the inet_pton6 function of "arpa/inet.h". + * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + */ +static inline UInt8 isIPv6(const char * src) { - if (input_address.empty()) + if (NULL == src) + return 0; + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; + static const int IN6ADDRSZ = 16; + unsigned char tmp[16], *tp, *endp, *colonp; + const char *xdigits, *curtok; + int ch, saw_xdigit; + unsigned int val; + + memset((tp = tmp), '\0', IN6ADDRSZ); + endp = tp + IN6ADDRSZ; + colonp = NULL; + if (*src == ':') + if (*++src != ':') + return 0; + curtok = src; + saw_xdigit = 0; + val = 0; + while ((ch = *src++) != '\0') + { + const char * pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != NULL) + { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return 0; + saw_xdigit = 1; + continue; + } + if (ch == ':') + { + curtok = src; + if (!saw_xdigit) + { + if (colonp) + return 0; + colonp = tp; + continue; + } + if (tp + INT16SZ > endp) + return 0; + *tp++ = (unsigned char)(val >> 8) & 0xff; + *tp++ = (unsigned char)val & 0xff; + saw_xdigit = 0; + val = 0; + continue; + } + if (ch == '.' && ((tp + INADDRSZ) <= endp) && isIPv4(curtok) > 0) + { + tp += INADDRSZ; + saw_xdigit = 0; + break; /* '\0' was seen by isIPv4(). */ + } + return 0; + } + if (saw_xdigit) + { + if (tp + INT16SZ > endp) + return 0; + *tp++ = (unsigned char)(val >> 8) & 0xff; + *tp++ = (unsigned char)val & 0xff; + } + if (colonp != NULL) + { + const size_t n = tp - colonp; + size_t i; + + for (i = 1; i <= n; i++) + { + endp[-i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp) return 0; - char str[INET6_ADDRSTRLEN]; - if (inet_pton(AF_INET6, input_address.c_str(), str) == 1) - return 1; - return 0; + return 1; } class FunctionIsIPv4 : public IFunction @@ -78,18 +200,19 @@ class FunctionIsIPv4 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); + const IColumn * col_input = block.getByPosition(arguments[0]).column.get(); + const ColumnUInt64::Container & vec_input = col_input->getData(); Field res_field; - int val_num = c0_col->size(); + int val_num = col_input->size(); auto col_res = ColumnUInt8::create(); col_res->reserve(val_num); + ColumnUInt8::Container & vec_res = col_res->getData(); for (int i = 0; i < val_num; ++i) { - c0_col->get(i, res_field); - String handled_str = res_field.get(); - col_res->insert(static_cast(isIPv4(handled_str))); + const char * input_address = static_cast(vec_input[i]); + vec_res[i] = static_cast(isIPv4(input_address)); } block.getByPosition(result).column = std::move(col_res); @@ -120,18 +243,19 @@ class FunctionIsIPv6 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); + const IColumn * col_input = block.getByPosition(arguments[0]).column.get(); + const ColumnUInt64::Container & vec_input = col_input->getData(); Field res_field; - int val_num = c0_col->size(); + int val_num = col_input->size(); auto col_res = ColumnUInt8::create(); col_res->reserve(val_num); + ColumnUInt8::Container & vec_res = col_res->getData(); for (int i = 0; i < val_num; ++i) { - c0_col->get(i, res_field); - String handled_str = res_field.get(); - col_res->insert(static_cast(isIPv6(handled_str))); + const char * input_address = static_cast(vec_input[i]); + vec_res[i] = static_cast(isIPv6(input_address)); } block.getByPosition(result).column = std::move(col_res); From 380fe4d6029d6449c40de61d1fe712a6b050c86c Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 12:14:58 +0000 Subject: [PATCH 10/37] Modify the style of the code --- dbms/src/Functions/FunctionsIsIPAddr.h | 38 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index a987a42cdc4..9dd0cbf08dc 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -21,6 +21,18 @@ #include #include +#ifndef IN6ADDRSZ +#define IN6ADDRSZ 16 +#endif + +#ifndef INT16SZ +#define INT16SZ sizeof(short) +#endif + +#ifndef INADDRSZ +#define INADDRSZ 4 +#endif + namespace DB { namespace ErrorCodes @@ -48,7 +60,7 @@ static inline UInt8 isIPv4(const char * src) static const char digits[] = "0123456789"; int saw_digit, octets, ch; - unsigned char tmp[4], *tp; + unsigned char tmp[INADDRSZ], *tp; saw_digit = 0; octets = 0; @@ -59,11 +71,11 @@ static inline UInt8 isIPv4(const char * src) if ((pch = strchr(digits, ch)) != NULL) { - unsigned int new = *tp * 10 + (unsigned int)(pch - digits); + unsigned int num = *tp * 10 + (unsigned int)(pch - digits); - if (new > 255) + if (num > 255) return 0; - *tp = new; + *tp = num; if (!saw_digit) { if (++octets > 4) @@ -98,7 +110,7 @@ static inline UInt8 isIPv6(const char * src) return 0; static const char xdigits_l[] = "0123456789abcdef", xdigits_u[] = "0123456789ABCDEF"; - static const int IN6ADDRSZ = 16; + static const int INT16SZ = size_of(short); unsigned char tmp[16], *tp, *endp, *colonp; const char *xdigits, *curtok; int ch, saw_xdigit; @@ -200,16 +212,16 @@ class FunctionIsIPv4 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - const IColumn * col_input = block.getByPosition(arguments[0]).column.get(); + size_t size = block.getByPosition(arguments[0]).column->size(); + const IColumn * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_input = col_input->getData(); Field res_field; - int val_num = col_input->size(); auto col_res = ColumnUInt8::create(); - col_res->reserve(val_num); + col_res->reserve(size); ColumnUInt8::Container & vec_res = col_res->getData(); - for (int i = 0; i < val_num; ++i) + for (int i = 0; i < size; ++i) { const char * input_address = static_cast(vec_input[i]); vec_res[i] = static_cast(isIPv4(input_address)); @@ -243,16 +255,16 @@ class FunctionIsIPv6 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - const IColumn * col_input = block.getByPosition(arguments[0]).column.get(); + size_t size = block.getByPosition(arguments[0]).column->size(); + const IColumn * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_input = col_input->getData(); Field res_field; - int val_num = col_input->size(); auto col_res = ColumnUInt8::create(); - col_res->reserve(val_num); + col_res->reserve(size); ColumnUInt8::Container & vec_res = col_res->getData(); - for (int i = 0; i < val_num; ++i) + for (int i = 0; i < size; ++i) { const char * input_address = static_cast(vec_input[i]); vec_res[i] = static_cast(isIPv6(input_address)); From a67c8eecfb9655ec6792bf32a5c40d0bad42f348 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 20:16:28 +0800 Subject: [PATCH 11/37] Update dbms/src/Functions/FunctionsIsIPAddr.h Co-authored-by: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> --- dbms/src/Functions/FunctionsIsIPAddr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 9dd0cbf08dc..22c59382ba7 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -44,7 +44,7 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; * * isIPv4(x) - Judge whether the input string is an IPv4 address. * - * isIPv6(x) - Judge whether the input string is an IPv6 address + * isIPv6(x) - Judge whether the input string is an IPv6 address. * */ From 85ed858a6bcedadd35cefd8ef57dc962a69d9ace Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 12:31:04 +0000 Subject: [PATCH 12/37] fix some bugs for is_ipv4 and is_ipv6 --- dbms/src/Functions/FunctionsIsIPAddr.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 9dd0cbf08dc..5d0c12ced5b 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -44,7 +44,7 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; * * isIPv4(x) - Judge whether the input string is an IPv4 address. * - * isIPv6(x) - Judge whether the input string is an IPv6 address + * isIPv6(x) - Judge whether the input string is an IPv6 address. * */ @@ -110,7 +110,6 @@ static inline UInt8 isIPv6(const char * src) return 0; static const char xdigits_l[] = "0123456789abcdef", xdigits_u[] = "0123456789ABCDEF"; - static const int INT16SZ = size_of(short); unsigned char tmp[16], *tp, *endp, *colonp; const char *xdigits, *curtok; int ch, saw_xdigit; @@ -213,7 +212,7 @@ class FunctionIsIPv4 : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { size_t size = block.getByPosition(arguments[0]).column->size(); - const IColumn * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); + const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_input = col_input->getData(); Field res_field; @@ -221,7 +220,7 @@ class FunctionIsIPv4 : public IFunction col_res->reserve(size); ColumnUInt8::Container & vec_res = col_res->getData(); - for (int i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) { const char * input_address = static_cast(vec_input[i]); vec_res[i] = static_cast(isIPv4(input_address)); @@ -256,7 +255,7 @@ class FunctionIsIPv6 : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { size_t size = block.getByPosition(arguments[0]).column->size(); - const IColumn * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); + const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_input = col_input->getData(); Field res_field; @@ -264,7 +263,7 @@ class FunctionIsIPv6 : public IFunction col_res->reserve(size); ColumnUInt8::Container & vec_res = col_res->getData(); - for (int i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) { const char * input_address = static_cast(vec_input[i]); vec_res[i] = static_cast(isIPv6(input_address)); From 6eb6907fd99807b4896705b91e150b674404e910 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 13:41:49 +0000 Subject: [PATCH 13/37] fix bug of IsIPAddr and modify the code style --- dbms/src/Functions/FunctionsIsIPAddr.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 5d0c12ced5b..a8ec46f37c9 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -215,21 +215,18 @@ class FunctionIsIPv4 : public IFunction const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_input = col_input->getData(); - Field res_field; auto col_res = ColumnUInt8::create(); col_res->reserve(size); ColumnUInt8::Container & vec_res = col_res->getData(); for (size_t i = 0; i < size; ++i) { - const char * input_address = static_cast(vec_input[i]); + const char * input_address = static_cast(&(vec_input[i])); vec_res[i] = static_cast(isIPv4(input_address)); } block.getByPosition(result).column = std::move(col_res); } - -private: }; class FunctionIsIPv6 : public IFunction @@ -258,20 +255,17 @@ class FunctionIsIPv6 : public IFunction const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_input = col_input->getData(); - Field res_field; auto col_res = ColumnUInt8::create(); col_res->reserve(size); ColumnUInt8::Container & vec_res = col_res->getData(); for (size_t i = 0; i < size; ++i) { - const char * input_address = static_cast(vec_input[i]); + const char * input_address = static_cast(&(vec_input[i])); vec_res[i] = static_cast(isIPv6(input_address)); } block.getByPosition(result).column = std::move(col_res); } - -private: }; } // namespace DB From b1824d41f2ff27e1a518b6a1e658773ffb39c2a7 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 22:11:27 +0800 Subject: [PATCH 14/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index a8ec46f37c9..6e1dc9e3e0c 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -221,7 +221,7 @@ class FunctionIsIPv4 : public IFunction for (size_t i = 0; i < size; ++i) { - const char * input_address = static_cast(&(vec_input[i])); + const char * input_address = reinterpret_cast(&(vec_input[i])); vec_res[i] = static_cast(isIPv4(input_address)); } @@ -261,7 +261,7 @@ class FunctionIsIPv6 : public IFunction for (size_t i = 0; i < size; ++i) { - const char * input_address = static_cast(&(vec_input[i])); + const char * input_address = reinterpret_cast(&(vec_input[i])); vec_res[i] = static_cast(isIPv6(input_address)); } From 7ce9952ffa84cc99f44131244729b208f57331cd Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Wed, 8 Feb 2023 23:04:58 +0800 Subject: [PATCH 15/37] test:update test for is_ipv4 and is_ipv6 func --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index fc69ae49226..c16fede0ab4 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -47,7 +47,7 @@ try ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn({"123.123.123.123", "0.0.0.0", "127.0.0.1", "192.168.0.0/10", "192.168.99.22.123", "999.999.999.999", "3.2.1.", "3..2.1", "...", "4556456", "ajdjioa", ""})})); // column with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, {}}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", {}})})); } CATCH @@ -59,7 +59,7 @@ try ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "0000:0000:0000:0000:0000:0000:0000:0000", "2001:0:2851:b9f0:6d:2326:9036:f37a", "fe80::2dc3:25a5:49a1:6002%24", "4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00:0D00", "4207:A33A:58D3:F2C3:8EDC:A548::0D00", "4207::::8EDC:A548:3EC7:0D00", "4207:::::A548:3EC7:0D00", "::::::", "4556456", "ajdjioa", ""})})); // column with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, {}}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", {}})})); } CATCH From 4433ab2123ceac03e2c1ac5cd90b727ff899ed81 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 08:03:48 +0800 Subject: [PATCH 16/37] fix bug:process null for is_ipv4 and is_ipv6 func --- dbms/src/Functions/FunctionsIsIPAddr.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 6e1dc9e3e0c..7c3e3f21575 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -212,8 +212,9 @@ class FunctionIsIPv4 : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { size_t size = block.getByPosition(arguments[0]).column->size(); - const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); - const ColumnUInt64::Container & vec_input = col_input->getData(); + const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); + const typename ColumnString::Chars_t & data = col_input->getChars(); + const typename ColumnString::Offsets & offsets = col_input->getOffsets(); auto col_res = ColumnUInt8::create(); col_res->reserve(size); @@ -221,8 +222,7 @@ class FunctionIsIPv4 : public IFunction for (size_t i = 0; i < size; ++i) { - const char * input_address = reinterpret_cast(&(vec_input[i])); - vec_res[i] = static_cast(isIPv4(input_address)); + vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); } block.getByPosition(result).column = std::move(col_res); @@ -252,8 +252,9 @@ class FunctionIsIPv6 : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { size_t size = block.getByPosition(arguments[0]).column->size(); - const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); - const ColumnUInt64::Container & vec_input = col_input->getData(); + const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); + const typename ColumnString::Chars_t & data = col_input->getChars(); + const typename ColumnString::Offsets & offsets = col_input->getOffsets(); auto col_res = ColumnUInt8::create(); col_res->reserve(size); @@ -261,8 +262,7 @@ class FunctionIsIPv6 : public IFunction for (size_t i = 0; i < size; ++i) { - const char * input_address = reinterpret_cast(&(vec_input[i])); - vec_res[i] = static_cast(isIPv6(input_address)); + vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); } block.getByPosition(result).column = std::move(col_res); From 48b3fa16b3ab49a6d650b1558fec3add8fdda042 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 08:22:05 +0800 Subject: [PATCH 17/37] process input length for is_ipv4 and is_ipv6 func --- dbms/src/Functions/FunctionsIsIPAddr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 7c3e3f21575..9e716f695f0 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -217,7 +217,7 @@ class FunctionIsIPv4 : public IFunction const typename ColumnString::Offsets & offsets = col_input->getOffsets(); auto col_res = ColumnUInt8::create(); - col_res->reserve(size); + col_res->resize(size); ColumnUInt8::Container & vec_res = col_res->getData(); for (size_t i = 0; i < size; ++i) @@ -257,7 +257,7 @@ class FunctionIsIPv6 : public IFunction const typename ColumnString::Offsets & offsets = col_input->getOffsets(); auto col_res = ColumnUInt8::create(); - col_res->reserve(size); + col_res->resize(size); ColumnUInt8::Container & vec_res = col_res->getData(); for (size_t i = 0; i < size; ++i) From 340be85f28a52de7103c4e290aef583b66eaf81f Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 08:45:19 +0800 Subject: [PATCH 18/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 9e716f695f0..b3ac904ddc6 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -217,8 +217,8 @@ class FunctionIsIPv4 : public IFunction const typename ColumnString::Offsets & offsets = col_input->getOffsets(); auto col_res = ColumnUInt8::create(); - col_res->resize(size); ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(szie); for (size_t i = 0; i < size; ++i) { @@ -257,8 +257,8 @@ class FunctionIsIPv6 : public IFunction const typename ColumnString::Offsets & offsets = col_input->getOffsets(); auto col_res = ColumnUInt8::create(); - col_res->resize(size); ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(szie); for (size_t i = 0; i < size; ++i) { From b7799edb0edce06d4accf5eefedf0648c1f4ec13 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 08:58:03 +0800 Subject: [PATCH 19/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 60 +++++++++++++++----------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index b3ac904ddc6..df45dbb80a8 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -211,21 +211,27 @@ class FunctionIsIPv4 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - size_t size = block.getByPosition(arguments[0]).column->size(); - const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); - const typename ColumnString::Chars_t & data = col_input->getChars(); - const typename ColumnString::Offsets & offsets = col_input->getOffsets(); + if (const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) + { + size_t size = block.getByPosition(arguments[0]).column->size(); + const typename ColumnString::Chars_t & data = col_input->getChars(); + const typename ColumnString::Offsets & offsets = col_input->getOffsets(); - auto col_res = ColumnUInt8::create(); - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(szie); + auto col_res = ColumnUInt8::create(); + ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(size); - for (size_t i = 0; i < size; ++i) - { - vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); - } + for (size_t i = 0; i < size; ++i) + { + vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); + } - block.getByPosition(result).column = std::move(col_res); + block.getByPosition(result).column = std::move(col_res); + } + else + throw Exception( + fmt::format("Illegal column {} of argument of function {}", block.getByPosition(arguments[0]).column->getName(), getName()), + ErrorCodes::ILLEGAL_COLUMN); } }; @@ -251,21 +257,27 @@ class FunctionIsIPv6 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - size_t size = block.getByPosition(arguments[0]).column->size(); - const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); - const typename ColumnString::Chars_t & data = col_input->getChars(); - const typename ColumnString::Offsets & offsets = col_input->getOffsets(); + if (const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) + { + size_t size = block.getByPosition(arguments[0]).column->size(); + const typename ColumnString::Chars_t & data = col_input->getChars(); + const typename ColumnString::Offsets & offsets = col_input->getOffsets(); - auto col_res = ColumnUInt8::create(); - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(szie); + auto col_res = ColumnUInt8::create(); + ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(size); - for (size_t i = 0; i < size; ++i) - { - vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); - } + for (size_t i = 0; i < size; ++i) + { + vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); + } - block.getByPosition(result).column = std::move(col_res); + block.getByPosition(result).column = std::move(col_res); + } + else + throw Exception( + fmt::format("Illegal column {} of argument of function {}", block.getByPosition(arguments[0]).column->getName(), getName()), + ErrorCodes::ILLEGAL_COLUMN); } }; } // namespace DB From cea864a529d2840edca01fdbfdb3775ca009d427 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 09:07:42 +0800 Subject: [PATCH 20/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index df45dbb80a8..ba5095b25d3 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -37,6 +37,7 @@ namespace DB { namespace ErrorCodes { +extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } // namespace ErrorCodes From 127f21f5c6542b0e3ba9c92f82a9ae621080a900 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 10:47:16 +0800 Subject: [PATCH 21/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 38 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index ba5095b25d3..bf60c11a81f 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -19,7 +19,6 @@ #include #include #include -#include #ifndef IN6ADDRSZ #define IN6ADDRSZ 16 @@ -56,11 +55,12 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; */ static inline UInt8 isIPv4(const char * src) { - if (NULL == src) + if (nullptr == src) return 0; static const char digits[] = "0123456789"; - int saw_digit, octets, ch; + int saw_digit, octets; + char ch; unsigned char tmp[INADDRSZ], *tp; saw_digit = 0; @@ -70,9 +70,9 @@ static inline UInt8 isIPv4(const char * src) { const char * pch; - if ((pch = strchr(digits, ch)) != NULL) + if ((pch = strchr(digits, ch)) != nullptr) { - unsigned int num = *tp * 10 + (unsigned int)(pch - digits); + unsigned int num = *tp * 10 + static_cast(pch - digits); if (num > 255) return 0; @@ -107,7 +107,7 @@ static inline UInt8 isIPv4(const char * src) */ static inline UInt8 isIPv6(const char * src) { - if (NULL == src) + if (nullptr == src) return 0; static const char xdigits_l[] = "0123456789abcdef", xdigits_u[] = "0123456789ABCDEF"; @@ -118,7 +118,7 @@ static inline UInt8 isIPv6(const char * src) memset((tp = tmp), '\0', IN6ADDRSZ); endp = tp + IN6ADDRSZ; - colonp = NULL; + colonp = nullptr; if (*src == ':') if (*++src != ':') return 0; @@ -129,9 +129,9 @@ static inline UInt8 isIPv6(const char * src) { const char * pch; - if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) + if ((pch = strchr((xdigits = xdigits_l), ch)) == nullptr) pch = strchr((xdigits = xdigits_u), ch); - if (pch != NULL) + if (pch != nullptr) { val <<= 4; val |= (pch - xdigits); @@ -173,7 +173,7 @@ static inline UInt8 isIPv6(const char * src) *tp++ = (unsigned char)(val >> 8) & 0xff; *tp++ = (unsigned char)val & 0xff; } - if (colonp != NULL) + if (colonp != nullptr) { const size_t n = tp - colonp; size_t i; @@ -222,10 +222,19 @@ class FunctionIsIPv4 : public IFunction ColumnUInt8::Container & vec_res = col_res->getData(); vec_res.resize(size); +<<<<<<< HEAD + size_t prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[prev_offset]))); + prev_offset = offsets[i]; + } +======= for (size_t i = 0; i < size; ++i) { vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); } +>>>>>>> cea864a529d2840edca01fdbfdb3775ca009d427 block.getByPosition(result).column = std::move(col_res); } @@ -268,10 +277,19 @@ class FunctionIsIPv6 : public IFunction ColumnUInt8::Container & vec_res = col_res->getData(); vec_res.resize(size); +<<<<<<< HEAD + size_t prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[prev_offset]))); + prev_offset = offsets[i]; + } +======= for (size_t i = 0; i < size; ++i) { vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); } +>>>>>>> cea864a529d2840edca01fdbfdb3775ca009d427 block.getByPosition(result).column = std::move(col_res); } From 4058f1aab3cc215c4027493d94a6e81a7459866e Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Thu, 9 Feb 2023 10:48:52 +0800 Subject: [PATCH 22/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index bf60c11a81f..5dfccfce87d 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -222,19 +222,12 @@ class FunctionIsIPv4 : public IFunction ColumnUInt8::Container & vec_res = col_res->getData(); vec_res.resize(size); -<<<<<<< HEAD size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[prev_offset]))); prev_offset = offsets[i]; } -======= - for (size_t i = 0; i < size; ++i) - { - vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); - } ->>>>>>> cea864a529d2840edca01fdbfdb3775ca009d427 block.getByPosition(result).column = std::move(col_res); } @@ -277,19 +270,12 @@ class FunctionIsIPv6 : public IFunction ColumnUInt8::Container & vec_res = col_res->getData(); vec_res.resize(size); -<<<<<<< HEAD size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[prev_offset]))); prev_offset = offsets[i]; } -======= - for (size_t i = 0; i < size; ++i) - { - vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]))); - } ->>>>>>> cea864a529d2840edca01fdbfdb3775ca009d427 block.getByPosition(result).column = std::move(col_res); } From c7f29141de8b1a557fbe5454a1a947ccdefeca3d Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Fri, 10 Feb 2023 23:49:07 +0800 Subject: [PATCH 23/37] Update gtest_is_ip_addr.cpp --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index c16fede0ab4..e2ed45b1f18 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -27,12 +27,6 @@ #include #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wsign-compare" -#include - -#pragma GCC diagnostic pop - namespace DB { namespace tests @@ -43,11 +37,23 @@ class TestIsIPAddr : public DB::tests::FunctionTest TEST_F(TestIsIPAddr, isIPv4) try { - // column with column + // test ColumnVector without nullable ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn({"123.123.123.123", "0.0.0.0", "127.0.0.1", "192.168.0.0/10", "192.168.99.22.123", "999.999.999.999", "3.2.1.", "3..2.1", "...", "4556456", "ajdjioa", ""})})); - // column with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, {}}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", {}})})); + // test ColumnVector with nullable + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + + // test ColumnConst without nullable + ASSERT_COLUMN_EQ(createConstColumn(4,1), executeFunction("tiDBIsIPv4", {createConstColumn(4, "123.123.123.123")})); + ASSERT_COLUMN_EQ(createConstColumn(4,0), executeFunction("tiDBIsIPv4", {createConstColumn(4, "aidjio")})); + + // test ColumnConst with nullable but non-null value + ASSERT_COLUMN_EQ(createConstColumn(2,1), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "123.123.123.123")})); + ASSERT_COLUMN_EQ(createConstColumn(2,0), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "1236.461.841.312")})); + + // test ColumnConst with nullable and null value + ASSERT_COLUMN_EQ(createConstColumn>(4,std::nullopt), executeFunction("tiDBIsIPv4", {createConstColumn>(4, std::nullopt)})); + } CATCH @@ -55,11 +61,23 @@ CATCH TEST_F(TestIsIPAddr, isIPv6) try { - // column with column + // test ColumnVector without nullable ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "0000:0000:0000:0000:0000:0000:0000:0000", "2001:0:2851:b9f0:6d:2326:9036:f37a", "fe80::2dc3:25a5:49a1:6002%24", "4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00:0D00", "4207:A33A:58D3:F2C3:8EDC:A548::0D00", "4207::::8EDC:A548:3EC7:0D00", "4207:::::A548:3EC7:0D00", "::::::", "4556456", "ajdjioa", ""})})); - // column with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, {}}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", {}})})); + // test ColumnVector with nullable + ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + + // test ColumnConst without nullable + ASSERT_COLUMN_EQ(createConstColumn(4,1), executeFunction("tiDBIsIPv6", {createConstColumn(4, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); + ASSERT_COLUMN_EQ(createConstColumn(4,0), executeFunction("tiDBIsIPv6", {createConstColumn(4, "aidjio")})); + + // test ColumnConst with nullable but non-null value + ASSERT_COLUMN_EQ(createConstColumn(2,1), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); + ASSERT_COLUMN_EQ(createConstColumn(2,0), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "aidjio")})); + + // test ColumnConst with nullable and null value + ASSERT_COLUMN_EQ(createConstColumn>(4,std::nullopt), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); + } CATCH From f31a94ae03fb79f49ab82537cebf6729c1404443 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Fri, 10 Feb 2023 23:49:43 +0800 Subject: [PATCH 24/37] Update FunctionsIsIPAddr.cpp --- dbms/src/Functions/FunctionsIsIPAddr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.cpp b/dbms/src/Functions/FunctionsIsIPAddr.cpp index 97bad2cac4a..ef2b1045ca4 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.cpp +++ b/dbms/src/Functions/FunctionsIsIPAddr.cpp @@ -20,8 +20,8 @@ namespace DB void registerFunctionsIsIPAddr(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } // namespace DB From 684fef985dc8805ddef7dd37ee6c8594ee4130cb Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Fri, 10 Feb 2023 23:49:53 +0800 Subject: [PATCH 25/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 81 +++++++++----------------- 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 5dfccfce87d..d95094afddd 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -20,16 +20,16 @@ #include #include -#ifndef IN6ADDRSZ -#define IN6ADDRSZ 16 +#ifndef INADDRSZ +#define INADDRSZ 4 #endif #ifndef INT16SZ #define INT16SZ sizeof(short) #endif -#ifndef INADDRSZ -#define INADDRSZ 4 +#ifndef IN6ADDRSZ +#define IN6ADDRSZ 16 #endif namespace DB @@ -37,6 +37,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } // namespace ErrorCodes @@ -178,7 +179,7 @@ static inline UInt8 isIPv6(const char * src) const size_t n = tp - colonp; size_t i; - for (i = 1; i <= n; i++) + for (i = 1; i <= n; ++i) { endp[-i] = colonp[n - i]; colonp[n - i] = 0; @@ -190,64 +191,27 @@ static inline UInt8 isIPv6(const char * src) return 1; } -class FunctionIsIPv4 : public IFunction +struct IsIPv4Name { -public: static constexpr auto name = "tiDBIsIPv4"; - FunctionIsIPv4() = default; - - static FunctionPtr create(const Context &) { return std::make_shared(); }; - - std::string getName() const override { return name; } - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() != 1) - throw Exception( - fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - return std::make_shared(); - } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override - { - if (const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) - { - size_t size = block.getByPosition(arguments[0]).column->size(); - const typename ColumnString::Chars_t & data = col_input->getChars(); - const typename ColumnString::Offsets & offsets = col_input->getOffsets(); - - auto col_res = ColumnUInt8::create(); - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(size); - - size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - vec_res[i] = static_cast(isIPv4(reinterpret_cast(&data[prev_offset]))); - prev_offset = offsets[i]; - } - - block.getByPosition(result).column = std::move(col_res); - } - else - throw Exception( - fmt::format("Illegal column {} of argument of function {}", block.getByPosition(arguments[0]).column->getName(), getName()), - ErrorCodes::ILLEGAL_COLUMN); - } +}; +struct IsIPv6Name +{ + static constexpr auto name = "tiDBIsIPv6"; }; -class FunctionIsIPv6 : public IFunction +template +class FunctionIsIPv4OrIsIPv6 : public IFunction { public: - static constexpr auto name = "tiDBIsIPv6"; - FunctionIsIPv6() = default; + static constexpr auto name = Name::name; + FunctionIsIPv4OrIsIPv6() = default; - static FunctionPtr create(const Context &) { return std::make_shared(); }; + static FunctionPtr create(const Context &) { return std::make_shared(); }; std::string getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { @@ -255,7 +219,10 @@ class FunctionIsIPv6 : public IFunction throw Exception( fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - + if (!arguments[0]->isString()) + throw Exception( + fmt::format("Illegal type {} of first argument of function {}. Must be String.", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override @@ -273,7 +240,7 @@ class FunctionIsIPv6 : public IFunction size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { - vec_res[i] = static_cast(isIPv6(reinterpret_cast(&data[prev_offset]))); + vec_res[i] = Function(reinterpret_cast(&data[prev_offset])); prev_offset = offsets[i]; } @@ -286,3 +253,7 @@ class FunctionIsIPv6 : public IFunction } }; } // namespace DB + +#undef INADDRSZ +#undef INT16SZ +#undef IN6ADDRSZ From cc3f674119e8dc51e62156159fe24c66fa4344f3 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Sat, 11 Feb 2023 00:00:55 +0800 Subject: [PATCH 26/37] Update gtest_is_ip_addr.cpp --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index e2ed45b1f18..7077668b5b3 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -44,16 +44,15 @@ try ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); // test ColumnConst without nullable - ASSERT_COLUMN_EQ(createConstColumn(4,1), executeFunction("tiDBIsIPv4", {createConstColumn(4, "123.123.123.123")})); - ASSERT_COLUMN_EQ(createConstColumn(4,0), executeFunction("tiDBIsIPv4", {createConstColumn(4, "aidjio")})); + ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv4", {createConstColumn(4, "123.123.123.123")})); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv4", {createConstColumn(4, "aidjio")})); // test ColumnConst with nullable but non-null value - ASSERT_COLUMN_EQ(createConstColumn(2,1), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "123.123.123.123")})); - ASSERT_COLUMN_EQ(createConstColumn(2,0), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "1236.461.841.312")})); - - // test ColumnConst with nullable and null value - ASSERT_COLUMN_EQ(createConstColumn>(4,std::nullopt), executeFunction("tiDBIsIPv4", {createConstColumn>(4, std::nullopt)})); + ASSERT_COLUMN_EQ(createConstColumn(2, 1), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "123.123.123.123")})); + ASSERT_COLUMN_EQ(createConstColumn(2, 0), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "1236.461.841.312")})); + // test ColumnConst with nullable and null value + ASSERT_COLUMN_EQ(createConstColumn>(4, std::nullopt), executeFunction("tiDBIsIPv4", {createConstColumn>(4, std::nullopt)})); } CATCH @@ -66,18 +65,17 @@ try // test ColumnVector with nullable ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); - + // test ColumnConst without nullable - ASSERT_COLUMN_EQ(createConstColumn(4,1), executeFunction("tiDBIsIPv6", {createConstColumn(4, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); - ASSERT_COLUMN_EQ(createConstColumn(4,0), executeFunction("tiDBIsIPv6", {createConstColumn(4, "aidjio")})); + ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv6", {createConstColumn(4, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv6", {createConstColumn(4, "aidjio")})); // test ColumnConst with nullable but non-null value - ASSERT_COLUMN_EQ(createConstColumn(2,1), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); - ASSERT_COLUMN_EQ(createConstColumn(2,0), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "aidjio")})); - - // test ColumnConst with nullable and null value - ASSERT_COLUMN_EQ(createConstColumn>(4,std::nullopt), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); + ASSERT_COLUMN_EQ(createConstColumn(2, 1), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); + ASSERT_COLUMN_EQ(createConstColumn(2, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "aidjio")})); + // test ColumnConst with nullable and null value + ASSERT_COLUMN_EQ(createConstColumn>(4, std::nullopt), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); } CATCH From 0767bd8e8ae49e8d1cb6ee6f3004042819269d10 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Sat, 11 Feb 2023 00:01:18 +0800 Subject: [PATCH 27/37] Update FunctionsIsIPAddr.cpp --- dbms/src/Functions/FunctionsIsIPAddr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.cpp b/dbms/src/Functions/FunctionsIsIPAddr.cpp index ef2b1045ca4..cbbd1d5620a 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.cpp +++ b/dbms/src/Functions/FunctionsIsIPAddr.cpp @@ -20,8 +20,8 @@ namespace DB void registerFunctionsIsIPAddr(FunctionFactory & factory) { - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); } } // namespace DB From c0a8c52d929355a84910b27f1b4a52c2d19d8065 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Mon, 13 Feb 2023 22:22:56 +0800 Subject: [PATCH 28/37] Update FunctionsIsIPAddr.cpp --- dbms/src/Functions/FunctionsIsIPAddr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.cpp b/dbms/src/Functions/FunctionsIsIPAddr.cpp index cbbd1d5620a..5a227dec1f1 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.cpp +++ b/dbms/src/Functions/FunctionsIsIPAddr.cpp @@ -20,8 +20,8 @@ namespace DB void registerFunctionsIsIPAddr(FunctionFactory & factory) { - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); } } // namespace DB From cfc2cc263efe24e6e50a0bec841dceb1dc419ed1 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Mon, 13 Feb 2023 22:23:25 +0800 Subject: [PATCH 29/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 246 ++++++++++++------------- 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index d95094afddd..2cb27285387 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -49,162 +49,162 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; * */ -/* Description: - * This function is used to determine whether the input string is an IPv4 address, - * and the code comes from the inet_pton4 function of "arpa/inet.h". - * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c - */ -static inline UInt8 isIPv4(const char * src) -{ - if (nullptr == src) - return 0; - - static const char digits[] = "0123456789"; - int saw_digit, octets; - char ch; - unsigned char tmp[INADDRSZ], *tp; - saw_digit = 0; - octets = 0; - *(tp = tmp) = 0; - while ((ch = *src++) != '\0') +struct CheckIsIPv4Impl +{ + static constexpr auto name = "tiDBIsIPv4"; + /* Description: + * This function is used to determine whether the input string is an IPv4 address, + * and the code comes from the inet_pton4 function of "arpa/inet.h". + * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + */ + static inline UInt8 isMatch(const char * src) { - const char * pch; + if (nullptr == src) + return 0; + + static const char digits[] = "0123456789"; + int saw_digit, octets; + char ch; + unsigned char tmp[INADDRSZ], *tp; - if ((pch = strchr(digits, ch)) != nullptr) + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while ((ch = *src++) != '\0') { - unsigned int num = *tp * 10 + static_cast(pch - digits); + const char * pch; - if (num > 255) - return 0; - *tp = num; - if (!saw_digit) + if ((pch = strchr(digits, ch)) != nullptr) { - if (++octets > 4) + unsigned int num = *tp * 10 + static_cast(pch - digits); + + if (num > 255) return 0; - saw_digit = 1; + *tp = num; + if (!saw_digit) + { + if (++octets > 4) + return 0; + saw_digit = 1; + } } - } - else if (ch == '.' && saw_digit) - { - if (octets == 4) + else if (ch == '.' && saw_digit) + { + if (octets == 4) + return 0; + *++tp = 0; + saw_digit = 0; + } + else return 0; - *++tp = 0; - saw_digit = 0; } - else + if (octets < 4) return 0; - } - if (octets < 4) - return 0; - - return 1; -} -/* Description: - * This function is used to determine whether the input string is an IPv6 address, - * and the code comes from the inet_pton6 function of "arpa/inet.h". - * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c - */ -static inline UInt8 isIPv6(const char * src) + return 1; + } +}; +struct CheckIsIPv6Impl { - if (nullptr == src) - return 0; - static const char xdigits_l[] = "0123456789abcdef", - xdigits_u[] = "0123456789ABCDEF"; - unsigned char tmp[16], *tp, *endp, *colonp; - const char *xdigits, *curtok; - int ch, saw_xdigit; - unsigned int val; + static constexpr auto name = "tiDBIsIPv6"; - memset((tp = tmp), '\0', IN6ADDRSZ); - endp = tp + IN6ADDRSZ; - colonp = nullptr; - if (*src == ':') - if (*++src != ':') - return 0; - curtok = src; - saw_xdigit = 0; - val = 0; - while ((ch = *src++) != '\0') + /* Description: + * This function is used to determine whether the input string is an IPv6 address, + * and the code comes from the inet_pton6 function of "arpa/inet.h". + * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + */ + static inline UInt8 isMatch(const char * src) { - const char * pch; + if (nullptr == src) + return 0; + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; + unsigned char tmp[16], *tp, *endp, *colonp; + const char *xdigits, *curtok; + int ch, saw_xdigit; + unsigned int val; - if ((pch = strchr((xdigits = xdigits_l), ch)) == nullptr) - pch = strchr((xdigits = xdigits_u), ch); - if (pch != nullptr) - { - val <<= 4; - val |= (pch - xdigits); - if (val > 0xffff) + memset((tp = tmp), '\0', IN6ADDRSZ); + endp = tp + IN6ADDRSZ; + colonp = nullptr; + if (*src == ':') + if (*++src != ':') return 0; - saw_xdigit = 1; - continue; - } - if (ch == ':') + curtok = src; + saw_xdigit = 0; + val = 0; + while ((ch = *src++) != '\0') { - curtok = src; - if (!saw_xdigit) + const char * pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == nullptr) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != nullptr) { - if (colonp) + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) return 0; - colonp = tp; + saw_xdigit = 1; continue; } + if (ch == ':') + { + curtok = src; + if (!saw_xdigit) + { + if (colonp) + return 0; + colonp = tp; + continue; + } + if (tp + INT16SZ > endp) + return 0; + *tp++ = static_cast(val >> 8) & 0xff; + *tp++ = static_cast(val) & 0xff; + saw_xdigit = 0; + val = 0; + continue; + } + if (ch == '.' && ((tp + INADDRSZ) <= endp) && CheckIsIPv4Impl::isMatch(curtok) > 0) + { + tp += INADDRSZ; + saw_xdigit = 0; + break; /* '\0' was seen by CheckIsIPv4Impl::isMatch(). */ + } + return 0; + } + if (saw_xdigit) + { if (tp + INT16SZ > endp) return 0; - *tp++ = (unsigned char)(val >> 8) & 0xff; - *tp++ = (unsigned char)val & 0xff; - saw_xdigit = 0; - val = 0; - continue; + *tp++ = static_cast(val >> 8) & 0xff; + *tp++ = static_cast(val) & 0xff; } - if (ch == '.' && ((tp + INADDRSZ) <= endp) && isIPv4(curtok) > 0) + if (colonp != nullptr) { - tp += INADDRSZ; - saw_xdigit = 0; - break; /* '\0' was seen by isIPv4(). */ - } - return 0; - } - if (saw_xdigit) - { - if (tp + INT16SZ > endp) - return 0; - *tp++ = (unsigned char)(val >> 8) & 0xff; - *tp++ = (unsigned char)val & 0xff; - } - if (colonp != nullptr) - { - const size_t n = tp - colonp; - size_t i; + const size_t n = tp - colonp; + size_t i; - for (i = 1; i <= n; ++i) - { - endp[-i] = colonp[n - i]; - colonp[n - i] = 0; + for (i = 1; i <= n; ++i) + { + endp[-i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; } - tp = endp; + if (tp != endp) + return 0; + return 1; } - if (tp != endp) - return 0; - return 1; -} - -struct IsIPv4Name -{ - static constexpr auto name = "tiDBIsIPv4"; -}; -struct IsIPv6Name -{ - static constexpr auto name = "tiDBIsIPv6"; }; -template +template class FunctionIsIPv4OrIsIPv6 : public IFunction { public: - static constexpr auto name = Name::name; + static constexpr auto name = Impl::name; FunctionIsIPv4OrIsIPv6() = default; static FunctionPtr create(const Context &) { return std::make_shared(); }; @@ -240,7 +240,7 @@ class FunctionIsIPv4OrIsIPv6 : public IFunction size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { - vec_res[i] = Function(reinterpret_cast(&data[prev_offset])); + vec_res[i] = Impl::isMatch(reinterpret_cast(&data[prev_offset])); prev_offset = offsets[i]; } From 88de59c7c528a8eae44918bf6e539eade0ea30b3 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 08:35:23 +0800 Subject: [PATCH 30/37] Update gtest_is_ip_addr.cpp --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index 7077668b5b3..6afca2ac6ff 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -41,7 +41,7 @@ try ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn({"123.123.123.123", "0.0.0.0", "127.0.0.1", "192.168.0.0/10", "192.168.99.22.123", "999.999.999.999", "3.2.1.", "3..2.1", "...", "4556456", "ajdjioa", ""})})); // test ColumnVector with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 1, std::nullopt}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 1, 0}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); // test ColumnConst without nullable ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv4", {createConstColumn(4, "123.123.123.123")})); @@ -52,7 +52,7 @@ try ASSERT_COLUMN_EQ(createConstColumn(2, 0), executeFunction("tiDBIsIPv4", {createConstColumn>(2, "1236.461.841.312")})); // test ColumnConst with nullable and null value - ASSERT_COLUMN_EQ(createConstColumn>(4, std::nullopt), executeFunction("tiDBIsIPv4", {createConstColumn>(4, std::nullopt)})); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv4", {createConstColumn>(4, std::nullopt)})); } CATCH @@ -64,7 +64,7 @@ try ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "0000:0000:0000:0000:0000:0000:0000:0000", "2001:0:2851:b9f0:6d:2326:9036:f37a", "fe80::2dc3:25a5:49a1:6002%24", "4207:A33A:58D3:F2C3:8EDC:A548:3EC7:0D00:0D00:0D00", "4207:A33A:58D3:F2C3:8EDC:A548::0D00", "4207::::8EDC:A548:3EC7:0D00", "4207:::::A548:3EC7:0D00", "::::::", "4556456", "ajdjioa", ""})})); // test ColumnVector with nullable - ASSERT_COLUMN_EQ(createColumn>({1, 0, 0, 0, std::nullopt}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); // test ColumnConst without nullable ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv6", {createConstColumn(4, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); @@ -75,7 +75,7 @@ try ASSERT_COLUMN_EQ(createConstColumn(2, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "aidjio")})); // test ColumnConst with nullable and null value - ASSERT_COLUMN_EQ(createConstColumn>(4, std::nullopt), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); } CATCH From 80ab34e1bedc79d045e5b4b504755bea663da434 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 08:38:00 +0800 Subject: [PATCH 31/37] Update registerFunctions.cpp --- dbms/src/Functions/registerFunctions.cpp | 312 ++++++++++++++++++----- 1 file changed, 253 insertions(+), 59 deletions(-) diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index c27768803b5..2ca3818eb6c 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,69 +12,263 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#ifndef INADDRSZ +#define INADDRSZ 4 +#endif + +#ifndef INT16SZ +#define INT16SZ sizeof(short) +#endif + +#ifndef IN6ADDRSZ +#define IN6ADDRSZ 16 +#endif namespace DB { -/** These functions are defined in a separate translation units. - * This is done in order to reduce the consumption of RAM during build, and to speed up the parallel build. +namespace ErrorCodes +{ +extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} // namespace ErrorCodes + +/** Helper functions + * + * isIPv4(x) - Judge whether the input string is an IPv4 address. + * + * isIPv6(x) - Judge whether the input string is an IPv6 address. + * */ -void registerFunctionsArithmetic(FunctionFactory &); -void registerFunctionsTuple(FunctionFactory &); -void registerFunctionsCoding(FunctionFactory &); -void registerFunctionsComparison(FunctionFactory &); -void registerFunctionsConditional(FunctionFactory &); -void registerFunctionsConversion(FunctionFactory &); -void registerFunctionsTiDBConversion(FunctionFactory &); -void registerFunctionsDateTime(FunctionFactory &); -void registerFunctionsHashing(FunctionFactory &); -void registerFunctionsLogical(FunctionFactory &); -void registerFunctionsMiscellaneous(FunctionFactory &); -void registerFunctionsRound(FunctionFactory &); -void registerFunctionsString(FunctionFactory &); -void registerFunctionsStringSearch(FunctionFactory &); -void registerFunctionsURL(FunctionFactory &); -void registerFunctionsMath(FunctionFactory &); -void registerFunctionsTransform(FunctionFactory &); -void registerFunctionsGeo(FunctionFactory &); -void registerFunctionsNull(FunctionFactory &); -void registerFunctionsStringMath(FunctionFactory &); -void registerFunctionsDuration(FunctionFactory &); -void registerFunctionsRegexp(FunctionFactory &); -void registerFunctionsJson(FunctionFactory &); -void registerFunctionsIsIPAddr(FunctionFactory &); - - -void registerFunctions() + + +struct CheckIsIPv4Impl +{ + static constexpr auto name = "tiDBIsIPv4"; + /* Description: + * This function is used to determine whether the input string is an IPv4 address, + * and the code comes from the inet_pton4 function of "arpa/inet.h". + * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + */ + static inline UInt8 isMatch(const char * src) + { + if (nullptr == src) + return 0; + + static const char digits[] = "0123456789"; + int saw_digit, octets; + char ch; + unsigned char tmp[INADDRSZ], *tp; + + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while ((ch = *src++) != '\0') + { + const char * pch; + + if ((pch = strchr(digits, ch)) != nullptr) + { + unsigned int num = *tp * 10 + static_cast(pch - digits); + + if (num > 255) + return 0; + *tp = num; + if (!saw_digit) + { + if (++octets > 4) + return 0; + saw_digit = 1; + } + } + else if (ch == '.' && saw_digit) + { + if (octets == 4) + return 0; + *++tp = 0; + saw_digit = 0; + } + else + return 0; + } + if (octets < 4) + return 0; + + return 1; + } +}; +struct CheckIsIPv6Impl { - auto & factory = FunctionFactory::instance(); - - registerFunctionsArithmetic(factory); - registerFunctionsTuple(factory); - registerFunctionsCoding(factory); - registerFunctionsComparison(factory); - registerFunctionsConditional(factory); - registerFunctionsConversion(factory); - registerFunctionsTiDBConversion(factory); - registerFunctionsDateTime(factory); - registerFunctionsHashing(factory); - registerFunctionsLogical(factory); - registerFunctionsMiscellaneous(factory); - registerFunctionsRound(factory); - registerFunctionsString(factory); - registerFunctionsStringSearch(factory); - registerFunctionsURL(factory); - registerFunctionsMath(factory); - registerFunctionsTransform(factory); - registerFunctionsGeo(factory); - registerFunctionsNull(factory); - registerFunctionsStringMath(factory); - registerFunctionsDuration(factory); - registerFunctionsRegexp(factory); - registerFunctionsJson(factory); - registerFunctionsIsIPAddr(factory); -} + static constexpr auto name = "tiDBIsIPv6"; + /* Description: + * This function is used to determine whether the input string is an IPv6 address, + * and the code comes from the inet_pton6 function of "arpa/inet.h". + * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + */ + static inline UInt8 isMatch(const char * src) + { + if (nullptr == src) + return 0; + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; + unsigned char tmp[16], *tp, *endp, *colonp; + const char *xdigits, *curtok; + int ch, saw_xdigit; + unsigned int val; + + memset((tp = tmp), '\0', IN6ADDRSZ); + endp = tp + IN6ADDRSZ; + colonp = nullptr; + if (*src == ':') + if (*++src != ':') + return 0; + curtok = src; + saw_xdigit = 0; + val = 0; + while ((ch = *src++) != '\0') + { + const char * pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == nullptr) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != nullptr) + { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return 0; + saw_xdigit = 1; + continue; + } + if (ch == ':') + { + curtok = src; + if (!saw_xdigit) + { + if (colonp) + return 0; + colonp = tp; + continue; + } + if (tp + INT16SZ > endp) + return 0; + *tp++ = static_cast(val >> 8) & 0xff; + *tp++ = static_cast(val) & 0xff; + saw_xdigit = 0; + val = 0; + continue; + } + if (ch == '.' && ((tp + INADDRSZ) <= endp) && CheckIsIPv4Impl::isMatch(curtok) > 0) + { + tp += INADDRSZ; + saw_xdigit = 0; + break; /* '\0' was seen by CheckIsIPv4Impl::isMatch(). */ + } + return 0; + } + if (saw_xdigit) + { + if (tp + INT16SZ > endp) + return 0; + *tp++ = static_cast(val >> 8) & 0xff; + *tp++ = static_cast(val) & 0xff; + } + if (colonp != nullptr) + { + const size_t n = tp - colonp; + size_t i; + + for (i = 1; i <= n; ++i) + { + endp[-i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp) + return 0; + return 1; + } +}; + +template +class FunctionIsIPv4OrIsIPv6 : public IFunction +{ +public: + static constexpr auto name = Impl::name; + FunctionIsIPv4OrIsIPv6() = default; + + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + std::string getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 1) + throw Exception( + fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (!arguments[0]->onlyNull()) + { + DataTypePtr data_type = removeNullable(arguments[0]); + if (!data_type->isString()) + throw Exception( + fmt::format("Illegal argument type {} of function {}, should be integer", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + return std::make_shared(); + } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto [column, nullmap] = removeNullable(block.getByPosition(arguments[0]).column.get()); + if (const auto * col_input = checkAndGetColumn(column)) + { + size_t size = block.getByPosition(arguments[0]).column->size(); + const typename ColumnString::Chars_t & data = col_input->getChars(); + const typename ColumnString::Offsets & offsets = col_input->getOffsets(); + + auto col_res = ColumnUInt8::create(); + ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(size); + + size_t prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + if (nullmap && (*nullmap)[i]) + { + vec_res[i] = 0; + } + else + { + vec_res[i] = Impl::isMatch(reinterpret_cast(&data[prev_offset])); + } + prev_offset = offsets[i]; + } + + block.getByPosition(result).column = std::move(col_res); + } + else + throw Exception( + fmt::format("Illegal column {} of argument of function {}", block.getByPosition(arguments[0]).column->getName(), getName()), + ErrorCodes::ILLEGAL_COLUMN); + } +}; } // namespace DB + +#undef INADDRSZ +#undef INT16SZ +#undef IN6ADDRSZ From 2bc8de1784abaa3c60883b42459080f2181043ff Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 08:40:36 +0800 Subject: [PATCH 32/37] Update registerFunctions.cpp --- dbms/src/Functions/registerFunctions.cpp | 312 +++++------------------ 1 file changed, 59 insertions(+), 253 deletions(-) diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 2ca3818eb6c..c27768803b5 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 PingCAP, Ltd. +// Copyright 2022 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,263 +12,69 @@ // See the License for the specific language governing permissions and // limitations under the License. -#pragma once +#include +#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef INADDRSZ -#define INADDRSZ 4 -#endif - -#ifndef INT16SZ -#define INT16SZ sizeof(short) -#endif - -#ifndef IN6ADDRSZ -#define IN6ADDRSZ 16 -#endif namespace DB { -namespace ErrorCodes -{ -extern const int ILLEGAL_COLUMN; -extern const int ILLEGAL_TYPE_OF_ARGUMENT; -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} // namespace ErrorCodes - -/** Helper functions - * - * isIPv4(x) - Judge whether the input string is an IPv4 address. - * - * isIPv6(x) - Judge whether the input string is an IPv6 address. - * +/** These functions are defined in a separate translation units. + * This is done in order to reduce the consumption of RAM during build, and to speed up the parallel build. */ - - -struct CheckIsIPv4Impl -{ - static constexpr auto name = "tiDBIsIPv4"; - /* Description: - * This function is used to determine whether the input string is an IPv4 address, - * and the code comes from the inet_pton4 function of "arpa/inet.h". - * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c - */ - static inline UInt8 isMatch(const char * src) - { - if (nullptr == src) - return 0; - - static const char digits[] = "0123456789"; - int saw_digit, octets; - char ch; - unsigned char tmp[INADDRSZ], *tp; - - saw_digit = 0; - octets = 0; - *(tp = tmp) = 0; - while ((ch = *src++) != '\0') - { - const char * pch; - - if ((pch = strchr(digits, ch)) != nullptr) - { - unsigned int num = *tp * 10 + static_cast(pch - digits); - - if (num > 255) - return 0; - *tp = num; - if (!saw_digit) - { - if (++octets > 4) - return 0; - saw_digit = 1; - } - } - else if (ch == '.' && saw_digit) - { - if (octets == 4) - return 0; - *++tp = 0; - saw_digit = 0; - } - else - return 0; - } - if (octets < 4) - return 0; - - return 1; - } -}; -struct CheckIsIPv6Impl +void registerFunctionsArithmetic(FunctionFactory &); +void registerFunctionsTuple(FunctionFactory &); +void registerFunctionsCoding(FunctionFactory &); +void registerFunctionsComparison(FunctionFactory &); +void registerFunctionsConditional(FunctionFactory &); +void registerFunctionsConversion(FunctionFactory &); +void registerFunctionsTiDBConversion(FunctionFactory &); +void registerFunctionsDateTime(FunctionFactory &); +void registerFunctionsHashing(FunctionFactory &); +void registerFunctionsLogical(FunctionFactory &); +void registerFunctionsMiscellaneous(FunctionFactory &); +void registerFunctionsRound(FunctionFactory &); +void registerFunctionsString(FunctionFactory &); +void registerFunctionsStringSearch(FunctionFactory &); +void registerFunctionsURL(FunctionFactory &); +void registerFunctionsMath(FunctionFactory &); +void registerFunctionsTransform(FunctionFactory &); +void registerFunctionsGeo(FunctionFactory &); +void registerFunctionsNull(FunctionFactory &); +void registerFunctionsStringMath(FunctionFactory &); +void registerFunctionsDuration(FunctionFactory &); +void registerFunctionsRegexp(FunctionFactory &); +void registerFunctionsJson(FunctionFactory &); +void registerFunctionsIsIPAddr(FunctionFactory &); + + +void registerFunctions() { - static constexpr auto name = "tiDBIsIPv6"; + auto & factory = FunctionFactory::instance(); + + registerFunctionsArithmetic(factory); + registerFunctionsTuple(factory); + registerFunctionsCoding(factory); + registerFunctionsComparison(factory); + registerFunctionsConditional(factory); + registerFunctionsConversion(factory); + registerFunctionsTiDBConversion(factory); + registerFunctionsDateTime(factory); + registerFunctionsHashing(factory); + registerFunctionsLogical(factory); + registerFunctionsMiscellaneous(factory); + registerFunctionsRound(factory); + registerFunctionsString(factory); + registerFunctionsStringSearch(factory); + registerFunctionsURL(factory); + registerFunctionsMath(factory); + registerFunctionsTransform(factory); + registerFunctionsGeo(factory); + registerFunctionsNull(factory); + registerFunctionsStringMath(factory); + registerFunctionsDuration(factory); + registerFunctionsRegexp(factory); + registerFunctionsJson(factory); + registerFunctionsIsIPAddr(factory); +} - /* Description: - * This function is used to determine whether the input string is an IPv6 address, - * and the code comes from the inet_pton6 function of "arpa/inet.h". - * References: http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c - */ - static inline UInt8 isMatch(const char * src) - { - if (nullptr == src) - return 0; - static const char xdigits_l[] = "0123456789abcdef", - xdigits_u[] = "0123456789ABCDEF"; - unsigned char tmp[16], *tp, *endp, *colonp; - const char *xdigits, *curtok; - int ch, saw_xdigit; - unsigned int val; - - memset((tp = tmp), '\0', IN6ADDRSZ); - endp = tp + IN6ADDRSZ; - colonp = nullptr; - if (*src == ':') - if (*++src != ':') - return 0; - curtok = src; - saw_xdigit = 0; - val = 0; - while ((ch = *src++) != '\0') - { - const char * pch; - - if ((pch = strchr((xdigits = xdigits_l), ch)) == nullptr) - pch = strchr((xdigits = xdigits_u), ch); - if (pch != nullptr) - { - val <<= 4; - val |= (pch - xdigits); - if (val > 0xffff) - return 0; - saw_xdigit = 1; - continue; - } - if (ch == ':') - { - curtok = src; - if (!saw_xdigit) - { - if (colonp) - return 0; - colonp = tp; - continue; - } - if (tp + INT16SZ > endp) - return 0; - *tp++ = static_cast(val >> 8) & 0xff; - *tp++ = static_cast(val) & 0xff; - saw_xdigit = 0; - val = 0; - continue; - } - if (ch == '.' && ((tp + INADDRSZ) <= endp) && CheckIsIPv4Impl::isMatch(curtok) > 0) - { - tp += INADDRSZ; - saw_xdigit = 0; - break; /* '\0' was seen by CheckIsIPv4Impl::isMatch(). */ - } - return 0; - } - if (saw_xdigit) - { - if (tp + INT16SZ > endp) - return 0; - *tp++ = static_cast(val >> 8) & 0xff; - *tp++ = static_cast(val) & 0xff; - } - if (colonp != nullptr) - { - const size_t n = tp - colonp; - size_t i; - - for (i = 1; i <= n; ++i) - { - endp[-i] = colonp[n - i]; - colonp[n - i] = 0; - } - tp = endp; - } - if (tp != endp) - return 0; - return 1; - } -}; - -template -class FunctionIsIPv4OrIsIPv6 : public IFunction -{ -public: - static constexpr auto name = Impl::name; - FunctionIsIPv4OrIsIPv6() = default; - - static FunctionPtr create(const Context &) { return std::make_shared(); }; - - std::string getName() const override { return name; } - size_t getNumberOfArguments() const override { return 1; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForNulls() const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() != 1) - throw Exception( - fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!arguments[0]->onlyNull()) - { - DataTypePtr data_type = removeNullable(arguments[0]); - if (!data_type->isString()) - throw Exception( - fmt::format("Illegal argument type {} of function {}, should be integer", arguments[0]->getName(), getName()), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - return std::make_shared(); - } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override - { - auto [column, nullmap] = removeNullable(block.getByPosition(arguments[0]).column.get()); - if (const auto * col_input = checkAndGetColumn(column)) - { - size_t size = block.getByPosition(arguments[0]).column->size(); - const typename ColumnString::Chars_t & data = col_input->getChars(); - const typename ColumnString::Offsets & offsets = col_input->getOffsets(); - - auto col_res = ColumnUInt8::create(); - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(size); - - size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - if (nullmap && (*nullmap)[i]) - { - vec_res[i] = 0; - } - else - { - vec_res[i] = Impl::isMatch(reinterpret_cast(&data[prev_offset])); - } - prev_offset = offsets[i]; - } - - block.getByPosition(result).column = std::move(col_res); - } - else - throw Exception( - fmt::format("Illegal column {} of argument of function {}", block.getByPosition(arguments[0]).column->getName(), getName()), - ErrorCodes::ILLEGAL_COLUMN); - } -}; } // namespace DB - -#undef INADDRSZ -#undef INT16SZ -#undef IN6ADDRSZ From dce17e882075a3614102cf32d1a1dd9b10369cdf Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 08:41:14 +0800 Subject: [PATCH 33/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 27 ++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 2cb27285387..2ca3818eb6c 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -14,7 +14,9 @@ #pragma once +#include #include +#include #include #include #include @@ -212,6 +214,7 @@ class FunctionIsIPv4OrIsIPv6 : public IFunction std::string getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { @@ -219,15 +222,20 @@ class FunctionIsIPv4OrIsIPv6 : public IFunction throw Exception( fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!arguments[0]->isString()) - throw Exception( - fmt::format("Illegal type {} of first argument of function {}. Must be String.", arguments[0]->getName(), getName()), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!arguments[0]->onlyNull()) + { + DataTypePtr data_type = removeNullable(arguments[0]); + if (!data_type->isString()) + throw Exception( + fmt::format("Illegal argument type {} of function {}, should be integer", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } return std::make_shared(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - if (const auto * col_input = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) + auto [column, nullmap] = removeNullable(block.getByPosition(arguments[0]).column.get()); + if (const auto * col_input = checkAndGetColumn(column)) { size_t size = block.getByPosition(arguments[0]).column->size(); const typename ColumnString::Chars_t & data = col_input->getChars(); @@ -240,7 +248,14 @@ class FunctionIsIPv4OrIsIPv6 : public IFunction size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { - vec_res[i] = Impl::isMatch(reinterpret_cast(&data[prev_offset])); + if (nullmap && (*nullmap)[i]) + { + vec_res[i] = 0; + } + else + { + vec_res[i] = Impl::isMatch(reinterpret_cast(&data[prev_offset])); + } prev_offset = offsets[i]; } From fb191ffa1a78f8bf50661f20ed7f5b5bc3a5f9a1 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 10:12:35 +0800 Subject: [PATCH 34/37] Update gtest_is_ip_addr.cpp --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index 6afca2ac6ff..bf6ba11c9ed 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -42,6 +42,7 @@ try // test ColumnVector with nullable ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 1, 0}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn({0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn>({std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt})})); // test ColumnConst without nullable ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv4", {createConstColumn(4, "123.123.123.123")})); @@ -65,6 +66,7 @@ try // test ColumnVector with nullable ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn({0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn>({std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt})})); // test ColumnConst without nullable ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv6", {createConstColumn(4, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); @@ -73,7 +75,7 @@ try // test ColumnConst with nullable but non-null value ASSERT_COLUMN_EQ(createConstColumn(2, 1), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); ASSERT_COLUMN_EQ(createConstColumn(2, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "aidjio")})); - + // test ColumnConst with nullable and null value ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); } From 09fd7fd29c83f298ee654c9b9a416601a718b854 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 02:57:23 +0000 Subject: [PATCH 35/37] update testcase --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index bf6ba11c9ed..9d504be27da 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -75,7 +75,7 @@ try // test ColumnConst with nullable but non-null value ASSERT_COLUMN_EQ(createConstColumn(2, 1), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); ASSERT_COLUMN_EQ(createConstColumn(2, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(2, "aidjio")})); - + // test ColumnConst with nullable and null value ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); } From c23be4f538292ff6baa254d8f4c15d832b7ff421 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 17:53:12 +0800 Subject: [PATCH 36/37] Update gtest_is_ip_addr.cpp --- dbms/src/Functions/tests/gtest_is_ip_addr.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp index 9d504be27da..5ba376fb307 100644 --- a/dbms/src/Functions/tests/gtest_is_ip_addr.cpp +++ b/dbms/src/Functions/tests/gtest_is_ip_addr.cpp @@ -43,6 +43,7 @@ try // test ColumnVector with nullable ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 1, 0}), executeFunction("tiDBIsIPv4", {createColumn>({"123.123.123.123", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); ASSERT_COLUMN_EQ(createColumn({0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createColumn>({std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn({0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv4", {createOnlyNullColumn(5)})); // test ColumnConst without nullable ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv4", {createConstColumn(4, "123.123.123.123")})); @@ -54,6 +55,7 @@ try // test ColumnConst with nullable and null value ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv4", {createConstColumn>(4, std::nullopt)})); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv4", {createOnlyNullColumnConst(4)})); } CATCH @@ -67,6 +69,7 @@ try // test ColumnVector with nullable ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn>({"F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286", "aidjio", "1236.461.841.312", "99.99.99.99", std::nullopt})})); ASSERT_COLUMN_EQ(createColumn({0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createColumn>({std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt})})); + ASSERT_COLUMN_EQ(createColumn({0, 0, 0, 0, 0}), executeFunction("tiDBIsIPv6", {createOnlyNullColumn(5)})); // test ColumnConst without nullable ASSERT_COLUMN_EQ(createConstColumn(4, 1), executeFunction("tiDBIsIPv6", {createConstColumn(4, "F746:C349:48E3:22F2:81E0:0EA8:E7B6:8286")})); @@ -78,6 +81,7 @@ try // test ColumnConst with nullable and null value ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv6", {createConstColumn>(4, std::nullopt)})); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), executeFunction("tiDBIsIPv6", {createOnlyNullColumnConst(4)})); } CATCH From 32cef308a0f4e2342fecc67d69872ad2d32dd784 Mon Sep 17 00:00:00 2001 From: AntiTopQuark Date: Tue, 14 Feb 2023 22:56:00 +0800 Subject: [PATCH 37/37] Update FunctionsIsIPAddr.h --- dbms/src/Functions/FunctionsIsIPAddr.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/dbms/src/Functions/FunctionsIsIPAddr.h b/dbms/src/Functions/FunctionsIsIPAddr.h index 2ca3818eb6c..08b8548e44e 100644 --- a/dbms/src/Functions/FunctionsIsIPAddr.h +++ b/dbms/src/Functions/FunctionsIsIPAddr.h @@ -234,17 +234,28 @@ class FunctionIsIPv4OrIsIPv6 : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { + size_t size = block.getByPosition(arguments[0]).column->size(); + auto col_res = ColumnUInt8::create(); + ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(size); + + /// Always null. + if (block.getByPosition(arguments[0]).type->onlyNull()) + { + for (size_t i = 0; i < size; ++i) + { + vec_res[i] = 0; + } + block.getByPosition(result).column = std::move(col_res); + return; + } + auto [column, nullmap] = removeNullable(block.getByPosition(arguments[0]).column.get()); if (const auto * col_input = checkAndGetColumn(column)) { - size_t size = block.getByPosition(arguments[0]).column->size(); const typename ColumnString::Chars_t & data = col_input->getChars(); const typename ColumnString::Offsets & offsets = col_input->getOffsets(); - auto col_res = ColumnUInt8::create(); - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(size); - size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) {