From 4618c7344a16becef94c6e4817266c9333c254eb Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Tue, 19 Nov 2024 10:43:54 +0800 Subject: [PATCH] all --- be/src/vec/columns/column.h | 13 ++---- be/src/vec/columns/column_array.cpp | 12 +----- be/src/vec/columns/column_array.h | 3 +- be/src/vec/columns/column_map.cpp | 24 ++--------- be/src/vec/columns/column_map.h | 3 +- be/src/vec/columns/column_nullable.cpp | 13 +----- be/src/vec/columns/column_nullable.h | 4 +- be/src/vec/columns/column_object.h | 6 --- be/src/vec/columns/column_string.cpp | 31 +++++++++----- be/src/vec/columns/column_string.h | 4 +- be/src/vec/columns/column_struct.cpp | 24 ++--------- be/src/vec/columns/column_struct.h | 3 +- be/src/vec/core/block.cpp | 2 +- be/test/vec/columns/column_string_test.cpp | 48 ++++++++++++++++++++++ 14 files changed, 90 insertions(+), 100 deletions(-) create mode 100644 be/test/vec/columns/column_string_test.cpp diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index d96446298d90b3..9046a1dba48d3f 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -126,16 +126,9 @@ class IColumn : public COW { return nullptr; } - // shrink the end zeros for CHAR type or ARRAY type - virtual MutablePtr get_shrinked_column() { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "Method get_shrinked_column is not supported for " + get_name()); - return nullptr; - } - - // check the column whether could shrinked - // now support only in char type, or the nested type in complex type: array{char}, struct{char}, map{char} - virtual bool could_shrinked_column() { return false; } + // shrink the end zeros for ColumnStr(also for who has it nested). so nest column will call it for all nested. + // for non-str col, will reach here(do nothing). only ColumnStr will really shrink itself. + virtual void shrink_padding_chars() {} /// Some columns may require finalization before using of other operations. virtual void finalize() {} diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index e66e016381e83b..bd4464e2caf81c 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -79,16 +79,8 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest offsets = ColumnOffsets::create(); } -bool ColumnArray::could_shrinked_column() { - return data->could_shrinked_column(); -} - -MutableColumnPtr ColumnArray::get_shrinked_column() { - if (could_shrinked_column()) { - return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable()); - } else { - return ColumnArray::create(data->assume_mutable(), offsets->assume_mutable()); - } +void ColumnArray::shrink_padding_chars() { + data->shrink_padding_chars(); } std::string ColumnArray::get_name() const { diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index a3c79ffaa0298d..4dbc8e91e52b88 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -112,8 +112,7 @@ class ColumnArray final : public COWHelper { return Base::create(std::forward(args)...); } - MutableColumnPtr get_shrinked_column() override; - bool could_shrinked_column() override; + void shrink_padding_chars() override; /** On the index i there is an offset to the beginning of the i + 1 -th element. */ using ColumnOffsets = ColumnVector; diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 85964ca967b095..eb3b431a229d7b 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -502,27 +502,9 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) const { return res; } -bool ColumnMap::could_shrinked_column() { - return keys_column->could_shrinked_column() || values_column->could_shrinked_column(); -} - -MutableColumnPtr ColumnMap::get_shrinked_column() { - MutableColumns new_columns(2); - - if (keys_column->could_shrinked_column()) { - new_columns[0] = keys_column->get_shrinked_column(); - } else { - new_columns[0] = keys_column->get_ptr(); - } - - if (values_column->could_shrinked_column()) { - new_columns[1] = values_column->get_shrinked_column(); - } else { - new_columns[1] = values_column->get_ptr(); - } - - return ColumnMap::create(new_columns[0]->assume_mutable(), new_columns[1]->assume_mutable(), - offsets_column->assume_mutable()); +void ColumnMap::shrink_padding_chars() { + keys_column->shrink_padding_chars(); + values_column->shrink_padding_chars(); } void ColumnMap::reserve(size_t n) { diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 08eee11c436d2c..ae482a2d4e058c 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -118,8 +118,7 @@ class ColumnMap final : public COWHelper { const char* deserialize_and_insert_from_arena(const char* pos) override; void update_hash_with_value(size_t n, SipHash& hash) const override; - MutableColumnPtr get_shrinked_column() override; - bool could_shrinked_column() override; + void shrink_padding_chars() override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; size_t filter(const Filter& filter) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 8fffe1dfe652da..493aabdae31608 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -51,17 +51,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP _need_update_has_null = true; } -bool ColumnNullable::could_shrinked_column() { - return get_nested_column_ptr()->could_shrinked_column(); -} - -MutableColumnPtr ColumnNullable::get_shrinked_column() { - if (could_shrinked_column()) { - return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(), - get_null_map_column_ptr()); - } else { - return ColumnNullable::create(get_nested_column_ptr(), get_null_map_column_ptr()); - } +void ColumnNullable::shrink_padding_chars() { + get_nested_column_ptr()->shrink_padding_chars(); } void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 35787cd52d81ff..6096fd4b66965a 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -143,8 +143,8 @@ class ColumnNullable final : public COWHelper, public N return Base::create(std::forward(args)...); } - MutableColumnPtr get_shrinked_column() override; - bool could_shrinked_column() override; + void shrink_padding_chars() override; + bool is_variable_length() const override { return nested_column->is_variable_length(); } std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 3379a5ff8e88e2..f73d5e9f7e3b66 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -446,12 +446,6 @@ class ColumnObject final : public COWHelper { void update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const override; - // Not implemented - MutableColumnPtr get_shrinked_column() override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "get_shrinked_column" + get_name()); - } - Int64 get_int(size_t /*n*/) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_int" + get_name()); } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 6eb3e45b2e015a..3caa194551bf79 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -22,6 +22,7 @@ #include #include +#include #include "util/memcpy_inlined.h" #include "util/simd/bits.h" @@ -81,16 +82,26 @@ MutableColumnPtr ColumnStr::clone_resized(size_t to_size) const { } template -MutableColumnPtr ColumnStr::get_shrinked_column() { - auto shrinked_column = ColumnStr::create(); - shrinked_column->get_offsets().reserve(offsets.size()); - shrinked_column->get_chars().reserve(chars.size()); - for (int i = 0; i < size(); i++) { - StringRef str = get_data_at(i); - reinterpret_cast*>(shrinked_column.get()) - ->insert_data(str.data, strnlen(str.data, str.size)); - } - return shrinked_column; +void ColumnStr::shrink_padding_chars() { + if (size() == 0) { + return; + } + char* data = reinterpret_cast(chars.data()); + auto* offset = offsets.data(); + size_t size = offsets.size(); + + // deal the 0-th element. no need to move. + auto next_start = offset[0]; + offset[0] = strnlen(data, size_at(0)); + for (size_t i = 1; i < size; i++) { + // get the i-th length and whole move it to cover the last's trailing void + auto length = strnlen(data + next_start, offset[i] - next_start); + memmove(data + offset[i - 1], data + next_start, length); + // offset i will be changed. so save the old value for (i+1)-th to get its length. + next_start = offset[i]; + offset[i] = offset[i - 1] + length; + } + chars.resize_fill(offsets.back()); // just call it to shrink memory here. no possible to expand. } // This method is only called by MutableBlock::merge_ignore_overflow diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 0881c887288692..a72a3ec5cdc96a 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -85,6 +85,7 @@ class ColumnStr final : public COWHelper> { /// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle. Chars chars; + // Start position of i-th element. size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; } /// Size of i-th element, including terminating zero. @@ -117,8 +118,7 @@ class ColumnStr final : public COWHelper> { MutableColumnPtr clone_resized(size_t to_size) const override; - MutableColumnPtr get_shrinked_column() override; - bool could_shrinked_column() override { return true; } + void shrink_padding_chars() override; Field operator[](size_t n) const override { assert(n < size()); diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index ec0c5e6a6877fc..3a238a09c0d1d4 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -313,28 +313,10 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const { return ColumnStruct::create(new_columns); } -bool ColumnStruct::could_shrinked_column() { - const size_t tuple_size = columns.size(); - for (size_t i = 0; i < tuple_size; ++i) { - if (columns[i]->could_shrinked_column()) { - return true; - } - } - return false; -} - -MutableColumnPtr ColumnStruct::get_shrinked_column() { - const size_t tuple_size = columns.size(); - MutableColumns new_columns(tuple_size); - - for (size_t i = 0; i < tuple_size; ++i) { - if (columns[i]->could_shrinked_column()) { - new_columns[i] = columns[i]->get_shrinked_column(); - } else { - new_columns[i] = columns[i]->get_ptr(); - } +void ColumnStruct::shrink_padding_chars() { + for (auto& column : columns) { + column->shrink_padding_chars(); } - return ColumnStruct::create(std::move(new_columns)); } void ColumnStruct::reserve(size_t n) { diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 15836604792a74..e9f8014d9db06e 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -151,8 +151,7 @@ class ColumnStruct final : public COWHelper { int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override; - MutableColumnPtr get_shrinked_column() override; - bool could_shrinked_column() override; + void shrink_padding_chars() override; void reserve(size_t n) override; void resize(size_t n) override; diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 11075335fb17af..b15b83cf77ea9c 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -1215,7 +1215,7 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector& char_ for (auto idx : char_type_idx) { if (idx < data.size()) { auto& col_and_name = this->get_by_position(idx); - col_and_name.column = col_and_name.column->assume_mutable()->get_shrinked_column(); + col_and_name.column->assume_mutable()->shrink_padding_chars(); } } } diff --git a/be/test/vec/columns/column_string_test.cpp b/be/test/vec/columns/column_string_test.cpp new file mode 100644 index 00000000000000..07195afe1cfcac --- /dev/null +++ b/be/test/vec/columns/column_string_test.cpp @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/columns/column_string.h" + +#include +#include + +#include "vec/common/string_ref.h" +#include "vec/core/types.h" + +using namespace doris; +using namespace doris::vectorized; + +TEST(ColumnStringTest, shrink_padding_chars) { + ColumnString::MutablePtr col = ColumnString::create(); + col->insert_data("123\0 ", 7); + col->insert_data("456\0xx", 6); + col->insert_data("78", 2); + col->shrink_padding_chars(); + + EXPECT_EQ(col->size(), 3); + EXPECT_EQ(col->get_data_at(0), StringRef("123")); + EXPECT_EQ(col->get_data_at(0).size, 3); + EXPECT_EQ(col->get_data_at(1), StringRef("456")); + EXPECT_EQ(col->get_data_at(1).size, 3); + EXPECT_EQ(col->get_data_at(2), StringRef("78")); + EXPECT_EQ(col->get_data_at(2).size, 2); + + col->insert_data("xyz", 2); // only xy + + EXPECT_EQ(col->size(), 4); + EXPECT_EQ(col->get_data_at(3), StringRef("xy")); +} \ No newline at end of file