Skip to content

Commit

Permalink
all
Browse files Browse the repository at this point in the history
  • Loading branch information
zclllyybb committed Nov 19, 2024
1 parent 20473c1 commit 4618c73
Show file tree
Hide file tree
Showing 14 changed files with 90 additions and 100 deletions.
13 changes: 3 additions & 10 deletions be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,9 @@ class IColumn : public COW<IColumn> {
return nullptr;
}

// shrink the end zeros for CHAR type or ARRAY<CHAR> type
virtual MutablePtr get_shrinked_column() {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"Method get_shrinked_column is not supported for " + get_name());
return nullptr;
}

// check the column whether could shrinked
// now support only in char type, or the nested type in complex type: array{char}, struct{char}, map{char}
virtual bool could_shrinked_column() { return false; }
// Strip the trailing padding zeros from ColumnStr data, including any ColumnStr nested inside another column:
// columns that wrap other columns forward this call to every nested column.
// The default implementation here does nothing (non-string columns); only ColumnStr actually shrinks itself.
virtual void shrink_padding_chars() {}

/// Some columns may require finalization before using of other operations.
virtual void finalize() {}
Expand Down
12 changes: 2 additions & 10 deletions be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,8 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest
offsets = ColumnOffsets::create();
}

bool ColumnArray::could_shrinked_column() {
return data->could_shrinked_column();
}

MutableColumnPtr ColumnArray::get_shrinked_column() {
if (could_shrinked_column()) {
return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable());
} else {
return ColumnArray::create(data->assume_mutable(), offsets->assume_mutable());
}
// Forwards the shrink request to the nested `data` column.
// `offsets` is not touched by this method.
void ColumnArray::shrink_padding_chars() {
data->shrink_padding_chars();
}

std::string ColumnArray::get_name() const {
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/columns/column_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ class ColumnArray final : public COWHelper<IColumn, ColumnArray> {
return Base::create(std::forward<Args>(args)...);
}

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;

/** On the index i there is an offset to the beginning of the i + 1 -th element. */
using ColumnOffsets = ColumnVector<Offset64>;
Expand Down
24 changes: 3 additions & 21 deletions be/src/vec/columns/column_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,27 +502,9 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) const {
return res;
}

bool ColumnMap::could_shrinked_column() {
return keys_column->could_shrinked_column() || values_column->could_shrinked_column();
}

MutableColumnPtr ColumnMap::get_shrinked_column() {
MutableColumns new_columns(2);

if (keys_column->could_shrinked_column()) {
new_columns[0] = keys_column->get_shrinked_column();
} else {
new_columns[0] = keys_column->get_ptr();
}

if (values_column->could_shrinked_column()) {
new_columns[1] = values_column->get_shrinked_column();
} else {
new_columns[1] = values_column->get_ptr();
}

return ColumnMap::create(new_columns[0]->assume_mutable(), new_columns[1]->assume_mutable(),
offsets_column->assume_mutable());
// Forwards the shrink request to both the keys column and the values column.
// `offsets_column` is not touched by this method.
void ColumnMap::shrink_padding_chars() {
keys_column->shrink_padding_chars();
values_column->shrink_padding_chars();
}

void ColumnMap::reserve(size_t n) {
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/columns/column_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ class ColumnMap final : public COWHelper<IColumn, ColumnMap> {
const char* deserialize_and_insert_from_arena(const char* pos) override;

void update_hash_with_value(size_t n, SipHash& hash) const override;
MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;
ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
size_t filter(const Filter& filter) override;
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
Expand Down
13 changes: 2 additions & 11 deletions be/src/vec/columns/column_nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP
_need_update_has_null = true;
}

bool ColumnNullable::could_shrinked_column() {
return get_nested_column_ptr()->could_shrinked_column();
}

MutableColumnPtr ColumnNullable::get_shrinked_column() {
if (could_shrinked_column()) {
return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
get_null_map_column_ptr());
} else {
return ColumnNullable::create(get_nested_column_ptr(), get_null_map_column_ptr());
}
// Forwards the shrink request to the nested column.
// The null map column is not touched by this method.
void ColumnNullable::shrink_padding_chars() {
get_nested_column_ptr()->shrink_padding_chars();
}

void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public N
return Base::create(std::forward<Args>(args)...);
}

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;

bool is_variable_length() const override { return nested_column->is_variable_length(); }

std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; }
Expand Down
6 changes: 0 additions & 6 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,12 +446,6 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
const uint8_t* __restrict null_data) const override;

// Not implemented
MutableColumnPtr get_shrinked_column() override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"get_shrinked_column" + get_name());
}

Int64 get_int(size_t /*n*/) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_int" + get_name());
}
Expand Down
31 changes: 21 additions & 10 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <cstring>

#include "util/memcpy_inlined.h"
#include "util/simd/bits.h"
Expand Down Expand Up @@ -81,16 +82,26 @@ MutableColumnPtr ColumnStr<T>::clone_resized(size_t to_size) const {
}

template <typename T>
MutableColumnPtr ColumnStr<T>::get_shrinked_column() {
auto shrinked_column = ColumnStr<T>::create();
shrinked_column->get_offsets().reserve(offsets.size());
shrinked_column->get_chars().reserve(chars.size());
for (int i = 0; i < size(); i++) {
StringRef str = get_data_at(i);
reinterpret_cast<ColumnStr<T>*>(shrinked_column.get())
->insert_data(str.data, strnlen(str.data, str.size));
}
return shrinked_column;
// Shrinks every element in place: each element is cut at its first zero byte (strnlen bounded by the
// element's stored size, so an element without a zero byte keeps its full length), the surviving bytes are
// packed contiguously at the front of `chars`, and `offsets` is rewritten to the new end positions.
void ColumnStr<T>::shrink_padding_chars() {
if (size() == 0) {
return;
}
char* data = reinterpret_cast<char*>(chars.data());
auto* offset = offsets.data();
// NOTE: from here on this local shadows the size() member function.
size_t size = offsets.size();

// Element 0 already starts at the beginning of the buffer, so no bytes move; only its end offset changes.
// next_start keeps the ORIGINAL end of element 0, which is where element 1's bytes still live.
auto next_start = offset[0];
offset[0] = strnlen(data, size_at(0));
for (size_t i = 1; i < size; i++) {
// Length of the i-th element up to (not including) its first zero byte, capped at its original size.
// Slide those bytes left so they begin right after the already-shrunk (i-1)-th element;
// memmove is used because the source and destination ranges may overlap.
auto length = strnlen(data + next_start, offset[i] - next_start);
memmove(data + offset[i - 1], data + next_start, length);
// offset[i] is about to be overwritten, so save its original value first: it is where the
// (i+1)-th element's bytes start and is needed to compute that element's original size.
next_start = offset[i];
offset[i] = offset[i - 1] + length;
}
chars.resize_fill(offsets.back()); // the new total never exceeds the old one, so this only trims the tail.
}

// This method is only called by MutableBlock::merge_ignore_overflow
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
/// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
Chars chars;

// Start position of i-th element.
size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }

/// Size of i-th element, including terminating zero.
Expand Down Expand Up @@ -117,8 +118,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {

MutableColumnPtr clone_resized(size_t to_size) const override;

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override { return true; }
void shrink_padding_chars() override;

Field operator[](size_t n) const override {
assert(n < size());
Expand Down
24 changes: 3 additions & 21 deletions be/src/vec/columns/column_struct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,28 +313,10 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const {
return ColumnStruct::create(new_columns);
}

bool ColumnStruct::could_shrinked_column() {
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i) {
if (columns[i]->could_shrinked_column()) {
return true;
}
}
return false;
}

MutableColumnPtr ColumnStruct::get_shrinked_column() {
const size_t tuple_size = columns.size();
MutableColumns new_columns(tuple_size);

for (size_t i = 0; i < tuple_size; ++i) {
if (columns[i]->could_shrinked_column()) {
new_columns[i] = columns[i]->get_shrinked_column();
} else {
new_columns[i] = columns[i]->get_ptr();
}
void ColumnStruct::shrink_padding_chars() {
for (auto& column : columns) {
column->shrink_padding_chars();
}
return ColumnStruct::create(std::move(new_columns));
}

void ColumnStruct::reserve(size_t n) {
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/columns/column_struct.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ class ColumnStruct final : public COWHelper<IColumn, ColumnStruct> {

int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override;

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;

void reserve(size_t n) override;
void resize(size_t n) override;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector<size_t>& char_
for (auto idx : char_type_idx) {
if (idx < data.size()) {
auto& col_and_name = this->get_by_position(idx);
col_and_name.column = col_and_name.column->assume_mutable()->get_shrinked_column();
col_and_name.column->assume_mutable()->shrink_padding_chars();
}
}
}
Expand Down
48 changes: 48 additions & 0 deletions be/test/vec/columns/column_string_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "vec/columns/column_string.h"

#include <gmock/gmock-more-matchers.h>
#include <gtest/gtest.h>

#include "vec/common/string_ref.h"
#include "vec/core/types.h"

using namespace doris;
using namespace doris::vectorized;

// Checks that ColumnString::shrink_padding_chars() cuts every element at its first zero byte,
// leaves elements without a zero byte unchanged, and that the column still accepts inserts afterwards.
TEST(ColumnStringTest, shrink_padding_chars) {
    // Keep the padded inputs in named arrays so the inserted length is derived from the literal
    // itself (sizeof minus the implicit terminator). A hand-written length that is larger than the
    // literal would make insert_data read past the end of the array.
    const char padded_with_spaces[] = "123\0   "; // "123", a zero byte, then 3 spaces
    const char padded_with_text[] = "456\0xx";    // "456", a zero byte, then "xx"

    ColumnString::MutablePtr col = ColumnString::create();
    col->insert_data(padded_with_spaces, sizeof(padded_with_spaces) - 1);
    col->insert_data(padded_with_text, sizeof(padded_with_text) - 1);
    col->insert_data("78", 2); // no zero byte: must survive untouched
    col->shrink_padding_chars();

    EXPECT_EQ(col->size(), 3);
    EXPECT_EQ(col->get_data_at(0), StringRef("123"));
    EXPECT_EQ(col->get_data_at(0).size, 3);
    EXPECT_EQ(col->get_data_at(1), StringRef("456"));
    EXPECT_EQ(col->get_data_at(1).size, 3);
    EXPECT_EQ(col->get_data_at(2), StringRef("78"));
    EXPECT_EQ(col->get_data_at(2).size, 2);

    // The column must remain usable after shrinking: append a new element.
    col->insert_data("xyz", 2); // only xy

    EXPECT_EQ(col->size(), 4);
    EXPECT_EQ(col->get_data_at(3), StringRef("xy"));
    EXPECT_EQ(col->get_data_at(3).size, 2);
}

0 comments on commit 4618c73

Please sign in to comment.