diff --git a/engine/db/catalog/basic_meta_impl.cpp b/engine/db/catalog/basic_meta_impl.cpp index 8015876..cad9488 100644 --- a/engine/db/catalog/basic_meta_impl.cpp +++ b/engine/db/catalog/basic_meta_impl.cpp @@ -31,6 +31,7 @@ constexpr const char* METRIC_TYPE = "metric_type"; constexpr const char* INDICES = "indices"; constexpr const char* MODEL = "model"; constexpr const char* DIMENSIONS = "dimensions"; +constexpr const char* ELEMENT_TYPE = "element_type"; constexpr const char* DB_CATALOG_FILE_NAME = "catalog"; @@ -49,6 +50,10 @@ Status LoadFieldSchemaFromJson(const vectordb::Json& json, meta::FieldSchema& fi field_schema.vector_dimension_ = json.GetInt(VECTOR_DIMENSION); field_schema.metric_type_ = static_cast(json.GetInt(METRIC_TYPE)); } + // Only set and list fields have element_type. + if (field_schema.field_type_ == meta::FieldType::SET || field_schema.field_type_ == meta::FieldType::LIST) { + field_schema.element_type = static_cast(json.GetInt(ELEMENT_TYPE)); + } return Status::OK(); } @@ -113,6 +118,10 @@ void DumpFieldSchemaToJson(const meta::FieldSchema& field_schema, vectordb::Json json.SetInt(VECTOR_DIMENSION, field_schema.vector_dimension_); json.SetInt(METRIC_TYPE, static_cast(field_schema.metric_type_)); } + // Only set and list fields have element_type. + if (field_schema.field_type_ == meta::FieldType::SET || field_schema.field_type_ == meta::FieldType::LIST) { + json.SetInt(ELEMENT_TYPE, static_cast(field_schema.element_type)); + } } // Convert a TableSchema to a Json object @@ -364,6 +373,23 @@ Status ValidateSchema(TableSchema& table_schema, std::vector &em } } + // 8. Set and list fields must have element type and should be supported data type. 
+ if (field.field_type_ == FieldType::SET || field.field_type_ == FieldType::LIST) { + if (field.element_type == FieldType::UNKNOWN) { + return Status(DB_UNEXPECTED_ERROR, "Element type of " + field.name_ + " is not valid."); + } + if (field.element_type != FieldType::INT1 && + field.element_type != FieldType::INT2 && + field.element_type != FieldType::INT4 && + field.element_type != FieldType::INT8 && + field.element_type != FieldType::FLOAT && + field.element_type != FieldType::DOUBLE && + field.element_type != FieldType::STRING && + field.element_type != FieldType::BOOL) { + return Status(DB_UNEXPECTED_ERROR, "Element type of " + field.name_ + " is not supported."); + } + } + if (has_primary_key && field.is_primary_key_) { return Status(DB_UNEXPECTED_ERROR, "Cannot have more than 1 primary key fields."); } diff --git a/engine/db/catalog/meta_types.hpp b/engine/db/catalog/meta_types.hpp index 14257d9..2f0fa16 100644 --- a/engine/db/catalog/meta_types.hpp +++ b/engine/db/catalog/meta_types.hpp @@ -39,6 +39,9 @@ enum class FieldType { GEO_POINT = 60, + SET = 70, + LIST = 71, + UNKNOWN = 999, }; @@ -57,6 +60,7 @@ struct FieldSchema { FieldType field_type_ = FieldType::INT4; size_t vector_dimension_ = DEFAULT_VECTOR_DIMENSION; MetricType metric_type_ = MetricType::EUCLIDEAN; + FieldType element_type = FieldType::INT4; }; struct AutoEmbedding { @@ -103,7 +107,9 @@ static const std::unordered_map fieldTypeMap = { {"VECTOR_DOUBLE", FieldType::VECTOR_DOUBLE}, {"SPARSE_VECTOR_FLOAT", FieldType::SPARSE_VECTOR_FLOAT}, {"SPARSE_VECTOR_DOUBLE", FieldType::SPARSE_VECTOR_DOUBLE}, + {"SET", FieldType::SET}, + {"LIST", FieldType::LIST}, {"GEO_POINT", FieldType::GEO_POINT}, {"UNKNOWN", FieldType::UNKNOWN}}; static const std::unordered_map metricTypeMap = { diff --git a/engine/db/table_segment_mvp.cpp b/engine/db/table_segment_mvp.cpp index 0e39c9f..c585f2a 100644 --- a/engine/db/table_segment_mvp.cpp +++ b/engine/db/table_segment_mvp.cpp @@ 
-42,6 +42,11 @@ constexpr size_t FieldTypeSizeMVP(meta::FieldType type) { // like the length of the string or the dimension of the vector. You might want // to handle these cases differently. return 0; + case meta::FieldType::SET: + case meta::FieldType::LIST: + // For these types, we can't determine the size without additional information + // like the length of the set or the list. + return 0; case meta::FieldType::UNKNOWN: default: // Unknown type @@ -54,13 +59,15 @@ Status TableSegmentMVP::Init(meta::TableSchema& table_schema, int64_t size_limit primitive_offset_ = 0; schema = table_schema; - // Get how many primitive, vectors, and variable-length attributes (string, sparse vectors). + // Get how many primitive, vectors, and variable-length attributes (string, sparse vectors, set, list). for (auto& field_schema : table_schema.fields_) { auto current_total_vec_num = dense_vector_num_ + sparse_vector_num_; if (field_schema.field_type_ == meta::FieldType::STRING || field_schema.field_type_ == meta::FieldType::JSON || field_schema.field_type_ == meta::FieldType::SPARSE_VECTOR_DOUBLE || - field_schema.field_type_ == meta::FieldType::SPARSE_VECTOR_FLOAT) { + field_schema.field_type_ == meta::FieldType::SPARSE_VECTOR_FLOAT || + field_schema.field_type_ == meta::FieldType::SET || + field_schema.field_type_ == meta::FieldType::LIST) { field_id_mem_offset_map_[field_schema.id_] = var_len_attr_num_; field_name_mem_offset_map_[field_schema.name_] = var_len_attr_num_; if (field_schema.field_type_ == meta::FieldType::SPARSE_VECTOR_DOUBLE || @@ -265,6 +272,47 @@ TableSegmentMVP::TableSegmentMVP(meta::TableSchema& table_schema, const std::str file.read(reinterpret_cast(v->data()), dataLen); var_len_attr_table_[attrIdx][recordIdx] = std::move(v); break; + case meta::FieldType::SET: + case meta::FieldType::LIST: { + // set or list contains elements of primitive data types + switch (field.element_type) { + case meta::FieldType::INT1: + case meta::FieldType::INT2: + case 
meta::FieldType::INT4: + case meta::FieldType::INT8: { + std::vector values(dataLen / sizeof(int64_t)); + file.read(reinterpret_cast(values.data()), dataLen); + var_len_attr_table_[attrIdx][recordIdx] = std::move(values); + break; + } + case meta::FieldType::FLOAT: { + std::vector values(dataLen / sizeof(float)); + file.read(reinterpret_cast(values.data()), dataLen); + var_len_attr_table_[attrIdx][recordIdx] = std::move(values); + break; + } + case meta::FieldType::DOUBLE: { + std::vector values(dataLen / sizeof(double)); + file.read(reinterpret_cast(values.data()), dataLen); + var_len_attr_table_[attrIdx][recordIdx] = std::move(values); + break; + } + case meta::FieldType::STRING: { + std::string str(dataLen, '\0'); + file.read(&str[0], dataLen); + var_len_attr_table_[attrIdx][recordIdx] = str; + break; + } + case meta::FieldType::BOOL: { + std::vector values(dataLen / sizeof(bool)); + file.read(reinterpret_cast(values.data()), dataLen); + var_len_attr_table_[attrIdx][recordIdx] = std::move(values); + break; + } + default: + break; + } + } } } } @@ -663,7 +711,7 @@ Status TableSegmentMVP::Insert(meta::TableSchema& table_schema, Json& records, i } else { // std::cerr << "primary key [" << value << "] already exists, skipping." << std::endl; skipped_entry++; - goto LOOP_END; + goto LOOP_END; } } } else { @@ -679,7 +727,7 @@ Status TableSegmentMVP::Insert(meta::TableSchema& table_schema, Json& records, i } else { // std::cerr << "primary key [" << value << "] already exists, skipping." << std::endl; skipped_entry++; - goto LOOP_END; + goto LOOP_END; } } break; @@ -695,7 +743,7 @@ Status TableSegmentMVP::Insert(meta::TableSchema& table_schema, Json& records, i } else { // std::cerr << "primary key [" << value << "] already exists, skipping." 
<< std::endl; skipped_entry++; - goto LOOP_END; + goto LOOP_END; } } break; @@ -711,7 +759,7 @@ Status TableSegmentMVP::Insert(meta::TableSchema& table_schema, Json& records, i } else { // std::cerr << "primary key [" << value << "] already exists, skipping." << std::endl; skipped_entry++; - goto LOOP_END; + goto LOOP_END; } } break; @@ -727,7 +775,7 @@ Status TableSegmentMVP::Insert(meta::TableSchema& table_schema, Json& records, i } else { // std::cerr << "primary key [" << value << "] already exists, skipping." << std::endl; skipped_entry++; - goto LOOP_END; + goto LOOP_END; } } break; diff --git a/engine/db/table_segment_mvp.hpp b/engine/db/table_segment_mvp.hpp index 2c4bca8..f2b973a 100644 --- a/engine/db/table_segment_mvp.hpp +++ b/engine/db/table_segment_mvp.hpp @@ -80,6 +80,10 @@ class TableSegmentMVP { int64_t dense_vector_num_; char* attribute_table_; // The attribute table in memory (exclude vector attributes and string attributes). std::vector var_len_attr_table_; // The variable length attribute table in memory. + + // TODO: + // vector for container attribute table + // std::vector> string_tables_; // Hold the string attributes. std::vector vector_dims_; float** vector_tables_; // The vector attribute tables. Each vector attribute has its own vector table.