Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Hide internal dimensions #3266

Open
wants to merge 5 commits into
base: xan/geometry_cast_util
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/managed_query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ void ManagedQuery::setup_read() {
for (int i = 0; i < attribute_num; i++) {
columns_.push_back(schema.attribute(i).name());
}

auto is_internal = [](std::string name) {
return name.rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
};

auto internal_end = std::remove_if(
columns_.begin(), columns_.end(), is_internal);
columns_.erase(internal_end, columns_.end());
}

// Allocate and attach buffers
Expand Down
16 changes: 14 additions & 2 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ void SOMAGeometryDataFrame::create(
std::shared_ptr<SOMAContext> ctx,
PlatformConfig platform_config,
std::optional<TimestampRange> timestamp) {
std::vector<std::string> spatial_axes;
auto tiledb_schema = ArrowAdapter::tiledb_schema_from_arrow_schema(
ctx->tiledb_ctx(),
schema,
Expand Down Expand Up @@ -98,7 +97,20 @@ std::unique_ptr<ArrowSchema> SOMAGeometryDataFrame::schema() const {

/**
 * Return the index column names with the internal dimensions hidden.
 *
 * Every dimension whose name starts with SOMA_GEOMETRY_DIMENSION_PREFIX is
 * treated as internal and dropped from the result. SOMA_GEOMETRY_COLUMN_NAME
 * is inserted at the position of the first internal dimension (or appended
 * at the end when no internal dimension exists), so the remaining dimensions
 * keep their relative order.
 *
 * @return The dimension names with the internal ones replaced by the
 * geometry column name.
 */
const std::vector<std::string> SOMAGeometryDataFrame::index_column_names()
    const {
    std::vector<std::string> dim_names = this->dimension_names();

    // Taken by const reference: the predicate runs once per name and must
    // not copy the string each time.
    const auto is_internal = [](const std::string& name) {
        return name.rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
    };

    // Insert first, then filter: the iterator returned by find_if is only
    // valid until the vector is modified.
    const auto first_internal = std::find_if(
        dim_names.begin(), dim_names.end(), is_internal);
    dim_names.insert(first_internal, SOMA_GEOMETRY_COLUMN_NAME);

    // NOTE(review): assumes SOMA_GEOMETRY_COLUMN_NAME does not itself start
    // with SOMA_GEOMETRY_DIMENSION_PREFIX, otherwise it is removed here too.
    dim_names.erase(
        std::remove_if(dim_names.begin(), dim_names.end(), is_internal),
        dim_names.end());

    return dim_names;
}

const std::vector<std::string> SOMAGeometryDataFrame::spatial_column_names()
Expand Down
60 changes: 60 additions & 0 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#ifndef SOMA_GEOMETRY_DATAFRAME
#define SOMA_GEOMETRY_DATAFRAME

#include <algorithm>
#include <filesystem>
#include <vector>

Expand Down Expand Up @@ -175,6 +176,65 @@ class SOMAGeometryDataFrame : virtual public SOMAArray {
*/
uint64_t count();

/**
 * @brief Set the slice of a spatial axis from a single range
 *
 * The axis is backed by two internal dimensions named
 * SOMA_GEOMETRY_DIMENSION_PREFIX + axis + "__min" and
 * SOMA_GEOMETRY_DIMENSION_PREFIX + axis + "__max". Given the requested
 * range [lo, hi] and the domain [dmin, dmax] of the "__min" dimension
 * (the current domain when one is set, the schema domain otherwise):
 *   - the "__min" dimension is restricted to [dmin, min(hi, dmax)]
 *   - the "__max" dimension is restricted to [max(lo, dmin), dmax]
 *
 * @note Partitioning is not supported
 * @note `ranges` must hold exactly one element; the vector type is kept
 * for symmetry with set_dim_ranges.
 * @note NOTE(review): a range lying entirely outside the domain is passed
 * through unchecked and yields an inverted interval - confirm callers
 * never request one.
 *
 * @tparam T Native type of the axis dimensions
 * @param axis Name of the spatial axis
 * @param ranges Vector holding the single (lo, hi) pair to select
 * @throws TileDBSOMAError if `ranges` does not hold exactly one range
 */
template <typename T>
void set_spatial_dim_ranges(
    const std::string& axis, const std::vector<std::pair<T, T>>& ranges) {
    if (ranges.size() != 1) {
        throw TileDBSOMAError(
            "Multi ranges are not supported for axis dimensions");
    }

    const std::string min_dim_name = SOMA_GEOMETRY_DIMENSION_PREFIX + axis +
                                     "__min";
    const std::string max_dim_name = SOMA_GEOMETRY_DIMENSION_PREFIX + axis +
                                     "__max";

    T min_domain{};
    T max_domain{};

    // Both min and max dimension share the same domain, so only the
    // "__min" dimension is inspected.
    auto current_domain = ArraySchemaExperimental::current_domain(
        *this->ctx()->tiledb_ctx(), *this->tiledb_schema());
    if (current_domain.is_empty()) {
        const std::pair<T, T> domain = this->tiledb_schema()
                                           ->domain()
                                           .dimension(min_dim_name)
                                           .domain<T>();
        min_domain = domain.first;
        max_domain = domain.second;
    } else {
        const auto bounds = current_domain.ndrectangle().range<T>(
            min_dim_name);
        min_domain = bounds[0];
        max_domain = bounds[1];
    }

    // Exactly one range is present (checked above).
    const std::pair<T, T>& range = ranges.front();

    std::vector<std::pair<T, T>> min_range{
        std::make_pair(min_domain, std::min(range.second, max_domain))};
    std::vector<std::pair<T, T>> max_range{
        std::make_pair(std::max(range.first, min_domain), max_domain)};

    this->set_dim_ranges(min_dim_name, min_range);
    this->set_dim_ranges(max_dim_name, max_range);
}

void set_array_data(
std::unique_ptr<ArrowSchema> arrow_schema,
std::unique_ptr<ArrowArray> arrow_array) override;
Expand Down
204 changes: 126 additions & 78 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@
* This file defines the ArrowAdapter class.
*/

#include "arrow_adapter.h"
#include <algorithm>
#include <variant>

#include "../soma/column_buffer.h"
#include "arrow_adapter.h"
#include "logger.h"

namespace tiledbsoma {
Expand Down Expand Up @@ -329,16 +332,44 @@ json ArrowAdapter::_get_filter_list_json(FilterList filter_list) {

std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
std::shared_ptr<Context> ctx, std::shared_ptr<Array> tiledb_array) {
auto is_internal = [](const Dimension& dim) {
return dim.name().rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
};

auto tiledb_schema = tiledb_array->schema();
auto ndim = tiledb_schema.domain().ndim();
auto nattr = tiledb_schema.attribute_num();
auto dimensions = tiledb_schema.domain().dimensions();

// For geometry dataframe replace the internal dim with the geometry column
int internal_dim_idx = std::find_if(
dimensions.begin(),
dimensions.end(),
is_internal) -
dimensions.begin();
auto internal_dim_iter = std::remove_if(
dimensions.begin(), dimensions.end(), is_internal);
dimensions.erase(internal_dim_iter, dimensions.end());

std::vector<std::variant<Dimension, Attribute>> columns;
for (size_t i = 0; i < dimensions.size(); ++i) {
columns.push_back(dimensions[i]);
}

for (size_t i = 0; i < tiledb_schema.attribute_num(); ++i) {
auto attr = tiledb_schema.attribute(i);
if (strcmp(attr.name().c_str(), SOMA_GEOMETRY_COLUMN_NAME.c_str()) ==
0) {
columns.insert(columns.begin() + internal_dim_idx, attr);
} else {
columns.push_back(attr);
}
}

std::unique_ptr<ArrowSchema> arrow_schema = std::make_unique<ArrowSchema>();
arrow_schema->format = strdup("+s");
arrow_schema->name = strdup("parent");
arrow_schema->metadata = nullptr;
arrow_schema->flags = 0;
arrow_schema->n_children = ndim + nattr;
arrow_schema->n_children = columns.size();
arrow_schema->dictionary = nullptr;
arrow_schema->release = &ArrowAdapter::release_schema;
arrow_schema->private_data = nullptr;
Expand All @@ -351,87 +382,104 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(

ArrowSchema* child = nullptr;

for (uint32_t i = 0; i < ndim; ++i) {
auto dim = tiledb_schema.domain().dimension(i);
child = arrow_schema->children[i] = (ArrowSchema*)malloc(
sizeof(ArrowSchema));
child->format = strdup(
ArrowAdapter::to_arrow_format(dim.type()).data());
child->name = strdup(dim.name().c_str());
child->metadata = nullptr;
child->flags = 0;
child->n_children = 0;
child->children = nullptr;
child->dictionary = nullptr;
child->release = &ArrowAdapter::release_schema;
child->private_data = nullptr;
LOG_TRACE(fmt::format(
"[ArrowAdapter] arrow_schema_from_tiledb_array dim {} format {} "
"name {}",
i,
child->format,
child->name));
for (size_t i = 0; i < columns.size(); ++i) {
std::visit(
[&](auto&& arg) {
using T = std::decay_t<decltype(arg)>;
if constexpr (std::is_same_v<T, Dimension>) {
child = arrow_schema->children[i] =
arrow_schema_from_tiledb_dimension(arg).release();
} else if constexpr (std::is_same_v<T, Attribute>) {
child = arrow_schema->children[i] =
arrow_schema_from_tiledb_attribute(
arg, *ctx, *tiledb_array)
.release();
}
},
columns[i]);
}

for (uint32_t i = 0; i < nattr; ++i) {
auto attr = tiledb_schema.attribute(i);
child = arrow_schema->children[ndim + i] = (ArrowSchema*)malloc(
sizeof(ArrowSchema));
child->format = strdup(
ArrowAdapter::to_arrow_format(attr.type()).data());
child->name = strdup(attr.name().c_str());
child->metadata = nullptr;
child->flags = 0;
if (attr.nullable()) {
child->flags |= ARROW_FLAG_NULLABLE;
} else {
child->flags &= ~ARROW_FLAG_NULLABLE;
}
child->n_children = 0;
child->children = nullptr;
child->dictionary = nullptr;
child->release = &ArrowAdapter::release_schema;
child->private_data = nullptr;
return arrow_schema;
}

LOG_TRACE(fmt::format(
"[ArrowAdapter] arrow_schema_from_tiledb_array attr {} format {} "
"name {}",
i,
child->format,
child->name));

auto enmr_name = AttributeExperimental::get_enumeration_name(
*ctx, attr);
if (enmr_name.has_value()) {
auto enmr = ArrayExperimental::get_enumeration(
*ctx, *tiledb_array, attr.name());
auto dict = (ArrowSchema*)malloc(sizeof(ArrowSchema));
std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_dimension(
const Dimension& dimension) {
std::unique_ptr<ArrowSchema> arrow_schema = std::make_unique<ArrowSchema>();
arrow_schema->format = strdup(
ArrowAdapter::to_arrow_format(dimension.type()).data());
arrow_schema->name = strdup(dimension.name().c_str());
arrow_schema->metadata = nullptr;
arrow_schema->flags = 0;
arrow_schema->n_children = 0;
arrow_schema->children = nullptr;
arrow_schema->dictionary = nullptr;
arrow_schema->release = &ArrowAdapter::release_schema;
arrow_schema->private_data = nullptr;
LOG_TRACE(fmt::format(
"[ArrowAdapter] arrow_schema_from_tiledb_dimension format {} "
"name {}",
arrow_schema->format,
arrow_schema->name));

return arrow_schema;
}

/**
 * Build an ArrowSchema describing a single TileDB attribute.
 *
 * The format string is derived from the attribute datatype through
 * to_arrow_format and the name is copied from the attribute. The schema is
 * flagged nullable when the attribute is nullable, except for the attribute
 * named SOMA_GEOMETRY_COLUMN_NAME, which is always reported as non-nullable.
 *
 * When the attribute has an enumeration, a dictionary schema is attached:
 * its format is "z" for TILEDB_STRING_ASCII / TILEDB_CHAR enumerations and
 * the to_arrow_format(type, false) result otherwise, and the parent schema
 * gets ARROW_FLAG_DICTIONARY_ORDERED when the enumeration is ordered.
 *
 * @param attribute Attribute to describe
 * @param ctx TileDB context used to look up the enumeration
 * @param tiledb_array Array the attribute (and its enumeration) belongs to
 * @return The ArrowSchema for the attribute
 */
std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_attribute(
    Attribute& attribute, const Context& ctx, const Array& tiledb_array) {
    std::unique_ptr<ArrowSchema> arrow_schema = std::make_unique<ArrowSchema>();
    arrow_schema->format = strdup(
        ArrowAdapter::to_arrow_format(attribute.type()).data());
    arrow_schema->name = strdup(attribute.name().c_str());
    arrow_schema->metadata = nullptr;
    // Flags start cleared; only the bits that apply are set below.
    arrow_schema->flags = 0;
    if (attribute.nullable() &&
        attribute.name() != SOMA_GEOMETRY_COLUMN_NAME) {
        arrow_schema->flags |= ARROW_FLAG_NULLABLE;
    }
    arrow_schema->n_children = 0;
    arrow_schema->children = nullptr;
    arrow_schema->dictionary = nullptr;
    arrow_schema->release = &ArrowAdapter::release_schema;
    arrow_schema->private_data = nullptr;

    LOG_TRACE(fmt::format(
        "[ArrowAdapter] arrow_schema_from_tiledb_attribute format {} "
        "name {}",
        arrow_schema->format,
        arrow_schema->name));

    auto enmr_name = AttributeExperimental::get_enumeration_name(
        ctx, attribute);
    if (enmr_name.has_value()) {
        auto enmr = ArrayExperimental::get_enumeration(
            ctx, tiledb_array, attribute.name());

        // Allocated with malloc rather than new.
        // NOTE(review): presumably release_schema frees the dictionary with
        // free() - confirm before changing the allocation.
        auto dict = static_cast<ArrowSchema*>(malloc(sizeof(ArrowSchema)));

        // The format is assigned exactly once; an unconditional strdup ahead
        // of this branch would be overwritten and leaked.
        if (enmr.type() == TILEDB_STRING_ASCII || enmr.type() == TILEDB_CHAR) {
            dict->format = strdup("z");
        } else {
            dict->format = strdup(
                ArrowAdapter::to_arrow_format(enmr.type(), false).data());
        }
        dict->name = strdup(enmr.name().c_str());
        dict->metadata = nullptr;
        // NOTE(review): dict->flags is never assigned here, as in the code
        // this replaces - confirm nothing reads it.
        if (enmr.ordered()) {
            arrow_schema->flags |= ARROW_FLAG_DICTIONARY_ORDERED;
        }
        dict->n_children = 0;
        dict->children = nullptr;
        dict->dictionary = nullptr;
        dict->release = &ArrowAdapter::release_schema;
        dict->private_data = nullptr;
        arrow_schema->dictionary = dict;
    }

    return arrow_schema;
}

Expand Down
16 changes: 16 additions & 0 deletions libtiledbsoma/src/utils/arrow_adapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,22 @@ class ArrowAdapter {
static std::unique_ptr<ArrowSchema> arrow_schema_from_tiledb_array(
std::shared_ptr<Context> ctx, std::shared_ptr<Array> tiledb_array);

/**
* @brief Create an ArrowSchema from TileDB Dimension
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* @brief Create a an ArrowSchema from TileDB Dimension
* @brief Create an ArrowSchema from TileDB Dimension

*
* @return ArrowSchema
*/
static std::unique_ptr<ArrowSchema> arrow_schema_from_tiledb_dimension(
const Dimension& dimension);

/**
* @brief Create an ArrowSchema from TileDB Attribute
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* @brief Create a an ArrowSchema from TileDB Attribute
* @brief Create an ArrowSchema from TileDB Attribute

*
* @return ArrowSchema
*/
static std::unique_ptr<ArrowSchema> arrow_schema_from_tiledb_attribute(
Attribute& attribute, const Context& ctx, const Array& tiledb_array);

/**
* @brief Get members of the TileDB Schema in the form of a PlatformConfig
*
Expand Down
Loading
Loading