Accept a reference to Matrix by default, which is probably safer.
This is more ergonomic and reduces the risk of using dangling pointers.
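
As a quick illustration of the call-site difference, here is a minimal sketch assuming a `std::shared_ptr<tatami::Matrix<double, int> >` named `mat`, as in the README; the `demo` wrapper itself is purely illustrative:

```cpp
#include "tatami_layered/tatami_layered.hpp"
#include <memory>

void demo(std::shared_ptr<tatami::Matrix<double, int> > mat) {
    // New default: pass the matrix by reference, so no raw pointer needs to be held.
    auto layered = tatami_layered::convert_to_layered_sparse(*mat);

    // The old pointer-based call still compiles via the back-compatibility overload.
    auto layered_legacy = tatami_layered::convert_to_layered_sparse(mat.get());
}
```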
LTLA committed Oct 3, 2024
1 parent 5e3805d commit cd59c85
Showing 3 changed files with 32 additions and 20 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.24)

project(tatami_layered
- VERSION 1.0.0
+ VERSION 2.0.0
DESCRIPTION "Create layered tatami matrices"
LANGUAGES CXX)

2 changes: 1 addition & 1 deletion README.md
@@ -18,7 +18,7 @@ We can easily convert an existing `tatami::Matrix` to a layered sparse matrix:
```cpp
#include "tatami_layered/tatami_layered.hpp"

- auto converted = tatami_layered::convert_to_layered_sparse(mat.get());
+ auto converted = tatami_layered::convert_to_layered_sparse(*mat);
```

We can also read a layered sparse matrix from a Matrix Market file:
48 changes: 30 additions & 18 deletions include/tatami_layered/convert_to_layered_sparse.hpp
@@ -21,8 +21,8 @@ namespace tatami_layered {
* @cond
*/
template<typename ColIndex_, typename ValueOut_ = double, typename IndexOut_ = int, typename ValueIn_, typename IndexIn_>
- std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tatami::Matrix<ValueIn_, IndexIn_>* mat, IndexIn_ chunk_size, int nthreads) {
- auto NR = mat->nrow(), NC = mat->ncol();
+ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tatami::Matrix<ValueIn_, IndexIn_>& mat, IndexIn_ chunk_size, int nthreads) {
+ auto NR = mat.nrow(), NC = mat.ncol();
IndexIn_ leftovers = NC % chunk_size;
size_t nchunks = std::max(static_cast<size_t>(1), static_cast<size_t>(NC) / chunk_size + (leftovers != 0));

@@ -41,11 +41,11 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tata
for (auto& x : max_per_chunk) { x.resize(NR); }
for (auto& x : num_per_chunk) { x.resize(NR); }

- if (mat->sparse()) {
+ if (mat.sparse()) {
tatami::parallelize([&](size_t, IndexIn_ start, IndexIn_ length) -> void {
tatami::Options opt;
opt.sparse_ordered_index = false;
- auto ext = tatami::consecutive_extractor<true>(mat, true, start, length, opt);
+ auto ext = tatami::consecutive_extractor<true>(&mat, true, start, length, opt);
std::vector<ValueIn_> dbuffer(NC);
std::vector<IndexIn_> ibuffer(NC);

@@ -64,7 +64,7 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tata

} else {
tatami::parallelize([&](size_t, IndexIn_ start, IndexIn_ length) -> void {
- auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
+ auto ext = tatami::consecutive_extractor<false>(&mat, true, start, length);
std::vector<ValueIn_> dbuffer(NC);

for (IndexIn_ r = start, end = start + length; r < end; ++r) {
@@ -101,9 +101,9 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tata
std::vector<size_t> output_positions(nchunks);
std::vector<ValueIn_> dbuffer(NC);

- if (mat->sparse()) {
+ if (mat.sparse()) {
std::vector<IndexIn_> ibuffer(NC);
- auto ext = tatami::consecutive_extractor<true>(mat, true, start, length);
+ auto ext = tatami::consecutive_extractor<true>(&mat, true, start, length);
for (IndexIn_ r = start, end = start + length; r < end; ++r) {
for (size_t chunk = 0; chunk < nchunks; ++chunk) {
output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
@@ -120,7 +120,7 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tata
}

} else {
- auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
+ auto ext = tatami::consecutive_extractor<false>(&mat, true, start, length);
for (IndexIn_ r = start, end = start + length; r < end; ++r) {
for (size_t chunk = 0; chunk < nchunks; ++chunk) {
output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
@@ -154,8 +154,8 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tata
}

template<typename ColIndex_, typename ValueOut_ = double, typename IndexOut_ = int, typename ValueIn_, typename IndexIn_>
- std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const tatami::Matrix<ValueIn_, IndexIn_>* mat, IndexIn_ chunk_size, int nthreads) {
- auto NR = mat->nrow(), NC = mat->ncol();
+ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const tatami::Matrix<ValueIn_, IndexIn_>& mat, IndexIn_ chunk_size, int nthreads) {
+ auto NR = mat.nrow(), NC = mat.ncol();
IndexIn_ leftovers = NC % chunk_size;
size_t nchunks = std::max(static_cast<size_t>(1), static_cast<size_t>(NC) / chunk_size + (leftovers != 0));

@@ -181,11 +181,11 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const t
for (auto& x : num_per_chunk) { x.resize(NR); }
}

- if (mat->sparse()) {
+ if (mat.sparse()) {
tatami::parallelize([&](size_t t, IndexIn_ start, IndexIn_ length) -> void {
tatami::Options opt;
opt.sparse_ordered_index = false;
- auto ext = tatami::consecutive_extractor<true>(mat, false, start, length, opt);
+ auto ext = tatami::consecutive_extractor<true>(&mat, false, start, length, opt);
std::vector<ValueIn_> dbuffer(NR);
std::vector<IndexIn_> ibuffer(NR);
auto& max_per_chunk = max_per_chunk_threaded[t];
@@ -210,7 +210,7 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const t

} else {
tatami::parallelize([&](size_t t, IndexIn_ start, IndexIn_ length) -> void {
- auto ext = tatami::consecutive_extractor<false>(mat, false, start, length);
+ auto ext = tatami::consecutive_extractor<false>(&mat, false, start, length);
std::vector<ValueIn_> dbuffer(NR);
auto& max_per_chunk = max_per_chunk_threaded[t];
auto& num_per_chunk = num_per_chunk_threaded[t];
@@ -275,9 +275,9 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const t
}
}

- if (mat->sparse()) {
+ if (mat.sparse()) {
std::vector<IndexIn_> ibuffer(length);
- auto ext = tatami::consecutive_extractor<true>(mat, false, 0, NC, start, length);
+ auto ext = tatami::consecutive_extractor<true>(&mat, false, 0, NC, start, length);

for (IndexIn_ c = 0; c < NC; ++c) {
auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
@@ -294,7 +294,7 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const t
}

} else {
- auto ext = tatami::consecutive_extractor<false>(mat, false, 0, NC, start, length);
+ auto ext = tatami::consecutive_extractor<false>(&mat, false, 0, NC, start, length);

for (IndexIn_ c = 0; c < NC; ++c) {
auto ptr = ext->fetch(c, dbuffer.data());
@@ -352,19 +352,31 @@ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const t
* For example, if `ColumnIndex_` was set to an unsigned 8-bit integer, `chunk_size` would be automatically reduced to 256.
*/
template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = uint16_t, typename ValueIn_, typename IndexIn_>
- std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>* mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
+ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>& mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
size_t max_index = static_cast<size_t>(std::numeric_limits<ColumnIndex_>::max()) + 1;
if (static_cast<size_t>(chunk_size) > max_index) {
chunk_size = max_index;
}

- if (mat->prefer_rows()) {
+ if (mat.prefer_rows()) {
return convert_by_row<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, num_threads);
} else {
return convert_by_column<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, num_threads);
}
}

+ /**
+  * @cond
+  */
+ // Provided for back-compatibility.
+ template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = uint16_t, typename ValueIn_, typename IndexIn_>
+ std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>* mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
+     return convert_to_layered_sparse<ValueOut_, IndexOut_, ColumnIndex_, ValueIn_, IndexIn_>(*mat, chunk_size, num_threads);
+ }
+ /**
+  * @endcond
+  */

}

#endif
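
As a usage note on the chunk-size clamping documented above: with an 8-bit `ColumnIndex_`, a request for the default 65536-column chunks is reduced to 256. A hedged sketch, where `make_layered_8bit` is a hypothetical helper and the `double`/`int` matrix types are assumptions:

```cpp
#include "tatami_layered/tatami_layered.hpp"
#include <cstdint>
#include <memory>

// With ColumnIndex_ = uint8_t, each chunk can only address 2^8 column offsets,
// so the requested chunk size of 65536 is clamped down to 256 internally.
std::shared_ptr<tatami::Matrix<double, int> > make_layered_8bit(const tatami::Matrix<double, int>& mat) {
    return tatami_layered::convert_to_layered_sparse<double, int, std::uint8_t>(mat, /* chunk_size = */ 65536);
}
```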
