From d09423e97568289b04f30a56a19cc17d8a35a17f Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 17 May 2024 07:47:41 -0700 Subject: [PATCH] Finished the conversion of the tests. --- include/tatami/subset/DelayedSubsetBlock.hpp | 3 +- include/tatami/subset/DelayedSubsetSorted.hpp | 19 +- .../subset/DelayedSubsetSortedUnique.hpp | 227 ++++++++------ include/tatami/subset/DelayedSubsetUnique.hpp | 296 ++++++++++-------- include/tatami/subset/utils.hpp | 77 ++--- 5 files changed, 351 insertions(+), 271 deletions(-) diff --git a/include/tatami/subset/DelayedSubsetBlock.hpp b/include/tatami/subset/DelayedSubsetBlock.hpp index 7dbee3d5..b6ab08b2 100644 --- a/include/tatami/subset/DelayedSubsetBlock.hpp +++ b/include/tatami/subset/DelayedSubsetBlock.hpp @@ -128,7 +128,8 @@ class AcrossDense : public DenseExtractor { }; template -struct AcrossSparse : public SparseExtractor { +class AcrossSparse : public SparseExtractor { +public: template AcrossSparse(const Matrix* matrix, Index_ subset_start, bool row, MaybeOracle oracle, Args_&& ... args) : my_shift(subset_start) { if constexpr(oracle_) { diff --git a/include/tatami/subset/DelayedSubsetSorted.hpp b/include/tatami/subset/DelayedSubsetSorted.hpp index 843909cd..4be04042 100644 --- a/include/tatami/subset/DelayedSubsetSorted.hpp +++ b/include/tatami/subset/DelayedSubsetSorted.hpp @@ -56,7 +56,7 @@ DenseParallelResults format_dense_parallel(const SubsetStorage_& indices } template -class ParallelDense : DenseExtractor { +class ParallelDense : public DenseExtractor { public: template ParallelDense(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, const Options& opt) { @@ -201,7 +201,7 @@ class ParallelSparseCore { } my_ext = new_extractor(matrix, row, std::move(oracle), std::move(processed.collapsed), opt); - my_expansion = std::move(processed.my_expansion); + my_expansion = std::move(processed.expansion); } template @@ -278,10 +278,11 @@ class ParallelFullSparse : public SparseExtractor { }; template -struct ParallelBlockSparse : public SparseExtractor, public ParallelSparseBase { +class ParallelBlockSparse : public SparseExtractor { +public: template ParallelBlockSparse(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, Index_ block_start, Index_ block_length, const Options& opt) : - my_core(matrix, subset, block_length, row, std::move(oracle), opt, [&](Index_ i) -> Index_ { return i + block_start; }); + my_core(matrix, subset, block_length, row, std::move(oracle), opt, [&](Index_ i) -> Index_ { return i + block_start; }), my_block_start(block_start) {} @@ -295,10 +296,11 @@ struct ParallelBlockSparse : public SparseExtractor, pu }; template -struct ParallelIndexSparse : public SparseExtractor, public ParallelSparseBase { +class ParallelIndexSparse : public SparseExtractor { +public: template - ParallelIndexSparse(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, VectorPtr sub_ptr, const Options& opt) : - core(matrix, subset, indices->size(), row, std::move(oracle), opt, [&](Index_ i) -> Index_ { return indices_ptr->operator[](i); }); + ParallelIndexSparse(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, VectorPtr indices_ptr, const Options& opt) : + my_core(matrix, subset, indices_ptr->size(), row, std::move(oracle), opt, [&](Index_ i) -> Index_ { return indices_ptr->operator[](i); }), my_indices_ptr(std::move(indices_ptr)) {} @@ -308,6 +310,7 @@ struct ParallelIndexSparse : public SparseExtractor, pu } private: + ParallelSparseCore my_core; VectorPtr my_indices_ptr; }; @@ -339,7 +342,7 @@ class DelayedSubsetSorted : public Matrix { * @param check Whether to check `idx` for sorted values. */ DelayedSubsetSorted(std::shared_ptr > matrix, SubsetStorage_ subset, bool row, bool check = true) : - my_matrix(std::move(matrix)), my_subset(std::move(subset)), by_row(row) + my_matrix(std::move(matrix)), my_subset(std::move(subset)), my_row(row) { if (check) { for (Index_ i = 1, end = my_subset.size(); i < end; ++i) { diff --git a/include/tatami/subset/DelayedSubsetSortedUnique.hpp b/include/tatami/subset/DelayedSubsetSortedUnique.hpp index 1bede15d..3e849fce 100644 --- a/include/tatami/subset/DelayedSubsetSortedUnique.hpp +++ b/include/tatami/subset/DelayedSubsetSortedUnique.hpp @@ -20,84 +20,127 @@ namespace tatami { */ namespace DelayedSubsetSortedUnique_internal { -template -VectorPtr create(const IndexStorage_& indices) { - return std::make_shared >(indices.begin(), indices.end()); +template +VectorPtr create(const SubsetStorage_& subset) { + return std::make_shared >(subset.begin(), subset.end()); } -template -VectorPtr create(const IndexStorage_& indices, Index_ block_start, Index_ block_length) { - auto pistart = indices.begin() + block_start; +template +VectorPtr create(const SubsetStorage_& subset, Index_ block_start, Index_ block_length) { + auto pistart = subset.begin() + block_start; return std::make_shared >(pistart, pistart + block_length); } -template -VectorPtr create(const IndexStorage_& indices, const VectorPtr& idx_ptr) { +template +VectorPtr create(const SubsetStorage_& subset, const VectorPtr& indices_ptr) { auto rawptr = std::make_shared >(); VectorPtr outptr(rawptr); auto& output = *rawptr; - const auto& input = *idx_ptr; + const auto& input = *indices_ptr; output.reserve(input.size()); for (auto i : input) { - output.push_back(indices[i]); + output.push_back(subset[i]); } return outptr; } -template -struct ParallelDense : public DenseExtractor { - template - ParallelDense(const Matrix* mat, const IndexStorage_& indices, bool row, MaybeOracle oracle, const Options& opt) : - internal(new_extractor(mat, row, std::move(oracle), create(indices), opt)) {} - - template - ParallelDense(const Matrix* mat, const IndexStorage_& indices, bool row, MaybeOracle oracle, Index_ block_start, Index_ block_length, const Options& opt) : - internal(new_extractor(mat, row, std::move(oracle), create(indices, block_start, block_length), opt)) {} - - template - ParallelDense(const Matrix* mat, const IndexStorage_& indices, bool row, MaybeOracle oracle, VectorPtr idx_ptr, const Options& opt) : - internal(new_extractor(mat, row, std::move(oracle), create(indices, idx_ptr), opt)) {} +template +std::unique_ptr > create_parallel_dense( + const Matrix* matrix, + const SubsetStorage_& subset, + bool row, + MaybeOracle oracle, + const Options& opt) +{ + return new_extractor(matrix, row, std::move(oracle), create(subset), opt); +} -public: - const Value_* fetch(Index_ i, Value_* buffer) { - return internal->fetch(i, buffer); - } +template +std::unique_ptr > create_parallel_dense( + const Matrix* matrix, + const SubsetStorage_& subset, + bool row, + MaybeOracle oracle, + Index_ block_start, + Index_ block_length, + const Options& opt) +{ + return new_extractor(matrix, row, std::move(oracle), create(subset, block_start, block_length), opt); +} -protected: - std::unique_ptr > internal; -}; +template +std::unique_ptr > create_parallel_dense( + const Matrix* matrix, + const SubsetStorage_& subset, + bool row, + MaybeOracle oracle, + VectorPtr indices_ptr, + const Options& opt) +{ + return new_extractor(matrix, row, std::move(oracle), create(subset, indices_ptr), opt); +} template -struct ParallelSparse : public SparseExtractor { - template - ParallelSparse(const Matrix* mat, const IndexStorage_& indices, const std::vector& remap, bool row, MaybeOracle oracle, const Options& opt) : - internal(new_extractor(mat, row, std::move(oracle), create(indices), opt)), remapping(remap) {} - - template - ParallelSparse(const Matrix* mat, const IndexStorage_& indices, const std::vector& remap, bool row, MaybeOracle oracle, Index_ block_start, Index_ block_length, const Options& opt) : - internal(new_extractor(mat, row, std::move(oracle), create(indices, block_start, block_length), opt)), remapping(remap) {} - - template - ParallelSparse(const Matrix* mat, const IndexStorage_& indices, const std::vector& remap, bool row, MaybeOracle oracle, VectorPtr idx_ptr, const Options& opt) : - internal(new_extractor(mat, row, std::move(oracle), create(indices, idx_ptr), opt)), remapping(remap) {} +class ParallelSparse : public SparseExtractor { +public: + template + ParallelSparse( + const Matrix* matrix, + const SubsetStorage_& subset, + const std::vector& remap, + bool row, + MaybeOracle oracle, + const Options& opt + ) : + my_ext(new_extractor(matrix, row, std::move(oracle), create(subset), opt)), + my_remapping(remap) + {} + + template + ParallelSparse( + const Matrix* matrix, + const SubsetStorage_& subset, + const std::vector& remap, + bool row, MaybeOracle oracle, + Index_ block_start, + Index_ block_length, + const Options& opt + ) : + my_ext(new_extractor(matrix, row, std::move(oracle), create(subset, block_start, block_length), opt)), + my_remapping(remap) + {} + + template + ParallelSparse( + const Matrix* matrix, + const SubsetStorage_& subset, + const std::vector& remap, + bool row, + MaybeOracle oracle, + VectorPtr indices_ptr, + const Options& opt + ) : + my_ext(new_extractor(matrix, row, std::move(oracle), create(subset, indices_ptr), opt)), + my_remapping(remap) + {} public: - SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto out = internal->fetch(i, vbuffer, ibuffer); + SparseRange fetch(Index_ i, Value_* value_buffer, Index_* index_buffer) { + auto out = my_ext->fetch(i, value_buffer, index_buffer); if (out.index) { for (Index_ i = 0; i < out.number; ++i) { - ibuffer[i] = remapping[out.index[i]]; + index_buffer[i] = my_remapping[out.index[i]]; } - out.index = ibuffer; + out.index = index_buffer; } return out; } private: - std::unique_ptr > internal; - const std::vector& remapping; + std::unique_ptr > my_ext; + const std::vector& my_remapping; }; } @@ -113,87 +156,83 @@ struct ParallelSparse : public SparseExtractor { * * @tparam Value_ Type of matrix value. * @tparam Index_ Type of index value. - * @tparam IndexStorage_ Vector containing the subset indices. + * @tparam SubsetStorage_ Vector containing the subset indices. */ -template +template class DelayedSubsetSortedUnique : public Matrix { public: /** - * @param p Pointer to the underlying (pre-subset) matrix. - * @param idx Vector of 0-based indices to use for subsetting on the rows (if `row = true`) or columns (otherwise). + * @param matrix Pointer to the underlying (pre-subset) matrix. + * @param subset Vector of 0-based indices to use for subsetting on the rows (if `row = true`) or columns (otherwise). * This should be sorted and unique. * @param row Whether to apply the subset to the rows. * If false, the subset is applied to the columns. * @param check Whether to check `idx` for sorted and unique values. */ - DelayedSubsetSortedUnique(std::shared_ptr > p, IndexStorage_ idx, bool row, bool check = true) : - mat(std::move(p)), indices(std::move(idx)), by_row(row) + DelayedSubsetSortedUnique(std::shared_ptr > matrix, SubsetStorage_ subset, bool row, bool check = true) : + my_matrix(std::move(matrix)), my_subset(std::move(subset)), my_row(row) { if (check) { - for (Index_ i = 1, end = indices.size(); i < end; ++i) { - if (indices[i] <= indices[i-1]) { - throw std::runtime_error("indices should be unique and sorted"); + for (Index_ i = 1, end = my_subset.size(); i < end; ++i) { + if (my_subset[i] <= my_subset[i-1]) { + throw std::runtime_error("subset should be unique and sorted"); } } } - Index_ mapping_dim = by_row ? mat->nrow() : mat->ncol(); - mapping_single.resize(mapping_dim); - for (Index_ i = 0, end = indices.size(); i < end; ++i) { - mapping_single[indices[i]] = i; + Index_ mapping_dim = my_row ? my_matrix->nrow() : my_matrix->ncol(); + my_mapping_single.resize(mapping_dim); + for (Index_ i = 0, end = my_subset.size(); i < end; ++i) { + my_mapping_single[my_subset[i]] = i; } } private: - std::shared_ptr > mat; - IndexStorage_ indices; - bool by_row; - std::vector mapping_single; + std::shared_ptr > my_matrix; + SubsetStorage_ my_subset; + bool my_row; + std::vector my_mapping_single; public: Index_ nrow() const { - if (by_row) { - return indices.size(); + if (my_row) { + return my_subset.size(); } else { - return mat->nrow(); + return my_matrix->nrow(); } } Index_ ncol() const { - if (by_row) { - return mat->ncol(); + if (my_row) { + return my_matrix->ncol(); } else { - return indices.size(); + return my_subset.size(); } } bool is_sparse() const { - return mat->is_sparse(); + return my_matrix->is_sparse(); } double is_sparse_proportion() const { - return mat->is_sparse_proportion(); + return my_matrix->is_sparse_proportion(); } bool prefer_rows() const { - return mat->prefer_rows(); + return my_matrix->prefer_rows(); } double prefer_rows_proportion() const { - return mat->prefer_rows_proportion(); + return my_matrix->prefer_rows_proportion(); } bool uses_oracle(bool row) const { - return mat->uses_oracle(row); + return my_matrix->uses_oracle(row); } - using Matrix::dense_column; - - using Matrix::dense_row; - - using Matrix::sparse_column; + using Matrix::dense; - using Matrix::sparse_row; + using Matrix::sparse; /******************** *** Myopic dense *** @@ -201,10 +240,10 @@ class DelayedSubsetSortedUnique : public Matrix { private: template std::unique_ptr > populate_myopic_dense(bool row, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, row, false, std::forward(args)...); + return DelayedSubsetSortedUnique_internal::create_parallel_dense(my_matrix.get(), my_subset, row, false, std::forward(args)...); } } @@ -227,10 +266,10 @@ class DelayedSubsetSortedUnique : public Matrix { private: template std::unique_ptr > populate_myopic_sparse(bool row, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, mapping_single, row, false, std::forward(args)...); + return std::make_unique >(my_matrix.get(), my_subset, my_mapping_single, row, false, std::forward(args)...); } } @@ -253,10 +292,10 @@ class DelayedSubsetSortedUnique : public Matrix { private: template std::unique_ptr > populate_oracular_dense(bool row, std::shared_ptr > oracle, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::move(oracle), std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, row, std::move(oracle), std::forward(args)...); + return DelayedSubsetSortedUnique_internal::create_parallel_dense(my_matrix.get(), my_subset, row, std::move(oracle), std::forward(args)...); } } @@ -279,10 +318,10 @@ class DelayedSubsetSortedUnique : public Matrix { private: template std::unique_ptr > populate_oracular_sparse(bool row, std::shared_ptr > oracle, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::move(oracle), std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, mapping_single, row, std::move(oracle), std::forward(args)...); + return std::make_unique >(my_matrix.get(), my_subset, my_mapping_single, row, std::move(oracle), std::forward(args)...); } } diff --git a/include/tatami/subset/DelayedSubsetUnique.hpp b/include/tatami/subset/DelayedSubsetUnique.hpp index 60f265eb..03875a1b 100644 --- a/include/tatami/subset/DelayedSubsetUnique.hpp +++ b/include/tatami/subset/DelayedSubsetUnique.hpp @@ -28,12 +28,12 @@ struct DenseParallelResults { std::vector permutation; }; -template -DenseParallelResults format_dense_parallel(const IndexStorage_& indices, Index_ len, ToIndex_ to_index) { +template +DenseParallelResults format_dense_parallel(const SubsetStorage_& subset, Index_ len, ToIndex_ to_index) { std::vector > collected; collected.reserve(len); for (Index_ i = 0; i < len; ++i) { - collected.emplace_back(indices[to_index(i)], i); + collected.emplace_back(subset[to_index(i)], i); } std::sort(collected.begin(), collected.end()); @@ -49,41 +49,43 @@ DenseParallelResults format_dense_parallel(const IndexStorage_& indices, } template -struct ParallelDense : DenseExtractor { - template - ParallelDense(const Matrix* mat, const IndexStorage_& indices, bool row, MaybeOracle oracle, const Options& opt) { - auto processed = format_dense_parallel(indices, indices.size(), [&](Index_ i) -> Index_ { return i; }); - initialize(mat, std::move(processed), indices.size(), row, std::move(oracle), opt); +class ParallelDense : public DenseExtractor { +public: + template + ParallelDense(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, const Options& opt) { + auto processed = format_dense_parallel(subset, subset.size(), [&](Index_ i) -> Index_ { return i; }); + initialize(matrix, std::move(processed), row, std::move(oracle), opt); } - template - ParallelDense(const Matrix* mat, const IndexStorage_& indices, bool row, MaybeOracle oracle, Index_ block_start, Index_ block_length, const Options& opt) { - auto processed = format_dense_parallel(indices, block_length, [&](Index_ i) -> Index_ { return i + block_start; }); - initialize(mat, std::move(processed), block_length, row, std::move(oracle), opt); + template + ParallelDense(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, Index_ block_start, Index_ block_length, const Options& opt) { + auto processed = format_dense_parallel(subset, block_length, [&](Index_ i) -> Index_ { return i + block_start; }); + initialize(matrix, std::move(processed), row, std::move(oracle), opt); } - template - ParallelDense(const Matrix* mat, const IndexStorage_& indices, bool row, MaybeOracle oracle, VectorPtr subset_ptr, const Options& opt) { - const auto& subset = *subset_ptr; - auto processed = format_dense_parallel(indices, subset.size(), [&](Index_ i) -> Index_ { return subset[i]; }); - initialize(mat, std::move(processed), subset.size(), row, std::move(oracle), opt); + template + ParallelDense(const Matrix* matrix, const SubsetStorage_& subset, bool row, MaybeOracle oracle, VectorPtr indices_ptr, const Options& opt) { + const auto& indices = *indices_ptr; + auto processed = format_dense_parallel(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; }); + initialize(matrix, std::move(processed), row, std::move(oracle), opt); } private: - void initialize(const Matrix* mat, DenseParallelResults processed, size_t extent, bool row, MaybeOracle oracle, const Options& opt) { - internal = new_extractor(mat, row, std::move(oracle), std::move(processed.sorted), opt); - vholding.resize(extent); - permutation = std::move(processed.permutation); + void initialize(const Matrix* matrix, DenseParallelResults processed, bool row, MaybeOracle oracle, const Options& opt) { + size_t extent = processed.sorted.size(); + my_holding_vbuffer.resize(extent); + my_ext = new_extractor(matrix, row, std::move(oracle), std::move(processed.sorted), opt); + my_permutation = std::move(processed.permutation); } public: const Value_* fetch(Index_ i, Value_* buffer) { - auto src = internal->fetch(i, vholding.data()); + auto src = my_ext->fetch(i, my_holding_vbuffer.data()); // 'input' and 'output' should not point to the same array. In theory, it // is possible to do an in-place permutation, but this requires another // array anyway to track the permutation status, so we'll just keep it simple. - for (auto p : permutation) { + for (auto p : my_permutation) { buffer[p] = *src; ++src; } @@ -92,130 +94,160 @@ struct ParallelDense : DenseExtractor { } private: - std::unique_ptr > internal; - std::vector vholding; - std::vector permutation; + std::unique_ptr > my_ext; + std::vector my_holding_vbuffer; + std::vector my_permutation; }; -template -std::vector format_sparse_parallel(const IndexStorage_& indices, Index_ len, ToIndex_ to_index) { +template +std::vector format_sparse_parallel(const SubsetStorage_& subset, Index_ len, ToIndex_ to_index) { std::vector collected; collected.reserve(len); for (Index_ i = 0; i < len; ++i) { - collected.emplace_back(indices[to_index(i)]); + collected.emplace_back(subset[to_index(i)]); } std::sort(collected.begin(), collected.end()); return collected; } template -struct ParallelSparse : public SparseExtractor { - template - ParallelSparse(const Matrix* mat, const IndexStorage_& indices, const std::vector& remap, bool row, MaybeOracle oracle, const Options& opt) : remapping(remap) { - auto processed = format_sparse_parallel(indices, indices.size(), [&](Index_ i) -> Index_ { return i; }); - initialize(mat, std::move(processed), indices.size(), row, std::move(oracle), opt); - } - - template - ParallelSparse(const Matrix* mat, const IndexStorage_& indices, const std::vector& remap, bool row, MaybeOracle oracle, Index_ block_start, Index_ block_length, const Options& opt) : remapping(remap) { - auto processed = format_sparse_parallel(indices, block_length, [&](Index_ i) -> Index_ { return i + block_start; }); - initialize(mat, std::move(processed), block_length, row, std::move(oracle), opt); - } - - template - ParallelSparse(const Matrix* mat, const IndexStorage_& indices, const std::vector& remap, bool row, MaybeOracle oracle, VectorPtr subset_ptr, const Options& opt) : remapping(remap) { - const auto& subset = *subset_ptr; - auto processed = format_sparse_parallel(indices, subset.size(), [&](Index_ i) -> Index_ { return subset[i]; }); - initialize(mat, std::move(processed), subset.size(), row, std::move(oracle), opt); +class ParallelSparse : public SparseExtractor { +public: + template + ParallelSparse( + const Matrix* matrix, + const SubsetStorage_& subset, + const std::vector& remap, + bool row, MaybeOracle oracle, + const Options& opt + ) : + my_remapping(remap) + { + auto processed = format_sparse_parallel(subset, subset.size(), [](Index_ i) -> Index_ { return i; }); + initialize(matrix, std::move(processed), row, std::move(oracle), opt); + } + + template + ParallelSparse( + const Matrix* matrix, + const SubsetStorage_& subset, + const std::vector& remap, + bool row, + MaybeOracle oracle, + Index_ block_start, + Index_ block_length, + const Options& opt + ) : + my_remapping(remap) + { + auto processed = format_sparse_parallel(subset, block_length, [&](Index_ i) -> Index_ { return i + block_start; }); + initialize(matrix, std::move(processed), row, std::move(oracle), opt); + } + + template + ParallelSparse( + const Matrix* matrix, + const SubsetStorage_& subset, + const std::vector& remap, + bool row, + MaybeOracle oracle, + VectorPtr indices_ptr, + const Options& opt + ) : + my_remapping(remap) + { + const auto& indices = *indices_ptr; + auto processed = format_sparse_parallel(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; }); + initialize(matrix, std::move(processed), row, std::move(oracle), opt); } private: - void initialize(const Matrix* mat, std::vector sorted, size_t extent, bool row, MaybeOracle oracle, Options opt) { - needs_value = opt.sparse_extract_value; - needs_index = opt.sparse_extract_index; - needs_sort = opt.sparse_ordered_index; - + void initialize(const Matrix* matrix, std::vector sorted, bool row, MaybeOracle oracle, Options opt) { + my_needs_value = opt.sparse_extract_value; + my_needs_index = opt.sparse_extract_index; + my_needs_sort = opt.sparse_ordered_index; + // The conditionals here mirror those in 'fetch', // to self-document the case where each of the temporaries are needed. - if (!needs_sort) { - if (needs_index) { - ; // no 'iholding' required as a user-provided 'ibuffer' should be available. + if (!my_needs_sort) { + if (my_needs_index) { + ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available. } - } else if (needs_value) { + } else if (my_needs_value) { opt.sparse_extract_index = true; - sortspace.reserve(extent); - if (needs_index) { - ; // no 'iholding' required as a user-provided 'ibuffer' should be available. + my_sortspace.reserve(sorted.size()); + if (my_needs_index) { + ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available. } else { - iholding.resize(extent); // needs 'iholding' as user-provided 'ibuffer' may be NULL. + my_holding_ibuffer.resize(sorted.size()); // needs 'my_holding_ibuffer' as user-provided 'index_buffer' may be NULL. } - } else if (needs_index) { - ; // no 'iholding' required as a user-provided 'ibuffer' should be available. + } else if (my_needs_index) { + ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available. } - internal = new_extractor(mat, row, std::move(oracle), std::move(sorted), opt); + my_ext = new_extractor(matrix, row, std::move(oracle), std::move(sorted), opt); } public: - SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto input = internal->fetch(i, vbuffer, (iholding.empty() ? ibuffer : iholding.data())); + SparseRange fetch(Index_ i, Value_* value_buffer, Index_* index_buffer) { + auto input = my_ext->fetch(i, value_buffer, (my_holding_ibuffer.empty() ? index_buffer : my_holding_ibuffer.data())); // Pointers in 'input' and the 'buffer' pointers may point to the same array, - // as we're either just modifiying in place or we're copying to 'sortspace'. - if (!needs_sort) { - if (needs_index) { + // as we're either just modifiying in place or we're copying to 'my_sortspace'. + if (!my_needs_sort) { + if (my_needs_index) { for (Index_ i = 0; i < input.number; ++i) { - ibuffer[i] = remapping[input.index[i]]; + index_buffer[i] = my_remapping[input.index[i]]; } - input.index = ibuffer; + input.index = index_buffer; } - } else if (needs_value) { - // We assume that the indices have already been extracted for sorting + } else if (my_needs_value) { + // We assume that the subset have already been extracted for sorting // purposes, even if they weren't actually requested. - sortspace.clear(); + my_sortspace.clear(); for (Index_ i = 0; i < input.number; ++i) { - sortspace.emplace_back(remapping[input.index[i]], input.value[i]); + my_sortspace.emplace_back(my_remapping[input.index[i]], input.value[i]); } - std::sort(sortspace.begin(), sortspace.end()); + std::sort(my_sortspace.begin(), my_sortspace.end()); - auto vcopy = vbuffer; - for (const auto& ss : sortspace) { + auto vcopy = value_buffer; + for (const auto& ss : my_sortspace) { *vcopy = ss.second; ++vcopy; } - input.value = vbuffer; + input.value = value_buffer; - if (needs_index) { - auto icopy = ibuffer; - for (const auto& ss : sortspace) { + if (my_needs_index) { + auto icopy = index_buffer; + for (const auto& ss : my_sortspace) { *icopy = ss.first; ++icopy; } - input.index = ibuffer; + input.index = index_buffer; } else { input.index = NULL; } - } else if (needs_index) { + } else if (my_needs_index) { for (Index_ i = 0; i < input.number; ++i) { - ibuffer[i] = remapping[input.index[i]]; + index_buffer[i] = my_remapping[input.index[i]]; } - std::sort(ibuffer, ibuffer + input.number); - input.index = ibuffer; + std::sort(index_buffer, index_buffer + input.number); + input.index = index_buffer; } return input; } private: - const std::vector& remapping; - std::unique_ptr > internal; - bool needs_value, needs_index, needs_sort; - std::vector > sortspace; - std::vector iholding; + const std::vector& my_remapping; + std::unique_ptr > my_ext; + bool my_needs_value, my_needs_index, my_needs_sort; + std::vector > my_sortspace; + std::vector my_holding_ibuffer; }; } @@ -231,83 +263,83 @@ struct ParallelSparse : public SparseExtractor { * * @tparam Value_ Type of matrix value. * @tparam Index_ Integer type of index value. - * @tparam IndexStorage_ Vector containing the subset indices. + * @tparam SubsetStorage_ Vector containing the subset indices. * Any class implementing `[`, `size()`, `begin()` and `end()` can be used here. */ -template +template class DelayedSubsetUnique : public Matrix { public: /** - * @param p Pointer to the underlying (pre-subset) matrix. - * @param idx Vector of 0-based indices to use for subsetting on the rows (if `row = true`) or columns (otherwise). + * @param matrix Pointer to the underlying (pre-subset) matrix. + * @param subset Vector of 0-based indices to use for subsetting on the rows (if `row = true`) or columns (otherwise). * This should be unique, but may be unsorted. * @param row Whether to apply the subset to the rows. * If false, the subset is applied to the columns. - * @param check Whether to check `idx` for unique values. + * @param check Whether to check `subset` for unique values. */ - DelayedSubsetUnique(std::shared_ptr > p, IndexStorage_ idx, bool row, bool check = true) : - mat(std::move(p)), indices(std::move(idx)), by_row(row) + DelayedSubsetUnique(std::shared_ptr > p, SubsetStorage_ subset, bool row, bool check = true) : + my_matrix(std::move(p)), my_subset(std::move(subset)), my_row(row) { - Index_ fulldim = by_row ? mat->nrow() : mat->ncol(); + Index_ fulldim = my_row ? my_matrix->nrow() : my_matrix->ncol(); if (check) { std::vector checks(fulldim); - for (Index_ i = 0, end = indices.size(); i < end; ++i) { - auto& found = checks[indices[i]]; + for (Index_ i = 0, end = my_subset.size(); i < end; ++i) { + auto& found = checks[my_subset[i]]; if (found) { - throw std::runtime_error("indices should be unique"); + throw std::runtime_error("my_subset should be unique"); } found = 1; } } - mapping_single.resize(fulldim); - for (Index_ i = 0, end = indices.size(); i < end; ++i) { - mapping_single[indices[i]] = i; + my_mapping_single.resize(fulldim); + for (Index_ i = 0, end = my_subset.size(); i < end; ++i) { + my_mapping_single[my_subset[i]] = i; } } private: - std::shared_ptr > mat; - IndexStorage_ indices; - bool by_row; - std::vector mapping_single; + std::shared_ptr > my_matrix; + SubsetStorage_ my_subset; + bool my_row; + std::vector my_mapping_single; public: Index_ nrow() const { - if (by_row) { - return indices.size(); + if (my_row) { + return my_subset.size(); } else { - return mat->nrow(); + return my_matrix->nrow(); } } Index_ ncol() const { - if (by_row) { - return mat->ncol(); + if (my_row) { + return my_matrix->ncol(); } else { - return indices.size(); + return my_subset.size(); } } bool is_sparse() const { - return mat->is_sparse(); + return my_matrix->is_sparse(); } double is_sparse_proportion() const { - return mat->is_sparse_proportion(); + return my_matrix->is_sparse_proportion(); } bool prefer_rows() const { - return mat->prefer_rows(); + return my_matrix->prefer_rows(); } double prefer_rows_proportion() const { - return mat->prefer_rows_proportion(); + return my_matrix->prefer_rows_proportion(); } bool uses_oracle(bool row) const { - return mat->uses_oracle(row); + return my_matrix->uses_oracle(row); } using Matrix::dense_column; @@ -324,10 +356,10 @@ class DelayedSubsetUnique : public Matrix { private: template std::unique_ptr > populate_myopic_dense(bool row, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, row, false, std::forward(args)...); + return std::make_unique >(my_matrix.get(), my_subset, row, false, std::forward(args)...); } } @@ -350,10 +382,10 @@ class DelayedSubsetUnique : public Matrix { private: template std::unique_ptr > populate_myopic_sparse(bool row, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, mapping_single, row, false, std::forward(args)...); + return std::make_unique >(my_matrix.get(), my_subset, my_mapping_single, row, false, std::forward(args)...); } } @@ -376,10 +408,10 @@ class DelayedSubsetUnique : public Matrix { private: template std::unique_ptr > populate_oracular_dense(bool row, std::shared_ptr > oracle, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::move(oracle), std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, row, std::move(oracle), std::forward(args)...); + return std::make_unique >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward(args)...); } } @@ -402,10 +434,10 @@ class DelayedSubsetUnique : public Matrix { private: template std::unique_ptr > populate_oracular_sparse(bool row, std::shared_ptr > oracle, Args_&& ... args) const { - if (row == by_row) { - return std::make_unique >(mat.get(), indices, row, std::move(oracle), std::forward(args)...); + if (row == my_row) { + return std::make_unique >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward(args)...); } else { - return std::make_unique >(mat.get(), indices, mapping_single, row, std::move(oracle), std::forward(args)...); + return std::make_unique >(my_matrix.get(), my_subset, my_mapping_single, row, std::move(oracle), std::forward(args)...); } } diff --git a/include/tatami/subset/utils.hpp b/include/tatami/subset/utils.hpp index 8807f9c0..59ff68ae 100644 --- a/include/tatami/subset/utils.hpp +++ b/include/tatami/subset/utils.hpp @@ -12,79 +12,84 @@ namespace tatami { namespace subset_utils { -template -struct SubsetOracle : public Oracle { - SubsetOracle(std::shared_ptr > ora, const IndexStorage_& ix) : source(std::move(ora)), indices(ix) {} +template +class SubsetOracle : public Oracle { +public: + SubsetOracle(std::shared_ptr > oracle, const SubsetStorage_& subset) : my_oracle(std::move(oracle)), my_subset(subset) {} Index_ get(size_t i) const { - return indices[source->get(i)]; + return my_subset[my_oracle->get(i)]; } size_t total() const { - return source->total(); + return my_oracle->total(); } private: - std::shared_ptr > source; - const IndexStorage_& indices; + std::shared_ptr > my_oracle; + const SubsetStorage_& my_subset; }; -template -struct MyopicPerpendicularDense : public MyopicDenseExtractor { +template +class MyopicPerpendicularDense : public MyopicDenseExtractor { +public: template - MyopicPerpendicularDense(const Matrix* mat, const IndexStorage_& in, bool row, Args_&& ... args) : - indices(in), internal(new_extractor(mat, row, false, std::forward(args)...)) {} + MyopicPerpendicularDense(const Matrix* matrix, const SubsetStorage_& subset, bool row, Args_&& ... args) : + my_subset(subset), my_ext(new_extractor(matrix, row, false, std::forward(args)...)) {} const Value_* fetch(Index_ i, Value_* buffer) { - return internal->fetch(indices[i], buffer); + return my_ext->fetch(my_subset[i], buffer); } protected: - const IndexStorage_& indices; - std::unique_ptr > internal; + const SubsetStorage_& my_subset; + std::unique_ptr > my_ext; }; -template -struct MyopicPerpendicularSparse : public MyopicSparseExtractor { +template +class MyopicPerpendicularSparse : public MyopicSparseExtractor { +public: template - MyopicPerpendicularSparse(const Matrix* mat, const IndexStorage_& in, bool row, Args_&& ... args) : - indices(in), internal(new_extractor(mat, row, false, std::forward(args)...)) {} + MyopicPerpendicularSparse(const Matrix* matrix, const SubsetStorage_& subset, bool row, Args_&& ... args) : + my_subset(subset), my_ext(new_extractor(matrix, row, false, std::forward(args)...)) {} - SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - return internal->fetch(indices[i], vbuffer, ibuffer); + SparseRange fetch(Index_ i, Value_* value_buffer, Index_* index_buffer) { + return my_ext->fetch(my_subset[i], value_buffer, index_buffer); } protected: - const IndexStorage_& indices; - std::unique_ptr > internal; + const SubsetStorage_& my_subset; + std::unique_ptr > my_ext; }; template -struct OracularPerpendicularDense : public OracularDenseExtractor { - template - OracularPerpendicularDense(const Matrix* mat, const IndexStorage_& in, bool row, std::shared_ptr > ora, Args_&& ... args) : - internal(new_extractor(mat, row, std::make_shared >(std::move(ora), in), std::forward(args)...)) {} +class OracularPerpendicularDense : public OracularDenseExtractor { +public: + template + OracularPerpendicularDense(const Matrix* matrix, const SubsetStorage_& subset, bool row, std::shared_ptr > oracle, Args_&& ... args) : + my_ext(new_extractor(matrix, row, std::make_shared >(std::move(oracle), subset), std::forward(args)...)) {} const Value_* fetch(Index_ i, Value_* buffer) { - return internal->fetch(i, buffer); + return my_ext->fetch(i, buffer); } protected: - std::unique_ptr > internal; + std::unique_ptr > my_ext; }; template -struct OracularPerpendicularSparse : public OracularSparseExtractor { - template - OracularPerpendicularSparse(const Matrix* mat, const IndexStorage_& in, bool row, std::shared_ptr > ora, Args_&& ... args) : - internal(new_extractor(mat, row, std::make_shared >(std::move(ora), in), std::forward(args)...)) {} - - SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - return internal->fetch(i, vbuffer, ibuffer); +class OracularPerpendicularSparse : public OracularSparseExtractor { +public: + template + OracularPerpendicularSparse(const Matrix* matrix, const SubsetStorage_& subset, bool row, std::shared_ptr > oracle, Args_&& ... args) : + my_ext(new_extractor(matrix, row, std::make_shared >(std::move(oracle), subset), std::forward(args)...)) {} + + SparseRange fetch(Index_ i, Value_* value_buffer, Index_* index_buffer) { + return my_ext->fetch(i, value_buffer, index_buffer); } protected: - std::unique_ptr > internal; + std::unique_ptr > my_ext; }; }