Fix dns to rsp #46

Merged · 1 commit · May 23, 2017
2 changes: 1 addition & 1 deletion src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -242,7 +242,7 @@ inline void BinaryBroadcastBackwardUseInImpl(const OpContext& ctx,
ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s);
Reduce<red::sum, ndim, DType, mshadow::op::mul, LOP>(s, lgrad, req[0], workspace,
ograd, lhs, rhs);
- Reduce<red::sum, ndim, DType, mshadow::op::mul, ROP>(s, rgrad, req[0], workspace,
+ Reduce<red::sum, ndim, DType, mshadow::op::mul, ROP>(s, rgrad, req[1], workspace,
ograd, lhs, rhs);
}
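For context: `req` carries one write-request entry per output of the backward node, so the lhs gradient should honor `req[0]` and the rhs gradient `req[1]`. A minimal stand-alone sketch of why reusing `req[0]` for both outputs is wrong — the enum mirrors the usual MXNet request types, but the function below is a mock for illustration, not the real operator code:

#include <cassert>
#include <vector>

// Mock of the per-output write-request convention.
enum OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo };

// Backward of a binary op has two outputs: lgrad and rgrad.
void BinaryBackwardMock(const std::vector<OpReqType>& req) {
  assert(req.size() == 2);
  const OpReqType lgrad_req = req[0];  // how to write the lhs gradient
  const OpReqType rgrad_req = req[1];  // how to write the rhs gradient
  // With req = {kAddTo, kWriteTo}, the old code would have accumulated into
  // rgrad (kAddTo) instead of overwriting it (kWriteTo), and vice versa.
  (void)lgrad_req;
  (void)rgrad_req;
}

int main() {
  BinaryBackwardMock({kAddTo, kWriteTo});
  return 0;
}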

72 changes: 35 additions & 37 deletions src/operator/tensor/elemwise_unary_op.h
@@ -14,6 +14,7 @@
#include "../elemwise_op_common.h"
#include "../special_functions-inl.h"
#include "../mxnet_op.h"
#include "./broadcast_reduce-inl.h"

namespace mxnet {
namespace op {
@@ -244,19 +245,18 @@ struct FillRspRowIdx {
struct MarkRspRowIdx {
// i represents the row index of the matrix data
template<typename DType, typename RType>
- MSHADOW_XINLINE static void Map(int i, RType* row_idx, const DType* data,
- const int invalid_rid, const int num_cols) {
- int j = 0;
- int offset = i * num_cols;
+ MSHADOW_XINLINE static void Map(int i, RType* row_idx, const DType* data, const index_t num_cols) {
+ index_t j = 0;
+ index_t offset = i * num_cols;
for (; j < num_cols; ++j) {
if (data[offset+j] != 0) {
break;
}
}
if (num_cols == j) {
- row_idx[i] = invalid_rid; // mark zero row as invalid
+ row_idx[i] = 0; // mark as zero for zero row
} else {
- row_idx[i] = i;
+ row_idx[i] = 1; // mark as one for non-zero row
}
}
};
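The kernel no longer writes row indices (or an invalid-row sentinel); it now writes a 0/1 flag per row, which a later pass can sum to obtain the number of non-zero rows. A stand-alone sketch of that marking logic on a toy 3x2 matrix — plain C++ without mshadow, names are illustrative only:

#include <cstdint>
#include <iostream>
#include <numeric>

// Mimics what MarkRspRowIdx::Map does for one row.
void MarkToy(int i, int64_t* row_idx, const float* data, int64_t num_cols) {
  int64_t j = 0;
  for (; j < num_cols; ++j) {
    if (data[i * num_cols + j] != 0) break;
  }
  row_idx[i] = (j == num_cols) ? 0 : 1;  // 0 = all-zero row, 1 = non-zero row
}

int main() {
  const float dns[3 * 2] = {0, 0,   // row 0: all zeros
                            1, 0,   // row 1: non-zero
                            0, 2};  // row 2: non-zero
  int64_t row_idx[3];
  for (int i = 0; i < 3; ++i) MarkToy(i, row_idx, dns, 2);
  const int64_t nnr = std::accumulate(row_idx, row_idx + 3, int64_t{0});
  std::cout << "non-zero rows: " << nnr << "\n";  // prints 2
  return 0;
}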
@@ -296,21 +296,36 @@ struct CopyDnsToRsp{
* but required to wrap a TBlob object as an NDArray. See the use case
* in DotForwardCsrDnsRsp in matrix_op-inl.h.
* Will revisit this interface in the future.
+ * TODO(junwu): Add gpu implementation.
*/
- template<typename xpu>
- void CastStorageDnsRspImpl(mshadow::Stream<xpu> *s, const TBlob& dns, NDArray* rsp) {
+ inline void CastStorageDnsRspImpl(mshadow::Stream<cpu>* s, const TBlob& dns, NDArray* rsp) {
CHECK(rsp != nullptr);
CHECK_EQ(rsp->storage_type(), kRowSparseStorage);
CHECK_EQ(dns.shape_, rsp->shape());
-
MSHADOW_TYPE_SWITCH(dns.type_flag_, DType, { // data type
MSHADOW_INT_TYPE_SWITCH(rsp->aux_type(rowsparse::kIdx), RType, { // row idx type
const index_t num_rows = dns.shape_[0];
const index_t num_cols = dns.shape_[1];
- rsp->CheckAndAlloc({TShape({num_rows})});
- RType* row_idx = rsp->aux_data(rowsparse::kIdx).dptr<RType>();
- mxnet_op::Kernel<CopyDnsToRsp, xpu>::Launch(s, num_rows, row_idx, rsp->data().dptr<DType>(),
- dns.dptr<DType>(), num_rows, num_cols);
+ rsp->CheckAndAllocAuxData(rowsparse::kIdx, mshadow::Shape1(num_rows));
+ TBlob row_idx_blob = rsp->aux_data(rowsparse::kIdx);
+ RType* row_idx = row_idx_blob.dptr<RType>();
+ mxnet_op::Kernel<MarkRspRowIdx, cpu>::Launch(s, num_rows, row_idx,
+ dns.dptr<DType>(), num_cols);
+ index_t nnr = 0;
+ nnr = std::accumulate(row_idx, row_idx+num_rows, nnr);
+ rsp->SetAuxShape(rowsparse::kIdx, mshadow::Shape1(nnr));
+ if (0 == nnr) return;
+ rsp->CheckAndAllocData(mshadow::Shape2(nnr, num_cols));
+ mshadow::Tensor<cpu, 2, DType> dns_data = dns.FlatTo2D<cpu, DType>(s);
+ mshadow::Tensor<cpu, 2, DType> rsp_data = rsp->data().FlatTo2D<cpu, DType>(s);
+ size_t idx = 0;
+ for (index_t i = 0; i < num_rows; ++i) {
+ if (row_idx[i] > 0) {
+ row_idx[idx] = i;
+ mshadow::Copy(rsp_data[idx], dns_data[i], s);
+ ++idx;
+ }
+ }
});
});
}
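Putting the pieces together, the new CPU path is: mark each row with a 0/1 flag, sum the flags to get the number of non-zero rows (nnr), set the aux shape, allocate an nnr x num_cols data blob, then compact in one pass that reuses the front of the flag array for the surviving row ids. A simplified, mshadow-free sketch of that flow (illustrative only, not the MXNet API):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  const int64_t num_rows = 4, num_cols = 3;
  std::vector<float> dns = {0, 0, 0,
                            5, 0, 1,
                            0, 0, 0,
                            0, 7, 0};
  // Pass 1: 0/1 flag per row (what MarkRspRowIdx produces).
  std::vector<int64_t> row_idx(num_rows);
  for (int64_t i = 0; i < num_rows; ++i) {
    int64_t j = 0;
    while (j < num_cols && dns[i * num_cols + j] == 0) ++j;
    row_idx[i] = (j == num_cols) ? 0 : 1;
  }
  const int64_t nnr = std::accumulate(row_idx.begin(), row_idx.end(), int64_t{0});
  // Pass 2: compact -- reuse row_idx[0..nnr) for the surviving row ids and
  // copy the corresponding dense rows into the row-sparse data buffer.
  std::vector<float> rsp_data(nnr * num_cols);
  int64_t idx = 0;
  for (int64_t i = 0; i < num_rows; ++i) {
    if (row_idx[i] > 0) {
      row_idx[idx] = i;
      std::memcpy(&rsp_data[idx * num_cols], &dns[i * num_cols], num_cols * sizeof(float));
      ++idx;
    }
  }
  std::cout << "nnr = " << nnr << ", rows kept:";
  for (int64_t k = 0; k < nnr; ++k) std::cout << " " << row_idx[k];
  std::cout << "\n";  // prints: nnr = 2, rows kept: 1 3
  return 0;
}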
@@ -336,7 +351,7 @@ struct CastStorageRspDnsKernel {
* since the shape is known at binding stage.
*/
template<typename xpu>
- void CastStorageRspDnsImpl(mshadow::Stream<xpu> *s, const NDArray& rsp, TBlob* dns) {
+ void CastStorageRspDnsImpl(mshadow::Stream<xpu>* s, const NDArray& rsp, TBlob* dns) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(rsp.storage_type(), kRowSparseStorage);
@@ -429,12 +444,11 @@ struct FillCsrColIdxAndVals {
* Will revisit this interface in the future.
*/
template<typename xpu>
- void CastStorageDnsCsrImpl(mshadow::Stream<xpu> *s, const TBlob& dns, NDArray* csr) {
+ void CastStorageDnsCsrImpl(mshadow::Stream<xpu>* s, const TBlob& dns, NDArray* csr) {
CHECK(csr != nullptr);
CHECK_EQ(csr->storage_type(), kCSRStorage);
CHECK_EQ(dns.shape_.ndim(), 2);
CHECK_EQ(dns.shape_, csr->shape());
-
MSHADOW_TYPE_SWITCH(dns.type_flag_, DType, { // data type
MSHADOW_INT_TYPE_SWITCH(csr->aux_type(csr::kIndPtr), IType, { // indptr type
MSHADOW_INT_TYPE_SWITCH(csr->aux_type(csr::kIdx), CType, { // col idx type
@@ -500,12 +514,11 @@ struct CopyCsrDataToDns {
* Will revisit this interface in the future.
*/
template<typename xpu>
- void CastStorageCsrDnsImpl(mshadow::Stream<xpu> *s, const NDArray& csr, TBlob* dns) {
+ void CastStorageCsrDnsImpl(mshadow::Stream<xpu>* s, const NDArray& csr, TBlob* dns) {
CHECK(dns != nullptr);
CHECK_EQ(csr.storage_type(), kCSRStorage);
CHECK_EQ(dns->shape_.ndim(), 2);
CHECK_EQ(dns->shape_, csr.shape());
-
MSHADOW_TYPE_SWITCH(dns->type_flag_, DType, { // data type
MSHADOW_INT_TYPE_SWITCH(csr.aux_type(csr::kIndPtr), IType, { // indptr type
MSHADOW_INT_TYPE_SWITCH(csr.aux_type(csr::kIdx), CType, { // col idx type
@@ -539,7 +552,7 @@ inline bool CastStorageInferStorageType(const nnvm::NodeAttrs& attrs,
}

template<typename xpu>
- void CastStorageComputeImpl(mshadow::Stream<xpu> *s,
+ void CastStorageComputeImpl(mshadow::Stream<xpu>* s,
const NDArray& input,
const NDArray& output) {
using namespace mshadow;
@@ -551,7 +564,7 @@ void CastStorageComputeImpl(mshadow::Stream<xpu> *s,
CastStorageRspDnsImpl<xpu>(s, input, &ret);
} else if (src_stype == kDefaultStorage && dst_stype == kRowSparseStorage) {
NDArray ret = output; // get rid of the const qualifer
- CastStorageDnsRspImpl<xpu>(s, input.data(), &ret);
+ CastStorageDnsRspImpl(s, input.data(), &ret);
} else if (src_stype == kDefaultStorage && dst_stype == kCSRStorage) {
NDArray ret = output; // get rid of the const qualifer
CastStorageDnsCsrImpl<xpu>(s, input.data(), &ret);
@@ -563,22 +576,6 @@ void CastStorageComputeImpl(mshadow::Stream<xpu> *s,
}
}

- template<typename xpu>
- void CastStorageToDefault(mshadow::Stream<xpu> *s,
- const NDArray& input,
- TBlob* ret) {
- using namespace mshadow;
- using namespace mshadow::expr;
- const auto src_stype = input.storage_type();
- if (src_stype == kRowSparseStorage) {
- CastStorageRspDnsImpl<xpu>(s, input, ret);
- } else if (src_stype == kCSRStorage) {
- CastStorageCsrDnsImpl<xpu>(s, input, ret);
- } else {
- LOG(FATAL) << "Not implemented";
- }
- }
-
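With CastStorageToDefault removed, casting back to dense storage is just one branch of the generic (source storage, destination storage) dispatch in CastStorageComputeImpl. A stand-alone mock of that dispatch shape — the branches mirror the pairs visible in this diff plus the CSR-to-dense helper defined above; the real dispatcher may cover more cases:

#include <iostream>
#include <stdexcept>

// Mock storage-type tags; not the real MXNet enums.
enum Storage { kDefaultStorage, kRowSparseStorage, kCSRStorage };

void CastStorageDispatchMock(Storage src, Storage dst) {
  if (src == kRowSparseStorage && dst == kDefaultStorage) {
    std::cout << "rsp -> dns\n";   // what CastStorageToDefault used to cover
  } else if (src == kDefaultStorage && dst == kRowSparseStorage) {
    std::cout << "dns -> rsp\n";
  } else if (src == kDefaultStorage && dst == kCSRStorage) {
    std::cout << "dns -> csr\n";
  } else if (src == kCSRStorage && dst == kDefaultStorage) {
    std::cout << "csr -> dns\n";
  } else {
    throw std::runtime_error("Not implemented");
  }
}

int main() {
  // A cast to default storage now goes through the same dispatcher as every
  // other storage conversion.
  CastStorageDispatchMock(kRowSparseStorage, kDefaultStorage);
  return 0;
}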
template<typename xpu>
void CastStorageComputeEx(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
@@ -590,7 +587,7 @@ void CastStorageComputeEx(const nnvm::NodeAttrs& attrs,
Stream<xpu> *s = ctx.get_stream<xpu>();
CHECK_EQ(inputs.size(), 1);
CHECK_EQ(outputs.size(), 1);
- CastStorageComputeImpl(s, inputs[0], outputs[0]);
+ CastStorageComputeImpl<xpu>(s, inputs[0], outputs[0]);
}

#define MXNET_OPERATOR_REGISTER_UNARY(name) \
@@ -607,4 +604,5 @@ void CastStorageComputeEx(const nnvm::NodeAttrs& attrs,

} // namespace op
} // namespace mxnet
+
#endif // MXNET_OPERATOR_TENSOR_ELEMWISE_UNARY_OP_H_
1 change: 0 additions & 1 deletion src/operator/tensor/matrix_op-inl.h
@@ -323,7 +323,6 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
struct DotParam : public dmlc::Parameter<DotParam> {
bool transpose_a;
bool transpose_b;
- int out_stype; // output storage type
DMLC_DECLARE_PARAMETER(DotParam) {
DMLC_DECLARE_FIELD(transpose_a)
.describe("If true then transpose the first input before dot.")