Skip to content

Commit

Permalink
[v1.x] [Large Tensor] Backport of Fixed RNN op (apache#17632)
Browse files: browse the repository at this point in the history
* Changed relevant function args to index_t

* Added nightly test for RNN

* Added fix for LSTM, GRU, RNN-ReLU, RNN-tanh

* Used constants instead of literals

* Added nightly test for RNN ReLU & tanh, LSTM, GRU

* Added type assertion to force evaluation of output NDArray

* Incorporated latest round of comments
  • Loading branch information
connorgoggins authored and bgawrych committed May 28, 2020
1 parent 0c6785f commit b54d38d
Show file tree
Hide file tree
Showing 3 changed files with 252 additions and 218 deletions.
40 changes: 20 additions & 20 deletions src/operator/rnn-inl.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,7 +64,7 @@ struct RNNParam : public dmlc::Parameter<RNNParam> {
bool bidirectional, state_outputs;
int mode;
float p;
int seq_length_, batch_size_, input_size_;
index_t seq_length_, batch_size_, input_size_;

bool use_sequence_length;
dmlc::optional<int> projection_size;
Expand Down Expand Up @@ -123,8 +123,8 @@ struct RNNParam : public dmlc::Parameter<RNNParam> {
}
};

inline int GetRnnParamSize(int num_layer,
int input_size,
inline index_t GetRnnParamSize(int num_layer,
index_t input_size,
int state_size,
int direction,
int mode,
Expand All @@ -141,14 +141,14 @@ inline int GetRnnParamSize(int num_layer,
size *= 3;
break;
}
int size1 = (input_size + state_size + 2) * size; // first layer size
int size2 = (state_size * direction + state_size + 2) * size; // other layers size
index_t size1 = (input_size + state_size + 2) * size; // first layer size
index_t size2 = (state_size * direction + state_size + 2) * size; // other layers size
if (projection_size.has_value()) {
int proj_size = projection_size.value();
index_t proj_size = projection_size.value();
size1 = (input_size + proj_size + 2) * size;
size2 = (proj_size * direction + proj_size + 2) * size;
}
int param_size = size1 + (num_layer - 1) * size2;
index_t param_size = size1 + (num_layer - 1) * size2;
if (projection_size.has_value()) {
param_size += projection_size.value() * state_size * num_layer * direction;
}
Expand Down Expand Up @@ -183,8 +183,8 @@ inline int GetRnnBiasSize(int num_layer,
* - output -> h[t](, c[t] additionally with Lstm) time by time(sz: NxH(x2))
* - intermediate y[1...T] as next layer's inputs(sz: TxNxHxD)
*/
inline size_t GetRNNWorkspaceSize(int seq_length,
int batch_size,
inline size_t GetRNNWorkspaceSize(index_t seq_length,
index_t batch_size,
int hidden_size,
int projection_size,
int direction,
Expand Down Expand Up @@ -215,8 +215,8 @@ inline size_t GetRNNWorkspaceSize(int seq_length,

inline size_t GetRNNReserveSpaceSize(int num_layer,
int direction,
int seq_length,
int batch_size,
index_t seq_length,
index_t batch_size,
int hidden_size,
int mode) {
size_t size = 0;
Expand Down Expand Up @@ -280,9 +280,9 @@ void RNNForwardTraining(DType* ws,
bool state_outputs,
const int num_layers,
const int direction,
const int seq_length,
const int batch_size,
const int input_size,
const index_t seq_length,
const index_t batch_size,
const index_t input_size,
const int state_size,
DType* x_ptr,
DType* hx_ptr,
Expand Down Expand Up @@ -323,9 +323,9 @@ void RNNForwardInference(DType* ws,
bool state_outputs,
const int num_layers,
const int direction,
const int seq_length,
const int batch_size,
const int input_size,
const index_t seq_length,
const index_t batch_size,
const index_t input_size,
const int state_size,
const int projection_size,
DType* x_ptr,
Expand Down Expand Up @@ -365,9 +365,9 @@ void RNNBackward(DType* ws,
DType* rs,
const int num_layers,
const int direction,
const int seq_length,
const int batch_size,
const int input_size,
const index_t seq_length,
const index_t batch_size,
const index_t input_size,
const int state_size,
DType* x_ptr,
DType* hx_ptr,
Expand Down
Loading

0 comments on commit b54d38d

Please sign in to comment.