From a044bcf64a789d610c2309e38e1a222e201287a7 Mon Sep 17 00:00:00 2001
From: Zhong Hui
Date: Mon, 12 Oct 2020 22:11:13 +0800
Subject: [PATCH] fix sequence length

---
 paddle/fluid/operators/cudnn_lstm_op.h             | 68 +++++++++++++------
 .../tests/unittests/test_lstm_cudnn_op.py          | 31 +++++++--
 2 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/operators/cudnn_lstm_op.h b/paddle/fluid/operators/cudnn_lstm_op.h
index cd251de5bdc2b..51c7b701d50c1 100644
--- a/paddle/fluid/operators/cudnn_lstm_op.h
+++ b/paddle/fluid/operators/cudnn_lstm_op.h
@@ -82,24 +82,27 @@ void create_mask_matrix(const framework::ExecutionContext& context,
                         const bool& is_reverse) {
   const auto& seq_len_vec = GetDataFromTensor<int>(sequence_length);
   const int& table_width = mask_matrix->dims()[0];
+  VLOG(2) << "INPUT MASK TENSOR SHAPE:" << mask_matrix->dims();
   Tensor temp;
   temp.Resize(
       framework::make_ddim({mask_matrix->dims()[1], mask_matrix->dims()[0]}));
   T* data_temp = temp.mutable_data<T>(context.GetPlace());
-  std::memset(data_temp, 1, mask_matrix->numel() * sizeof(T));
+  std::fill(data_temp, data_temp + mask_matrix->numel(), static_cast<T>(1.0));
   for (unsigned int i = 0; i < seq_len_vec.size(); i++) {
     // reset the mask matrix
     if (seq_len_vec[i] == table_width) {
       continue;
     }
     if (is_reverse) {
-      std::memset(data_temp + i * table_width * sizeof(T), 0,
-                  (table_width - seq_len_vec[i]) * sizeof(T));
+      std::fill(data_temp + i * table_width,
+                data_temp + i * table_width + seq_len_vec[i],
+                static_cast<T>(0));
     } else {
-      std::memset(data_temp + (i * table_width + seq_len_vec[i]) * sizeof(T), 0,
-                  (table_width - seq_len_vec[i]) * sizeof(T));
+      std::fill(data_temp + i * table_width + seq_len_vec[i],
+                data_temp + (i + 1) * table_width, static_cast<T>(0));
     }
   }
+  Print2DTensor<T>(&temp, "Original mask Tensor");
   // transpose the result for the mask
   mask_matrix->mutable_data<T>(context.GetPlace());
   std::vector<int> trans_vec;
@@ -125,8 +128,8 @@ void dropout_cpu_function_inplace(const framework::ExecutionContext& context,
     auto mask_data = mask->mutable_data<uint8_t>(context.GetPlace());
     // Special case when dropout_prob is 1.0
     if (dropout_prob == 1.0f) {
-      std::memset(x_data, 0, size * sizeof(*x_data));
-      std::memset(mask_data, 0, size * sizeof(*mask_data));  // NOLINT
+      std::fill(x_data, x_data + size, static_cast<T>(0));
+      std::fill(mask_data, mask_data + size, static_cast<uint8_t>(0));
       return;
     }
     auto engine = framework::GetCPURandomEngine(seed_number);
@@ -145,7 +148,7 @@ void dropout_cpu_function_inplace(const framework::ExecutionContext& context,
     }
     auto mask_data = mask->data<uint8_t>();
     if (dropout_prob == 1.0f) {
-      std::memset(x_data, 0, size * sizeof(*x_data));
+      std::fill(x_data, x_data + size, static_cast<T>(0));
       return;
     }
     for (size_t i = 0; i < size; ++i) {
@@ -300,15 +303,27 @@ struct LSTMCell : Cell<T> {
                               cell_act, cand_act);
     framework::TensorCopy(*output, device_ctx->GetPlace(), *device_ctx, last_h);
     Print3DTensor<T>(last_h, "last_h");
-    // auto eigen_output =
-    //    framework::EigenMatrix<T>::Reshape(*output, output->dims().size() -
-    //    1);
-    // auto eigen_mask = framework::EigenMatrix<T>::From(
-    //    mask_tensor, framework::make_ddim({mask_tensor.dims()[1], 1}));
-    //// eigen_output.device(device_ctx->eigen_device()) =
-    // eigen_output =
-    //    eigen_output *
-    //    eigen_mask.broadcast(Eigen::DSizes<int, 2>(1, output->dims()[1]));
+
+    auto eigen_init_h =
+        framework::EigenMatrix<T>::Reshape(*init_h, init_h->dims().size() - 1);
+    auto eigen_last_h =
+        framework::EigenMatrix<T>::Reshape(*last_h, last_h->dims().size() - 1);
+
+    auto eigen_mask = framework::EigenMatrix<T>::From(
+        mask_tensor, framework::make_ddim({mask_tensor.dims()[1], 1}));
+    // eigen_output.device(device_ctx->eigen_device()) =
+    auto eigen_mask_broadcast =
+        eigen_mask.broadcast(Eigen::DSizes<int, 2>(1, output->dims()[1]));
+    auto& place = *device_ctx->eigen_device();
+    eigen_last_h.device(place) = eigen_last_h * eigen_mask_broadcast +
+                                 eigen_init_h * (1 - eigen_mask_broadcast);
+
+    auto eigen_init_c =
+        framework::EigenMatrix<T>::Reshape(*init_c, init_c->dims().size() - 1);
+    auto eigen_last_c =
+        framework::EigenMatrix<T>::Reshape(*last_c, last_c->dims().size() - 1);
+    eigen_last_c.device(place) = eigen_last_c * eigen_mask_broadcast +
+                                 eigen_init_c * (1 - eigen_mask_broadcast);
   }
 };
 
@@ -367,7 +382,9 @@ struct Layer {
         framework::EigenMatrix<T>::Reshape(*output, output->dims().size() - 1);
     auto eigen_mask = framework::EigenMatrix<T>::From(
         mask_tensor, framework::make_ddim({mask_tensor.dims()[1], 1}));
-    eigen_output =
+    auto& place = *context.template device_context<platform::CPUDeviceContext>()
+                       .eigen_device();
+    eigen_output.device(place) =
         eigen_output *
         eigen_mask.broadcast(Eigen::DSizes<int, 2>(1, output->dims()[1]));
   }
@@ -412,6 +429,7 @@ struct SingleLayer : Layer<T> {
     mask_matrix.Resize(framework::make_ddim({time_step, input->dims()[1]}));
     if (has_sequence_length) {
       create_mask_matrix<T>(context, sequence_length, &mask_matrix, false);
+      Print2DTensor<T>(&mask_matrix, "Mask Matrix");
       mask_tensor_list = Unbind(mask_matrix);
     }
 
@@ -447,9 +465,9 @@ struct SingleLayer : Layer<T> {
                    init_c_holder, last_h_holder, last_c_holder,
                    &output_tensors[i], mask_tensor_list[i]);
       }
-      // if (has_sequence_length) {
-      //  this->postprocess(context, &output_tensors[i], mask_tensor_list[i]);
-      //}
+      if (has_sequence_length) {
+        this->postprocess(context, &output_tensors[i], mask_tensor_list[i]);
+      }
     }
     if (time_step % 2 == 0) {
       framework::TensorCopy(*last_h_holder, context.GetPlace(), dev_ctx,
@@ -717,7 +735,13 @@ class CudnnLSTMCPUKernel : public framework::OpKernel<T> {
     auto* weight = ctx.Input<Tensor>("W");
     auto* init_h = ctx.Input<Tensor>("InitH");
     auto* init_c = ctx.Input<Tensor>("InitC");
-    auto* sequence_length = ctx.Input<Tensor>("SequenceLength");
+
+    bool has_seq_length = ctx.HasInput("SequenceLength");
+    const Tensor* sequence_length = nullptr;
+    if (has_seq_length) {
+      sequence_length = ctx.Input<Tensor>("SequenceLength");
+    }
+    // auto* sequence_length = ctx.Input<Tensor>("SequenceLength");
     auto* last_h = ctx.Output<Tensor>("LastH");
     auto* last_c = ctx.Output<Tensor>("LastC");
     auto* output = ctx.Output<Tensor>("Out");
diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py
index 29a0fa55f7729..be4e97211e6fa 100644
--- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py
@@ -359,6 +359,9 @@ def setUp(self):
         self.dtype = np.float64
         self.sequence_length = np.array([12, 11, 10, 9, 8], dtype=np.int32)
         self.num_layers = 1
+        self.is_bidirec = False
+
+        self.set_attrs()
 
         seq_length = 12
         batch_size = 5
@@ -405,10 +408,10 @@ def setUp(self):
         }
         self.attrs = {
             'dropout_prob': 0.0,
-            'is_bidirec': False,
+            'is_bidirec': self.is_bidirec,
             'input_size': input_size,
             'hidden_size': hidden_size,
-            'num_layers': 1,
+            'num_layers': self.num_layers,
         }
         self.outputs = {
             'Out': output,
@@ -433,13 +436,27 @@ def test_grad_with_place(self):
                            ['Out', 'LastH', 'LastC'])
 
 
-@unittest.skipIf(not core.is_compiled_with_cuda(),
-                 "core is not compiled with CUDA")
-class TestCUDNNLstmOp2(TestCUDNNLstmOp):
-    def set_attrs(self):
-        self.num_layers = 2
+#@unittest.skipIf(not core.is_compiled_with_cuda(),
+#                 "core is not compiled with CUDA")
+#class TestCUDNNLstmOp2(TestCUDNNLstmOp): +# def set_attrs(self): +# self.num_layers = 2 +class TestCUDNNLstmCpu(TestCUDNNLstmOp): + def test_output_with_place(self): + place = core.CPUPlace() + self.check_output_with_place( + place, no_check_set=['Reserve', 'StateOut']) + + def test_grad_with_place(self): + pass + + +#class TestCUDNNLstmCpu2(TestCUDNNLstmCpu): +# def set_attrs(self): +# self.num_layers=2 +# @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNlstmAPI(unittest.TestCase):
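
Note on the masking semantics: the sketch below is a minimal NumPy reference for
what this patch implements, assuming the shapes used in the unit test; the helper
names build_mask and masked_step are hypothetical and are not part of the patch or
of the Paddle API. create_mask_matrix builds a (time_step, batch_size) matrix of
ones and zeros from SequenceLength, and the new Eigen code keeps the previous h/c
state and zeroes the step output wherever the mask is 0 (i.e. at padded steps).

    import numpy as np

    def build_mask(seq_lengths, time_steps):
        # One row per time step, one column per batch entry:
        # 1.0 while t < seq_len, 0.0 afterwards (forward direction).
        mask = np.ones((time_steps, len(seq_lengths)))
        for b, length in enumerate(seq_lengths):
            mask[length:, b] = 0.0
        return mask

    def masked_step(step_out, prev_h, prev_c, new_h, new_c, mask_t):
        # Mirrors Layer::postprocess and the Eigen code added to LSTMCell:
        # padded positions zero the output and carry the old h/c forward.
        m = mask_t[:, None]  # broadcast over the hidden dimension
        out = step_out * m
        last_h = new_h * m + prev_h * (1 - m)
        last_c = new_c * m + prev_c * (1 - m)
        return out, last_h, last_c

    # Lengths from TestCUDNNLstmOp: batch of 5, max length 12.
    mask = build_mask([12, 11, 10, 9, 8], time_steps=12)
    print(mask.shape)  # (12, 5); row t is the mask_tensor passed to step t

Row t of the mask is the per-step mask_tensor handed to the cell, which is why
LastH/LastC for a shorter sequence should match its state at the last valid step
while its padded output positions are zeroed.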