diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc
index b37269b471b4d..47948adde32e8 100644
--- a/paddle/operators/shrink_rnn_memory_op.cc
+++ b/paddle/operators/shrink_rnn_memory_op.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/framework/lod_rank_table.h"
+#include "paddle/framework/lod_tensor.h"
 #include "paddle/operators/array_operator.h"
 #include "paddle/operators/math/math_function.h"
 
@@ -46,8 +47,21 @@ class ShrinkRNNMemoryOp : public ArrayOp {
     auto *out_var = scope.FindVar(Output("Out"));
     PADDLE_ENFORCE(out_var != nullptr, "Output Out must be set");
     auto &out_tensor = *out_var->GetMutable<framework::LoDTensor>();
+
+    size_t height = dst_num_rows;
+
+    // do shrink for the top level LoD
+    if (x_tensor.lod().size() > 0 &&
+        x_tensor.lod()[0].size() > static_cast<size_t>(dst_num_rows)) {
+      auto lod_offset = framework::GetSubLoDAndAbsoluteOffset(x_tensor.lod(), 0,
+                                                              dst_num_rows, 0);
+      height = lod_offset.second.second;
+      auto out_lod = out_tensor.mutable_lod();
+      framework::AppendLoD(out_lod, lod_offset.first);
+    }
+
     if (dst_num_rows != 0) {
-      out_tensor.ShareDataWith(x_tensor.Slice(0, dst_num_rows));
+      out_tensor.ShareDataWith(x_tensor.Slice(0, height));
     }
   }
 };
@@ -64,11 +78,11 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "(LoDTensor) The shrinked RNN step memory.");
     AddComment(
         R"DOC(
-        In dynamic RNN, we are able to handle sequences of different lengths. 
-        Because of the multiple lengths, the size of each step input can be 
+        In dynamic RNN, we are able to handle sequences of different lengths.
+        Because of the multiple lengths, the size of each step input can be
         different, which may lead to a mismatching between the input of
-        the current step and the memory generated by the previous one. This 
-        operator shrinks memory according to the size of the next step input, 
+        the current step and the memory generated by the previous one. This
+        operator shrinks memory according to the size of the next step input,
         to make sure that they can match each other.
)DOC"); } @@ -132,6 +146,7 @@ class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase { PADDLE_ENFORCE(context->HasOutput(framework::GradVarName("X"))); context->SetOutputDim(framework::GradVarName("X"), context->GetInputDim("X")); + context->ShareLoD("X", framework::GradVarName("X")); } }; diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index be1588fc2d09f..a14721b9aacfa 100644 --- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -3,43 +3,86 @@ from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.layers as layers from paddle.v2.fluid.backward import append_backward -from paddle.v2.fluid.framework import default_main_program -import numpy +from paddle.v2.fluid.framework import default_main_program, switch_main_program +from paddle.v2.fluid.framework import Program +import numpy as np -main_program = default_main_program() - -class TestShrinkRNNMemory(unittest.TestCase): - def test_shrink_rnn_memory(self): +class TestShrinkRNNMemoryBase(unittest.TestCase): + def setUp(self): + self.main_program = Program() + switch_main_program(self.main_program) x = layers.data('x', shape=[100], dtype='float32') x.stop_gradient = False - table = layers.lod_rank_table(x=x) + rank_table_tensor = layers.data( + 'rank_table_tensor', shape=[1], dtype='float32', lod_level=1) + table = layers.lod_rank_table(x=rank_table_tensor) i = layers.zeros(dtype='int64', shape=[1]) - mem1 = layers.shrink_memory(x=x, i=i, table=table) + self.mem1 = layers.shrink_memory(x=x, i=i, table=table) i = layers.increment(x=i) i.stop_gradient = True - mem2 = layers.shrink_memory(x=mem1, i=i, table=table) + self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table) i = layers.increment(x=i) i.stop_gradient = True - mem3 = layers.shrink_memory(x=mem2, i=i, table=table) + self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table) + mem3_mean = layers.mean(x=self.mem3) + append_backward(loss=mem3_mean) + self.x_grad = self.main_program.global_block().var('x@GRAD') + + def sum_lodtensor(self, tensor): + sum_res = 0.0 + for i in xrange(np.product(tensor.get_dims())): + sum_res += tensor.get_float_element(i) + return sum_res + +class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): + def test_refer_lod(self): cpu = core.CPUPlace() - tensor = core.LoDTensor() - tensor.set_lod([[0, 2, 5, 6]]) - tensor_np = numpy.random.random(size=(3, 100)).astype('float32') - tensor.set(tensor_np, cpu) + x_tensor = core.LoDTensor() + x_tensor.set_lod([[0, 2, 5, 6]]) + tensor_np = np.random.random(size=(6, 100)).astype('float32') + x_tensor.set(tensor_np, cpu) + + rank_table_tensor = core.LoDTensor() + rank_table_tensor.set_lod([[0, 1, 3, 6]]) + rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), + cpu) + exe = Executor(cpu) - outs = exe.run(feed={'x': tensor}, fetch_list=[mem1, mem2, mem3]) - self.assertTrue(numpy.allclose(tensor_np[0:3], outs[0])) - self.assertTrue(numpy.allclose(tensor_np[0:2], outs[1])) - self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2])) + outs = exe.run( + feed={'x': x_tensor, + 'rank_table_tensor': rank_table_tensor}, + fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad], + return_numpy=False) + self.assertTrue(np.allclose(tensor_np[0:6], outs[0])) + self.assertTrue(np.allclose(tensor_np[0:5], outs[1])) + self.assertTrue(np.allclose(tensor_np[0:2], outs[2])) + self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), 
-        mem3_mean = layers.mean(x=mem3)
-        append_backward(loss=mem3_mean)
-        x_grad = exe.run(
-            feed={'x': tensor},
-            fetch_list=[main_program.global_block().var('x@GRAD')])[0]
-        self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1)
+
+
+class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase):
+    def test_no_lod(self):
+        cpu = core.CPUPlace()
+        x_tensor = core.LoDTensor()
+        tensor_np = np.random.random(size=(3, 100)).astype('float32')
+        x_tensor.set(tensor_np, cpu)
+
+        rank_table_tensor = core.LoDTensor()
+        rank_table_tensor.set_lod([[0, 1, 3, 6]])
+        rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
+                              cpu)
+
+        exe = Executor(cpu)
+        outs = exe.run(
+            feed={'x': x_tensor,
+                  'rank_table_tensor': rank_table_tensor},
+            fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad],
+            return_numpy=False)
+        self.assertTrue(np.allclose(tensor_np[0:3], outs[0]))
+        self.assertTrue(np.allclose(tensor_np[0:2], outs[1]))
+        self.assertTrue(np.allclose(tensor_np[0:1], outs[2]))
+        self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), delta=0.01)
 
 
 if __name__ == '__main__':
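
For intuition, below is a minimal sketch (plain Python, no Paddle dependency) of the LoD-aware shrink rule that the C++ change above introduces; the helper name shrink_height is hypothetical and used only for illustration.

# Hypothetical helper, not part of Paddle: mirrors the new ShrinkRNNMemoryOp
# logic. `lod0` is a top-level LoD offset vector such as [0, 2, 5, 6];
# `dst_num_rows` is the number of sequences still alive at this RNN step.
def shrink_height(lod0, dst_num_rows):
    if len(lod0) > dst_num_rows > 0:
        # Keep the first `dst_num_rows` sequences: their rows end at offset
        # lod0[dst_num_rows], which plays the role of `height` in the C++ code.
        return lod0[dst_num_rows]
    # No LoD (or nothing to keep): fall back to the old row-count behaviour.
    return dst_num_rows

# Mirrors TestShrinkRNNMemoryReferLoD: x has LoD [[0, 2, 5, 6]] over 6 rows.
assert shrink_height([0, 2, 5, 6], 3) == 6  # all 3 sequences kept -> 6 rows
assert shrink_height([0, 2, 5, 6], 2) == 5  # first 2 sequences    -> 5 rows
assert shrink_height([0, 2, 5, 6], 1) == 2  # first sequence       -> 2 rows
# Mirrors TestShrinkRNNMemoryNoLoD: without a LoD the row count is unchanged.
assert shrink_height([], 3) == 3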