From 1381f9ae4acf1a549ca3cccefee0a8d1284f1e7d Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Thu, 21 Jun 2018 02:42:46 +0000
Subject: [PATCH 1/9] Allocate memory for output grad

---
 paddle/fluid/operators/sequence_expand_op.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index d62c387c3eebf..bca1c5b2f2b90 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -184,13 +184,20 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
     int ref_level = context.Attr<int>("ref_level");
 
-    g_x->mutable_data<T>(context.GetPlace());
+    framework::LoDTensor printed_tensor;
+    printed_tensor.set_lod(x->lod());
+    printed_tensor.Resize(x->dims());
+    printed_tensor.mutable_data<T>(context.GetPlace());
+
+//    g_x->mutable_data<T>(context.GetPlace());
+    g_x->ShareDataWith(printed_tensor);
     g_x->set_lod(x->lod());
 
     auto& y_lod = y->lod();
     if (ref_level == -1) ref_level = y_lod.size() - 1;
     // just copy the gradient
     if (y_lod[ref_level].size() <= 1) {
+	  //printf("Copied\n");
       framework::TensorCopy(*g_out, context.GetPlace(), g_x);
       return;
     }
@@ -204,9 +211,11 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
       ref_x_lod.resize(x->dims()[0] + 1);
       std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
     }
+//	printf("In functor\n");
     SequenceExpandGradFunctor<DeviceContext, T> functor;
     functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
             ref_lod, g_x);
+//	printf("Out functor\n");
   }
 };
 

From 5b92a63e4a821652d32d5f4cc9fa07a59e9c0303 Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 10:45:59 +0800
Subject: [PATCH 2/9] Try computing into a temp tensor and copying it to g_x as a fix

---
 paddle/fluid/operators/sequence_expand_op.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index bca1c5b2f2b90..210970a3c5f53 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -184,20 +184,19 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
     int ref_level = context.Attr<int>("ref_level");
 
-    framework::LoDTensor printed_tensor;
-    printed_tensor.set_lod(x->lod());
-    printed_tensor.Resize(x->dims());
-    printed_tensor.mutable_data<T>(context.GetPlace());
+    framework::LoDTensor temp_tensor;
+    temp_tensor.set_lod(x->lod());
+    temp_tensor.Resize(x->dims());
+    temp_tensor.mutable_data<T>(context.GetPlace());
 
-//    g_x->mutable_data<T>(context.GetPlace());
-    g_x->ShareDataWith(printed_tensor);
+    g_x->mutable_data<T>(context.GetPlace());
+    // g_x->ShareDataWith(temp_tensor);
     g_x->set_lod(x->lod());
 
     auto& y_lod = y->lod();
     if (ref_level == -1) ref_level = y_lod.size() - 1;
     // just copy the gradient
     if (y_lod[ref_level].size() <= 1) {
-	  //printf("Copied\n");
       framework::TensorCopy(*g_out, context.GetPlace(), g_x);
       return;
     }
@@ -211,11 +210,12 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
       ref_x_lod.resize(x->dims()[0] + 1);
       std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
     }
-//	printf("In functor\n");
     SequenceExpandGradFunctor<DeviceContext, T> functor;
+//    functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
+//            ref_lod, g_x);
     functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
-            ref_lod, g_x);
-//	printf("Out functor\n");
+            ref_lod, &temp_tensor);
+    framework::TensorCopy(temp_tensor, context.GetPlace(), g_x);
   }
 };
 

From b10f958cb1d2e7a8bbd744c1fd6eb564f359a33d Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 14:38:32 +0800
Subject: [PATCH 3/9] Set temp tensor to zero

---
 paddle/fluid/operators/sequence_expand_op.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index 210970a3c5f53..3e61eb84ac683 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -189,6 +189,10 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     temp_tensor.Resize(x->dims());
     temp_tensor.mutable_data<T>(context.GetPlace());
 
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    math::SetConstant<DeviceContext, T> set_zero;
+    set_zero(dev_ctx, temp_tensor, static_cast<T>(0));
+
     g_x->mutable_data<T>(context.GetPlace());
     // g_x->ShareDataWith(temp_tensor);
     g_x->set_lod(x->lod());

From 3527d3024e3466111edcd88c6505c7428aa75f29 Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 14:49:48 +0800
Subject: [PATCH 4/9] Fix set_zero param: pass temp_tensor by pointer

---
 paddle/fluid/operators/sequence_expand_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index 3e61eb84ac683..abea99197fd76 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -191,7 +191,7 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
 
     auto& dev_ctx = context.template device_context<DeviceContext>();
     math::SetConstant<DeviceContext, T> set_zero;
-    set_zero(dev_ctx, temp_tensor, static_cast<T>(0));
+    set_zero(dev_ctx, &temp_tensor, static_cast<T>(0));
 
     g_x->mutable_data<T>(context.GetPlace());
     // g_x->ShareDataWith(temp_tensor);

From 16629beaa33d7918b077f38b4a4a823915264d72 Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 07:55:07 +0000
Subject: [PATCH 5/9] Clean up commented-out code

---
 paddle/fluid/operators/sequence_expand_op.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index abea99197fd76..acd4193fdab3c 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -194,7 +194,6 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     set_zero(dev_ctx, &temp_tensor, static_cast<T>(0));
 
     g_x->mutable_data<T>(context.GetPlace());
-    // g_x->ShareDataWith(temp_tensor);
     g_x->set_lod(x->lod());
 
     auto& y_lod = y->lod();
@@ -215,8 +214,6 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
       std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
     }
     SequenceExpandGradFunctor<DeviceContext, T> functor;
-//    functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
-//            ref_lod, g_x);
     functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
             ref_lod, &temp_tensor);
     framework::TensorCopy(temp_tensor, context.GetPlace(), g_x);

From 2b09a6ead1fe17f6261eb9ae606d1b0869d1862a Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 17:02:28 +0800
Subject: [PATCH 6/9] Remove temp tensor, set g_x to 0

---
 paddle/fluid/operators/sequence_expand_op.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index abea99197fd76..2cee1229b4e39 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -184,19 +184,19 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
     int ref_level = context.Attr<int>("ref_level");
 
-    framework::LoDTensor temp_tensor;
-    temp_tensor.set_lod(x->lod());
-    temp_tensor.Resize(x->dims());
-    temp_tensor.mutable_data<T>(context.GetPlace());
-
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T> set_zero;
-    set_zero(dev_ctx, &temp_tensor, static_cast<T>(0));
+    //framework::LoDTensor temp_tensor;
+    //temp_tensor.set_lod(x->lod());
+    //temp_tensor.Resize(x->dims());
+    //temp_tensor.mutable_data<T>(context.GetPlace());
 
     g_x->mutable_data<T>(context.GetPlace());
     // g_x->ShareDataWith(temp_tensor);
     g_x->set_lod(x->lod());
 
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    math::SetConstant<DeviceContext, T> set_zero;
+    set_zero(dev_ctx, g_x, static_cast<T>(0));
+
     auto& y_lod = y->lod();
     if (ref_level == -1) ref_level = y_lod.size() - 1;
     // just copy the gradient
@@ -218,8 +218,8 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
 //    functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
 //            ref_lod, g_x);
     functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
-            ref_lod, &temp_tensor);
-    framework::TensorCopy(temp_tensor, context.GetPlace(), g_x);
+            ref_lod, g_x);
+    // framework::TensorCopy(temp_tensor, context.GetPlace(), g_x);
   }
 };
 

From ecca7a952b86be8a41b719df64143502bd8c1793 Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 17:53:45 +0800
Subject: [PATCH 7/9] Clean up commented-out temp tensor code

---
 paddle/fluid/operators/sequence_expand_op.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index 27ceaf73aa892..c6234083af3d1 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -184,11 +184,6 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
     int ref_level = context.Attr<int>("ref_level");
 
-    //framework::LoDTensor temp_tensor;
-    //temp_tensor.set_lod(x->lod());
-    //temp_tensor.Resize(x->dims());
-    //temp_tensor.mutable_data<T>(context.GetPlace());
-
     g_x->mutable_data<T>(context.GetPlace());
     g_x->set_lod(x->lod());
 
@@ -216,7 +211,6 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     SequenceExpandGradFunctor<DeviceContext, T> functor;
     functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
             ref_lod, g_x);
-    // framework::TensorCopy(temp_tensor, context.GetPlace(), g_x);
   }
 };
 

From 2f79823f38d86a94e8d779916788f71cf7cd1d53 Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Tue, 26 Jun 2018 17:58:01 +0800
Subject: [PATCH 8/9] Set zero outside functor

---
 paddle/fluid/operators/sequence_expand_op.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index c6234083af3d1..6966ff10bc6b4 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -151,8 +151,8 @@ struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
       LoDTensor* dx) {
-    math::SetConstant<platform::CPUDeviceContext, T> set_zero;
-    set_zero(context, dx, static_cast<T>(0));
+    // math::SetConstant<platform::CPUDeviceContext, T> set_zero;
+    // set_zero(context, dx, static_cast<T>(0));
 
     int dout_offset = 0;
     for (size_t i = 1; i < ref_lod.size(); ++i) {

From 8cea2361cdab738801bca6f1e5a3d126f966a935 Mon Sep 17 00:00:00 2001
From: ktlichkid <liqingsheng@baidu.com>
Date: Wed, 27 Jun 2018 02:17:21 +0000
Subject: [PATCH 9/9] Remove commented-out set_zero code from CPU functor

---
 paddle/fluid/operators/sequence_expand_op.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h
index 6966ff10bc6b4..39301e1ac0971 100644
--- a/paddle/fluid/operators/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_expand_op.h
@@ -151,9 +151,6 @@ struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
       LoDTensor* dx) {
-    // math::SetConstant<platform::CPUDeviceContext, T> set_zero;
-    // set_zero(context, dx, static_cast<T>(0));
-
     int dout_offset = 0;
     for (size_t i = 1; i < ref_lod.size(); ++i) {
       int repeat_num = ref_lod[i] - ref_lod[i - 1];