From 6adf4ac3f9624b095027da81812cfa1807438cb3 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 15 May 2017 11:24:45 +0800
Subject: [PATCH 1/7] preliminary implementation

---
 paddle/gserver/layers/ConvShiftLayer.cpp | 142 +++++++++++++++++++++--
 1 file changed, 132 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/gserver/layers/ConvShiftLayer.cpp
index 002be415691f0..a8b04a88267d7 100644
--- a/paddle/gserver/layers/ConvShiftLayer.cpp
+++ b/paddle/gserver/layers/ConvShiftLayer.cpp
@@ -52,6 +52,9 @@ class ConvShiftLayer : public Layer {
 
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback = nullptr) override;
+  bool isSeqType();
+  void circularConvSeq();
+  void circularConvSeqDerivative();
 };
 
 REGISTER_LAYER(conv_shift, ConvShiftLayer);
@@ -66,16 +69,126 @@ bool ConvShiftLayer::init(const LayerMap& layerMap,
   return true;
 }
 
+bool ConvShiftLayer::isSeqType() {
+  const Argument& inLayer0 = getInput(0);
+  if (nullptr == inLayer0.sequenceStartPositions)
+    return false;
+  else
+    return true;
+}
+
+void ConvShiftLayer::circularConvSeq() {
+  const Argument& inLayer0 = getInput(0);
+  MatrixPtr in0 = inLayer0.value;
+  MatrixPtr in1 = getInputValue(1);
+  MatrixPtr out = getOutputValue();
+  const ICpuGpuVectorPtr& sequenceStartPositions =
+      inLayer0.sequenceStartPositions;
+
+  size_t width0 = in0->getWidth();
+  size_t numSeqs = sequenceStartPositions->getSize() - 1;
+  size_t height0 = in0->getHeight();
+  size_t width1 = in1->getWidth();
+  size_t height1 = in1->getHeight();
+
+  CHECK_EQ(numSeqs, height1);
+  CHECK_EQ(width0, out->getWidth());
+  CHECK_EQ(height0, out->getHeight());
+
+  CHECK_EQ(width1 % 2, 1U);
+
+  real* inV0 = in0->getData();
+  const int* startPosIntPtr = sequenceStartPositions->getData(false);
+  real* inV1 = in1->getData();
+  real* outV = out->getData();
+
+  int leftCtxLen = (width1 - 1) / 2;
+  for (size_t x = 0; x < numSeqs - 1; x++) {
+    int curSeqLen = startPosIntPtr[x + 1];
+    size_t curSeqWidth = curSeqLen * width0;
+    for (size_t i = 0; i < curSeqWidth; i++) {
+      for (size_t j = 0; j < width1; ++j) {
+        int index = i + j - leftCtxLen;
+        index = (index + curSeqWidth) % curSeqWidth;
+        int outVRowOffset = i / width0;
+        int outVColOffset = i % width0;
+        int inV0RowOffset = index / width0;
+        int inV0ColOffset = index % width0;
+        (outV + outVRowOffset)[outVColOffset] +=
+            (inV0 + inV0RowOffset)[inV0ColOffset] * inV1[j];
+      }
+    }
+    outV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
+  }
+}
+
+void ConvShiftLayer::circularConvSeqDerivative() {
+  const Argument& inLayer0 = getInput(0);
+  MatrixPtr in0 = inLayer0.value;
+  MatrixPtr in1 = getInputValue(1);
+  MatrixPtr inG0 = getInputGrad(0);
+  MatrixPtr inG1 = getInputGrad(1);
+  MatrixPtr outG = getOutputGrad();
+  const ICpuGpuVectorPtr& sequenceStartPositions =
+      inLayer0.sequenceStartPositions;
+
+  size_t height0 = in0->getHeight();
+  size_t height1 = in1->getHeight();
+  size_t numSeqs = sequenceStartPositions->getSize() - 1;
+  size_t width0 = in0->getWidth();
+  size_t width1 = in1->getWidth();
+
+  CHECK_EQ(height1, numSeqs);
+  CHECK_EQ(height0, inG0->getHeight());
+  CHECK_EQ(width0, inG0->getWidth());
+  CHECK_EQ(height1, inG1->getHeight());
+  CHECK_EQ(width1, inG1->getWidth());
+  CHECK_EQ(height0, outG->getHeight());
+  CHECK_EQ(width0, outG->getWidth());
+
+  const int* startPosIntPtr = sequenceStartPositions->getData(false);
+  real* outGV = outG->getData();
+  real* inV0 = in0->getData();
+  real* inV1 = in1->getData();
+  real* inGV0 = inG0->getData();
+  real* inGV1 = inG1->getData();
+
+  int leftCtxLen = (width1 - 1) / 2;
+  for (size_t x = 0; x < numSeqs - 1; x++) {
+    int curSeqLen = startPosIntPtr[x + 1];
+    size_t curSeqWidth = curSeqLen * width0;
+    for (size_t j = 0; j < width1; j++) {
+      for (size_t i = 0; i < curSeqWidth; i++) {
+        int index = i + j - leftCtxLen;
+        index = (index + curSeqWidth) % curSeqWidth;
+        int inGV0RowOffset = index / width0;
+        int inGV0ColOffset = index % width0;
+        int outGVRowOffset = i / width0;
+        int outGVColOffset = i % width0;
+        (inGV0 + inGV0RowOffset)[inGV0ColOffset] +=
+            (outGV + outGVRowOffset)[outGVColOffset] * inV1[j];
+        inGV1[j] += (outGV + outGVRowOffset)[outGVColOffset] *
+                    (inGV0 + inGV0RowOffset)[inGV0ColOffset];
+      }
+    }
+    outGV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
+    inGV0 += curSeqWidth;
+    inGV1 += width1;
+  }
+}
+
 void ConvShiftLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   MatrixPtr inV0 = getInputValue(0);
-  MatrixPtr inV1 = getInputValue(1);
 
   size_t batchSize = inV0->getHeight();
   size_t dataDim = inV0->getWidth();
 
-  CHECK_EQ(batchSize, inV1->getHeight());
   CHECK_EQ(dataDim, getSize());
 
   {
@@ -83,25 +196,34 @@ void ConvShiftLayer::forward(PassType passType) {
     resetOutput(batchSize, dataDim);
   }
 
-  MatrixPtr outV = getOutputValue();
-
   REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
-  outV->circularConv(*inV0, *inV1);
+  if (!isSeqType()) {
+    MatrixPtr inV1 = getInputValue(1);
+    CHECK_EQ(batchSize, inV1->getHeight());
+    MatrixPtr outV = getOutputValue();
+    outV->circularConv(*inV0, *inV1);
+  } else {
+    circularConvSeq();
+  }
 }
 
 void ConvShiftLayer::backward(const UpdateCallback& callback) {
-  MatrixPtr inV0 = getInputValue(0);
-  MatrixPtr inV1 = getInputValue(1);
-  MatrixPtr outG = getOutputGrad();
   MatrixPtr inG0 = getInputGrad(0);
   MatrixPtr inG1 = getInputGrad(1);
 
   REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());
 
-  if (inG0 && inG1) {
+  if (!(inG0 && inG1)) {
+    CHECK(!inG0 || !inG1) << "Not supported";
+  }
+
+  if (!isSeqType()) {
+    MatrixPtr inV0 = getInputValue(0);
+    MatrixPtr inV1 = getInputValue(1);
+    MatrixPtr outG = getOutputGrad();
     outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
   } else {
-    CHECK(!inG0 || !inG1) << "Not supported";
+    circularConvSeqDerivative();
   }
 }

From 8cd2222e49a1a4f07665efc3864c9a3f43f53941 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 15 May 2017 14:56:21 +0800
Subject: [PATCH 2/7] merge convolution logic into class Matrix

---
 paddle/gserver/layers/ConvShiftLayer.cpp | 146 ++---------------------
 paddle/math/Matrix.cpp                   |  95 ++++++++++-----
 paddle/math/Matrix.h                     |  18 ++-
 3 files changed, 90 insertions(+), 169 deletions(-)

diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/gserver/layers/ConvShiftLayer.cpp
index a8b04a88267d7..e4dd7f0ee034b 100644
--- a/paddle/gserver/layers/ConvShiftLayer.cpp
+++ b/paddle/gserver/layers/ConvShiftLayer.cpp
@@ -52,9 +52,6 @@ class ConvShiftLayer : public Layer {
 
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback = nullptr) override;
-  bool isSeqType();
-  void circularConvSeq();
-  void circularConvSeqDerivative();
 };
 
 REGISTER_LAYER(conv_shift, ConvShiftLayer);
@@ -69,122 +66,12 @@ bool ConvShiftLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-bool ConvShiftLayer::isSeqType() {
-  const Argument& inLayer0 = getInput(0);
-  if (nullptr == inLayer0.sequenceStartPositions)
-    return false;
-  else
-    return true;
-}
-
-void ConvShiftLayer::circularConvSeq() {
-  const Argument& inLayer0 = getInput(0);
-  MatrixPtr in0 = inLayer0.value;
-  MatrixPtr in1 = getInputValue(1);
-  MatrixPtr out = getOutputValue();
-  const ICpuGpuVectorPtr& sequenceStartPositions =
-      inLayer0.sequenceStartPositions;
-
-  size_t width0 = in0->getWidth();
-  size_t numSeqs = sequenceStartPositions->getSize() - 1;
-  size_t height0 = in0->getHeight();
-  size_t width1 = in1->getWidth();
-  size_t height1 = in1->getHeight();
-
-  CHECK_EQ(numSeqs, height1);
-  CHECK_EQ(width0, out->getWidth());
-  CHECK_EQ(height0, out->getHeight());
-
-  CHECK_EQ(width1 % 2, 1U);
-
-  real* inV0 = in0->getData();
-  const int* startPosIntPtr = sequenceStartPositions->getData(false);
-  real* inV1 = in1->getData();
-  real* outV = out->getData();
-
-  int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < numSeqs - 1; x++) {
-    int curSeqLen = startPosIntPtr[x + 1];
-    size_t curSeqWidth = curSeqLen * width0;
-    for (size_t i = 0; i < curSeqWidth; i++) {
-      for (size_t j = 0; j < width1; ++j) {
-        int index = i + j - leftCtxLen;
-        index = (index + curSeqWidth) % curSeqWidth;
-        int outVRowOffset = i / width0;
-        int outVColOffset = i % width0;
-        int inV0RowOffset = index / width0;
-        int inV0ColOffset = index % width0;
-        (outV + outVRowOffset)[outVColOffset] +=
-            (inV0 + inV0RowOffset)[inV0ColOffset] * inV1[j];
-      }
-    }
-    outV += curSeqWidth;
-    inV0 += curSeqWidth;
-    inV1 += width1;
-  }
-}
-
-void ConvShiftLayer::circularConvSeqDerivative() {
-  const Argument& inLayer0 = getInput(0);
-  MatrixPtr in0 = inLayer0.value;
-  MatrixPtr in1 = getInputValue(1);
-  MatrixPtr inG0 = getInputGrad(0);
-  MatrixPtr inG1 = getInputGrad(1);
-  MatrixPtr outG = getOutputGrad();
-  const ICpuGpuVectorPtr& sequenceStartPositions =
-      inLayer0.sequenceStartPositions;
-
-  size_t height0 = in0->getHeight();
-  size_t height1 = in1->getHeight();
-  size_t numSeqs = sequenceStartPositions->getSize() - 1;
-  size_t width0 = in0->getWidth();
-  size_t width1 = in1->getWidth();
-
-  CHECK_EQ(height1, numSeqs);
-  CHECK_EQ(height0, inG0->getHeight());
-  CHECK_EQ(width0, inG0->getWidth());
-  CHECK_EQ(height1, inG1->getHeight());
-  CHECK_EQ(width1, inG1->getWidth());
-  CHECK_EQ(height0, outG->getHeight());
-  CHECK_EQ(width0, outG->getWidth());
-
-  const int* startPosIntPtr = sequenceStartPositions->getData(false);
-  real* outGV = outG->getData();
-  real* inV0 = in0->getData();
-  real* inV1 = in1->getData();
-  real* inGV0 = inG0->getData();
-  real* inGV1 = inG1->getData();
-
-  int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < numSeqs - 1; x++) {
-    int curSeqLen = startPosIntPtr[x + 1];
-    size_t curSeqWidth = curSeqLen * width0;
-    for (size_t j = 0; j < width1; j++) {
-      for (size_t i = 0; i < curSeqWidth; i++) {
-        int index = i + j - leftCtxLen;
-        index = (index + curSeqWidth) % curSeqWidth;
-        int inGV0RowOffset = index / width0;
-        int inGV0ColOffset = index % width0;
-        int outGVRowOffset = i / width0;
-        int outGVColOffset = i % width0;
-        (inGV0 + inGV0RowOffset)[inGV0ColOffset] +=
-            (outGV + outGVRowOffset)[outGVColOffset] * inV1[j];
-        inGV1[j] += (outGV + outGVRowOffset)[outGVColOffset] *
-                    (inGV0 + inGV0RowOffset)[inGV0ColOffset];
-      }
-    }
-    outGV += curSeqWidth;
-    inV0 += curSeqWidth;
-    inV1 += width1;
-    inGV0 += curSeqWidth;
-    inGV1 += width1;
-  }
-}
-
 void ConvShiftLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;
 
   size_t batchSize = inV0->getHeight();
   size_t dataDim = inV0->getWidth();
@@ -196,34 +83,27 @@ void ConvShiftLayer::forward(PassType passType) {
     resetOutput(batchSize, dataDim);
   }
 
+  MatrixPtr outV = getOutputValue();
+
   REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
-  if (!isSeqType()) {
-    MatrixPtr inV1 = getInputValue(1);
-    CHECK_EQ(batchSize, inV1->getHeight());
-    MatrixPtr outV = getOutputValue();
-    outV->circularConv(*inV0, *inV1);
-  } else {
-    circularConvSeq();
-  }
+  outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
 }
 
 void ConvShiftLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  MatrixPtr outG = getOutputGrad();
   MatrixPtr inG0 = getInputGrad(0);
   MatrixPtr inG1 = getInputGrad(1);
+  const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;
 
   REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());
 
-  if (!(inG0 && inG1)) {
-    CHECK(!inG0 || !inG1) << "Not supported";
-  }
-
-  if (!isSeqType()) {
-    MatrixPtr inV0 = getInputValue(0);
-    MatrixPtr inV1 = getInputValue(1);
-    MatrixPtr outG = getOutputGrad();
-    outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
+  if (inG0 && inG1) {
+    outG->circularConvDerivative(
+        *outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
   } else {
-    circularConvSeqDerivative();
+    CHECK(!inG0 || !inG1) << "Not supported";
   }
 }

diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 6ac61be0bf1b7..bf282eb524e3d 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3877,14 +3877,22 @@ real CpuMatrix::getMax() {
   return res;
 }
 
-void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
-  size_t height = this->getHeight();
+void CpuMatrix::circularConv(Matrix& in0,
+                             Matrix& in1,
+                             const ICpuGpuVectorPtr& seqStartPosPtr,
+                             bool useGpu) {
+  size_t height0 = this->getHeight();
   size_t width0 = this->getWidth();
   size_t width1 = in1.getWidth();
+  size_t numSeqs = height0;
+  // if sequence type, height1 should be sequence number
+  if (nullptr != seqStartPosPtr) {
+    numSeqs = seqStartPosPtr->getSize() - 1;
+  }
 
-  CHECK_EQ(height, in0.getHeight());
+  CHECK_EQ(height0, in0.getHeight());
   CHECK_EQ(width0, in0.getWidth());
-  CHECK_EQ(height, in1.getHeight());
+  CHECK_EQ(numSeqs, in1.getHeight());
 
   CHECK_EQ(width1 % 2, 1U);
 
@@ -3892,32 +3900,50 @@ void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
   real* inV0 = in0.getData();
   real* inV1 = in1.getData();
 
+  const int* startPosIntPtr = nullptr;
+  if (nullptr != seqStartPosPtr) {
+    startPosIntPtr = seqStartPosPtr->getData(useGpu);
+  }
+
   int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < height;
-       ++x, outV += width0, inV0 += width0, inV1 += width1) {
-    for (size_t i = 0; i < width0; ++i) {  // each dimension of output
-      for (size_t j = 0; j < width1; ++j) {
-        // iterate over all dimentions of inV1
-        int index = i + j - leftCtxLen;
-        index = (index + width0) % width0;
-        outV[i] += inV0[index] * inV1[j];
+  // row first order, treat multiple rows as a long row
+  for (size_t x = 0; x < numSeqs; ++x) {
+    size_t curSeqWidth = width0;
+    if (nullptr != startPosIntPtr)
+      curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
+    // conv a complete sequence
+    for (size_t i = 0; i < curSeqWidth; ++i) {
+      for (size_t j = 0; j < width1;
+           ++j) {  // iterate over convolution template
+        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
+        *(outV + i) += *(inV0 + index) * inV1[j];
       }
     }
+    outV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
   }
 }
 
-void CpuMatrix::circularConvDerivative(
-    Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
-  size_t height = in0.getHeight();
+void CpuMatrix::circularConvDerivative(Matrix& outG,
+                                       Matrix& in0,
+                                       Matrix& in1,
+                                       Matrix& inG0,
+                                       Matrix& inG1,
+                                       const ICpuGpuVectorPtr& seqStartPosPtr,
+                                       bool useGpu) {
+  size_t height0 = in0.getHeight();
   size_t width0 = in0.getWidth();
   size_t width1 = in1.getWidth();
+  size_t numSeqs = height0;
+  if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;
 
-  CHECK_EQ(height, in1.getHeight());
-  CHECK_EQ(height, inG0.getHeight());
+  CHECK_EQ(numSeqs, in1.getHeight());
+  CHECK_EQ(height0, inG0.getHeight());
   CHECK_EQ(width0, inG0.getWidth());
-  CHECK_EQ(height, inG1.getHeight());
+  CHECK_EQ(numSeqs, inG1.getHeight());
   CHECK_EQ(width1, inG1.getWidth());
-  CHECK_EQ(height, outG.getHeight());
+  CHECK_EQ(height0, outG.getHeight());
   CHECK_EQ(width0, outG.getWidth());
 
   real* outGV = outG.getData();
@@ -3925,23 +3951,28 @@ void CpuMatrix::circularConvDerivative(
   real* inV1 = in1.getData();
   real* inGV0 = inG0.getData();
   real* inGV1 = inG1.getData();
+  const int* startPosIntPtr = nullptr;
+  if (nullptr != seqStartPosPtr) {
+    startPosIntPtr = seqStartPosPtr->getData(useGpu);
+  }
 
   int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < height; ++x,
-              outGV += width0,
-              inV0 += width0,
-              inV1 += width1,
-              inGV0 += width0,
-              inGV1 += width1) {
-    for (size_t j = 0; j < width1; ++j) {  // iterate over width1
-      for (size_t i = 0; i < width0; ++i) {
-        // such over all dimensions of outG
-        int index = i + j - leftCtxLen;
-        index = (index + width0) % width0;
-        inGV0[index] += outGV[i] * inV1[j];
-        inGV1[j] += outGV[i] * inV0[index];
+  for (size_t x = 0; x < numSeqs; ++x) {
+    size_t curSeqWidth = width0;
+    if (nullptr != startPosIntPtr)
+      curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
+    for (size_t j = 0; j < width1; ++j) {  // iterate over convolution template
+      for (size_t i = 0; i < curSeqWidth; i++) {
+        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
+        *(inGV0 + index) += *(outGV + i) * inV1[j];
+        inGV1[j] += *(outGV + i) * *(inV0 + index);
       }
     }
+    outGV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
+    inGV0 += curSeqWidth;
+    inGV1 += width1;
   }
 }

diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 3252adb19e4c2..2dcc04fb59feb 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -744,7 +744,10 @@ class Matrix : public BaseMatrix {
    * b's index arithmetic is computed modulo M,
    * c's index arithmetic is computed modulo N.
    */
-  virtual void circularConv(Matrix& b, Matrix& c) {
+  virtual void circularConv(Matrix& b,
+                            Matrix& c,
+                            const ICpuGpuVectorPtr& seqStartPosPtr,
+                            bool useGpu) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -752,7 +755,9 @@ class Matrix : public BaseMatrix {
                                      Matrix& prevOut1,
                                      Matrix& prevOut2,
                                      Matrix& prevGrad1,
-                                     Matrix& prevGrad2) {
+                                     Matrix& prevGrad2,
+                                     const ICpuGpuVectorPtr& seqStartPosPtr,
+                                     bool useGpu) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
                       IVector& label,
                       real alpha);
 
-  void circularConv(Matrix& b, Matrix& c);
+  void circularConv(Matrix& b,
+                    Matrix& c,
+                    const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
+                    bool useGpu = false);
   void circularConvDerivative(Matrix& output,
                               Matrix& prevOut1,
                               Matrix& prevOut2,
                               Matrix& prevGrad1,
-                              Matrix& prevGrad2);
+                              Matrix& prevGrad2,
+                              const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
+                              bool useGpu = false);
   void softmax(Matrix& output);
   void sequenceSoftmax(Matrix& output, const IVector& index);

From 25cdee6dd9510e1b62bacea66f0813f54f2f04bc Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 15 May 2017 14:56:21 +0800
Subject: [PATCH 3/7] merge convolution logic into class Matrix

---
 paddle/gserver/layers/ConvShiftLayer.cpp | 146 ++---------------------
 paddle/math/Matrix.cpp                   |  95 ++++++++++-----
 paddle/math/Matrix.h                     |  18 ++-
 3 files changed, 90 insertions(+), 169 deletions(-)

diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/gserver/layers/ConvShiftLayer.cpp
index a8b04a88267d7..e4dd7f0ee034b 100644
--- a/paddle/gserver/layers/ConvShiftLayer.cpp
+++ b/paddle/gserver/layers/ConvShiftLayer.cpp
@@ -52,9 +52,6 @@ class ConvShiftLayer : public Layer {
 
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback = nullptr) override;
-  bool isSeqType();
-  void circularConvSeq();
-  void circularConvSeqDerivative();
 };
 
 REGISTER_LAYER(conv_shift, ConvShiftLayer);
@@ -69,122 +66,12 @@ bool ConvShiftLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-bool ConvShiftLayer::isSeqType() {
-  const Argument& inLayer0 = getInput(0);
-  if (nullptr == inLayer0.sequenceStartPositions)
-    return false;
-  else
-    return true;
-}
-
-void ConvShiftLayer::circularConvSeq() {
-  const Argument& inLayer0 = getInput(0);
-  MatrixPtr in0 = inLayer0.value;
-  MatrixPtr in1 = getInputValue(1);
-  MatrixPtr out = getOutputValue();
-  const ICpuGpuVectorPtr& sequenceStartPositions =
-      inLayer0.sequenceStartPositions;
-
-  size_t width0 = in0->getWidth();
-  size_t numSeqs = sequenceStartPositions->getSize() - 1;
-  size_t height0 = in0->getHeight();
-  size_t width1 = in1->getWidth();
-  size_t height1 = in1->getHeight();
-
-  CHECK_EQ(numSeqs, height1);
-  CHECK_EQ(width0, out->getWidth());
-  CHECK_EQ(height0, out->getHeight());
-
-  CHECK_EQ(width1 % 2, 1U);
-
-  real* inV0 = in0->getData();
-  const int* startPosIntPtr = sequenceStartPositions->getData(false);
-  real* inV1 = in1->getData();
-  real* outV = out->getData();
-
-  int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < numSeqs - 1; x++) {
-    int curSeqLen = startPosIntPtr[x + 1];
-    size_t curSeqWidth = curSeqLen * width0;
-    for (size_t i = 0; i < curSeqWidth; i++) {
-      for (size_t j = 0; j < width1; ++j) {
-        int index = i + j - leftCtxLen;
-        index = (index + curSeqWidth) % curSeqWidth;
-        int outVRowOffset = i / width0;
-        int outVColOffset = i % width0;
-        int inV0RowOffset = index / width0;
-        int inV0ColOffset = index % width0;
-        (outV + outVRowOffset)[outVColOffset] +=
-            (inV0 + inV0RowOffset)[inV0ColOffset] * inV1[j];
-      }
-    }
-    outV += curSeqWidth;
-    inV0 += curSeqWidth;
-    inV1 += width1;
-  }
-}
-
-void ConvShiftLayer::circularConvSeqDerivative() {
-  const Argument& inLayer0 = getInput(0);
-  MatrixPtr in0 = inLayer0.value;
-  MatrixPtr in1 = getInputValue(1);
-  MatrixPtr inG0 = getInputGrad(0);
-  MatrixPtr inG1 = getInputGrad(1);
-  MatrixPtr outG = getOutputGrad();
-  const ICpuGpuVectorPtr& sequenceStartPositions =
-      inLayer0.sequenceStartPositions;
-
-  size_t height0 = in0->getHeight();
-  size_t height1 = in1->getHeight();
-  size_t numSeqs = sequenceStartPositions->getSize() - 1;
-  size_t width0 = in0->getWidth();
-  size_t width1 = in1->getWidth();
-
-  CHECK_EQ(height1, numSeqs);
-  CHECK_EQ(height0, inG0->getHeight());
-  CHECK_EQ(width0, inG0->getWidth());
-  CHECK_EQ(height1, inG1->getHeight());
-  CHECK_EQ(width1, inG1->getWidth());
-  CHECK_EQ(height0, outG->getHeight());
-  CHECK_EQ(width0, outG->getWidth());
-
-  const int* startPosIntPtr = sequenceStartPositions->getData(false);
-  real* outGV = outG->getData();
-  real* inV0 = in0->getData();
-  real* inV1 = in1->getData();
-  real* inGV0 = inG0->getData();
-  real* inGV1 = inG1->getData();
-
-  int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < numSeqs - 1; x++) {
-    int curSeqLen = startPosIntPtr[x + 1];
-    size_t curSeqWidth = curSeqLen * width0;
-    for (size_t j = 0; j < width1; j++) {
-      for (size_t i = 0; i < curSeqWidth; i++) {
-        int index = i + j - leftCtxLen;
-        index = (index + curSeqWidth) % curSeqWidth;
-        int inGV0RowOffset = index / width0;
-        int inGV0ColOffset = index % width0;
-        int outGVRowOffset = i / width0;
-        int outGVColOffset = i % width0;
-        (inGV0 + inGV0RowOffset)[inGV0ColOffset] +=
-            (outGV + outGVRowOffset)[outGVColOffset] * inV1[j];
-        inGV1[j] += (outGV + outGVRowOffset)[outGVColOffset] *
-                    (inGV0 + inGV0RowOffset)[inGV0ColOffset];
-      }
-    }
-    outGV += curSeqWidth;
-    inV0 += curSeqWidth;
-    inV1 += width1;
-    inGV0 += curSeqWidth;
-    inGV1 += width1;
-  }
-}
-
 void ConvShiftLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;
 
   size_t batchSize = inV0->getHeight();
   size_t dataDim = inV0->getWidth();
@@ -196,34 +83,27 @@ void ConvShiftLayer::forward(PassType passType) {
     resetOutput(batchSize, dataDim);
   }
 
+  MatrixPtr outV = getOutputValue();
+
   REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
-  if (!isSeqType()) {
-    MatrixPtr inV1 = getInputValue(1);
-    CHECK_EQ(batchSize, inV1->getHeight());
-    MatrixPtr outV = getOutputValue();
-    outV->circularConv(*inV0, *inV1);
-  } else {
-    circularConvSeq();
-  }
+  outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
 }
 
 void ConvShiftLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  MatrixPtr outG = getOutputGrad();
   MatrixPtr inG0 = getInputGrad(0);
   MatrixPtr inG1 = getInputGrad(1);
+  const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;
 
   REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());
 
-  if (!(inG0 && inG1)) {
-    CHECK(!inG0 || !inG1) << "Not supported";
-  }
-
-  if (!isSeqType()) {
-    MatrixPtr inV0 = getInputValue(0);
-    MatrixPtr inV1 = getInputValue(1);
-    MatrixPtr outG = getOutputGrad();
-    outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
+  if (inG0 && inG1) {
+    outG->circularConvDerivative(
+        *outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
   } else {
-    circularConvSeqDerivative();
+    CHECK(!inG0 || !inG1) << "Not supported";
   }
 }

diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 6ac61be0bf1b7..36e44e69d03e6 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3877,14 +3877,22 @@ real CpuMatrix::getMax() {
   return res;
 }
 
-void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
-  size_t height = this->getHeight();
+void CpuMatrix::circularConv(Matrix& in0,
+                             Matrix& in1,
+                             const ICpuGpuVectorPtr& seqStartPosPtr,
+                             bool useGpu) {
+  size_t height0 = this->getHeight();
   size_t width0 = this->getWidth();
   size_t width1 = in1.getWidth();
+  size_t numSeqs = height0;
+  // if sequence type, height1 should be sequence number
+  if (nullptr != seqStartPosPtr) {
+    numSeqs = seqStartPosPtr->getSize() - 1;
+  }
 
-  CHECK_EQ(height, in0.getHeight());
+  CHECK_EQ(height0, in0.getHeight());
   CHECK_EQ(width0, in0.getWidth());
-  CHECK_EQ(height, in1.getHeight());
+  CHECK_EQ(numSeqs, in1.getHeight());
 
   CHECK_EQ(width1 % 2, 1U);
 
@@ -3892,32 +3900,50 @@ void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
   real* inV0 = in0.getData();
   real* inV1 = in1.getData();
 
+  const int* startPosIntPtr = nullptr;
+  if (nullptr != seqStartPosPtr) {
+    startPosIntPtr = seqStartPosPtr->getData(useGpu);
+  }
+
   int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < height;
-       ++x, outV += width0, inV0 += width0, inV1 += width1) {
-    for (size_t i = 0; i < width0; ++i) {  // each dimension of output
-      for (size_t j = 0; j < width1; ++j) {
-        // iterate over all dimentions of inV1
-        int index = i + j - leftCtxLen;
-        index = (index + width0) % width0;
-        outV[i] += inV0[index] * inV1[j];
+  // row first order, treat multiple rows as a long row
+  for (size_t x = 0; x < numSeqs; ++x) {
+    size_t curSeqWidth = width0;
+    if (nullptr != startPosIntPtr)
+      curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
+    // conv a complete sequence
+    for (size_t i = 0; i < curSeqWidth; ++i) {
+      for (size_t j = 0; j < width1;
+           ++j) {  // iterate over convolution template
+        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
+        *(outV + i) += *(inV0 + index) * inV1[j];
      }
     }
+    outV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
   }
 }
 
-void CpuMatrix::circularConvDerivative(
-    Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
-  size_t height = in0.getHeight();
+void CpuMatrix::circularConvDerivative(Matrix& outG,
+                                       Matrix& in0,
+                                       Matrix& in1,
+                                       Matrix& inG0,
+                                       Matrix& inG1,
+                                       const ICpuGpuVectorPtr& seqStartPosPtr,
+                                       bool useGpu) {
+  size_t height0 = in0.getHeight();
   size_t width0 = in0.getWidth();
   size_t width1 = in1.getWidth();
+  size_t numSeqs = height0;
+  if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;
 
-  CHECK_EQ(height, in1.getHeight());
-  CHECK_EQ(height, inG0.getHeight());
+  CHECK_EQ(numSeqs, in1.getHeight());
+  CHECK_EQ(height0, inG0.getHeight());
   CHECK_EQ(width0, inG0.getWidth());
-  CHECK_EQ(height, inG1.getHeight());
+  CHECK_EQ(numSeqs, inG1.getHeight());
   CHECK_EQ(width1, inG1.getWidth());
-  CHECK_EQ(height, outG.getHeight());
+  CHECK_EQ(height0, outG.getHeight());
   CHECK_EQ(width0, outG.getWidth());
 
   real* outGV = outG.getData();
@@ -3925,23 +3951,28 @@ void CpuMatrix::circularConvDerivative(
   real* inV1 = in1.getData();
   real* inGV0 = inG0.getData();
   real* inGV1 = inG1.getData();
+  const int* startPosIntPtr = nullptr;
+  if (nullptr != seqStartPosPtr) {
+    startPosIntPtr = seqStartPosPtr->getData(useGpu);
+  }
 
   int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < height; ++x,
-              outGV += width0,
-              inV0 += width0,
-              inV1 += width1,
-              inGV0 += width0,
-              inGV1 += width1) {
-    for (size_t j = 0; j < width1; ++j) {  // iterate over width1
-      for (size_t i = 0; i < width0; ++i) {
-        // such over all dimensions of outG
-        int index = i + j - leftCtxLen;
-        index = (index + width0) % width0;
-        inGV0[index] += outGV[i] * inV1[j];
-        inGV1[j] += outGV[i] * inV0[index];
+  for (size_t x = 0; x < numSeqs; ++x) {
+    size_t curSeqWidth = width0;
+    if (nullptr != startPosIntPtr)
+      curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
+    for (size_t j = 0; j < width1; ++j) {  // iterate over convolution template
+      for (size_t i = 0; i < curSeqWidth; ++i) {
+        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
+        *(inGV0 + index) += *(outGV + i) * inV1[j];
+        inGV1[j] += *(outGV + i) * *(inV0 + index);
       }
     }
+    outGV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
+    inGV0 += curSeqWidth;
+    inGV1 += width1;
   }
 }

diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 3252adb19e4c2..2dcc04fb59feb 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -744,7 +744,10 @@ class Matrix : public BaseMatrix {
    * b's index arithmetic is computed modulo M,
    * c's index arithmetic is computed modulo N.
    */
-  virtual void circularConv(Matrix& b, Matrix& c) {
+  virtual void circularConv(Matrix& b,
+                            Matrix& c,
+                            const ICpuGpuVectorPtr& seqStartPosPtr,
+                            bool useGpu) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -752,7 +755,9 @@ class Matrix : public BaseMatrix {
                                      Matrix& prevOut1,
                                      Matrix& prevOut2,
                                      Matrix& prevGrad1,
-                                     Matrix& prevGrad2) {
+                                     Matrix& prevGrad2,
+                                     const ICpuGpuVectorPtr& seqStartPosPtr,
+                                     bool useGpu) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
                       IVector& label,
                       real alpha);
 
-  void circularConv(Matrix& b, Matrix& c);
+  void circularConv(Matrix& b,
+                    Matrix& c,
+                    const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
+                    bool useGpu = false);
   void circularConvDerivative(Matrix& output,
                               Matrix& prevOut1,
                               Matrix& prevOut2,
                               Matrix& prevGrad1,
-                              Matrix& prevGrad2);
+                              Matrix& prevGrad2,
+                              const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
+                              bool useGpu = false);
   void softmax(Matrix& output);
   void sequenceSoftmax(Matrix& output, const IVector& index);

From 167ceb54cc580247e476e63428485db6faeceb73 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 17 May 2017 11:16:17 +0800
Subject: [PATCH 4/7] add unit test

---
 paddle/gserver/tests/LayerGradUtil.cpp  | 26 ++++++++++++++++++++++++-
 paddle/gserver/tests/LayerGradUtil.h    | 18 ++++++++++++++++-
 paddle/gserver/tests/test_LayerGrad.cpp | 13 +++++++++++--
 3 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index a0b1cd471dd02..beac97bf4b10e 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -387,6 +387,28 @@ void initDataLayer(TestConfig testConf,
         data.value->sigmoid(*data.value);
         data.grad->zeroMem();
         break;
+      case INPUT_SEQUENCE_MNUM_DATA: {
+        // first calculate height
+        sequenceStartPositions =
+            ICpuGpuVector::create(batchSize + 1, /*useGpu=*/false);
+        int seqLen = 0;
+        int* buf = sequenceStartPositions->getMutableData(false);
+        int64_t pos = 0;
+        for (size_t j = 0; j < batchSize; ++j) {
+          seqLen = uniformRandom(testConf.inputDefs[i].maxLen) + 1;
+          buf[j] = pos;
+          pos += seqLen;
+        }
+        buf[batchSize] = pos;
+        fillData(trans, layer->getSize(), pos);
+        data.value->randomizeUniform();
+        data.value->add(-0.5);
+        if (testLayerName != "prelu") {
+          data.value->sigmoid(*data.value);
+        }
+        data.grad->zeroMem();
+        break;
+      }
       default:
         LOG(FATAL) << " unknown inputType ";
         return;
@@ -394,10 +416,12 @@ void initDataLayer(TestConfig testConf,
     if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
         testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA ||
         testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL ||
-        testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) {
+        testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA ||
+        testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MNUM_DATA) {
       if (!sequenceStartPositions) {
         generateSequenceStartPositions(batchSize, sequenceStartPositions);
       }
+      data.sequenceStartPositions = sequenceStartPositions;
     }
 
     if (testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA) {
diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 9f68eb64d0b4a..6c21fecf5d8a1 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -31,7 +31,8 @@ enum InputType {
   INPUT_SEQUENCE_LABEL,
   INPUT_SPARSE_NON_VALUE_DATA,
   INPUT_SPARSE_FLOAT_VALUE_DATA,
-  INPUT_DENSE_DIM_DATA,  // using sequence length to init dense data
+  INPUT_DENSE_DIM_DATA,      // using sequence length to init dense data
+  INPUT_SEQUENCE_MNUM_DATA,  // regard batchSize as sequence number
 };
 
 struct ParaSparse {
@@ -62,6 +63,7 @@ struct InputDef {
   string name;
   size_t dim;
   size_t paraSize;
+  size_t maxLen;  // maximum length of sequence data
   ParaSparse sparse;
   bool isStatic;
   std::vector<int> labelInitValue;
@@ -76,6 +78,20 @@ struct InputDef {
     isStatic = false;
   }
 
+  InputDef(InputType type,
+           string nameIn,
+           size_t dimIn,
+           size_t sizeIn,
+           size_t maxSeqLen) {
+    inputType = type;
+    name = nameIn;
+    dim = dimIn;
+    paraSize = sizeIn;
+    maxLen = maxSeqLen;
+    sparse = {""};
+    isStatic = false;
+  }
+
   InputDef(InputType type,
            string nameIn,
           size_t dimIn,
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index e1e8e7fae7ca4..5a1c385e82acb 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -902,12 +902,16 @@ TEST(Layer, SequenceReshapeLayer) {
   }
 }
 
-TEST(Layer, ConvShiftLayer) {
+void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
   TestConfig config;
   config.layerConfig.set_type("conv_shift");
   config.layerConfig.set_size(10);
 
-  config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
+  if (trans_type == "non-seq")
+    config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
+  else
+    config.inputDefs.push_back(
+        {INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, 0, maxLen});
   config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
   config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();
@@ -916,6 +920,11 @@ void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
   testLayerGrad(config, "conv_shift", 100, false, false);
 }
 
+TEST(Layer, ConvShiftLayer) {
+  testConvShiftLayer("non-seq");
+  testConvShiftLayer("seq", 5);
+}
+
 TEST(Layer, PowerLayer) {
   TestConfig config;
   config.layerConfig.set_type("power");

From aa4ac875524e3e267ca19c0d1e0878da0604bd0b Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 24 May 2017 14:37:04 +0800
Subject: [PATCH 5/7] remove sigmoid activation

---
 paddle/gserver/tests/LayerGradUtil.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index beac97bf4b10e..9b5f25ed8fe63 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -403,9 +403,6 @@ void initDataLayer(TestConfig testConf,
         fillData(trans, layer->getSize(), pos);
         data.value->randomizeUniform();
         data.value->add(-0.5);
-        if (testLayerName != "prelu") {
-          data.value->sigmoid(*data.value);
-        }
         data.grad->zeroMem();
         break;
       }

From a4e5e66bd9ea0e339ae7c179fb959e34f6d6e2cd Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Thu, 6 Jul 2017 15:13:57 +0800
Subject: [PATCH 6/7] Remove INPUT_SEQUENCE_MNUM_DATA type in LayerGradUtil

---
 paddle/gserver/tests/LayerGradUtil.cpp  | 23 +----------------------
 paddle/gserver/tests/LayerGradUtil.h    | 18 +-----------------
 paddle/gserver/tests/test_LayerGrad.cpp | 13 ++-----------
 3 files changed, 4 insertions(+), 50 deletions(-)

diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index 9b5f25ed8fe63..a0b1cd471dd02 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -387,25 +387,6 @@ void initDataLayer(TestConfig testConf,
         data.value->sigmoid(*data.value);
         data.grad->zeroMem();
         break;
-      case INPUT_SEQUENCE_MNUM_DATA: {
-        // first calculate height
-        sequenceStartPositions =
-            ICpuGpuVector::create(batchSize + 1, /*useGpu=*/false);
-        int seqLen = 0;
-        int* buf = sequenceStartPositions->getMutableData(false);
-        int64_t pos = 0;
-        for (size_t j = 0; j < batchSize; ++j) {
-          seqLen = uniformRandom(testConf.inputDefs[i].maxLen) + 1;
-          buf[j] = pos;
-          pos += seqLen;
-        }
-        buf[batchSize] = pos;
-        fillData(trans, layer->getSize(), pos);
-        data.value->randomizeUniform();
-        data.value->add(-0.5);
-        data.grad->zeroMem();
-        break;
-      }
       default:
         LOG(FATAL) << " unknown inputType ";
         return;
@@ -413,12 +394,10 @@ void initDataLayer(TestConfig testConf,
     if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
         testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA ||
         testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL ||
-        testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA ||
-        testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MNUM_DATA) {
+        testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) {
       if (!sequenceStartPositions) {
         generateSequenceStartPositions(batchSize, sequenceStartPositions);
       }
-      data.sequenceStartPositions = sequenceStartPositions;
     }
 
     if (testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA) {
diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 6c21fecf5d8a1..9f68eb64d0b4a 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -31,8 +31,7 @@ enum InputType {
   INPUT_SEQUENCE_LABEL,
   INPUT_SPARSE_NON_VALUE_DATA,
   INPUT_SPARSE_FLOAT_VALUE_DATA,
-  INPUT_DENSE_DIM_DATA,      // using sequence length to init dense data
-  INPUT_SEQUENCE_MNUM_DATA,  // regard batchSize as sequence number
+  INPUT_DENSE_DIM_DATA,  // using sequence length to init dense data
 };
 
 struct ParaSparse {
@@ -62,7 +62,6 @@ struct InputDef {
   string name;
   size_t dim;
   size_t paraSize;
-  size_t maxLen;  // maximum length of sequence data
   ParaSparse sparse;
   bool isStatic;
   std::vector<int> labelInitValue;
@@ -76,20 +76,6 @@ struct InputDef {
     isStatic = false;
   }
 
-  InputDef(InputType type,
-           string nameIn,
-           size_t dimIn,
-           size_t sizeIn,
-           size_t maxSeqLen) {
-    inputType = type;
-    name = nameIn;
-    dim = dimIn;
-    paraSize = sizeIn;
-    maxLen = maxSeqLen;
-    sparse = {""};
-    isStatic = false;
-  }
-
   InputDef(InputType type,
            string nameIn,
            size_t dimIn,
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 5a1c385e82acb..e1e8e7fae7ca4 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -902,16 +902,12 @@ TEST(Layer, SequenceReshapeLayer) {
   }
 }
 
-void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
+TEST(Layer, ConvShiftLayer) {
   TestConfig config;
   config.layerConfig.set_type("conv_shift");
   config.layerConfig.set_size(10);
 
-  if (trans_type == "non-seq")
-    config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
-  else
-    config.inputDefs.push_back(
-        {INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, 0, maxLen});
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
   config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
   config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();
@@ -920,11 +916,6 @@ void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
   testLayerGrad(config, "conv_shift", 100, false, false);
 }
 
-TEST(Layer, ConvShiftLayer) {
-  testConvShiftLayer("non-seq");
-  testConvShiftLayer("seq", 5);
-}
-
 TEST(Layer, PowerLayer) {
   TestConfig config;
   config.layerConfig.set_type("power");

From eeccac134e395f1c935d0be760712b456f23b8e8 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Thu, 6 Jul 2017 16:04:34 +0800
Subject: [PATCH 7/7] Use SELF_DEFINE_DATA to build unit test for ConvShiftLayer.

---
 paddle/gserver/tests/test_LayerGrad.cpp | 31 ++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 67251f08e34fa..833fcab473b23 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -910,18 +910,43 @@ TEST(Layer, SequenceReshapeLayer) {
   }
 }
 
-TEST(Layer, ConvShiftLayer) {
+void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
   TestConfig config;
   config.layerConfig.set_type("conv_shift");
   config.layerConfig.set_size(10);
+  size_t batch_size = 100;
 
-  config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
+  if (trans_type == "non-seq") {
+    config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
+  } else {
+    // Generate sequence data
+    vector<int> seqStartPositions(batch_size + 1, 0);
+    int seqLen = 0;
+    size_t pos = 0;
+    for (size_t i = 0; i < batch_size; ++i) {
+      seqLen = uniformRandom(maxLen) + 1;
+      seqStartPositions[i] = pos;
+      pos += seqLen;
+    }
+    seqStartPositions[batch_size] = pos;
+
+    MatrixPtr matValuePtr = Matrix::create(pos, 10, false, false);
+    matValuePtr->randomizeUniform();
+
+    config.inputDefs.push_back(
+        {INPUT_SELF_DEFINE_DATA, "layer_0", matValuePtr, seqStartPositions});
+  }
   config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
  config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();
 
   // Not support GPU now
-  testLayerGrad(config, "conv_shift", 100, false, false);
+  testLayerGrad(config, "conv_shift", batch_size, false, false);
+}
+
+TEST(Layer, ConvShiftLayer) {
+  testConvShiftLayer("non-seq");
+  testConvShiftLayer("seq", 5);
 }
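
Taken together, the series converges on one kernel: a circular convolution that treats each variable-length sequence's rows as a single flattened row and wraps the kernel indices modulo that flattened width. The sketch below restates that forward pass on plain arrays, outside Paddle's Matrix/Argument machinery, so the indexing is easier to follow. It is an illustrative sketch only, not code from the patches; the function name, the float element type, and the flat std::vector layout are assumptions made for the example.

// Hypothetical standalone model of the sequence path of circularConv above.
// in0:  input rows, flattened; sequence x occupies rows startPos[x] to
//       startPos[x + 1] - 1, each row holding width0 values.
// in1:  one kernel row of width1 weights per sequence (width1 must be odd).
// out:  same shape as in0.
#include <cstdio>
#include <vector>

void circularConvSeq(const std::vector<float>& in0,
                     const std::vector<float>& in1,
                     const std::vector<int>& startPos,
                     int width0,
                     int width1,
                     std::vector<float>& out) {
  int numSeqs = static_cast<int>(startPos.size()) - 1;
  int leftCtxLen = (width1 - 1) / 2;  // kernel is centered on each position
  for (int x = 0; x < numSeqs; ++x) {
    // Treat the whole sequence as one long row of curSeqWidth elements.
    int curSeqWidth = (startPos[x + 1] - startPos[x]) * width0;
    int base = startPos[x] * width0;
    for (int i = 0; i < curSeqWidth; ++i) {
      float sum = 0.0f;
      for (int j = 0; j < width1; ++j) {
        // Wrap the index modulo the flattened sequence width.
        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
        sum += in0[base + index] * in1[x * width1 + j];
      }
      out[base + i] = sum;
    }
  }
}

int main() {
  // Two sequences: lengths 2 and 1, feature width 3, kernel width 3.
  std::vector<int> startPos = {0, 2, 3};
  std::vector<float> in0 = {1, 2, 3, 4, 5, 6,  // sequence 0 (rows 0-1)
                            7, 8, 9};          // sequence 1 (row 2)
  std::vector<float> in1 = {0.1f, 0.8f, 0.1f,   // kernel for sequence 0
                            0.0f, 1.0f, 0.0f};  // identity kernel, sequence 1
  std::vector<float> out(in0.size(), 0.0f);
  circularConvSeq(in0, in1, startPos, 3, 3, out);
  for (float v : out) std::printf("%.2f ", v);
  std::printf("\n");
  return 0;
}

The identity kernel (0, 1, 0) on the second sequence reproduces its input row unchanged, which is a quick sanity check on the wrap-around indexing; note also that the wrap happens across the whole flattened sequence, not within each row, which is exactly the behavior the new sequence-aware path adds over the original per-row version.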