
Commit

Merge pull request apache#65 from piiswrong/master
multi output softmax
tqchen committed Oct 24, 2015
2 parents f00b208 + d298f4f commit ded43f1
Showing 4 changed files with 128 additions and 0 deletions.
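The change adds 3-D overloads of Softmax and SoftmaxGrad on both backends: softmax is taken over dimension 1 (the class axis) of a (rows, classes, positions) tensor, dimensions 0 and 2 are treated as independent, and the gradient consumes one class index per (row, position) pair from a 2-D label tensor. A minimal CPU sketch of how a caller might exercise the new overloads follows; sizes and variable names are illustrative and not part of the commit.

#include "mshadow/tensor.h"
using namespace mshadow;

int main() {
  InitTensorEngine<cpu>();
  // energy: (rows=2, classes=3, positions=4); softmax normalizes the class axis
  Tensor<cpu, 3, float> energy = NewTensor<cpu>(Shape3(2, 3, 4), 1.0f);
  Tensor<cpu, 3, float> prob   = NewTensor<cpu>(Shape3(2, 3, 4), 0.0f);
  Tensor<cpu, 2, float> label  = NewTensor<cpu>(Shape2(2, 4), 0.0f);  // class index per (row, position)
  Tensor<cpu, 3, float> grad   = NewTensor<cpu>(Shape3(2, 3, 4), 0.0f);
  Softmax(prob, energy);             // new 3-D forward overload
  SoftmaxGrad(grad, prob, label);    // new 3-D gradient overload
  FreeSpace(&energy); FreeSpace(&prob); FreeSpace(&label); FreeSpace(&grad);
  ShutdownTensorEngine<cpu>();
  return 0;
}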
71 changes: 71 additions & 0 deletions mshadow/cuda/tensor_gpu-inl.cuh
@@ -288,6 +288,77 @@ inline void SoftmaxGrad(Tensor<gpu, 2, DType> &dst,
expr::MakePlan(label),
dst.size(1));
}

template<typename DType>
__global__ void Softmax3DGradKernel(Tensor<gpu, 3, DType> dst,
                                    const Tensor<gpu, 3, DType> src,
                                    const Tensor<gpu, 2, DType> label) {
  // __global__ parameters cannot be references, so the tensor views are
  // passed by value; one block handles row y, one thread handles position n
  const index_t xmax = dst.size(1);
  const int y = blockIdx.x;
  const int n = threadIdx.x;

  if (n < dst.size(2)) {
    // subtract one from the predicted probability of the labeled class
    const int k = static_cast<int>(label[y][n]);
    for (index_t i = 0; i < xmax; ++i) {
      if (i == k) {
        dst[y][i][n] = src[y][i][n] - 1.0f;
      } else {
        dst[y][i][n] = src[y][i][n];
      }
    }
  }
}

template<typename DType>
__global__ void Softmax3DKernel(Tensor<gpu, 3, DType> dst,
                                const Tensor<gpu, 3, DType> src) {
  // one block per row y; each thread normalizes one column n along the class axis
  const index_t xmax = dst.size(1);
  const int y = blockIdx.x;
  const int n = threadIdx.x;

  if (n < dst.size(2)) {
    // subtract the column maximum before exponentiating for numerical stability
    DType smax = src[y][0][n];
    for (index_t i = 1; i < xmax; ++i) {
      smax = max(smax, src[y][i][n]);
    }
    DType ssum = 0.0f;
    for (index_t i = 0; i < xmax; ++i) {
      DType p = expf(src[y][i][n] - smax);
      ssum += p;
      dst[y][i][n] = p;
    }
    for (index_t i = 0; i < xmax; ++i) {
      dst[y][i][n] /= ssum;
    }
  }
}

template<typename DType>
inline void Softmax(Tensor<gpu, 3, DType> &dst,
                    const Tensor<gpu, 3, DType> &src) {
  // one block per row of dst; threads within the block cover the last
  // dimension, so this path assumes dst.size(2) <= kBaseThreadNum
  dim3 dimBlock(kBaseThreadNum);
  dim3 dimGrid(dst.size(0));
  CHECK_EQ(dst.shape_, src.shape_) << "Softmax: shape mismatch";
  CheckLaunchParam(dimGrid, dimBlock, "Softmax");
  cudaStream_t stream = Stream<gpu>::GetStream(dst.stream_);
  Softmax3DKernel<DType><<<dimGrid, dimBlock, 0, stream>>>(dst, src);
}


template<typename DType>
inline void SoftmaxGrad(Tensor<gpu, 3, DType> &dst,
                        const Tensor<gpu, 3, DType> &src,
                        const Tensor<gpu, 2, DType> &label) {
  // same launch layout as the 3-D Softmax: one block per row, one thread
  // per position along the last dimension
  dim3 dimBlock(kBaseThreadNum);
  dim3 dimGrid(dst.size(0));
  CHECK_EQ(dst.shape_, src.shape_) << "SoftmaxGrad: shape mismatch";
  CHECK_EQ(dst.size(0), label.size(0)) << "SoftmaxGrad: label shape mismatch";
  CHECK_EQ(dst.size(2), label.size(1)) << "SoftmaxGrad: label shape mismatch";
  CheckLaunchParam(dimGrid, dimBlock, "SoftmaxGrad");
  cudaStream_t stream = Stream<gpu>::GetStream(dst.stream_);
  Softmax3DGradKernel<DType><<<dimGrid, dimBlock, 0, stream>>>(dst, src, label);
}

} // namespace cuda
} // namespace mshadow
#endif // MSHADOW_CUDA_TENSOR_GPU_INL_CUH_
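Because the kernels index the last dimension with threadIdx.x only, a single block covers at most kBaseThreadNum positions; columns beyond that are silently skipped. A hedged caller-side guard is sketched below (dst and src stand for the 3-D tensors being passed in, not new identifiers in the file).

// sketch: reject shapes the one-block-per-row kernels cannot cover
CHECK_LE(dst.size(2), static_cast<index_t>(cuda::kBaseThreadNum))
    << "3-D Softmax: last dimension exceeds one thread block";
cuda::Softmax(dst, src);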
4 changes: 4 additions & 0 deletions mshadow/tensor_blob.h
@@ -490,6 +490,10 @@ class TBlob {
  inline index_t size(index_t idx) const {
    return shape_[idx];
  }
  /*! \brief total number of elements in the tensor */
  inline index_t Size(void) const {
    return shape_.Size();
  }
  /*!
   * \brief fetch the tensor, with respect to a specific dimension;
   * if dim does not match the stored dimension, an error will be issued
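The added TBlob::Size() simply forwards to Shape::Size(), i.e. the product of all dimensions. A small sketch of the intended use (the tensor here is illustrative):

Tensor<cpu, 3, float> t = NewTensor<cpu>(Shape3(2, 3, 4), 0.0f);
TBlob blob(t);                  // TBlob can be constructed from a Tensor
index_t total = blob.Size();    // 2 * 3 * 4 = 24, same as t.shape_.Size()
FreeSpace(&t);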
40 changes: 40 additions & 0 deletions mshadow/tensor_cpu-inl.h
@@ -282,6 +282,24 @@ inline void SoftmaxGrad(Tensor<cpu, 2, DType> dst,
}
}

template<typename DType>
inline void SoftmaxGrad(Tensor<cpu, 3, DType> dst,
                        const Tensor<cpu, 3, DType> &src,
                        const Tensor<cpu, 2, DType> &label) {
  for (index_t n = 0; n < dst.size(2); ++n) {
    for (index_t y = 0; y < dst.size(0); ++y) {
      // label holds the target class index for each (row, position) pair
      const index_t k = static_cast<index_t>(label[y][n]);
      for (index_t x = 0; x < dst.size(1); ++x) {
        if (x == k) {
          dst[y][k][n] = src[y][k][n] - 1.0f;
        } else {
          dst[y][x][n] = src[y][x][n];
        }
      }
    }
  }
}
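The "subtract one at the label index" rule is the usual softmax-plus-negative-log-likelihood gradient (the loss itself lives outside this file, so this is context rather than something the commit states): with p = softmax(z) over the class axis and label k,

\frac{\partial}{\partial z_x}\left(-\log p_k\right) = p_x - \mathbf{1}[x = k]

which is exactly src[y][x][n] minus one when x equals the label and src[y][x][n] otherwise.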

template<typename DType>
inline void Softmax(Tensor<cpu, 2, DType> dst,
const Tensor<cpu, 2, DType> &energy) {
@@ -291,6 +309,28 @@ inline void Softmax(Tensor<cpu, 2, DType> dst,
}
}

template<typename DType>
inline void Softmax(Tensor<cpu, 3, DType> dst,
                    const Tensor<cpu, 3, DType> &energy) {
  CHECK_EQ(dst.shape_, energy.shape_) << "Softmax: shape mismatch";
  for (index_t y = 0; y < dst.size(0); ++y) {
    for (index_t n = 0; n < dst.size(2); ++n) {
      DType mmax = energy[y][0][n];
      for (index_t x = 1; x < dst.size(1); ++x) {
        if (mmax < energy[y][x][n]) mmax = energy[y][x][n];
      }
      DType sum = 0.0f;
      for (index_t x = 0; x < dst.size(1); ++x) {
        dst[y][x][n] = std::exp(energy[y][x][n] - mmax);
        sum += dst[y][x][n];
      }
      for (index_t x = 0; x < dst.size(1); ++x) {
        dst[y][x][n] /= sum;
      }
    }
  }
}
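Both the CPU and GPU forward paths subtract the per-column maximum before exponentiating; this is the standard overflow guard and leaves the result unchanged, since for any constant m

\mathrm{softmax}(z)_x = \frac{e^{z_x - m}}{\sum_j e^{z_j - m}}

and with m = max_j z_j the largest exponent is e^0 = 1, so expf/std::exp cannot overflow.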

template<typename DType>
inline DType VDot(const Tensor<cpu, 1, DType> &lhs,
const Tensor<cpu, 1, DType> &rhs) {
13 changes: 13 additions & 0 deletions mshadow/tensor_gpu-inl.h
@@ -159,13 +159,26 @@ inline void Softmax(Tensor<gpu, 2, DType> dst,
  cuda::Softmax(dst, src);
}

template<typename DType>
inline void Softmax(Tensor<gpu, 3, DType> dst,
                    const Tensor<gpu, 3, DType> &src) {
  cuda::Softmax(dst, src);
}

template<typename DType>
inline void SoftmaxGrad(Tensor<gpu, 2, DType> dst,
                        const Tensor<gpu, 2, DType> &src,
                        const Tensor<gpu, 1, DType> &label) {
  cuda::SoftmaxGrad(dst, src, label);
}

template<typename DType>
inline void SoftmaxGrad(Tensor<gpu, 3, DType> dst,
                        const Tensor<gpu, 3, DType> &src,
                        const Tensor<gpu, 2, DType> &label) {
  cuda::SoftmaxGrad(dst, src, label);
}

} // namespace mshadow
#endif // __CUDACC__
#endif // MSHADOW_TENSOR_GPU_INL_H_
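With matching cpu and gpu overloads in place, device-templated code can call the 3-D softmax without device-specific branches. A hedged sketch of such a caller (SoftmaxForward3D is an illustrative name, not part of the commit):

// sketch: device-generic forwarding helper built on the new overloads
template<typename xpu, typename DType>
inline void SoftmaxForward3D(Tensor<xpu, 3, DType> out,
                             const Tensor<xpu, 3, DType> &in) {
  Softmax(out, in);  // resolves to the cpu or gpu overload added here
}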
