PaddlePaddle · Superjomn · Aug 9, 2017 · Jul 29, 2017 · Jul 29, 2017 · Jul 30, 2017
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
@@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
 cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
 
+cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor)
+cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)
+
 cc_test(variable_test SRCS variable_test.cc)
 
 cc_library(scope SRCS scope.cc)

diff --git a/paddle/framework/details/lod_tensor.cc b/paddle/framework/details/lod_tensor.cc
@@ -0,0 +1,62 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/framework/lod_tensor.h"
+
+#include <memory>
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+using LOD = LODTensor::LOD;
+
+// Copies levels [level_begin, level_end) of `lod` into a newly allocated LOD.
+// A reversed or oversized range is rejected instead of reading out of bounds.
+std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level_begin,
+                              size_t level_end) {
+  PADDLE_ENFORCE(level_begin <= level_end && level_end <= lod.size(),
+                 "invalid level range [%d, %d)", level_begin, level_end);
+  return std::make_shared<LOD>(lod.begin() + level_begin,
+                               lod.begin() + level_end);
+}
+
+// Slices offsets [elem_begin, elem_end] of `level` and the matching span of
+// every lower level; offsets are rebased to 0 unless tensor_shared is true.
+std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level, size_t elem_begin,
+                              size_t elem_end, bool tensor_shared) {
+  const auto &top = lod.at(level);  // validate level before any size math
+  PADDLE_ENFORCE(elem_begin <= elem_end && elem_end < top.size(),
+                 "error in slice LOD");
+  const size_t start = top[elem_begin];
+  const size_t end = top[elem_end];
+  auto new_lod = std::make_shared<LOD>();
+  new_lod->reserve(lod.size() - level);
+  for (auto it = lod.begin() + level; it != lod.end(); ++it) {
+    auto it_begin = std::find(it->begin(), it->end(), start);
+    auto it_end = std::find(it_begin, it->end(), end);
+    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
+    new_lod->emplace_back(it_begin, it_end + 1);
+    if (tensor_shared) continue;
+    // Rebase for a copied tensor; size_t (not int) keeps large offsets intact.
+    auto &sliced = new_lod->back();
+    std::transform(sliced.begin(), sliced.end(), sliced.begin(),
+                   [start](size_t v) { return v - start; });
+  }
+  return new_lod;
+}
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/details/lod_tensor.h b/paddle/framework/details/lod_tensor.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include <memory>
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+/*
+ * Slice levels from LOD, returning copies of levels [level_begin, level_end).
+ *
+ * @lod: LOD to slice.
+ * @level_begin: level to begin slice.
+ * @level_end: level to end slice (exclusive).
+ */
+std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
+                                         size_t level_begin, size_t level_end);
+
+/*
+ * Slice elements from a level of LOD, plus the matching span of lower levels.
+ * @lod: LOD to slice.
+ * @level: which level to slice.
+ * @elem_begin: element's index to begin slice.
+ * @elem_end: element's index to end slice (its offset closes the slice).
+ * @tensor_shared: if false, the sliced offsets are rebased to start at 0.
+ */
+std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
+                                         size_t level, size_t elem_begin,
+                                         size_t elem_end, bool tensor_shared);
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
@@ -0,0 +1,51 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/framework/lod_tensor.h"
+
+#include <glog/logging.h>
+
+namespace paddle {
+namespace framework {
+
+// Returns a LODTensor with only levels [level_begin, level_end) of the LOD.
+// Each level spans the whole tensor_, so tensor_ is shared without changes.
+LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const {
+  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
+  const LOD &src = *lod_start_pos_;
+  return LODTensor(tensor_, details::SliceLOD(src, level_begin, level_end));
+}
+
+// Slices elements [elem_begin, elem_end) of `level`; the tensor is shared, so
+// the new LOD keeps the original offsets into tensor_.
+LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin,
+                                 size_t elem_end) const {
+  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
+  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
+                 NumLevels());
+  const size_t elem_count = NumElements(level);
+  PADDLE_ENFORCE(elem_begin < elem_count,
+                 "element begin [%d] out of range [%d]", elem_begin,
+                 elem_count);
+  PADDLE_ENFORCE(elem_end < elem_count + 1,
+                 "element end [%d] out of range [%d]", elem_end, elem_count);
+
+  constexpr bool kTensorShared = true;
+  auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
+                                   kTensorShared);
+  return LODTensor(tensor_, new_lod);
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#if (!PADDLE_ONLY_CPU)
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#endif
+
+#include "paddle/framework/ddim.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+
+/*
+ * LODTensor (Level of details Tensor): a Tensor paired with optional LOD info.
+ * see https://en.wikipedia.org/wiki/Level_of_details for reference.
+ */
+class LODTensor {
+ public:
+// Level saves the offsets of each unit.
+#ifdef PADDLE_ONLY_CPU
+  using Level = std::vector<size_t>;
+#else
+  using Level = thrust::device_vector<size_t>;
+#endif
+  // LOD lists the levels from the largest units down to the smallest ones;
+  // each Level stores the offsets of its units in the Tensor.
+  using LOD = std::vector<Level>;
+
+  LODTensor() {}
+  LODTensor(const std::shared_ptr<Tensor> &tensor,
+            const std::shared_ptr<LOD> &lod)
+      : lod_start_pos_(lod), tensor_(tensor) {}
+
+  // Rebind this object to `tensor` and `lod`; both pointers are shared.
+  void Reset(const std::shared_ptr<Tensor> &tensor,
+             const std::shared_ptr<LOD> &lod) {
+    tensor_ = tensor;
+    lod_start_pos_ = lod;
+  }
+
+  /*
+   * Get the start offset of element `elem` in `level` of the LOD; both indices
+   * are range-checked.
+   */
+  size_t lod_element(size_t level, size_t elem) const {
+    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
+                   NumLevels());
+    PADDLE_ENFORCE(elem < NumElements(level), "element [%d] out of range [%d]",
+                   elem, NumElements(level));
+    return (*lod_start_pos_)[level][elem];
+  }
+
+  /*
+   * Number of LODTensor's levels (0 without LOD info). Each level has units of
+   * data: in a text's view, article, paragraph, sentence are 3 levels.
+   */
+  size_t NumLevels() const {
+    return lod_start_pos_ ? lod_start_pos_->size() : 0UL;
+  }
+  /*
+   * Number of elements in a level.
+   */
+  size_t NumElements(size_t level = 0) const {
+    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
+                   NumLevels());
+    // the last offset only marks the end of the last element
+    return lod_start_pos_->at(level).size() - 1;
+  }
+
+  /*
+   * Slice of levels[level_begin:level_end], with tensor copied.
+   */
+  template <typename T>
+  LODTensor SliceCopied(size_t level_begin, size_t level_end,
+                        const platform::Place &dst_place) const;
+
+  /*
+   * Slice of levels[level_begin:level_end], with tensor shared.
+   */
+  LODTensor SliceShared(size_t level_begin, size_t level_end) const;
+
+  /*
+   * Slice of elements of a level, [elem_begin: elem_end], with tensor copied.
+   * @note: low performance in slice lod_start_pos_.
+   */
+  template <typename T>
+  LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end,
+                        const platform::Place &dst_place) const;
+
+  /*
+   * Slice of elements of a level, [elem_begin: elem_end], with tensor shared.
+   * @note: low performance in slice lod_start_pos_.
+   */
+  LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const;
+
+  /*
+   * Copy other's lod_start_pos_, to share LOD info.
+   * @note: the LOD info should not be changed.
+   */
+  void ShareLOD(const LODTensor &other) {
+    lod_start_pos_ = other.lod_start_pos_;
+  }
+
+  /*
+   * Deep-copy other's LOD (free to mutate); ends up empty if other has none.
+   */
+  void CopyLOD(const LODTensor &other) {
+    const std::shared_ptr<LOD> &src = other.lod_start_pos_;
+    lod_start_pos_ = src ? std::make_shared<LOD>(*src) : nullptr;
+  }
+  /*
+   * Determine whether LODTensor has a valid LOD info.
+   */
+  bool HasLOD() const { return static_cast<bool>(lod_start_pos_); }
+  LOD *lod() const { return lod_start_pos_.get(); }  // null without LOD info
+  // Access to the held tensor: the shared pointer itself, or its raw pointer.
+  std::shared_ptr<Tensor> &tensor() { return tensor_; }
+  Tensor *raw_tensor() { return tensor_.get(); }
+
+ private:
+  std::shared_ptr<LOD> lod_start_pos_;  // offsets per level; null if no LOD
+  std::shared_ptr<Tensor> tensor_;      // data the offsets refer to
+};
+
+}  // namespace framework
+}  // namespace paddle
+
+#include "paddle/framework/lod_tensor_impl.h"
diff --git a/paddle/framework/lod_tensor_impl.h b/paddle/framework/lod_tensor_impl.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/details/lod_tensor.h"
+
+namespace paddle {
+namespace framework {
+
+// Keeps levels [level_begin, level_end) and copies the tensor to dst_place.
+template <typename T>
+LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end,
+                                 const platform::Place &dst_place) const {
+  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
+  auto levels = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
+  auto tensor_copy = std::make_shared<Tensor>();
+  tensor_copy->CopyFrom<T>(*tensor_, dst_place);
+  return LODTensor(tensor_copy, levels);
+}
+
+// Copies the rows of elements [elem_begin, elem_end) of `level` to dst_place
+// and returns them with a LOD whose offsets are rebased to start at 0.
+template <typename T>
+LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin,
+                                 size_t elem_end,
+                                 const platform::Place &dst_place) const {
+  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
+  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
+                 NumLevels());
+  PADDLE_ENFORCE(elem_begin < NumElements(level),
+                 "element begin [%d] out of range [%d]", elem_begin,
+                 NumElements(level));
+  PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
+                 "element end [%d] out of range [%d]", elem_end,
+                 NumElements(level));
+  auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
+                                   false /*tensor_shared*/);
+  // Take rows via the ORIGINAL offsets: new_lod is rebased to start at 0, and
+  // Tensor::Slice is half-open [begin, end), so the end offset needs no "- 1".
+  const size_t row_begin = (*lod_start_pos_)[level][elem_begin];
+  const size_t row_end = (*lod_start_pos_)[level][elem_end];
+  auto new_tensor = std::make_shared<Tensor>();
+  new_tensor->CopyFrom<T>(tensor_->Slice<T>(row_begin, row_end), dst_place);
+  return LODTensor(new_tensor, new_lod);
+}
+
+}  // namespace framework
+}  // namespace paddle