LODTensor (Level of details, or Level of sequences Tensor). #3109
@@ -0,0 +1,62 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/lod_tensor.h"

#include <memory>

namespace paddle {
namespace framework {
namespace details {

using LOD = LODTensor::LOD;

std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level_begin,
                              size_t level_end) {
  auto new_lod = std::make_shared<LOD>();
  new_lod->reserve(level_end - level_begin);
  for (size_t i = level_begin; i < level_end; i++) {
    new_lod->emplace_back(lod[i]);
  }
  return new_lod;
}

std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level, size_t elem_begin,
                              size_t elem_end, bool tensor_shared) {
  // slice the lod.
  auto new_lod = std::make_shared<LOD>();
  new_lod->reserve(lod.size() - level);
  auto start = lod.at(level)[elem_begin];
  auto end = lod.at(level)[elem_end];

  for (auto it = lod.begin() + level; it != lod.end(); it++) {
    auto it_begin = std::find(it->begin(), it->end(), start);
    auto it_end = std::find(it_begin, it->end(), end);
    PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
    new_lod->emplace_back(it_begin, it_end + 1);
    if (!tensor_shared) {
      // reset offsets if the tensor is copied and sliced.
      std::transform(new_lod->back().begin(), new_lod->back().end(),
                     new_lod->back().begin(),
                     [start](int v) { return v - start; });
      PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD");
    }
  }
  return new_lod;
}

}  // namespace details
}  // namespace framework
}  // namespace paddle
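To make the element-slicing behaviour concrete, here is a small standalone sketch (not part of the PR) that mirrors the logic of the second SliceLOD overload on a plain vector-of-vectors LOD. The function name SliceLODElements and the sample offsets are made up for illustration; it assumes, as the code above does, that every level stores absolute offsets into the tensor, so a boundary at a coarse level also appears at every finer level.

// Standalone illustration only -- not the PR's API. Mirrors the element
// slicing above on std::vector<std::vector<size_t>>.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

using LOD = std::vector<std::vector<size_t>>;

LOD SliceLODElements(const LOD &lod, size_t level, size_t elem_begin,
                     size_t elem_end, bool tensor_shared) {
  LOD new_lod;
  size_t start = lod[level][elem_begin];
  size_t end = lod[level][elem_end];
  for (size_t lvl = level; lvl < lod.size(); ++lvl) {
    auto it_begin = std::find(lod[lvl].begin(), lod[lvl].end(), start);
    auto it_end = std::find(it_begin, lod[lvl].end(), end);
    assert(it_begin != lod[lvl].end() && it_end != lod[lvl].end());
    new_lod.emplace_back(it_begin, it_end + 1);
    if (!tensor_shared) {
      // Rebase the offsets at zero when the tensor will be copied.
      for (auto &v : new_lod.back()) v -= start;
    }
  }
  return new_lod;
}

int main() {
  // Two articles (rows 0-4 and 4-6) over three sentences (rows 0-3, 3-4, 4-6).
  LOD lod = {{0, 4, 6}, {0, 3, 4, 6}};
  // Slice out the second article (element 1 of level 0) with the tensor
  // copied, so all offsets are rebased to start at zero.
  LOD sliced = SliceLODElements(lod, 0, 1, 2, /*tensor_shared=*/false);
  for (const auto &level : sliced) {
    for (size_t v : level) std::printf("%zu ", v);
    std::printf("\n");
  }
  // Prints:
  //   0 2
  //   0 2
  return 0;
}

The std::find calls only work because coarse-level offsets reappear verbatim in the finer levels; that is the invariant the PADDLE_ENFORCE checks guard in the real implementation.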
@@ -0,0 +1,46 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>

namespace paddle {
namespace framework {
namespace details {

/*
 * Slice levels from LOD.
 *
 * @lod: LOD to slice.
 * @level_begin: level to begin slice.
 * @level_end: level to end slice.
 */
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
                                         size_t level_begin, size_t level_end);

/*
 * Slice elements from a level of LOD.
 *
 * @lod: LOD to slice.
 * @level: which level to slice.
 * @elem_begin: element's index to begin slice.
 * @elem_end: element's index to end slice.
 */
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
                                         size_t level, size_t elem_begin,
                                         size_t elem_end, bool tensor_shared);
}  // namespace details
}  // namespace framework
}  // namespace paddle
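A hedged sketch of how the two declarations above might be called; this is not from the PR, it assumes a CPU build (where Level is std::vector<size_t>), and the LOD values are made up for illustration.

// Hypothetical call sites for the declarations above.
#include "paddle/framework/lod_tensor.h"

void SliceDemo() {
  using paddle::framework::LODTensor;
  namespace details = paddle::framework::details;

  LODTensor::LOD lod = {{0, 4, 6}, {0, 3, 4, 6}};

  // Keep only level 1 (the sentence level).
  std::shared_ptr<LODTensor::LOD> levels = details::SliceLOD(lod, 1, 2);

  // Keep element 1 of level 0 in all levels, rebasing offsets because the
  // tensor will be copied: yields {{0, 2}, {0, 2}}.
  std::shared_ptr<LODTensor::LOD> elems =
      details::SliceLOD(lod, 0, 1, 2, /*tensor_shared=*/false);
}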
@@ -0,0 +1,51 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/lod_tensor.h"

#include <glog/logging.h>

namespace paddle {
namespace framework {

LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
  // Slicing levels only needs to update the LOD info; each level still covers
  // the whole tensor_, so there is no need to modify tensor_.
  return LODTensor(tensor_, new_lod);
}

LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin,
                                 size_t elem_end) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                 NumLevels());
  PADDLE_ENFORCE(elem_begin < NumElements(level),
                 "element begin [%d] out of range [%d]", elem_begin,
                 NumElements(level));
  PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
                 "element end [%d] out of range [%d]", elem_end,
                 NumElements(level));

  auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
                                   true /*tensor_shared*/);

  // Slicing elements only needs to update the LOD info; the offsets are not
  // changed, so the original tensor_ can be reused.
  return LODTensor(tensor_, new_lod);
}

}  // namespace framework
}  // namespace paddle
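A hedged sketch of how the shared slices defined above might be used. It is not part of the PR: it assumes a CPU build and that some std::shared_ptr<Tensor> named tensor already holds 6 rows of data; the LOD values are illustrative only.

// Hypothetical usage sketch of LODTensor::SliceShared.
#include <memory>
#include "paddle/framework/lod_tensor.h"

void Demo(const std::shared_ptr<paddle::framework::Tensor> &tensor) {
  using paddle::framework::LODTensor;
  auto lod = std::make_shared<LODTensor::LOD>(
      LODTensor::LOD{{0, 4, 6}, {0, 3, 4, 6}});
  LODTensor batch(tensor, lod);

  // Keep only the finer level; the underlying tensor is shared, not copied.
  LODTensor sentences = batch.SliceShared(1, 2);
  // sentences.NumLevels() == 1 and both objects refer to the same Tensor.

  // Take element 1 of level 0 (rows 4..6); because the tensor is shared the
  // offsets stay absolute rather than being rebased to zero.
  LODTensor second_article = batch.SliceShared(0, 1, 2);
}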
@@ -0,0 +1,145 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#if (!PADDLE_ONLY_CPU)
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#endif

#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/enforce.h"

namespace paddle {
namespace framework {

/*
 * LODTensor (Level of details Tensor)
 * see https://en.wikipedia.org/wiki/Level_of_details for reference.
 */
class LODTensor {
 public:
// A Level saves the offsets of each unit.
#ifdef PADDLE_ONLY_CPU
  using Level = std::vector<size_t>;
#else
  using Level = thrust::device_vector<size_t>;
#endif
  // LOD stores the offsets of each level of units, the largest units level
  // first, then the smaller units levels. Each Level stores the offsets of
  // units in the Tensor.
  typedef std::vector<Level> LOD;
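  // A hypothetical example (not in the original code), assuming every level
  // stores absolute offsets into the tensor, as details::SliceLOD expects:
  // a batch of 3 sentences with 3, 1 and 2 words is
  //     LOD lod = {{0, 3, 4, 6}};
  // and grouping those words into 2 articles (rows 0-4 and 4-6) prepends a
  // coarser level:
  //     LOD lod = {{0, 4, 6}, {0, 3, 4, 6}};
  // Note that every coarse boundary also appears in the finer levels.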

  LODTensor() {}
  LODTensor(const std::shared_ptr<Tensor> &tensor,
            const std::shared_ptr<LOD> &lod) {
    Reset(tensor, lod);
  }

  void Reset(const std::shared_ptr<Tensor> &tensor,
             const std::shared_ptr<LOD> &lod) {
    tensor_ = tensor;
    lod_start_pos_ = lod;
  }

  /*
   * Get an element from LOD.
   */
  size_t lod_element(size_t level, size_t elem) const {
Review comment: lod_element => StartPosition
    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                   NumLevels());
    PADDLE_ENFORCE(elem < NumElements(level),
                   "element begin [%d] out of range [%d]", elem,
                   NumElements(level));
    return (*lod_start_pos_)[level][elem];
  }

  /*
   * Number of LODTensor's levels; each level has units of data. For example,
   * in a text, article, paragraph and sentence are 3 levels.
   */
  size_t NumLevels() const {
    return lod_start_pos_ ? lod_start_pos_->size() : 0UL;
  }
  /*
   * Number of elements in a level.
   */
  size_t NumElements(size_t level = 0) const {
Review comment: NumElements => ElementsOfLevel
    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                   NumLevels());
    // the last offset is the end of the last element
    return lod_start_pos_->at(level).size() - 1;
  }

  /*
   * Slice of levels[level_begin:level_end], with tensor copied.
Review comment: // Copy the slice of data from *this into a new LODTensor and return the new LODTensor.
   */
  template <typename T>
  LODTensor SliceCopied(size_t level_begin, size_t level_end,
Review comments (thread):
- I have the same question with
- tensor's CopyFrom need a
- Does this method slice some levels, or elements in a level?
                        const platform::Place &dst_place) const;

  /*
   * Slice of levels[level_begin:level_end], with tensor shared.
   */
  LODTensor SliceShared(size_t level_begin, size_t level_end) const;
Review comment: // Returns a new LODTensor that shares data with *this but provides a view of the specified slice.

  /*
   * Slice of elements of a level, [elem_begin: elem_end], with tensor copied.
   * @note: low performance in slice lod_start_pos_.
   */
  template <typename T>
  LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end,
                        const platform::Place &dst_place) const;

  /*
   * Slice of elements of a level, [elem_begin: elem_end], with tensor shared.
   * @note: low performance in slice lod_start_pos_.
   */
  LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const;

  /*
   * Copy other's lod_start_pos_, to share LOD info.
   * @note: the LOD info should not be changed.
   */
  void ShareLOD(const LODTensor &other) {
    lod_start_pos_ = other.lod_start_pos_;
  }

  /*
   * Copy other's lod_start_pos_'s content, free to mutate.
   */
  void CopyLOD(const LODTensor &other) {
    lod_start_pos_ = std::make_shared<LOD>(*other.lod_start_pos_);
  }
  /*
   * Determine whether LODTensor has a valid LOD info.
   */
  bool HasLOD() const { return bool(lod_start_pos_); }
  LOD *lod() const { return lod_start_pos_.get(); }

  std::shared_ptr<Tensor> &tensor() { return tensor_; }
  Tensor *raw_tensor() { return tensor_.get(); }

 private:
  std::shared_ptr<LOD> lod_start_pos_;
Review comment: lod_start_pos_ => lod_
  std::shared_ptr<Tensor> tensor_;
};

}  // namespace framework
}  // namespace paddle

#include "paddle/framework/lod_tensor_impl.h"
@@ -0,0 +1,60 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/details/lod_tensor.h"

namespace paddle {
namespace framework {

template <typename T>
LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end,
                                 const platform::Place &dst_place) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
  auto new_tensor = std::make_shared<Tensor>();
  new_tensor->CopyFrom<T>(*tensor_, dst_place);

  return LODTensor(new_tensor, new_lod);
}

template <typename T>
LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin,
                                 size_t elem_end,
                                 const platform::Place &dst_place) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                 NumLevels());
  PADDLE_ENFORCE(elem_begin < NumElements(level),
                 "element begin [%d] out of range [%d]", elem_begin,
                 NumElements(level));
  PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
                 "element end [%d] out of range [%d]", elem_end,
                 NumElements(level));

  auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
                                   false /*tensor_shared*/);

  auto start_idx = new_lod->front().front();
  auto end_idx = new_lod->front().back() - 1 /*the next element's start*/;
  auto sliced_tensor = tensor_->Slice<T>(start_idx, end_idx);
  auto new_tensor = std::make_shared<Tensor>();
  new_tensor->CopyFrom<T>(sliced_tensor, dst_place);

  return LODTensor(new_tensor, new_lod);
}

}  // namespace framework
}  // namespace paddle
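A hedged sketch of calling the copied slices defined above. None of the specifics come from the PR itself: it assumes float data, an existing std::shared_ptr<Tensor> named tensor, and that platform::CPUPlace from paddle/platform/place.h is available as elsewhere in the framework.

// Hypothetical usage sketch of LODTensor::SliceCopied.
#include <memory>
#include "paddle/framework/lod_tensor.h"
#include "paddle/platform/place.h"

void DemoCopied(const std::shared_ptr<paddle::framework::Tensor> &tensor) {
  using paddle::framework::LODTensor;
  auto lod = std::make_shared<LODTensor::LOD>(
      LODTensor::LOD{{0, 4, 6}, {0, 3, 4, 6}});
  LODTensor batch(tensor, lod);

  paddle::platform::CPUPlace cpu;

  // Copy both levels and the underlying tensor to `cpu`.
  LODTensor copied_levels = batch.SliceCopied<float>(0, 2, cpu);

  // Copy element 1 of level 0 (rows 4..6); the new LOD is rebased to start
  // at zero because the data is copied rather than shared.
  LODTensor copied_article = batch.SliceCopied<float>(0, 1, 2, cpu);
}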
Review comment: Isn't it the case that a LODTensor instance does not always have LOD info?

Review comment: Should this be changed to the following?
output_var.clone(input_var);  // clone the tensor type
Tensor* output = output_var.Get<Tensor>();
@wangkuiyi