Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LODTensor (Level of details, or Level of sequences Tensor). #3109

Merged
merged 29 commits into from
Aug 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0e91e08
init lottensor
Superjomn Jul 29, 2017
0ba4f1d
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Jul 29, 2017
246a83a
Merge branch 'fix_rnn' into lottensor
Superjomn Jul 30, 2017
32c948a
init lottensor
Superjomn Jul 30, 2017
f2c2a64
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Jul 30, 2017
d9c4074
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Jul 31, 2017
54f4749
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 1, 2017
660fc06
finish lodtensor
Superjomn Aug 1, 2017
4dcc825
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 1, 2017
a1342c8
add lodtensor
Superjomn Aug 1, 2017
c4e751a
add reshape of lod
Superjomn Aug 2, 2017
6a0953c
recover .clang-format
Superjomn Aug 2, 2017
76a9c8b
fix test lod
Superjomn Aug 3, 2017
018f121
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 3, 2017
9cd8033
add details
Superjomn Aug 3, 2017
62cab87
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 3, 2017
71108f8
rename Elements/Levels
Superjomn Aug 3, 2017
0607a28
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 4, 2017
8e970b3
size_t and vector reserve
Superjomn Aug 4, 2017
df75fe2
add details
Superjomn Aug 4, 2017
14858cd
add const& std::shared_ptr
Superjomn Aug 4, 2017
1c64657
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 5, 2017
9319c1e
add lod_tensor_impl.h
Superjomn Aug 5, 2017
9c84313
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 7, 2017
52adce2
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 8, 2017
adfe84d
fix compile error
Superjomn Aug 8, 2017
180499c
fix thrust vector
Superjomn Aug 8, 2017
98a3de5
remove a shared_ptr
Superjomn Aug 8, 2017
710d863
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into lottensor
Superjomn Aug 8, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions paddle/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)

cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)

cc_test(variable_test SRCS variable_test.cc)

cc_library(scope SRCS scope.cc)
Expand Down
62 changes: 62 additions & 0 deletions paddle/framework/details/lod_tensor.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/lod_tensor.h"

#include <memory>

namespace paddle {
namespace framework {
namespace details {

using LOD = LODTensor::LOD;

std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level_begin,
size_t level_end) {
auto new_lod = std::make_shared<LOD>();
new_lod->reserve(level_end - level_begin);
for (size_t i = level_begin; i < level_end; i++) {
new_lod->emplace_back(lod[i]);
}
return new_lod;
}

std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level, size_t elem_begin,
size_t elem_end, bool tensor_shared) {
// slice the lod.
auto new_lod = std::make_shared<LOD>();
new_lod->reserve(lod.size() - level);
auto start = lod.at(level)[elem_begin];
auto end = lod.at(level)[elem_end];

for (auto it = lod.begin() + level; it != lod.end(); it++) {
auto it_begin = std::find(it->begin(), it->end(), start);
auto it_end = std::find(it_begin, it->end(), end);
PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
new_lod->emplace_back(it_begin, it_end + 1);
if (!tensor_shared) {
// reset offset if tensor is copyed and sliced.
std::transform(new_lod->back().begin(), new_lod->back().end(),
new_lod->back().begin(),
[start](int v) { return v - start; });
PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD");
}
}
return new_lod;
}

} // namespace details
} // namespace framework
} // namespace paddle
46 changes: 46 additions & 0 deletions paddle/framework/details/lod_tensor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>

namespace paddle {
namespace framework {
namespace details {

/*
* Slice levels from LOD.
*
* @lod: LOD to slice.
* @level_begin: level to begin slice.
* @level_end: level to end slice.
*/
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
size_t level_begin, size_t level_end);

/*
* Slice elements from a level of LOD.
*
* @lod: LOD to slice.
* @level: which level to slice.
* @elem_begin: element's index to begin slice.
* @elem_end: element's index to end slice.
*/
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
size_t level, size_t elem_begin,
size_t elem_end, bool tensor_shared);
} // namespace details
} // namespace framework
} // namespace paddle
51 changes: 51 additions & 0 deletions paddle/framework/lod_tensor.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/lod_tensor.h"

#include <glog/logging.h>

namespace paddle {
namespace framework {

LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
// slice levels just need to update LOD info, each level will contains the
// whole tensor_, so no need to modify tensor_.
return LODTensor(tensor_, new_lod);
}

LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin,
size_t elem_end) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

一个LODTensor instance里不是一定总有LOD 信息的?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tensor* output = Input(0)->Clone();

这里是否应该改成

output_var.clone(input_var); // clone the tensor type
Tensor* output = output_var.Get<Tensor>();

@wangkuiyi

PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem_begin < NumElements(level),
"element begin [%d] out of range [%d]", elem_begin,
NumElements(level));
PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
"element end [%d] out of range [%d]", elem_end,
NumElements(level));

auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
true /*tensor_shared*/);

// slice elements just need to update LOD info, because offsets are not
// changed, so the original tensor_ can be reused.
return LODTensor(tensor_, new_lod);
}

} // namespace framework
} // namespace paddle
145 changes: 145 additions & 0 deletions paddle/framework/lod_tensor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#if (!PADDLE_ONLY_CPU)
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#endif

#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/enforce.h"

namespace paddle {
namespace framework {

/*
* LODTensor (Level of details Tensor)
* see https://en.wikipedia.org/wiki/Level_of_details for reference.
*/
class LODTensor {
public:
// Level save offsets of each unit.
#ifdef PADDLE_ONLY_CPU
using Level = std::vector<size_t>;
#else
using Level = thrust::device_vector<size_t>;
#endif
// LOD stores offsets of each level of units, the largest units level first,
// then the smaller units level. Each Level stores the offsets of units in
// Tesor.
typedef std::vector<Level> LOD;

LODTensor() {}
LODTensor(const std::shared_ptr<Tensor> &tensor,
const std::shared_ptr<LOD> &lod) {
Reset(tensor, lod);
}

void Reset(const std::shared_ptr<Tensor> &tensor,
const std::shared_ptr<LOD> &lod) {
tensor_ = tensor;
lod_start_pos_ = lod;
}

/*
* Get a element from LOD.
*/
size_t lod_element(size_t level, size_t elem) const {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lod_element => StartPosition

PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem < NumElements(level),
"element begin [%d] out of range [%d]", elem,
NumElements(level));
return (*lod_start_pos_)[level][elem];
}

/*
* Number of LODTensor's levels, each level has units of data, for example,
* in the sentence's view, article, paragraph, sentence are 3 levels.
*/
size_t NumLevels() const {
return lod_start_pos_ ? lod_start_pos_->size() : 0UL;
}
/*
* Number of elements in a level.
*/
size_t NumElements(size_t level = 0) const {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NumElements => ElementsOfLevel

PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
// the last offset is the end of last element
return lod_start_pos_->at(level).size() - 1;
}

/*
* Slice of levels[level_begin:level_end], with tensor copied.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// Copy the slice of data from *this into a new LODTensor and return the new LODTensor.

*/
template <typename T>
LODTensor SliceCopied(size_t level_begin, size_t level_end,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

T doesn't appear in the definition of SliceCopied. Do we really need this method a function template?

I have the same question with SliceShared.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tensor's CopyFrom need a T, SliceShared's T will be removed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this method slice some levels, or elements in a level?

const platform::Place &dst_place) const;

/*
* Slice of levels[level_begin:level_end], with tensor shared.
*/
LODTensor SliceShared(size_t level_begin, size_t level_end) const;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// Returns a new LODTensor that shares data with *this but provides a view of the specified slice.


/*
* Slice of elements of a level, [elem_begin: elem_end], with tensor copied.
* @note: low performance in slice lod_start_pos_.
*/
template <typename T>
LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end,
const platform::Place &dst_place) const;

/*
* Slice of elements of a level, [elem_begin: elem_end], with tensor shared.
* @note: low performance in slice lod_start_pos_.
*/
LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const;

/*
* Copy other's lod_start_pos_, to share LOD info.
* @note: the LOD info should not be changed.
*/
void ShareLOD(const LODTensor &other) {
lod_start_pos_ = other.lod_start_pos_;
}

/*
* Copy other's lod_start_pos_'s content, free to mutate.
*/
void CopyLOD(const LODTensor &other) {
lod_start_pos_ = std::make_shared<LOD>(*other.lod_start_pos_);
}
/*
* Determine whether LODTensor has a valid LOD info.
*/
bool HasLOD() const { return bool(lod_start_pos_); }
LOD *lod() const { return lod_start_pos_.get(); }

std::shared_ptr<Tensor> &tensor() { return tensor_; }
Tensor *raw_tensor() { return tensor_.get(); }

private:
std::shared_ptr<LOD> lod_start_pos_;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lod_start_pos_ = > lod_

std::shared_ptr<Tensor> tensor_;
};

} // namespace framework
} // namespace paddle

#include "paddle/framework/lod_tensor_impl.h"
60 changes: 60 additions & 0 deletions paddle/framework/lod_tensor_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/details/lod_tensor.h"

namespace paddle {
namespace framework {

template <typename T>
LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end,
const platform::Place &dst_place) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
auto new_tensor = std::make_shared<Tensor>();
new_tensor->CopyFrom<T>(*tensor_, dst_place);

return LODTensor(new_tensor, new_lod);
}

template <typename T>
LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin,
size_t elem_end,
const platform::Place &dst_place) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem_begin < NumElements(level),
"element begin [%d] out of range [%d]", elem_begin,
NumElements(level));
PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
"element end [%d] out of range [%d]", elem_end,
NumElements(level));

auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
false /*tensor_shared*/);

auto start_idx = new_lod->front().front();
auto end_idx = new_lod->front().back() - 1 /*the next element's start*/;
auto sliced_tensor = tensor_->Slice<T>(start_idx, end_idx);
auto new_tensor = std::make_shared<Tensor>();
new_tensor->CopyFrom<T>(sliced_tensor, dst_place);

return LODTensor(new_tensor, new_lod);
}

} // namespace framework
} // namespace paddle
Loading