Commit

[Inference] Add TryShrinkMemory interface. (#28409)
jiweibo authored Nov 11, 2020
1 parent 26d292b commit 1bf4836
Showing 7 changed files with 93 additions and 4 deletions.
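
For orientation, here is a minimal usage sketch of the new interface, modeled on the `TEST(Predictor, Run)` case added below; the model directory and tensor names are placeholders, and the include path depends on how the library is installed:

```cpp
#include <cstdint>

#include "paddle_inference_api.h"  // adjust the include path to your install

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // placeholder model directory

  auto predictor = paddle_infer::CreatePredictor(config);

  // Feed one input tensor (the name is a placeholder for whatever the model expects).
  auto input = predictor->GetInputHandle("firstw");
  input->Reshape({4, 1});
  auto* data = input->mutable_data<int64_t>(paddle_infer::PlaceType::kCPU);
  for (int i = 0; i < 4; ++i) data[i] = i;

  predictor->Run();

  // After a burst of inference, ask the predictor to release the unoccupied
  // chunks of its memory pool; the return value is the number of bytes freed.
  uint64_t released = predictor->TryShrinkMemory();
  (void)released;
  return 0;
}
```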
15 changes: 13 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -175,7 +175,10 @@ bool AnalysisPredictor::PrepareScope(
status_is_cloned_ = true;
} else {
paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope());
scope_.reset(new paddle::framework::Scope(), [&](framework::Scope *scope) {
delete scope;
memory::Release(place_);
});
status_is_cloned_ = false;
}
sub_scope_ = &scope_->NewScope();
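
The hunk above ties the release of the per-place memory pool to the lifetime of the scope by installing a custom deleter on the shared_ptr. A minimal standalone sketch of that pattern, with illustrative stand-ins rather than Paddle's real Scope and memory::Release:

```cpp
#include <iostream>
#include <memory>

struct Scope {};  // stand-in for framework::Scope

// Stand-in for memory::Release(place_): return unoccupied pool chunks to the system.
void ReleaseMemoryPool() { std::cout << "memory pool released\n"; }

int main() {
  std::shared_ptr<Scope> scope;
  scope.reset(new Scope(), [](Scope* s) {
    delete s;             // destroy the scope (and its tensors) first
    ReleaseMemoryPool();  // then the freed chunks can actually be released
  });

  scope.reset();  // the custom deleter runs here
  return 0;
}
```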
@@ -591,7 +594,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
gflags.push_back("--allocator_strategy=thread_local");
process_level_allocator_enabled = false;
} else {
gflags.push_back("--allocator_strategy=naive_best_fit");
process_level_allocator_enabled = true;
}

@@ -890,6 +892,11 @@ bool AnalysisPredictor::LoadParameters() {
return true;
}

uint64_t AnalysisPredictor::TryShrinkMemory() {
ClearIntermediateTensor();
return paddle::memory::Release(place_);
}

void AnalysisPredictor::ClearIntermediateTensor() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
platform::errors::PreconditionNotMet(
@@ -985,6 +992,8 @@ AnalysisPredictor::~AnalysisPredictor() {
mkldnn_quantizer_ = nullptr;
}
#endif

memory::Release(place_);
}

std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
@@ -1142,6 +1151,8 @@ void Predictor::ClearIntermediateTensor() {
predictor_->ClearIntermediateTensor();
}

uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }

int GetNumBytesOfDataType(DataType dtype) {
switch (dtype) {
case DataType::FLOAT32:
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -193,6 +193,17 @@ class AnalysisPredictor : public PaddlePredictor {
///
void ClearIntermediateTensor();

///
/// \brief Release all temporary tensors to shrink the memory pool.
/// The memory pool is treated as a list of chunks; a chunk can be
/// released only when it is not occupied.
///
/// \return Number of bytes released. This may be smaller than the amount
/// of memory actually freed, because part of the memory is not managed
/// by the memory pool.
///
uint64_t TryShrinkMemory() override;
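
As a rough mental model of the comment above (not Paddle's real allocator, just an illustration of releasing whichever chunks are not occupied and reporting the bytes given back):

```cpp
#include <cstdint>
#include <vector>

// Toy chunked pool: TryShrink() drops every unoccupied chunk and reports the bytes freed.
struct ToyChunkPool {
  struct Chunk {
    std::vector<char> storage;
    bool occupied = false;
  };
  std::vector<Chunk> chunks;

  uint64_t TryShrink() {
    uint64_t released = 0;
    for (auto& chunk : chunks) {
      if (!chunk.occupied) {
        released += chunk.storage.size();
        chunk.storage.clear();
        chunk.storage.shrink_to_fit();  // actually hand the memory back
      }
    }
    // Bytes the pool itself could drop; memory held outside the pool is not counted.
    return released;
  }
};
```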

///
/// \brief Get the argument used by predictor
///
46 changes: 44 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -135,6 +135,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
auto* out_data = out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
LOG(INFO) << "output_data: " << out_data;
predictor->TryShrinkMemory();
}

TEST(AnalysisPredictor, Clone) {
@@ -253,8 +254,7 @@ class MkldnnQuantizerTest : public testing::Test {
public:
MkldnnQuantizerTest() {
AnalysisConfig config(FLAGS_dirname);

predictor.reset(new AnalysisPredictor(config));
predictor = std::move(CreatePaddlePredictor(config));
auto* predictor_p = static_cast<AnalysisPredictor*>(predictor.get());

auto qconfig = new MkldnnQuantizerConfig();
@@ -507,3 +507,45 @@ TEST(AnalysisPredictor, bf16_pass_strategy) {
}

} // namespace paddle

namespace paddle_infer {

TEST(Predictor, Run) {
Config config;
config.SetModel(FLAGS_dirname);

auto predictor = CreatePredictor(config);

auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
auto w2 = predictor->GetInputHandle("thirdw");
auto w3 = predictor->GetInputHandle("forthw");

w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});

auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);

for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}

predictor->Run();

auto out = predictor->GetOutputHandle("fc_1.tmp_2");
PlaceType place;
int size = 0;
out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
predictor->TryShrinkMemory();
}

} // namespace paddle_infer
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/api_tester.cc
@@ -60,6 +60,7 @@ TEST(paddle_inference_api, demo) {
auto predictor = CreatePaddlePredictor(config);
std::vector<PaddleTensor> outputs;
predictor->Run({}, &outputs);
predictor->TryShrinkMemory();
}

TEST(paddle_inference_api, get_version) {
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/paddle_api.h
@@ -319,6 +319,17 @@ class PD_INFER_DECL PaddlePredictor {
///
virtual void ClearIntermediateTensor() {}

///
/// \brief Release all temporary tensors to shrink the memory pool.
/// The memory pool is treated as a list of chunks; a chunk can be
/// released only when it is not occupied.
///
/// \return Number of bytes released. This may be smaller than the amount
/// of memory actually freed, because part of the memory is not managed
/// by the memory pool.
///
virtual uint64_t TryShrinkMemory() { return 0; }

/// \brief Clone an existing predictor
/// When using clone, the same network will be created,
/// and the parameters between them are shared.
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/paddle_inference_api.h
@@ -224,6 +224,17 @@ class PD_INFER_DECL Predictor {
/// \brief Clear the intermediate tensors of the predictor
void ClearIntermediateTensor();

///
/// \brief Release all temporary tensors to shrink the memory pool.
/// The memory pool is treated as a list of chunks; a chunk can be
/// released only when it is not occupied.
///
/// \return Number of bytes released. This may be smaller than the amount
/// of memory actually freed, because part of the memory is not managed
/// by the memory pool.
///
uint64_t TryShrinkMemory();

private:
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -566,6 +566,7 @@ void BindAnalysisPredictor(py::module *m) {
.def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
.def("clear_intermediate_tensor",
&AnalysisPredictor::ClearIntermediateTensor)
.def("try_shrink_memory", &AnalysisPredictor::TryShrinkMemory)
.def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar)
.def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch)
.def("prepare_argument", &AnalysisPredictor::PrepareArgument)
Expand Down Expand Up @@ -593,6 +594,7 @@ void BindPaddleInferPredictor(py::module *m) {
.def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
.def("run", &paddle_infer::Predictor::Run)
.def("clone", &paddle_infer::Predictor::Clone)
.def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
.def("clear_intermediate_tensor",
&paddle_infer::Predictor::ClearIntermediateTensor);
}
