Skip to content

Commit

Permalink
Enabled performance benchmark tests for Eager Dygraph
Browse files Browse the repository at this point in the history
  • Loading branch information
jim19930609 committed Nov 29, 2021
1 parent b630774 commit 18ec0fa
Show file tree
Hide file tree
Showing 10 changed files with 50 additions and 43 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/eager/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ set(eager_deps pten pten_api hook_utils tensor_utils utils global_utils backward
set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy)
set(generated_deps dygraph_function dygraph_node)

if(NOT DEFINED ON_INFER)
if(NOT ON_INFER)
message("Performing Eager Dygraph Auto Code Generation")
add_subdirectory(auto_code_generator)
endif()
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/eager/api/generated/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
add_subdirectory(eager_generated)

if(NOT DEFINED ON_INFER)
if(NOT ON_INFER)
add_subdirectory(fluid_generated)
endif()
4 changes: 4 additions & 0 deletions paddle/fluid/eager/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Test subdirectories that are added unconditionally.
add_subdirectory(data_structure_tests)
add_subdirectory(task_tests)

# The performance benchmarks are added only when ON_INFER is undefined or
# evaluates to a false value.
# NOTE(review): ON_INFER is presumably the inference-only build switch --
# confirm against the top-level options.
if(NOT ON_INFER)
  add_subdirectory(performance_tests)
endif()
7 changes: 7 additions & 0 deletions paddle/fluid/eager/tests/performance_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Helper library shared by every benchmark test below; built from
# benchmark_utils.cc.
# NOTE(review): eager_deps, fluid_deps and generated_deps are not set in this
# file; presumably they are inherited from a parent CMakeLists.txt -- confirm.
cc_library(
  performance_benchmark_utils
  SRCS benchmark_utils.cc
  DEPS ${eager_deps}
       ${fluid_deps}
       ${generated_deps}
       eager_scale
       scale_node
       scale_op
       matmul_v2_op)

# CPU benchmarks: one test binary for the eager source and one for the fluid
# source.
cc_test(
  test_egr_performance_benchmark_eager_cpu
  SRCS benchmark_eager_cpu.cc
  DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
cc_test(
  test_egr_performance_benchmark_fluid_cpu
  SRCS benchmark_fluid_cpu.cc
  DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})

# CUDA benchmarks, same eager/fluid pairing as above.
# NOTE(review): these are registered without any GPU guard in this file --
# confirm the sources compile (or compile to nothing) on CPU-only builds.
cc_test(
  test_egr_performance_benchmark_eager_cuda
  SRCS benchmark_eager_cuda.cc
  DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
cc_test(
  test_egr_performance_benchmark_fluid_cuda
  SRCS benchmark_fluid_cuda.cc
  DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
20 changes: 10 additions & 10 deletions paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

#include "paddle/fluid/imperative/tracer.h"

#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include "paddle/fluid/eager/tests/test_utils.h"

#ifdef WITH_GPERFTOOLS
Expand All @@ -42,11 +42,11 @@ TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; }

TEST(Benchmark, EagerScaleCPU) {
// Prepare Device Contexts
egr::InitEnv(paddle::platform::CPUPlace());
eager_test::InitEnv(paddle::platform::CPUPlace());

for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4});
egr::EagerTensor tensor = EagerUtils::CreateTensorWithValue(
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0, true);
RetainGradForTensor(tensor);
Expand Down Expand Up @@ -78,20 +78,20 @@ TEST(Benchmark, EagerScaleCPU) {

TEST(Benchmark, EagerIntermediateMatmulCPU) {
// Prepare Device Contexts
InitEnv(paddle::platform::CPUPlace());
eager_test::InitEnv(paddle::platform::CPUPlace());

auto tracer = std::make_shared<paddle::imperative::Tracer>();
paddle::imperative::SetCurrentTracer(tracer);

for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddimX = paddle::framework::make_ddim({2, 2});
egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
egr::EagerTensor X = CreateTensorWithValue(
ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 1.0, true);
RetainGradForTensor(X);

paddle::framework::DDim ddimY = paddle::framework::make_ddim({2, 2});
egr::EagerTensor Y = EagerUtils::CreateTensorWithValue(
egr::EagerTensor Y = CreateTensorWithValue(
ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 2.0, true);
RetainGradForTensor(Y);
Expand Down Expand Up @@ -122,15 +122,15 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) {

TEST(Benchmark, EagerIntermediateMLPCPU) {
// Prepare Device Contexts
InitEnv(paddle::platform::CPUPlace());
eager_test::InitEnv(paddle::platform::CPUPlace());

auto tracer = std::make_shared<paddle::imperative::Tracer>();
paddle::imperative::SetCurrentTracer(tracer);

for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddimX =
paddle::framework::make_ddim({MLP_M, MLP_N});
egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
egr::EagerTensor X = CreateTensorWithValue(
ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, MLP_X_VAL, true);
RetainGradForTensor(X);
Expand All @@ -140,13 +140,13 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
paddle::framework::DDim ddimW =
paddle::framework::make_ddim({MLP_N, MLP_K});
egr::EagerTensor W = EagerUtils::CreateTensorWithValue(
egr::EagerTensor W = CreateTensorWithValue(
ddimW, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, MLP_W_VAL, true);
RetainGradForTensor(W);

paddle::framework::DDim ddimB = paddle::framework::make_ddim({MLP_K});
egr::EagerTensor B = EagerUtils::CreateTensorWithValue(
egr::EagerTensor B = CreateTensorWithValue(
ddimB, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, MLP_B_VAL, true);
RetainGradForTensor(B);
Expand Down
20 changes: 10 additions & 10 deletions paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

#include "paddle/fluid/imperative/tracer.h"

#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include "paddle/fluid/eager/tests/test_utils.h"

#ifdef WITH_GPERFTOOLS
Expand All @@ -39,11 +39,11 @@ DECLARE_bool(run_pten_kernel);
TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; }

TEST(Benchmark, EagerScaleCUDA) {
egr::InitEnv(paddle::platform::CUDAPlace());
eager_test::InitEnv(paddle::platform::CUDAPlace());

for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4});
egr::EagerTensor tensor = EagerUtils::CreateTensorWithValue(
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
RetainGradForTensor(tensor);
Expand Down Expand Up @@ -77,21 +77,21 @@ TEST(Benchmark, EagerScaleCUDA) {

TEST(Benchmark, EagerIntermediateMatmulCUDA) {
paddle::platform::CUDAPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

auto tracer = std::make_shared<paddle::imperative::Tracer>();
tracer->SetExpectedPlace(place);
paddle::imperative::SetCurrentTracer(tracer);

for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = paddle::framework::make_ddim({2, 2});
egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
egr::EagerTensor X = CreateTensorWithValue(
ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 1.0, true);
RetainGradForTensor(X);

paddle::framework::DDim ddimY = paddle::framework::make_ddim({2, 2});
egr::EagerTensor Y = EagerUtils::CreateTensorWithValue(
egr::EagerTensor Y = CreateTensorWithValue(
ddimY, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 2.0, true);
RetainGradForTensor(Y);
Expand Down Expand Up @@ -125,7 +125,7 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) {

TEST(Benchmark, EagerIntermediateMLPCUDA) {
paddle::platform::CUDAPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

auto tracer = std::make_shared<paddle::imperative::Tracer>();
tracer->SetExpectedPlace(place);
Expand All @@ -134,7 +134,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX =
paddle::framework::make_ddim({MLP_M, MLP_N});
egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
egr::EagerTensor X = CreateTensorWithValue(
ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, MLP_X_VAL, true);
RetainGradForTensor(X);
Expand All @@ -144,13 +144,13 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
paddle::framework::DDim ddimW =
paddle::framework::make_ddim({MLP_N, MLP_K});
egr::EagerTensor W = EagerUtils::CreateTensorWithValue(
egr::EagerTensor W = CreateTensorWithValue(
ddimW, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, MLP_W_VAL, true);
RetainGradForTensor(W);

paddle::framework::DDim ddimB = paddle::framework::make_ddim({MLP_K});
egr::EagerTensor B = EagerUtils::CreateTensorWithValue(
egr::EagerTensor B = CreateTensorWithValue(
ddimB, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, MLP_B_VAL, true);
RetainGradForTensor(B);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "glog/logging.h"
#include "gtest/gtest.h"

#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/tracer.h"
Expand All @@ -45,7 +45,7 @@ namespace imperative {
TEST(Benchmark, FluidScaleCPU) {
// Prepare Device Contexts
platform::CPUPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

for (const std::string& mode : {"Accuracy", "Performance"}) {
std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
Expand Down Expand Up @@ -88,7 +88,7 @@ TEST(Benchmark, FluidScaleCPU) {
TEST(Benchmark, FluidMatmulCPU) {
// Prepare Device Contexts
platform::CPUPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

for (const std::string& mode : {"Accuracy", "Performance"}) {
std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
Expand Down Expand Up @@ -141,7 +141,7 @@ TEST(Benchmark, FluidMatmulCPU) {
TEST(Benchmark, FluidMLPCPU) {
// Prepare Device Contexts
platform::CPUPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

for (const std::string& mode : {"Accuracy", "Performance"}) {
std::vector<float> x_src_data(MLP_M * MLP_N, MLP_X_VAL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "glog/logging.h"
#include "gtest/gtest.h"

#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/tracer.h"
Expand All @@ -45,7 +45,7 @@ namespace imperative {
TEST(Benchmark, FluidScaleCUDA) {
// Prepare Device Contexts
platform::CUDAPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
Expand Down Expand Up @@ -98,7 +98,7 @@ TEST(Benchmark, FluidScaleCUDA) {
TEST(Benchmark, FluidMatmulCUDA) {
// Prepare Device Contexts
platform::CUDAPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
Expand Down Expand Up @@ -161,7 +161,7 @@ TEST(Benchmark, FluidMatmulCUDA) {
TEST(Benchmark, FluidMLPCUDA) {
// Prepare Device Contexts
platform::CUDAPlace place;
egr::InitEnv(place);
eager_test::InitEnv(place);

for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::platform::DeviceContextPool& pool =
Expand Down
20 changes: 8 additions & 12 deletions paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/memory/memcpy.h"

#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"

#include "paddle/pten/core/kernel_registry.h"

static size_t max_num_benchmark_runs = 5000;

namespace egr {
Expand All @@ -64,9 +60,9 @@ void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) {

if (accuracy_check) {
// Examine Forward Grad (w.r.t max_num_runs = 10)
CompareTensorWithValue<float>(input_tensor, 8189.0);
eager_test::CompareTensorWithValue<float>(input_tensor, 8189.0);
// Examine Backward Grad (w.r.t max_num_runs = 10)
CompareGradTensorWithValue<float>(tensor, 1024.0);
eager_test::CompareGradTensorWithValue<float>(tensor, 1024.0);
}
}

Expand All @@ -89,10 +85,10 @@ void benchmark_eager_intermediate_matmul(const EagerTensor& X,

if (accuracy_check) {
// Examine Forward Grad (w.r.t max_num_runs = 2)
CompareVariableWithValue<float>(input_tensor0, 16);
eager_test::CompareVariableWithValue<float>(input_tensor0, 16);
// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue<float>(X, 16);
CompareGradVariableWithValue<float>(Y, 16);
eager_test::CompareGradVariableWithValue<float>(X, 16);
eager_test::CompareGradVariableWithValue<float>(Y, 16);
}
}

Expand Down Expand Up @@ -122,11 +118,11 @@ void benchmark_eager_intermediate_mlp(const EagerTensor& X,
compute_mlp_expected_results();

// Examine Forward Grad (w.r.t max_num_runs = 2)
CompareVariableWithValue<float>(Out, result["Out"]);
eager_test::CompareVariableWithValue<float>(Out, result["Out"]);

// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue<float>(X, result["GradX"]);
CompareGradVariableWithValue<float>(Ws[0], result["GradW"]);
eager_test::CompareGradVariableWithValue<float>(X, result["GradX"]);
eager_test::CompareGradVariableWithValue<float>(Ws[0], result["GradW"]);
}
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/eager/tests/task_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} ea
cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)

if(NOT DEFINED ON_INFER)
if(NOT ON_INFER)
cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
endif()

0 comments on commit 18ec0fa

Please sign in to comment.