diff --git a/paddle/fluid/operators/cinn/CMakeLists.txt b/paddle/fluid/operators/cinn/CMakeLists.txt
index b80916616a18b..a2fc080faadcf 100644
--- a/paddle/fluid/operators/cinn/CMakeLists.txt
+++ b/paddle/fluid/operators/cinn/CMakeLists.txt
@@ -10,6 +10,10 @@ if (WITH_TESTING)
   cc_test(cinn_launch_context_test SRCS cinn_launch_context_test.cc DEPS ddim lod_tensor scope cinn_launch_context)
   set_tests_properties(cinn_launch_context_test PROPERTIES LABELS "RUN_TYPE=CINN")
 
+  SET(CINN_RUN_ENVIRONMENT "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda")
   cc_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op elementwise_add_op)
-  set_tests_properties(cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda")
+  set_tests_properties(cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "${CINN_RUN_ENVIRONMENT}")
+
+  cc_test(cinn_instruction_run_op_test SRCS cinn_instruction_run_op_test.cc DEPS cinn_compiler cinn_launch_op cinn_instruction_run_op elementwise_add_op)
+  set_tests_properties(cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "${CINN_RUN_ENVIRONMENT}")
 endif()
diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
new file mode 100644
index 0000000000000..7c4bdc09a569e
--- /dev/null
+++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
@@ -0,0 +1,90 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <stdlib.h>
+#include <string>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/operators/cinn/test_helper.h"
+#include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/init.h"
+
+USE_OP(cinn_launch);
+USE_OP(cinn_instruction_run);
+USE_OP_ITSELF(elementwise_add);
+
+namespace paddle::operators {
+
+using framework::paddle2cinn::CinnCompiler;
+
+TEST(CinnInstructionOpTest, TestWithElementwiseAdd) {
+  paddle::framework::InitDevices();
+  platform::SetNumThreads(1);
+  // cache test graph into CinnCompiler
+  const std::string test_op_out_name = "cinn_instruction_run_op_out";
+  const std::string add_op_out_name = "add_op_out";
+  auto compilation_key = CinnCompiler::GetInstance()->AddGraph(
+      CreateOnlyElementwiseAddGraph("x", "y", test_op_out_name));
+
+  // create a cinn_launch_op; it is run once below to launch the compilation
+  // of the above graph and cache the compiled object in CinnCompiler
+  auto cinn_launch_op = paddle::framework::OpRegistry::CreateOp(
+      "cinn_launch", {{"X", {"x", "y"}}}, {{"Out", {test_op_out_name}}},
+      {{"compilation_key", compilation_key}});
+
+  // create cinn_instruction_run_op and elementwise_add op
+  auto cinn_instruction_run_op = paddle::framework::OpRegistry::CreateOp(
+      "cinn_instruction_run", {{"X", {"x", "y"}}},
+      {{"Out", {test_op_out_name}}},
+      {{"cached_index", 0}, {"instruction_index", 1}});
+  auto elementwise_add_op = paddle::framework::OpRegistry::CreateOp(
+      "elementwise_add", {{"X", {"x"}}, {"Y", {"y"}}},
+      {{"Out", {add_op_out_name}}}, {{}});
+
+  // check case: a compiled object not cached before cinn_launch_op run,
+  // so a cinn_instruction_run_op will throw an error
+  framework::Scope scope;
+  platform::CPUPlace place;
+  InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
+  scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+  ASSERT_THROW(cinn_instruction_run_op->Run(scope, place),
+               paddle::platform::EnforceNotMet);
+  cinn_launch_op->Run(scope, place);
+
+  // Run both ops in a fresh scope and check the computation results
+  auto run_and_check_fn = [&](const platform::Place& target) {
+    framework::Scope local;
+    InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, target, &local);
+    local.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    local.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    cinn_instruction_run_op->Run(local, target);
+    elementwise_add_op->Run(local, target);
+    CompareOpResult<float>(local.GetVar(test_op_out_name),
+                           local.GetVar(add_op_out_name));
+  };
+
+  // CPU
+  run_and_check_fn(platform::CPUPlace());
+  run_and_check_fn(platform::CPUPlace());
+#ifdef PADDLE_WITH_CUDA
+  // GPU
+  run_and_check_fn(platform::CUDAPlace());
+  run_and_check_fn(platform::CUDAPlace());
+#endif
+}
+
+}  // namespace paddle::operators
diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc
index 282a8f69e4ec5..2ad958328ce46 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc
@@ -13,26 +13,47 @@
 // limitations under the License.
 
 #include "paddle/fluid/operators/cinn/cinn_launch_context.h"
+#include <algorithm>
 #include <functional>
+#include <utility>
 #include <vector>
+#include "cinn/hlir/framework/scope.h"
+#include "cinn/hlir/framework/tensor.h"
+#include "cinn/runtime/cinn_runtime.h"
+#include "paddle/fluid/framework/ddim.h"
+#include "paddle/fluid/string/printf.h"
 
 namespace paddle {
-namespace operators {
-namespace details {
+namespace operators::details {
+
+using LoDTensor = framework::LoDTensor;
 
 CinnLaunchContext::CinnLaunchContext(
     const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
     const std::shared_ptr<CinnScope>& cinn_scope)
-    : paddle2cinn_varmap_(paddle2cinn_varmap), cinn_scope_(cinn_scope) {
-  // generate all names of cinn used variables
+    : cinn_scope_(cinn_scope) {
+  // generate all names of the cinn execution arguments
   auto var_names = cinn_scope_->var_names();
-  cinn_variable_names_.reserve(var_names.size());
+  cinn_argument_names_.reserve(var_names.size());
   std::transform(
       var_names.begin(), var_names.end(),
-      std::inserter(cinn_variable_names_, cinn_variable_names_.end()),
+      std::inserter(cinn_argument_names_, cinn_argument_names_.end()),
       [](const auto& name_view) { return std::string(name_view.data()); });
-  // build the variable name map of cinn2paddle
-  for (const auto& x : paddle2cinn_varmap_) {
+  // build name map between the original variables and compiled ones
+  BuildVarNameMap(paddle2cinn_varmap, cinn_argument_names_);
+}
+
+void CinnLaunchContext::BuildVarNameMap(
+    const std::unordered_map<std::string, std::string>& compiled_varmap,
+    const std::unordered_set<std::string>& argument_names) {
+  for (const auto& x : compiled_varmap) {
+    if (!argument_names.count(x.second)) {
+      // exclude variables not used
+      continue;
+    }
+    // copy to local paddle2cinn map
+    paddle2cinn_varmap_.emplace(x.first, x.second);
+    // add an entry to local cinn2paddle map reversely
     auto res = cinn2paddle_varmap_.emplace(x.second, x.first);
     PADDLE_ENFORCE_EQ(
         res.second, true,
@@ -40,15 +61,21 @@ CinnLaunchContext::CinnLaunchContext(
             "Cinn variable(%s) maps to more than one paddle variable(%s,%s)",
             x.second, res.first->second, x.first));
   }
-  // supplement the relations of the remain variables not appearing in above
-  // map,
-  // they are internal variables and here we use the name from cinn compiled.
-  for (const auto& var_name : cinn_variable_names_) {
+  // supplement the relations of the remain variables
+  // not appearing in above map, which are internal variables
+  // and here we use the names from cinn compiled.
+  for (const auto& var_name : argument_names) {
     if (!cinn2paddle_varmap_.count(var_name)) {
       cinn2paddle_varmap_.emplace(var_name, var_name);
       paddle2cinn_varmap_.emplace(var_name, var_name);
     }
   }
+
+  PADDLE_ENFORCE_EQ(  // sizes diverge if an emplace above hit a duplicate key
+      paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size(),
+      platform::errors::PreconditionNotMet(
+          "Size of variables is not equal, paddle[%ld] vs cinn[%ld]",
+          paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size()));
 }
 
 void CinnLaunchContext::UpdateCapturedEnv(const framework::Scope& scope,
@@ -74,56 +101,61 @@ bool CinnLaunchContext::IsArgumentsInitialized() const {
   return true;
 }
 
-bool CinnLaunchContext::IsVariableUsed(
-    const std::string& paddle_var_name) const {
-  return paddle2cinn_varmap_.count(paddle_var_name) > 0 &&
-         cinn_variable_names_.count(paddle2cinn_varmap_.at(paddle_var_name)) >
-             0;
+bool CinnLaunchContext::IsVariableUsed(const std::string& var_name) const {
+  return paddle2cinn_varmap_.count(var_name) > 0;
 }
 
-CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& var_name) {
-  PADDLE_ENFORCE_GT(cinn_variable_names_.count(var_name), 0,
-                    platform::errors::NotFound(
-                        "Variable(%s) not found in cinn scope.", var_name));
-  return cinn_scope_->GetTensor(var_name);
+CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& arg_name) {
+  PADDLE_ENFORCE_GT(cinn_argument_names_.count(arg_name), 0,
+                    platform::errors::InvalidArgument(
+                        "Variable(%s) not found in cinn scope.", arg_name));
+  return cinn_scope_->GetTensor(arg_name);
 }
 
-std::unordered_set<std::string> CinnLaunchContext::GetInternalVariableNames() {
-  std::unordered_set<std::string> all_parameters(cinn_variable_names_);
-  std::for_each(name2argument_.begin(), name2argument_.end(),
-                [&all_parameters](const auto& name2arg) {
-                  all_parameters.erase(name2arg.first);
-                });
-  return all_parameters;
+std::unordered_set<std::string> CinnLaunchContext::ExtractInternalVarNames(
+    const std::vector<std::string>& input_var_names,
+    const std::vector<std::string>& output_var_names) {
+  // start from every variable name recorded in the paddle->cinn name map
+  std::unordered_set<std::string> internal_names;
+  internal_names.reserve(paddle2cinn_varmap_.size());
+  for (const auto& name_pair : paddle2cinn_varmap_) {
+    internal_names.insert(name_pair.first);
+  }
+
+  // whatever is neither an input nor an output is an internal variable
+  for (const auto& input_name : input_var_names) {
+    internal_names.erase(input_name);
+  }
+  for (const auto& output_name : output_var_names) {
+    internal_names.erase(output_name);
+  }
+  return internal_names;
 }
 
-void CinnLaunchContext::CheckTensorEquivalent(
-    const std::string& paddle_var_name, const LoDTensor& paddle_tensor,
-    const CinnTensor& cinn_tensor) {
+void CinnLaunchContext::CheckTensorEquivalent(const std::string& var_name,
+                                              const LoDTensor& paddle_tensor,
+                                              const CinnTensor& cinn_tensor) {
   // check dimension
   auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data());
   PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims,
                     platform::errors::PreconditionNotMet(
                         "Tensors' shape in variable(%s) are not equivalent, "
                         "paddle's shape = [%s], but cinn's shape = [%s].",
-                        paddle_var_name, paddle_tensor.dims(), cinn_dims));
+                        var_name, paddle_tensor.dims(), cinn_dims));
 
   // TODO(CtfGo): check the underlying data type after CINN ready
 }
 
-void CinnLaunchContext::AssignExternalVariable(
-    const std::string& paddle_var_name) {
-  PADDLE_ENFORCE_EQ(
-      IsVariableUsed(paddle_var_name), true,
-      platform::errors::InvalidArgument("Paddle variable(%s) not used by cinn",
-                                        paddle_var_name));
-
-  const auto& cinn_var_name = paddle2cinn_varmap_.at(paddle_var_name);
-  const auto& paddle_tensor =
-      cached_scope_->GetVar(paddle_var_name)->Get<LoDTensor>();
-  CinnTensor cinn_tensor = GetCinnTensor(cinn_var_name);
+void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
+  PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
+                    platform::errors::InvalidArgument(
+                        "Variable(%s) not applied in cinn", var_name));
+  const auto& cinn_arg_name = paddle2cinn_varmap_.at(var_name);
+
+  const auto& paddle_tensor = cached_scope_->GetVar(var_name)->Get<LoDTensor>();
+  CinnTensor cinn_tensor = GetCinnTensor(cinn_arg_name);
   if (paddle_tensor.IsInitialized()) {
-    CheckTensorEquivalent(paddle_var_name, paddle_tensor, cinn_tensor);
+    CheckTensorEquivalent(var_name, paddle_tensor, cinn_tensor);
   }
 
   auto cinn_buffer = std::make_unique<cinn_buffer_t>();
@@ -131,9 +163,8 @@ void CinnLaunchContext::AssignExternalVariable(
   cinn_buffer->resize(cinn_tensor->shape().data().data(),
                       cinn_tensor->shape().data().size());
   cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
-      [this, paddle_var_name](void* ctx, cinn_buffer_t* buffer) {
-        auto* tensor =
-            cached_scope_->GetVar(paddle_var_name)->GetMutable<LoDTensor>();
+      [this, var_name](void* ctx, cinn_buffer_t* buffer) {
+        auto* tensor = cached_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
         tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
         buffer->memory = reinterpret_cast<uint8_t*>(
             tensor->mutable_data<float>(*cached_place_));
@@ -147,25 +178,25 @@ void CinnLaunchContext::AssignExternalVariable(
         return 0;
       });
 
-  return SetArgument(cinn_var_name, std::move(cinn_buffer));
+  return AppendArgument(cinn_arg_name, std::move(cinn_buffer));
 }
 
-void CinnLaunchContext::AssignInternalVariable(
-    const std::string& cinn_var_name) {
-  PADDLE_ENFORCE_GT(
-      cinn_variable_names_.count(cinn_var_name), 0,
-      platform::errors::InvalidArgument("Variable(%s) not found in cinn socpe.",
-                                        cinn_var_name));
-  CinnTensor cinn_tensor = GetCinnTensor(cinn_var_name);
+void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
+  PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
+                    platform::errors::InvalidArgument(
+                        "Variable(%s) not applied in cinn", var_name));
+  const auto& cinn_arg_name = paddle2cinn_varmap_.at(var_name);
+
+  CinnTensor cinn_tensor = GetCinnTensor(cinn_arg_name);
   auto cinn_buffer = std::make_unique<cinn_buffer_t>();
   // assign dimensions and alloc/free callback of cinn_buffer_t
   cinn_buffer->resize(cinn_tensor->shape().data().data(),
                       cinn_tensor->shape().data().size());
 
   cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
-      [this, cinn_var_name](void* ctx, cinn_buffer_t* buffer) {
+      [this, var_name](void* ctx, cinn_buffer_t* buffer) {
         auto* tensor =
-            cached_temp_scope_->Var(cinn_var_name)->GetMutable<LoDTensor>();
+            cached_temp_scope_->Var(var_name)->GetMutable<LoDTensor>();
         tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
         buffer->memory = reinterpret_cast<uint8_t*>(
             tensor->mutable_data<float>(*cached_place_));
@@ -175,53 +206,51 @@ void CinnLaunchContext::AssignInternalVariable(
   // internal variables should release its buffer immediately
   // if no instruction use it
   cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
-      [this, cinn_var_name](void* ctx, cinn_buffer_t* buffer) {
+      [this, var_name](void* ctx, cinn_buffer_t* buffer) {
         auto* tensor =
-            cached_temp_scope_->GetVar(cinn_var_name)->GetMutable<LoDTensor>();
+            cached_temp_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
         tensor->clear();
         return 0;
       });
-  return SetArgument(cinn_var_name, std::move(cinn_buffer));
+  return AppendArgument(cinn_arg_name, std::move(cinn_buffer));
 }
 
-void CinnLaunchContext::SetArgument(const std::string& cinn_var_name,
-                                    std::unique_ptr<cinn_buffer_t>&& buffer) {
-  VLOG(4) << "SetArgument-" << name2argument_.size() << ": name("
-          << cinn_var_name << "), dims("
-          << framework::DDim(buffer->dims, buffer->dimensions) << ").";
-
-  name2argument_.emplace(cinn_var_name, buffer.get());
+void CinnLaunchContext::AppendArgument(
+    const std::string& arg_name, std::unique_ptr<cinn_buffer_t>&& buffer) {
+  name2argument_.emplace(arg_name, buffer.get());  // non-owning entry
+  VLOG(4) << string::Sprintf(  // must run before `buffer` is moved-from below
+      "Append an argument:name(%s),dims(%s),argument size:(%lu)", arg_name,
+      framework::DDim(buffer->dims, buffer->dimensions).to_str(),
+      name2argument_.size());
   hold_buffers_.emplace_back(std::move(buffer));
 }
 
 const std::map<std::string, cinn_pod_value_t>&
 CinnLaunchContext::FinalizeArguments() const {
   // Check all execution parameters are assigned valued.
-  std::for_each(cinn_variable_names_.begin(), cinn_variable_names_.end(),
-                [this](const auto& var_name) {
-                  PADDLE_ENFORCE_GT(name2argument_.count(var_name), 0,
-                                    platform::errors::InvalidArgument(
-                                        "Variable(%s) is missed for launching "
-                                        "compiled program execution",
-                                        var_name));
+  std::for_each(cinn_argument_names_.begin(), cinn_argument_names_.end(),
+                [this](const auto& arg_name) {
+                  PADDLE_ENFORCE_GT(
+                      name2argument_.count(arg_name), 0,
+                      platform::errors::NotFound(
+                          "Argument(%s) is missed for execution", arg_name));
                 });
   return name2argument_;
 }
 
 cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar(
-    const std::string& paddle_var_name) {
-  auto res = paddle2cinn_varmap_.find(paddle_var_name);
+    const std::string& var_name) {
+  auto it = paddle2cinn_varmap_.find(var_name);
   PADDLE_ENFORCE_NE(
-      res, paddle2cinn_varmap_.end(),
+      it, paddle2cinn_varmap_.end(),
       platform::errors::InvalidArgument(
-          "Variable(%s) not found in compilation result", paddle_var_name));
-  auto it = name2argument_.find(res->second);
-  PADDLE_ENFORCE_NE(it, name2argument_.end(),
-                    platform::errors::InvalidArgument(
-                        "Argument(%s) not be initialized", res->second));
-  return static_cast<cinn_buffer_t*>(it->second);
+          "Variable(%s) not found in compilation result", var_name));
+  auto res = name2argument_.find(it->second);
+  PADDLE_ENFORCE_NE(res, name2argument_.end(),
+                    platform::errors::NotFound(
+                        "Argument(%s) not be initialized", it->second));
+  return static_cast<cinn_buffer_t*>(res->second);
 }
 
-}  // namespace details
-}  // namespace operators
+}  // namespace operators::details
 }  // namespace paddle
diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h
index 71ddeb35420b5..52c90175a7b0d 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context.h
+++ b/paddle/fluid/operators/cinn/cinn_launch_context.h
@@ -19,21 +19,33 @@
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
-#include "cinn/hlir/framework/scope.h"
-#include "cinn/hlir/framework/tensor.h"
-#include "cinn/runtime/cinn_runtime.h"
-#include "paddle/fluid/framework/ddim.h"
+#include <vector>
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/place.h"
+
+// forward declarations of CINN types
+struct cinn_buffer_t;
+struct cinn_pod_value_t;
+namespace cinn::hlir::framework {
+class Tensor;
+class Scope;
+class Program;
+}  // namespace cinn::hlir::framework
 
 namespace paddle {
-namespace operators {
-namespace details {
+namespace operators::details {
 
-using LoDTensor = framework::LoDTensor;
 using CinnTensor = ::cinn::hlir::framework::Tensor;
 using CinnScope = ::cinn::hlir::framework::Scope;
 
+// This class is used to cache some reusable data among repeated
+// executions for efficiency and it also provides easy interfaces
+// to get details of the compilation result.
+// An object of this class is constructed and saved in the
+// compilation cache once a graph is compiled by CINN.
+// Generally speaking, here, a variable refers to a Paddle
+// Variable while a CINN variable is called an Argument.
 class CinnLaunchContext {
  public:
   explicit CinnLaunchContext(
@@ -48,55 +60,63 @@ class CinnLaunchContext {
   // Return whether execution arguments has been initialized
   bool IsArgumentsInitialized() const;
 
-  // Return whether a Paddle variable used on compiled kernels
-  bool IsVariableUsed(const std::string& paddle_var_name) const;
+  // Return whether a Paddle variable is used in cinn execution
+  bool IsVariableUsed(const std::string& var_name) const;
 
   // Assign tensor buffer to input or output variables
-  void AssignExternalVariable(const std::string& paddle_var_name);
+  void AssignExternalVariable(const std::string& var_name);
 
   // Assign tensor buffer to internal variables
-  void AssignInternalVariable(const std::string& cinn_var_name);
+  void AssignInternalVariable(const std::string& var_name);
 
-  // Extract internal variable names from CinnScope
-  // by excluding used input and output variables
-  std::unordered_set<std::string> GetInternalVariableNames();
+  // Extract internal variable names from all applied variables
+  // in execution by excluding the input and output variables
+  std::unordered_set<std::string> ExtractInternalVarNames(
+      const std::vector<std::string>& input_var_names,
+      const std::vector<std::string>& output_var_names);
 
-  // Finalize all execution arguments and return them
+  // Finalize all execution arguments and return the name->argument map
   const std::map<std::string, cinn_pod_value_t>& FinalizeArguments() const;
 
-  cinn_buffer_t* GetCinnBufferOfVar(const std::string& paddle_var_name);
+  // Return the cinn_buffer_t* of a specific variable
+  cinn_buffer_t* GetCinnBufferOfVar(const std::string& var_name);
 
  private:
-  // Get CinnTensor with CINN variable name
-  CinnTensor GetCinnTensor(const std::string& var_name);
-
-  // Check whether tensors from Paddle and CINN of the same variable
+  // Get CinnTensor with CINN argument name
+  CinnTensor GetCinnTensor(const std::string& arg_name);
+  // Build the name maps of paddle->cinn and cinn->paddle
+  // in reverse for all variables used in cinn execution
+  void BuildVarNameMap(
+      const std::unordered_map<std::string, std::string>& compiled_varmap,
+      const std::unordered_set<std::string>& argument_names);
+
+  // Check whether the tensor in Paddle and the compiled
+  // tensor returned by CINN of a same variable
   // are equivalent in type and dimension
   void CheckTensorEquivalent(const std::string& var_name,
-                             const LoDTensor& paddle_tensor,
+                             const framework::LoDTensor& paddle_tensor,
                              const CinnTensor& cinn_tensor);
 
-  // Set an argument with (cinn name)->(cinn_buffer_t) pair
-  void SetArgument(const std::string& cinn_var_name,
-                   std::unique_ptr<cinn_buffer_t>&& buffer);
+  // Append an argument with (cinn name)->(cinn_buffer_t) pair
+  void AppendArgument(const std::string& arg_name,
+                      std::unique_ptr<cinn_buffer_t>&& buffer);
 
  private:
   const framework::Scope* cached_scope_ = nullptr;
   const platform::Place* cached_place_ = nullptr;
   std::unique_ptr<framework::Scope> cached_temp_scope_ = nullptr;
 
-  // a variable name map from paddle to cinn
+  // a name map from paddle variables to cinn execution arguments
   std::unordered_map<std::string, std::string> paddle2cinn_varmap_;
-  // a variable name map from cinn to paddle
+  // a name map from cinn execution arguments to paddle variables
   std::unordered_map<std::string, std::string> cinn2paddle_varmap_;
-  // the variable scope of cinn
+  // the names of the cinn arguments used in compiled executable program
+  std::unordered_set<std::string> cinn_argument_names_;
+  // the variable scope compiled from cinn
   const std::shared_ptr<CinnScope> cinn_scope_;
 
-  // all names of cinn variables used by compiled executable program
-  std::unordered_set<std::string> cinn_variable_names_;
-
-  // because a cinn_pod_value_t does not own the cinn_buffer_t object,
-  // an extra stroage is necessary to keep the object and it can
+  // because a cinn_pod_value_t does not own a cinn_buffer_t object,
+  // an extra storage is necessary to keep those objects and they can
   // not be released until the runtime program finish execution.
   std::vector<std::unique_ptr<cinn_buffer_t>> hold_buffers_;
 
@@ -105,6 +125,5 @@ class CinnLaunchContext {
   std::map<std::string, cinn_pod_value_t> name2argument_;
 };
 
-}  // namespace details
-}  // namespace operators
+}  // namespace operators::details
 }  // namespace paddle
diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
index da7640c3c0f68..a97636a4e9f98 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
@@ -13,14 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/cinn/cinn_launch_context.h"
+#include "cinn/hlir/framework/scope.h"
+#include "cinn/hlir/framework/tensor.h"
+#include "cinn/runtime/cinn_runtime.h"
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/scope.h"
 
 namespace paddle {
-namespace operators {
-namespace details {
+namespace operators::details {
 
+using LoDTensor = framework::LoDTensor;
 using CinnShape = ::cinn::hlir::framework::Shape;
 
 std::unique_ptr<CinnLaunchContext> CreateDefaultLaunchContext() {
@@ -86,7 +89,7 @@ TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) {
                paddle::platform::EnforceNotMet);
 }
 
-TEST(CinnLaunchContextTest, TestSetArgument) {
+TEST(CinnLaunchContextTest, TestAppendArgument) {
   platform::CPUPlace cpu_place;
   platform::Place place(cpu_place);
   framework::Scope scope;
@@ -109,7 +112,8 @@ TEST(CinnLaunchContextTest, TestSetArgument) {
   ASSERT_THROW(launch_context->FinalizeArguments(),
                paddle::platform::EnforceNotMet);
   // test get internal variables
-  auto internal_variable_names = launch_context->GetInternalVariableNames();
+  auto internal_variable_names =
+      launch_context->ExtractInternalVarNames({"var1"}, {"var3"});
   ASSERT_EQ(internal_variable_names.size(), 1);
   EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2");
 
@@ -134,6 +138,5 @@ TEST(CinnLaunchContextTest, TestSetArgument) {
   EXPECT_FLOAT_EQ(shadow_data[10], 19.99f);
 }
 
-}  // namespace details
-}  // namespace operators
+}  // namespace operators::details
 }  // namespace paddle
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h
index bd9b30f559bdb..1db9f2f25e270 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op.h
+++ b/paddle/fluid/operators/cinn/cinn_launch_op.h
@@ -155,7 +155,8 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
       //     Here we directly use the names from CinnScope as Paddle variable
       //     names, because they will not be used outside the graph
       //     and should be destructed after computation finished.
-      auto internal_variable_names = launch_context->GetInternalVariableNames();
+      auto internal_variable_names = launch_context->ExtractInternalVarNames(
+          input_x_variable_names, output_variable_names);
       for (const auto& var_name : internal_variable_names) {
         launch_context->AssignInternalVariable(var_name);
       }
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
index b4cd91ea8a4bc..fb3b4d99a19de 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
@@ -18,154 +18,54 @@ limitations under the License. */
 #include <random>
 #include <string>
 #include "gtest/gtest.h"
-#include "paddle/fluid/framework/ddim.h"
-#include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/operators/cinn/test_helper.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/init.h"
 
 USE_OP(cinn_launch);
 USE_OP_ITSELF(elementwise_add);
 
-namespace paddle {
-namespace operators {
+namespace paddle::operators {
 
-using framework::ir::Graph;
-using framework::ir::Node;
+using framework::paddle2cinn::CinnCompiler;
 
-std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
-    const std::string& x_name, const std::string& y_name,
-    const std::string& out_name) {
-  auto g = std::make_unique<Graph>(framework::ProgramDesc());
-  framework::OpDesc feed_op_x, feed_op_y;
-  feed_op_x.SetType("feed");
-  feed_op_x.SetOutput("Out", {x_name});
-  feed_op_y.SetType("feed");
-  feed_op_y.SetOutput("Out", {y_name});
-
-  framework::VarDesc x_var(x_name);
-  framework::VarDesc y_var(y_name);
-  framework::VarDesc out_var(out_name);
-
-  framework::OpDesc elementwise_add_op;
-  elementwise_add_op.SetType("add");
-  elementwise_add_op.SetInput("X", {x_name});
-  elementwise_add_op.SetInput("Y", {y_name});
-  elementwise_add_op.SetOutput("Out", {out_name});
-
-  auto* feed_op_node_x = g->CreateOpNode(&feed_op_x);
-  auto* feed_op_node_y = g->CreateOpNode(&feed_op_y);
-  auto* elementwise_add_node = g->CreateOpNode(&elementwise_add_op);
-  auto* x_node = g->CreateVarNode(&x_var);
-  auto* y_node = g->CreateVarNode(&y_var);
-  auto* out_node = g->CreateVarNode(&out_var);
-
-  // fill op node
-  feed_op_node_x->outputs = {x_node};
-  feed_op_node_y->outputs = {y_node};
-  elementwise_add_node->inputs = {x_node, y_node};
-  elementwise_add_node->outputs = {out_node};
-
-  // fill variable node
-  x_node->inputs = {feed_op_node_x};
-  x_node->outputs = {elementwise_add_node};
-  y_node->inputs = {feed_op_node_y};
-  y_node->outputs = {elementwise_add_node};
-  out_node->inputs = {elementwise_add_node};
-  return g;
-}
-
-void CreateInputVariablesWithRandomData(
-    const std::vector<std::string>& variable_names,
-    const framework::DDim& common_ddim, framework::Scope* scope) {
-  std::random_device seed;
-  std::default_random_engine engine(seed());
-  std::uniform_real_distribution<float> dist(0.f, 2.f);
-
-  for (const auto& var_name : variable_names) {
-    auto* tensor = scope->Var(var_name)->GetMutable<LoDTensor>();
-    auto* data = tensor->mutable_data<float>(common_ddim, platform::CPUPlace());
-    for (auto i = 0; i < tensor->numel(); ++i) {
-      data[i] = dist(engine);
-    }
-  }
-}
-
-void CopyInputDataToPlace(const framework::Scope& scope,
-                          const platform::Place& dst_place,
-                          framework::Scope* dst_scope) {
-  for (const auto& var_name : scope.LocalVarNames()) {
-    const auto& src_tensor = scope.GetVar(var_name)->Get<LoDTensor>();
-    auto* dst_tensor = dst_scope->Var(var_name)->GetMutable<LoDTensor>();
-    paddle::framework::TensorCopySync(src_tensor, dst_place, dst_tensor);
-  }
-}
-
-TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
+TEST(CinnLaunchOpTest, TestWithElementwiseAdd) {
   paddle::framework::InitDevices();
   platform::SetNumThreads(1);
   // cache test graph into CinnCompiler
-  const auto& test_out_name = "test_out";
-  const auto& expected_out_name = "expected_out";
+  const std::string& test_op_out_name = "cinn_launch_op_out";
+  const std::string& add_op_out_name = "add_op_out";
   auto compilation_key = CinnCompiler::GetInstance()->AddGraph(
-      CreateOnlyElementwiseAddGraph("test_x", "test_y", test_out_name));
+      CreateOnlyElementwiseAddGraph("x", "y", test_op_out_name));
+
   // create cinn_launch_op and elementwise_add op
   auto cinn_launch_op = paddle::framework::OpRegistry::CreateOp(
-      "cinn_launch", {{"X", {"test_x", "test_y"}}}, {{"Out", {test_out_name}}},
+      "cinn_launch", {{"X", {"x", "y"}}}, {{"Out", {test_op_out_name}}},
       {{"compilation_key", compilation_key}});
   auto elementwise_add_op = paddle::framework::OpRegistry::CreateOp(
-      "elementwise_add", {{"X", {"test_x"}}, {"Y", {"test_y"}}},
-      {{"Out", {expected_out_name}}}, {{}});
-  // prepare input data
-  framework::Scope init_scope;
-  CreateInputVariablesWithRandomData({"test_x", "test_y"}, {10, 20},
-                                     &init_scope);
+      "elementwise_add", {{"X", {"x"}}, {"Y", {"y"}}},
+      {{"Out", {add_op_out_name}}}, {{}});
+
   // Run ops and check the computation results
   auto run_and_check_fn = [&](const platform::Place& place) {
     framework::Scope scope;
-    CopyInputDataToPlace(init_scope, place, &scope);
-    scope.Var(test_out_name)->GetMutable<LoDTensor>();
-    scope.Var(expected_out_name)->GetMutable<LoDTensor>();
-
-    platform::Place run_place(place);
-    cinn_launch_op->Run(scope, run_place);
-    elementwise_add_op->Run(scope, run_place);
-
-    LoDTensor test_out, expected_out;
-    paddle::framework::TensorCopySync(
-        scope.Var(test_out_name)->Get<LoDTensor>(), platform::CPUPlace(),
-        &test_out);
-    paddle::framework::TensorCopySync(
-        scope.Var(expected_out_name)->Get<LoDTensor>(), platform::CPUPlace(),
-        &expected_out);
-
-    ASSERT_TRUE(test_out.IsInitialized());
-    ASSERT_TRUE(expected_out.IsInitialized());
-    ASSERT_EQ(test_out.dims(), expected_out.dims());
-    const auto* test_data = test_out.data<float>();
-    const auto* excepted_data = expected_out.data<float>();
-    for (auto i = 0; i < expected_out.numel(); ++i) {
-      EXPECT_FLOAT_EQ(test_data[i], excepted_data[i]);
-    }
+    InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
+    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    cinn_launch_op->Run(scope, place);
+    elementwise_add_op->Run(scope, place);
+    CompareOpResult<float>(scope.GetVar(test_op_out_name),
+                           scope.GetVar(add_op_out_name));
   };
 
-  LOG(INFO) << "Check compute result on cpu";
+  // CPU
   run_and_check_fn(platform::CPUPlace());
   run_and_check_fn(platform::CPUPlace());
-
 #ifdef PADDLE_WITH_CUDA
-  // create an new elementwise_add op
-  // because the above one cached the cpu kernel
-  LOG(INFO) << "Check compute result on gpu";
-  cinn_launch_op = paddle::framework::OpRegistry::CreateOp(
-      "cinn_launch", {{"X", {"test_x", "test_y"}}}, {{"Out", {test_out_name}}},
-      {{"compilation_key", compilation_key}});
-  elementwise_add_op = paddle::framework::OpRegistry::CreateOp(
-      "elementwise_add", {{"X", {"test_x"}}, {"Y", {"test_y"}}},
-      {{"Out", {expected_out_name}}}, {{}});
+  // GPU
   run_and_check_fn(platform::CUDAPlace());
   run_and_check_fn(platform::CUDAPlace());
 #endif
@@ -175,8 +75,6 @@ namespace details {
 // Testing helper function used on CinnLaunchOpKernel in the following:
 // firstly build test data, then check both expected and illegal situations
 
-using CinnShape = ::cinn::hlir::framework::Shape;
-
 TEST(CinnLaunchOpHelperTest, TestPlaceToCinnTarget) {
   ASSERT_EQ(PlaceToCinnTarget(platform::CPUPlace()),
             ::cinn::common::DefaultHostTarget());
@@ -187,5 +85,4 @@ TEST(CinnLaunchOpHelperTest, TestPlaceToCinnTarget) {
 }
 
 }  // namespace details
-}  // namespace operators
-}  // namespace paddle
+}  // namespace paddle::operators
diff --git a/paddle/fluid/operators/cinn/test_helper.h b/paddle/fluid/operators/cinn/test_helper.h
new file mode 100644
index 0000000000000..7b8abcc33d59d
--- /dev/null
+++ b/paddle/fluid/operators/cinn/test_helper.h
@@ -0,0 +1,116 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/ddim.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle::operators {
+
+using LoDTensor = framework::LoDTensor;
+using Variable = framework::Variable;
+using Graph = framework::ir::Graph;
+using Node = framework::ir::Node;
+
+// Builds a graph: two "feed" ops output x and y, one "add" op takes both.
+// Declared inline because the definition lives in a header (one-definition rule).
+inline std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
+    const std::string& x_name, const std::string& y_name,
+    const std::string& out_name) {
+  auto g = std::make_unique<Graph>(framework::ProgramDesc());
+  framework::OpDesc feed_op_x, feed_op_y;
+  feed_op_x.SetType("feed");
+  feed_op_x.SetOutput("Out", {x_name});
+  feed_op_y.SetType("feed");
+  feed_op_y.SetOutput("Out", {y_name});
+
+  framework::VarDesc x_var(x_name);
+  framework::VarDesc y_var(y_name);
+  framework::VarDesc out_var(out_name);
+
+  framework::OpDesc elementwise_add_op;
+  elementwise_add_op.SetType("add");
+  elementwise_add_op.SetInput("X", {x_name});
+  elementwise_add_op.SetInput("Y", {y_name});
+  elementwise_add_op.SetOutput("Out", {out_name});
+
+  auto* feed_op_node_x = g->CreateOpNode(&feed_op_x);
+  auto* feed_op_node_y = g->CreateOpNode(&feed_op_y);
+  auto* elementwise_add_node = g->CreateOpNode(&elementwise_add_op);
+  auto* x_node = g->CreateVarNode(&x_var);
+  auto* y_node = g->CreateVarNode(&y_var);
+  auto* out_node = g->CreateVarNode(&out_var);
+  // fill op node: feed_x -> x, feed_y -> y, add(x, y) -> out
+  feed_op_node_x->outputs = {x_node};
+  feed_op_node_y->outputs = {y_node};
+  elementwise_add_node->inputs = {x_node, y_node};
+  elementwise_add_node->outputs = {out_node};
+  // fill variable node: mirror the op edges above (out has no consumer)
+  x_node->inputs = {feed_op_node_x};
+  x_node->outputs = {elementwise_add_node};
+  y_node->inputs = {feed_op_node_y};
+  y_node->outputs = {elementwise_add_node};
+  out_node->inputs = {elementwise_add_node};
+  return g;
+}
+
+// Fills each named variable in scope with random DataType values on place.
+template <typename DataType>
+void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
+                                  const framework::DDim& common_ddim,
+                                  const platform::Place& place,
+                                  framework::Scope* scope) {
+  std::default_random_engine engine(std::random_device{}());
+  std::uniform_real_distribution<float> dist(0, 100);  // draws from [0, 100)
+  LoDTensor tmp_tensor;  // host staging buffer, refilled for every variable
+  auto* tmp_data =
+      tmp_tensor.mutable_data<DataType>(common_ddim, platform::CPUPlace());
+  for (const auto& var_name : var_names) {
+    auto* tensor = scope->Var(var_name)->GetMutable<LoDTensor>();
+    // Bound by the staging buffer: a freshly created tensor may still be empty.
+    for (auto i = 0; i < tmp_tensor.numel(); ++i) {
+      tmp_data[i] = static_cast<DataType>(dist(engine));
+    }
+    paddle::framework::TensorCopySync(tmp_tensor, place, tensor);
+  }
+}
+
+template <typename DataType>  // DataType: element type read from both tensors
+void CompareOpResult(Variable* test_out, Variable* expected_out) {
+  LoDTensor test_tensor, expected_tensor;  // CPU-side copies of both results
+  paddle::framework::TensorCopySync(test_out->Get<LoDTensor>(),
+                                    platform::CPUPlace(), &test_tensor);
+  paddle::framework::TensorCopySync(expected_out->Get<LoDTensor>(),
+                                    platform::CPUPlace(), &expected_tensor);
+  // Both copies must be initialized and equally shaped before comparing data.
+  ASSERT_TRUE(test_tensor.IsInitialized());
+  ASSERT_TRUE(expected_tensor.IsInitialized());
+  ASSERT_EQ(test_tensor.dims(), expected_tensor.dims());
+  const auto* test_data = test_tensor.data<DataType>();
+  const auto* excepted_data = expected_tensor.data<DataType>();
+  for (auto i = 0; i < expected_tensor.numel(); ++i) {
+    EXPECT_EQ(test_data[i], excepted_data[i]);  // exact match, no tolerance
+  }
+}
+
+}  // namespace paddle::operators