From 0be006edc7c3ddae49d8aed19c33f77488ae2575 Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Sun, 12 Jul 2020 03:10:25 +0200
Subject: [PATCH] async_hooks: improve resource stack performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes some of the performance overhead that came with
`executionAsyncResource()` by using the JS resource array
only as a cache for the values provided by C++. The fact that we now
use an entry trampoline is used to pass the resource without
requiring extra C++/JS boundary crossings, and the direct accesses
to the JS resource array from C++ are removed in all fast paths.

This particularly improves performance when async hooks are not
being used.

This is a continuation of https://github.com/nodejs/node/pull/33575
and shares some of its code with it.

    ./node benchmark/compare.js --new ./node --old ./node-master --runs 30 --filter messageport worker | Rscript benchmark/compare.R
    [00:06:14|% 100| 1/1 files | 60/60 runs | 2/2 configs]: Done
                                                       confidence improvement accuracy (*)    (**)   (***)
     worker/messageport.js n=1000000 payload='object'         **     12.64 %       ±7.30%  ±9.72% ±12.65%
     worker/messageport.js n=1000000 payload='string'          *     11.08 %       ±9.00% ±11.98% ±15.59%

    ./node benchmark/compare.js --new ./node --old ./node-master --runs 20 --filter async-resource-vs-destroy async_hooks | Rscript benchmark/compare.R
    [00:22:35|% 100| 1/1 files | 40/40 runs | 6/6 configs]: Done
                                                                                                                                                                         confidence improvement accuracy (*)
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='async' type='async-local-storage' benchmarker='autocannon'                     1.60 %       ±7.35%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='async' type='async-resource' benchmarker='autocannon'                          6.05 %       ±6.57%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='async' type='destroy' benchmarker='autocannon'                          *      8.27 %       ±7.50%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='callbacks' type='async-local-storage' benchmarker='autocannon'                 7.42 %       ±8.22%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='callbacks' type='async-resource' benchmarker='autocannon'                      4.33 %       ±7.84%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='callbacks' type='destroy' benchmarker='autocannon'                             5.96 %       ±7.15%
                                                                                                                                                                           (**)   (***)
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='async' type='async-local-storage' benchmarker='autocannon'      ±9.84% ±12.94%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='async' type='async-resource' benchmarker='autocannon'           ±8.81% ±11.60%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='async' type='destroy' benchmarker='autocannon'                 ±10.07% ±13.28%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='callbacks' type='async-local-storage' benchmarker='autocannon' ±11.01% ±14.48%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='callbacks' type='async-resource' benchmarker='autocannon'      ±10.50% ±13.81%
     async_hooks/async-resource-vs-destroy.js n=1000000 duration=5 connections=500 path='/' asyncMethod='callbacks' type='destroy' benchmarker='autocannon'              ±9.58% ±12.62%

Refs: https://github.com/nodejs/node/pull/33575

PR-URL: https://github.com/nodejs/node/pull/34319
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Gerhard Stöbich <deb2001-github@yahoo.de>
Reviewed-By: Stephen Belanger <admin@stephenbelanger.com>
---
 lib/internal/async_hooks.js       | 33 +++++++-------
 lib/internal/process/execution.js |  6 +--
 src/api/callback.cc               | 21 +++++----
 src/async_wrap.cc                 | 23 +++++++++-
 src/async_wrap.h                  |  4 ++
 src/env-inl.h                     | 72 +++++++++++++++++++++++++------
 src/env.h                         | 14 ++++--
 7 files changed, 129 insertions(+), 44 deletions(-)

diff --git a/lib/internal/async_hooks.js b/lib/internal/async_hooks.js
index d98014c368202c..84e5025925c36a 100644
--- a/lib/internal/async_hooks.js
+++ b/lib/internal/async_hooks.js
@@ -48,7 +48,9 @@ const {
 // each hook's after() callback.
 const {
   pushAsyncContext: pushAsyncContext_,
-  popAsyncContext: popAsyncContext_
+  popAsyncContext: popAsyncContext_,
+  executionAsyncResource: executionAsyncResource_,
+  clearAsyncIdStack,
 } = async_wrap;
 // For performance reasons, only track Promises when a hook is enabled.
 const { enablePromiseHook, disablePromiseHook } = async_wrap;
@@ -87,7 +89,8 @@ const { resource_symbol, owner_symbol } = internalBinding('symbols');
 // for a given step, that step can bail out early.
 const { kInit, kBefore, kAfter, kDestroy, kTotals, kPromiseResolve,
         kCheck, kExecutionAsyncId, kAsyncIdCounter, kTriggerAsyncId,
-        kDefaultTriggerAsyncId, kStackLength } = async_wrap.constants;
+        kDefaultTriggerAsyncId, kStackLength, kUsesExecutionAsyncResource
+} = async_wrap.constants;
 
 // Used in AsyncHook and AsyncResource.
 const async_id_symbol = Symbol('asyncId');
@@ -108,7 +111,10 @@ function useDomainTrampoline(fn) {
   domain_cb = fn;
 }
 
-function callbackTrampoline(asyncId, cb, ...args) {
+function callbackTrampoline(asyncId, resource, cb, ...args) {
+  const index = async_hook_fields[kStackLength] - 1;
+  execution_async_resources[index] = resource;
+
   if (asyncId !== 0 && hasHooks(kBefore))
     emitBeforeNative(asyncId);
 
@@ -123,6 +129,7 @@ function callbackTrampoline(asyncId, cb, ...args) {
   if (asyncId !== 0 && hasHooks(kAfter))
     emitAfterNative(asyncId);
 
+  execution_async_resources.pop();
   return result;
 }
 
@@ -131,9 +138,15 @@ setCallbackTrampoline(callbackTrampoline);
 const topLevelResource = {};
 
 function executionAsyncResource() {
+  // Indicate to the native layer that this function is likely to be used,
+  // in which case it will inform JS about the current async resource via
+  // the trampoline above.
+  async_hook_fields[kUsesExecutionAsyncResource] = 1;
+
   const index = async_hook_fields[kStackLength] - 1;
   if (index === -1) return topLevelResource;
-  const resource = execution_async_resources[index];
+  const resource = execution_async_resources[index] ||
+    executionAsyncResource_(index);
   return lookupPublicResource(resource);
 }
 
@@ -413,16 +426,6 @@ function emitDestroyScript(asyncId) {
 }
 
 
-// Keep in sync with Environment::AsyncHooks::clear_async_id_stack
-// in src/env-inl.h.
-function clearAsyncIdStack() {
-  async_id_fields[kExecutionAsyncId] = 0;
-  async_id_fields[kTriggerAsyncId] = 0;
-  async_hook_fields[kStackLength] = 0;
-  execution_async_resources.splice(0, execution_async_resources.length);
-}
-
-
 function hasAsyncIdStack() {
   return hasHooks(kStackLength);
 }
@@ -432,7 +435,7 @@ function hasAsyncIdStack() {
 function pushAsyncContext(asyncId, triggerAsyncId, resource) {
   const offset = async_hook_fields[kStackLength];
   if (offset * 2 >= async_wrap.async_ids_stack.length)
-    return pushAsyncContext_(asyncId, triggerAsyncId, resource);
+    return pushAsyncContext_(asyncId, triggerAsyncId);
   async_wrap.async_ids_stack[offset * 2] = async_id_fields[kExecutionAsyncId];
   async_wrap.async_ids_stack[offset * 2 + 1] = async_id_fields[kTriggerAsyncId];
   execution_async_resources[offset] = resource;
diff --git a/lib/internal/process/execution.js b/lib/internal/process/execution.js
index 4406ea09bba30c..6f2f39500d9821 100644
--- a/lib/internal/process/execution.js
+++ b/lib/internal/process/execution.js
@@ -186,10 +186,10 @@ function createOnGlobalUncaughtException() {
       do {
         emitAfter(executionAsyncId());
       } while (hasAsyncIdStack());
-    // Or completely empty the id stack.
-    } else {
-      clearAsyncIdStack();
     }
+    // And completely empty the id stack, including anything that may be
+    // cached on the native side.
+    clearAsyncIdStack();
 
     return true;
   };
diff --git a/src/api/callback.cc b/src/api/callback.cc
index 9f52c25cf0d900..2bb34b088f74e1 100644
--- a/src/api/callback.cc
+++ b/src/api/callback.cc
@@ -160,12 +160,16 @@ MaybeLocal<Value> InternalMakeCallback(Environment* env,
 
   Local<Function> hook_cb = env->async_hooks_callback_trampoline();
   int flags = InternalCallbackScope::kNoFlags;
-  int hook_count = 0;
+  bool use_async_hooks_trampoline = false;
+  AsyncHooks* async_hooks = env->async_hooks();
   if (!hook_cb.IsEmpty()) {
+    // Use the callback trampoline if there are any before or after hooks, or
+    // we can expect some kind of usage of async_hooks.executionAsyncResource().
     flags = InternalCallbackScope::kSkipAsyncHooks;
-    AsyncHooks* async_hooks = env->async_hooks();
-    hook_count = async_hooks->fields()[AsyncHooks::kBefore] +
-                 async_hooks->fields()[AsyncHooks::kAfter];
+    use_async_hooks_trampoline =
+        async_hooks->fields()[AsyncHooks::kBefore] +
+        async_hooks->fields()[AsyncHooks::kAfter] +
+        async_hooks->fields()[AsyncHooks::kUsesExecutionAsyncResource] > 0;
   }
 
   InternalCallbackScope scope(env, resource, asyncContext, flags);
@@ -175,12 +179,13 @@ MaybeLocal<Value> InternalMakeCallback(Environment* env,
 
   MaybeLocal<Value> ret;
 
-  if (hook_count != 0) {
-    MaybeStackBuffer<Local<Value>, 16> args(2 + argc);
+  if (use_async_hooks_trampoline) {
+    MaybeStackBuffer<Local<Value>, 16> args(3 + argc);
     args[0] = v8::Number::New(env->isolate(), asyncContext.async_id);
-    args[1] = callback;
+    args[1] = resource;
+    args[2] = callback;
     for (int i = 0; i < argc; i++) {
-      args[i + 2] = argv[i];
+      args[i + 3] = argv[i];
     }
     ret = hook_cb->Call(env->context(), recv, args.length(), &args[0]);
   } else {
diff --git a/src/async_wrap.cc b/src/async_wrap.cc
index 43565eea16ea57..8056e4ba8bbe81 100644
--- a/src/async_wrap.cc
+++ b/src/async_wrap.cc
@@ -419,7 +419,7 @@ void AsyncWrap::PushAsyncContext(const FunctionCallbackInfo<Value>& args) {
   // then the checks in push_async_ids() and pop_async_id() will.
   double async_id = args[0]->NumberValue(env->context()).FromJust();
   double trigger_async_id = args[1]->NumberValue(env->context()).FromJust();
-  env->async_hooks()->push_async_context(async_id, trigger_async_id, args[2]);
+  env->async_hooks()->push_async_context(async_id, trigger_async_id, {});
 }
 
 
@@ -430,6 +430,22 @@ void AsyncWrap::PopAsyncContext(const FunctionCallbackInfo<Value>& args) {
 }
 
 
+void AsyncWrap::ExecutionAsyncResource(
+    const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  uint32_t index;
+  if (!args[0]->Uint32Value(env->context()).To(&index)) return;
+  args.GetReturnValue().Set(
+      env->async_hooks()->native_execution_async_resource(index));
+}
+
+
+void AsyncWrap::ClearAsyncIdStack(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  env->async_hooks()->clear_async_id_stack();
+}
+
+
 void AsyncWrap::AsyncReset(const FunctionCallbackInfo<Value>& args) {
   CHECK(args[0]->IsObject());
 
@@ -502,6 +518,8 @@ void AsyncWrap::Initialize(Local<Object> target,
   env->SetMethod(target, "setCallbackTrampoline", SetCallbackTrampoline);
   env->SetMethod(target, "pushAsyncContext", PushAsyncContext);
   env->SetMethod(target, "popAsyncContext", PopAsyncContext);
+  env->SetMethod(target, "executionAsyncResource", ExecutionAsyncResource);
+  env->SetMethod(target, "clearAsyncIdStack", ClearAsyncIdStack);
   env->SetMethod(target, "queueDestroyAsyncId", QueueDestroyAsyncId);
   env->SetMethod(target, "enablePromiseHook", EnablePromiseHook);
   env->SetMethod(target, "disablePromiseHook", DisablePromiseHook);
@@ -540,7 +558,7 @@ void AsyncWrap::Initialize(Local<Object> target,
 
   FORCE_SET_TARGET_FIELD(target,
                          "execution_async_resources",
-                         env->async_hooks()->execution_async_resources());
+                         env->async_hooks()->js_execution_async_resources());
 
   target->Set(context,
               env->async_ids_stack_string(),
@@ -562,6 +580,7 @@ void AsyncWrap::Initialize(Local<Object> target,
   SET_HOOKS_CONSTANT(kTriggerAsyncId);
   SET_HOOKS_CONSTANT(kAsyncIdCounter);
   SET_HOOKS_CONSTANT(kDefaultTriggerAsyncId);
+  SET_HOOKS_CONSTANT(kUsesExecutionAsyncResource);
   SET_HOOKS_CONSTANT(kStackLength);
 #undef SET_HOOKS_CONSTANT
   FORCE_SET_TARGET_FIELD(target, "constants", constants);
diff --git a/src/async_wrap.h b/src/async_wrap.h
index 1248323ac5c0af..577d11b6cc881f 100644
--- a/src/async_wrap.h
+++ b/src/async_wrap.h
@@ -136,6 +136,10 @@ class AsyncWrap : public BaseObject {
   static void GetAsyncId(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void PushAsyncContext(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void PopAsyncContext(const v8::FunctionCallbackInfo<v8::Value>& args);
+  static void ExecutionAsyncResource(
+      const v8::FunctionCallbackInfo<v8::Value>& args);
+  static void ClearAsyncIdStack(
+      const v8::FunctionCallbackInfo<v8::Value>& args);
   static void AsyncReset(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void GetProviderType(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void QueueDestroyAsyncId(
diff --git a/src/env-inl.h b/src/env-inl.h
index c6ef9dc13ab6f1..a5b487d02d277e 100644
--- a/src/env-inl.h
+++ b/src/env-inl.h
@@ -104,8 +104,17 @@ inline AliasedFloat64Array& AsyncHooks::async_ids_stack() {
   return async_ids_stack_;
 }
 
-inline v8::Local<v8::Array> AsyncHooks::execution_async_resources() {
-  return PersistentToLocal::Strong(execution_async_resources_);
+v8::Local<v8::Array> AsyncHooks::js_execution_async_resources() {
+  if (UNLIKELY(js_execution_async_resources_.IsEmpty())) {
+    js_execution_async_resources_.Reset(
+        env()->isolate(), v8::Array::New(env()->isolate()));
+  }
+  return PersistentToLocal::Strong(js_execution_async_resources_);
+}
+
+v8::Local<v8::Object> AsyncHooks::native_execution_async_resource(size_t i) {
+  if (i >= native_execution_async_resources_.size()) return {};
+  return PersistentToLocal::Strong(native_execution_async_resources_[i]);
 }
 
 inline v8::Local<v8::String> AsyncHooks::provider_string(int idx) {
@@ -123,9 +132,7 @@ inline Environment* AsyncHooks::env() {
 // Remember to keep this code aligned with pushAsyncContext() in JS.
 inline void AsyncHooks::push_async_context(double async_id,
                                            double trigger_async_id,
-                                           v8::Local<v8::Value> resource) {
-  v8::HandleScope handle_scope(env()->isolate());
-
+                                           v8::Local<v8::Object> resource) {
   // Since async_hooks is experimental, do only perform the check
   // when async_hooks is enabled.
   if (fields_[kCheck] > 0) {
@@ -142,8 +149,19 @@ inline void AsyncHooks::push_async_context(double async_id,
   async_id_fields_[kExecutionAsyncId] = async_id;
   async_id_fields_[kTriggerAsyncId] = trigger_async_id;
 
-  auto resources = execution_async_resources();
-  USE(resources->Set(env()->context(), offset, resource));
+#ifdef DEBUG
+  for (uint32_t i = offset; i < native_execution_async_resources_.size(); i++)
+    CHECK(native_execution_async_resources_[i].IsEmpty());
+#endif
+
+  // When this call comes from JS (as a way of increasing the stack size),
+  // `resource` will be empty, because JS caches these values anyway, and
+  // we should avoid creating strong global references that might keep
+  // these JS resource objects alive longer than necessary.
+  if (!resource.IsEmpty()) {
+    native_execution_async_resources_.resize(offset + 1);
+    native_execution_async_resources_[offset].Reset(env()->isolate(), resource);
+  }
 }
 
 // Remember to keep this code aligned with popAsyncContext() in JS.
@@ -176,17 +194,45 @@ inline bool AsyncHooks::pop_async_context(double async_id) {
   async_id_fields_[kTriggerAsyncId] = async_ids_stack_[2 * offset + 1];
   fields_[kStackLength] = offset;
 
-  auto resources = execution_async_resources();
-  USE(resources->Delete(env()->context(), offset));
+  if (LIKELY(offset < native_execution_async_resources_.size() &&
+             !native_execution_async_resources_[offset].IsEmpty())) {
+#ifdef DEBUG
+    for (uint32_t i = offset + 1;
+         i < native_execution_async_resources_.size();
+         i++) {
+      CHECK(native_execution_async_resources_[i].IsEmpty());
+    }
+#endif
+    native_execution_async_resources_.resize(offset);
+    if (native_execution_async_resources_.size() <
+            native_execution_async_resources_.capacity() / 2 &&
+        native_execution_async_resources_.size() > 16) {
+      native_execution_async_resources_.shrink_to_fit();
+    }
+  }
+
+  if (UNLIKELY(js_execution_async_resources()->Length() > offset)) {
+    v8::HandleScope handle_scope(env()->isolate());
+    USE(js_execution_async_resources()->Set(
+        env()->context(),
+        env()->length_string(),
+        v8::Integer::NewFromUnsigned(env()->isolate(), offset)));
+  }
 
   return fields_[kStackLength] > 0;
 }
 
-// Keep in sync with clearAsyncIdStack in lib/internal/async_hooks.js.
-inline void AsyncHooks::clear_async_id_stack() {
-  auto isolate = env()->isolate();
+void AsyncHooks::clear_async_id_stack() {
+  v8::Isolate* isolate = env()->isolate();
   v8::HandleScope handle_scope(isolate);
-  execution_async_resources_.Reset(isolate, v8::Array::New(isolate));
+  if (!js_execution_async_resources_.IsEmpty()) {
+    USE(PersistentToLocal::Strong(js_execution_async_resources_)->Set(
+        env()->context(),
+        env()->length_string(),
+        v8::Integer::NewFromUnsigned(isolate, 0)));
+  }
+  native_execution_async_resources_.clear();
+  native_execution_async_resources_.shrink_to_fit();
 
   async_id_fields_[kExecutionAsyncId] = 0;
   async_id_fields_[kTriggerAsyncId] = 0;
diff --git a/src/env.h b/src/env.h
index d22b579b25ce4e..1c1c98f4f606ef 100644
--- a/src/env.h
+++ b/src/env.h
@@ -275,6 +275,7 @@ constexpr size_t kFsStatsBufferLength =
   V(issuercert_string, "issuerCertificate")                                    \
   V(kill_signal_string, "killSignal")                                          \
   V(kind_string, "kind")                                                       \
+  V(length_string, "length")                                                   \
   V(library_string, "library")                                                 \
   V(mac_string, "mac")                                                         \
   V(max_buffer_string, "maxBuffer")                                            \
@@ -642,6 +643,7 @@ class AsyncHooks : public MemoryRetainer {
     kTotals,
     kCheck,
     kStackLength,
+    kUsesExecutionAsyncResource,
     kFieldsCount,
   };
 
@@ -656,7 +658,12 @@ class AsyncHooks : public MemoryRetainer {
   inline AliasedUint32Array& fields();
   inline AliasedFloat64Array& async_id_fields();
   inline AliasedFloat64Array& async_ids_stack();
-  inline v8::Local<v8::Array> execution_async_resources();
+  inline v8::Local<v8::Array> js_execution_async_resources();
+  // Returns the native executionAsyncResource value at stack index `index`.
+  // Resources provided on the JS side are not stored on the native stack,
+  // in which case an empty `Local<>` is returned.
+  // The `js_execution_async_resources` array contains the value in that case.
+  inline v8::Local<v8::Object> native_execution_async_resource(size_t index);
 
   inline v8::Local<v8::String> provider_string(int idx);
 
@@ -664,7 +671,7 @@ class AsyncHooks : public MemoryRetainer {
   inline Environment* env();
 
   inline void push_async_context(double async_id, double trigger_async_id,
-      v8::Local<v8::Value> execution_async_resource_);
+      v8::Local<v8::Object> execution_async_resource_);
   inline bool pop_async_context(double async_id);
   inline void clear_async_id_stack();  // Used in fatal exceptions.
 
@@ -709,7 +716,8 @@ class AsyncHooks : public MemoryRetainer {
 
   void grow_async_ids_stack();
 
-  v8::Global<v8::Array> execution_async_resources_;
+  v8::Global<v8::Array> js_execution_async_resources_;
+  std::vector<v8::Global<v8::Object>> native_execution_async_resources_;
 };
 
 class ImmediateInfo : public MemoryRetainer {