feat(ops): fast async(lazy) and async(deferred) (denoland#269)
This doesn't move the needle much on async ops that are submitted and immediately awaited, but it does offer slightly better latency when taking a promise from an async op and then awaiting it after performing other work.

One of the big refactorings in this PR is ensuring that async and sync fast ops share the same code path. The same refactoring can be applied to slow ops, but that would be slightly too much to tackle in this PR.
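
In JavaScript terms, the usage pattern that benefits looks roughly like the sketch below (illustration only, not part of this diff; it uses op_async_void_lazy, one of the benchmark ops added in core/benches/ops/async.rs, and the Deno.core.ensureFastOps() binding that the benchmark harness also uses):

// Sketch: submit an async op, do other work, and only then await the result.
const { op_async_void_lazy } = Deno.core.ensureFastOps();

async function example() {
  // Kick off the op without awaiting it right away.
  const pending = op_async_void_lazy();

  // ... perform other work here ...

  // The latency win described above applies when the promise is awaited later.
  await pending;
}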
mmastrac authored Oct 23, 2023
1 parent c16caf0 commit dea2935
Showing 30 changed files with 1,290 additions and 290 deletions.
6 changes: 6 additions & 0 deletions core/Cargo.toml
@@ -56,3 +56,9 @@ name = "ops_sync"
path = "benches/ops/sync.rs"
harness = false
required-features = ["unsafe_runtime_options"]

[[bench]]
name = "ops_async"
path = "benches/ops/async.rs"
harness = false
required-features = ["unsafe_runtime_options"]
223 changes: 223 additions & 0 deletions core/benches/ops/async.rs
@@ -0,0 +1,223 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
use bencher::*;
use deno_core::error::generic_error;
use deno_core::*;
use std::ffi::c_void;

deno_core::extension!(
  testing,
  ops = [
    op_void,
    op_make_external,
    op_async_void,
    op_async_void_lazy,
    op_async_void_lazy_nofast,
    op_async_void_deferred,
    op_async_void_deferred_nofast,
    op_async_void_deferred_return,
    op_async_yield,
    op_async_yield_lazy,
    op_async_yield_lazy_nofast,
    op_async_yield_deferred,
    op_async_yield_deferred_nofast,
  ],
);

#[op2(fast)]
pub fn op_void() {}

#[op2(fast)]
pub fn op_make_external() -> *const c_void {
  std::ptr::null()
}

#[op2(async)]
pub async fn op_async_void() {}

#[op2(async)]
pub async fn op_async_yield() {
  tokio::task::yield_now().await
}

#[op2(async(lazy), fast)]
pub async fn op_async_yield_lazy() {
  tokio::task::yield_now().await
}

#[op2(async(lazy), nofast)]
pub async fn op_async_yield_lazy_nofast() {
  tokio::task::yield_now().await
}

#[op2(async(deferred), fast)]
pub async fn op_async_yield_deferred() {
  tokio::task::yield_now().await
}

#[op2(async(deferred), nofast)]
pub async fn op_async_yield_deferred_nofast() {
  tokio::task::yield_now().await
}

#[op2(async(lazy), fast)]
pub async fn op_async_void_lazy() {}

#[op2(async(lazy), nofast)]
pub async fn op_async_void_lazy_nofast() {}

#[op2(async(deferred), fast)]
pub async fn op_async_void_deferred_return() -> u32 {
  1
}

#[op2(async(deferred), fast)]
pub async fn op_async_void_deferred() {}

#[op2(async(deferred), nofast)]
pub async fn op_async_void_deferred_nofast() {}

fn bench_op(
  b: &mut Bencher,
  count: usize,
  op: &str,
  arg_count: usize,
  call: &str,
) {
  #[cfg(not(feature = "unsafe_runtime_options"))]
  unreachable!(
    "This benchmark must be run with --features=unsafe_runtime_options"
  );

  let tokio = tokio::runtime::Builder::new_current_thread()
    .build()
    .unwrap();
  let mut runtime = JsRuntime::new(RuntimeOptions {
    extensions: vec![testing::init_ops_and_esm()],
    // We need to feature gate this here to prevent IDE errors
    #[cfg(feature = "unsafe_runtime_options")]
    unsafe_expose_natives_and_gc: true,
    ..Default::default()
  });
  let err_mapper =
    |err| generic_error(format!("{op} test failed ({call}): {err:?}"));

  let args = (0..arg_count)
    .map(|n| format!("arg{n}"))
    .collect::<Vec<_>>()
    .join(", ");

  let mut harness = include_str!("async_harness.js").to_owned();
  for (key, value) in [
    ("PERCENT", "%"),
    ("CALL", call),
    ("COUNT", &format!("{count}")),
    ("ARGS", &args.to_string()),
    ("OP", op),
  ] {
    harness = harness.replace(&format!("__{key}__"), value);
  }

  // Prime the optimizer
  runtime
    .execute_script("", FastString::Owned(harness.into()))
    .map_err(err_mapper)
    .unwrap();
  let guard = tokio.enter();
  let run = runtime.execute_script("", ascii_str!("run()")).unwrap();
  let bench = tokio.block_on(runtime.resolve_value(run)).unwrap();
  let mut scope = runtime.handle_scope();
  let bench: v8::Local<v8::Function> =
    v8::Local::new(&mut scope, bench).try_into().unwrap();
  let bench = v8::Global::new(&mut scope, bench);
  drop(scope);
  drop(guard);
  b.iter(move || {
    tokio.block_on(async {
      let guard = tokio.enter();
      runtime.call_and_await(&bench).await.unwrap();
      drop(guard);
    });
  });
}

const BENCH_COUNT: usize = 1000;

/// Tests the overhead of execute_script.
fn baseline(b: &mut Bencher) {
  bench_op(b, BENCH_COUNT, "op_async_void", 0, "accum += __index__;");
}

/// Tests the overhead of execute_script with a promise.
fn baseline_promise(b: &mut Bencher) {
  bench_op(
    b,
    BENCH_COUNT,
    "op_async_void",
    0,
    "await Promise.resolve(null);",
  );
}

/// Tests the overhead of execute_script, but also returns a value so we can make sure things are
/// working.
fn bench_op_async_void_deferred_return(b: &mut Bencher) {
  bench_op(
    b,
    BENCH_COUNT,
    "op_async_void_deferred_return",
    0,
    "accum += await op_async_void_deferred_return();",
  );
}

macro_rules! bench_void {
  ($bench:ident, $op:ident) => {
    fn $bench(b: &mut Bencher) {
      bench_op(
        b,
        BENCH_COUNT,
        stringify!($op),
        0,
        concat!("await ", stringify!($op), "()"),
      );
    }
  };
}

bench_void!(baseline_sync, op_void);
bench_void!(bench_op_async_yield, op_async_yield);
bench_void!(bench_op_async_yield_lazy, op_async_yield_lazy);
bench_void!(bench_op_async_yield_lazy_nofast, op_async_yield_lazy_nofast);
bench_void!(bench_op_async_yield_deferred, op_async_yield_deferred);
bench_void!(
  bench_op_async_yield_deferred_nofast,
  op_async_yield_deferred_nofast
);
bench_void!(bench_op_async_void, op_async_void);
bench_void!(bench_op_async_void_lazy, op_async_void_lazy);
bench_void!(bench_op_async_void_lazy_nofast, op_async_void_lazy_nofast);
bench_void!(bench_op_async_void_deferred, op_async_void_deferred);
bench_void!(
  bench_op_async_void_deferred_nofast,
  op_async_void_deferred_nofast
);

benchmark_group!(
  benches,
  baseline,
  baseline_promise,
  baseline_sync,
  bench_op_async_yield,
  bench_op_async_yield_lazy,
  bench_op_async_yield_lazy_nofast,
  bench_op_async_yield_deferred,
  bench_op_async_yield_deferred_nofast,
  bench_op_async_void,
  bench_op_async_void_lazy,
  bench_op_async_void_lazy_nofast,
  bench_op_async_void_deferred,
  bench_op_async_void_deferred_nofast,
  bench_op_async_void_deferred_return,
);

benchmark_main!(benches);
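
As the required-features entry in the Cargo.toml change above indicates, this benchmark only builds with the unsafe_runtime_options feature (the unreachable!() guard in bench_op enforces the same thing at runtime), so it is invoked with something like cargo bench --bench ops_async --features=unsafe_runtime_options.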
41 changes: 41 additions & 0 deletions core/benches/ops/async_harness.js
@@ -0,0 +1,41 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// deno-lint-ignore-file no-unused-vars, prefer-const, require-await

// This harness is dynamically generated for each individual bench run.
async function run() {
  const LARGE_STRING_1000000 = "*".repeat(1000000);
  const LARGE_STRING_1000 = "*".repeat(1000);
  const LARGE_STRING_UTF8_1000000 = "\u1000".repeat(1000000);
  const LARGE_STRING_UTF8_1000 = "\u1000".repeat(1000);
  const BUFFER = new Uint8Array(1024);
  const ARRAYBUFFER = new ArrayBuffer(1024);
  const { __OP__: op } = Deno.core.ensureFastOps();
  const { op_make_external } = Deno.core.ensureFastOps();
  const EXTERNAL = op_make_external();

  // TODO(mmastrac): Because of current v8 limitations, these ops are not always fast unless we do this.
  // The reason is not entirely clear.
  function __OP__(__ARGS__) {
    return op(__ARGS__);
  }

  let accum = 0;
  let __index__ = 0;
  __PERCENT__PrepareFunctionForOptimization(__OP__);
  __CALL__;
  __PERCENT__OptimizeFunctionOnNextCall(__OP__);
  __CALL__;

  async function bench() {
    let accum = 0;
    for (let __index__ = 0; __index__ < __COUNT__; __index__++) __CALL__;
    return accum;
  }

  __PERCENT__PrepareFunctionForOptimization(bench);
  await bench();
  __PERCENT__OptimizeFunctionOnNextCall(bench);
  await bench();

  return bench;
}
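
For illustration (not part of the diff): after bench_op substitutes the __KEY__ placeholders for op_async_void with CALL = "await op_async_void()", empty ARGS, and COUNT = 1000, the core of the generated harness reads roughly as follows. Note that __index__ is lowercase and is not a substitution key, so it survives as an ordinary variable name; it only matters for the "accum += __index__;" baseline call.

// Illustration of the expanded template for op_async_void.
const { op_async_void: op } = Deno.core.ensureFastOps();

function op_async_void() {
  return op();
}

let accum = 0;
let __index__ = 0;
%PrepareFunctionForOptimization(op_async_void);
await op_async_void();
%OptimizeFunctionOnNextCall(op_async_void);
await op_async_void();

async function bench() {
  let accum = 0;
  for (let __index__ = 0; __index__ < 1000; __index__++) await op_async_void();
  return accum;
}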