From 745b16e3d29639b9f0beecfefbd411c5a3f13a86 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 20 Feb 2019 08:39:46 -0800 Subject: [PATCH] Add support for `TextEncoder#encodeInto` This commit adds support for the recently implemented standard of [`TextEncoder#encodeInto`][standard]. This new function is a "bring your own buffer" style function where we can avoid an intermediate allocation and copy by encoding strings directly into wasm's memory. Currently we feature-detect whether `encodeInto` exists as it is only implemented in recent browsers and not in all browsers. Additionally this commit emits the binding using `encodeInto` by default, but this requires `realloc` functionality to be exposed by the wasm module. Measured locally an empty binary which takes `&str` previously took 7.6k, but after this commit takes 8.7k due to the extra code needed for `realloc`. [standard]: https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto Closes #1172 --- crates/cli-support/src/js/mod.rs | 68 +++++++++++++++++++++++++++--- crates/cli-support/src/lib.rs | 13 ++++++ crates/cli/src/bin/wasm-bindgen.rs | 13 +++++- src/lib.rs | 23 +++++++++- 4 files changed, 110 insertions(+), 7 deletions(-) diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index 0a7fd60489b..68d11086f62 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -1,6 +1,6 @@ use crate::decode; use crate::descriptor::{Descriptor, VectorKind}; -use crate::Bindgen; +use crate::{Bindgen, EncodeInto}; use failure::{bail, Error, ResultExt}; use std::collections::{HashMap, HashSet}; use walrus::{MemoryId, Module}; @@ -1168,19 +1168,77 @@ impl<'a> Context<'a> { } else { "" }; - self.global(&format!( + + // The first implementation we have for this is to use + // `TextEncoder#encode` which has been around for quite some time. 
+ let use_encode = format!( " - function passStringToWasm(arg) {{ {} const buf = cachedTextEncoder.encode(arg); const ptr = wasm.__wbindgen_malloc(buf.length); getUint8Memory().set(buf, ptr); WASM_VECTOR_LEN = buf.length; return ptr; - }} ", debug - )); + ); + + // Another possibility is to use `TextEncoder#encodeInto` which is much + // newer and isn't implemented everywhere yet. It's more efficient, + // however, because it allows us to elide an intermediate allocation. + let use_encode_into = format!( + " + {} + let size = arg.length; + let ptr = wasm.__wbindgen_malloc(size); + let writeOffset = 0; + while (true) {{ + const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size); + const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view); + arg = arg.substring(read); + writeOffset += written; + if (arg.length === 0) {{ + break; + }} + ptr = wasm.__wbindgen_realloc(ptr, size, size * 2); + size *= 2; + }} + WASM_VECTOR_LEN = writeOffset; + return ptr; + ", + debug + ); + + match self.config.encode_into { + EncodeInto::Never => { + self.global(&format!( + "function passStringToWasm(arg) {{ {} }}", + use_encode, + )); + } + EncodeInto::Always => { + self.require_internal_export("__wbindgen_realloc")?; + self.global(&format!( + "function passStringToWasm(arg) {{ {} }}", + use_encode_into, + )); + } + EncodeInto::Test => { + self.require_internal_export("__wbindgen_realloc")?; + self.global(&format!( + " + let passStringToWasm; + if (typeof cachedTextEncoder.encodeInto === 'function') {{ + passStringToWasm = function(arg) {{ {} }}; + }} else {{ + passStringToWasm = function(arg) {{ {} }}; + }} + ", + use_encode_into, + use_encode, + )); + } + } Ok(()) } diff --git a/crates/cli-support/src/lib.rs b/crates/cli-support/src/lib.rs index cc8061487af..6518c6fb0a3 100755 --- a/crates/cli-support/src/lib.rs +++ b/crates/cli-support/src/lib.rs @@ -36,6 +36,7 @@ pub struct Bindgen { // module to be "ready to be instantiated on any thread" threads: Option, 
anyref: bool, + encode_into: EncodeInto, } enum Input { @@ -44,6 +45,12 @@ enum Input { None, } +pub enum EncodeInto { + Test, + Always, + Never, +} + impl Bindgen { pub fn new() -> Bindgen { Bindgen { @@ -64,6 +71,7 @@ impl Bindgen { weak_refs: env::var("WASM_BINDGEN_WEAKREF").is_ok(), threads: threads_config(), anyref: env::var("WASM_BINDGEN_ANYREF").is_ok(), + encode_into: EncodeInto::Test, } } @@ -144,6 +152,11 @@ impl Bindgen { self } + pub fn encode_into(&mut self, mode: EncodeInto) -> &mut Bindgen { + self.encode_into = mode; + self + } + pub fn generate<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> { self._generate(path.as_ref()) } diff --git a/crates/cli/src/bin/wasm-bindgen.rs b/crates/cli/src/bin/wasm-bindgen.rs index e83cebd0fd4..be5e31d0ecf 100644 --- a/crates/cli/src/bin/wasm-bindgen.rs +++ b/crates/cli/src/bin/wasm-bindgen.rs @@ -3,7 +3,7 @@ use failure::{bail, Error}; use serde::Deserialize; use std::path::PathBuf; use std::process; -use wasm_bindgen_cli_support::Bindgen; +use wasm_bindgen_cli_support::{Bindgen, EncodeInto}; // no need for jemalloc bloat in this binary (and we don't need speed) #[global_allocator] @@ -32,6 +32,8 @@ Options: --keep-debug Keep debug sections in wasm files --remove-name-section Remove the debugging `name` section of the file --remove-producers-section Remove the telemetry `producers` section + --encode-into MODE Whether or not to use TextEncoder#encodeInto, + valid values are [test, always, never] -V --version Print the version number of wasm-bindgen "; @@ -51,6 +53,7 @@ struct Args { flag_remove_name_section: bool, flag_remove_producers_section: bool, flag_keep_debug: bool, + flag_encode_into: Option<String>, arg_input: Option<PathBuf>, } @@ -100,6 +103,14 @@ fn rmain(args: &Args) -> Result<(), Error> { if let Some(ref name) = args.flag_out_name { b.out_name(name); } + if let Some(mode) = &args.flag_encode_into { + match mode.as_str() { + "test" => b.encode_into(EncodeInto::Test), + "always" => b.encode_into(EncodeInto::Always), + "never" 
=> b.encode_into(EncodeInto::Never), + s => bail!("invalid encode-into mode: `{}`", s), + }; + } let out_dir = match args.flag_out_dir { Some(ref p) => p, diff --git a/src/lib.rs b/src/lib.rs index 14a22b8d999..703233b2672 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -892,7 +892,7 @@ pub mod __rt { } if_std! { - use std::alloc::{alloc, dealloc, Layout}; + use std::alloc::{alloc, dealloc, realloc, Layout}; use std::mem; #[no_mangle] @@ -911,6 +911,27 @@ pub mod __rt { } } + malloc_failure(); + } + + #[no_mangle] + pub extern "C" fn __wbindgen_realloc(ptr: *mut u8, old_size: usize, new_size: usize) -> *mut u8 { + let align = mem::align_of::<usize>(); + debug_assert!(old_size > 0); + debug_assert!(new_size > 0); + if let Ok(layout) = Layout::from_size_align(old_size, align) { + unsafe { + let ptr = realloc(ptr, layout, new_size); + if !ptr.is_null() { + return ptr + } + } + } + malloc_failure(); + } + + #[cold] + fn malloc_failure() -> ! { if cfg!(debug_assertions) { super::throw_str("invalid malloc request") } else {