Skip to content

Commit

Permalink
Merge pull request #1612 from Pauan/cache
Browse files Browse the repository at this point in the history
Initial interning implementation
  • Loading branch information
alexcrichton authored Jul 22, 2019
2 parents 029b8ff + 59af318 commit 68a1519
Show file tree
Hide file tree
Showing 12 changed files with 248 additions and 12 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ spans = ["wasm-bindgen-macro/spans"]
std = []
serde-serialize = ["serde", "serde_json", "std"]
nightly = []
enable-interning = ["std"]

# Whether or not the `#[wasm_bindgen]` macro is strict and generates an error on
# all unused attributes
Expand All @@ -38,6 +39,7 @@ xxx_debug_only_print_generated_code = ["wasm-bindgen-macro/xxx_debug_only_print_
wasm-bindgen-macro = { path = "crates/macro", version = "=0.2.48" }
serde = { version = "1.0", optional = true }
serde_json = { version = "1.0", optional = true }
cfg-if = "0.1.9"

[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
js-sys = { path = 'crates/js-sys', version = '0.3.25' }
Expand Down
2 changes: 2 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ jobs:
displayName: "Anyref test suite builds"
- script: cargo test --target wasm32-unknown-unknown --features serde-serialize
displayName: "Crate test suite (with serde)"
- script: cargo test --target wasm32-unknown-unknown --features enable-interning
displayName: "Crate test suite (with enable-interning)"
- script: cargo test --target wasm32-unknown-unknown -p no-std
displayName: "Crate test suite (no_std)"
- script: cargo test -p wasm-bindgen-futures
Expand Down
7 changes: 5 additions & 2 deletions crates/cli-support/src/descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ tys! {
BOOLEAN
FUNCTION
CLOSURE
CACHED_STRING
STRING
REF
REFMUT
Expand Down Expand Up @@ -58,6 +59,7 @@ pub enum Descriptor {
RefMut(Box<Descriptor>),
Slice(Box<Descriptor>),
Vector(Box<Descriptor>),
CachedString,
String,
Anyref,
Enum { hole: u32 },
Expand Down Expand Up @@ -127,6 +129,7 @@ impl Descriptor {
SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped))),
VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped))),
OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped))),
CACHED_STRING => Descriptor::CachedString,
STRING => Descriptor::String,
ANYREF => Descriptor::Anyref,
ENUM => Descriptor::Enum { hole: get(data) },
Expand Down Expand Up @@ -159,12 +162,12 @@ impl Descriptor {

pub fn vector_kind(&self) -> Option<VectorKind> {
let inner = match *self {
Descriptor::String => return Some(VectorKind::String),
Descriptor::String | Descriptor::CachedString => return Some(VectorKind::String),
Descriptor::Vector(ref d) => &**d,
Descriptor::Slice(ref d) => &**d,
Descriptor::Ref(ref d) => match **d {
Descriptor::Slice(ref d) => &**d,
Descriptor::String => return Some(VectorKind::String),
Descriptor::String | Descriptor::CachedString => return Some(VectorKind::String),
_ => return None,
},
Descriptor::RefMut(ref d) => match **d {
Expand Down
28 changes: 28 additions & 0 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,34 @@ impl<'a> Context<'a> {
Ok(())
}

fn expose_get_cached_string_from_wasm(&mut self) -> Result<(), Error> {
if !self.should_write_global("get_cached_string_from_wasm") {
return Ok(());
}

self.expose_get_object();
self.expose_get_string_from_wasm()?;

// This has support for both `&str` and `Option<&str>`.
//
// If `ptr` is not `0` then we know that it's a `&str` or `Some(&str)`, so we just decode it.
//
// If `ptr` is `0` then the `len` is a pointer to the cached `JsValue`, so we return that.
//
// If `ptr` and `len` are both `0` then that means it's `None`, in that case we rely upon
// the fact that `getObject(0)` is guaranteed to be `undefined`.
self.global("
function getCachedStringFromWasm(ptr, len) {
if (ptr === 0) {
return getObject(len);
} else {
return getStringFromWasm(ptr, len);
}
}
");
Ok(())
}

fn expose_get_array_js_value_from_wasm(&mut self) -> Result<(), Error> {
if !self.should_write_global("get_array_js_value_from_wasm") {
return Ok(());
Expand Down
37 changes: 37 additions & 0 deletions crates/cli-support/src/js/outgoing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,33 @@ impl<'a, 'b> Outgoing<'a, 'b> {
Ok(format!("v{}", i))
}

NonstandardOutgoing::CachedString {
offset,
length,
owned,
optional,
} => {
let ptr = self.arg(*offset);
let len = self.arg(*length);
let tmp = self.js.tmp();

if *optional {
self.js.typescript_optional("string");
} else {
self.js.typescript_required("string");
}

self.cx.expose_get_cached_string_from_wasm()?;

self.js.prelude(&format!("const v{} = getCachedStringFromWasm({}, {});", tmp, ptr, len));

if *owned {
self.prelude_free_cached_string(&ptr, &len)?;
}

Ok(format!("v{}", tmp))
}

NonstandardOutgoing::StackClosure {
a,
b,
Expand Down Expand Up @@ -408,4 +435,14 @@ impl<'a, 'b> Outgoing<'a, 'b> {
));
self.cx.require_internal_export("__wbindgen_free")
}

fn prelude_free_cached_string(&mut self, ptr: &str, len: &str) -> Result<(), Error> {
self.js.prelude(&format!(
"if ({ptr} !== 0) {{ wasm.__wbindgen_free({ptr}, {len}); }}",
ptr = ptr,
len = len,
));

self.cx.require_internal_export("__wbindgen_free")
}
}
6 changes: 3 additions & 3 deletions crates/cli-support/src/webidl/incoming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ impl IncomingBuilder {
Descriptor::RefMut(d) => self.process_ref(true, d)?,
Descriptor::Option(d) => self.process_option(d)?,

Descriptor::String | Descriptor::Vector(_) => {
Descriptor::String | Descriptor::CachedString | Descriptor::Vector(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!("unsupported argument type for calling Rust function from JS {:?}", arg)
})? ;
Expand Down Expand Up @@ -256,7 +256,7 @@ impl IncomingBuilder {
self.bindings
.push(NonstandardIncoming::BorrowedAnyref { val: expr });
}
Descriptor::String | Descriptor::Slice(_) => {
Descriptor::String | Descriptor::CachedString | Descriptor::Slice(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
"unsupported slice type for calling Rust function from JS {:?}",
Expand Down Expand Up @@ -363,7 +363,7 @@ impl IncomingBuilder {
self.webidl.push(ast::WebidlScalarType::Any);
}

Descriptor::String | Descriptor::Vector(_) => {
Descriptor::String | Descriptor::CachedString | Descriptor::Vector(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
"unsupported optional slice type for calling Rust function from JS {:?}",
Expand Down
31 changes: 31 additions & 0 deletions crates/cli-support/src/webidl/outgoing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ pub enum NonstandardOutgoing {
kind: VectorKind,
},

/// A Rust String (or &str) which might be cached, or might be `None`.
///
/// If `offset` is 0 then it is cached, and the cached JsValue's index is in `length`.
///
/// If `offset` and `length` are both 0, then it is `None`.
CachedString {
offset: u32,
length: u32,
owned: bool,
optional: bool,
},

/// A `&[u64]` or `&[i64]` is being passed to JS, and the 64-bit sizes here
/// aren't supported by WebIDL bindings yet.
View64 {
Expand Down Expand Up @@ -240,6 +252,8 @@ impl OutgoingBuilder<'_> {
Descriptor::Ref(d) => self.process_ref(false, d)?,
Descriptor::RefMut(d) => self.process_ref(true, d)?,

Descriptor::CachedString => self.cached_string(false, true),

Descriptor::Vector(_) | Descriptor::String => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
Expand Down Expand Up @@ -281,6 +295,7 @@ impl OutgoingBuilder<'_> {
self.bindings
.push(NonstandardOutgoing::BorrowedAnyref { idx });
}
Descriptor::CachedString => self.cached_string(false, false),
Descriptor::Slice(_) | Descriptor::String => {
use wasm_webidl_bindings::ast::WebidlScalarType::*;

Expand Down Expand Up @@ -422,6 +437,9 @@ impl OutgoingBuilder<'_> {
}
Descriptor::Ref(d) => self.process_option_ref(false, d)?,
Descriptor::RefMut(d) => self.process_option_ref(true, d)?,

Descriptor::CachedString => self.cached_string(true, true),

Descriptor::String | Descriptor::Vector(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
Expand Down Expand Up @@ -455,6 +473,7 @@ impl OutgoingBuilder<'_> {
self.bindings
.push(NonstandardOutgoing::BorrowedAnyref { idx });
}
Descriptor::CachedString => self.cached_string(true, false),
Descriptor::String | Descriptor::Slice(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
Expand Down Expand Up @@ -505,6 +524,18 @@ impl OutgoingBuilder<'_> {
.push(NonstandardOutgoing::Standard(binding.into()));
}

fn cached_string(&mut self, optional: bool, owned: bool) {
let offset = self.push_wasm(ValType::I32);
let length = self.push_wasm(ValType::I32);
self.webidl.push(ast::WebidlScalarType::DomString);
self.bindings.push(NonstandardOutgoing::CachedString {
offset,
length,
owned,
optional,
})
}

fn option_native(&mut self, signed: bool, ty: ValType) {
let present = self.push_wasm(ValType::I32);
let val = self.push_wasm(ty);
Expand Down
82 changes: 82 additions & 0 deletions src/cache/intern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use cfg_if::cfg_if;


cfg_if! {
if #[cfg(feature = "enable-interning")] {
use std::thread_local;
use std::string::String;
use std::borrow::ToOwned;
use std::cell::RefCell;
use std::collections::HashMap;
use crate::JsValue;

struct Cache {
entries: RefCell<HashMap<String, JsValue>>,
}

thread_local! {
static CACHE: Cache = Cache {
entries: RefCell::new(HashMap::new()),
};
}

/// This returns the raw index of the cached JsValue, so you must take care
/// so that you don't use it after it is freed.
pub(crate) fn unsafe_get_str(s: &str) -> Option<u32> {
CACHE.with(|cache| {
let cache = cache.entries.borrow();

cache.get(s).map(|x| x.idx)
})
}

fn intern_str(key: &str) {
CACHE.with(|cache| {
let mut cache = cache.entries.borrow_mut();

// Can't use `entry` because `entry` requires a `String`
if !cache.contains_key(key) {
cache.insert(key.to_owned(), JsValue::from(key));
}
})
}
}
}


/// Interns Rust strings so that it's much faster to send them to JS.
///
/// Sending strings from Rust to JS is slow, because it has to do a full `O(n)`
/// copy and *also* encode from UTF-8 to UTF-16. This must be done every single
/// time a string is sent to JS.
///
/// If you are sending the same string multiple times, you can call this `intern`
/// function, which simply returns its argument unchanged:
///
/// ```rust
/// # use wasm_bindgen::intern;
/// intern("foo") // returns "foo"
/// # ;
/// ```
///
/// However, if you enable the `"enable-interning"` feature for wasm-bindgen,
/// then it will add the string into an internal cache.
///
/// When you send that cached string to JS, it will look it up in the cache,
/// which completely avoids the `O(n)` copy and encoding. This has a significant
/// speed boost (as high as 783%)!
///
/// However, there is a small cost to this caching, so you shouldn't cache every
/// string. Only cache strings which have a high likelihood of being sent
/// to JS multiple times.
///
/// Also, keep in mind that this function is a *performance hint*: it's not
/// *guaranteed* that the string will be cached, and the caching strategy
/// might change at any time, so don't rely upon it.
#[inline]
pub fn intern(s: &str) -> &str {
#[cfg(feature = "enable-interning")]
intern_str(s);

s
}
1 change: 1 addition & 0 deletions src/cache/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod intern;
31 changes: 27 additions & 4 deletions src/convert/slices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::prelude::v1::*;
use core::slice;
use core::str;

use cfg_if::cfg_if;
use crate::convert::OptionIntoWasmAbi;
use crate::convert::{FromWasmAbi, IntoWasmAbi, RefFromWasmAbi, RefMutFromWasmAbi, WasmAbi};

Expand Down Expand Up @@ -123,6 +124,24 @@ vectors! {
u8 i8 u16 i16 u32 i32 u64 i64 usize isize f32 f64
}


cfg_if! {
if #[cfg(feature = "enable-interning")] {
#[inline]
fn unsafe_get_cached_str(x: &str) -> Option<WasmSlice> {
// This uses 0 for the ptr as an indication that it is a JsValue and not a str.
crate::cache::intern::unsafe_get_str(x).map(|x| WasmSlice { ptr: 0, len: x })
}

} else {
#[inline]
fn unsafe_get_cached_str(_x: &str) -> Option<WasmSlice> {
None
}
}
}


if_std! {
impl<T> IntoWasmAbi for Vec<T> where Box<[T]>: IntoWasmAbi<Abi = WasmSlice> {
type Abi = <Box<[T]> as IntoWasmAbi>::Abi;
Expand Down Expand Up @@ -153,12 +172,14 @@ if_std! {

#[inline]
fn into_abi(self) -> Self::Abi {
self.into_bytes().into_abi()
// This is safe because the JsValue is immediately looked up in the heap and
// then returned, so use-after-free cannot occur.
unsafe_get_cached_str(&self).unwrap_or_else(|| self.into_bytes().into_abi())
}
}

impl OptionIntoWasmAbi for String {
fn none() -> WasmSlice { null_slice() }
fn none() -> Self::Abi { null_slice() }
}

impl FromWasmAbi for String {
Expand All @@ -180,12 +201,14 @@ impl<'a> IntoWasmAbi for &'a str {

#[inline]
fn into_abi(self) -> Self::Abi {
self.as_bytes().into_abi()
// This is safe because the JsValue is immediately looked up in the heap and
// then returned, so use-after-free cannot occur.
unsafe_get_cached_str(self).unwrap_or_else(|| self.as_bytes().into_abi())
}
}

impl<'a> OptionIntoWasmAbi for &'a str {
fn none() -> WasmSlice {
fn none() -> Self::Abi {
null_slice()
}
}
Expand Down
Loading

0 comments on commit 68a1519

Please sign in to comment.