From 485f90e064d147fc8bce45983bd8e32c894cf9a7 Mon Sep 17 00:00:00 2001 From: Henri Verroken Date: Mon, 21 Feb 2022 05:47:51 -0800 Subject: [PATCH] Replace im_rc::OrdSet with rpds::HashTrieSet Summary: the `im`/`im-rc` crate contains logic bugs (we are hitting problems outlined in https://github.com/bodil/im-rs/issues/124 sporadically) and appears unmaintained. This diff switches to the `rpds` crate which offers the same kind of functionality and appears to be better maintained. Reviewed By: CatherineGasnier Differential Revision: D34339520 fbshipit-source-id: 5fd5eab468f46d0fcc5294d3d7142e1c8541fbd5 --- hphp/hack/Cargo.lock | 77 +++----- .../src/depgraph/cargo/depgraph/Cargo.toml | 2 +- hphp/hack/src/depgraph/depgraph/reader.rs | 8 +- hphp/hack/src/deps/cargo/deps_rust/Cargo.toml | 2 +- hphp/hack/src/deps/deps_rust/typing_deps.rs | 173 +++++++++++++----- hphp/hack/src/deps/typing_deps.ml | 2 +- 6 files changed, 157 insertions(+), 107 deletions(-) diff --git a/hphp/hack/Cargo.lock b/hphp/hack/Cargo.lock index db49fff8f87d8a..15f93bb22efe85 100644 --- a/hphp/hack/Cargo.lock +++ b/hphp/hack/Cargo.lock @@ -81,6 +81,15 @@ version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b26702f315f53b6071259e15dd9d64528213b44d61de1ec926eca7715d62203" +[[package]] +name = "archery" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02" +dependencies = [ + "static_assertions", +] + [[package]] name = "arena_collections" version = "0.0.0" @@ -247,15 +256,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bitmaps" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" -dependencies = [ - "typenum", -] - [[package]] name = "block-buffer" version = "0.7.3" @@ -826,9 +826,9 @@ dependencies = [ name = "depgraph" version = "0.0.0" dependencies = [ - "im-rc", "memmap", "ocamlrep", + "rpds", ] [[package]] @@ -847,12 +847,12 @@ name = "deps_rust" version = "0.0.0" dependencies = [ "depgraph", - "im-rc", "ocamlrep", "ocamlrep_custom", "ocamlrep_ocamlpool", "once_cell", "oxidized", + "rpds", "typing_deps_hash", ] @@ -2099,21 +2099,6 @@ dependencies = [ "quick-error", ] -[[package]] -name = "im-rc" -version = "14.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "303f7e6256d546e01979071417432425f15c1891fb309a5f2d724ee908fabd6e" -dependencies = [ - "bitmaps", - "rand_core 0.5.1", - "rand_xoshiro", - "serde", - "sized-chunks", - "typenum", - "version_check", -] - [[package]] name = "indexmap" version = "1.8.0" @@ -2950,7 +2935,7 @@ checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" dependencies = [ "libc", "rand_chacha", - "rand_core 0.6.3", + "rand_core", "rand_hc", ] @@ -2961,15 +2946,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.3", + "rand_core", ] -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" - [[package]] name = "rand_core" version = "0.6.3" @@ -2985,16 +2964,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" dependencies = [ - "rand_core 0.6.3", -] - -[[package]] -name = "rand_xoshiro" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9fcdd2e881d02f1d9390ae47ad8e5696a9e4be7b547a1da2afbc61973217004" -dependencies = [ - "rand_core 0.5.1", + "rand_core", ] [[package]] @@ -3096,6 +3066,15 @@ dependencies = [ "oxidized", ] +[[package]] +name = "rpds" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ef5140bcb576bfd6d56cd2de709a7d17851ac1f3805e67fe9d99e42a11821f" +dependencies = [ + "archery", +] + [[package]] name = "runtime" version = "0.0.0" @@ -3335,16 +3314,6 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "sized-chunks" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59044ea371ad781ff976f7b06480b9f0180e834eda94114f2afb4afc12b7718" -dependencies = [ - "bitmaps", - "typenum", -] - [[package]] name = "smallvec" version = "1.7.0" diff --git a/hphp/hack/src/depgraph/cargo/depgraph/Cargo.toml b/hphp/hack/src/depgraph/cargo/depgraph/Cargo.toml index 928027d303d266..9fa9db42846d95 100644 --- a/hphp/hack/src/depgraph/cargo/depgraph/Cargo.toml +++ b/hphp/hack/src/depgraph/cargo/depgraph/Cargo.toml @@ -9,6 +9,6 @@ path = "../../depgraph/lib.rs" crate-type = ["lib", "staticlib"] [dependencies] -im-rc = { version = "14.2", features = ["serde"] } memmap = "0.7" ocamlrep = { path = "../../../ocamlrep" } +rpds = "0.11.0" diff --git a/hphp/hack/src/depgraph/depgraph/reader.rs b/hphp/hack/src/depgraph/depgraph/reader.rs index 0a8b3bea22fa9e..47f22fff9f3576 100644 --- a/hphp/hack/src/depgraph/depgraph/reader.rs +++ b/hphp/hack/src/depgraph/depgraph/reader.rs @@ -8,7 +8,7 @@ pub use crate::dep::Dep; use std::ops::Deref; -use im_rc::OrdSet; +use rpds::HashTrieSet; /// Use a `DepGraphOpener` to initialize a dependency graph from a file. /// @@ -153,16 +153,16 @@ impl<'bytes> DepGraph<'bytes> { } /// Add the direct typing dependencies for one dependency. - pub fn add_typing_deps_for_dep(&self, acc: &mut OrdSet, dep: Dep) { + pub fn add_typing_deps_for_dep(&self, acc: &mut HashTrieSet, dep: Dep) { if let Some(dept_hash_list) = self.hash_list_for(dep) { for dept in self.hash_list_hashes(dept_hash_list) { - acc.insert(dept); + acc.insert_mut(dept); } } } /// Query the direct typing dependencies for the given set of dependencies. - pub fn query_typing_deps_multi(&self, deps: &OrdSet) -> OrdSet { + pub fn query_typing_deps_multi(&self, deps: &HashTrieSet) -> HashTrieSet { let mut acc = deps.clone(); for dep in deps { self.add_typing_deps_for_dep(&mut acc, *dep); diff --git a/hphp/hack/src/deps/cargo/deps_rust/Cargo.toml b/hphp/hack/src/deps/cargo/deps_rust/Cargo.toml index 1f930362bc6a99..7a156a51a0505f 100644 --- a/hphp/hack/src/deps/cargo/deps_rust/Cargo.toml +++ b/hphp/hack/src/deps/cargo/deps_rust/Cargo.toml @@ -10,10 +10,10 @@ crate-type = ["lib", "staticlib"] [dependencies] depgraph = { path = "../../../depgraph/cargo/depgraph" } -im-rc = { version = "14.2", features = ["serde"] } ocamlrep = { path = "../../../ocamlrep" } ocamlrep_custom = { path = "../../../ocamlrep_custom" } ocamlrep_ocamlpool = { path = "../../../ocamlrep_ocamlpool" } once_cell = "1.8" oxidized = { path = "../../../oxidized" } +rpds = "0.11.0" typing_deps_hash = { path = "../typing_deps_hash" } diff --git a/hphp/hack/src/deps/deps_rust/typing_deps.rs b/hphp/hack/src/deps/deps_rust/typing_deps.rs index 484bb72513a8b7..ae9523b692f040 100644 --- a/hphp/hack/src/deps/deps_rust/typing_deps.rs +++ b/hphp/hack/src/deps/deps_rust/typing_deps.rs @@ -6,12 +6,12 @@ #![cfg_attr(use_unstable_features, feature(test))] use depgraph::reader::{Dep, DepGraph, DepGraphOpener}; -use im_rc::OrdSet; use ocamlrep::{FromError, FromOcamlRep, Value}; use ocamlrep_custom::{caml_serialize_default_impls, CamlSerialize, Custom}; use ocamlrep_ocamlpool::ocaml_ffi; use once_cell::sync::OnceCell; use oxidized::typing_deps_mode::TypingDepsMode; +use rpds::HashTrieSet; use std::cell::RefCell; use std::collections::{HashMap, HashSet, VecDeque}; use std::ffi::OsString; @@ -442,18 +442,18 @@ unsafe extern "C" fn hash2_ocaml( /// Rust set of dependencies that can be transferred from /// OCaml to Rust memory. #[derive(Debug, Eq, PartialEq)] -pub struct DepSet(OrdSet); +pub struct DepSet(HashTrieSet); impl std::ops::Deref for DepSet { - type Target = OrdSet; + type Target = HashTrieSet; - fn deref(&self) -> &OrdSet { + fn deref(&self) -> &HashTrieSet { &self.0 } } -impl From> for DepSet { - fn from(x: OrdSet) -> Self { +impl From> for DepSet { + fn from(x: HashTrieSet) -> Self { Self(x) } } @@ -462,7 +462,7 @@ impl CamlSerialize for DepSet { caml_serialize_default_impls!(); fn serialize(&self) -> Vec { - let num_elems = self.len(); + let num_elems = self.size(); let mut buf = Vec::with_capacity(std::mem::size_of::() * num_elems); for &x in self.iter() { let x: u64 = x.into(); @@ -476,17 +476,83 @@ impl CamlSerialize for DepSet { let num_elems = data.len() / U64_SIZE; let max_index = num_elems * U64_SIZE; - let mut s: OrdSet = OrdSet::new(); + let mut s: HashTrieSet = HashTrieSet::new(); let mut index = 0; while index < max_index { let x = u64::from_le_bytes(data[index..index + U64_SIZE].try_into().unwrap()); - s.insert(Dep::new(x)); + s.insert_mut(Dep::new(x)); index += U64_SIZE; } s.into() } } +impl DepSet { + /// Returns the union of two sets. + /// + /// The underlying data structure does not implement union. So let's + /// implement it here. + pub fn union(&self, other: &Self) -> Self { + // `HashTrieSet`'s insert is O(1) on average, O(n) worst-case, so let's + // make sure we loop over the smaller set. + // + // Note that the sizes of the arguments are expected to be + // very skewed. + let (bigger, smaller) = if self.size() > other.size() { + (self, other) + } else { + (other, self) + }; + let mut bigger = bigger.0.clone(); + for dep in smaller.iter() { + bigger.insert_mut(*dep); + } + bigger.into() + } + + /// Returns the intersection of two sets. + /// + /// The underlying data structure does not implement intersection. So let's + /// implement it here. + pub fn intersect(&self, other: &Self) -> Self { + // Let's make sure we loop over the smaller set. + let (bigger, smaller) = if self.size() > other.size() { + (self, other) + } else { + (other, self) + }; + let mut result = HashTrieSet::new(); + for dep in smaller.iter() { + if bigger.contains(dep) { + result.insert_mut(*dep); + } + } + result.into() + } + + /// Returns the difference of two sets, i.e. all elements in the first + /// set but not in the second set. + /// + /// The underlying data structure does not implement intersection. So let's + /// implement it here. + pub fn difference(&self, other: &Self) -> Self { + let mut result = self.0.clone(); + // Let's make sure we loop over the smaller set. + if self.size() < other.size() { + for dep in self.iter() { + if other.contains(dep) { + result.remove_mut(dep); + } + } + } else { + for dep in other.iter() { + result.remove_mut(dep); + } + } + result.into() + } +} + /// Rust set of visited hashes #[derive(Debug)] pub struct VisitedSet(RefCell>); @@ -533,17 +599,21 @@ ocaml_ffi! { } fn hh_custom_dep_graph_get_ideps_from_hash(mode: RawTypingDepsMode, dep: Dep) -> Custom { - let mut deps = OrdSet::new(); + let mut deps = HashTrieSet::new(); DepGraphDelta::with(|delta| { if let Some(delta_deps) = delta.get(dep) { - deps.extend(delta_deps.iter().copied()); + for delta_dep in delta_deps { + deps.insert_mut(*delta_dep) + } } }); // Safety: we don't call into OCaml again, so mode will remain valid. unsafe { UnsafeDepGraph::with_default(mode, (), |g| { if let Some(hash_list) = g.hash_list_for(dep) { - deps.extend(g.hash_list_hashes(hash_list)); + for hash in g.hash_list_hashes(hash_list) { + deps.insert_mut(hash); + } } }); } @@ -562,7 +632,9 @@ ocaml_ffi! { DepGraphDelta::with(|delta| { for dep in query.iter() { if let Some(depies) = delta.get(*dep) { - s.extend(depies.iter().copied()); + for depy in depies { + s.insert_mut(*depy); + } } } }); @@ -713,7 +785,7 @@ unsafe fn get_extend_deps_visit( visited: &mut HashSet, queue: &mut VecDeque, source_class: Dep, - acc: &mut OrdSet, + acc: &mut HashTrieSet, ) { if !visited.insert(source_class) { return; @@ -724,7 +796,8 @@ unsafe fn get_extend_deps_visit( }; let mut handle_extends_dep = |dep: Dep| { if dep.is_class() { - if acc.insert(dep).is_none() { + if !acc.contains(&dep) { + acc.insert_mut(dep); queue.push_back(dep); } } @@ -749,50 +822,31 @@ ocaml_ffi! { } fn hh_dep_set_make() -> Custom { - Custom::from(OrdSet::new().into()) + Custom::from(HashTrieSet::new().into()) } fn hh_dep_set_singleton(dep: Dep) -> Custom { - let mut s = OrdSet::new(); - s.insert(dep); + let mut s = HashTrieSet::new(); + s.insert_mut(dep); Custom::from(s.into()) } fn hh_dep_set_add(s: Custom, dep: Dep) -> Custom { let mut s = s.clone(); - s.insert(dep); + s.insert_mut(dep); Custom::from(s.into()) } fn hh_dep_set_union(s1: Custom, s2: Custom) -> Custom { - // OrdSet's implementation of union is `O(|rhs| * log |lhs| )`. This is - // in contrast to OCaml's `O(|lhs| + |rhs|)`, and poses a problem when - // both `|lhs|` and `|rhs|` are large. - // - // However, it seems that for our purposes, most often either `|s1|` or `|s2|` - // is very small. - let s1_len = s1.len(); - let s2_len = s2.len(); - let (left, right) = if s1_len > s2_len { - (s1, s2) - } else { - (s2, s1) - }; - let left = left.clone(); - let right = right.clone(); - Custom::from(left.union(right).into()) + Custom::from(s1.union(&s2)) } fn hh_dep_set_inter(s1: Custom, s2: Custom) -> Custom { - let s1 = s1.clone(); - let s2 = s2.clone(); - Custom::from(s1.intersection(s2).into()) + Custom::from(s1.intersect(&s2)) } fn hh_dep_set_diff(s1: Custom, s2: Custom) -> Custom { - let s1 = s1.clone(); - let s2 = s2.clone(); - Custom::from(s1.difference(s2).into()) + Custom::from(s1.difference(&s2)) } fn hh_dep_set_mem(s: Custom, dep: Dep) -> bool { @@ -804,7 +858,7 @@ ocaml_ffi! { } fn hh_dep_set_cardinal(s: Custom) -> usize { - s.len() + s.size() } fn hh_dep_set_is_empty(s: Custom) -> bool { @@ -812,7 +866,7 @@ ocaml_ffi! { } fn hh_dep_set_of_list(xs: Vec) -> Custom { - Custom::from(OrdSet::from(&xs).into()) + Custom::from(HashTrieSet::from_iter(xs).into()) } } @@ -884,9 +938,9 @@ mod tests { #[test] fn test_dep_set_serialize() { - let mut x: OrdSet = OrdSet::new(); - x.insert(Dep::new(1)); - x.insert(Dep::new(2)); + let mut x: HashTrieSet = HashTrieSet::new(); + x.insert_mut(Dep::new(1)); + x.insert_mut(Dep::new(2)); let x: DepSet = x.into(); let buf = x.serialize(); @@ -950,4 +1004,31 @@ mod tests { v.sort(); assert_eq!(v, edges); } + + #[test] + fn test_dep_set_union() { + let s = |x: &[u64]| DepSet::from(HashTrieSet::from_iter(x.iter().copied().map(Dep::new))); + + assert_eq!(s(&[4, 7]).union(&s(&[1, 4, 3])), s(&[1, 4, 3, 7])); + assert_eq!(s(&[1, 4, 3]).union(&s(&[4, 7])), s(&[1, 4, 3, 7])); + } + + #[test] + fn test_dep_set_inter() { + let s = |x: &[u64]| DepSet::from(HashTrieSet::from_iter(x.iter().copied().map(Dep::new))); + + assert_eq!(s(&[4, 7]).intersect(&s(&[1, 4, 3])), s(&[4])); + assert_eq!(s(&[1, 4, 3]).intersect(&s(&[4, 7])), s(&[4])); + } + + #[test] + fn test_dep_set_diff() { + let s = |x: &[u64]| DepSet::from(HashTrieSet::from_iter(x.iter().copied().map(Dep::new))); + + assert_eq!(s(&[4, 7]).difference(&s(&[1, 4, 3, 9, 8, 10])), s(&[7])); + assert_eq!( + s(&[1, 4, 3, 9, 8, 10]).difference(&s(&[4, 11])), + s(&[1, 3, 9, 8, 10]) + ); + } } diff --git a/hphp/hack/src/deps/typing_deps.ml b/hphp/hack/src/deps/typing_deps.ml index 6eb882b325e053..295b2b0cfc7294 100644 --- a/hphp/hack/src/deps/typing_deps.ml +++ b/hphp/hack/src/deps/typing_deps.ml @@ -214,7 +214,7 @@ let add_dependency_callback cb_name cb = (** Set of dependencies used for the custom system The type `t` is an abstract type managed by `typing_deps.rs`. - It is a pointer to an `OrdSet`, a persistent Rust map *) + It is a pointer to an `HashTrieSet`, a persistent Rust map *) module DepSet = struct type t (* Abstract type *)