Skip to content

Commit

Permalink
Add more zerovec impls for usize and FlexZeroVec (#2023)
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc authored Jun 9, 2022
1 parent 34ca520 commit 1699bbd
Show file tree
Hide file tree
Showing 12 changed files with 431 additions and 72 deletions.
1 change: 1 addition & 0 deletions utils/zerovec/src/flexzerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ pub(crate) mod slice;
pub(crate) mod vec;

pub use owned::FlexZeroVecOwned;
pub(crate) use slice::chunk_to_usize;
pub use slice::FlexZeroSlice;
pub use vec::FlexZeroVec;
6 changes: 6 additions & 0 deletions utils/zerovec/src/flexzerovec/owned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ impl FlexZeroVecOwned {
FlexZeroVec::Owned(self)
}

/// Removes every value from this `FlexZeroVecOwned`.
///
/// The vector is reset by overwriting it with a freshly constructed empty one.
#[inline]
pub fn clear(&mut self) {
    let empty = Self::new_empty();
    *self = empty;
}

/// Appends an item to the end of the vector.
///
/// # Panics
Expand Down
120 changes: 112 additions & 8 deletions utils/zerovec/src/flexzerovec/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
use super::FlexZeroVec;
use crate::ZeroVecError;
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::fmt;
use core::mem;
use core::ops::Range;

const USIZE_WIDTH: usize = mem::size_of::<usize>();

Expand All @@ -25,7 +27,7 @@ pub struct FlexZeroSlice {
/// into a `usize`. We cannot call `usize::from_le_bytes` directly because that function
/// requires the high bits to be set to 0.
#[inline]
fn chunk_to_usize(chunk: &[u8], width: usize) -> usize {
pub(crate) fn chunk_to_usize(chunk: &[u8], width: usize) -> usize {
debug_assert_eq!(chunk.len(), width);
let mut bytes = [0; USIZE_WIDTH];
#[allow(clippy::indexing_slicing)] // protected by debug_assert above
Expand Down Expand Up @@ -194,9 +196,14 @@ impl FlexZeroSlice {
#[inline]
pub fn get(&self, index: usize) -> Option<usize> {
// Number of bytes occupied by each element; also handed to `chunk_to_usize` for decoding.
let w = self.get_width();
// NOTE(review): the next three lines spell out the same byte-range lookup that `get_chunk`
// performs and are not terminated before the following statement; they look like the
// pre-change expression retained by the diff view, superseded by the `get_chunk` call below.
// Confirm against the real file.
self.data
.get(index * w..index * w + w)
.map(|chunk| chunk_to_usize(chunk, w))
// Fetch the element's bytes (`None` when the lookup fails) and decode them into a `usize`.
self.get_chunk(index).map(|chunk| chunk_to_usize(chunk, w))
}

/// Gets the element at `index` as a chunk of bytes, or `None` if `index >= self.len()`.
///
/// The byte offsets are computed with checked arithmetic: an `index` so large that scaling
/// it by the element width overflows `usize` cannot address any element, so it yields `None`
/// rather than panicking (overflow checks enabled) or wrapping around to an unrelated
/// in-bounds byte range (overflow checks disabled).
#[inline]
pub(crate) fn get_chunk(&self, index: usize) -> Option<&[u8]> {
    let w = self.get_width();
    // Byte offset of the first byte of the element.
    let start = index.checked_mul(w)?;
    // One past the last byte of the element.
    let end = start.checked_add(w)?;
    self.data.get(start..end)
}

/// Gets the element at `index` without checking bounds.
Expand Down Expand Up @@ -244,7 +251,7 @@ impl FlexZeroSlice {

/// Creates a `Vec<usize>` from a [`FlexZeroSlice`] (or `FlexZeroVec`).
///
/// # Example
/// # Examples
///
/// ```
/// use zerovec::vecs::FlexZeroVec;
Expand All @@ -261,20 +268,117 @@ impl FlexZeroSlice {
}

/// Binary searches a sorted `FlexZeroSlice` for the given `usize` value.
///
/// # Examples
///
/// ```
/// use zerovec::vecs::FlexZeroVec;
///
/// let nums: &[usize] = &[211, 281, 421, 461];
/// let fzv: FlexZeroVec = nums.iter().copied().collect();
///
/// assert_eq!(fzv.binary_search(0), Err(0));
/// assert_eq!(fzv.binary_search(211), Ok(0));
/// assert_eq!(fzv.binary_search(250), Err(1));
/// assert_eq!(fzv.binary_search(281), Ok(1));
/// assert_eq!(fzv.binary_search(300), Err(2));
/// assert_eq!(fzv.binary_search(421), Ok(2));
/// assert_eq!(fzv.binary_search(450), Err(3));
/// assert_eq!(fzv.binary_search(461), Ok(3));
/// assert_eq!(fzv.binary_search(462), Err(4));
/// ```
#[inline]
pub fn binary_search(&self, needle: usize) -> Result<usize, usize> {
    // Express "find `needle`" as an ordering predicate and delegate to the
    // predicate-based search.
    let compare_to_needle = |probe: usize| probe.cmp(&needle);
    self.binary_search_by(compare_to_needle)
}

let zero_index = self.data.as_ptr() as *const _ as usize;
/// Binary searches a sorted range of a `FlexZeroSlice` for the given `usize` value.
///
/// Indices are returned relative to the start of the range.
///
/// # Examples
///
/// ```
/// use zerovec::vecs::FlexZeroVec;
///
/// // Make a FlexZeroVec with two sorted ranges: 0..3 and 3..5
/// let nums: &[usize] = &[111, 222, 444, 333, 555];
/// let fzv: FlexZeroVec = nums.iter().copied().collect();
///
/// // Search in the first range:
/// assert_eq!(fzv.binary_search_in_range(0, 0..3), Some(Err(0)));
/// assert_eq!(fzv.binary_search_in_range(111, 0..3), Some(Ok(0)));
/// assert_eq!(fzv.binary_search_in_range(199, 0..3), Some(Err(1)));
/// assert_eq!(fzv.binary_search_in_range(222, 0..3), Some(Ok(1)));
/// assert_eq!(fzv.binary_search_in_range(399, 0..3), Some(Err(2)));
/// assert_eq!(fzv.binary_search_in_range(444, 0..3), Some(Ok(2)));
/// assert_eq!(fzv.binary_search_in_range(999, 0..3), Some(Err(3)));
///
/// // Search in the second range:
/// assert_eq!(fzv.binary_search_in_range(0, 3..5), Some(Err(0)));
/// assert_eq!(fzv.binary_search_in_range(333, 3..5), Some(Ok(0)));
/// assert_eq!(fzv.binary_search_in_range(399, 3..5), Some(Err(1)));
/// assert_eq!(fzv.binary_search_in_range(555, 3..5), Some(Ok(1)));
/// assert_eq!(fzv.binary_search_in_range(999, 3..5), Some(Err(2)));
///
/// // Out-of-bounds range:
/// assert_eq!(fzv.binary_search_in_range(0, 4..6), None);
/// ```
#[inline]
pub fn binary_search_in_range(
    &self,
    needle: usize,
    range: Range<usize>,
) -> Option<Result<usize, usize>> {
    // Express "find `needle`" as an ordering predicate; the range handling lives in the
    // predicate-based variant.
    let compare_to_needle = |probe: usize| probe.cmp(&needle);
    self.binary_search_in_range_by(compare_to_needle, range)
}

/// Binary searches a sorted `FlexZeroSlice` using a caller-supplied ordering predicate.
///
/// `predicate` receives each probed element as a `usize` and reports how that element
/// orders relative to the target. The result follows the `Ok(index)` / `Err(index)`
/// convention of the underlying slice binary search.
#[inline]
pub fn binary_search_by(
    &self,
    predicate: impl FnMut(usize) -> Ordering,
) -> Result<usize, usize> {
    let element_count = self.len();
    debug_assert!(element_count <= self.data.len());
    // The first `element_count` bytes of `self.data` serve purely as an index space for the
    // search: `binary_search_impl` turns each probed byte's offset into an element index.
    // Safety: element_count <= self.data.len()
    let index_space = unsafe { self.data.get_unchecked(..element_count) };
    self.binary_search_impl(predicate, index_space)
}

/// Binary searches a sorted range of a `FlexZeroSlice` according to a predicate function.
///
/// Indices are returned relative to the start of the range.
///
/// Returns `None` if either end of `range` is greater than `self.len()`, or if
/// `range.start > range.end`. An empty range is in bounds anywhere up to and including
/// `self.len()` (for example `len..len`), matching how `slice::get` treats empty ranges.
#[inline]
pub fn binary_search_in_range_by(
    &self,
    predicate: impl FnMut(usize) -> Ordering,
    range: Range<usize>,
) -> Option<Result<usize, usize>> {
    // `self.data` is the upscaled byte array, so `self.data.get(range)` on its own would
    // accept indices beyond the number of elements. Check against the element count first.
    // Note `>` rather than `>=` for the start: the empty range `len..len` is valid.
    let len = self.len();
    if range.start > len || range.end > len {
        return None;
    }
    // Still fallible: `get` rejects ranges whose start is greater than their end.
    let scaled_slice = self.data.get(range)?;
    Some(self.binary_search_impl(predicate, scaled_slice))
}

/// Shared implementation behind the `binary_search*` methods: runs the standard slice
/// binary search over `scaled_slice` and maps each probed byte back to an element index.
///
/// # Safety
///
/// `scaled_slice` must be a subslice of `self.data`
///
/// NOTE(review): this function is not declared `unsafe fn`, so the requirement above is not
/// enforced by the compiler; every caller in this module has to uphold it.
fn binary_search_impl(
&self,
mut predicate: impl FnMut(usize) -> Ordering,
scaled_slice: &[u8],
) -> Result<usize, usize> {
// See comments in components.rs regarding the following code.
// Address of the first byte of `self.data`; subtracting it from a probe's address gives the
// probe's byte offset within `self.data`.
let zero_index = self.data.as_ptr() as *const _ as usize;
scaled_slice.binary_search_by(|probe: &_| {
// Note: `scaled_slice` is a slice of u8
// The byte's offset is used as the element index; the byte's value itself is never read.
let index = probe as *const _ as usize - zero_index;
// Safety: we know this is in bounds
// (presumably because callers only pass byte offsets below `self.len()` — verify at call sites)
let actual_probe = unsafe { self.get_unchecked(index) };
// NOTE(review): the next line refers to `needle`, which is not a parameter of this function,
// and is not terminated before the following expression; it looks like the pre-change line
// retained by the diff view, superseded by the `predicate` call below. Confirm against the
// real file.
<usize as Ord>::cmp(&actual_probe, &needle)
predicate(actual_probe)
})
}
}
Expand Down
61 changes: 59 additions & 2 deletions utils/zerovec/src/flexzerovec/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ use core::ops::Deref;
///
/// The maximum value that can be stored in `FlexZeroVec` is `usize::MAX` on the current platform.
///
/// `FlexZeroVec` will be the data structure for storing `usize` in a `ZeroMap`; see
/// <https://github.com/unicode-org/icu4x/issues/1443>.
/// `FlexZeroVec` is the data structure for storing `usize` in a `ZeroMap`.
///
/// `FlexZeroVec` derefs to [`FlexZeroSlice`], which contains most of the methods.
///
/// # Examples
///
/// Storing a vec of `usize`s in a zero-copy way:
///
/// ```
/// use zerovec::vecs::FlexZeroVec;
///
Expand All @@ -44,6 +45,32 @@ use core::ops::Deref;
/// assert_eq!(7, bytes.len());
/// assert!(matches!(zv2, FlexZeroVec::Borrowed(_)));
/// ```
///
/// Storing a map of `usize` to `usize` in a zero-copy way:
///
/// ```
/// use zerovec::ZeroMap;
///
/// // Append some values to the ZeroMap
/// let mut zm = ZeroMap::<usize, usize>::new();
/// assert!(zm.try_append(&29, &92).is_none());
/// assert!(zm.try_append(&38, &83).is_none());
/// assert!(zm.try_append(&56, &65).is_none());
/// assert_eq!(zm.len(), 3);
///
/// // Insert another value into the middle
/// assert!(zm.try_append(&47, &74).is_some());
/// assert!(zm.insert(&47, &74).is_none());
/// assert_eq!(zm.len(), 4);
///
/// // Verify that the values are correct
/// assert_eq!(zm.get_copied(&0), None);
/// assert_eq!(zm.get_copied(&29), Some(92));
/// assert_eq!(zm.get_copied(&38), Some(83));
/// assert_eq!(zm.get_copied(&47), Some(74));
/// assert_eq!(zm.get_copied(&56), Some(65));
/// assert_eq!(zm.get_copied(&usize::MAX), None);
/// ```
#[derive(Debug)]
pub enum FlexZeroVec<'a> {
Owned(FlexZeroVecOwned),
Expand Down Expand Up @@ -215,3 +242,33 @@ impl FromIterator<usize> for FlexZeroVec<'_> {
FlexZeroVecOwned::from_iter(iter).into_flexzerovec()
}
}

/// Exercises `ZeroMap<usize, usize>`: appends, in-place value replacement, removal, lookups,
/// and the element widths of the key/value containers along the way.
#[test]
fn test_zeromap_usize() {
    use crate::ZeroMap;

    let mut zm = ZeroMap::<usize, usize>::new();

    // Ascending keys can all be appended.
    for (key, value) in [(29, 92), (38, 83), (47, 74), (56, 65)] {
        assert!(zm.try_append(&key, &value).is_none());
    }

    // Every key and value so far fits in a single byte.
    assert_eq!(zm.keys.get_width(), 1);
    assert_eq!(zm.values.get_width(), 1);

    // Replacing a value with one that needs two bytes widens the values container.
    assert_eq!(zm.insert(&47, &744), Some(74));
    assert_eq!(zm.values.get_width(), 2);
    assert_eq!(zm.insert(&47, &774), Some(744));
    assert_eq!(zm.values.get_width(), 2);

    // A two-byte key widens the keys container; removing it narrows the container again.
    assert!(zm.try_append(&1100, &1).is_none());
    assert_eq!(zm.keys.get_width(), 2);
    assert_eq!(zm.remove(&1100), Some(1));
    assert_eq!(zm.keys.get_width(), 1);

    // Final contents, including keys that were never inserted.
    let expected_lookups: [(usize, Option<usize>); 6] = [
        (0, None),
        (29, Some(92)),
        (38, Some(83)),
        (47, Some(774)),
        (56, Some(65)),
        (usize::MAX, None),
    ];
    for (key, expected) in expected_lookups {
        assert_eq!(zm.get_copied(&key), expected);
    }
}
2 changes: 2 additions & 0 deletions utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ pub use crate::map2d::map::ZeroMap2d;
pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec};
pub use crate::zerovec::{ZeroSlice, ZeroVec};

pub(crate) use flexzerovec::chunk_to_usize;

#[doc(hidden)]
pub mod __zerovec_internal_reexport {
pub use zerofrom::ZeroFrom;
Expand Down
10 changes: 9 additions & 1 deletion utils/zerovec/src/map/kv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

use super::vecs::{MutableZeroVecLike, ZeroVecLike};
use crate::ule::*;
use crate::varzerovec::{VarZeroSlice, VarZeroVec};
use crate::vecs::{FlexZeroSlice, FlexZeroVec};
use crate::vecs::{VarZeroSlice, VarZeroVec};
use crate::zerovec::{ZeroSlice, ZeroVec};
use alloc::boxed::Box;

Expand Down Expand Up @@ -63,6 +64,13 @@ impl_sized_kv!(char);
impl_sized_kv!(f32);
impl_sized_kv!(f64);

/// Allows `usize` to be used as a key or value type in a `ZeroMap`, backed by a
/// [`FlexZeroVec`].
impl<'a> ZeroMapKV<'a> for usize {
// Container that stores the `usize` elements.
type Container = FlexZeroVec<'a>;
// Slice type paired with the container.
type Slice = FlexZeroSlice;
// NOTE(review): presumably the raw byte chunk of a single element, in line with
// `FlexZeroSlice::get_chunk` returning `&[u8]` — confirm against the `ZeroVecLike` impl.
type GetType = [u8];
// Elements are handed out in owned form as plain `usize`.
type OwnedType = usize;
}

impl<'a, T> ZeroMapKV<'a> for Option<T>
where
T: AsULE + 'static,
Expand Down
18 changes: 14 additions & 4 deletions utils/zerovec/src/map/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,8 @@ where
impl<'a, K, V> ZeroMap<'a, K, V>
where
K: ZeroMapKV<'a> + ?Sized + Ord,
V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized,
V: AsULE + Copy,
V: ZeroMapKV<'a> + ?Sized,
V: Copy,
{
/// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`.
///
Expand All @@ -392,9 +392,10 @@ where
/// map.insert(&2, &'b');
/// assert_eq!(map.get_copied(&1), Some('a'));
/// assert_eq!(map.get_copied(&3), None);
#[inline]
pub fn get_copied(&self, key: &K) -> Option<V> {
// Locate `key` among the keys; a miss (`Err`) becomes an early `None` via `.ok()?`.
let index = self.keys.zvl_binary_search(key).ok()?;
// NOTE(review): the next line is not terminated and is immediately followed by the
// `get_copied_at` call, so it looks like the pre-change line retained by the diff view.
// Confirm against the real file.
ZeroSlice::get(&*self.values, index)
// Copy out the value stored at the matching position.
self.get_copied_at(index)
}

/// Binary search the map with `predicate` to find a key, returning the value.
Expand All @@ -413,9 +414,18 @@ where
/// assert_eq!(map.get_copied_by(|probe| probe.cmp(&1)), Some('a'));
/// assert_eq!(map.get_copied_by(|probe| probe.cmp(&3)), None);
/// ```
#[inline]
pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<V> {
// Search the keys with the caller's predicate; a miss (`Err`) becomes an early `None`.
let index = self.keys.zvl_binary_search_by(predicate).ok()?;
// NOTE(review): the next line is not terminated and is immediately followed by the
// `get_copied_at` call, so it looks like the pre-change line retained by the diff view.
// Confirm against the real file.
ZeroSlice::get(&*self.values, index)
// Copy out the value stored at the matching position.
self.get_copied_at(index)
}

/// Copies out the value stored at position `index` of the values container.
///
/// Returns `None` when `zvl_get` finds no element at `index`.
fn get_copied_at(&self, index: usize) -> Option<V> {
    let stored = self.values.zvl_get(index)?;
    let mut copied: Option<V> = None;
    // The container passes the element to the callback by reference; stash a copy of it.
    V::Container::zvl_get_as_t(stored, |value| copied.replace(*value));
    #[allow(clippy::unwrap_used)] // `zvl_get_as_t` guarantees that the callback is invoked
    let value = copied.unwrap();
    Some(value)
}
}

Expand Down
Loading

0 comments on commit 1699bbd

Please sign in to comment.