From 4cb63fea4dbd425a9623544515ec96085c8206f0 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <17974631+IWANABETHATGUY@users.noreply.github.com>
Date: Tue, 3 Sep 2024 12:01:56 +0000
Subject: [PATCH] feat(index): impl rayon related to trait for IndexVec (#5421)

1. Impl `rayon` related trait for `oxc_index::IndexVec`
---
 Cargo.lock                         |   1 +
 crates/oxc_index/Cargo.toml        |   2 +
 crates/oxc_index/src/lib.rs        |   4 +
 crates/oxc_index/src/rayon_impl.rs | 305 +++++++++++++++++++++++++++++
 4 files changed, 312 insertions(+)
 create mode 100644 crates/oxc_index/src/rayon_impl.rs

diff --git a/Cargo.lock b/Cargo.lock
index 9e5506eb6deb8..57610fbf88867 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1572,6 +1572,7 @@ dependencies = [
 name = "oxc_index"
 version = "0.26.0"
 dependencies = [
+ "rayon",
  "serde",
 ]
 
diff --git a/crates/oxc_index/Cargo.toml b/crates/oxc_index/Cargo.toml
index fe1b1c3d3faa3..f726a86cba454 100644
--- a/crates/oxc_index/Cargo.toml
+++ b/crates/oxc_index/Cargo.toml
@@ -21,6 +21,8 @@ doctest = false
 
 [dependencies]
 serde = { workspace = true, optional = true }
+rayon = { workspace = true, optional = true }
 
 [features]
 serialize = ["dep:serde"]
+rayon = ["dep:rayon"]
diff --git a/crates/oxc_index/src/lib.rs b/crates/oxc_index/src/lib.rs
index ae78be60f2ed4..1b2892b4abf98 100644
--- a/crates/oxc_index/src/lib.rs
+++ b/crates/oxc_index/src/lib.rs
@@ -153,6 +153,10 @@ mod idxslice;
 mod indexing;
 pub use idxslice::{IndexBox, IndexSlice};
 pub use indexing::{IdxRangeBounds, IdxSliceIndex};
+#[cfg(feature = "rayon")]
+pub use rayon_impl::*;
+#[cfg(feature = "rayon")]
+mod rayon_impl;
 
 #[macro_use]
 mod macros;
diff --git a/crates/oxc_index/src/rayon_impl.rs b/crates/oxc_index/src/rayon_impl.rs
new file mode 100644
index 0000000000000..36cc42e251346
--- /dev/null
+++ b/crates/oxc_index/src/rayon_impl.rs
@@ -0,0 +1,305 @@
+//! Parallel iterator types for (`IndexVec<I, T>`)
+#![allow(clippy::undocumented_unsafe_blocks)]
+#![allow(clippy::manual_assert)]
+/// Disabled lint since we copy code from https://github.com/rayon-rs/rayon/blob/97c1133c2366a301a2d4ab35cf686bca7f74830f/src/vec.rs#L1-L284
+use alloc::vec::Vec;
+use core::iter;
+use core::mem;
+use core::ops::{Range, RangeBounds};
+use core::ptr;
+use core::slice;
+use rayon::iter::plumbing::{bridge, Consumer, Producer, ProducerCallback, UnindexedConsumer};
+use rayon::iter::{
+    IndexedParallelIterator, IntoParallelIterator, ParallelDrainRange, ParallelIterator,
+};
+use rayon::slice::{Iter, IterMut};
+
+use crate::Idx;
+use crate::IndexVec;
+
+impl<'data, I: Idx, T: Sync + 'data> IntoParallelIterator for &'data IndexVec<I, T> {
+    type Item = &'data T;
+    type Iter = Iter<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        <&[T]>::into_par_iter(&self.raw)
+    }
+}
+
+impl<'data, I: Idx, T: Send + 'data> IntoParallelIterator for &'data mut IndexVec<I, T> {
+    type Item = &'data mut T;
+    type Iter = IterMut<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        <&mut [T]>::into_par_iter(&mut self.raw)
+    }
+}
+
+/// Parallel iterator that moves out of a vector.
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    vec: Vec<T>,
+}
+
+impl<I: Idx, T: Send> IntoParallelIterator for IndexVec<I, T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter { vec: self.raw }
+    }
+}
+
+impl<T: Send> ParallelIterator for IntoIter<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for IntoIter<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.vec.len()
+    }
+
+    fn with_producer<CB>(mut self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // Drain every item, and then the vector only needs to free its buffer.
+        self.vec.par_drain(..).with_producer(callback)
+    }
+}
+
+impl<'data, I: Idx, T: Send> ParallelDrainRange<usize> for &'data mut IndexVec<I, T> {
+    type Iter = Drain<'data, T>;
+    type Item = T;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain { orig_len: self.len(), range: simplify_range(range, self.len()), vec: &mut self.raw }
+    }
+}
+
+/// Draining parallel iterator that moves a range out of a vector, but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'data, T: Send> {
+    vec: &'data mut Vec<T>,
+    range: Range<usize>,
+    orig_len: usize,
+}
+
+impl<'data, T: Send> ParallelIterator for Drain<'data, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send> IndexedParallelIterator for Drain<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        unsafe {
+            // Make the vector forget about the drained items, and temporarily the tail too.
+            self.vec.set_len(self.range.start);
+
+            // Create the producer as the exclusive "owner" of the slice.
+            let producer = DrainProducer::from_vec(self.vec, self.range.len());
+
+            // The producer will move or drop each item from the drained range.
+            callback.callback(producer)
+        }
+    }
+}
+
+impl<'data, T: Send> Drop for Drain<'data, T> {
+    fn drop(&mut self) {
+        let Range { start, end } = self.range;
+        if self.vec.len() == self.orig_len {
+            // We must not have produced, so just call a normal drain to remove the items.
+            self.vec.drain(start..end);
+        } else if start == end {
+            // Empty range, so just restore the length to its original state
+            unsafe {
+                self.vec.set_len(self.orig_len);
+            }
+        } else if end < self.orig_len {
+            // The producer was responsible for consuming the drained items.
+            // Move the tail items to their new place, then set the length to include them.
+            unsafe {
+                let ptr = self.vec.as_mut_ptr().add(start);
+                let tail_ptr = self.vec.as_ptr().add(end);
+                let tail_len = self.orig_len - end;
+                ptr::copy(tail_ptr, ptr, tail_len);
+                self.vec.set_len(start + tail_len);
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+pub(crate) struct DrainProducer<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<T: Send> DrainProducer<'_, T> {
+    /// Creates a draining producer, which *moves* items from the slice.
+    ///
+    /// Unsafe because `!Copy` data must not be read after the borrow is released.
+    pub(crate) unsafe fn new(slice: &mut [T]) -> DrainProducer<'_, T> {
+        DrainProducer { slice }
+    }
+
+    /// Creates a draining producer, which *moves* items from the tail of the vector.
+    ///
+    /// Unsafe because we're moving from beyond `vec.len()`, so the caller must ensure
+    /// that data is initialized and not read after the borrow is released.
+    unsafe fn from_vec(vec: &mut Vec<T>, len: usize) -> DrainProducer<'_, T> {
+        let start = vec.len();
+        assert!(vec.capacity() - start >= len);
+
+        // The pointer is derived from `Vec` directly, not through a `Deref`,
+        // so it has provenance over the whole allocation.
+        let ptr = vec.as_mut_ptr().add(start);
+        DrainProducer::new(slice::from_raw_parts_mut(ptr, len))
+    }
+}
+
+impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
+    type Item = T;
+    type IntoIter = SliceDrain<'data, T>;
+
+    fn into_iter(mut self) -> Self::IntoIter {
+        // replace the slice so we don't drop it twice
+        let slice = mem::take(&mut self.slice);
+        SliceDrain { iter: slice.iter_mut() }
+    }
+
+    fn split_at(mut self, index: usize) -> (Self, Self) {
+        // replace the slice so we don't drop it twice
+        let slice = mem::take(&mut self.slice);
+        let (left, right) = slice.split_at_mut(index);
+        unsafe { (DrainProducer::new(left), DrainProducer::new(right)) }
+    }
+}
+
+impl<'data, T: 'data + Send> Drop for DrainProducer<'data, T> {
+    fn drop(&mut self) {
+        // extract the slice so we can use `Drop for [T]`
+        let slice_ptr: *mut [T] = mem::take::<&'data mut [T]>(&mut self.slice);
+        unsafe { ptr::drop_in_place::<[T]>(slice_ptr) };
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+// like std::vec::Drain, without updating a source Vec
+pub(crate) struct SliceDrain<'data, T> {
+    iter: slice::IterMut<'data, T>,
+}
+
+impl<'data, T: 'data> Iterator for SliceDrain<'data, T> {
+    type Item = T;
+
+    fn next(&mut self) -> Option<T> {
+        // Coerce the pointer early, so we don't keep the
+        // reference that's about to be invalidated.
+        let ptr: *const T = self.iter.next()?;
+        Some(unsafe { ptr::read(ptr) })
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+
+    fn count(self) -> usize {
+        self.iter.len()
+    }
+}
+
+impl<'data, T: 'data> DoubleEndedIterator for SliceDrain<'data, T> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        // Coerce the pointer early, so we don't keep the
+        // reference that's about to be invalidated.
+        let ptr: *const T = self.iter.next_back()?;
+        Some(unsafe { ptr::read(ptr) })
+    }
+}
+
+impl<'data, T: 'data> ExactSizeIterator for SliceDrain<'data, T> {
+    fn len(&self) -> usize {
+        self.iter.len()
+    }
+}
+
+impl<'data, T: 'data> iter::FusedIterator for SliceDrain<'data, T> {}
+
+impl<'data, T: 'data> Drop for SliceDrain<'data, T> {
+    fn drop(&mut self) {
+        // extract the iterator so we can use `Drop for [T]`
+        let slice_ptr: *mut [T] = mem::replace(&mut self.iter, [].iter_mut()).into_slice();
+        unsafe { ptr::drop_in_place::<[T]>(slice_ptr) };
+    }
+}
+
+use core::ops::Bound;
+
+/// Normalize arbitrary `RangeBounds` to a `Range`
+pub(super) fn simplify_range(range: impl RangeBounds<usize>, len: usize) -> Range<usize> {
+    let start = match range.start_bound() {
+        Bound::Unbounded => 0,
+        Bound::Included(&i) if i <= len => i,
+        Bound::Excluded(&i) if i < len => i + 1,
+        bound => panic!("range start {:?} should be <= length {}", bound, len),
+    };
+    let end = match range.end_bound() {
+        Bound::Unbounded => len,
+        Bound::Excluded(&i) if i <= len => i,
+        Bound::Included(&i) if i < len => i + 1,
+        bound => panic!("range end {:?} should be <= length {}", bound, len),
+    };
+    if start > end {
+        panic!(
+            "range start {:?} should be <= range end {:?}",
+            range.start_bound(),
+            range.end_bound()
+        );
+    }
+    start..end
+}