From b505555fa0d0b6ba127af992677fa9eced9d850b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Gammels=C3=A6ter?= Date: Fri, 4 Mar 2022 21:59:23 +0100 Subject: [PATCH 1/3] Extract bit-counting of a slice of Words into a function --- compiler/rustc_index/src/bit_set.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index 12bde0294945..2fb536208971 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -152,7 +152,7 @@ impl BitSet { /// Count the number of set bits in the set. pub fn count(&self) -> usize { - self.words.iter().map(|e| e.count_ones() as usize).sum() + bit_count(&self.words) } /// Returns `true` if `self` contains `elem`. @@ -628,10 +628,7 @@ impl BitRelations> for ChunkedBitSet { op, ); debug_assert!(has_changed); - *self_chunk_count = self_chunk_words[0..num_words] - .iter() - .map(|w| w.count_ones() as ChunkSize) - .sum(); + *self_chunk_count = bit_count(&self_chunk_words[0..num_words]) as ChunkSize; if *self_chunk_count == *self_chunk_domain_size { *self_chunk = Ones(*self_chunk_domain_size); } @@ -705,21 +702,12 @@ impl Chunk { assert!(0 < count && count < chunk_domain_size); // Check the number of set bits matches `count`. - assert_eq!( - words.iter().map(|w| w.count_ones() as ChunkSize).sum::(), - count - ); + assert_eq!(bit_count(words.as_ref()) as ChunkSize, count); // Check the not-in-use words are all zeroed. let num_words = num_words(chunk_domain_size as usize); if num_words < CHUNK_WORDS { - assert_eq!( - words[num_words..] - .iter() - .map(|w| w.count_ones() as ChunkSize) - .sum::(), - 0 - ); + assert_eq!(bit_count(&words[num_words..]), 0); } } } @@ -1585,7 +1573,7 @@ impl BitMatrix { /// Returns the number of elements in `row`. 
pub fn count(&self, row: R) -> usize { let (start, end) = self.range(row); - self.words[start..end].iter().map(|e| e.count_ones() as usize).sum() + bit_count(&self.words[start..end]) } } @@ -1796,6 +1784,11 @@ fn max_bit(word: Word) -> usize { WORD_BITS - 1 - word.leading_zeros() as usize } +#[inline] +fn bit_count(words: &[Word]) -> usize { + words.iter().map(|w| w.count_ones() as usize).sum() +} + /// Integral type used to represent the bit set. pub trait FiniteBitSetTy: BitAnd From 76d1d53eb7cb04b03c7ea9fe9f2310b9b5a12f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Gammels=C3=A6ter?= Date: Fri, 4 Mar 2022 22:01:55 +0100 Subject: [PATCH 2/3] Optimize ChunkedBitSet dense relations --- compiler/rustc_index/src/bit_set.rs | 167 +++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index 2fb536208971..f33a9af85454 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -651,17 +651,21 @@ impl BitRelations> for ChunkedBitSet { impl BitRelations> for ChunkedBitSet { fn union(&mut self, other: &HybridBitSet) -> bool { - // FIXME: this is slow if `other` is dense, and could easily be - // improved, but it hasn't been a problem in practice so far. assert_eq!(self.domain_size, other.domain_size()); - sequential_update(|elem| self.insert(elem), other.iter()) + + match other { + HybridBitSet::Sparse(_) => sequential_update(|elem| self.insert(elem), other.iter()), + HybridBitSet::Dense(dense) => self.union(dense), + } } fn subtract(&mut self, other: &HybridBitSet) -> bool { - // FIXME: this is slow if `other` is dense, and could easily be - // improved, but it hasn't been a problem in practice so far. 
assert_eq!(self.domain_size, other.domain_size()); - sequential_update(|elem| self.remove(elem), other.iter()) + + match other { + HybridBitSet::Sparse(_) => sequential_update(|elem| self.remove(elem), other.iter()), + HybridBitSet::Dense(dense) => self.subtract(dense), + } } fn intersect(&mut self, _other: &HybridBitSet) -> bool { @@ -669,6 +673,152 @@ impl BitRelations> for ChunkedBitSet { } } +impl BitRelations> for ChunkedBitSet { + fn union(&mut self, other: &BitSet) -> bool { + assert_eq!(self.domain_size, other.domain_size()); + + let mut changed = false; + for (chunk, other_words) in self.chunks.iter_mut().zip(other.words().chunks(CHUNK_WORDS)) { + match chunk { + Zeros(chunk_domain_size) => { + if let Some(first_nonzero_index) = first_nonzero(other_words) { + let other_count = + bit_count(&other_words[first_nonzero_index..]) as ChunkSize; + debug_assert!(other_count <= *chunk_domain_size); + if other_count == *chunk_domain_size { + *chunk = Ones(*chunk_domain_size); + changed = true; + } else if other_count != 0 { + // We take some effort to avoid copying the words. + let words = Rc::<[Word; CHUNK_WORDS]>::new_zeroed(); + // SAFETY: `words` can safely be all zeroes. 
+ let mut words = unsafe { words.assume_init() }; + let words_ref = Rc::get_mut(&mut words).unwrap(); + + debug_assert_eq!( + num_words(*chunk_domain_size as usize), + other_words.len() + ); + words_ref[first_nonzero_index..other_words.len()] + .copy_from_slice(&other_words[first_nonzero_index..]); + + *chunk = Mixed(*chunk_domain_size, other_count, words); + changed = true; + } + } + } + Ones(_) => {} + Mixed(chunk_domain_size, chunk_count, chunk_words) => { + if let Some(first_nonzero_index) = first_nonzero(other_words) { + debug_assert_eq!(num_words(*chunk_domain_size as usize), other_words.len()); + let op = |a, b| a | b; + if bitwise_changes( + &chunk_words[first_nonzero_index..other_words.len()], + &other_words[first_nonzero_index..], + op, + ) { + let chunk_words = Rc::make_mut(chunk_words); + let has_changed = bitwise( + &mut chunk_words[first_nonzero_index..other_words.len()], + &other_words[first_nonzero_index..], + op, + ); + debug_assert!(has_changed); + + *chunk_count = bit_count(chunk_words) as ChunkSize; + debug_assert!(*chunk_count > 0); + if *chunk_count == *chunk_domain_size { + *chunk = Ones(*chunk_domain_size); + } + changed = true + } + } + } + } + } + changed + } + + fn subtract(&mut self, other: &BitSet) -> bool { + assert_eq!(self.domain_size, other.domain_size()); + + let mut changed = false; + for (chunk, other_words) in self.chunks.iter_mut().zip(other.words().chunks(CHUNK_WORDS)) { + match chunk { + Zeros(_) => {} + Ones(chunk_domain_size) => { + if let Some(first_nonzero_index) = first_nonzero(other_words) { + let other_count = + bit_count(&other_words[first_nonzero_index..]) as ChunkSize; + debug_assert!(other_count <= *chunk_domain_size); + if other_count == *chunk_domain_size { + *chunk = Zeros(*chunk_domain_size); + changed = true; + } else { + // We take some effort to avoid copying the words. + let words = Rc::<[Word; CHUNK_WORDS]>::new_zeroed(); + // SAFETY: `words` can safely be all zeroes. 
+ let mut words = unsafe { words.assume_init() }; + let words_ref = Rc::get_mut(&mut words).unwrap(); + + debug_assert_eq!( + num_words(*chunk_domain_size as usize), + other_words.len() + ); + // Every word becomes `!other`, including those before + // `first_nonzero_index`: `other` is zero there, so a `Ones` chunk keeps + // all of their bits. Words past `other_words` stay zero. + for (word, other) in words_ref.iter_mut().zip(other_words.iter()) { + *word = !other; + } + + clear_excess_bits_in_final_word( + *chunk_domain_size as usize, + &mut words_ref[..other_words.len()], + ); + + *chunk = + Mixed(*chunk_domain_size, *chunk_domain_size - other_count, words); + changed = true; + } + } + } + Mixed(chunk_domain_size, chunk_count, chunk_words) => { + if let Some(first_nonzero_index) = first_nonzero(other_words) { + debug_assert_eq!(num_words(*chunk_domain_size as usize), other_words.len()); + let op = |a, b: Word| a & !b; + if bitwise_changes( + &chunk_words[first_nonzero_index..other_words.len()], + &other_words[first_nonzero_index..], + op, + ) { + let chunk_words = Rc::make_mut(chunk_words); + let has_changed = bitwise( + &mut chunk_words[first_nonzero_index..other_words.len()], + &other_words[first_nonzero_index..], + op, + ); + debug_assert!(has_changed); + + *chunk_count = bit_count(chunk_words) as ChunkSize; + debug_assert!(chunk_count < chunk_domain_size); + if *chunk_count == 0 { + *chunk = Zeros(*chunk_domain_size); + } + changed = true + } + } + } + } + } + changed + } + + fn intersect(&mut self, _other: &BitSet) -> bool { + unimplemented!("implement if/when necessary"); + } +} + impl Clone for ChunkedBitSet { fn clone(&self) -> Self { ChunkedBitSet { @@ -1771,6 +1921,11 @@ fn chunk_word_index_and_mask(elem: T) -> (usize, Word) { word_index_and_mask(chunk_elem) } +#[inline] +fn first_nonzero(words: &[Word]) -> Option { + words.iter().position(|w| *w != 0) +} + fn clear_excess_bits_in_final_word(domain_size: usize, words: &mut [Word]) { let num_bits_in_final_word = domain_size % WORD_BITS; if num_bits_in_final_word > 0 { From 2ffe6f3d7ed467ffce336f3e9a1ffc55095e708d Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Martin=20Gammels=C3=A6ter?= Date: Fri, 4 Mar 2022 22:02:20 +0100 Subject: [PATCH 3/3] Add some basic tests of ChunkedBitSet dense relations --- compiler/rustc_index/src/bit_set/tests.rs | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs index eec7dab5189a..8a9db05b7e16 100644 --- a/compiler/rustc_index/src/bit_set/tests.rs +++ b/compiler/rustc_index/src/bit_set/tests.rs @@ -340,6 +340,37 @@ fn chunked_bitset() { assert_eq!(b10000.count(), 6000); b10000.assert_valid(); b10000b.assert_valid(); + + //----------------------------------------------------------------------- + + let mut b6900 = ChunkedBitSet::::new_empty(6900); + b6900.insert(3); + b6900.insert(17); + b6900.insert(68); + b6900.insert(2000); + b6900.insert(2500); + + let mut b6900b = BitSet::::new_empty(6900); + b6900b.insert(17); + b6900b.insert(42); + b6900b.insert(68); + b6900b.insert(2000); + b6900b.insert(4200); + + b6900.subtract(&b6900b); + b6900.assert_valid(); + assert!(b6900.contains(3)); + assert!(b6900.contains(2500)); + assert_eq!(b6900.count(), 2); + + b6900.union(&b6900b); + b6900.assert_valid(); + assert_eq!(b6900.count(), 7); + + b6900.subtract(&BitSet::::new_filled(6900)); + b6900.assert_valid(); + assert_eq!(b6900.count(), 0); + assert_eq!(b6900.chunks(), vec![Zeros(2048), Zeros(2048), Zeros(2048), Zeros(756)],); } #[test]