Skip to content

Commit

Permalink
Merge #127
Browse files Browse the repository at this point in the history
127: Add scalar optimizations from CRoaring / arXiv:1709.07821 section 3 r=Kerollmops a=saik0

### Purpose

This PR adds some optimizations from CRoaring as outlined in  arXiv:1709.07821 section 3

### Overview 
 * All inserts and removes are now branchless (!in arXiv:1709.0782, in CRoaring)
 * Section 3.1 was already implemented, except for `BitmapIter`. This is covered in #125
 * Implement Array-Bitset aggregates as outlined in section 3.2
   * Also branchless 😎
 * Tracks bitmap cardinality while performing bitmap-bitmap ops
   * This is a deviation from CRoaring, and will need to be benchmarked further before this Draft PR is ready
   * Curious to hear what you think about this `@lemire` 
 * In order to track bitmap cardinality the len field had to moved into `Store::Bitmap`
   * This is unfortunately a cross cutting change
 * `Store` was quite large (LoC) and had many responsibilities. The largest change in this draft is decomposing `Store` such hat it's field variants are two new container types: each responsible for maintaining their invariants and implementing `ops`
   * `Bitmap8K` keeps track of it's cardinality
   * `SortedU16Vec` maintains its sorting
   * `Store` now only delegates to these containers
   * My hope is that this will be useful when implementing run containers. 🤞
   * Unfortunately so much code was  moved this PR is _HUGE_


### Out of scope
 * Inline ASM for Array-Bitset aggregates
 * Section 4 (explicit SIMD). As noted by the paper authors: The compiler does a decent job of autovectorization, though not as good as hand-tuned

### Notes
 * I attempted to emulate the inline ASM Array-Bitset aggregates by using a mix of unsafe ptr arithmetic and x86-64 intrinsics, hoping to compile to the same instructions. I was unable to get it under 13 instructions per iteration (compared to the papers 5). While it was an improvement, I abandoned the effort in favor of waiting for the `asm!` macro to stabilize. rust-lang/rust#72016

Co-authored-by: saik0 <github@saik0.net>
Co-authored-by: Joel Pedraza <github@saik0.net>
  • Loading branch information
bors[bot] and saik0 committed Jan 12, 2022
2 parents 5bd5fc5 + dbc246e commit 72e1ca7
Show file tree
Hide file tree
Showing 9 changed files with 1,395 additions and 1,072 deletions.
49 changes: 22 additions & 27 deletions src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ const ARRAY_LIMIT: u64 = 4096;
#[derive(PartialEq, Clone)]
pub struct Container {
pub key: u16,
pub len: u64,
pub store: Store,
}

Expand All @@ -22,14 +21,17 @@ pub struct Iter<'a> {

impl Container {
pub fn new(key: u16) -> Container {
Container { key, len: 0, store: Store::Array(Vec::new()) }
Container { key, store: Store::new() }
}
}

impl Container {
pub fn len(&self) -> u64 {
self.store.len()
}

pub fn insert(&mut self, index: u16) -> bool {
if self.store.insert(index) {
self.len += 1;
self.ensure_correct_store();
true
} else {
Expand All @@ -39,7 +41,6 @@ impl Container {

pub fn insert_range(&mut self, range: RangeInclusive<u16>) -> u64 {
let inserted = self.store.insert_range(range);
self.len += inserted;
self.ensure_correct_store();
inserted
}
Expand All @@ -49,7 +50,6 @@ impl Container {
/// Returns whether the `index` was effectively pushed.
pub fn push(&mut self, index: u16) -> bool {
if self.store.push(index) {
self.len += 1;
self.ensure_correct_store();
true
} else {
Expand All @@ -59,7 +59,6 @@ impl Container {

pub fn remove(&mut self, index: u16) -> bool {
if self.store.remove(index) {
self.len -= 1;
self.ensure_correct_store();
true
} else {
Expand All @@ -69,7 +68,6 @@ impl Container {

pub fn remove_range(&mut self, range: RangeInclusive<u16>) -> u64 {
let result = self.store.remove_range(range);
self.len -= result;
self.ensure_correct_store();
result
}
Expand All @@ -83,7 +81,7 @@ impl Container {
}

pub fn is_subset(&self, other: &Self) -> bool {
self.len <= other.len && self.store.is_subset(&other.store)
self.len() <= other.len() && self.store.is_subset(&other.store)
}

pub fn min(&self) -> Option<u16> {
Expand All @@ -95,14 +93,18 @@ impl Container {
}

fn ensure_correct_store(&mut self) {
let new_store = match (&self.store, self.len) {
(store @ &Store::Bitmap(..), len) if len <= ARRAY_LIMIT => Some(store.to_array()),
(store @ &Store::Array(..), len) if len > ARRAY_LIMIT => Some(store.to_bitmap()),
_ => None,
match &self.store {
Store::Bitmap(ref bits) => {
if bits.len() <= ARRAY_LIMIT {
self.store = Store::Array(bits.to_array_store())
}
}
Store::Array(ref vec) => {
if vec.len() as u64 > ARRAY_LIMIT {
self.store = Store::Bitmap(vec.to_bitmap_store())
}
}
};
if let Some(new_store) = new_store {
self.store = new_store;
}
}
}

Expand All @@ -111,7 +113,7 @@ impl BitOr<&Container> for &Container {

fn bitor(self, rhs: &Container) -> Container {
let store = BitOr::bitor(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -120,15 +122,13 @@ impl BitOr<&Container> for &Container {
impl BitOrAssign<Container> for Container {
fn bitor_assign(&mut self, rhs: Container) {
BitOrAssign::bitor_assign(&mut self.store, rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}

impl BitOrAssign<&Container> for Container {
fn bitor_assign(&mut self, rhs: &Container) {
BitOrAssign::bitor_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand All @@ -138,7 +138,7 @@ impl BitAnd<&Container> for &Container {

fn bitand(self, rhs: &Container) -> Container {
let store = BitAnd::bitand(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -147,15 +147,13 @@ impl BitAnd<&Container> for &Container {
impl BitAndAssign<Container> for Container {
fn bitand_assign(&mut self, rhs: Container) {
BitAndAssign::bitand_assign(&mut self.store, rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}

impl BitAndAssign<&Container> for Container {
fn bitand_assign(&mut self, rhs: &Container) {
BitAndAssign::bitand_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand All @@ -165,7 +163,7 @@ impl Sub<&Container> for &Container {

fn sub(self, rhs: &Container) -> Container {
let store = Sub::sub(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -174,7 +172,6 @@ impl Sub<&Container> for &Container {
impl SubAssign<&Container> for Container {
fn sub_assign(&mut self, rhs: &Container) {
SubAssign::sub_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand All @@ -184,7 +181,7 @@ impl BitXor<&Container> for &Container {

fn bitxor(self, rhs: &Container) -> Container {
let store = BitXor::bitxor(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -193,15 +190,13 @@ impl BitXor<&Container> for &Container {
impl BitXorAssign<Container> for Container {
fn bitxor_assign(&mut self, rhs: Container) {
BitXorAssign::bitxor_assign(&mut self.store, rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}

impl BitXorAssign<&Container> for Container {
fn bitxor_assign(&mut self, rhs: &Container) {
BitXorAssign::bitxor_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand Down Expand Up @@ -236,6 +231,6 @@ impl<'a> Iterator for Iter<'a> {

impl fmt::Debug for Container {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
format!("Container<{:?} @ {:?}>", self.len, self.key).fmt(formatter)
format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter)
}
}
8 changes: 4 additions & 4 deletions src/bitmap/inherent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ impl RoaringBitmap {
match self.containers.binary_search_by_key(&key, |c| c.key) {
Ok(loc) => {
if self.containers[loc].remove(index) {
if self.containers[loc].len == 0 {
if self.containers[loc].len() == 0 {
self.containers.remove(loc);
}
true
Expand Down Expand Up @@ -214,7 +214,7 @@ impl RoaringBitmap {
let a = if key == start_container_key { start_index } else { 0 };
let b = if key == end_container_key { end_index } else { u16::MAX };
removed += self.containers[index].remove_range(a..=b);
if self.containers[index].len == 0 {
if self.containers[index].len() == 0 {
self.containers.remove(index);
continue;
}
Expand Down Expand Up @@ -297,7 +297,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.len(), 2);
/// ```
pub fn len(&self) -> u64 {
self.containers.iter().map(|container| container.len).sum()
self.containers.iter().map(|container| container.len()).sum()
}

/// Returns the minimum value in the set (if the set is non-empty).
Expand Down Expand Up @@ -447,7 +447,7 @@ mod tests {
let inserted = b.insert_range(u16::MAX as u32 + 1..=u16::MAX as u32 + 1);
assert_eq!(inserted, 1);

assert_eq!(b.containers[0].len, 1);
assert_eq!(b.containers[0].len(), 1);
assert_eq!(b.containers.len(), 1);

let removed = b.remove_range(u16::MAX as u32 + 1..=u16::MAX as u32 + 1);
Expand Down
4 changes: 2 additions & 2 deletions src/bitmap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ pub struct IntoIter {

impl Iter<'_> {
fn new(containers: &[Container]) -> Iter {
let size_hint = containers.iter().map(|c| c.len).sum();
let size_hint = containers.iter().map(|c| c.len()).sum();
Iter { inner: containers.iter().flat_map(identity as _), size_hint }
}
}

impl IntoIter {
fn new(containers: Vec<Container>) -> IntoIter {
let size_hint = containers.iter().map(|c| c.len).sum();
let size_hint = containers.iter().map(|c| c.len()).sum();
IntoIter { inner: containers.into_iter().flat_map(identity as _), size_hint }
}
}
Expand Down
16 changes: 8 additions & 8 deletions src/bitmap/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap {
for pair in Pairs::new(&self.containers, &rhs.containers) {
if let (Some(lhs), Some(rhs)) = pair {
let container = BitAnd::bitand(lhs, rhs);
if container.len != 0 {
if container.len() != 0 {
containers.push(container);
}
}
Expand All @@ -301,7 +301,7 @@ impl BitAndAssign<RoaringBitmap> for RoaringBitmap {
let rhs_cont = &mut rhs.containers[loc];
let rhs_cont = mem::replace(rhs_cont, Container::new(rhs_cont.key));
BitAndAssign::bitand_assign(cont, rhs_cont);
cont.len != 0
cont.len() != 0
}
Err(_) => false,
}
Expand All @@ -317,7 +317,7 @@ impl BitAndAssign<&RoaringBitmap> for RoaringBitmap {
match rhs.containers.binary_search_by_key(&key, |c| c.key) {
Ok(loc) => {
BitAndAssign::bitand_assign(cont, &rhs.containers[loc]);
cont.len != 0
cont.len() != 0
}
Err(_) => false,
}
Expand Down Expand Up @@ -367,7 +367,7 @@ impl Sub<&RoaringBitmap> for &RoaringBitmap {
(None, Some(_)) => (),
(Some(lhs), Some(rhs)) => {
let container = Sub::sub(lhs, rhs);
if container.len != 0 {
if container.len() != 0 {
containers.push(container);
}
}
Expand All @@ -393,7 +393,7 @@ impl SubAssign<&RoaringBitmap> for RoaringBitmap {
match rhs.containers.binary_search_by_key(&cont.key, |c| c.key) {
Ok(loc) => {
SubAssign::sub_assign(cont, &rhs.containers[loc]);
cont.len != 0
cont.len() != 0
}
Err(_) => true,
}
Expand Down Expand Up @@ -443,7 +443,7 @@ impl BitXor<&RoaringBitmap> for &RoaringBitmap {
(None, Some(rhs)) => containers.push(rhs.clone()),
(Some(lhs), Some(rhs)) => {
let container = BitXor::bitxor(lhs, rhs);
if container.len != 0 {
if container.len() != 0 {
containers.push(container);
}
}
Expand All @@ -462,7 +462,7 @@ impl BitXorAssign<RoaringBitmap> for RoaringBitmap {
match pair {
(Some(mut lhs), Some(rhs)) => {
BitXorAssign::bitxor_assign(&mut lhs, rhs);
if lhs.len != 0 {
if lhs.len() != 0 {
self.containers.push(lhs);
}
}
Expand All @@ -481,7 +481,7 @@ impl BitXorAssign<&RoaringBitmap> for RoaringBitmap {
match pair {
(Some(mut lhs), Some(rhs)) => {
BitXorAssign::bitxor_assign(&mut lhs, rhs);
if lhs.len != 0 {
if lhs.len() != 0 {
self.containers.push(lhs);
}
}
Expand Down
18 changes: 9 additions & 9 deletions src/bitmap/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io;

use super::container::Container;
use super::store::Store;
use crate::bitmap::store::{ArrayStore, BitmapStore, Store};
use crate::RoaringBitmap;

const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
Expand Down Expand Up @@ -32,7 +32,7 @@ impl RoaringBitmap {
.containers
.iter()
.map(|container| match container.store {
Store::Array(ref values) => 8 + values.len() * 2,
Store::Array(ref values) => 8 + values.len() as usize * 2,
Store::Bitmap(..) => 8 + 8 * 1024,
})
.sum();
Expand Down Expand Up @@ -64,7 +64,7 @@ impl RoaringBitmap {

for container in &self.containers {
writer.write_u16::<LittleEndian>(container.key)?;
writer.write_u16::<LittleEndian>((container.len - 1) as u16)?;
writer.write_u16::<LittleEndian>((container.len() - 1) as u16)?;
}

let mut offset = 8 + 8 * self.containers.len() as u32;
Expand All @@ -83,12 +83,12 @@ impl RoaringBitmap {
for container in &self.containers {
match container.store {
Store::Array(ref values) => {
for &value in values {
for &value in values.iter() {
writer.write_u16::<LittleEndian>(value)?;
}
}
Store::Bitmap(ref values) => {
for &value in values.iter() {
Store::Bitmap(ref bits) => {
for &value in bits.as_array() {
writer.write_u64::<LittleEndian>(value)?;
}
}
Expand Down Expand Up @@ -152,15 +152,15 @@ impl RoaringBitmap {
let mut values = vec![0; len as usize];
reader.read_exact(cast_slice_mut(&mut values))?;
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
Store::Array(values)
Store::Array(ArrayStore::from_vec_unchecked(values))
} else {
let mut values = Box::new([0; 1024]);
reader.read_exact(cast_slice_mut(&mut values[..]))?;
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
Store::Bitmap(values)
Store::Bitmap(BitmapStore::from_unchecked(len, values))
};

containers.push(Container { key, len, store });
containers.push(Container { key, store });
}

Ok(RoaringBitmap { containers })
Expand Down
Loading

0 comments on commit 72e1ca7

Please sign in to comment.