Skip to content

Commit

Permalink
Merge pull request #255 from RoaringBitmap/deserialize-run-containers
Browse files Browse the repository at this point in the history
Deserialize Run Containers
  • Loading branch information
Kerollmops authored Jul 10, 2023
2 parents 25251c9 + 95664bd commit 6aabe69
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 20 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you shall be dual licensed as above, without any
additional terms or conditions.

[github-actions-badge]: https://img.shields.io/github/workflow/status/RoaringBitmap/roaring-rs/Continuous%20integration.svg?style=flat-square
[github-actions-badge]:
https://github.com/RoaringBitmap/roaring-rs/actions/workflows/test.yml/badge.svg
[github-actions]: https://github.com/RoaringBitmap/roaring-rs/actions
[release-badge]: https://img.shields.io/github/release/RoaringBitmap/roaring-rs.svg?style=flat-square
[cargo]: https://crates.io/crates/roaring
Expand Down
2 changes: 1 addition & 1 deletion src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::ops::{
use super::store::{self, Store};
use super::util;

const ARRAY_LIMIT: u64 = 4096;
pub const ARRAY_LIMIT: u64 = 4096;

#[derive(PartialEq, Clone)]
pub struct Container {
Expand Down
68 changes: 52 additions & 16 deletions src/bitmap/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::convert::{Infallible, TryFrom};
use std::error::Error;
use std::io;
use std::ops::RangeInclusive;

use super::container::Container;
use crate::bitmap::store::{ArrayStore, BitmapStore, Store};
use crate::bitmap::container::{Container, ARRAY_LIMIT};
use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH};
use crate::RoaringBitmap;

const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
const SERIAL_COOKIE: u16 = 12347;
// TODO: Need this once run containers are supported
// const NO_OFFSET_THRESHOLD: u8 = 4;
const NO_OFFSET_THRESHOLD: usize = 4;

// Sizes of header structures
const DESCRIPTION_BYTES: usize = 4;
const OFFSET_BYTES: usize = 4;

impl RoaringBitmap {
/// Return the size in bytes of the serialized output.
Expand Down Expand Up @@ -163,49 +167,81 @@ impl RoaringBitmap {
B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
BErr: Error + Send + Sync + 'static,
{
let (size, has_offsets) = {
// First read the cookie to determine which version of the format we are reading
let (size, has_offsets, has_run_containers) = {
let cookie = reader.read_u32::<LittleEndian>()?;
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
(reader.read_u32::<LittleEndian>()? as usize, true)
(reader.read_u32::<LittleEndian>()? as usize, true, false)
} else if (cookie as u16) == SERIAL_COOKIE {
return Err(io::Error::new(io::ErrorKind::Other, "run containers are unsupported"));
let size = ((cookie >> 16) + 1) as usize;
(size, size >= NO_OFFSET_THRESHOLD, true)
} else {
return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
}
};

// Read the run container bitmap if necessary
let run_container_bitmap = if has_run_containers {
let mut bitmap = vec![0u8; (size + 7) / 8];
reader.read_exact(&mut bitmap)?;
Some(bitmap)
} else {
None
};

if size > u16::MAX as usize + 1 {
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
}

let mut description_bytes = vec![0u8; size * 4];
// Read the container descriptions
let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES];
reader.read_exact(&mut description_bytes)?;
let mut description_bytes = &description_bytes[..];

if has_offsets {
let mut offsets = vec![0u8; size * 4];
let mut offsets = vec![0u8; size * OFFSET_BYTES];
reader.read_exact(&mut offsets)?;
drop(offsets); // Not useful when deserializing into memory
}

let mut containers = Vec::with_capacity(size);

for _ in 0..size {
// Read each container
for i in 0..size {
let key = description_bytes.read_u16::<LittleEndian>()?;
let len = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
let cardinality = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;

// If the run container bitmap is present, check if this container is a run container
let is_run_container =
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);

let store = if is_run_container {
let runs = reader.read_u16::<LittleEndian>()?;
let mut intervals = vec![[0, 0]; runs as usize];
reader.read_exact(cast_slice_mut(&mut intervals))?;
intervals.iter_mut().for_each(|[s, len]| {
*s = u16::from_le(*s);
*len = u16::from_le(*len);
});

let store = if len <= 4096 {
let mut values = vec![0; len as usize];
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
let mut store = Store::with_capacity(cardinality);
intervals.into_iter().for_each(|[s, len]| {
store.insert_range(RangeInclusive::new(s, s + len));
});
store
} else if cardinality <= ARRAY_LIMIT {
let mut values = vec![0; cardinality as usize];
reader.read_exact(cast_slice_mut(&mut values))?;
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Store::Array(array)
} else {
let mut values = Box::new([0; 1024]);
let mut values = Box::new([0; BITMAP_LENGTH]);
reader.read_exact(cast_slice_mut(&mut values[..]))?;
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
let bitmap =
b(len, values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
let bitmap = b(cardinality, values)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Store::Bitmap(bitmap)
};

Expand Down
4 changes: 4 additions & 0 deletions src/bitmap/store/array_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ impl ArrayStore {
ArrayStore { vec: vec![] }
}

/// Create an empty ArrayStore whose backing vec is allocated up front
/// with room for at least `capacity` values.
pub fn with_capacity(capacity: usize) -> ArrayStore {
    let storage = Vec::with_capacity(capacity);
    ArrayStore { vec: storage }
}

///
/// Create a new SortedU16Vec from a given vec
/// It is up to the caller to ensure the vec is sorted and deduplicated
Expand Down
12 changes: 11 additions & 1 deletion src/bitmap/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ use std::ops::{
};
use std::{slice, vec};

use self::bitmap_store::BITMAP_LENGTH;
pub use self::bitmap_store::BITMAP_LENGTH;
use self::Store::{Array, Bitmap};

pub use self::array_store::ArrayStore;
pub use self::bitmap_store::{BitmapIter, BitmapStore};

use crate::bitmap::container::ARRAY_LIMIT;

#[derive(Clone)]
pub enum Store {
Array(ArrayStore),
Expand All @@ -31,6 +33,14 @@ impl Store {
Store::Array(ArrayStore::new())
}

/// Create an empty store chosen according to the expected number of values.
///
/// Capacities above `ARRAY_LIMIT` yield an empty bitmap store (the capacity
/// is not forwarded to it); anything else yields an array store
/// pre-allocated for `capacity` values.
pub fn with_capacity(capacity: usize) -> Store {
    if capacity > ARRAY_LIMIT as usize {
        return Store::Bitmap(BitmapStore::new());
    }
    Store::Array(ArrayStore::with_capacity(capacity))
}

pub fn full() -> Store {
Store::Bitmap(BitmapStore::full())
}
Expand Down
Binary file added tests/bitmapwithruns.bin
Binary file not shown.
11 changes: 10 additions & 1 deletion tests/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use roaring::RoaringBitmap;

// Test data from https://github.com/RoaringBitmap/RoaringFormatSpec/tree/master/testdata
static BITMAP_WITHOUT_RUNS: &[u8] = include_bytes!("bitmapwithoutruns.bin");
static BITMAP_WITH_RUNS: &[u8] = include_bytes!("bitmapwithruns.bin");

fn test_data_bitmap() -> RoaringBitmap {
(0..100)
Expand All @@ -21,10 +22,18 @@ fn serialize_and_deserialize(bitmap: &RoaringBitmap) -> RoaringBitmap {
}

#[test]
fn test_deserialize_from_provided_data() {
fn test_deserialize_without_runs_from_provided_data() {
assert_eq!(RoaringBitmap::deserialize_from(BITMAP_WITHOUT_RUNS).unwrap(), test_data_bitmap());
}

#[test]
fn test_deserialize_with_runs_from_provided_data() {
    // Deserialize the reference file that contains run containers and
    // compare it against the bitmap built by `test_data_bitmap`.
    let mut bytes: &[u8] = BITMAP_WITH_RUNS;
    let decoded = RoaringBitmap::deserialize_from(&mut bytes).unwrap();
    let expected = test_data_bitmap();
    assert_eq!(decoded, expected);
}

#[test]
fn test_serialize_into_provided_data() {
let bitmap = test_data_bitmap();
Expand Down

0 comments on commit 6aabe69

Please sign in to comment.