Skip to content

Commit

Permalink
support compact and speedy serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
droundy committed Aug 20, 2022
1 parent 0a6cfcc commit 2ac4367
Show file tree
Hide file tree
Showing 5 changed files with 269 additions and 92 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "tinyset"
version = "0.4.13"
version = "0.4.14"
authors = ["David Roundy <daveroundy@gmail.com>"]
categories = ["data-structures"]
keywords = ["set", "small", "compact"]
Expand Down Expand Up @@ -31,6 +31,7 @@ serde = { version = "1.0", optional = true }
[features]

default = ["rand"]
compactserde = ["serde"]

[[bench]]
name = "bench"
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ tinyset = { version = "0.4.13", features = ["serde"] }
```
to enable this feature.

There is also an experimental feature `compactserde` which serializes in a compact form
identical to what is held in memory. The format used, however, is not stable, so you
cannot expect your serialized sets to be readable by a different version of `tinyset`.
If you would like to have a stable and compact serialized format, please file an issue.
Note also that deserializing a corrupt (or malicious) file could easily trigger undefined
behavior, in addition to producing incorrect and confusing results.

# Benchmarks

To run the benchmark suite, run
Expand Down
114 changes: 114 additions & 0 deletions src/setu32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,13 +695,109 @@ impl Extend<u32> for SetU32 {
}
}

#[cfg(feature = "compactserde")]
impl SetU32 {
    /// Flattens the in-memory representation of this set into a vector of words.
    ///
    /// When the pointer field is null or has any of its low three bits set it
    /// is not dereferenced; its value is emitted as a single word. Otherwise
    /// the field is dereferenced and the output is `[sz, bits]` followed by
    /// the `cap` words of the backing array.
    fn to_array(&self) -> Vec<u32> {
        let raw = self.0 as usize;
        if raw == 0 || raw & 7 != 0 {
            // NOTE(review): `as u32` keeps only the low 32 bits of the
            // pointer-sized value; confirm inline sets never use the upper
            // bits on 64-bit targets.
            return vec![self.0 as u32];
        }
        // SAFETY: the pointer is non-null and has its low three bits clear.
        // Presumably every such value is a live allocation owned by this set
        // -- TODO confirm against the constructors.
        let s = unsafe { &*self.0 };
        let b = &s.b;
        // SAFETY: assumes `array` is the start of `cap` contiguous initialized
        // words -- TODO confirm against the allocation code.
        let words = unsafe { std::slice::from_raw_parts(&s.array as *const u32, b.cap as usize) };
        let mut out = Vec::with_capacity(words.len() + 2);
        out.push(b.sz as u32);
        out.push(b.bits as u32);
        out.extend_from_slice(words);
        out
    }

    /// Rebuilds a set from the words produced by `to_array`.
    ///
    /// * An empty slice yields the null-pointer set instead of panicking on
    ///   an out-of-bounds index (presumably the empty set, since `to_array`
    ///   treats a null pointer as an inline value).
    /// * A single word is reinterpreted directly as the pointer field.
    /// * Two or more words are read as `[sz, bits, array...]`.
    ///
    /// The input is not validated beyond its length; malformed words can
    /// produce an inconsistent set.
    ///
    /// # Panics
    ///
    /// Panics if `with_capacity_and_bits` returns a set whose representation
    /// is `Empty` or `Stack`.
    fn from_array(v: &[u32]) -> SetU32 {
        match v {
            [] => SetU32(std::ptr::null_mut()),
            [word] => SetU32(*word as *mut S),
            [count, bits, words @ ..] => {
                let mut set = SetU32::with_capacity_and_bits(words.len(), *bits);
                match set.internal_mut() {
                    InternalMut::Empty | InternalMut::Stack(_) => unreachable!(),
                    InternalMut::Dense { sz, a } => {
                        *sz = *count;
                        // `zip` stops at the shorter side, so a capacity that
                        // differs from `words.len()` is tolerated.
                        for (dst, src) in a.iter_mut().zip(words) {
                            *dst = *src;
                        }
                    }
                    InternalMut::Heap { s, a } => {
                        s.sz = *count;
                        s.bits = *bits;
                        for (dst, src) in a.iter_mut().zip(words) {
                            *dst = *src;
                        }
                    }
                    InternalMut::Big { s, a } => {
                        s.sz = *count;
                        s.bits = *bits;
                        for (dst, src) in a.iter_mut().zip(words) {
                            *dst = *src;
                        }
                    }
                }
                set
            }
        }
    }
}

#[cfg(feature = "compactserde")]
#[test]
fn to_from_array() {
    use std::iter::FromIterator;

    // Encodes a set with `to_array` and decodes the result with `from_array`.
    let round_trip = |original: &SetU32| SetU32::from_array(&original.to_array());

    // A single small element.
    let set = SetU32::from_iter([0]);
    assert_eq!(set, round_trip(&set));

    // No elements at all.
    let set = SetU32::from_iter([]);
    assert_eq!(set, round_trip(&set));

    // Two values at the top of the `u32` range.
    let set = SetU32::from_iter([u32::MAX, u32::MAX - 100]);
    let s = set.to_array();
    let newset = SetU32::from_array(&s);
    // Each set must report every element it iterates over as a member.
    set.iter().for_each(|n| assert!(set.contains(n)));
    newset.iter().for_each(|n| assert!(newset.contains(n)));
    println!("set is {set:?}");
    println!("newset is {newset:?}");
    assert_eq!(set.len(), newset.len());
    assert_eq!(set, SetU32::from_array(&s));

    // A long contiguous range.
    let set = SetU32::from_iter(0..10000);
    assert_eq!(set, round_trip(&set));
}

#[cfg(feature = "serde")]
mod serde {
use crate::SetU32;
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
use serde::ser::{Serialize, SerializeSeq, Serializer};

impl Serialize for SetU32 {
#[cfg(feature = "compactserde")]
/// Serializes the set as a sequence holding the words returned by
/// `to_array`, in order.
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let words = self.to_array();
    let mut seq = serializer.serialize_seq(Some(words.len()))?;
    // Stop at the first element the serializer rejects.
    words
        .iter()
        .try_for_each(|word| seq.serialize_element(word))?;
    seq.end()
}
#[cfg(not(feature = "compactserde"))]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
Expand Down Expand Up @@ -745,6 +841,24 @@ mod serde {
formatter.write_str("a set of usize")
}

#[cfg(feature = "compactserde")]
/// Collects every element of the incoming sequence into a buffer and hands
/// that buffer to `SetU32::from_array`.
///
/// # Errors
///
/// Returns any error reported by `access` while reading an element.
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
    M: SeqAccess<'de>,
{
    // The size hint is derived from the input, so a corrupt or hostile
    // length prefix must not be able to force a huge up-front allocation.
    // Cap the preallocation; the buffer still grows as elements arrive.
    const MAX_PREALLOC: usize = 4096;
    let mut v = Vec::with_capacity(
        access
            .size_hint()
            .map_or(0, |hint| hint.min(MAX_PREALLOC)),
    );
    // While there are entries remaining in the input, append them to the
    // buffer.
    while let Some(elem) = access.next_element()? {
        v.push(elem);
    }
    Ok(SetU32::from_array(&v))
}
#[cfg(not(feature = "compactserde"))]
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: SeqAccess<'de>,
Expand Down
114 changes: 114 additions & 0 deletions src/setu64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,13 +450,109 @@ impl Extend<u64> for SetU64 {
}
}

#[cfg(feature = "compactserde")]
impl SetU64 {
    /// Flattens the in-memory representation of this set into a vector of words.
    ///
    /// When the pointer field is null or has any of its low three bits set it
    /// is not dereferenced; its value is emitted as a single word. Otherwise
    /// the field is dereferenced and the output is `[sz, bits]` followed by
    /// the `cap` words of the backing array.
    fn to_array(&self) -> Vec<u64> {
        let raw = self.0 as usize;
        if raw == 0 || raw & 7 != 0 {
            return vec![self.0 as u64];
        }
        // SAFETY: the pointer is non-null and has its low three bits clear.
        // Presumably every such value is a live allocation owned by this set
        // -- TODO confirm against the constructors.
        let s = unsafe { &*self.0 };
        let b = &s.b;
        // SAFETY: assumes `array` is the start of `cap` contiguous initialized
        // words -- TODO confirm against the allocation code.
        let words = unsafe { std::slice::from_raw_parts(&s.array as *const u64, b.cap) };
        let mut out = Vec::with_capacity(words.len() + 2);
        out.push(b.sz as u64);
        out.push(b.bits as u64);
        out.extend_from_slice(words);
        out
    }

    /// Rebuilds a set from the words produced by `to_array`.
    ///
    /// * An empty slice yields the null-pointer set instead of panicking on
    ///   an out-of-bounds index (presumably the empty set, since `to_array`
    ///   treats a null pointer as an inline value).
    /// * A single word is reinterpreted directly as the pointer field.
    /// * Two or more words are read as `[sz, bits, array...]`.
    ///
    /// The input is not validated beyond its length; malformed words can
    /// produce an inconsistent set.
    ///
    /// # Panics
    ///
    /// Panics if `with_capacity_and_bits` returns a set whose representation
    /// is `Empty` or `Stack`.
    fn from_array(v: &[u64]) -> SetU64 {
        match v {
            [] => SetU64(std::ptr::null_mut()),
            [word] => SetU64(*word as *mut S),
            [count, bits, words @ ..] => {
                let mut set = SetU64::with_capacity_and_bits(words.len(), *bits);
                match set.internal_mut() {
                    InternalMut::Empty | InternalMut::Stack(_) => unreachable!(),
                    InternalMut::Dense { sz, a } => {
                        *sz = *count as usize;
                        // `zip` stops at the shorter side, so a capacity that
                        // differs from `words.len()` is tolerated.
                        for (dst, src) in a.iter_mut().zip(words) {
                            *dst = *src;
                        }
                    }
                    InternalMut::Heap { s, a } => {
                        s.sz = *count as usize;
                        s.bits = *bits;
                        for (dst, src) in a.iter_mut().zip(words) {
                            *dst = *src;
                        }
                    }
                    InternalMut::Big { s, a } => {
                        s.sz = *count as usize;
                        s.bits = *bits;
                        for (dst, src) in a.iter_mut().zip(words) {
                            *dst = *src;
                        }
                    }
                }
                set
            }
        }
    }
}

#[cfg(feature = "compactserde")]
#[test]
fn to_from_array() {
    use std::iter::FromIterator;

    // Encodes a set with `to_array` and decodes the result with `from_array`.
    let round_trip = |original: &SetU64| SetU64::from_array(&original.to_array());

    // A single small element.
    let set = SetU64::from_iter([0]);
    assert_eq!(set, round_trip(&set));

    // No elements at all.
    let set = SetU64::from_iter([]);
    assert_eq!(set, round_trip(&set));

    // Two values at the top of the `u64` range.
    let set = SetU64::from_iter([u64::MAX, u64::MAX - 100]);
    let s = set.to_array();
    let newset = SetU64::from_array(&s);
    // Each set must report every element it iterates over as a member.
    set.iter().for_each(|n| assert!(set.contains(n)));
    newset.iter().for_each(|n| assert!(newset.contains(n)));
    println!("set is {set:?}");
    println!("newset is {newset:?}");
    assert_eq!(set.len(), newset.len());
    assert_eq!(set, SetU64::from_array(&s));

    // A long contiguous range.
    let set = SetU64::from_iter(0..10000);
    assert_eq!(set, round_trip(&set));
}

#[cfg(feature = "serde")]
mod serde {
use crate::SetU64;
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
use serde::ser::{Serialize, SerializeSeq, Serializer};

impl Serialize for SetU64 {
#[cfg(feature = "compactserde")]
/// Serializes the set as a sequence holding the words returned by
/// `to_array`, in order.
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let words = self.to_array();
    let mut seq = serializer.serialize_seq(Some(words.len()))?;
    // Stop at the first element the serializer rejects.
    words
        .iter()
        .try_for_each(|word| seq.serialize_element(word))?;
    seq.end()
}
#[cfg(not(feature = "compactserde"))]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
Expand Down Expand Up @@ -500,6 +596,24 @@ mod serde {
formatter.write_str("a set of usize")
}

#[cfg(feature = "compactserde")]
/// Collects every element of the incoming sequence into a buffer and hands
/// that buffer to `SetU64::from_array`.
///
/// # Errors
///
/// Returns any error reported by `access` while reading an element.
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
    M: SeqAccess<'de>,
{
    // The size hint is derived from the input, so a corrupt or hostile
    // length prefix must not be able to force a huge up-front allocation.
    // Cap the preallocation; the buffer still grows as elements arrive.
    const MAX_PREALLOC: usize = 4096;
    let mut v = Vec::with_capacity(
        access
            .size_hint()
            .map_or(0, |hint| hint.min(MAX_PREALLOC)),
    );
    // While there are entries remaining in the input, append them to the
    // buffer.
    while let Some(elem) = access.next_element()? {
        v.push(elem);
    }
    Ok(SetU64::from_array(&v))
}
#[cfg(not(feature = "compactserde"))]
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: SeqAccess<'de>,
Expand Down
Loading

0 comments on commit 2ac4367

Please sign in to comment.