Skip to content

Commit

Permalink
Refactor integer RLEv1
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Sep 29, 2024
1 parent 278169c commit 1633850
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/encoding/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub mod rle_v1;
pub mod rle_v2;
mod util;

// TODO: consider having a separate varint.rs
pub use util::read_varint_zigzagged;

pub fn get_unsigned_rle_reader<R: Read + Send + 'static>(
Expand Down
45 changes: 23 additions & 22 deletions src/encoding/integer/rle_v1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,28 @@ use super::{util::read_varint_zigzagged, EncodingSign, NInt};
const MAX_RUN_LENGTH: usize = 130;

/// Sub-encoding selected by an RLEv1 header byte: either a run or a list of literals.
// NOTE(review): this span is a rendered diff, so two `Run` variant lines are shown
// (with and without `delta`); presumably only the `delta` form survives the commit —
// confirm against the actual file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// TODO: put header data in here, e.g. base value, len, etc.
enum EncodingType {
// Run carrying only a length derived from the header byte.
Run { length: usize },
// Run carrying a length plus a signed one-byte delta.
Run { length: usize, delta: i8 },
// Literal sequence; `length` is the magnitude of the negative header byte.
Literals { length: usize },
}

impl EncodingType {
/// Decode header byte to determine sub-encoding.
/// Runs start with a non-negative byte (zero included), and literals with a negative byte.
// NOTE(review): this span is a rendered diff; a `header: u8 -> Self` form and a
// reader-based `Result<Option<Self>>` form of `from_header` are both shown —
// presumably the former is the removed version. Confirm against the actual file.
#[inline]
fn from_header(header: u8) -> Self {
// Reinterpret the byte as signed so its sign selects the sub-encoding.
let header = header as i8;
if header < 0 {
// Literal count is the magnitude of the negative header.
let length = header.unsigned_abs() as usize;
Self::Literals { length }
} else {
// Technically +3 but we subtract 1 for the base
let length = header as u8 as usize + 2;
Self::Run { length }
}
// Reader-based form: reads the header byte itself and yields `Ok(None)` when
// `try_read_u8` returns `None` (presumably end of stream — confirm).
fn from_header<R: Read>(reader: &mut R) -> Result<Option<Self>> {
let opt_encoding = match try_read_u8(reader)?.map(|b| b as i8) {
Some(header) if header < 0 => {
// Literal count is the magnitude of the negative header.
let length = header.unsigned_abs() as usize;
Some(Self::Literals { length })
}
Some(header) => {
// Run length is the header value plus 3; the delta is the next byte,
// reinterpreted as signed.
let length = header as u8 as usize + 3;
let delta = read_u8(reader)? as i8;
Some(Self::Run { length, delta })
}
None => None,
};
Ok(opt_encoding)
}
}

Expand All @@ -78,18 +80,15 @@ impl<N: NInt, R: Read, S: EncodingSign> RleReaderV1<N, R, S> {
/// Resets `current_head`, empties `decoded_ints`, then decodes the values announced by
/// the next header from `self.reader` into `self.decoded_ints`.
///
/// Returns `Ok(())` without decoding anything when no header byte is available.
// NOTE(review): this span is a rendered diff; the standalone `try_read_u8` header read
// and the non-`Option` match arms are presumably the removed lines, replaced by the
// `Some(..)`/`None` arms. Confirm against the actual file.
fn decode_batch(&mut self) -> Result<()> {
self.current_head = 0;
self.decoded_ints.clear();
// Read the header byte directly; a missing byte ends the batch early.
let header = match try_read_u8(&mut self.reader)? {
Some(byte) => byte,
None => return Ok(()),
};

match EncodingType::from_header(header) {
EncodingType::Literals { length } => {
// `from_header` now reads the header itself and reports a missing byte as `None`.
match EncodingType::from_header(&mut self.reader)? {
Some(EncodingType::Literals { length }) => {
read_literals::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length)
}
EncodingType::Run { length } => {
read_run::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length)
Some(EncodingType::Run { length, delta }) => {
// The delta was already read by `from_header`, so it is passed along to `read_run`.
read_run::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length, delta)
}
// No header byte: nothing to decode, leave `decoded_ints` empty.
None => Ok(()),
}
}
}
Expand All @@ -110,9 +109,11 @@ fn read_run<N: NInt, R: Read, S: EncodingSign>(
reader: &mut R,
out_ints: &mut Vec<N>,
length: usize,
delta: i8,
) -> Result<()> {
let delta = read_u8(reader)? as i8;
let mut base = read_varint_zigzagged::<_, _, S>(reader)?;
// Account for base value
let length = length - 1;
out_ints.push(base);
if delta < 0 {
let delta = delta.unsigned_abs();
Expand Down

0 comments on commit 1633850

Please sign in to comment.