diff --git a/src/encoding/integer/mod.rs b/src/encoding/integer/mod.rs index 2cc9af5f..02bca71f 100644 --- a/src/encoding/integer/mod.rs +++ b/src/encoding/integer/mod.rs @@ -43,6 +43,7 @@ pub mod rle_v1; pub mod rle_v2; mod util; +// TODO: consider having a separate varint.rs pub use util::read_varint_zigzagged; pub fn get_unsigned_rle_reader( diff --git a/src/encoding/integer/rle_v1.rs b/src/encoding/integer/rle_v1.rs index 4e8bab7a..36545d04 100644 --- a/src/encoding/integer/rle_v1.rs +++ b/src/encoding/integer/rle_v1.rs @@ -34,26 +34,28 @@ use super::{util::read_varint_zigzagged, EncodingSign, NInt}; const MAX_RUN_LENGTH: usize = 130; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -// TODO: put header data in here, e.g. base value, len, etc. enum EncodingType { - Run { length: usize }, + Run { length: usize, delta: i8 }, Literals { length: usize }, } impl EncodingType { /// Decode header byte to determine sub-encoding. /// Runs start with a positive byte, and literals with a negative byte. - #[inline] - fn from_header(header: u8) -> Self { - let header = header as i8; - if header < 0 { - let length = header.unsigned_abs() as usize; - Self::Literals { length } - } else { - // Technically +3 but we subtract 1 for the base - let length = header as u8 as usize + 2; - Self::Run { length } - } + fn from_header(reader: &mut R) -> Result> { + let opt_encoding = match try_read_u8(reader)?.map(|b| b as i8) { + Some(header) if header < 0 => { + let length = header.unsigned_abs() as usize; + Some(Self::Literals { length }) + } + Some(header) => { + let length = header as u8 as usize + 3; + let delta = read_u8(reader)? as i8; + Some(Self::Run { length, delta }) + } + None => None, + }; + Ok(opt_encoding) } } @@ -78,18 +80,15 @@ impl RleReaderV1 { fn decode_batch(&mut self) -> Result<()> { self.current_head = 0; self.decoded_ints.clear(); - let header = match try_read_u8(&mut self.reader)? { - Some(byte) => byte, - None => return Ok(()), - }; - match EncodingType::from_header(header) { - EncodingType::Literals { length } => { + match EncodingType::from_header(&mut self.reader)? { + Some(EncodingType::Literals { length }) => { read_literals::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length) } - EncodingType::Run { length } => { - read_run::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length) + Some(EncodingType::Run { length, delta }) => { + read_run::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length, delta) } + None => Ok(()), } } } @@ -110,9 +109,11 @@ fn read_run( reader: &mut R, out_ints: &mut Vec, length: usize, + delta: i8, ) -> Result<()> { - let delta = read_u8(reader)? as i8; let mut base = read_varint_zigzagged::<_, _, S>(reader)?; + // Account for base value + let length = length - 1; out_ints.push(base); if delta < 0 { let delta = delta.unsigned_abs();