Skip to content

Commit

Permalink
improve hybrid rle decoding performance ~-40%
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 26, 2022
1 parent 0301708 commit b431d44
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 19 deletions.
6 changes: 3 additions & 3 deletions src/encoding/delta_bitpacked/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,15 @@ pub struct Decoder<'a> {
impl<'a> Decoder<'a> {
pub fn try_new(mut values: &'a [u8]) -> Result<Self, Error> {
let mut consumed_bytes = 0;
let (block_size, consumed) = uleb128::decode(values)?;
let (block_size, consumed) = uleb128::decode(values);
consumed_bytes += consumed;
assert_eq!(block_size % 128, 0);
values = &values[consumed..];
let (num_mini_blocks, consumed) = uleb128::decode(values)?;
let (num_mini_blocks, consumed) = uleb128::decode(values);
let num_mini_blocks = num_mini_blocks as usize;
consumed_bytes += consumed;
values = &values[consumed..];
let (total_count, consumed) = uleb128::decode(values)?;
let (total_count, consumed) = uleb128::decode(values);
let total_count = total_count as usize;
consumed_bytes += consumed;
values = &values[consumed..];
Expand Down
5 changes: 1 addition & 4 deletions src/encoding/hybrid_rle/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@ impl<'a> Iterator for Decoder<'a> {
return None;
}

let (indicator, consumed) = match uleb128::decode(self.values) {
Ok((indicator, consumed)) => (indicator, consumed),
Err(e) => return Some(Err(e)),
};
let (indicator, consumed) = uleb128::decode(self.values);
self.values = &self.values[consumed..];
if self.values.is_empty() {
return None;
Expand Down
10 changes: 9 additions & 1 deletion src/encoding/hybrid_rle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ enum State<'a> {
None,
Bitpacked(bitpacked::Decoder<'a, u32>),
Rle(std::iter::Take<std::iter::Repeat<u32>>),
// Dedicated variant for an RLE run of exactly one value (`additional == 1`),
// used instead of a `Repeat`/`Take` iterator, per the law of small numbers
Single(u32),
}

/// [`Iterator`] of [`u32`] from a byte slice of Hybrid-RLE encoded values
Expand All @@ -50,7 +53,11 @@ fn read_next<'a, 'b>(decoder: &'b mut Decoder<'a>, remaining: usize) -> Result<S
.enumerate()
.for_each(|(i, byte)| bytes[i] = *byte);
let value = u32::from_le_bytes(bytes);
State::Rle(std::iter::repeat(value).take(additional))
if additional == 1 {
State::Single(value)
} else {
State::Rle(std::iter::repeat(value).take(additional))
}
}
None => State::None,
})
Expand Down Expand Up @@ -80,6 +87,7 @@ impl<'a> Iterator for HybridRleDecoder<'a> {
let result = match &mut self.state {
State::Bitpacked(decoder) => decoder.next(),
State::Rle(iter) => iter.next(),
State::Single(i) => Some(*i),
State::None => Some(0),
};
if let Some(result) = result {
Expand Down
17 changes: 8 additions & 9 deletions src/encoding/uleb128.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::error::Error;

pub fn decode(values: &[u8]) -> Result<(u64, usize), Error> {
#[inline]
pub fn decode(values: &[u8]) -> (u64, usize) {
let mut result = 0;
let mut shift = 0;

Expand All @@ -19,7 +18,7 @@ pub fn decode(values: &[u8]) -> Result<(u64, usize), Error> {

shift += 7;
}
Ok((result, consumed))
(result, consumed)
}

/// Encodes `value` in ULEB128 into `container`. The exact number of bytes written
Expand Down Expand Up @@ -52,15 +51,15 @@ mod tests {
#[test]
fn decode_1() {
let data = vec![0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF];
let (value, len) = decode(&data).unwrap();
let (value, len) = decode(&data);
assert_eq!(value, 624_485);
assert_eq!(len, 3);
}

#[test]
fn decode_2() {
let data = vec![0b00010000, 0b00000001, 0b00000011, 0b00000011];
let (value, len) = decode(&data).unwrap();
let (value, len) = decode(&data);
assert_eq!(value, 16);
assert_eq!(len, 1);
}
Expand All @@ -70,7 +69,7 @@ mod tests {
let original = 123124234u64;
let mut container = [0u8; 10];
let encoded_len = encode(original, &mut container);
let (value, len) = decode(&container).unwrap();
let (value, len) = decode(&container);
assert_eq!(value, original);
assert_eq!(len, encoded_len);
}
Expand All @@ -80,7 +79,7 @@ mod tests {
let original = u64::MIN;
let mut container = [0u8; 10];
let encoded_len = encode(original, &mut container);
let (value, len) = decode(&container).unwrap();
let (value, len) = decode(&container);
assert_eq!(value, original);
assert_eq!(len, encoded_len);
}
Expand All @@ -90,7 +89,7 @@ mod tests {
let original = u64::MAX;
let mut container = [0u8; 10];
let encoded_len = encode(original, &mut container);
let (value, len) = decode(&container).unwrap();
let (value, len) = decode(&container);
assert_eq!(value, original);
assert_eq!(len, encoded_len);
}
Expand Down
2 changes: 1 addition & 1 deletion src/encoding/zigzag_leb128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::error::Error;
use super::uleb128;

pub fn decode(values: &[u8]) -> Result<(i64, usize), Error> {
let (u, consumed) = uleb128::decode(values)?;
let (u, consumed) = uleb128::decode(values);
Ok(((u >> 1) as i64 ^ -((u & 1) as i64), consumed))
}

Expand Down
2 changes: 1 addition & 1 deletion tests/it/read/primitive_nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ fn read_dict_array<T: NativeType>(
let bit_width = values[0];
let values = &values[1..];

let (_, consumed) = uleb128::decode(values)?;
let (_, consumed) = uleb128::decode(values);
let values = &values[consumed..];

let indices = bitpacked::Decoder::<u32>::try_new(values, bit_width as usize, length as usize)?;
Expand Down

0 comments on commit b431d44

Please sign in to comment.