Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Fixed error in reading fixed_len from parquet (#549)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Oct 24, 2021
1 parent e6b6c83 commit 788f382
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/io/parquet/read/fixed_size_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub(crate) fn read_dict_buffer(
values: &mut MutableBuffer<u8>,
validity: &mut MutableBitmap,
) {
let length = values.len() * size + additional;
let length = values.len() + additional * size;
let dict_values = dict.values();

// SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32),
Expand All @@ -42,13 +42,13 @@ pub(crate) fn read_dict_buffer(
for run in validity_iterator {
match run {
hybrid_rle::HybridEncoded::Bitpacked(packed) => {
let remaining = length - values.len() * size;
let remaining = (length - values.len()) / size;
let len = std::cmp::min(packed.len() * 8, remaining);
for is_valid in BitmapIter::new(packed, 0, len) {
validity.push(is_valid);
if is_valid {
let index = indices.next().unwrap() as usize;
values.extend_from_slice(&dict_values[index..(index + 1) * size]);
values.extend_from_slice(&dict_values[index * size..(index + 1) * size]);
} else {
values.extend_constant(size, 0);
}
Expand All @@ -60,7 +60,7 @@ pub(crate) fn read_dict_buffer(
if is_set {
(0..additional).for_each(|_| {
let index = indices.next().unwrap() as usize;
values.extend_from_slice(&dict_values[index..(index + 1) * size]);
values.extend_from_slice(&dict_values[index * size..(index + 1) * size]);
})
} else {
values.extend_constant(additional * size, 0)
Expand All @@ -78,7 +78,7 @@ pub(crate) fn read_optional(
values: &mut MutableBuffer<u8>,
validity: &mut MutableBitmap,
) {
let length = values.len() * size + additional;
let length = values.len() + additional * size;

assert_eq!(values_buffer.len() % size, 0);
let mut values_iterator = values_buffer.chunks_exact(size);
Expand All @@ -89,7 +89,7 @@ pub(crate) fn read_optional(
match run {
hybrid_rle::HybridEncoded::Bitpacked(packed) => {
// the pack may contain more items than needed.
let remaining = length - values.len() * size;
let remaining = (length - values.len()) / size;
let len = std::cmp::min(packed.len() * 8, remaining);
for is_valid in BitmapIter::new(packed, 0, len) {
validity.push(is_valid);
Expand Down
5 changes: 5 additions & 0 deletions tests/it/io/parquet/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ fn v1_decimal_9_required() -> Result<()> {
test_pyarrow_integration(6, 1, "basic", false, true)
}

/// Integration test that runs `test_pyarrow_integration(7, 1, "basic", true, false)`
/// and returns its `Result` directly, so any error from the helper fails the test.
///
/// NOTE(review): judging by the test name and the neighbouring tests, the arguments
/// presumably select column 7 (a nullable decimal(9) column) of the "basic" pyarrow
/// file, format version 1, with dictionary encoding enabled and `required` disabled.
/// Confirm against the signature of `test_pyarrow_integration`.
#[test]
fn v1_decimal_9_nullable_dict() -> Result<()> {
test_pyarrow_integration(7, 1, "basic", true, false)
}

#[test]
fn v1_decimal_18_nullable() -> Result<()> {
test_pyarrow_integration(8, 1, "basic", false, false)
Expand Down

0 comments on commit 788f382

Please sign in to comment.