This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 224
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for projection pushdown on IPC files (#264)
- Loading branch information
1 parent
d988539
commit 79ce377
Showing
23 changed files
with
1,104 additions
and
676 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
use std::collections::VecDeque; | ||
use std::convert::TryInto; | ||
use std::io::{Read, Seek}; | ||
|
||
use crate::array::{BinaryArray, Offset}; | ||
use crate::buffer::Buffer; | ||
use crate::error::Result; | ||
use crate::io::ipc::gen::Message::BodyCompression; | ||
use crate::types::NativeType; | ||
|
||
use super::super::super::gen; | ||
use super::super::deserialize::Node; | ||
use super::super::read_basic::*; | ||
|
||
pub fn read_binary<O: Offset, R: Read + Seek>( | ||
field_nodes: &mut VecDeque<Node>, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
reader: &mut R, | ||
block_offset: u64, | ||
is_little_endian: bool, | ||
compression: Option<BodyCompression>, | ||
) -> Result<BinaryArray<O>> | ||
where | ||
Vec<u8>: TryInto<O::Bytes> + TryInto<<u8 as NativeType>::Bytes>, | ||
{ | ||
let field_node = field_nodes.pop_front().unwrap().0; | ||
|
||
let validity = read_validity( | ||
buffers, | ||
field_node, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
)?; | ||
|
||
let offsets: Buffer<O> = read_buffer( | ||
buffers, | ||
1 + field_node.length() as usize, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
) | ||
// Older versions of the IPC format sometimes do not report an offset | ||
.or_else(|_| Result::Ok(Buffer::<O>::from(&[O::default()])))?; | ||
|
||
let last_offset = offsets.as_slice()[offsets.len() - 1].to_usize(); | ||
let values = read_buffer( | ||
buffers, | ||
last_offset, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
)?; | ||
|
||
Ok(BinaryArray::<O>::from_data(offsets, values, validity)) | ||
} | ||
|
||
pub fn skip_binary(field_nodes: &mut VecDeque<Node>, buffers: &mut VecDeque<&gen::Schema::Buffer>) { | ||
let _ = field_nodes.pop_front().unwrap(); | ||
|
||
let _ = buffers.pop_front().unwrap(); | ||
let _ = buffers.pop_front().unwrap(); | ||
let _ = buffers.pop_front().unwrap(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
use std::collections::VecDeque; | ||
use std::io::{Read, Seek}; | ||
|
||
use crate::array::BooleanArray; | ||
use crate::error::Result; | ||
|
||
use super::super::super::gen; | ||
use super::super::deserialize::Node; | ||
use super::super::read_basic::*; | ||
|
||
pub fn read_boolean<R: Read + Seek>( | ||
field_nodes: &mut VecDeque<Node>, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
reader: &mut R, | ||
block_offset: u64, | ||
is_little_endian: bool, | ||
) -> Result<BooleanArray> { | ||
let field_node = field_nodes.pop_front().unwrap().0; | ||
|
||
let length = field_node.length() as usize; | ||
let validity = read_validity( | ||
buffers, | ||
field_node, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
None, | ||
)?; | ||
|
||
let values = read_bitmap( | ||
buffers, | ||
length, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
None, | ||
)?; | ||
Ok(BooleanArray::from_data(values, validity)) | ||
} | ||
|
||
pub fn skip_boolean( | ||
field_nodes: &mut VecDeque<Node>, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
) { | ||
let _ = field_nodes.pop_front().unwrap(); | ||
|
||
let _ = buffers.pop_front().unwrap(); | ||
let _ = buffers.pop_front().unwrap(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
use std::collections::VecDeque; | ||
use std::convert::TryInto; | ||
use std::io::{Read, Seek}; | ||
|
||
use crate::array::{DictionaryArray, DictionaryKey}; | ||
use crate::error::Result; | ||
|
||
use super::super::super::gen; | ||
use super::super::deserialize::Node; | ||
use super::{read_primitive, skip_primitive}; | ||
|
||
pub fn read_dictionary<T: DictionaryKey, R: Read + Seek>( | ||
field_nodes: &mut VecDeque<Node>, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
reader: &mut R, | ||
block_offset: u64, | ||
is_little_endian: bool, | ||
) -> Result<DictionaryArray<T>> | ||
where | ||
Vec<u8>: TryInto<T::Bytes>, | ||
{ | ||
let values = field_nodes.front().unwrap().1.as_ref().unwrap(); | ||
|
||
let keys = read_primitive( | ||
field_nodes, | ||
T::DATA_TYPE, | ||
buffers, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
None, | ||
)?; | ||
|
||
Ok(DictionaryArray::<T>::from_data(keys, values.clone())) | ||
} | ||
|
||
pub fn skip_dictionary( | ||
field_nodes: &mut VecDeque<Node>, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
) { | ||
skip_primitive(field_nodes, buffers) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
use std::collections::VecDeque; | ||
use std::io::{Read, Seek}; | ||
|
||
use crate::array::FixedSizeBinaryArray; | ||
use crate::datatypes::DataType; | ||
use crate::error::Result; | ||
use crate::io::ipc::gen::Message::BodyCompression; | ||
|
||
use super::super::super::gen; | ||
use super::super::deserialize::Node; | ||
use super::super::read_basic::*; | ||
|
||
pub fn read_fixed_size_binary<R: Read + Seek>( | ||
field_nodes: &mut VecDeque<Node>, | ||
data_type: DataType, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
reader: &mut R, | ||
block_offset: u64, | ||
is_little_endian: bool, | ||
compression: Option<BodyCompression>, | ||
) -> Result<FixedSizeBinaryArray> { | ||
let field_node = field_nodes.pop_front().unwrap().0; | ||
|
||
let validity = read_validity( | ||
buffers, | ||
field_node, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
)?; | ||
|
||
let length = | ||
field_node.length() as usize * (*FixedSizeBinaryArray::get_size(&data_type) as usize); | ||
let values = read_buffer( | ||
buffers, | ||
length, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
)?; | ||
|
||
Ok(FixedSizeBinaryArray::from_data(data_type, values, validity)) | ||
} | ||
|
||
pub fn skip_fixed_size_binary( | ||
field_nodes: &mut VecDeque<Node>, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
) { | ||
let _ = field_nodes.pop_front().unwrap(); | ||
|
||
let _ = buffers.pop_front().unwrap(); | ||
let _ = buffers.pop_front().unwrap(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
use std::collections::VecDeque; | ||
use std::io::{Read, Seek}; | ||
|
||
use crate::array::FixedSizeListArray; | ||
use crate::datatypes::DataType; | ||
use crate::error::Result; | ||
use crate::io::ipc::gen::Message::BodyCompression; | ||
|
||
use super::super::super::gen; | ||
use super::super::deserialize::{read, skip, Node}; | ||
use super::super::read_basic::*; | ||
|
||
pub fn read_fixed_size_list<R: Read + Seek>( | ||
field_nodes: &mut VecDeque<Node>, | ||
data_type: DataType, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
reader: &mut R, | ||
block_offset: u64, | ||
is_little_endian: bool, | ||
compression: Option<BodyCompression>, | ||
) -> Result<FixedSizeListArray> { | ||
let field_node = field_nodes.pop_front().unwrap().0; | ||
|
||
let validity = read_validity( | ||
buffers, | ||
field_node, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
)?; | ||
|
||
let (value_data_type, _) = FixedSizeListArray::get_child_and_size(&data_type); | ||
|
||
let values = read( | ||
field_nodes, | ||
value_data_type.clone(), | ||
buffers, | ||
reader, | ||
block_offset, | ||
is_little_endian, | ||
compression, | ||
)?; | ||
Ok(FixedSizeListArray::from_data(data_type, values, validity)) | ||
} | ||
|
||
pub fn skip_fixed_size_list( | ||
field_nodes: &mut VecDeque<Node>, | ||
data_type: &DataType, | ||
buffers: &mut VecDeque<&gen::Schema::Buffer>, | ||
) { | ||
let _ = field_nodes.pop_front().unwrap(); | ||
|
||
let _ = buffers.pop_front().unwrap(); | ||
|
||
let (data_type, _) = FixedSizeListArray::get_child_and_size(data_type); | ||
|
||
skip(field_nodes, data_type, buffers) | ||
} |
Oops, something went wrong.