-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Struct layout eval with sub-expression slicing and push down (#1893)
Co-authored-by: Nicholas Gates <nick@nickgates.com>
- Loading branch information
1 parent
e8228c0
commit cedcb24
Showing
12 changed files
with
236 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,155 @@ | ||
use async_trait::async_trait; | ||
use vortex_array::ArrayData; | ||
use futures::future::try_join_all; | ||
use itertools::Itertools; | ||
use vortex_array::array::StructArray; | ||
use vortex_array::validity::Validity; | ||
use vortex_array::{ArrayData, IntoArrayData}; | ||
use vortex_error::VortexResult; | ||
use vortex_expr::transform::partition::partition; | ||
use vortex_expr::ExprRef; | ||
use vortex_scan::RowMask; | ||
|
||
use crate::layouts::struct_::reader::StructScan; | ||
use crate::layouts::struct_::reader::StructReader; | ||
use crate::ExprEvaluator; | ||
|
||
#[async_trait(?Send)] | ||
impl ExprEvaluator for StructScan { | ||
async fn evaluate_expr(&self, _row_mask: RowMask, _expr: ExprRef) -> VortexResult<ArrayData> { | ||
todo!() | ||
impl ExprEvaluator for StructReader { | ||
async fn evaluate_expr(&self, row_mask: RowMask, expr: ExprRef) -> VortexResult<ArrayData> { | ||
// Partition the expression into expressions that can be evaluated over individual fields | ||
let partitioned = partition(expr, self.struct_dtype())?; | ||
let field_readers: Vec<_> = partitioned | ||
.partitions | ||
.iter() | ||
.map(|partition| self.child(&partition.field)) | ||
.try_collect()?; | ||
|
||
let arrays = try_join_all( | ||
field_readers | ||
.iter() | ||
.zip_eq(partitioned.partitions.iter()) | ||
.map(|(reader, partition)| { | ||
reader.evaluate_expr(row_mask.clone(), partition.expr.clone()) | ||
}), | ||
) | ||
.await?; | ||
|
||
let row_count = row_mask.true_count(); | ||
debug_assert!(arrays.iter().all(|a| a.len() == row_count)); | ||
|
||
let root_scope = StructArray::try_new( | ||
partitioned | ||
.partitions | ||
.iter() | ||
.map(|p| p.name.clone()) | ||
.collect::<Vec<_>>() | ||
.into(), | ||
arrays, | ||
row_count, | ||
Validity::NonNullable, | ||
)? | ||
.into_array(); | ||
|
||
// Recombine the partitioned expressions into a single expression | ||
partitioned.root.evaluate(&root_scope) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use std::sync::Arc; | ||
|
||
use futures::executor::block_on; | ||
use vortex_array::array::StructArray; | ||
use vortex_array::compute::FilterMask; | ||
use vortex_array::{IntoArrayData, IntoArrayVariant}; | ||
use vortex_buffer::buffer; | ||
use vortex_dtype::PType::I32; | ||
use vortex_dtype::{DType, Nullability, StructDType}; | ||
use vortex_expr::{get_item, gt, ident}; | ||
use vortex_scan::RowMask; | ||
|
||
use crate::layouts::flat::writer::FlatLayoutWriter; | ||
use crate::layouts::struct_::writer::StructLayoutWriter; | ||
use crate::segments::test::TestSegments; | ||
use crate::strategies::LayoutWriterExt; | ||
use crate::LayoutData; | ||
|
||
/// Create a chunked layout with three chunks of primitive arrays. | ||
fn struct_layout() -> (Arc<TestSegments>, LayoutData) { | ||
let mut segments = TestSegments::default(); | ||
|
||
let layout = StructLayoutWriter::new( | ||
DType::Struct( | ||
StructDType::new( | ||
vec!["a".into(), "b".into(), "c".into()].into(), | ||
vec![I32.into(), I32.into(), I32.into()], | ||
), | ||
Nullability::NonNullable, | ||
), | ||
vec![ | ||
Box::new(FlatLayoutWriter::new(I32.into())), | ||
Box::new(FlatLayoutWriter::new(I32.into())), | ||
Box::new(FlatLayoutWriter::new(I32.into())), | ||
], | ||
) | ||
.push_all( | ||
&mut segments, | ||
[StructArray::from_fields( | ||
[ | ||
("a", buffer![7, 2, 3].into_array()), | ||
("b", buffer![4, 5, 6].into_array()), | ||
("c", buffer![4, 5, 6].into_array()), | ||
] | ||
.as_slice(), | ||
) | ||
.map(IntoArrayData::into_array)], | ||
) | ||
.unwrap(); | ||
(Arc::new(segments), layout) | ||
} | ||
|
||
/// Evaluates `a > b` over all three rows of the struct layout and checks the
/// resulting boolean values.
#[test]
fn test_struct_layout() {
    let (segments, layout) = struct_layout();
    let reader = layout.reader(segments, Default::default()).unwrap();

    // a = [7, 2, 3], b = [4, 5, 6]
    let a_gt_b = gt(get_item("a", ident()), get_item("b", ident()));
    let all_rows = RowMask::new_valid_between(0, 3);

    let result = block_on(reader.evaluate_expr(all_rows, a_gt_b)).unwrap();
    let actual: Vec<_> = result
        .into_bool()
        .unwrap()
        .boolean_buffer()
        .iter()
        .collect();

    assert_eq!(vec![true, false, false], actual);
}
|
||
/// Evaluates `a > b` with a row mask that selects only the first two rows and
/// checks that the result contains exactly those two rows.
#[test]
fn test_struct_layout_row_mask() {
    let (segments, layout) = struct_layout();
    let reader = layout.reader(segments, Default::default()).unwrap();

    let a_gt_b = gt(get_item("a", ident()), get_item("b", ident()));
    // Keep rows 0 and 1; row 2 (and anything after it) is skipped.
    let first_two_rows = RowMask::new(FilterMask::from_iter([true, true, false]), 0);

    let result = block_on(reader.evaluate_expr(first_two_rows, a_gt_b)).unwrap();

    assert_eq!(result.len(), 2);

    let actual: Vec<_> = result
        .into_bool()
        .unwrap()
        .boolean_buffer()
        .iter()
        .collect();
    assert_eq!(vec![true, false], actual);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters