diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e91b7f2f83a..160575c93f0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -126,6 +126,8 @@ jobs: - uses: Swatinem/rust-cache@v1 - name: Run run: cargo check-all-features + - name: Bench Check + run: cargo bench --no-run --features full,benchmarks cross: name: cross diff --git a/benches/arithmetic_kernels.rs b/benches/arithmetic_kernels.rs index b0e487341b9..c96b3ea288c 100644 --- a/benches/arithmetic_kernels.rs +++ b/benches/arithmetic_kernels.rs @@ -1,23 +1,22 @@ +use arrow2::compute::arithmetics::basic::NativeArithmetics; use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; use arrow2::util::bench_util::*; -use arrow2::{ - compute::arithmetics::basic::add, compute::arithmetics::basic::div_scalar, types::NativeType, -}; +use arrow2::{compute::arithmetics::basic::add, compute::arithmetics::basic::div_scalar}; use num_traits::NumCast; use std::ops::{Add, Div}; fn bench_div_scalar(lhs: &PrimitiveArray, rhs: &T) where - T: NativeType + Div + NumCast, + T: NativeArithmetics + Div + NumCast, { criterion::black_box(div_scalar(lhs, rhs)); } fn bench_add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) where - T: NativeType + Add + NumCast, + T: NativeArithmetics + Add + NumCast, { criterion::black_box(add(lhs, rhs)); } diff --git a/benches/avro_read.rs b/benches/avro_read.rs index 23a9e9e1602..56749995068 100644 --- a/benches/avro_read.rs +++ b/benches/avro_read.rs @@ -1,5 +1,4 @@ use std::io::Cursor; -use std::sync::Arc; use avro_rs::types::Record; use criterion::*; @@ -52,13 +51,13 @@ fn read_batch(buffer: &[u8], size: usize) -> Result<()> { codec, ), avro_schema, - Arc::new(schema), + schema.fields().clone(), ); let mut rows = 0; for maybe_batch in reader { let batch = maybe_batch?; - rows += batch.num_rows(); + rows += batch.len(); } assert_eq!(rows, size); Ok(()) diff --git a/benches/filter_kernels.rs b/benches/filter_kernels.rs index fb9376b0d66..db4633e4b27 100644 --- a/benches/filter_kernels.rs +++ b/benches/filter_kernels.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -14,21 +16,19 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use std::sync::Arc; - use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; use arrow2::chunk::Chunk; use arrow2::compute::filter::{build_filter, filter, filter_chunk, Filter}; -use arrow2::datatypes::{DataType, Field, Schema}; +use arrow2::datatypes::DataType; use arrow2::util::bench_util::{create_boolean_array, create_primitive_array, create_string_array}; fn bench_filter(data_array: &dyn Array, filter_array: &BooleanArray) { criterion::black_box(filter(data_array, filter_array).unwrap()); } -fn bench_built_filter<'a>(filter: &Filter<'a>, array: &impl Array) { +fn bench_built_filter<'a>(filter: &Filter<'a>, array: &dyn Array) { criterion::black_box(filter(array)); } @@ -125,10 +125,9 @@ fn add_benchmark(c: &mut Criterion) { let data_array = create_primitive_array::(size, 0.0); - let columns = Chunk::try_new(vec![Arc::new(data_array)]).unwrap(); - + let columns = Chunk::try_new(vec![Arc::new(data_array) as ArrayRef]).unwrap(); c.bench_function("filter single record batch", |b| { - b.iter(|| filter_record_batch(&columns, &filter_array)) + b.iter(|| filter_chunk(&columns, &filter_array)) }); } diff --git a/benches/write_csv.rs b/benches/write_csv.rs index d88afdc16f9..0d341cea9d7 100644 --- a/benches/write_csv.rs +++ b/benches/write_csv.rs @@ -8,14 +8,16 @@ use arrow2::error::Result; use arrow2::io::csv::write; use arrow2::util::bench_util::*; -fn write_batch(columns: &Chunk) -> Result<()> { +type ChunkArc = Chunk>; + +fn write_batch(columns: &ChunkArc) -> Result<()> { let writer = &mut write::WriterBuilder::new().from_writer(vec![]); assert_eq!(columns.arrays().len(), 1); write::write_header(writer, &["a"])?; let options = write::SerializeOptions::default(); - write::write_batch(writer, batch, &options) + write::write_chunk(writer, columns, &options) } fn make_chunk(array: impl Array + 'static) -> Chunk> { diff --git a/benches/write_ipc.rs b/benches/write_ipc.rs index 2ec35a5b9a4..df8337ce8be 100644 --- a/benches/write_ipc.rs +++ b/benches/write_ipc.rs @@ -1,7 +1,5 @@ -use std::io::Cursor; -use std::sync::Arc; - use criterion::{criterion_group, criterion_main, Criterion}; +use std::io::Cursor; use arrow2::array::*; use arrow2::chunk::Chunk; diff --git a/benches/write_parquet.rs b/benches/write_parquet.rs index e8dc072e24f..88c18a658ed 100644 --- a/benches/write_parquet.rs +++ b/benches/write_parquet.rs @@ -1,5 +1,7 @@ use std::io::Cursor; +use std::sync::Arc; +use arrow2::datatypes::{Field, Schema}; use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::{clone, Array}; @@ -8,9 +10,11 @@ use arrow2::error::Result; use arrow2::io::parquet::write::*; use arrow2::util::bench_util::{create_boolean_array, create_primitive_array, create_string_array}; +type ChunkArc = Chunk>; + fn write(array: &dyn Array, encoding: Encoding) -> Result<()> { - let columns = Chunk::new(vec![clone(array).into()]); - let schema = batch.schema().clone(); + let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), true)]); + let columns: ChunkArc = Chunk::new(vec![clone(array).into()]); let options = WriteOptions { write_statistics: false,