From 2ca4b16026a51aca5ddc9ae77aac1e173b57ed7f Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sun, 16 May 2021 08:35:30 +0000 Subject: [PATCH] Added remaining scalars and improved API. --- src/compute/aggregate/sum.rs | 39 ++++- src/lib.rs | 1 + src/scalar/README.md | 2 +- src/scalar/binary.rs | 50 ++++++ src/scalar/boolean.rs | 42 +++++ src/scalar/equal.rs | 115 ++++++++++++ src/scalar/list.rs | 62 +++++++ src/scalar/mod.rs | 330 ++++++++++++----------------------- src/scalar/null.rs | 36 ++++ src/scalar/primitive.rs | 67 +++++++ src/scalar/struct_.rs | 46 +++++ src/scalar/utf8.rs | 50 ++++++ 12 files changed, 619 insertions(+), 221 deletions(-) create mode 100644 src/scalar/binary.rs create mode 100644 src/scalar/boolean.rs create mode 100644 src/scalar/equal.rs create mode 100644 src/scalar/list.rs create mode 100644 src/scalar/null.rs create mode 100644 src/scalar/primitive.rs create mode 100644 src/scalar/struct_.rs create mode 100644 src/scalar/utf8.rs diff --git a/src/compute/aggregate/sum.rs b/src/compute/aggregate/sum.rs index 49e3f392e35..eb3b3ef4d2f 100644 --- a/src/compute/aggregate/sum.rs +++ b/src/compute/aggregate/sum.rs @@ -118,6 +118,32 @@ macro_rules! dyn_sum { }}; } +pub fn can_sum(data_type: &DataType) -> bool { // true iff `data_type` is one of the logical types listed below + use DataType::*; + matches!( + data_type, + Int8 | Int16 | Int32 // Int32 added: the `sum` tests in this patch sum an Int32Array successfully + | Date32 + | Time32(_) + | Interval(IntervalUnit::YearMonth) + | Int64 + | Date64 + | Time64(_) + | Timestamp(_, _) + | Duration(_) + | UInt8 + | UInt16 + | UInt32 + | UInt64 + | Float32 + | Float64 + ) +} + +/// Returns the sum of all elements in `array` as a [`Scalar`] of the same physical +/// and logical types as `array`. +/// # Error +/// Errors iff the operation is not supported. 
pub fn sum(array: &dyn Array) -> Result> { Ok(match array.data_type() { DataType::Int8 => dyn_sum!(i8, array), @@ -158,13 +184,22 @@ mod tests { #[test] fn test_primitive_array_sum() { let a = Int32Array::from_slice(&[1, 2, 3, 4, 5]); - assert_eq!(15, sum(&a).unwrap()); + assert_eq!( + &PrimitiveScalar::::from(Some(15)) as &dyn Scalar, + sum(&a).unwrap().as_ref() + ); + + let a = a.to(DataType::Date32); + assert_eq!( + &PrimitiveScalar::::from(Some(15)).to(DataType::Date32) as &dyn Scalar, + sum(&a).unwrap().as_ref() + ); } #[test] fn test_primitive_array_float_sum() { let a = Float64Array::from_slice(&[1.1f64, 2.2, 3.3, 4.4, 5.5]); - assert!((16.5 - sum(&a).unwrap()).abs() < f64::EPSILON); + assert!((16.5 - sum_primitive(&a).unwrap()).abs() < f64::EPSILON); } #[test] diff --git a/src/lib.rs b/src/lib.rs index f0b43d91fa0..7a5b27346db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod bitmap; pub mod buffer; mod endianess; pub mod error; +#[cfg(feature = "compute")] pub mod scalar; pub mod trusted_len; pub mod types; diff --git a/src/scalar/README.md b/src/scalar/README.md index 0948317004b..2bac790873b 100644 --- a/src/scalar/README.md +++ b/src/scalar/README.md @@ -10,7 +10,7 @@ There are three reasons: * forward-compatibility: a new entry on an `enum` is backward-incompatible * do not expose implementation details to users (reduce the surface of the public API) -### `Scalar` should contain nullability information +### `Scalar` MUST contain nullability information This is to be aligned with the general notion of arrow's `Array`. 
diff --git a/src/scalar/binary.rs b/src/scalar/binary.rs new file mode 100644 index 00000000000..de99c3d9fc3 --- /dev/null +++ b/src/scalar/binary.rs @@ -0,0 +1,50 @@ +use crate::{array::*, buffer::Buffer, datatypes::DataType}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct BinaryScalar { + value: Buffer, + is_valid: bool, + phantom: std::marker::PhantomData, +} + +impl BinaryScalar { + #[inline] + pub fn new(v: Option<&[u8]>) -> Self { + let is_valid = v.is_some(); + O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); + let value = Buffer::from(v.unwrap_or(&[])); + Self { + value, + is_valid, + phantom: std::marker::PhantomData, + } + } + + #[inline] + pub fn value(&self) -> &[u8] { + self.value.as_slice() + } +} + +impl Scalar for BinaryScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + if O::is_large() { + &DataType::LargeBinary + } else { + &DataType::Binary + } + } +} diff --git a/src/scalar/boolean.rs b/src/scalar/boolean.rs new file mode 100644 index 00000000000..bc13931b2c3 --- /dev/null +++ b/src/scalar/boolean.rs @@ -0,0 +1,42 @@ +use crate::datatypes::DataType; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct BooleanScalar { + value: bool, + is_valid: bool, +} + +impl BooleanScalar { + #[inline] + pub fn new(v: Option) -> Self { + let is_valid = v.is_some(); + Self { + value: v.unwrap_or_default(), + is_valid, + } + } + + #[inline] + pub fn value(&self) -> bool { + self.value + } +} + +impl Scalar for BooleanScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + &DataType::Boolean + } +} diff --git a/src/scalar/equal.rs b/src/scalar/equal.rs new file mode 100644 index 00000000000..503f9fc6856 --- /dev/null +++ 
b/src/scalar/equal.rs @@ -0,0 +1,115 @@ +use super::*; + +impl PartialEq for dyn Scalar { + fn eq(&self, other: &Self) -> bool { + equal(self, other) + } +} + +macro_rules! dyn_eq { + ($ty:ty, $lhs:expr, $rhs:expr) => {{ + let lhs = $lhs + .as_any() + .downcast_ref::>() + .unwrap(); + let rhs = $rhs + .as_any() + .downcast_ref::>() + .unwrap(); + lhs == rhs + }}; +} + +fn equal(lhs: &dyn Scalar, rhs: &dyn Scalar) -> bool { + if lhs.data_type() != rhs.data_type() { + return false; + } + + match lhs.data_type() { + DataType::Null => { + let lhs = lhs.as_any().downcast_ref::().unwrap(); + let rhs = rhs.as_any().downcast_ref::().unwrap(); + lhs == rhs + } + DataType::Boolean => { + let lhs = lhs.as_any().downcast_ref::().unwrap(); + let rhs = rhs.as_any().downcast_ref::().unwrap(); + lhs == rhs + } + DataType::UInt8 => { + dyn_eq!(u8, lhs, rhs) + } + DataType::UInt16 => { + dyn_eq!(u16, lhs, rhs) + } + DataType::UInt32 => { + dyn_eq!(u32, lhs, rhs) + } + DataType::UInt64 => { + dyn_eq!(u64, lhs, rhs) + } + DataType::Int8 => { + dyn_eq!(i8, lhs, rhs) + } + DataType::Int16 => { + dyn_eq!(i16, lhs, rhs) + } + DataType::Int32 + | DataType::Date32 + | DataType::Time32(_) + | DataType::Interval(IntervalUnit::YearMonth) => { + dyn_eq!(i32, lhs, rhs) + } + DataType::Int64 + | DataType::Date64 + | DataType::Time64(_) + | DataType::Timestamp(_, _) + | DataType::Duration(_) => { + dyn_eq!(i64, lhs, rhs) + } + DataType::Decimal(_, _) => { + dyn_eq!(i128, lhs, rhs) + } + DataType::Interval(IntervalUnit::DayTime) => { + dyn_eq!(days_ms, lhs, rhs) + } + DataType::Float16 => unreachable!(), + DataType::Float32 => { + dyn_eq!(f32, lhs, rhs) + } + DataType::Float64 => { + dyn_eq!(f64, lhs, rhs) + } + DataType::Utf8 => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::LargeUtf8 => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + 
} + DataType::Binary => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::LargeBinary => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::List(_) => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + DataType::LargeList(_) => { + let lhs = lhs.as_any().downcast_ref::>().unwrap(); + let rhs = rhs.as_any().downcast_ref::>().unwrap(); + lhs == rhs + } + _ => unimplemented!(), + } +} diff --git a/src/scalar/list.rs b/src/scalar/list.rs new file mode 100644 index 00000000000..1bf52d873c8 --- /dev/null +++ b/src/scalar/list.rs @@ -0,0 +1,62 @@ +use std::any::Any; +use std::sync::Arc; + +use crate::{array::*, datatypes::DataType}; + +use super::Scalar; + +/// The scalar equivalent of [`ListArray`]. Like [`ListArray`], this struct holds a dynamically-typed +/// [`Array`]. The only difference is that this has only one element. 
+#[derive(Debug, Clone)] +pub struct ListScalar { + values: Arc, + is_valid: bool, + phantom: std::marker::PhantomData, + data_type: DataType, +} + +impl PartialEq for ListScalar { + fn eq(&self, other: &Self) -> bool { // equal iff same data type, same validity and, when valid, same values + (self.data_type == other.data_type) + && (self.is_valid == other.is_valid) + && (!self.is_valid || (self.values.as_ref() == other.values.as_ref())) // values only matter when valid, so a null scalar equals itself and other nulls of the same type + } +} + +pub enum ListScalarNew { + Array(Arc), + DataType(DataType), +} + +impl ListScalar { + #[inline] + pub fn new(data_type: DataType, values: Option>) -> Self { + let (is_valid, values) = match values { + Some(values) => (true, values), + None => { + let data_type = ListArray::::get_child_type(&data_type).clone(); + (false, new_empty_array(data_type).into()) + } + }; + Self { + values, + is_valid, + phantom: std::marker::PhantomData, + data_type, + } + } +} + +impl Scalar for ListScalar { + fn as_any(&self) -> &dyn Any { + self + } + + fn is_valid(&self) -> bool { + self.is_valid + } + + fn data_type(&self) -> &DataType { + &self.data_type + } +} diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index b91be027d2a..071f4c3328c 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -1,6 +1,22 @@ use std::any::Any; -use crate::{array::*, bitmap::Bitmap, buffer::Buffer, datatypes::DataType, types::NativeType}; +use crate::{array::*, datatypes::*, types::days_ms}; + +mod equal; +mod primitive; +pub use primitive::*; +mod utf8; +pub use utf8::*; +mod binary; +pub use binary::*; +mod boolean; +pub use boolean::*; +mod list; +pub use list::*; +mod null; +pub use null::*; +mod struct_; +pub use struct_::*; pub trait Scalar: std::fmt::Debug { fn as_any(&self) -> &dyn Any; @@ -8,238 +24,116 @@ pub trait Scalar: std::fmt::Debug { fn is_valid(&self) -> bool; fn data_type(&self) -> &DataType; - - fn to_boxed_array(&self, length: usize) -> Box; -} - -#[derive(Debug, Clone)] -pub struct PrimitiveScalar { - // Not Option because this offers a stabler pointer offset on the struct - value: T, - is_valid: bool, 
- data_type: DataType, -} - -impl PrimitiveScalar { - #[inline] - pub fn new(data_type: DataType, v: Option) -> Self { - let is_valid = v.is_some(); - Self { - value: v.unwrap_or_default(), - is_valid, - data_type, - } - } - - #[inline] - pub fn value(&self) -> T { - self.value - } } -impl Scalar for PrimitiveScalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - &self.data_type - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let values = Buffer::from_trusted_len_iter(std::iter::repeat(self.value).take(length)); - Box::new(PrimitiveArray::from_data( - self.data_type.clone(), - values, - None, - )) +macro_rules! dyn_new { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array + .as_any() + .downcast_ref::>() + .unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) } else { - Box::new(PrimitiveArray::::new_null( - self.data_type.clone(), - length, - )) - } - } + None + }; + Box::new(PrimitiveScalar::new(array.data_type().clone(), value)) + }}; } -#[derive(Debug, Clone)] -pub struct BooleanScalar { - value: bool, - is_valid: bool, -} - -impl BooleanScalar { - #[inline] - pub fn new(v: Option) -> Self { - let is_valid = v.is_some(); - Self { - value: v.unwrap_or_default(), - is_valid, - } - } - - #[inline] - pub fn value(&self) -> bool { - self.value - } -} - -impl Scalar for BooleanScalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - &DataType::Boolean - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let values = Bitmap::from_trusted_len_iter(std::iter::repeat(self.value).take(length)); - Box::new(BooleanArray::from_data(values, None)) +macro_rules! 
dyn_new_utf8 { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array.as_any().downcast_ref::>().unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) } else { - Box::new(BooleanArray::new_null(length)) - } - } + None + }; + Box::new(Utf8Scalar::<$type>::new(value)) + }}; } -#[derive(Debug, Clone)] -pub struct Utf8Scalar { - value: Buffer, - is_valid: bool, - phantom: std::marker::PhantomData, -} - -impl Utf8Scalar { - #[inline] - pub fn new(v: Option<&str>) -> Self { - let is_valid = v.is_some(); - O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); - let value = Buffer::from(v.map(|x| x.as_bytes()).unwrap_or(&[])); - Self { - value, - is_valid, - phantom: std::marker::PhantomData, - } - } - - #[inline] - pub fn value(&self) -> &str { - unsafe { std::str::from_utf8_unchecked(self.value.as_slice()) } - } -} - -impl Scalar for Utf8Scalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - if O::is_large() { - &DataType::LargeUtf8 - } else { - &DataType::Utf8 - } - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let item_length = O::from_usize(self.value.len()).unwrap(); // verified at `new` - let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * item_length); - let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; - let values = std::iter::repeat(self.value.as_slice()) - .take(length) - .flatten() - .copied() - .collect(); - Box::new(Utf8Array::::from_data(offsets, values, None)) +macro_rules! 
dyn_new_binary { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array + .as_any() + .downcast_ref::>() + .unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index)) } else { - Box::new(Utf8Array::::new_null(length)) - } - } + None + }; + Box::new(BinaryScalar::<$type>::new(value)) + }}; } -#[derive(Debug, Clone)] -pub struct BinaryScalar { - value: Buffer, - is_valid: bool, - phantom: std::marker::PhantomData, +macro_rules! dyn_new_list { + ($array:expr, $index:expr, $type:ty) => {{ + let array = $array.as_any().downcast_ref::>().unwrap(); + let value = if array.is_valid($index) { + Some(array.value($index).into()) + } else { + None + }; + Box::new(ListScalar::<$type>::new(array.data_type().clone(), value)) + }}; } -impl BinaryScalar { - #[inline] - pub fn new(v: Option<&str>) -> Self { - let is_valid = v.is_some(); - O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); - let value = Buffer::from(v.map(|x| x.as_bytes()).unwrap_or(&[])); - Self { - value, - is_valid, - phantom: std::marker::PhantomData, +/// creates a new [`Scalar`] from an [`Array`]. 
+pub fn new_scalar(array: &dyn Array, index: usize) -> Box { + use DataType::*; + match array.data_type() { + Null => Box::new(NullScalar::new()), + Boolean => { + let array = array.as_any().downcast_ref::().unwrap(); + let value = if array.is_valid(index) { + Some(array.value(index)) + } else { + None + }; + Box::new(BooleanScalar::new(value)) } - } - - #[inline] - pub fn value(&self) -> &[u8] { - self.value.as_slice() - } -} - -impl Scalar for BinaryScalar { - #[inline] - fn as_any(&self) -> &dyn std::any::Any { - self - } - - #[inline] - fn is_valid(&self) -> bool { - self.is_valid - } - - #[inline] - fn data_type(&self) -> &DataType { - if O::is_large() { - &DataType::LargeBinary - } else { - &DataType::Binary + Int8 => dyn_new!(array, index, i8), + Int16 => dyn_new!(array, index, i16), + Int32 | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => { + dyn_new!(array, index, i32) } - } - - fn to_boxed_array(&self, length: usize) -> Box { - if self.is_valid { - let item_length = O::from_usize(self.value.len()).unwrap(); // verified at `new` - let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * item_length); - let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) }; - let values = std::iter::repeat(self.value.as_slice()) - .take(length) - .flatten() - .copied() - .collect(); - Box::new(BinaryArray::::from_data(offsets, values, None)) - } else { - Box::new(BinaryArray::::new_null(length)) + Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => dyn_new!(array, index, i64), + Interval(IntervalUnit::DayTime) => dyn_new!(array, index, days_ms), + UInt8 => dyn_new!(array, index, u8), + UInt16 => dyn_new!(array, index, u16), + UInt32 => dyn_new!(array, index, u32), + UInt64 => dyn_new!(array, index, u64), + Decimal(_, _) => dyn_new!(array, index, i128), + Float16 => unreachable!(), + Float32 => dyn_new!(array, index, f32), + Float64 => dyn_new!(array, index, f64), + Utf8 => dyn_new_utf8!(array, index, i32), + LargeUtf8 => 
dyn_new_utf8!(array, index, i64), + Binary => dyn_new_binary!(array, index, i32), + LargeBinary => dyn_new_binary!(array, index, i64), + List(_) => dyn_new_list!(array, index, i32), + LargeList(_) => dyn_new_list!(array, index, i64), + Struct(_) => { + let array = array.as_any().downcast_ref::().unwrap(); + if array.is_valid(index) { + let values = array + .values() + .iter() + .map(|x| new_scalar(x.as_ref(), index).into()) + .collect(); + Box::new(StructScalar::new(array.data_type().clone(), Some(values))) + } else { + Box::new(StructScalar::new(array.data_type().clone(), None)) + } } + /* + FixedSizeBinary(_) => {} + FixedSizeList(_, _) => {} + Union(_) => {} + Dictionary(_, _) => {} + */ + _ => todo!(), } } diff --git a/src/scalar/null.rs b/src/scalar/null.rs new file mode 100644 index 00000000000..3751c6cfbd6 --- /dev/null +++ b/src/scalar/null.rs @@ -0,0 +1,36 @@ +use crate::datatypes::DataType; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct NullScalar {} + +impl NullScalar { + #[inline] + pub fn new() -> Self { + Self {} + } +} + +impl Default for NullScalar { + fn default() -> Self { + Self::new() + } +} + +impl Scalar for NullScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + false + } + + #[inline] + fn data_type(&self) -> &DataType { + &DataType::Null + } +} diff --git a/src/scalar/primitive.rs b/src/scalar/primitive.rs new file mode 100644 index 00000000000..1e925039d76 --- /dev/null +++ b/src/scalar/primitive.rs @@ -0,0 +1,67 @@ +use crate::{ + datatypes::DataType, + types::{NativeType, NaturalDataType}, +}; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct PrimitiveScalar { + // Not Option because this offers a stabler pointer offset on the struct + value: T, + is_valid: bool, + data_type: DataType, +} + +impl PrimitiveScalar { + #[inline] + pub fn new(data_type: DataType, v: Option) -> Self { + let is_valid = v.is_some(); + Self { + 
value: v.unwrap_or_default(), + is_valid, + data_type, + } + } + + #[inline] + pub fn value(&self) -> T { + self.value + } + + /// Returns a new `PrimitiveScalar` with the same value and validity but a different [`DataType`]. + /// # Panic + /// NOTE(review): documented to panic if `data_type` is not valid for self's physical type `T`, but neither this function nor `new` checks that here — confirm. + pub fn to(self, data_type: DataType) -> Self { + let v = if self.is_valid { + Some(self.value) + } else { + None + }; + Self::new(data_type, v) + } +} + +impl From> for PrimitiveScalar { + #[inline] + fn from(v: Option) -> Self { + Self::new(T::DATA_TYPE, v) + } +} + +impl Scalar for PrimitiveScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + &self.data_type + } +} diff --git a/src/scalar/struct_.rs b/src/scalar/struct_.rs new file mode 100644 index 00000000000..26bf643c6cb --- /dev/null +++ b/src/scalar/struct_.rs @@ -0,0 +1,46 @@ +use std::sync::Arc; + +use crate::datatypes::DataType; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +pub struct StructScalar { + values: Vec>, + is_valid: bool, + data_type: DataType, +} + +impl StructScalar { + #[inline] + pub fn new(data_type: DataType, values: Option>>) -> Self { + let is_valid = values.is_some(); + Self { + values: values.unwrap_or_default(), + is_valid, + data_type, + } + } + + #[inline] + pub fn values(&self) -> &[Arc] { + &self.values + } +} + +impl Scalar for StructScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + &self.data_type + } +} diff --git a/src/scalar/utf8.rs b/src/scalar/utf8.rs new file mode 100644 index 00000000000..16465735019 --- /dev/null +++ b/src/scalar/utf8.rs @@ -0,0 +1,50 @@ +use crate::{array::*, buffer::Buffer, datatypes::DataType}; + +use super::Scalar; + +#[derive(Debug, Clone, 
PartialEq)] +pub struct Utf8Scalar { + value: Buffer, + is_valid: bool, + phantom: std::marker::PhantomData, +} + +impl Utf8Scalar { + #[inline] + pub fn new(v: Option<&str>) -> Self { + let is_valid = v.is_some(); + O::from_usize(v.map(|x| x.len()).unwrap_or_default()).expect("Too large"); + let value = Buffer::from(v.map(|x| x.as_bytes()).unwrap_or(&[])); + Self { + value, + is_valid, + phantom: std::marker::PhantomData, + } + } + + #[inline] + pub fn value(&self) -> &str { + unsafe { std::str::from_utf8_unchecked(self.value.as_slice()) } + } +} + +impl Scalar for Utf8Scalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.is_valid + } + + #[inline] + fn data_type(&self) -> &DataType { + if O::is_large() { + &DataType::LargeUtf8 + } else { + &DataType::Utf8 + } + } +}