From 7f0d78cdda2a7fe46a688c77bc982c0f25430c3d Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 6 Jan 2025 15:12:18 +0100 Subject: [PATCH 1/6] chore(rust): Rename is_numeric to is_primitive_numeric --- .../src/chunked_array/builder/fixed_size_list.rs | 2 +- .../src/chunked_array/builder/list/primitive.rs | 2 +- .../src/chunked_array/logical/categorical/mod.rs | 2 +- .../src/chunked_array/logical/date.rs | 2 +- .../src/chunked_array/logical/datetime.rs | 2 +- .../src/chunked_array/logical/duration.rs | 2 +- .../src/chunked_array/logical/time.rs | 2 +- .../src/chunked_array/ops/explode_and_offsets.rs | 2 +- .../src/chunked_array/ops/fill_null.rs | 6 +++--- .../polars-core/src/chunked_array/ops/reverse.rs | 2 +- crates/polars-core/src/datatypes/any_value.rs | 2 +- crates/polars-core/src/datatypes/dtype.rs | 16 ++++++++-------- crates/polars-core/src/fmt.rs | 2 +- .../src/frame/group_by/aggregations/dispatch.rs | 6 +++--- crates/polars-core/src/scalar/reduce.rs | 2 +- .../polars-core/src/series/arithmetic/owned.rs | 2 +- crates/polars-core/src/series/comparison.rs | 2 +- .../src/series/implementations/list.rs | 6 +++--- crates/polars-core/src/series/iterator.rs | 2 +- crates/polars-core/src/series/mod.rs | 10 +++++----- crates/polars-core/src/utils/supertype.rs | 4 ++-- .../polars-expr/src/expressions/aggregation.rs | 2 +- crates/polars-expr/src/expressions/binary.rs | 2 +- crates/polars-expr/src/expressions/window.rs | 2 +- crates/polars-expr/src/reduce/mean.rs | 4 ++-- crates/polars-expr/src/reduce/var_std.rs | 2 +- crates/polars-io/src/predicates.rs | 2 +- .../src/chunked_array/array/namespace.rs | 2 +- .../src/chunked_array/gather/chunked.rs | 4 ++-- crates/polars-ops/src/chunked_array/hist.rs | 2 +- crates/polars-ops/src/chunked_array/list/hash.rs | 2 +- .../polars-ops/src/chunked_array/list/min_max.rs | 8 ++++---- .../src/chunked_array/list/namespace.rs | 4 ++-- crates/polars-ops/src/chunked_array/repeat_by.rs | 2 +- crates/polars-ops/src/frame/join/asof/mod.rs | 2 +- .../src/frame/join/hash_join/sort_merge.rs | 4 ++-- crates/polars-ops/src/frame/pivot/mod.rs | 2 +- crates/polars-ops/src/series/ops/arg_min_max.rs | 4 ++-- crates/polars-ops/src/series/ops/clip.rs | 12 ++++++------ crates/polars-ops/src/series/ops/cum_agg.rs | 4 ++-- crates/polars-ops/src/series/ops/floor_divide.rs | 2 +- crates/polars-ops/src/series/ops/horizontal.rs | 4 ++-- .../src/series/ops/is_first_distinct.rs | 2 +- crates/polars-ops/src/series/ops/is_in.rs | 2 +- .../src/series/ops/is_last_distinct.rs | 2 +- crates/polars-ops/src/series/ops/is_unique.rs | 2 +- crates/polars-ops/src/series/ops/log.rs | 2 +- crates/polars-ops/src/series/ops/rolling.rs | 2 +- crates/polars-ops/src/series/ops/round.rs | 8 ++++---- .../polars-ops/src/series/ops/search_sorted.rs | 2 +- crates/polars-ops/src/series/ops/unique.rs | 2 +- crates/polars-ops/src/series/ops/various.rs | 2 +- .../sinks/group_by/aggregates/convert.rs | 2 +- .../src/executors/sinks/group_by/string.rs | 2 +- crates/polars-plan/src/dsl/function_expr/list.rs | 2 +- crates/polars-plan/src/dsl/function_expr/pow.rs | 4 ++-- .../polars-plan/src/dsl/function_expr/schema.rs | 4 ++-- .../src/dsl/function_expr/shift_and_fill.rs | 2 +- .../src/dsl/function_expr/shrink_type.rs | 2 +- crates/polars-plan/src/dsl/function_expr/sign.rs | 2 +- .../src/dsl/function_expr/trigonometry.rs | 2 +- crates/polars-plan/src/plans/aexpr/properties.rs | 2 +- crates/polars-plan/src/plans/aexpr/schema.rs | 8 ++++---- .../src/plans/conversion/dsl_to_ir.rs | 12 ++++++------ .../src/plans/conversion/type_coercion/binary.rs | 10 +++++----- .../src/plans/conversion/type_coercion/is_in.rs | 8 ++++---- .../src/plans/optimizer/collapse_joins.rs | 2 +- crates/polars-plan/src/plans/optimizer/fused.rs | 4 ++-- .../src/interop/numpy/to_numpy_df.rs | 6 +++--- .../src/interop/numpy/to_numpy_series.rs | 2 +- crates/polars-python/src/interop/numpy/utils.rs | 2 +- crates/polars-python/src/series/buffers.rs | 8 ++++---- crates/polars-python/src/series/general.rs | 4 ++-- 73 files changed, 133 insertions(+), 133 deletions(-) diff --git a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs index 400ee22ce8b0..9bdc6fb98d1a 100644 --- a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs +++ b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs @@ -139,7 +139,7 @@ pub(crate) fn get_fixed_size_list_builder( ) -> PolarsResult> { let phys_dtype = inner_type_logical.to_physical(); - let builder = if phys_dtype.is_numeric() { + let builder = if phys_dtype.is_primitive_numeric() { with_match_physical_numeric_type!(phys_dtype, |$T| { // SAFETY: physical type match logical type unsafe { diff --git a/crates/polars-core/src/chunked_array/builder/list/primitive.rs b/crates/polars-core/src/chunked_array/builder/list/primitive.rs index 039cf3f9b83e..ba4cbcc1ea19 100644 --- a/crates/polars-core/src/chunked_array/builder/list/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/list/primitive.rs @@ -20,7 +20,7 @@ where inner_type: DataType, ) -> Self { debug_assert!( - inner_type.to_physical().is_numeric(), + inner_type.to_physical().is_primitive_numeric(), "inner type must be primitive, got {}", inner_type ); diff --git a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs index f9de31cb353b..92376f4abe4d 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs @@ -418,7 +418,7 @@ impl LogicalType for CategoricalChunked { // Otherwise we do nothing Ok(self.clone().set_ordering(*ordering, true).into_series()) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { // Apply the cast to the categories and then index into the casted series. // This has to be local for the gather. let slf = self.to_local(); diff --git a/crates/polars-core/src/chunked_array/logical/date.rs b/crates/polars-core/src/chunked_array/logical/date.rs index 881044acac86..ae67fd09ed0e 100644 --- a/crates/polars-core/src/chunked_array/logical/date.rs +++ b/crates/polars-core/src/chunked_array/logical/date.rs @@ -48,7 +48,7 @@ impl LogicalType for DateChunked { .into_datetime(*tu, tz.clone()) .into_series()) }, - dt if dt.is_numeric() => self.0.cast_with_options(dtype, cast_options), + dt if dt.is_primitive_numeric() => self.0.cast_with_options(dtype, cast_options), dt => { polars_bail!( InvalidOperation: diff --git a/crates/polars-core/src/chunked_array/logical/datetime.rs b/crates/polars-core/src/chunked_array/logical/datetime.rs index fd99ac74ce0f..f14608d07efd 100644 --- a/crates/polars-core/src/chunked_array/logical/datetime.rs +++ b/crates/polars-core/src/chunked_array/logical/datetime.rs @@ -101,7 +101,7 @@ impl LogicalType for DatetimeChunked { .into_time() .into_series()); }, - dt if dt.is_numeric() => return self.0.cast_with_options(dtype, cast_options), + dt if dt.is_primitive_numeric() => return self.0.cast_with_options(dtype, cast_options), dt => { polars_bail!( InvalidOperation: diff --git a/crates/polars-core/src/chunked_array/logical/duration.rs b/crates/polars-core/src/chunked_array/logical/duration.rs index ca0347d87b5a..1dc0eab17c5d 100644 --- a/crates/polars-core/src/chunked_array/logical/duration.rs +++ b/crates/polars-core/src/chunked_array/logical/duration.rs @@ -54,7 +54,7 @@ impl LogicalType for DurationChunked { }; Ok(out.into_duration(to_unit).into_series()) }, - dt if dt.is_numeric() => self.0.cast_with_options(dtype, cast_options), + dt if dt.is_primitive_numeric() => self.0.cast_with_options(dtype, cast_options), dt => { polars_bail!( InvalidOperation: diff --git a/crates/polars-core/src/chunked_array/logical/time.rs b/crates/polars-core/src/chunked_array/logical/time.rs index f63784faceb0..ce9b890282d8 100644 --- a/crates/polars-core/src/chunked_array/logical/time.rs +++ b/crates/polars-core/src/chunked_array/logical/time.rs @@ -92,7 +92,7 @@ impl LogicalType for TimeChunked { self.dtype(), dtype ) }, - dt if dt.is_numeric() => self.0.cast_with_options(dtype, cast_options), + dt if dt.is_primitive_numeric() => self.0.cast_with_options(dtype, cast_options), _ => { polars_bail!( InvalidOperation: diff --git a/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs b/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs index 8e626e04a9b1..eb7a33a170aa 100644 --- a/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs +++ b/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs @@ -110,7 +110,7 @@ impl ChunkExplode for ListChunked { let (indices, new_offsets) = if listarr.null_count() == 0 { // SPECIALIZED path. let inner_phys = self.inner_dtype().to_physical(); - if inner_phys.is_numeric() || inner_phys.is_null() || inner_phys.is_bool() { + if inner_phys.is_primitive_numeric() || inner_phys.is_null() || inner_phys.is_bool() { return Ok(self.specialized(values, offsets, offsets_buf)); } // Use gather diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index 2129f7703cfb..926b070fcf21 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -85,11 +85,11 @@ impl Series { let physical_type = self.dtype().to_physical(); match strategy { - FillNullStrategy::Forward(None) if !physical_type.is_numeric() => { + FillNullStrategy::Forward(None) if !physical_type.is_primitive_numeric() => { fill_forward_gather(self) }, FillNullStrategy::Forward(Some(limit)) => fill_forward_gather_limit(self, limit), - FillNullStrategy::Backward(None) if !physical_type.is_numeric() => { + FillNullStrategy::Backward(None) if !physical_type.is_primitive_numeric() => { fill_backward_gather(self) }, FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit), @@ -108,7 +108,7 @@ impl Series { let ca = s.binary().unwrap(); fill_null_binary(ca, strategy).map(|ca| ca.into_series()) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); fill_null_numeric(ca, strategy).map(|ca| ca.into_series()) diff --git a/crates/polars-core/src/chunked_array/ops/reverse.rs b/crates/polars-core/src/chunked_array/ops/reverse.rs index 0436b2264c5c..1298eeccb45b 100644 --- a/crates/polars-core/src/chunked_array/ops/reverse.rs +++ b/crates/polars-core/src/chunked_array/ops/reverse.rs @@ -84,7 +84,7 @@ impl ChunkReverse for StringChunked { #[cfg(feature = "dtype-array")] impl ChunkReverse for ArrayChunked { fn reverse(&self) -> Self { - if !self.inner_dtype().is_numeric() { + if !self.inner_dtype().is_primitive_numeric() { todo!("reverse for FixedSizeList with non-numeric dtypes not yet supported") } let ca = self.rechunk(); diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index ae96f2ff0e8a..62263204af3d 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -364,7 +364,7 @@ impl AnyValue<'static> { DataType::Binary => AnyValue::BinaryOwned(Vec::new()), DataType::Boolean => (0 as IdxSize).into(), // SAFETY: numeric values are static, inform the compiler of this. - d if d.is_numeric() => unsafe { + d if d.is_primitive_numeric() => unsafe { std::mem::transmute::, AnyValue<'static>>( AnyValue::UInt8(0).cast(dtype), ) diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index a590958b0ad7..c0c145d26c63 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -360,7 +360,7 @@ impl DataType { if self == to { return Some(true); } - if self.is_numeric() && to.is_numeric() { + if self.is_primitive_numeric() && to.is_primitive_numeric() { return Some(true); } @@ -376,7 +376,7 @@ impl DataType { (D::Object(_, _), _) | (_, D::Object(_, _)) => false, (D::Boolean, dt) | (dt, D::Boolean) => match dt { - dt if dt.is_numeric() => true, + dt if dt.is_primitive_numeric() => true, #[cfg(feature = "dtype-decimal")] D::Decimal(_, _) => true, D::String | D::Binary => true, @@ -445,7 +445,7 @@ impl DataType { } pub fn is_supported_list_arithmetic_input(&self) -> bool { - self.is_numeric() || self.is_bool() || self.is_null() + self.is_primitive_numeric() || self.is_bool() || self.is_null() } /// Check if this [`DataType`] is a logical type @@ -460,17 +460,17 @@ impl DataType { } /// Check if datatype is a primitive type. By that we mean that - /// it is not a container type. + /// it is not a nested or logical type. pub fn is_primitive(&self) -> bool { - self.is_numeric() + self.is_primitive_numeric() | matches!( self, DataType::Boolean | DataType::String | DataType::Binary ) } - /// Check if this [`DataType`] is a basic numeric type (excludes Decimal). - pub fn is_numeric(&self) -> bool { + /// Check if this [`DataType`] is a primitive numeric type (excludes Decimal). + pub fn is_primitive_numeric(&self) -> bool { self.is_float() || self.is_integer() } @@ -588,7 +588,7 @@ impl DataType { let is_cat = false; let phys = self.to_physical(); - (phys.is_numeric() + (phys.is_primitive_numeric() || self.is_decimal() || matches!( phys, diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index 5a1392369eef..fdfa3b70c40b 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -785,7 +785,7 @@ impl Display for DataFrame { for (column_index, column) in table.column_iter_mut().enumerate() { let dtype = fields[column_index].dtype(); let mut preset = str_preset.as_str(); - if dtype.is_numeric() || dtype.is_decimal() { + if dtype.is_primitive_numeric() || dtype.is_decimal() { preset = num_preset.as_str(); } match preset { diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index 8f01ce3f291a..e941c842d6a4 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -141,7 +141,7 @@ impl Series { Boolean => s.cast(&Float64).unwrap().agg_mean(groups), Float32 => SeriesWrap(s.f32().unwrap().clone()).agg_mean(groups), Float64 => SeriesWrap(s.f64().unwrap().clone()).agg_mean(groups), - dt if dt.is_numeric() => apply_method_physical_integer!(s, agg_mean, groups), + dt if dt.is_primitive_numeric() => apply_method_physical_integer!(s, agg_mean, groups), #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() @@ -193,7 +193,7 @@ impl Series { Boolean => s.cast(&Float64).unwrap().agg_median(groups), Float32 => SeriesWrap(s.f32().unwrap().clone()).agg_median(groups), Float64 => SeriesWrap(s.f64().unwrap().clone()).agg_median(groups), - dt if dt.is_numeric() => apply_method_physical_integer!(s, agg_median, groups), + dt if dt.is_primitive_numeric() => apply_method_physical_integer!(s, agg_median, groups), #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() @@ -249,7 +249,7 @@ impl Series { match s.dtype() { Float32 => s.f32().unwrap().agg_quantile(groups, quantile, method), Float64 => s.f64().unwrap().agg_quantile(groups, quantile, method), - dt if dt.is_numeric() || dt.is_temporal() => { + dt if dt.is_primitive_numeric() || dt.is_temporal() => { let ca = s.to_physical_repr(); let physical_type = ca.dtype(); let s = apply_method_physical_integer!(ca, agg_quantile, groups, quantile, method); diff --git a/crates/polars-core/src/scalar/reduce.rs b/crates/polars-core/src/scalar/reduce.rs index 078dc0411fa4..a5d8f788274a 100644 --- a/crates/polars-core/src/scalar/reduce.rs +++ b/crates/polars-core/src/scalar/reduce.rs @@ -9,7 +9,7 @@ pub fn mean_reduce(value: Option, dtype: DataType) -> Scalar { let val = value.map(|m| m as f32); Scalar::new(dtype, val.into()) }, - dt if dt.is_numeric() || dt.is_decimal() || dt.is_bool() => { + dt if dt.is_primitive_numeric() || dt.is_decimal() || dt.is_bool() => { Scalar::new(DataType::Float64, value.into()) }, #[cfg(feature = "dtype-date")] diff --git a/crates/polars-core/src/series/arithmetic/owned.rs b/crates/polars-core/src/series/arithmetic/owned.rs index f05df279738c..3426acd440d4 100644 --- a/crates/polars-core/src/series/arithmetic/owned.rs +++ b/crates/polars-core/src/series/arithmetic/owned.rs @@ -19,7 +19,7 @@ pub fn coerce_lhs_rhs_owned(lhs: Series, rhs: Series) -> PolarsResult<(Series, S } fn is_eligible(lhs: &DataType, rhs: &DataType) -> bool { - !lhs.is_logical() && lhs.to_physical().is_numeric() && rhs.to_physical().is_numeric() + !lhs.is_logical() && lhs.to_physical().is_primitive_numeric() && rhs.to_physical().is_primitive_numeric() } #[cfg(feature = "performant")] diff --git a/crates/polars-core/src/series/comparison.rs b/crates/polars-core/src/series/comparison.rs index 5946ac45644f..c4ed9b5769f1 100644 --- a/crates/polars-core/src/series/comparison.rs +++ b/crates/polars-core/src/series/comparison.rs @@ -184,7 +184,7 @@ fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> { use DataType::*; match (left, right) { - (String, dt) | (dt, String) if dt.is_numeric() => { + (String, dt) | (dt, String) if dt.is_primitive_numeric() => { polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt) }, #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-core/src/series/implementations/list.rs b/crates/polars-core/src/series/implementations/list.rs index 7b5d9c3f79f5..4217b0b2c9f2 100644 --- a/crates/polars-core/src/series/implementations/list.rs +++ b/crates/polars-core/src/series/implementations/list.rs @@ -184,7 +184,7 @@ impl SeriesTrait for SeriesWrap { #[cfg(feature = "algorithm_group_by")] fn unique(&self) -> PolarsResult { - if !self.inner_dtype().is_numeric() { + if !self.inner_dtype().is_primitive_numeric() { polars_bail!(opq = unique, self.dtype()); } // this can be called in aggregation, so this fast path can be worth a lot @@ -200,7 +200,7 @@ impl SeriesTrait for SeriesWrap { #[cfg(feature = "algorithm_group_by")] fn n_unique(&self) -> PolarsResult { - if !self.inner_dtype().is_numeric() { + if !self.inner_dtype().is_primitive_numeric() { polars_bail!(opq = n_unique, self.dtype()); } // this can be called in aggregation, so this fast path can be worth a lot @@ -217,7 +217,7 @@ impl SeriesTrait for SeriesWrap { #[cfg(feature = "algorithm_group_by")] fn arg_unique(&self) -> PolarsResult { - if !self.inner_dtype().is_numeric() { + if !self.inner_dtype().is_primitive_numeric() { polars_bail!(opq = arg_unique, self.dtype()); } // this can be called in aggregation, so this fast path can be worth a lot diff --git a/crates/polars-core/src/series/iterator.rs b/crates/polars-core/src/series/iterator.rs index d4dc5df63ccb..cb39ae7867fd 100644 --- a/crates/polars-core/src/series/iterator.rs +++ b/crates/polars-core/src/series/iterator.rs @@ -109,7 +109,7 @@ impl Series { ); let arr = &*self.chunks()[0]; - if phys_dtype.is_numeric() { + if phys_dtype.is_primitive_numeric() { if arr.null_count() == 0 { with_match_physical_numeric_type!(phys_dtype, |$T| { let arr = arr.as_any().downcast_ref::>().unwrap(); diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 43de366857f8..7ee9991c1f6b 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -448,7 +448,7 @@ impl Series { #[cfg(feature = "dtype-struct")] DataType::Struct(_) => self.struct_().unwrap().cast_unchecked(dtype), DataType::List(_) => self.list().unwrap().cast_unchecked(dtype), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref(); ca.cast_unchecked(dtype) @@ -594,7 +594,7 @@ impl Series { match self.dtype() { DataType::Float32 => Ok(self.f32().unwrap().is_nan()), DataType::Float64 => Ok(self.f64().unwrap().is_nan()), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean) .with_validity(self.rechunk_validity()); Ok(BooleanChunked::with_chunk(self.name().clone(), arr)) @@ -608,7 +608,7 @@ impl Series { match self.dtype() { DataType::Float32 => Ok(self.f32().unwrap().is_not_nan()), DataType::Float64 => Ok(self.f64().unwrap().is_not_nan()), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean) .with_validity(self.rechunk_validity()); Ok(BooleanChunked::with_chunk(self.name().clone(), arr)) @@ -622,7 +622,7 @@ impl Series { match self.dtype() { DataType::Float32 => Ok(self.f32().unwrap().is_finite()), DataType::Float64 => Ok(self.f64().unwrap().is_finite()), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let arr = BooleanArray::full(self.len(), true, ArrowDataType::Boolean) .with_validity(self.rechunk_validity()); Ok(BooleanChunked::with_chunk(self.name().clone(), arr)) @@ -636,7 +636,7 @@ impl Series { match self.dtype() { DataType::Float32 => Ok(self.f32().unwrap().is_infinite()), DataType::Float64 => Ok(self.f64().unwrap().is_infinite()), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let arr = BooleanArray::full(self.len(), false, ArrowDataType::Boolean) .with_validity(self.rechunk_validity()); Ok(BooleanChunked::with_chunk(self.name().clone(), arr)) diff --git a/crates/polars-core/src/utils/supertype.rs b/crates/polars-core/src/utils/supertype.rs index 48555a7eb520..30895f9e01fe 100644 --- a/crates/polars-core/src/utils/supertype.rs +++ b/crates/polars-core/src/utils/supertype.rs @@ -418,7 +418,7 @@ pub fn get_supertype_with_options( // Keep unknown dynam if dt.is_null() => Some(Unknown(*dynam)), // Find integers sizes - UnknownKind::Int(v) if dt.is_numeric() => { + UnknownKind::Int(v) if dt.is_primitive_numeric() => { // Both dyn int if let Unknown(UnknownKind::Int(v_other)) = dt { // Take the maximum value to ensure we bubble up the required minimal size. @@ -451,7 +451,7 @@ pub fn get_supertype_with_options( super_type_structs(fields_a, fields_b) } #[cfg(feature = "dtype-struct")] - (Struct(fields_a), rhs) if rhs.is_numeric() => { + (Struct(fields_a), rhs) if rhs.is_primitive_numeric() => { let mut new_fields = Vec::with_capacity(fields_a.len()); for a in fields_a { let st = get_supertype(&a.dtype, rhs)?; diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 3710a33258ea..e1e22e1a63c2 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -483,7 +483,7 @@ impl PartitionedAggregation for AggregationExpr { }; agg_s.rename(new_name.clone()); - if !agg_s.dtype().is_numeric() { + if !agg_s.dtype().is_primitive_numeric() { Ok(agg_s) } else { let agg_s = match agg_s.dtype() { diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index 3696b0d9bf46..dd6ee46cb7b0 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -46,7 +46,7 @@ fn apply_operator_owned(left: Column, right: Column, op: Operator) -> PolarsResu match op { Operator::Plus => left.try_add_owned(right), Operator::Minus => left.try_sub_owned(right), - Operator::Multiply if left.dtype().is_numeric() && right.dtype().is_numeric() => { + Operator::Multiply if left.dtype().is_primitive_numeric() && right.dtype().is_primitive_numeric() => { left.try_mul_owned(right) }, _ => apply_operator(&left, &right, op), diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index dd92e8318a61..4e61224c6dcd 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -697,7 +697,7 @@ fn set_by_groups( if update_groups { return None; } - if s.dtype().to_physical().is_numeric() { + if s.dtype().to_physical().is_primitive_numeric() { let dtype = s.dtype(); let s = s.to_physical_repr(); diff --git a/crates/polars-expr/src/reduce/mean.rs b/crates/polars-expr/src/reduce/mean.rs index 4a8ec962f237..4da71251307f 100644 --- a/crates/polars-expr/src/reduce/mean.rs +++ b/crates/polars-expr/src/reduce/mean.rs @@ -10,7 +10,7 @@ pub fn new_mean_reduction(dtype: DataType) -> Box { use VecGroupedReduction as VGR; match dtype { Boolean => Box::new(VGR::new(dtype, BoolMeanReducer)), - _ if dtype.is_numeric() || dtype.is_temporal() => { + _ if dtype.is_primitive_numeric() || dtype.is_temporal() => { with_match_physical_numeric_polars_type!(dtype.to_physical(), |$T| { Box::new(VGR::new(dtype, NumMeanReducer::<$T>(PhantomData))) }) @@ -30,7 +30,7 @@ fn finish_output(values: Vec<(f64, usize)>, dtype: &DataType) -> Series { .collect_ca(PlSmallStr::EMPTY); ca.into_series() }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let ca: Float64Chunked = values .into_iter() .map(|(s, c)| (c != 0).then(|| s / c as f64)) diff --git a/crates/polars-expr/src/reduce/var_std.rs b/crates/polars-expr/src/reduce/var_std.rs index 7993c06e6a4e..af60db051935 100644 --- a/crates/polars-expr/src/reduce/var_std.rs +++ b/crates/polars-expr/src/reduce/var_std.rs @@ -11,7 +11,7 @@ pub fn new_var_std_reduction(dtype: DataType, is_std: bool, ddof: u8) -> Box Box::new(VGR::new(dtype, BoolVarStdReducer { is_std, ddof })), - _ if dtype.is_numeric() => { + _ if dtype.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dtype.to_physical(), |$T| { Box::new(VGR::new(dtype, VarStdReducer::<$T> { is_std, diff --git a/crates/polars-io/src/predicates.rs b/crates/polars-io/src/predicates.rs index 75ebc922a4b9..c455ae0966c8 100644 --- a/crates/polars-io/src/predicates.rs +++ b/crates/polars-io/src/predicates.rs @@ -196,7 +196,7 @@ impl ColumnStats { /// Returns whether the [`DataType`] supports minimum/maximum operations. fn use_min_max(dtype: &DataType) -> bool { - dtype.is_numeric() + dtype.is_primitive_numeric() || dtype.is_temporal() || matches!( dtype, diff --git a/crates/polars-ops/src/chunked_array/array/namespace.rs b/crates/polars-ops/src/chunked_array/array/namespace.rs index 909ef5db8f6d..652f94032504 100644 --- a/crates/polars-ops/src/chunked_array/array/namespace.rs +++ b/crates/polars-ops/src/chunked_array/array/namespace.rs @@ -46,7 +46,7 @@ pub trait ArrayNameSpace: AsArray { match ca.inner_dtype() { DataType::Boolean => Ok(count_boolean_bits(ca).into_series()), - dt if dt.is_numeric() => Ok(sum_array_numerical(ca, dt)), + dt if dt.is_primitive_numeric() => Ok(sum_array_numerical(ca, dt)), dt => sum_with_nulls(ca, dt), } } diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index 093af5aeb534..b1346f83649b 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -120,7 +120,7 @@ impl TakeChunked for Series { ) -> Self { use DataType::*; match self.dtype() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(self.dtype(), |$T| { let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref(); ca.take_chunked_unchecked(by, sorted).into_series() @@ -216,7 +216,7 @@ impl TakeChunked for Series { unsafe fn take_opt_chunked_unchecked(&self, by: &[ChunkId]) -> Self { use DataType::*; match self.dtype() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(self.dtype(), |$T| { let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref(); ca.take_opt_chunked_unchecked(by).into_series() diff --git a/crates/polars-ops/src/chunked_array/hist.rs b/crates/polars-ops/src/chunked_array/hist.rs index f57ff6ed60aa..5fde70019d52 100644 --- a/crates/polars-ops/src/chunked_array/hist.rs +++ b/crates/polars-ops/src/chunked_array/hist.rs @@ -259,7 +259,7 @@ pub fn hist_series( let bins = bins.cont_slice().unwrap(); bins_arg = Some(bins); }; - polars_ensure!(s.dtype().is_numeric(), InvalidOperation: "'hist' is only supported for numeric data"); + polars_ensure!(s.dtype().is_primitive_numeric(), InvalidOperation: "'hist' is only supported for numeric data"); let out = with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); diff --git a/crates/polars-ops/src/chunked_array/list/hash.rs b/crates/polars-ops/src/chunked_array/list/hash.rs index 0c567c729041..90218d7799dd 100644 --- a/crates/polars-ops/src/chunked_array/list/hash.rs +++ b/crates/polars-ops/src/chunked_array/list/hash.rs @@ -45,7 +45,7 @@ where } pub(crate) fn hash(ca: &mut ListChunked, build_hasher: PlRandomState) -> UInt64Chunked { - if !ca.inner_dtype().to_physical().is_numeric() { + if !ca.inner_dtype().to_physical().is_primitive_numeric() { panic!( "Hashing a list with a non-numeric inner type not supported. Got dtype: {:?}", ca.dtype() diff --git a/crates/polars-ops/src/chunked_array/list/min_max.rs b/crates/polars-ops/src/chunked_array/list/min_max.rs index 626b193fa74c..fafdf1bd593d 100644 --- a/crates/polars-ops/src/chunked_array/list/min_max.rs +++ b/crates/polars-ops/src/chunked_array/list/min_max.rs @@ -78,7 +78,7 @@ pub(super) fn list_min_function(ca: &ListChunked) -> PolarsResult { .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().min())); Ok(out.into_series()) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let out: ChunkedArray<$T> = ca.apply_amortized_generic(|opt_s| { @@ -106,7 +106,7 @@ pub(super) fn list_min_function(ca: &ListChunked) -> PolarsResult { }; match ca.inner_dtype() { - dt if dt.is_numeric() => Ok(min_list_numerical(ca, dt)), + dt if dt.is_primitive_numeric() => Ok(min_list_numerical(ca, dt)), _ => inner(ca), } } @@ -188,7 +188,7 @@ pub(super) fn list_max_function(ca: &ListChunked) -> PolarsResult { .apply_amortized_generic(|s| s.and_then(|s| s.as_ref().bool().unwrap().max())); Ok(out.into_series()) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let out: ChunkedArray<$T> = ca.apply_amortized_generic(|opt_s| { @@ -217,7 +217,7 @@ pub(super) fn list_max_function(ca: &ListChunked) -> PolarsResult { }; match ca.inner_dtype() { - dt if dt.is_numeric() => Ok(max_list_numerical(ca, dt)), + dt if dt.is_primitive_numeric() => Ok(max_list_numerical(ca, dt)), _ => inner(ca), } } diff --git a/crates/polars-ops/src/chunked_array/list/namespace.rs b/crates/polars-ops/src/chunked_array/list/namespace.rs index a5e4f1837d26..5c21a7e65ac3 100644 --- a/crates/polars-ops/src/chunked_array/list/namespace.rs +++ b/crates/polars-ops/src/chunked_array/list/namespace.rs @@ -200,7 +200,7 @@ pub trait ListNameSpaceImpl: AsList { match ca.inner_dtype() { DataType::Boolean => Ok(count_boolean_bits(ca).into_series()), - dt if dt.is_numeric() => Ok(sum_list_numerical(ca, dt)), + dt if dt.is_primitive_numeric() => Ok(sum_list_numerical(ca, dt)), dt => sum_with_nulls(ca, dt), } } @@ -213,7 +213,7 @@ pub trait ListNameSpaceImpl: AsList { }; match ca.inner_dtype() { - dt if dt.is_numeric() => mean_list_numerical(ca, dt), + dt if dt.is_primitive_numeric() => mean_list_numerical(ca, dt), _ => sum_mean::mean_with_nulls(ca), } } diff --git a/crates/polars-ops/src/chunked_array/repeat_by.rs b/crates/polars-ops/src/chunked_array/repeat_by.rs index 49a83d4659c1..49d8483594c8 100644 --- a/crates/polars-ops/src/chunked_array/repeat_by.rs +++ b/crates/polars-ops/src/chunked_array/repeat_by.rs @@ -122,7 +122,7 @@ pub fn repeat_by(s: &Series, by: &IdxCa) -> PolarsResult { .and_then(|ca| ca.apply_to_inner(&|s| unsafe { s.cast_unchecked(&String) })) }, Binary => repeat_by_binary(s_phys.binary().unwrap(), by), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s_phys.as_ref().as_ref().as_ref(); repeat_by_primitive(ca, by) diff --git a/crates/polars-ops/src/frame/join/asof/mod.rs b/crates/polars-ops/src/frame/join/asof/mod.rs index cb122a649c4f..b3308c5f7065 100644 --- a/crates/polars-ops/src/frame/join/asof/mod.rs +++ b/crates/polars-ops/src/frame/join/asof/mod.rs @@ -167,7 +167,7 @@ fn check_asof_columns( let dtype_b = b.dtype(); if has_tolerance { polars_ensure!( - dtype_a.to_physical().is_numeric() && dtype_b.to_physical().is_numeric(), + dtype_a.to_physical().is_primitive_numeric() && dtype_b.to_physical().is_primitive_numeric(), InvalidOperation: "asof join with tolerance is only supported on numeric/temporal keys" ); diff --git a/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs b/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs index 5b2f83282a76..4040c1260a7f 100644 --- a/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs +++ b/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs @@ -209,7 +209,7 @@ pub(crate) fn _sort_or_hash_inner( let size_factor_acceptable = std::env::var("POLARS_JOIN_SORT_FACTOR") .map(|s| s.parse::().unwrap()) .unwrap_or(1.0); - let is_numeric = s_left.dtype().to_physical().is_numeric(); + let is_numeric = s_left.dtype().to_physical().is_primitive_numeric(); if validate.needs_checks() { return s_left.hash_join_inner(s_right, validate, join_nulls); @@ -311,7 +311,7 @@ pub(crate) fn sort_or_hash_left( let size_factor_acceptable = std::env::var("POLARS_JOIN_SORT_FACTOR") .map(|s| s.parse::().unwrap()) .unwrap_or(1.0); - let is_numeric = s_left.dtype().to_physical().is_numeric(); + let is_numeric = s_left.dtype().to_physical().is_primitive_numeric(); let no_nulls = s_left.null_count() == 0 && s_right.null_count() == 0; diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index 46244ffd344b..f91e665698e0 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -334,7 +334,7 @@ fn pivot_impl_single_column( debug_assert_eq!(row_locations.len(), col_locations.len()); debug_assert_eq!(value_agg_phys.len(), row_locations.len()); - let mut cols = if value_agg_phys.dtype().is_numeric() { + let mut cols = if value_agg_phys.dtype().is_primitive_numeric() { macro_rules! dispatch { ($ca:expr) => {{ positioning::position_aggregates_numeric( diff --git a/crates/polars-ops/src/series/ops/arg_min_max.rs b/crates/polars-ops/src/series/ops/arg_min_max.rs index 4eb0804f5db8..faba98eb93aa 100644 --- a/crates/polars-ops/src/series/ops/arg_min_max.rs +++ b/crates/polars-ops/src/series/ops/arg_min_max.rs @@ -79,7 +79,7 @@ impl ArgAgg for Series { let ca = s.i64().unwrap(); arg_min_numeric_dispatch(ca) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); arg_min_numeric_dispatch(ca) @@ -125,7 +125,7 @@ impl ArgAgg for Series { let ca = s.i64().unwrap(); arg_max_numeric_dispatch(ca) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); arg_max_numeric_dispatch(ca) diff --git a/crates/polars-ops/src/series/ops/clip.rs b/crates/polars-ops/src/series/ops/clip.rs index 485b2d7a064b..35b8e957a170 100644 --- a/crates/polars-ops/src/series/ops/clip.rs +++ b/crates/polars-ops/src/series/ops/clip.rs @@ -5,7 +5,7 @@ use polars_core::with_match_physical_numeric_polars_type; /// Set values outside the given boundaries to the boundary value. pub fn clip(s: &Series, min: &Series, max: &Series) -> PolarsResult { polars_ensure!( - s.dtype().to_physical().is_numeric(), + s.dtype().to_physical().is_primitive_numeric(), InvalidOperation: "`clip` only supports physical numeric types" ); @@ -19,7 +19,7 @@ pub fn clip(s: &Series, min: &Series, max: &Series) -> PolarsResult { ); match s.dtype() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); let min: &ChunkedArray<$T> = min.as_ref().as_ref().as_ref(); @@ -39,7 +39,7 @@ pub fn clip(s: &Series, min: &Series, max: &Series) -> PolarsResult { /// Set values above the given maximum to the maximum value. pub fn clip_max(s: &Series, max: &Series) -> PolarsResult { polars_ensure!( - s.dtype().to_physical().is_numeric(), + s.dtype().to_physical().is_primitive_numeric(), InvalidOperation: "`clip` only supports physical numeric types" ); @@ -49,7 +49,7 @@ pub fn clip_max(s: &Series, max: &Series) -> PolarsResult { let (s, max) = (s.to_physical_repr(), max.to_physical_repr()); match s.dtype() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); let max: &ChunkedArray<$T> = max.as_ref().as_ref().as_ref(); @@ -68,7 +68,7 @@ pub fn clip_max(s: &Series, max: &Series) -> PolarsResult { /// Set values below the given minimum to the minimum value. pub fn clip_min(s: &Series, min: &Series) -> PolarsResult { polars_ensure!( - s.dtype().to_physical().is_numeric(), + s.dtype().to_physical().is_primitive_numeric(), InvalidOperation: "`clip` only supports physical numeric types" ); @@ -78,7 +78,7 @@ pub fn clip_min(s: &Series, min: &Series) -> PolarsResult { let (s, min) = (s.to_physical_repr(), min.to_physical_repr()); match s.dtype() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); let min: &ChunkedArray<$T> = min.as_ref().as_ref().as_ref(); diff --git a/crates/polars-ops/src/series/ops/cum_agg.rs b/crates/polars-ops/src/series/ops/cum_agg.rs index 163aa10eb080..817c53975087 100644 --- a/crates/polars-ops/src/series/ops/cum_agg.rs +++ b/crates/polars-ops/src/series/ops/cum_agg.rs @@ -236,7 +236,7 @@ pub fn cum_min(s: &Series, reverse: bool) -> PolarsResult { let s = s.to_physical_repr(); match s.dtype() { DataType::Boolean => Ok(cum_min_bool(s.bool()?, reverse).into_series()), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); let out = cum_min_numeric(ca, reverse).into_series(); @@ -257,7 +257,7 @@ pub fn cum_max(s: &Series, reverse: bool) -> PolarsResult { let s = s.to_physical_repr(); match s.dtype() { DataType::Boolean => Ok(cum_max_bool(s.bool()?, reverse).into_series()), - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); let out = cum_max_numeric(ca, reverse).into_series(); diff --git a/crates/polars-ops/src/series/ops/floor_divide.rs b/crates/polars-ops/src/series/ops/floor_divide.rs index 87d5994d8f75..3fd66a7736a9 100644 --- a/crates/polars-ops/src/series/ops/floor_divide.rs +++ b/crates/polars-ops/src/series/ops/floor_divide.rs @@ -36,7 +36,7 @@ pub fn floor_div_series(a: &Series, b: &Series) -> PolarsResult { _ => {}, } - if !a.dtype().is_numeric() { + if !a.dtype().is_primitive_numeric() { polars_bail!(op = "floor_div", a.dtype()); } diff --git a/crates/polars-ops/src/series/ops/horizontal.rs b/crates/polars-ops/src/series/ops/horizontal.rs index 6a6960480c47..e558b84b00e0 100644 --- a/crates/polars-ops/src/series/ops/horizontal.rs +++ b/crates/polars-ops/src/series/ops/horizontal.rs @@ -92,7 +92,7 @@ where } fn min_max_binary_columns(left: &Column, right: &Column, min: bool) -> PolarsResult { - if left.dtype().to_physical().is_numeric() + if left.dtype().to_physical().is_primitive_numeric() && left.null_count() == 0 && right.null_count() == 0 && left.len() == right.len() @@ -278,7 +278,7 @@ pub fn mean_horizontal( let (numeric_columns, non_numeric_columns): (Vec<_>, Vec<_>) = columns.iter().partition(|s| { let dtype = s.dtype(); - dtype.is_numeric() || dtype.is_decimal() || dtype.is_bool() || dtype.is_null() + dtype.is_primitive_numeric() || dtype.is_decimal() || dtype.is_bool() || dtype.is_null() }); if !non_numeric_columns.is_empty() { diff --git a/crates/polars-ops/src/series/ops/is_first_distinct.rs b/crates/polars-ops/src/series/ops/is_first_distinct.rs index 4fdb10e162c3..614708fa4f22 100644 --- a/crates/polars-ops/src/series/ops/is_first_distinct.rs +++ b/crates/polars-ops/src/series/ops/is_first_distinct.rs @@ -127,7 +127,7 @@ pub fn is_first_distinct(s: &Series) -> PolarsResult { let s = s.cast(&Binary).unwrap(); return is_first_distinct(&s); }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); is_first_distinct_numeric(ca) diff --git a/crates/polars-ops/src/series/ops/is_in.rs b/crates/polars-ops/src/series/ops/is_in.rs index d6b83f59bf6c..0f6e807ec03d 100644 --- a/crates/polars-ops/src/series/ops/is_in.rs +++ b/crates/polars-ops/src/series/ops/is_in.rs @@ -728,7 +728,7 @@ pub fn is_in(s: &Series, other: &Series) -> PolarsResult { is_in_numeric(s.physical(), other.to_physical_repr().as_ref()) }, - dt if dt.to_physical().is_numeric() => { + dt if dt.to_physical().is_primitive_numeric() => { let s = s.to_physical_repr(); with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); diff --git a/crates/polars-ops/src/series/ops/is_last_distinct.rs b/crates/polars-ops/src/series/ops/is_last_distinct.rs index 46f9aca1ac8d..ae145936aca0 100644 --- a/crates/polars-ops/src/series/ops/is_last_distinct.rs +++ b/crates/polars-ops/src/series/ops/is_last_distinct.rs @@ -32,7 +32,7 @@ pub fn is_last_distinct(s: &Series) -> PolarsResult { let s = s.cast(&Binary).unwrap(); return is_last_distinct(&s); }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); is_last_distinct_numeric(ca) diff --git a/crates/polars-ops/src/series/ops/is_unique.rs b/crates/polars-ops/src/series/ops/is_unique.rs index 2f1d3de652ba..4cafa1ca435b 100644 --- a/crates/polars-ops/src/series/ops/is_unique.rs +++ b/crates/polars-ops/src/series/ops/is_unique.rs @@ -79,7 +79,7 @@ fn dispatcher(s: &Series, invert: bool) -> PolarsResult { 1 => BooleanChunked::new(s.name().clone(), [!invert]), len => BooleanChunked::full(s.name().clone(), invert, len), }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_integer_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); is_unique_ca(ca, invert) diff --git a/crates/polars-ops/src/series/ops/log.rs b/crates/polars-ops/src/series/ops/log.rs index e0c870321a47..db838f3e83cc 100644 --- a/crates/polars-ops/src/series/ops/log.rs +++ b/crates/polars-ops/src/series/ops/log.rs @@ -81,7 +81,7 @@ pub trait LogSeries: SeriesSealed { /// where `pk` are discrete probabilities. fn entropy(&self, base: f64, normalize: bool) -> PolarsResult { let s = self.as_series().to_physical_repr(); - polars_ensure!(s.dtype().is_numeric(), InvalidOperation: "expected numerical input for 'entropy'"); + polars_ensure!(s.dtype().is_primitive_numeric(), InvalidOperation: "expected numerical input for 'entropy'"); // if there is only one value in the series, return 0.0 to prevent the // function from returning -0.0 if s.len() == 1 { diff --git a/crates/polars-ops/src/series/ops/rolling.rs b/crates/polars-ops/src/series/ops/rolling.rs index 6e468816d354..ca5633c4ac07 100644 --- a/crates/polars-ops/src/series/ops/rolling.rs +++ b/crates/polars-ops/src/series/ops/rolling.rs @@ -51,7 +51,7 @@ pub trait RollingSeries: SeriesSealed { let ca = s.f32().unwrap(); rolling_skew(ca, window_size, bias).map(|ca| ca.into_series()) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let s = s.cast(&DataType::Float64).unwrap(); s.rolling_skew(window_size, bias) }, diff --git a/crates/polars-ops/src/series/ops/round.rs b/crates/polars-ops/src/series/ops/round.rs index 338a771e4998..9303668b7a36 100644 --- a/crates/polars-ops/src/series/ops/round.rs +++ b/crates/polars-ops/src/series/ops/round.rs @@ -67,7 +67,7 @@ pub trait RoundSeries: SeriesSealed { return Ok(ca.into_series()); } - polars_ensure!(s.dtype().is_numeric(), InvalidOperation: "round can only be used on numeric types" ); + polars_ensure!(s.dtype().is_primitive_numeric(), InvalidOperation: "round can only be used on numeric types" ); Ok(s.clone()) } @@ -115,7 +115,7 @@ pub trait RoundSeries: SeriesSealed { return Ok(s); } - polars_ensure!(s.dtype().is_numeric(), InvalidOperation: "round_sig_figs can only be used on numeric types" ); + polars_ensure!(s.dtype().is_primitive_numeric(), InvalidOperation: "round_sig_figs can only be used on numeric types" ); with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); let s = ca.apply_values(|value| { @@ -165,7 +165,7 @@ pub trait RoundSeries: SeriesSealed { return Ok(ca.into_series()); } - polars_ensure!(s.dtype().is_numeric(), InvalidOperation: "floor can only be used on numeric types" ); + polars_ensure!(s.dtype().is_primitive_numeric(), InvalidOperation: "floor can only be used on numeric types" ); Ok(s.clone()) } @@ -204,7 +204,7 @@ pub trait RoundSeries: SeriesSealed { return Ok(ca.into_series()); } - polars_ensure!(s.dtype().is_numeric(), InvalidOperation: "ceil can only be used on numeric types" ); + polars_ensure!(s.dtype().is_primitive_numeric(), InvalidOperation: "ceil can only be used on numeric types" ); Ok(s.clone()) } } diff --git a/crates/polars-ops/src/series/ops/search_sorted.rs b/crates/polars-ops/src/series/ops/search_sorted.rs index 17a8d2fb3560..b95a33d3bc42 100644 --- a/crates/polars-ops/src/series/ops/search_sorted.rs +++ b/crates/polars-ops/src/series/ops/search_sorted.rs @@ -65,7 +65,7 @@ pub fn search_sorted( Ok(IdxCa::new_vec(s.name().clone(), idx)) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let search_values = search_values.to_physical_repr(); let idx = with_match_physical_numeric_polars_type!(s.dtype(), |$T| { diff --git a/crates/polars-ops/src/series/ops/unique.rs b/crates/polars-ops/src/series/ops/unique.rs index e48509b1ce73..906aa304d186 100644 --- a/crates/polars-ops/src/series/ops/unique.rs +++ b/crates/polars-ops/src/series/ops/unique.rs @@ -27,7 +27,7 @@ where /// Returns a count of the unique values in the order of appearance. pub fn unique_counts(s: &Series) -> PolarsResult { - if s.dtype().to_physical().is_numeric() { + if s.dtype().to_physical().is_primitive_numeric() { let s_physical = s.to_physical_repr(); with_match_physical_numeric_polars_type!(s_physical.dtype(), |$T| { diff --git a/crates/polars-ops/src/series/ops/various.rs b/crates/polars-ops/src/series/ops/various.rs index 47d467f7f7ba..a3492b3c60d2 100644 --- a/crates/polars-ops/src/series/ops/various.rs +++ b/crates/polars-ops/src/series/ops/various.rs @@ -123,7 +123,7 @@ pub trait SeriesMethods: SeriesSealed { } } - if s.dtype().is_numeric() { + if s.dtype().is_primitive_numeric() { with_match_physical_numeric_polars_type!(s.dtype(), |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); return Ok(is_sorted_ca_num::<$T>(ca, options)) diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs index d00094fae4b6..190c6a2549e5 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/convert.rs @@ -108,7 +108,7 @@ pub fn can_convert_to_hash_agg( DataType::Date => { matches!(agg_fn, IRAggExpr::Mean(_) | IRAggExpr::Median(_)) }, - _ => field.dtype.to_physical().is_numeric(), + _ => field.dtype.to_physical().is_primitive_numeric(), } } else { false diff --git a/crates/polars-pipe/src/executors/sinks/group_by/string.rs b/crates/polars-pipe/src/executors/sinks/group_by/string.rs index e81dacec72ca..309d28a764b3 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/string.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/string.rs @@ -545,7 +545,7 @@ pub(super) fn apply_aggregate( }}; } - if has_physical_agg && aggregation_s.dtype().is_numeric() { + if has_physical_agg && aggregation_s.dtype().is_primitive_numeric() { macro_rules! dispatch { ($ca:expr, $name:ident) => {{ let arr = $ca.downcast_iter().next().unwrap(); diff --git a/crates/polars-plan/src/dsl/function_expr/list.rs b/crates/polars-plan/src/dsl/function_expr/list.rs index ddf8fb1fff20..e6b1468f4f82 100644 --- a/crates/polars-plan/src/dsl/function_expr/list.rs +++ b/crates/polars-plan/src/dsl/function_expr/list.rs @@ -511,7 +511,7 @@ pub(super) fn gather(args: &[Column], null_on_oob: bool) -> PolarsResult let idx = &args[1]; let ca = ca.list()?; - if idx.len() == 1 && idx.dtype().is_numeric() && null_on_oob { + if idx.len() == 1 && idx.dtype().is_primitive_numeric() && null_on_oob { // fast path let idx = idx.get(0)?.try_extract::()?; let out = ca.lst_get(idx, null_on_oob).map(Column::from)?; diff --git a/crates/polars-plan/src/dsl/function_expr/pow.rs b/crates/polars-plan/src/dsl/function_expr/pow.rs index 23804a786012..eb6eefc73c3e 100644 --- a/crates/polars-plan/src/dsl/function_expr/pow.rs +++ b/crates/polars-plan/src/dsl/function_expr/pow.rs @@ -139,12 +139,12 @@ fn pow_on_series(base: &Column, exponent: &Column) -> PolarsResult FieldsMapper<'a> { /// Map to a float supertype if numeric, else preserve pub fn map_numeric_to_float_dtype(&self) -> PolarsResult { self.map_dtype(|dtype| { - if dtype.is_numeric() { + if dtype.is_primitive_numeric() { match dtype { DataType::Float32 => DataType::Float32, _ => DataType::Float64, diff --git a/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs b/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs index 5890edf1c930..d0d27742ed54 100644 --- a/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs +++ b/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs @@ -97,7 +97,7 @@ pub(super) fn shift_and_fill(args: &[Column]) -> PolarsResult { Struct(_) => shift_and_fill_with_mask(s, n, fill_value_s), #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => shift_and_fill_with_mask(s, n, fill_value_s), - dt if dt.is_numeric() || dt.is_logical() => { + dt if dt.is_primitive_numeric() || dt.is_logical() => { macro_rules! dispatch { ($ca:expr, $n:expr, $fill_value:expr) => {{ shift_and_fill_numeric($ca, $n, $fill_value).into_column() diff --git a/crates/polars-plan/src/dsl/function_expr/shrink_type.rs b/crates/polars-plan/src/dsl/function_expr/shrink_type.rs index 224691e98ef4..0cc917c0c264 100644 --- a/crates/polars-plan/src/dsl/function_expr/shrink_type.rs +++ b/crates/polars-plan/src/dsl/function_expr/shrink_type.rs @@ -1,7 +1,7 @@ use super::*; pub(super) fn shrink(c: Column) -> PolarsResult { - if !c.dtype().is_numeric() { + if !c.dtype().is_primitive_numeric() { return Ok(c); } diff --git a/crates/polars-plan/src/dsl/function_expr/sign.rs b/crates/polars-plan/src/dsl/function_expr/sign.rs index 471c76f8ad2d..3c0f8cf73d90 100644 --- a/crates/polars-plan/src/dsl/function_expr/sign.rs +++ b/crates/polars-plan/src/dsl/function_expr/sign.rs @@ -7,7 +7,7 @@ use super::*; pub(super) fn sign(s: &Column) -> PolarsResult { let s = s.as_materialized_series(); let dt = s.dtype(); - polars_ensure!(dt.is_numeric(), opq = sign, dt); + polars_ensure!(dt.is_primitive_numeric(), opq = sign, dt); with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref(); Ok(sign_impl(ca)) diff --git a/crates/polars-plan/src/dsl/function_expr/trigonometry.rs b/crates/polars-plan/src/dsl/function_expr/trigonometry.rs index 5398f43f4323..cceed34f9110 100644 --- a/crates/polars-plan/src/dsl/function_expr/trigonometry.rs +++ b/crates/polars-plan/src/dsl/function_expr/trigonometry.rs @@ -61,7 +61,7 @@ pub(super) fn apply_trigonometric_function( let ca = s.f64().unwrap(); apply_trigonometric_function_to_float(ca, trig_function) }, - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let s = s.cast(&Float64)?; apply_trigonometric_function(&s, trig_function) }, diff --git a/crates/polars-plan/src/plans/aexpr/properties.rs b/crates/polars-plan/src/plans/aexpr/properties.rs index 4d83ed416406..a8870839a44c 100644 --- a/crates/polars-plan/src/plans/aexpr/properties.rs +++ b/crates/polars-plan/src/plans/aexpr/properties.rs @@ -243,7 +243,7 @@ pub fn can_pre_agg(agg: Node, expr_arena: &Arena, _input_schema: &Schema) expr_arena .get(agg) .get_type(_input_schema, Context::Default, expr_arena) - .map(|dt| { dt.is_numeric() }), + .map(|dt| { dt.is_primitive_numeric() }), Ok(true) ) }, diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index 043fe86470fd..ce2ad4669eae 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -12,7 +12,7 @@ fn float_type(field: &mut Field) { #[cfg(feature = "dtype-decimal")] DataType::Decimal(..) => true, DataType::Boolean => true, - dt => dt.is_numeric(), + dt => dt.is_primitive_numeric(), }; if should_coerce { field.coerce(DataType::Float64); @@ -586,7 +586,7 @@ fn get_arithmetic_field( // True divide handled somewhere else polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type) }, - (l, Duration(_)) if l.is_numeric() => match op { + (l, Duration(_)) if l.is_primitive_numeric() => match op { Operator::Multiply => { left_field.coerce(right_type); return Ok(left_field); @@ -720,11 +720,11 @@ fn get_truediv_field( let scale = _get_decimal_scale_div(*scale_left); Decimal(None, Some(scale)) }, - (dt, _) if dt.is_numeric() => Float64, + (dt, _) if dt.is_primitive_numeric() => Float64, #[cfg(feature = "dtype-duration")] (Duration(_), Duration(_)) => Float64, #[cfg(feature = "dtype-duration")] - (Duration(_), dt) if dt.is_numeric() => return Ok(left_field), + (Duration(_), dt) if dt.is_primitive_numeric() => return Ok(left_field), #[cfg(feature = "dtype-duration")] (Duration(_), dt) => { polars_bail!(InvalidOperation: "true division of {} with {} is not allowed", left_field.dtype(), dt) diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs index 3474c8079079..91a9d032d78f 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs @@ -799,28 +799,28 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult DslFunction::Stats(sf) => { let exprs = match sf { StatsFunction::Var { ddof } => stats_helper( - |dt| dt.is_numeric() || dt.is_bool(), + |dt| dt.is_primitive_numeric() || dt.is_bool(), |name| col(name.clone()).var(ddof), &input_schema, ), StatsFunction::Std { ddof } => stats_helper( - |dt| dt.is_numeric() || dt.is_bool(), + |dt| dt.is_primitive_numeric() || dt.is_bool(), |name| col(name.clone()).std(ddof), &input_schema, ), StatsFunction::Quantile { quantile, method } => stats_helper( - |dt| dt.is_numeric(), + |dt| dt.is_primitive_numeric(), |name| col(name.clone()).quantile(quantile.clone(), method), &input_schema, ), StatsFunction::Mean => stats_helper( - |dt| dt.is_numeric() || dt.is_temporal() || dt == &DataType::Boolean, + |dt| dt.is_primitive_numeric() || dt.is_temporal() || dt == &DataType::Boolean, |name| col(name.clone()).mean(), &input_schema, ), StatsFunction::Sum => stats_helper( |dt| { - dt.is_numeric() + dt.is_primitive_numeric() || dt.is_decimal() || matches!(dt, DataType::Boolean | DataType::Duration(_)) }, @@ -838,7 +838,7 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult &input_schema, ), StatsFunction::Median => stats_helper( - |dt| dt.is_numeric() || dt.is_temporal() || dt == &DataType::Boolean, + |dt| dt.is_primitive_numeric() || dt.is_temporal() || dt == &DataType::Boolean, |name| col(name.clone()).median(), &input_schema, ), diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs b/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs index 45c96323e023..151d94028400 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs @@ -174,14 +174,14 @@ pub(super) fn process_binary( #[cfg(feature = "dtype-categorical")] (String | Unknown(UnknownKind::Str) | Categorical(_, _), dt, op) | (dt, Unknown(UnknownKind::Str) | String | Categorical(_, _), op) - if op.is_comparison() && dt.is_numeric() => + if op.is_comparison() && dt.is_primitive_numeric() => { return Ok(None) }, #[cfg(feature = "dtype-categorical")] (Unknown(UnknownKind::Str) | String | Enum(_, _), dt, op) | (dt, Unknown(UnknownKind::Str) | String | Enum(_, _), op) - if op.is_comparison() && dt.is_numeric() => + if op.is_comparison() && dt.is_primitive_numeric() => { return Ok(None) }, @@ -212,8 +212,8 @@ pub(super) fn process_binary( if op.is_arithmetic() { match (&type_left, &type_right) { (Duration(_), Duration(_)) => return Ok(None), - (Duration(_), r) if r.is_numeric() => return Ok(None), - (String, a) | (a, String) if a.is_numeric() => { + (Duration(_), r) if r.is_primitive_numeric() => return Ok(None), + (String, a) | (a, String) if a.is_primitive_numeric() => { polars_bail!(InvalidOperation: "arithmetic on string and numeric not allowed, try an explicit cast first") }, (Datetime(_, _), _) @@ -229,7 +229,7 @@ pub(super) fn process_binary( #[cfg(feature = "dtype-array")] (Array(..), _) | (_, Array(..)) => return Ok(None), #[cfg(feature = "dtype-struct")] - (Struct(_), a) | (a, Struct(_)) if a.is_numeric() => { + (Struct(_), a) | (a, Struct(_)) if a.is_primitive_numeric() => { return process_struct_numeric_arithmetic( type_left, type_right, node_left, node_right, op, expr_arena, ) diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs b/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs index 34f54f6eb42e..d57ce6c73750 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs @@ -35,7 +35,7 @@ pub(super) fn resolve_is_in( #[cfg(feature = "dtype-categorical")] (DataType::String, DataType::Categorical(_, _) | DataType::Enum(_, _)) => return Ok(None), #[cfg(feature = "dtype-decimal")] - (DataType::Decimal(_, _), dt) if dt.is_numeric() => AExpr::Cast { + (DataType::Decimal(_, _), dt) if dt.is_primitive_numeric() => AExpr::Cast { expr: other_e.node(), dtype: type_left, options: CastOptions::NonStrict, @@ -64,7 +64,7 @@ pub(super) fn resolve_is_in( if other_inner.as_ref() == &type_left || (type_left == DataType::Null) || (other_inner.as_ref() == &DataType::Null) - || (other_inner.as_ref().is_numeric() && type_left.is_numeric()) + || (other_inner.as_ref().is_primitive_numeric() && type_left.is_primitive_numeric()) { return Ok(None); } @@ -75,7 +75,7 @@ pub(super) fn resolve_is_in( if other_inner.as_ref() == &type_left || (type_left == DataType::Null) || (other_inner.as_ref() == &DataType::Null) - || (other_inner.as_ref().is_numeric() && type_left.is_numeric()) + || (other_inner.as_ref().is_primitive_numeric() && type_left.is_primitive_numeric()) { return Ok(None); } @@ -87,7 +87,7 @@ pub(super) fn resolve_is_in( // don't attempt to cast between obviously mismatched types, but // allow integer/float comparison (will use their supertypes). (a, b) => { - if (a.is_numeric() && b.is_numeric()) || (a == &DataType::Null) { + if (a.is_primitive_numeric() && b.is_primitive_numeric()) || (a == &DataType::Null) { return Ok(None); } polars_bail!(InvalidOperation: "'is_in' cannot check for {:?} values in {:?} data", &type_other, &type_left) diff --git a/crates/polars-plan/src/plans/optimizer/collapse_joins.rs b/crates/polars-plan/src/plans/optimizer/collapse_joins.rs index 66bc8b8c6ce5..2716340b4e52 100644 --- a/crates/polars-plan/src/plans/optimizer/collapse_joins.rs +++ b/crates/polars-plan/src/plans/optimizer/collapse_joins.rs @@ -311,7 +311,7 @@ pub fn optimize(root: Node, lp_arena: &mut Arena, expr_arena: &mut Arena bool { aexpr_to_leaf_names_iter(node, expr_arena).any(|name| { if let Some(dt) = schema.get(name.as_str()) { - dt.to_physical().is_numeric() + dt.to_physical().is_primitive_numeric() } else { false } diff --git a/crates/polars-plan/src/plans/optimizer/fused.rs b/crates/polars-plan/src/plans/optimizer/fused.rs index 5009a9fa061d..be9c8ea8cfad 100644 --- a/crates/polars-plan/src/plans/optimizer/fused.rs +++ b/crates/polars-plan/src/plans/optimizer/fused.rs @@ -45,8 +45,8 @@ fn check_eligible( // Exclude literals for now as these will not benefit from fused operations downstream #9857 // This optimization would also interfere with the `col -> lit` type-coercion rules // And it might also interfere with constant folding which is a more suitable optimizations here - if type_left.is_numeric() - && type_right.is_numeric() + if type_left.is_primitive_numeric() + && type_right.is_primitive_numeric() && !has_aexpr_literal(*left, expr_arena) && !has_aexpr_literal(*right, expr_arena) { diff --git a/crates/polars-python/src/interop/numpy/to_numpy_df.rs b/crates/polars-python/src/interop/numpy/to_numpy_df.rs index 5e0b61774dd6..b174316b578e 100644 --- a/crates/polars-python/src/interop/numpy/to_numpy_df.rs +++ b/crates/polars-python/src/interop/numpy/to_numpy_df.rs @@ -81,7 +81,7 @@ fn try_df_to_numpy_view(py: Python, df: &DataFrame, allow_nulls: bool) -> Option let owner = PyDataFrame::from(df.clone()).into_py_any(py).ok()?; // Keep the DataFrame memory alive. let arr = match first_dtype { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numpy_polars_type!(first_dtype, |$T| { numeric_df_to_numpy_view::<$T>(py, df, owner) }) @@ -124,7 +124,7 @@ fn check_df_columns_contiguous(df: &DataFrame) -> bool { } match columns.first().unwrap().dtype() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let slices = columns .iter() @@ -244,7 +244,7 @@ fn try_df_to_numpy_numeric_supertype( let st = dtypes_to_supertype(df.iter().map(|s| s.dtype())).ok()?; let np_array = match st { - dt if dt.is_numeric() => with_match_physical_numpy_polars_type!(dt, |$T| { + dt if dt.is_primitive_numeric() => with_match_physical_numpy_polars_type!(dt, |$T| { df.to_ndarray::<$T>(order).ok()?.into_pyarray(py).into_py_any(py).ok()? }), _ => return None, diff --git a/crates/polars-python/src/interop/numpy/to_numpy_series.rs b/crates/polars-python/src/interop/numpy/to_numpy_series.rs index 8974a2d4364d..136f954758cd 100644 --- a/crates/polars-python/src/interop/numpy/to_numpy_series.rs +++ b/crates/polars-python/src/interop/numpy/to_numpy_series.rs @@ -107,7 +107,7 @@ fn handle_chunks(py: Python, s: &Series, allow_rechunk: bool) -> Option<(Series, fn series_to_numpy_view_recursive(py: Python, s: Series, writable: bool) -> PyObject { debug_assert!(s.n_chunks() == 1); match s.dtype() { - dt if dt.is_numeric() => numeric_series_to_numpy_view(py, s, writable), + dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable), DataType::Datetime(_, _) | DataType::Duration(_) => { temporal_series_to_numpy_view(py, s, writable) }, diff --git a/crates/polars-python/src/interop/numpy/utils.rs b/crates/polars-python/src/interop/numpy/utils.rs index dfaa805e0352..06909d0015ab 100644 --- a/crates/polars-python/src/interop/numpy/utils.rs +++ b/crates/polars-python/src/interop/numpy/utils.rs @@ -47,7 +47,7 @@ where /// Returns whether the data type supports creating a NumPy view. pub(super) fn dtype_supports_view(dtype: &DataType) -> bool { match dtype { - dt if dt.is_numeric() => true, + dt if dt.is_primitive_numeric() => true, DataType::Datetime(_, _) | DataType::Duration(_) => true, DataType::Array(inner, _) => dtype_supports_view(inner.as_ref()), _ => false, diff --git a/crates/polars-python/src/series/buffers.rs b/crates/polars-python/src/series/buffers.rs index 960b44949337..493353d4105e 100644 --- a/crates/polars-python/src/series/buffers.rs +++ b/crates/polars-python/src/series/buffers.rs @@ -75,7 +75,7 @@ impl PySeries { length: len, }) }, - dt if dt.is_numeric() => Ok(with_match_physical_numeric_polars_type!(dt, |$T| { + dt if dt.is_primitive_numeric() => Ok(with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); BufferInfo { pointer: get_pointer(ca), offset: 0, length: ca.len() } })), @@ -90,7 +90,7 @@ impl PySeries { fn _get_buffers(&self, py: Python) -> PyResult<(Self, Option, Option)> { let s = &self.series; py.allow_threads(|| match s.dtype().to_physical() { - dt if dt.is_numeric() => get_buffers_from_primitive(s), + dt if dt.is_primitive_numeric() => get_buffers_from_primitive(s), DataType::Boolean => get_buffers_from_primitive(s), DataType::String => get_buffers_from_string(s), dt => { @@ -191,7 +191,7 @@ impl PySeries { let owner = owner.to_owned().unbind(); let arr_boxed = match dtype { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { with_match_physical_numeric_type!(dt, |$T| unsafe { from_buffer_impl::<$T>(pointer, offset, length, owner) }) @@ -294,7 +294,7 @@ impl PySeries { }; let s = match dtype.to_physical() { - dt if dt.is_numeric() => { + dt if dt.is_primitive_numeric() => { let values = data.into_iter().next().unwrap(); with_match_physical_numeric_polars_type!(dt, |$T| { let values_buffer = series_to_buffer::<$T>(values); diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index 24ef7a752fa6..408217963bde 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -368,10 +368,10 @@ impl PySeries { let lhs_dtype = self.series.dtype(); let rhs_dtype = other.series.dtype(); - if !lhs_dtype.is_numeric() { + if !lhs_dtype.is_primitive_numeric() { return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into()); }; - if !rhs_dtype.is_numeric() { + if !rhs_dtype.is_primitive_numeric() { return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into()); } From e32a1b537af57a8c675c9da07802de9c0d8ebe17 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 6 Jan 2025 15:12:59 +0100 Subject: [PATCH 2/6] fmt --- .../src/chunked_array/logical/datetime.rs | 4 +++- .../src/chunked_array/ops/explode_and_offsets.rs | 3 ++- .../src/frame/group_by/aggregations/dispatch.rs | 4 +++- crates/polars-core/src/series/arithmetic/owned.rs | 4 +++- crates/polars-expr/src/expressions/binary.rs | 4 +++- crates/polars-plan/src/plans/conversion/dsl_to_ir.rs | 12 ++++++++++-- crates/polars-python/src/series/buffers.rs | 10 ++++++---- 7 files changed, 30 insertions(+), 11 deletions(-) diff --git a/crates/polars-core/src/chunked_array/logical/datetime.rs b/crates/polars-core/src/chunked_array/logical/datetime.rs index f14608d07efd..3b8c2b02024a 100644 --- a/crates/polars-core/src/chunked_array/logical/datetime.rs +++ b/crates/polars-core/src/chunked_array/logical/datetime.rs @@ -101,7 +101,9 @@ impl LogicalType for DatetimeChunked { .into_time() .into_series()); }, - dt if dt.is_primitive_numeric() => return self.0.cast_with_options(dtype, cast_options), + dt if dt.is_primitive_numeric() => { + return self.0.cast_with_options(dtype, cast_options) + }, dt => { polars_bail!( InvalidOperation: diff --git a/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs b/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs index eb7a33a170aa..a2435b5ce873 100644 --- a/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs +++ b/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs @@ -110,7 +110,8 @@ impl ChunkExplode for ListChunked { let (indices, new_offsets) = if listarr.null_count() == 0 { // SPECIALIZED path. let inner_phys = self.inner_dtype().to_physical(); - if inner_phys.is_primitive_numeric() || inner_phys.is_null() || inner_phys.is_bool() { + if inner_phys.is_primitive_numeric() || inner_phys.is_null() || inner_phys.is_bool() + { return Ok(self.specialized(values, offsets, offsets_buf)); } // Use gather diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index e941c842d6a4..a5e5ee199e67 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -193,7 +193,9 @@ impl Series { Boolean => s.cast(&Float64).unwrap().agg_median(groups), Float32 => SeriesWrap(s.f32().unwrap().clone()).agg_median(groups), Float64 => SeriesWrap(s.f64().unwrap().clone()).agg_median(groups), - dt if dt.is_primitive_numeric() => apply_method_physical_integer!(s, agg_median, groups), + dt if dt.is_primitive_numeric() => { + apply_method_physical_integer!(s, agg_median, groups) + }, #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() diff --git a/crates/polars-core/src/series/arithmetic/owned.rs b/crates/polars-core/src/series/arithmetic/owned.rs index 3426acd440d4..837f9e03e823 100644 --- a/crates/polars-core/src/series/arithmetic/owned.rs +++ b/crates/polars-core/src/series/arithmetic/owned.rs @@ -19,7 +19,9 @@ pub fn coerce_lhs_rhs_owned(lhs: Series, rhs: Series) -> PolarsResult<(Series, S } fn is_eligible(lhs: &DataType, rhs: &DataType) -> bool { - !lhs.is_logical() && lhs.to_physical().is_primitive_numeric() && rhs.to_physical().is_primitive_numeric() + !lhs.is_logical() + && lhs.to_physical().is_primitive_numeric() + && rhs.to_physical().is_primitive_numeric() } #[cfg(feature = "performant")] diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index dd6ee46cb7b0..44c0912fbeb2 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -46,7 +46,9 @@ fn apply_operator_owned(left: Column, right: Column, op: Operator) -> PolarsResu match op { Operator::Plus => left.try_add_owned(right), Operator::Minus => left.try_sub_owned(right), - Operator::Multiply if left.dtype().is_primitive_numeric() && right.dtype().is_primitive_numeric() => { + Operator::Multiply + if left.dtype().is_primitive_numeric() && right.dtype().is_primitive_numeric() => + { left.try_mul_owned(right) }, _ => apply_operator(&left, &right, op), diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs index 91a9d032d78f..2b75c86260fb 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs @@ -814,7 +814,11 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult &input_schema, ), StatsFunction::Mean => stats_helper( - |dt| dt.is_primitive_numeric() || dt.is_temporal() || dt == &DataType::Boolean, + |dt| { + dt.is_primitive_numeric() + || dt.is_temporal() + || dt == &DataType::Boolean + }, |name| col(name.clone()).mean(), &input_schema, ), @@ -838,7 +842,11 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult &input_schema, ), StatsFunction::Median => stats_helper( - |dt| dt.is_primitive_numeric() || dt.is_temporal() || dt == &DataType::Boolean, + |dt| { + dt.is_primitive_numeric() + || dt.is_temporal() + || dt == &DataType::Boolean + }, |name| col(name.clone()).median(), &input_schema, ), diff --git a/crates/polars-python/src/series/buffers.rs b/crates/polars-python/src/series/buffers.rs index 493353d4105e..956e13e578cf 100644 --- a/crates/polars-python/src/series/buffers.rs +++ b/crates/polars-python/src/series/buffers.rs @@ -75,10 +75,12 @@ impl PySeries { length: len, }) }, - dt if dt.is_primitive_numeric() => Ok(with_match_physical_numeric_polars_type!(dt, |$T| { - let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); - BufferInfo { pointer: get_pointer(ca), offset: 0, length: ca.len() } - })), + dt if dt.is_primitive_numeric() => { + Ok(with_match_physical_numeric_polars_type!(dt, |$T| { + let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); + BufferInfo { pointer: get_pointer(ca), offset: 0, length: ca.len() } + })) + }, dt => { let msg = format!("`_get_buffer_info` not implemented for non-physical type {dt}; try to select a buffer first"); Err(PyTypeError::new_err(msg)) From ad3b883918eb849e89b6510c1efe121843ca9643 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 6 Jan 2025 15:35:16 +0100 Subject: [PATCH 3/6] also apply to AnyValue --- crates/polars-core/src/datatypes/any_value.rs | 10 +++++----- crates/polars-core/src/frame/row/av_buffer.rs | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 62263204af3d..945a6646687f 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -503,7 +503,7 @@ impl<'a> AnyValue<'a> { matches!(self, AnyValue::Boolean(_)) } - pub fn is_numeric(&self) -> bool { + pub fn is_primitive_numeric(&self) -> bool { self.is_integer() || self.is_float() } @@ -611,7 +611,7 @@ impl<'a> AnyValue<'a> { // to datetime #[cfg(feature = "dtype-datetime")] - (av, DataType::Datetime(tu, tz)) if av.is_numeric() => { + (av, DataType::Datetime(tu, tz)) if av.is_primitive_numeric() => { AnyValue::Datetime(av.extract::()?, *tu, tz.as_ref()) }, #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))] @@ -644,7 +644,7 @@ impl<'a> AnyValue<'a> { // to date #[cfg(feature = "dtype-date")] - (av, DataType::Date) if av.is_numeric() => AnyValue::Date(av.extract::()?), + (av, DataType::Date) if av.is_primitive_numeric() => AnyValue::Date(av.extract::()?), #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))] (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => { AnyValue::Date(match tu { @@ -656,7 +656,7 @@ impl<'a> AnyValue<'a> { // to time #[cfg(feature = "dtype-time")] - (av, DataType::Time) if av.is_numeric() => AnyValue::Time(av.extract::()?), + (av, DataType::Time) if av.is_primitive_numeric() => AnyValue::Time(av.extract::()?), #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))] (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => { AnyValue::Time(match tu { @@ -668,7 +668,7 @@ impl<'a> AnyValue<'a> { // to duration #[cfg(feature = "dtype-duration")] - (av, DataType::Duration(tu)) if av.is_numeric() => { + (av, DataType::Duration(tu)) if av.is_primitive_numeric() => { AnyValue::Duration(av.extract::()?, *tu) }, #[cfg(all(feature = "dtype-duration", feature = "dtype-time"))] diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 6d1999660b10..9d5d753c28c6 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -89,7 +89,7 @@ impl<'a> AnyValueBuffer<'a> { #[cfg(feature = "dtype-date")] (Date(builder), AnyValue::Date(v)) => builder.append_value(v), #[cfg(feature = "dtype-date")] - (Date(builder), val) if val.is_numeric() => builder.append_value(val.extract()?), + (Date(builder), val) if val.is_primitive_numeric() => builder.append_value(val.extract()?), #[cfg(feature = "dtype-datetime")] (Datetime(builder, _, _), AnyValue::Null) => builder.append_null(), #[cfg(feature = "dtype-datetime")] @@ -100,7 +100,7 @@ impl<'a> AnyValueBuffer<'a> { builder.append_value(v) }, #[cfg(feature = "dtype-datetime")] - (Datetime(builder, _, _), val) if val.is_numeric() => { + (Datetime(builder, _, _), val) if val.is_primitive_numeric() => { builder.append_value(val.extract()?) }, #[cfg(feature = "dtype-duration")] @@ -111,13 +111,13 @@ impl<'a> AnyValueBuffer<'a> { builder.append_value(v) }, #[cfg(feature = "dtype-duration")] - (Duration(builder, _), val) if val.is_numeric() => builder.append_value(val.extract()?), + (Duration(builder, _), val) if val.is_primitive_numeric() => builder.append_value(val.extract()?), #[cfg(feature = "dtype-time")] (Time(builder), AnyValue::Time(v)) => builder.append_value(v), #[cfg(feature = "dtype-time")] (Time(builder), AnyValue::Null) => builder.append_null(), #[cfg(feature = "dtype-time")] - (Time(builder), val) if val.is_numeric() => builder.append_value(val.extract()?), + (Time(builder), val) if val.is_primitive_numeric() => builder.append_value(val.extract()?), (Null(builder), AnyValue::Null) => builder.append_null(), // Struct and List can be recursive so use AnyValues for that (All(_, vals), v) => vals.push(v), From 5bb0bf21829fd342248cf936854ccda97eabb902 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 6 Jan 2025 15:35:57 +0100 Subject: [PATCH 4/6] conditional compilation wasn't autorefactored --- crates/polars-plan/src/plans/conversion/type_coercion/binary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs b/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs index 151d94028400..12e9f236cebd 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/binary.rs @@ -167,7 +167,7 @@ pub(super) fn process_binary( match (&type_left, &type_right, op) { #[cfg(not(feature = "dtype-categorical"))] (DataType::String, dt, op) | (dt, DataType::String, op) - if op.is_comparison() && dt.is_numeric() => + if op.is_comparison() && dt.is_primitive_numeric() => { return Ok(None) }, From 4b84eefd40f7c79b22a5a428d775da35ca25d059 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 6 Jan 2025 16:29:12 +0100 Subject: [PATCH 5/6] fmt --- crates/polars-core/src/datatypes/any_value.rs | 8 +- crates/polars-core/src/frame/row/av_buffer.rs | 12 +- .../src/chunked_idx_table/binary.rs | 311 ++++++++++++++++++ 3 files changed, 326 insertions(+), 5 deletions(-) create mode 100644 crates/polars-expr/src/chunked_idx_table/binary.rs diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 945a6646687f..5cd41ada2f02 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -644,7 +644,9 @@ impl<'a> AnyValue<'a> { // to date #[cfg(feature = "dtype-date")] - (av, DataType::Date) if av.is_primitive_numeric() => AnyValue::Date(av.extract::()?), + (av, DataType::Date) if av.is_primitive_numeric() => { + AnyValue::Date(av.extract::()?) + }, #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))] (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => { AnyValue::Date(match tu { @@ -656,7 +658,9 @@ impl<'a> AnyValue<'a> { // to time #[cfg(feature = "dtype-time")] - (av, DataType::Time) if av.is_primitive_numeric() => AnyValue::Time(av.extract::()?), + (av, DataType::Time) if av.is_primitive_numeric() => { + AnyValue::Time(av.extract::()?) + }, #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))] (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => { AnyValue::Time(match tu { diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 9d5d753c28c6..58ceb5c29784 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -89,7 +89,9 @@ impl<'a> AnyValueBuffer<'a> { #[cfg(feature = "dtype-date")] (Date(builder), AnyValue::Date(v)) => builder.append_value(v), #[cfg(feature = "dtype-date")] - (Date(builder), val) if val.is_primitive_numeric() => builder.append_value(val.extract()?), + (Date(builder), val) if val.is_primitive_numeric() => { + builder.append_value(val.extract()?) + }, #[cfg(feature = "dtype-datetime")] (Datetime(builder, _, _), AnyValue::Null) => builder.append_null(), #[cfg(feature = "dtype-datetime")] @@ -111,13 +113,17 @@ impl<'a> AnyValueBuffer<'a> { builder.append_value(v) }, #[cfg(feature = "dtype-duration")] - (Duration(builder, _), val) if val.is_primitive_numeric() => builder.append_value(val.extract()?), + (Duration(builder, _), val) if val.is_primitive_numeric() => { + builder.append_value(val.extract()?) + }, #[cfg(feature = "dtype-time")] (Time(builder), AnyValue::Time(v)) => builder.append_value(v), #[cfg(feature = "dtype-time")] (Time(builder), AnyValue::Null) => builder.append_null(), #[cfg(feature = "dtype-time")] - (Time(builder), val) if val.is_primitive_numeric() => builder.append_value(val.extract()?), + (Time(builder), val) if val.is_primitive_numeric() => { + builder.append_value(val.extract()?) + }, (Null(builder), AnyValue::Null) => builder.append_null(), // Struct and List can be recursive so use AnyValues for that (All(_, vals), v) => vals.push(v), diff --git a/crates/polars-expr/src/chunked_idx_table/binary.rs b/crates/polars-expr/src/chunked_idx_table/binary.rs new file mode 100644 index 000000000000..0cc58c9e131c --- /dev/null +++ b/crates/polars-expr/src/chunked_idx_table/binary.rs @@ -0,0 +1,311 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +use arrow::array::Array; +use polars_utils::idx_map::bytes_idx_map::{BytesIndexMap, Entry}; +use polars_utils::idx_vec::UnitVec; +use polars_utils::itertools::Itertools; +use polars_utils::unitvec; + +use super::*; +use crate::hash_keys::HashKeys; + +#[derive(Default)] +pub struct BinaryChunkedIdxTable { + // These AtomicU64s actually are ChunkIds, but we use the top bit of the + // first chunk in each to mark keys during probing. + idx_map: BytesIndexMap>, + chunk_ctr: u32, + null_keys: Vec>, +} + +impl BinaryChunkedIdxTable { + pub fn new() -> Self { + Self { + idx_map: BytesIndexMap::new(), + chunk_ctr: 0, + null_keys: Vec::new(), + } + } +} + +impl BinaryChunkedIdxTable { + #[inline(always)] + fn probe_one( + &self, + key_idx: IdxSize, + hash: u64, + key: &[u8], + table_match: &mut Vec>, + probe_match: &mut Vec, + ) -> bool { + if let Some(chunk_ids) = self.idx_map.get(hash, key) { + for chunk_id in &chunk_ids[..] { + // Create matches, making sure to clear top bit. + let raw_chunk_id = chunk_id.load(Ordering::Relaxed); + let chunk_id = ChunkId::from_inner(raw_chunk_id & !(1 << 63)); + table_match.push(chunk_id); + probe_match.push(key_idx); + } + + // Mark if necessary. This action is idempotent so doesn't + // need any synchronization on the load, nor does it need a + // fetch_or to do it atomically. + if MARK_MATCHES { + let first_chunk_id = unsafe { chunk_ids.get_unchecked(0) }; + let first_chunk_val = first_chunk_id.load(Ordering::Relaxed); + if first_chunk_val >> 63 == 0 { + first_chunk_id.store(first_chunk_val | (1 << 63), Ordering::Release); + } + } + true + } else { + false + } + } + + fn probe_impl<'a, const MARK_MATCHES: bool, const EMIT_UNMATCHED: bool>( + &self, + hash_keys: impl Iterator)>, + table_match: &mut Vec>, + probe_match: &mut Vec, + limit: IdxSize, + ) -> IdxSize { + table_match.clear(); + probe_match.clear(); + + let mut keys_processed = 0; + for (key_idx, hash, key) in hash_keys { + let found_match = if let Some(key) = key { + self.probe_one::(key_idx, hash, key, table_match, probe_match) + } else { + false + }; + + if EMIT_UNMATCHED && !found_match { + table_match.push(ChunkId::null()); + probe_match.push(key_idx); + } + + keys_processed += 1; + if table_match.len() >= limit as usize { + break; + } + } + keys_processed + } + + fn probe_dispatch<'a>( + &self, + hash_keys: impl Iterator)>, + table_match: &mut Vec>, + probe_match: &mut Vec, + mark_matches: bool, + emit_unmatched: bool, + limit: IdxSize, + ) -> IdxSize { + match (mark_matches, emit_unmatched) { + (false, false) => { + self.probe_impl::(hash_keys, table_match, probe_match, limit) + }, + (false, true) => { + self.probe_impl::(hash_keys, table_match, probe_match, limit) + }, + (true, false) => { + self.probe_impl::(hash_keys, table_match, probe_match, limit) + }, + (true, true) => { + self.probe_impl::(hash_keys, table_match, probe_match, limit) + }, + } + } +} + +impl ChunkedIdxTable for BinaryChunkedIdxTable { + fn new_empty(&self) -> Box { + Box::new(Self::new()) + } + + fn reserve(&mut self, additional: usize) { + self.idx_map.reserve(additional); + } + + fn num_keys(&self) -> IdxSize { + self.idx_map.len() + } + + fn insert_key_chunk(&mut self, hash_keys: HashKeys, track_unmatchable: bool) { + let HashKeys::Binary(hash_keys) = hash_keys else { + unreachable!() + }; + if hash_keys.keys.len() >= 1 << 31 { + panic!("overly large chunk in BinaryChunkedIdxTable"); + } + + for (i, (hash, key)) in hash_keys + .hashes + .values_iter() + .zip(hash_keys.keys.iter()) + .enumerate_idx() + { + let chunk_id = ChunkId::<32>::store(self.chunk_ctr as IdxSize, i); + if let Some(key) = key { + let chunk_id = AtomicU64::new(chunk_id.into_inner()); + match self.idx_map.entry(*hash, key) { + Entry::Occupied(o) => { + o.into_mut().push(chunk_id); + }, + Entry::Vacant(v) => { + v.insert(unitvec![chunk_id]); + }, + } + } else if track_unmatchable { + self.null_keys.push(chunk_id); + } + } + + self.chunk_ctr = self.chunk_ctr.checked_add(1).unwrap(); + } + + fn probe( + &self, + hash_keys: &HashKeys, + table_match: &mut Vec>, + probe_match: &mut Vec, + mark_matches: bool, + emit_unmatched: bool, + limit: IdxSize, + ) -> IdxSize { + let HashKeys::Binary(hash_keys) = hash_keys else { + unreachable!() + }; + + if hash_keys.keys.has_nulls() { + let iter = hash_keys + .hashes + .values_iter() + .copied() + .zip(hash_keys.keys.iter()) + .enumerate_idx() + .map(|(i, (h, k))| (i, h, k)); + self.probe_dispatch( + iter, + table_match, + probe_match, + mark_matches, + emit_unmatched, + limit, + ) + } else { + let iter = hash_keys + .hashes + .values_iter() + .copied() + .zip(hash_keys.keys.values_iter().map(Some)) + .enumerate_idx() + .map(|(i, (h, k))| (i, h, k)); + self.probe_dispatch( + iter, + table_match, + probe_match, + mark_matches, + emit_unmatched, + limit, + ) + } + } + + unsafe fn probe_subset( + &self, + hash_keys: &HashKeys, + subset: &[IdxSize], + table_match: &mut Vec>, + probe_match: &mut Vec, + mark_matches: bool, + emit_unmatched: bool, + limit: IdxSize, + ) -> IdxSize { + let HashKeys::Binary(hash_keys) = hash_keys else { + unreachable!() + }; + + if hash_keys.keys.has_nulls() { + let iter = subset.iter().map(|i| { + ( + *i, + hash_keys.hashes.value_unchecked(*i as usize), + hash_keys.keys.get_unchecked(*i as usize), + ) + }); + self.probe_dispatch( + iter, + table_match, + probe_match, + mark_matches, + emit_unmatched, + limit, + ) + } else { + let iter = subset.iter().map(|i| { + ( + *i, + hash_keys.hashes.value_unchecked(*i as usize), + Some(hash_keys.keys.value_unchecked(*i as usize)), + ) + }); + self.probe_dispatch( + iter, + table_match, + probe_match, + mark_matches, + emit_unmatched, + limit, + ) + } + } + + fn unmarked_keys( + &self, + out: &mut Vec>, + mut offset: IdxSize, + limit: IdxSize, + ) -> IdxSize { + out.clear(); + + let mut keys_processed = 0; + if (offset as usize) < self.null_keys.len() { + out.extend( + self.null_keys[offset as usize..] + .iter() + .copied() + .take(limit as usize), + ); + keys_processed += out.len() as IdxSize; + offset += out.len() as IdxSize; + if out.len() >= limit as usize { + return keys_processed; + } + } + + offset -= self.null_keys.len() as IdxSize; + + while let Some((_, _, chunk_ids)) = self.idx_map.get_index(offset) { + let first_chunk_id = unsafe { chunk_ids.get_unchecked(0) }; + let first_chunk_val = first_chunk_id.load(Ordering::Acquire); + if first_chunk_val >> 63 == 0 { + for chunk_id in &chunk_ids[..] { + let raw_chunk_id = chunk_id.load(Ordering::Relaxed); + let chunk_id = ChunkId::from_inner(raw_chunk_id & !(1 << 63)); + out.push(chunk_id); + } + } + + keys_processed += 1; + offset += 1; + if out.len() >= limit as usize { + break; + } + } + + keys_processed + } +} From e74254122d8c7458dbc9d8675cb071049f359dfc Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Tue, 7 Jan 2025 12:03:11 +0100 Subject: [PATCH 6/6] skip new-streaming categorical test --- py-polars/tests/unit/datatypes/test_categorical.py | 1 + 1 file changed, 1 insertion(+) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 1b37763c0d08..d0e928061ee2 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -934,6 +934,7 @@ def test_categorical_unique() -> None: assert s.unique().sort().to_list() == [None, "a", "b"] +@pytest.mark.may_fail_auto_streaming @pytest.mark.usefixtures("test_global_and_local") def test_categorical_unique_20539() -> None: df = pl.DataFrame({"number": [1, 1, 2, 2, 3], "letter": ["a", "b", "b", "c", "c"]})