From c85e3dc8ecc7859cc64594bb91b8de264514b397 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 1 Mar 2025 14:31:38 -0500 Subject: [PATCH] fix: Categorical min/max panicking when string cache is enabled --- .../src/chunked_array/ops/aggregate/mod.rs | 30 +++++++++---------- .../tests/unit/datatypes/test_categorical.py | 1 + 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs b/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs index 10e5dc10703d..0a30a45c6b26 100644 --- a/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs @@ -462,7 +462,7 @@ impl CategoricalChunked { let rev_map = self.get_rev_map(); // Fast path where all categories are used let c = if self._can_fast_unique() { - self.get_rev_map().get_categories().min_ignore_nan_kernel() + rev_map.get_categories().min_ignore_nan_kernel() } else { // SAFETY: // Indices are in bounds @@ -475,10 +475,7 @@ impl CategoricalChunked { }; rev_map.find(c.unwrap()) } else { - match self._can_fast_unique() { - true => Some(0), - false => self.physical().min(), - } + self.physical().min() } } @@ -490,7 +487,7 @@ impl CategoricalChunked { let rev_map = self.get_rev_map(); // Fast path where all categories are used let c = if self._can_fast_unique() { - self.get_rev_map().get_categories().max_ignore_nan_kernel() + rev_map.get_categories().max_ignore_nan_kernel() } else { // SAFETY: // Indices are in bounds @@ -503,10 +500,7 @@ impl CategoricalChunked { }; rev_map.find(c.unwrap()) } else { - match self._can_fast_unique() { - true => Some((self.get_rev_map().len() - 1) as u32), - false => self.physical().max(), - } + self.physical().max() } } } @@ -534,14 +528,16 @@ impl ChunkAggSeries for CategoricalChunked { DataType::Categorical(r, _) => match self.min_categorical() { None => Scalar::new(self.dtype().clone(), AnyValue::Null), Some(v) => { - let RevMapping::Local(arr, _) = &**r.as_ref().unwrap() else { - unreachable!() + let r = r.as_ref().unwrap(); + let arr = match &**r { + RevMapping::Local(arr, _) => arr, + RevMapping::Global(_, arr, _) => arr, }; Scalar::new( self.dtype().clone(), AnyValue::CategoricalOwned( v, - r.as_ref().unwrap().clone(), + r.clone(), SyncPtr::from_const(arr as *const _), ), ) @@ -571,14 +567,16 @@ impl ChunkAggSeries for CategoricalChunked { DataType::Categorical(r, _) => match self.max_categorical() { None => Scalar::new(self.dtype().clone(), AnyValue::Null), Some(v) => { - let RevMapping::Local(arr, _) = &**r.as_ref().unwrap() else { - unreachable!() + let r = r.as_ref().unwrap(); + let arr = match &**r { + RevMapping::Local(arr, _) => arr, + RevMapping::Global(_, arr, _) => arr, }; Scalar::new( self.dtype().clone(), AnyValue::CategoricalOwned( v, - r.as_ref().unwrap().clone(), + r.clone(), SyncPtr::from_const(arr as *const _), ), ) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 229da067f263..00756edc0502 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -980,6 +980,7 @@ def test_categorical_prefill() -> None: @pytest.mark.may_fail_auto_streaming # not implemented +@pytest.mark.usefixtures("test_global_and_local") def test_categorical_min_max() -> None: schema = pl.Schema( {