Skip to content

Commit

Permalink
perf: Reduce sharing in stringview arrays in new-streaming equijoin (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp authored Feb 7, 2025
1 parent 1145ec0 commit 9565c70
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 58 deletions.
9 changes: 9 additions & 0 deletions crates/polars-arrow/src/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,15 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
mutable.freeze().with_validity(self.validity)
}

/// Returns a copy of this array, garbage-collected only if its buffers are shared.
///
/// When the buffer list has exactly one strong reference and every buffer
/// reports a storage refcount of 1, a plain clone is returned. Otherwise the
/// clone is passed through `gc()` before being returned.
pub fn deshare(&self) -> Self {
    // Evaluate ownership before cloning so the check sees the current counts.
    let exclusively_owned = Arc::strong_count(&self.buffers) == 1
        && self.buffers.iter().all(|buf| buf.storage_refcount() == 1);

    let copy = self.clone();
    if exclusively_owned {
        copy
    } else {
        copy.gc()
    }
}

/// Returns `true` when the views pointer differs from the pointer to the
/// start of the views' backing storage.
pub fn is_sliced(&self) -> bool {
    let views_start = self.views.as_ptr();
    let storage_start = self.views.storage_ptr();
    views_start != storage_start
}
Expand Down
18 changes: 18 additions & 0 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,24 @@ impl DataFrame {
}
}

/// Replaces each binary or string `Column::Series` column in place with the
/// result of applying `deshare()` to every one of its chunks.
///
/// Columns that are not `Column::Series`, and series that are neither binary
/// nor string, are left untouched.
pub fn _deshare_views_mut(&mut self) {
    // SAFETY: We never adjust the length or names of the columns.
    let columns = unsafe { self.get_columns_mut() };

    for col in columns {
        let replacement = match col {
            Column::Series(s) => {
                if let Ok(ca) = s.binary() {
                    ca.apply_kernel(&|arr| arr.deshare().into_boxed())
                        .into_series()
                } else if let Ok(ca) = s.str() {
                    ca.apply_kernel(&|arr| arr.deshare().into_boxed())
                        .into_series()
                } else {
                    // Not a view-backed dtype handled here.
                    continue;
                }
            },
            _ => continue,
        };
        *col = Column::from(replacement);
    }
}

/// Rechunks all columns to only have a single chunk and turns it into a [`RecordBatchT`].
pub fn rechunk_to_record_batch(
self,
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-expr/src/expressions/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ fn materialize_column(join_opt_ids: &ChunkJoinOptIds, out_column: &Column) -> Co
Either::Left(ids) => unsafe {
IdxCa::with_nullable_idx(ids, |idx| out_column.take_unchecked(idx))
},
Either::Right(ids) => unsafe { out_column.take_opt_chunked_unchecked(ids) },
Either::Right(ids) => unsafe { out_column.take_opt_chunked_unchecked(ids, false) },
}
}
}
Expand Down
Loading

0 comments on commit 9565c70

Please sign in to comment.