From 853c003816522d3078ca44cd2e03918e5f994258 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 23 Mar 2023 15:30:47 +0800 Subject: [PATCH 1/5] add docs --- src/common/src/array/column.rs | 12 ++++++++++-- src/common/src/array/data_chunk.rs | 31 ++++++++++++++++++++++++++++-- src/common/src/array/vis.rs | 8 ++++++-- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/common/src/array/column.rs b/src/common/src/array/column.rs index d235acb7874e9..fec58ea0ae036 100644 --- a/src/common/src/array/column.rs +++ b/src/common/src/array/column.rs @@ -20,8 +20,16 @@ use risingwave_pb::data::PbColumn; use super::{Array, ArrayError, ArrayResult, I64Array}; use crate::array::{ArrayImpl, ArrayRef}; -/// Column is owned by `DataChunk`. It consists of logic data type and physical array -/// implementation. +/// Column is owned by [`DataChunk`]. +/// It consists of logic data type and physical array implementation. +/// It contains all datums bound to a [`Column`]. +/// For instance, given: +/// | v1 | v2 | +/// |----|----| +/// | 1 | a | +/// | 1 | b | +/// | 1 | c | +/// [`ArrayRef`] will contain: [1,1,1] #[derive(Clone, Debug, PartialEq)] pub struct Column { array: ArrayRef, diff --git a/src/common/src/array/data_chunk.rs b/src/common/src/array/data_chunk.rs index 76e1f8d26f575..ab5d0adf69732 100644 --- a/src/common/src/array/data_chunk.rs +++ b/src/common/src/array/data_chunk.rs @@ -34,7 +34,23 @@ use crate::util::hash_util::finalize_hashers; use crate::util::iter_util::{ZipEqDebug, ZipEqFast}; use crate::util::value_encoding::{serialize_datum_into, ValueRowSerializer}; -/// `DataChunk` is a collection of arrays with visibility mask. +/// [`DataChunk`] is a collection of Columns, +/// a with visibility mask for each row. +/// For instance, we could have a [`DataChunk`] of this format. +/// | v1 | v2 | v3 | +/// |----|----|----| +/// | 1 | a | t | +/// | 2 | b | f | +/// | 3 | c | t | +/// | 4 | d | f | +/// +/// Our columns are v1, v2, v3. +/// Then, if the Visibility Mask hides rows 2 and 4, +/// We will only have these columns visible: +/// | v1 | v2 | v3 | +/// |----|----|----| +/// | 1 | a | t | +/// | 3 | c | t | #[derive(Clone, PartialEq)] #[must_use] pub struct DataChunk { @@ -170,7 +186,18 @@ impl DataChunk { } /// `compact` will convert the chunk to compact format. - /// Compact format means that `visibility == None`. + /// Compacting removes the hidden rows, and returns a new visibility + /// mask which indicates this. + /// + /// `compact` has trade-offs: + /// + /// Cost: + /// It has to rebuild the each column, meaning it will incur cost + /// of copying over bytes from the original column array to the new one. + /// + /// Benefit: + /// The main benefit is that the data chunk is smaller, taking up less memory. + /// We can also save the cost of iterating over many hidden rows. pub fn compact(self) -> Self { match &self.vis2 { Vis::Compact(_) => self, diff --git a/src/common/src/array/vis.rs b/src/common/src/array/vis.rs index 02607e6836f99..d5830dc8d38ef 100644 --- a/src/common/src/array/vis.rs +++ b/src/common/src/array/vis.rs @@ -17,11 +17,15 @@ use itertools::repeat_n; use crate::buffer::{Bitmap, BitmapBuilder}; -/// `Vis` is a visibility bitmap of rows. When all rows are visible, it is considered compact and -/// is represented by a single cardinality number rather than that many of ones. +/// `Vis` is a visibility bitmap of rows. #[derive(Clone, PartialEq, Debug)] pub enum Vis { + /// Non-compact variant. + /// Certain rows are excluded using this bitmap. Bitmap(Bitmap), + + /// Compact variant which just stores cardinality of rows. + /// This is used when all rows are visible. Compact(usize), // equivalent to all ones of this size } From 49ea9dcbd83311b27eba6fc503eb420d5516520c Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 23 Mar 2023 15:45:50 +0800 Subject: [PATCH 2/5] improve --- src/common/src/array/column.rs | 12 +++++++----- src/common/src/array/mod.rs | 3 ++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/common/src/array/column.rs b/src/common/src/array/column.rs index fec58ea0ae036..258b861c36021 100644 --- a/src/common/src/array/column.rs +++ b/src/common/src/array/column.rs @@ -20,16 +20,18 @@ use risingwave_pb::data::PbColumn; use super::{Array, ArrayError, ArrayResult, I64Array}; use crate::array::{ArrayImpl, ArrayRef}; -/// Column is owned by [`DataChunk`]. -/// It consists of logic data type and physical array implementation. -/// It contains all datums bound to a [`Column`]. -/// For instance, given: +/// A [`Column`] consists of its logical data type +/// and its corresponding physical array implementation, +/// which contains all the datums bound to this [`Column`]. +/// It is owned by [`DataChunk`]. +/// +/// For instance, in this [`DataChunk`], +/// for column `v1`, [`ArrayRef`] will contain: [1,1,1] /// | v1 | v2 | /// |----|----| /// | 1 | a | /// | 1 | b | /// | 1 | c | -/// [`ArrayRef`] will contain: [1,1,1] #[derive(Clone, Debug, PartialEq)] pub struct Column { array: ArrayRef, diff --git a/src/common/src/array/mod.rs b/src/common/src/array/mod.rs index 881f1b041b4cb..e75c2095a7b78 100644 --- a/src/common/src/array/mod.rs +++ b/src/common/src/array/mod.rs @@ -496,7 +496,8 @@ macro_rules! impl_array_builder { } } - /// Append a [`Datum`] or [`DatumRef`] multiple times, return error while type not match. + /// Append a [`Datum`] or [`DatumRef`] multiple times, + /// panicking if the datum's type does not match the array builder's type. pub fn append_datum_n(&mut self, n: usize, datum: impl ToDatumRef) { match datum.to_datum_ref() { None => match self { From 0a6a7bcbbe071a418f224b29a7b0673ee4a12364 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 23 Mar 2023 15:47:10 +0800 Subject: [PATCH 3/5] improve --- src/common/src/array/column.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/src/array/column.rs b/src/common/src/array/column.rs index 258b861c36021..e14c5a9498c43 100644 --- a/src/common/src/array/column.rs +++ b/src/common/src/array/column.rs @@ -22,8 +22,8 @@ use crate::array::{ArrayImpl, ArrayRef}; /// A [`Column`] consists of its logical data type /// and its corresponding physical array implementation, -/// which contains all the datums bound to this [`Column`]. -/// It is owned by [`DataChunk`]. +/// The array contains all the datums bound to this [`Column`]. +/// [`Column`] is owned by [`DataChunk`]. /// /// For instance, in this [`DataChunk`], /// for column `v1`, [`ArrayRef`] will contain: [1,1,1] From 6953fd054ba9dafad7c9ac9aeac2bd3837c4a1f0 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 23 Mar 2023 15:48:50 +0800 Subject: [PATCH 4/5] improve --- src/common/src/array/vis.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/src/array/vis.rs b/src/common/src/array/vis.rs index d5830dc8d38ef..9d65e92c7da06 100644 --- a/src/common/src/array/vis.rs +++ b/src/common/src/array/vis.rs @@ -21,11 +21,11 @@ use crate::buffer::{Bitmap, BitmapBuilder}; #[derive(Clone, PartialEq, Debug)] pub enum Vis { /// Non-compact variant. - /// Certain rows are excluded using this bitmap. + /// Certain rows are hidden using this bitmap. Bitmap(Bitmap), /// Compact variant which just stores cardinality of rows. - /// This is used when all rows are visible. + /// This can be used when all rows are visible. Compact(usize), // equivalent to all ones of this size } From fee7cb79e6c2905c5c026dc6ed0d55708c9a80f2 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 23 Mar 2023 15:52:36 +0800 Subject: [PATCH 5/5] typo --- src/common/src/array/data_chunk.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/array/data_chunk.rs b/src/common/src/array/data_chunk.rs index ab5d0adf69732..f035c9f5a90af 100644 --- a/src/common/src/array/data_chunk.rs +++ b/src/common/src/array/data_chunk.rs @@ -46,7 +46,7 @@ use crate::util::value_encoding::{serialize_datum_into, ValueRowSerializer}; /// /// Our columns are v1, v2, v3. /// Then, if the Visibility Mask hides rows 2 and 4, -/// We will only have these columns visible: +/// We will only have these rows visible: /// | v1 | v2 | v3 | /// |----|----|----| /// | 1 | a | t |