
Commit

add docs
Blizzara committed Jul 24, 2024
1 parent ac29169 commit 8bb99ea
Showing 5 changed files with 10 additions and 2 deletions.
3 changes: 3 additions & 0 deletions native/spark-expr/src/scalar_funcs.rs
@@ -177,6 +177,7 @@ pub fn spark_floor(
}
}

+/// Spark-compatible `UnscaledValue` expression (internal to Spark optimizer)
pub fn spark_unscaled_value(args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
match &args[0] {
ColumnarValue::Scalar(v) => match v {
@@ -196,6 +197,7 @@ pub fn spark_unscaled_value(args: &[ColumnarValue]) -> DataFusionResult<Columnar
}
}

+/// Spark-compatible `MakeDecimal` expression (internal to Spark optimizer)
pub fn spark_make_decimal(
args: &[ColumnarValue],
data_type: &DataType,
@@ -483,6 +485,7 @@ pub fn spark_decimal_div(
Ok(ColumnarValue::Array(Arc::new(result)))
}

+/// Spark-compatible `isnan` expression
pub fn spark_isnan(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
fn set_nulls_to_false(is_nan: BooleanArray) -> ColumnarValue {
match is_nan.nulls() {
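
The three doc comments added above pin down two bits of Spark semantics. `UnscaledValue` and `MakeDecimal` are inverses over Spark's (unscaled integer, scale) decimal encoding, and `isnan` returns `false` rather than NULL for NULL input, which is what the `set_nulls_to_false` helper arranges. A minimal standalone sketch of both behaviors, using plain Rust types in place of Comet's Arrow-based `ColumnarValue` (the function bodies are illustrative, not the Comet implementation):

```rust
/// Spark stores a decimal as (unscaled integer, scale): 123.45 at scale 2
/// is the unscaled value 12345. `UnscaledValue` extracts that integer...
fn unscaled_value(unscaled: i128) -> i128 {
    unscaled
}

/// ...and `MakeDecimal` reinterprets an integer as a decimal at a given
/// scale, rendered as a string here for display.
/// (Sign handling for |value| < 1 is omitted for brevity.)
fn make_decimal(unscaled: i128, scale: u32) -> String {
    let divisor = 10i128.pow(scale);
    format!(
        "{}.{:0width$}",
        unscaled / divisor,
        (unscaled % divisor).abs(),
        width = scale as usize
    )
}

/// Spark's isnan returns false (not NULL) for NULL input, so the null
/// mask is folded into the boolean result.
fn spark_isnan(value: Option<f64>) -> bool {
    value.map(f64::is_nan).unwrap_or(false)
}

fn main() {
    assert_eq!(make_decimal(unscaled_value(12345), 2), "123.45");
    assert!(spark_isnan(Some(f64::NAN)));
    assert!(!spark_isnan(None)); // NULL -> false, matching set_nulls_to_false
}
```
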
1 change: 1 addition & 0 deletions native/spark-expr/src/scalar_funcs/chr.rs
@@ -54,6 +54,7 @@ fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}

+/// Spark-compatible `chr` expression
#[derive(Debug)]
pub struct SparkChrFunc {
signature: Signature,
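
For reference, Spark's `chr` maps an integer code point to a one-character string. A single-value sketch of the documented behavior, namely that negative input yields an empty string and values at or above 256 wrap modulo 256 (the real `SparkChrFunc` operates on Arrow arrays and handles nulls, which this sketch omits):

```rust
/// Spark-style chr for one value: "" for negative input, otherwise the
/// character for n % 256 (per Spark's documented wrap-around).
fn spark_chr(n: i64) -> String {
    if n < 0 {
        return String::new();
    }
    (((n % 256) as u8) as char).to_string()
}

fn main() {
    assert_eq!(spark_chr(65), "A");
    assert_eq!(spark_chr(321), "A"); // 321 % 256 == 65
    assert_eq!(spark_chr(-5), "");
}
```
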
6 changes: 4 additions & 2 deletions native/spark-expr/src/scalar_funcs/hash_expressions.rs
@@ -113,20 +113,22 @@ pub fn spark_xxhash64(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusio
}
}

-// Note: Spark actually has just a single sha2 function, which takes the bit len as arg
-// but for Comet we split the calls in QueryPlanSerde
+/// `sha224` function that simulates Spark's `sha2` expression with bit width 224
pub fn spark_sha224(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
wrap_digest_result_as_hex_string(args, sha224().fun())
}

+/// `sha256` function that simulates Spark's `sha2` expression with bit width 0 or 256
pub fn spark_sha256(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
wrap_digest_result_as_hex_string(args, sha256().fun())
}

+/// `sha384` function that simulates Spark's `sha2` expression with bit width 384
pub fn spark_sha384(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
wrap_digest_result_as_hex_string(args, sha384().fun())
}

+/// `sha512` function that simulates Spark's `sha2` expression with bit width 512
pub fn spark_sha512(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
wrap_digest_result_as_hex_string(args, sha512().fun())
}
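
As the removed note says, Spark exposes a single `sha2(expr, bitLength)` expression, and Comet's QueryPlanSerde fans it out to one native function per bit width. A hedged sketch of what that routing amounts to (the `NativeFunc` enum is a placeholder, not Comet's real serde type; only the width mapping is taken from the doc comments above):

```rust
// Placeholder for the per-width native functions added in this diff.
#[derive(Debug, PartialEq)]
enum NativeFunc {
    Sha224,
    Sha256,
    Sha384,
    Sha512,
}

/// Map Spark's sha2 bit-length argument to a dedicated native function.
fn route_sha2(bit_length: i32) -> Option<NativeFunc> {
    match bit_length {
        224 => Some(NativeFunc::Sha224),
        // Spark treats bit length 0 as SHA-256, per the spark_sha256 doc.
        0 | 256 => Some(NativeFunc::Sha256),
        384 => Some(NativeFunc::Sha384),
        512 => Some(NativeFunc::Sha512),
        _ => None, // unsupported widths are left to Spark
    }
}

fn main() {
    assert_eq!(route_sha2(0), Some(NativeFunc::Sha256));
    assert_eq!(route_sha2(128), None);
}
```
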
1 change: 1 addition & 0 deletions native/spark-expr/src/scalar_funcs/hex.rs
@@ -62,6 +62,7 @@ fn hex_bytes<T: AsRef<[u8]>>(bytes: T) -> Result<String, std::fmt::Error> {
Ok(hex_string)
}

+/// Spark-compatible `hex` function
pub fn spark_hex(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
if args.len() != 1 {
return Err(DataFusionError::Internal(
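
The `hex_bytes` helper above emits two hex digits per input byte. A standalone sketch of the same idea, with uppercase output matching Spark's `hex` (an illustration of the encoding, not the Comet code):

```rust
/// Encode bytes as an uppercase hex string, two digits per input byte.
fn hex_bytes(bytes: &[u8]) -> String {
    let mut out = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        out.push_str(&format!("{b:02X}"));
    }
    out
}

fn main() {
    // 'S' = 0x53, 'p' = 0x70, 'a' = 0x61, 'r' = 0x72, 'k' = 0x6B
    assert_eq!(hex_bytes(b"Spark"), "537061726B");
}
```
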
1 change: 1 addition & 0 deletions native/spark-expr/src/scalar_funcs/unhex.rs
@@ -109,6 +109,7 @@ fn spark_unhex_inner<T: OffsetSizeTrait>(
}
}

+/// Spark-compatible `unhex` expression
pub fn spark_unhex(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
if args.len() > 2 {
return exec_err!("unhex takes at most 2 arguments, but got: {}", args.len());
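
`spark_unhex` accepts a second argument besides the hex string, presumably Spark's fail-on-error flag (an assumption here; only the two-argument limit is visible in the diff). The decoding itself inverts `hex`: each digit pair becomes one byte, odd-length input is treated as if left-padded with `0` (which I believe matches Spark's `unhex`), and any invalid digit makes the whole result NULL. A minimal sketch of those semantics with `Option` standing in for SQL NULL:

```rust
/// Decode a hex string into bytes, Spark-style: odd-length input is
/// implicitly left-padded with '0', and a non-hex digit makes the
/// whole result None (standing in for SQL NULL in this sketch).
fn unhex(s: &str) -> Option<Vec<u8>> {
    let padded = if s.len() % 2 == 1 {
        format!("0{s}")
    } else {
        s.to_string()
    };
    let mut out = Vec::with_capacity(padded.len() / 2);
    for pair in padded.as_bytes().chunks(2) {
        let hi = (pair[0] as char).to_digit(16)?;
        let lo = (pair[1] as char).to_digit(16)?;
        out.push(((hi << 4) | lo) as u8);
    }
    Some(out)
}

fn main() {
    assert_eq!(unhex("537061726B"), Some(b"Spark".to_vec()));
    assert_eq!(unhex("F"), Some(vec![0x0F])); // odd length, left-padded
    assert_eq!(unhex("GG"), None); // invalid digit -> NULL
}
```
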
