Skip to content

Commit 8714293

Browse files
committed
feat: approx_quantile dataframe function
Adds the approx_quantile() dataframe function, and exports it in the prelude.
1 parent d5fc006 commit 8714293

File tree

4 files changed

+42
-13
lines changed

4 files changed

+42
-13
lines changed

datafusion/src/logical_plan/expr.rs

+9
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,15 @@ pub fn approx_distinct(expr: Expr) -> Expr {
16351635
}
16361636
}
16371637

1638+
/// Calculate an approximation of the specified `quantile` for `expr`.
///
/// Builds an `Expr::AggregateFunction` whose `fun` is
/// `AggregateFunction::ApproxQuantile`, with `distinct` set to `false` and
/// `expr` and `quantile` passed as the two arguments, in that order.
///
/// NOTE(review): `quantile` is presumably expected to be a literal fraction
/// such as `lit(0.5)` — confirm the accepted range and type against the
/// aggregate's implementation.
1639+
pub fn approx_quantile(expr: Expr, quantile: Expr) -> Expr {
1640+
Expr::AggregateFunction {
1641+
fun: aggregates::AggregateFunction::ApproxQuantile,
1642+
distinct: false,
1643+
args: vec![expr, quantile],
1644+
}
1645+
}
1646+
16381647
// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many
16391648
// varying arity functions
16401649
/// Create a convenience function representing a unary scalar function

datafusion/src/logical_plan/mod.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@ pub use builder::{
3636
pub use dfschema::{DFField, DFSchema, DFSchemaRef, ToDFSchema};
3737
pub use display::display_schema;
3838
pub use expr::{
39-
abs, acos, and, approx_distinct, array, ascii, asin, atan, avg, binary_expr,
40-
bit_length, btrim, case, ceil, character_length, chr, col, columnize_expr,
41-
combine_filters, concat, concat_ws, cos, count, count_distinct, create_udaf,
42-
create_udf, date_part, date_trunc, digest, exp, exprlist_to_fields, floor, in_list,
43-
initcap, left, length, lit, lit_timestamp_nano, ln, log10, log2, lower, lpad, ltrim,
44-
max, md5, min, normalize_col, normalize_cols, now, octet_length, or, random,
45-
regexp_match, regexp_replace, repeat, replace, replace_col, reverse,
39+
abs, acos, and, approx_distinct, approx_quantile, array, ascii, asin, atan, avg,
40+
binary_expr, bit_length, btrim, case, ceil, character_length, chr, col,
41+
columnize_expr, combine_filters, concat, concat_ws, cos, count, count_distinct,
42+
create_udaf, create_udf, date_part, date_trunc, digest, exp, exprlist_to_fields,
43+
floor, in_list, initcap, left, length, lit, lit_timestamp_nano, ln, log10, log2,
44+
lower, lpad, ltrim, max, md5, min, normalize_col, normalize_cols, now, octet_length,
45+
or, random, regexp_match, regexp_replace, repeat, replace, replace_col, reverse,
4646
rewrite_sort_cols_by_aggs, right, round, rpad, rtrim, sha224, sha256, sha384, sha512,
4747
signum, sin, split_part, sqrt, starts_with, strpos, substr, sum, tan, to_hex,
4848
translate, trim, trunc, unalias, unnormalize_col, unnormalize_cols, upper, when,

datafusion/src/prelude.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ pub use crate::execution::context::{ExecutionConfig, ExecutionContext};
3030
pub use crate::execution::options::AvroReadOptions;
3131
pub use crate::execution::options::{CsvReadOptions, NdJsonReadOptions};
3232
pub use crate::logical_plan::{
33-
array, ascii, avg, bit_length, btrim, character_length, chr, col, concat, concat_ws,
34-
count, create_udf, date_part, date_trunc, digest, in_list, initcap, left, length,
35-
lit, lower, lpad, ltrim, max, md5, min, now, octet_length, random, regexp_match,
36-
regexp_replace, repeat, replace, reverse, right, rpad, rtrim, sha224, sha256, sha384,
37-
sha512, split_part, starts_with, strpos, substr, sum, to_hex, translate, trim, upper,
38-
Column, JoinType, Partitioning,
33+
approx_quantile, array, ascii, avg, bit_length, btrim, character_length, chr, col,
34+
concat, concat_ws, count, create_udf, date_part, date_trunc, digest, in_list,
35+
initcap, left, length, lit, lower, lpad, ltrim, max, md5, min, now, octet_length,
36+
random, regexp_match, regexp_replace, repeat, replace, reverse, right, rpad, rtrim,
37+
sha224, sha256, sha384, sha512, split_part, starts_with, strpos, substr, sum, to_hex,
38+
translate, trim, upper, Column, JoinType, Partitioning,
3939
};

datafusion/tests/dataframe_functions.rs

+20
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,26 @@ async fn test_fn_btrim_with_chars() -> Result<()> {
153153
Ok(())
154154
}
155155

156+
/// Checks that `approx_quantile` can be used as a dataframe aggregate
/// expression and that the collected result renders as the expected table.
#[tokio::test]
157+
async fn test_fn_approx_quantile() -> Result<()> {
158+
// Approximate 0.5 quantile of column `b`.
let expr = approx_quantile(col("b"), lit(0.5));
159+
160+
// Expected pretty-printed output: one column named after the aggregate
// expression, containing a single row.
let expected = vec![
161+
"+-------------------------------------+",
162+
"| APPROXQUANTILE(test.b,Float64(0.5)) |",
163+
"+-------------------------------------+",
164+
"| 10 |",
165+
"+-------------------------------------+",
166+
];
167+
168+
let df = create_test_table()?;
169+
// The first argument to `aggregate` is empty (presumably the group-by
// expressions — confirm), so `expr` is the only aggregate computed.
// NOTE(review): `.unwrap()` panics on a planning error; since this test
// returns `Result`, `?` would likely work here too — confirm error types.
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
170+
171+
assert_batches_eq!(expected, &batches);
172+
173+
Ok(())
174+
}
175+
156176
#[tokio::test]
157177
async fn test_fn_character_length() -> Result<()> {
158178
let expr = character_length(col("a"));

0 commit comments

Comments
 (0)