Skip to content

Commit

Permalink
Add datafusion example of expression apis
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Oct 12, 2022
1 parent 61c38b7 commit 27dcf9e
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 2 deletions.
136 changes: 136 additions & 0 deletions datafusion-examples/examples/expr_api.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};

use datafusion::error::Result;
use datafusion::logical_plan::ToDFSchema;
use datafusion::optimizer::expr_simplifier::{ExprSimplifier, SimplifyContext};
use datafusion::physical_expr::execution_props::ExecutionProps;
use datafusion::prelude::*;
use datafusion::{logical_plan::Operator, scalar::ScalarValue};

/// Entry point for the DataFusion [`Expr`] API example.
///
/// DataFusion ships with an extensive system for representing and
/// manipulating expressions such as `A + 5` or
/// `X IN ('foo', 'bar', 'baz')`. This example first shows two
/// equivalent ways of building an expression and then runs
/// [`simplify_demo`].
#[tokio::main]
async fn main() -> Result<()> {
    // The easiest way to create an expression is the "fluent"-style
    // API, which reads much like the expression itself:
    let fluent = col("a") + lit(5);

    // The fluent form is shorthand for assembling the expression tree
    // by hand, which takes considerably more code:
    let verbose = Expr::BinaryExpr {
        left: Box::new(col("a")),
        op: Operator::Plus,
        right: Box::new(Expr::Literal(ScalarValue::Int32(Some(5)))),
    };

    // Both spellings produce the same expression.
    assert_eq!(fluent, verbose);

    // Any error from the simplification demo is returned to the caller.
    simplify_demo()
}

/// Demonstrates simplifying expressions with [`ExprSimplifier`].
///
/// In addition to easy construction, DataFusion exposes APIs for
/// working with and simplifying expressions. These APIs call into the
/// same implementation that the query engine itself uses.
///
/// Returns an error if building the `to_timestamp` call, converting a
/// schema, or simplifying an expression fails. Panics if a simplified
/// expression differs from the expected one.
fn simplify_demo() -> Result<()> {
    // For example, let's say you have created an expression such as
    //   ts = to_timestamp("2020-09-08T12:00:00+00:00")
    let ts_predicate = col("ts").eq(call_fn(
        "to_timestamp",
        vec![lit("2020-09-08T12:00:00+00:00")],
    )?);

    // Naively evaluating such an expression against a large number of
    // rows would re-convert "2020-09-08T12:00:00+00:00" to a timestamp
    // for every row, which gets expensive.
    //
    // DataFusion's simplification logic can do the conversion once, up
    // front.

    // First, tell DataFusion the type of column "ts":
    let ts_schema = Schema::new(vec![make_ts_field("ts")]).to_dfschema_ref()?;

    // Then build a simplifier. The `ExecutionProps` carries information
    // needed to simplify expressions, such as the current time (so that
    // `now()` is evaluated correctly).
    let props = ExecutionProps::new();
    let ts_simplifier =
        ExprSimplifier::new(SimplifyContext::new(&props).with_schema(ts_schema));

    // Finally, simplify the expression:
    let simplified = ts_simplifier.simplify(ts_predicate)?;

    // DataFusion has reduced the expression to a comparison with a
    // constant: ts = 1599566400000000000. Tada!
    assert_eq!(
        simplified,
        col("ts").eq(lit_timestamp_nano(1599566400000000000i64))
    );

    // Here are some other examples of what DataFusion is capable of,
    // using a schema with an Int64 column "i" and a Boolean column "b".
    let schema = Schema::new(vec![
        make_field("i", DataType::Int64),
        make_field("b", DataType::Boolean),
    ])
    .to_dfschema_ref()?;
    let simplifier =
        ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema));

    // Basic arithmetic simplification
    //
    // i + (1 + 2) => i + 3
    //
    // The all-literal sub-expression `1 + 2` is folded into `3`.
    // NOTE(review): the left-associated form `(i + 1) + 2` contains no
    // all-literal sub-expression and is presumably not folded this way.
    let folded = simplifier.simplify(col("i") + (lit(1) + lit(2)))?;
    assert_eq!(folded, col("i") + lit(3));

    // TODO uncomment when https://github.com/apache/arrow-datafusion/issues/1160 is done
    // (i * 0) > 5 --> false (only valid when `i` cannot be null)
    // assert_eq!(
    //     simplifier.simplify((col("i") * lit(0)).gt(lit(5)))?,
    //     lit(false)
    // );

    // Logical simplification
    //
    // ((i > 5) AND FALSE) OR (i < 10) --> i < 10
    let disjunction = col("i")
        .gt(lit(5))
        .and(lit(false))
        .or(col("i").lt(lit(10)));
    assert_eq!(simplifier.simplify(disjunction)?, col("i").lt(lit(10)));

    Ok(())
}

/// Builds a non-nullable [`Field`] called `name` with type `data_type`.
fn make_field(name: &str, data_type: DataType) -> Field {
    // The final argument is the `nullable` flag; these example fields
    // are declared as never containing nulls.
    Field::new(name, data_type, false)
}

/// Builds a [`Field`] called `name` holding nanosecond-precision
/// timestamps with no time zone, delegating to `make_field`.
fn make_ts_field(name: &str) -> Field {
    // `None` means the timestamp type carries no time zone.
    let timestamp_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
    make_field(name, timestamp_type)
}
2 changes: 1 addition & 1 deletion datafusion/core/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub use crate::execution::options::{
pub use datafusion_common::Column;
pub use datafusion_expr::{
expr_fn::*,
lit,
lit, lit_timestamp_nano,
logical_plan::{JoinType, Partitioning},
Expr,
};
3 changes: 2 additions & 1 deletion datafusion/optimizer/src/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ pub struct ExprSimplifier<S> {
}

impl<S: SimplifyInfo> ExprSimplifier<S> {
/// Create a new `ExprSimplifier` with the given `info`. See
/// Create a new `ExprSimplifier` with the given `info` such as an
/// instance of [`SimplifyContext`]. See
/// [`simplify`](Self::simplify) for an example.
pub fn new(info: S) -> Self {
Self { info }
Expand Down

0 comments on commit 27dcf9e

Please sign in to comment.