From a79d56d603408b0c14cca7017a09a20ac0adc969 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 12 Oct 2022 16:48:37 -0400 Subject: [PATCH] Clarify initial example with respect to capitalization --- docs/source/user-guide/example-usage.md | 60 +++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index ced84ffa6ec5..32b1122abc03 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -19,9 +19,7 @@ # Example Usage -In this example some simple processing is performed on a csv file. Please be aware that all identifiers are made lower-case in SQL, so if your csv file has capital letters (ex: Name) you should put your column name in double quotes or the example won't work. - -The following example uses [this file](../../../datafusion/core/tests/capitalized_example.csv) +In this example some simple processing is performed on the [`example.csv`](../../../datafusion/core/tests/example.csv) file. ## Update `Cargo.toml` @@ -37,6 +35,62 @@ tokio = "1.0" ```rust use datafusion::prelude::*; +#[tokio::main] +async fn main() -> datafusion::error::Result<()> { + // register the table + let ctx = SessionContext::new(); + ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?; + + // create a plan to run a SQL query + let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; + + // execute and print results + df.show().await?; + Ok(()) +} +``` + +## Use the DataFrame API to process data stored in a CSV: + +```rust +use datafusion::prelude::*; + +#[tokio::main] +async fn main() -> datafusion::error::Result<()> { + // create the dataframe + let ctx = SessionContext::new(); + let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; + + let df = df.filter(col("a").lt_eq(col("b")))? 
+ .aggregate(vec![col("a")], vec![min(col("b"))])?; + + // execute and print results + df.show_limit(100).await?; + Ok(()) +} +``` + +## Output from both examples + +```text ++---+--------+ +| a | MIN(b) | ++---+--------+ +| 1 | 2 | ++---+--------+ +``` + +# Identifiers and Capitalization + +Please be aware that all unquoted identifiers are effectively made lower-case in SQL, so if your CSV file has capital letters (e.g. `Name`) you must put your column name in double quotes or the examples won't work. + +To illustrate this behavior, consider the [`capitalized_example.csv`](../../../datafusion/core/tests/capitalized_example.csv) file: + +## Run a SQL query against data stored in a CSV: + +```rust +use datafusion::prelude::*; + #[tokio::main] async fn main() -> datafusion::error::Result<()> { // register the table