From 49a3b005ca3e0415e920b2459b1121dad729b743 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 15 Aug 2022 12:41:21 -0600 Subject: [PATCH] combine CLI pages (#3133) --- docs/source/cli/index.rst | 128 ----------------------------- docs/source/index.rst | 1 - docs/source/user-guide/cli.md | 148 ++++++++++++++++++++++++++-------- 3 files changed, 114 insertions(+), 163 deletions(-) delete mode 100644 docs/source/cli/index.rst diff --git a/docs/source/cli/index.rst b/docs/source/cli/index.rst deleted file mode 100644 index c10db36dfd63..000000000000 --- a/docs/source/cli/index.rst +++ /dev/null @@ -1,128 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -======================= -DataFusion Command-line -======================= - -The Arrow DataFusion CLI is a command-line interactive SQL utility that allows -queries to be executed against CSV and Parquet files. It is a convenient way to -try DataFusion out with your own data sources. - -Install and run using Cargo -=========================== - -The easiest way to install DataFusion CLI a spin is via `cargo install datafusion-cli`. 
- -Install and run using Homebrew (on MacOS) -========================================= - -DataFusion CLI can also be installed via Homebrew (on MacOS). Install it as any other pre-built software like this: - -.. code-block:: bash - - brew install datafusion - # ==> Downloading https://ghcr.io/v2/homebrew/core/datafusion/manifests/5.0.0 - # ######################################################################## 100.0% - # ==> Downloading https://ghcr.io/v2/homebrew/core/datafusion/blobs/sha256:9ecc8a01be47ceb9a53b39976696afa87c0a8 - # ==> Downloading from https://pkg-containers.githubusercontent.com/ghcr1/blobs/sha256:9ecc8a01be47ceb9a53b39976 - # ######################################################################## 100.0% - # ==> Pouring datafusion--5.0.0.big_sur.bottle.tar.gz - # 🍺 /usr/local/Cellar/datafusion/5.0.0: 9 files, 17.4MB - - datafusion-cli - - -Run using Docker -================ - -There is no officially published Docker image for the DataFusion CLI, so it is necessary to build from source -instead. - -Use the following commands to clone this repository and build a Docker image containing the CLI tool. Note that there is :code:`.dockerignore` file in the root of the repository that may need to be deleted in order for this to work. - -.. code-block:: bash - - git clone https://github.com/apache/arrow-datafusion - git checkout 8.0.0 - cd arrow-datafusion - docker build -f datafusion-cli/Dockerfile . --tag datafusion-cli - docker run -it -v $(your_data_location):/data datafusion-cli - - -Usage -===== - -.. code-block:: bash - - Apache Arrow - Command Line Client for DataFusion query engine and Ballista distributed computation engine. - - USAGE: - datafusion-cli [OPTIONS] - - OPTIONS: - -c, --batch-size The batch size of each query, or use DataFusion default - -f, --file ... 
Execute commands from file(s), then exit - --format [default: table] [possible values: csv, tsv, table, json, - nd-json] - -h, --help Print help information - -p, --data-path Path to your data, default to current directory - -q, --quiet Reduce printing other than the results and work quietly - -r, --rc ... Run the provided files on startup instead of ~/.datafusionrc - -V, --version Print version information - -Type `exit` or `quit` to exit the CLI. - - -Registering Parquet Data Sources -================================ - -Parquet data sources can be registered by executing a :code:`CREATE EXTERNAL TABLE` SQL statement. It is not necessary to provide schema information for Parquet files. - -.. code-block:: sql - - CREATE EXTERNAL TABLE taxi - STORED AS PARQUET - LOCATION '/mnt/nyctaxi/tripdata.parquet'; - - -Registering CSV Data Sources -============================ - -CSV data sources can be registered by executing a :code:`CREATE EXTERNAL TABLE` SQL statement. It is necessary to provide schema information for CSV files since DataFusion does not automatically infer the schema when using SQL to query CSV files. - -.. code-block:: sql - - CREATE EXTERNAL TABLE test ( - c1 VARCHAR NOT NULL, - c2 INT NOT NULL, - c3 SMALLINT NOT NULL, - c4 SMALLINT NOT NULL, - c5 INT NOT NULL, - c6 BIGINT NOT NULL, - c7 SMALLINT NOT NULL, - c8 INT NOT NULL, - c9 BIGINT NOT NULL, - c10 VARCHAR NOT NULL, - c11 FLOAT NOT NULL, - c12 DOUBLE NOT NULL, - c13 VARCHAR NOT NULL - ) - STORED AS CSV - WITH HEADER ROW - LOCATION '/path/to/aggregate_test_100.csv'; diff --git a/docs/source/index.rst b/docs/source/index.rst index 66b3386d525b..34e9b135be47 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,7 +29,6 @@ Table of Contents :caption: Supported Environments Rust - Command line .. 
_toc.guide: diff --git a/docs/source/user-guide/cli.md b/docs/source/user-guide/cli.md index 93c6ecc415c9..ed6131fe957e 100644 --- a/docs/source/user-guide/cli.md +++ b/docs/source/user-guide/cli.md @@ -17,25 +17,11 @@ under the License. --> -# DataFusion Command-line Interface +# DataFusion Command-line SQL Utility -The DataFusion CLI allows SQL queries to be executed by an in-process DataFusion context. - -``` -USAGE: - datafusion-cli [FLAGS] [OPTIONS] - -FLAGS: - -h, --help Prints help information - -q, --quiet Reduce printing other than the results and work quietly - -V, --version Prints version information - -OPTIONS: - -c, --batch-size The batch size of each query, or use DataFusion default - -p, --data-path Path to your data, default to current directory - -f, --file ... Execute commands from file(s), then exit - --format Output format [default: table] [possible values: csv, tsv, table, json, ndjson] -``` +The DataFusion CLI is a command-line interactive SQL utility that allows +queries to be executed against any supported data files. It is a convenient way to +try DataFusion out with your own data sources. ## Example @@ -47,31 +33,125 @@ $ echo "1,2" > data.csv ```bash $ datafusion-cli +DataFusion CLI v11.0.0 +❯ CREATE EXTERNAL TABLE foo STORED AS CSV LOCATION 'data.csv'; +0 rows in set. Query took 0.017 seconds. +❯ select * from foo; ++----------+----------+ +| column_1 | column_2 | ++----------+----------+ +| 1 | 2 | ++----------+----------+ +1 row in set. Query took 0.012 seconds. +``` + +## Installation -DataFusion CLI v8.0.0 +### Install and run using Cargo -> CREATE EXTERNAL TABLE foo (a INT, b INT) STORED AS CSV LOCATION 'data.csv'; -0 rows in set. Query took 0.001 seconds. +The easiest way to install DataFusion CLI is via `cargo install datafusion-cli`. -> SELECT * FROM foo; -+---+---+ -| a | b | -+---+---+ -| 1 | 2 | -+---+---+ -1 row in set. Query took 0.017 seconds. 
+### Install and run using Homebrew (on macOS) + +DataFusion CLI can also be installed via Homebrew (on macOS). Install it as any other pre-built software like this: + +```bash +brew install datafusion +# ==> Downloading https://ghcr.io/v2/homebrew/core/datafusion/manifests/5.0.0 +# ######################################################################## 100.0% +# ==> Downloading https://ghcr.io/v2/homebrew/core/datafusion/blobs/sha256:9ecc8a01be47ceb9a53b39976696afa87c0a8 +# ==> Downloading from https://pkg-containers.githubusercontent.com/ghcr1/blobs/sha256:9ecc8a01be47ceb9a53b39976 +# ######################################################################## 100.0% +# ==> Pouring datafusion--5.0.0.big_sur.bottle.tar.gz +# 🍺 /usr/local/Cellar/datafusion/5.0.0: 9 files, 17.4MB + +datafusion-cli ``` -## DataFusion-Cli +### Run using Docker + +There is no officially published Docker image for the DataFusion CLI, so it is necessary to build from source +instead. -Build the `datafusion-cli`: +Use the following commands to clone this repository and build a Docker image containing the CLI tool. Note +that there is a `.dockerignore` file in the root of the repository that may need to be deleted in order for +this to work. ```bash -cd arrow-datafusion/datafusion-cli -cargo build +git clone https://github.com/apache/arrow-datafusion +cd arrow-datafusion +git checkout 8.0.0 +docker build -f datafusion-cli/Dockerfile . --tag datafusion-cli +docker run -it -v $(your_data_location):/data datafusion-cli ``` -## Cli commands +## Usage + +```bash +Apache Arrow +Command Line Client for DataFusion query engine. + +USAGE: + datafusion-cli [OPTIONS] + +OPTIONS: + -c, --batch-size The batch size of each query, or use DataFusion default + -f, --file ... 
Execute commands from file(s), then exit + --format [default: table] [possible values: csv, tsv, table, json, + nd-json] + -h, --help Print help information + -p, --data-path Path to your data, default to current directory + -q, --quiet Reduce printing other than the results and work quietly + -r, --rc ... Run the provided files on startup instead of ~/.datafusionrc + -V, --version Print version information + +Type `exit` or `quit` to exit the CLI. +``` + +## Registering Parquet Data Sources + +Parquet data sources can be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. It is not necessary to provide schema information for Parquet files. + +```sql +CREATE EXTERNAL TABLE taxi +STORED AS PARQUET +LOCATION '/mnt/nyctaxi/tripdata.parquet'; +``` + +## Registering CSV Data Sources + +CSV data sources can be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. + +```sql +CREATE EXTERNAL TABLE test +STORED AS CSV +WITH HEADER ROW +LOCATION '/path/to/aggregate_test_100.csv'; +``` + +It is also possible to provide schema information. + +```sql +CREATE EXTERNAL TABLE test ( + c1 VARCHAR NOT NULL, + c2 INT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT NOT NULL, + c5 INT NOT NULL, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 BIGINT NOT NULL, + c10 VARCHAR NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL +) +STORED AS CSV +LOCATION '/path/to/aggregate_test_100.csv'; +``` + +## Commands Available commands inside DataFusion CLI are: @@ -101,7 +181,7 @@ Available commands inside DataFusion CLI are: - QuietMode -``` +```bash > \quiet [true|false] ```