Skip to content

Commit

Permalink
Merge branch 'main' of github.com:apache/arrow-datafusion into univer…
Browse files Browse the repository at this point in the history
…salmind303/fsl-signature
  • Loading branch information
universalmind303 committed Mar 5, 2024
2 parents 9e6340d + 31c23dc commit 46f26de
Show file tree
Hide file tree
Showing 424 changed files with 16,065 additions and 8,813 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ jobs:
- name: Check function packages (array_expressions)
run: cargo check --no-default-features --features=array_expressions -p datafusion

- name: Check function packages (datetime_expressions)
run: cargo check --no-default-features --features=datetime_expressions -p datafusion

- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory
Expand Down
19 changes: 13 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
exclude = ["datafusion-cli"]
members = [
"datafusion/common",
"datafusion/common_runtime",
"datafusion/core",
"datafusion/expr",
"datafusion/execution",
Expand Down Expand Up @@ -51,6 +52,11 @@ rust-version = "1.72"
version = "36.0.0"

[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
#
# For more details, see: https://github.com/rust-lang/cargo/issues/11329
arrow = { version = "50.0.0", features = ["prettyprint"] }
arrow-array = { version = "50.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "50.0.0", default-features = false }
Expand All @@ -65,14 +71,15 @@ bytes = "1.4"
chrono = { version = "0.4.34", default-features = false }
ctor = "0.2.0"
dashmap = "5.4.0"
datafusion = { path = "datafusion/core", version = "36.0.0" }
datafusion-common = { path = "datafusion/common", version = "36.0.0" }
datafusion = { path = "datafusion/core", version = "36.0.0", default-features = false }
datafusion-common = { path = "datafusion/common", version = "36.0.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common_runtime", version = "36.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "36.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "36.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "36.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "36.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "36.0.0" }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "36.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "36.0.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "36.0.0", default-features = false }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "36.0.0" }
datafusion-proto = { path = "datafusion/proto", version = "36.0.0" }
datafusion-sql = { path = "datafusion/sql", version = "36.0.0" }
Expand All @@ -81,7 +88,7 @@ datafusion-substrait = { path = "datafusion/substrait", version = "36.0.0" }
doc-comment = "0.3"
env_logger = "0.11"
futures = "0.3"
half = "2.2.1"
half = { version = "2.2.1", default-features = false }
indexmap = "2.0.0"
itertools = "0.12"
log = "^0.4"
Expand All @@ -92,7 +99,7 @@ parquet = { version = "50.0.0", default-features = false, features = ["arrow", "
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
sqlparser = { version = "0.43.0", features = ["visitor"] }
sqlparser = { version = "0.44.0", features = ["visitor"] }
tempfile = "3"
thiserror = "1.0.44"
tokio = { version = "1.36", features = ["macros", "rt", "sync"] }
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
[API Docs](https://docs.rs/datafusion/latest/datafusion/) |
[Chat](https://discord.com/channels/885562378132000778/885562378132000781)

<img src="https://arrow.apache.org/datafusion/_images/DataFusion-Logo-Background-White.png" width="256" alt="logo"/>
<img src="./docs/source/_static/images/2x_bgwhite_original.png" width="512" alt="logo"/>

DataFusion is a very fast, extensible query engine for building high-quality data-centric systems in
[Rust](http://rustlang.org), using the [Apache Arrow](https://arrow.apache.org)
Expand Down Expand Up @@ -78,6 +78,7 @@ Default features:
- `array_expressions`: functions for working with arrays such as `array_to_string`
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `datetime_expressions`: date and time functions such as `to_timestamp`
- `encoding_expressions`: `encode` and `decode` functions
- `parquet`: support for reading the [Apache Parquet] format
- `regex_expressions`: regular expression functions, such as `regexp_match`
Expand Down
16 changes: 8 additions & 8 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
[package]
name = "datafusion-benchmarks"
description = "DataFusion Benchmarks"
version = "36.0.0"
version = { workspace = true }
edition = { workspace = true }
authors = ["Apache Arrow <[email protected]>"]
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
license = "Apache-2.0"
authors = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
rust-version = { workspace = true }

[features]
Expand All @@ -33,8 +33,8 @@ snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = { workspace = true }
datafusion = { path = "../datafusion/core", version = "36.0.0" }
datafusion-common = { path = "../datafusion/common", version = "36.0.0" }
datafusion = { workspace = true, default-features = true }
datafusion-common = { workspace = true, default-features = true }
env_logger = { workspace = true }
futures = { workspace = true }
log = { workspace = true }
Expand All @@ -49,4 +49,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" }
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { path = "../datafusion/proto", version = "36.0.0" }
datafusion-proto = { workspace = true }
3 changes: 2 additions & 1 deletion benchmarks/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@
// under the License.

use std::path::Path;
use std::{path::PathBuf, time::Instant};
use std::path::PathBuf;

use datafusion::{
error::{DataFusionError, Result},
prelude::SessionContext,
};
use datafusion_common::exec_datafusion_err;
use datafusion_common::instant::Instant;
use structopt::StructOpt;

use crate::{BenchmarkRun, CommonOpt};
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/parquet_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ use datafusion::logical_expr::{lit, or, Expr};
use datafusion::physical_plan::collect;
use datafusion::prelude::{col, SessionContext};
use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile};
use datafusion_common::instant::Instant;
use std::path::PathBuf;
use std::time::Instant;
use structopt::StructOpt;

/// Test performance of parquet filter pushdown
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ use datafusion::physical_plan::collect;
use datafusion::physical_plan::sorts::sort::SortExec;
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion::test_util::parquet::TestParquetFile;
use datafusion_common::instant::Instant;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use structopt::StructOpt;

/// Test performance of sorting large datasets
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/src/tpch/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use datafusion_common::instant::Instant;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::Instant;

use datafusion::common::not_impl_err;
use datafusion::error::DataFusionError;

use datafusion::error::Result;
use datafusion::prelude::*;
use parquet::basic::Compression;
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/tpch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use arrow::datatypes::SchemaBuilder;
use datafusion::{
arrow::datatypes::{DataType, Field, Schema},
common::plan_err,
error::{DataFusionError, Result},
error::Result,
};
use std::fs;
mod run;
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/tpch/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ use datafusion::physical_plan::{collect, displayable};
use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION};
use log::info;

use datafusion_common::instant::Instant;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;

use datafusion::error::Result;
use datafusion::prelude::*;
Expand Down
4 changes: 1 addition & 3 deletions ci/scripts/rust_example.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@
set -ex
cd datafusion-examples/examples/
cargo fmt --all -- --check
cargo check --examples

files=$(ls .)
for filename in $files
do
example_name=`basename $filename ".rs"`
# Skip tests that rely on external storage and flight
# todo: Currently, catalog.rs is placed in the external-dependence directory because there is a problem parsing
# the parquet file of the external parquet-test that it currently relies on.
# We will wait for this issue[https://github.com/apache/arrow-datafusion/issues/8041] to be resolved.
if [ ! -d $filename ]; then
cargo run --example $example_name
fi
Expand Down
8 changes: 8 additions & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Clippy configuration listing methods and types that should not be used directly.
# Each entry names the disallowed path and gives the reason / replacement to use.

# Direct tokio task spawning is disallowed in favor of the `SpawnedTask` wrappers,
# for cancel-safety (see the issue linked in each `reason`).
disallowed-methods = [
{ path = "tokio::task::spawn", reason = "To provide cancel-safety, use `SpawnedTask::spawn` instead (https://github.com/apache/arrow-datafusion/issues/6513)" },
{ path = "tokio::task::spawn_blocking", reason = "To provide cancel-safety, use `SpawnedTask::spawn_blocking` instead (https://github.com/apache/arrow-datafusion/issues/6513)" },
]

# `std::time::Instant` is disallowed in favor of the `datafusion_common` re-export,
# for WASM compatibility.
disallowed-types = [
{ path = "std::time::Instant", reason = "Use `datafusion_common::instant::Instant` instead for WASM compatibility" },
]
Loading

0 comments on commit 46f26de

Please sign in to comment.