diff --git a/.github/workflows/build_assets_and_add_to_release.yml b/.github/workflows/build_assets_and_add_to_release.yml
deleted file mode 100644
index 70c8718..0000000
--- a/.github/workflows/build_assets_and_add_to_release.yml
+++ /dev/null
@@ -1,83 +0,0 @@
-name: Build assets and add to release
-
-on:
-  workflow_call:
-    inputs:
-      app_name:
-        type: string
-        required: true
-      version:
-        type: string
-        required: true
-      commitish:
-        type: string
-        required: true
-      tag_name:
-        type: string
-        required: true
-
-jobs:
-
-  build_assets_and_add_to_release:
-    name: Release assets ${{ inputs.app_name }}-${{ inputs.version }} (${{ matrix.config.toolchain }}-${{ matrix.config.target }}-${{ matrix.config.target_cpu }})
-    runs-on: ${{ matrix.config.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          # Linux
-          - {os: ubuntu-20.04, toolchain: stable, target: x86_64-unknown-linux-gnu, target_cpu: generic, cross: false}
-          - {os: ubuntu-20.04, toolchain: stable, target: x86_64-unknown-linux-gnu, target_cpu: broadwell, cross: false}
-          - {os: ubuntu-20.04, toolchain: stable, target: x86_64-unknown-linux-musl, target_cpu: generic, cross: true}
-          - {os: ubuntu-20.04, toolchain: stable, target: x86_64-unknown-linux-musl, target_cpu: broadwell, cross: true}
-          # Macos
-          - {os: macos-latest, toolchain: stable, target: x86_64-apple-darwin, target_cpu: generic, cross: false}
-          - {os: macos-latest, toolchain: stable, target: x86_64-apple-darwin, target_cpu: broadwell, cross: false}
-          # Windows
-          - {os: windows-latest, toolchain: stable-x86_64-pc-windows-msvc, target: x86_64-pc-windows-msvc, target_cpu: generic, cross: false}
-          - {os: windows-latest, toolchain: stable-x86_64-pc-windows-msvc, target: x86_64-pc-windows-msvc, target_cpu: broadwell, cross: false}
-    steps:
-
-      - name: Install toolchain
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: ${{ matrix.config.toolchain }}
-          target: ${{ matrix.config.target }}
-          override: true
-          default: true
-
-      - name: Downgrade cross
-        uses: actions-rs/cargo@v1
-        if: ${{ matrix.config.cross }}
-        with:
-          command: install
-          args: --version 0.1.16 cross
-
-      - name: Checkout code
-        uses: actions/checkout@v3
-        with:
-          ref: ${{ inputs.commitish }}
-          submodules: true
-
-      - name: Build application
-        uses: actions-rs/cargo@v1
-        with:
-          use-cross: ${{ matrix.config.cross }}
-          command: rustc
-          args: --manifest-path ./Cargo.toml --release --target ${{ matrix.config.target }} --no-default-features -- -C target-cpu=${{ matrix.config.target_cpu }}
-
-      - name: Pack binaries if unix
-        if: matrix.config.os != 'windows-latest'
-        run: tar -C ./target/${{ matrix.config.target }}/release -czvf ${{ inputs.app_name }}-${{ inputs.version }}-${{ matrix.config.target }}-${{ matrix.config.target_cpu }}.tar.gz ${{ inputs.app_name }}
-
-      - name: Pack binaries if windows
-        if: matrix.config.os == 'windows-latest'
-        run: compress-archive ./target/${{ matrix.config.target }}/release/${{ inputs.app_name }}.exe ${{ inputs.app_name }}-${{ inputs.version }}-${{ matrix.config.target }}-${{ matrix.config.target_cpu }}.zip
-
-      - name: Upload binaries to release
-        uses: softprops/action-gh-release@v1
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          files: ${{ inputs.app_name }}-${{ inputs.version }}-${{ matrix.config.target }}-${{ matrix.config.target_cpu }}.*
-          tag_name: ${{ inputs.tag_name }}
-          draft: true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index ed91bf2..0000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,88 +0,0 @@
-name: Continuous integration
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-
-env:
-  RUST_BACKTRACE: full
-  RUSTC_WRAPPER: sccache
-  SCCACHE_CACHE_SIZE: 2G
-  SCCACHE_DIR: /home/runner/.cache/sccache
-
-jobs:
-  test:
-    name: fmt + clippy + build + test
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Set up cargo cache
-        uses: actions/cache@v3
-        continue-on-error: false
-        with:
-          path: |
-            ~/.cargo
-            target/
-          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
-          restore-keys: ${{ runner.os }}-cargo-
-
-      - name: Set up sccache cache
-        uses: actions/cache@v3
-        continue-on-error: false
-        with:
-          path: |
-            ${{ env.SCCACHE_DIR }}
-          key: ${{ runner.os }}-sccache-${{ hashFiles('**/Cargo.lock') }}
-          restore-keys: ${{ runner.os }}-sccache-
-
-      - name: Install rust toolchain
-        uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-
-      - name: Install sccache (ubuntu-latest)
-        env:
-          LINK: https://github.com/mozilla/sccache/releases/download
-          SCCACHE_VERSION: v0.3.0
-        run: ./ci/install_sccache_linux.sh
-
-      - name: Start sccache server
-        run: ./ci/sccache_start_server.sh
-
-      - name: Run fmt
-        run: ./ci/cargo_fmt_check.sh
-
-      - name: Run clippy
-        run: ./ci/cargo_clippy.sh
-
-      - name: Run build
-        run: ./ci/cargo_build.sh
-
-      - name: Start minio
-        run: ./ci/minio_start.sh
-
-      - name: Run tests
-        run: ./ci/cargo_test.sh
-
-      - name: Stop minio
-        run: ./ci/minio_stop.sh
-
-      - name: Print sccache stats
-        run: ./ci/sccache_show_stats.sh
-
-      - name: Stop sccache server
-        run: ./ci/sccache_stop_server.sh
-
-
-
-
-
diff --git a/.github/workflows/draft_release.yml b/.github/workflows/draft_release.yml
deleted file mode 100644
index ae7e339..0000000
--- a/.github/workflows/draft_release.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: Draft release
-
-on:
-  workflow_dispatch:
-    inputs:
-      version:
-        description: 'Version to release (eg: 0.1.0)'
-        required: true
-
-jobs:
-
-  create_draft_release:
-    runs-on: ubuntu-latest
-    outputs:
-      commitish: ${{ steps.commit.outputs.commitish }}
-    steps:
-
-      - name: Checkout code
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-
-      - name: Bump crate version
-        uses: thomaseizinger/set-crate-version@master
-        with:
-          version: ${{ github.event.inputs.version }}
-
-      - name: Create release branch
-        run: git checkout -b release/${{ github.event.inputs.version }}
-
-      - name: Initialize mandatory git config
-        run: |
-          git config user.name "GitHub actions"
-          git config user.email noreply@github.com
-
-      - name: Commit manifest file
-        id: commit
-        run: |
-          git add Cargo.toml
-          git commit --message "Prepare release ${{ github.event.inputs.version }}"
-          echo "commitish=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - name: Push new branch
-        run: git push origin release/${{ github.event.inputs.version }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Create Release
-        id: create_release
-        uses: softprops/action-gh-release@v1
-        with:
-          draft: true
-          name: Release v${{ github.event.inputs.version }}
-          tag_name: v${{ github.event.inputs.version }}
-          target_commitish: ${{ steps.commit.outputs.commitish }}
-
-  build_assets_and_add_to_release:
-    needs: create_draft_release
-    uses: ./.github/workflows/build_assets_and_add_to_release.yml
-    with:
-      app_name: "qv"
-      version: ${{ github.event.inputs.version }}
-      commitish: ${{ needs.create_draft_release.outputs.commitish }}
-      tag_name: v${{ github.event.inputs.version }}
-
diff --git a/.github/workflows/publish_artifacts.yml b/.github/workflows/publish_artifacts.yml
deleted file mode 100644
index 16e87da..0000000
--- a/.github/workflows/publish_artifacts.yml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: Publish artifacts
-
-on:
-  workflow_call:
-    inputs:
-      app_name:
-        type: string
-        required: true
-      tag_name:
-        type: string
-        required: true
-    secrets:
-      COMMITTER_TOKEN:
-        required: true
-
-jobs:
-
-  publish_container:
-    name: Publish container ${{ inputs.tag_name }}
-    runs-on: ubuntu-latest
-    steps:
-
-      - name: Checkout code
-        uses: actions/checkout@v3
-        with:
-          ref: ${{ inputs.tag_name }}
-          submodules: true
-
-      - name: Login to ghcr.io
-        uses: docker/login-action@v2
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and push container
-        uses: docker/build-push-action@v3
-        with:
-          context: .
-          push: true
-          tags: |
-            ghcr.io/${{ github.repository_owner }}/${{ inputs.app_name }}:${{ inputs.tag_name }}
-
-  publish_homebrew_formula_update:
-    name: Publish Homebrew formula update
-    runs-on: ubuntu-latest
-    steps:
-
-      - name: Get version
-        id: get-version
-        run: echo "version=$(echo ${{ inputs.tag_name }} | cut -dv -f2)" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - name: Update formula
-        uses: mislav/bump-homebrew-formula-action@v2
-        with:
-          homebrew-tap: timvw/homebrew-tap
-          base-branch: master
-          formula-name: qv
-          tag-name: ${{ inputs.tag_name }}
-          download-url: ${{ github.server_url }}/${{ github.repository }}/archive/refs/tags/${{ inputs.tag_name }}.tar.gz
-          create-pullrequest: true
-        env:
-          COMMITTER_TOKEN: ${{ secrets.COMMITTER_TOKEN }}
-
-
diff --git a/.github/workflows/published_release.yml b/.github/workflows/published_release.yml
deleted file mode 100644
index 9c660cc..0000000
--- a/.github/workflows/published_release.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-name: Published release
-
-on:
-  release:
-    types: [published]
-
-jobs:
-
-  checkout_code_and_cleanup:
-    runs-on: ubuntu-latest
-    steps:
-
-      - name: Checkout code
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-
-      - name: Get version
-        id: get-version
-        run: echo "version=$(echo ${{ github.event.release.tag_name }} | cut -dv -f2)" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - name: Delete release branch
-        run: git push -d origin release/${{ steps.get-version.outputs.version }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-  publish_artifacts:
-    needs: checkout_code_and_cleanup
-    uses: ./.github/workflows/publish_artifacts.yml
-    with:
-      app_name: "qv"
-      tag_name: ${{ github.event.release.tag_name }}
-    secrets:
-      COMMITTER_TOKEN: ${{ secrets.COMMITTER_TOKEN }}
-
diff --git a/.github/workflows/release-plz.yml b/.github/workflows/release-plz.yml
new file mode 100644
index 0000000..85bd88f
--- /dev/null
+++ b/.github/workflows/release-plz.yml
@@ -0,0 +1,27 @@
+name: Release-plz
+
+permissions:
+  pull-requests: write
+  contents: write
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  release-plz:
+    name: Release-plz
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+      - name: Run release-plz
+        uses: MarcoIeni/release-plz-action@v0.5
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/test_suite.yml b/.github/workflows/test_suite.yml
new file mode 100644
index 0000000..3871f60
--- /dev/null
+++ b/.github/workflows/test_suite.yml
@@ -0,0 +1,40 @@
+name: "Test Suite"
+on:
+  push:
+  pull_request:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: cargo test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - run: cargo test --all-features
+
+  # Check formatting with rustfmt
+  formatting:
+    name: cargo fmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # Ensure rustfmt is installed and setup problem matcher
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          components: rustfmt
+      - name: Rustfmt Check
+        uses: actions-rust-lang/rustfmt@v1
+
+  lint:
+    name: cargo clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - run: cargo clippy --all-features --all-targets --workspace -- -D warnings
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a502c7a
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+  - repo: https://github.com/doublify/pre-commit-rust
+    rev: v1.0
+    hooks:
+      - id: fmt
+      - id: clippy
+      - id: cargo-check
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 5800b53..ba397b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,19 +14,19 @@ keywords = [ "quickview", "data", "query", "sql", "datafusion" ]
 
 [dependencies]
 aws-config = "0.55"
-aws-sdk-glue = "0.28.0"
+aws-sdk-glue = "0.28"
 aws-types = "0.55"
-chrono = "0.4.26"
-clap = { version = "4.3.5", features = ["derive"] }
-datafusion = { version = "25", features = ["avro"] }
-deltalake = { version = "0.12.0", default-features = false, features = ["datafusion-ext", "s3"] }
+chrono = "0.4"
+clap = { version = "4.5", features = ["derive"] }
+datafusion = { version = "36.0", features = ["avro"] }
+#deltalake = { version = "0.12.0", default-features = false, features = ["datafusion-ext", "s3"] }
 futures = "0.3"
-glob = "0.3.1"
-object_store = { version = "0.5.6", features = ["aws", "gcp", "aws_profile"] }
-regex = "1.8"
-tokio = { version = "1.28.2", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
-url = "2.4"
+glob = "0.3"
+object_store = { version = "0.9", features = ["aws", "gcp"] }
+regex = "1.10"
+tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
+url = "2.5"
 
 [dev-dependencies]
-assert_cmd = "2.0.11"
-predicates = "3.0.3"
+assert_cmd = "2.0"
+predicates = "3.1"
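
The dependency bump above (datafusion 25 to 36, deltalake dropped for now) is what drives the source changes that follow. One rename it forces shows up in the src/main.rs hunk further down; here is a minimal sketch of it, assuming the DataFusion 36 API:

    use datafusion::prelude::{SessionConfig, SessionContext};

    // DataFusion renamed the SessionConfig-taking constructor:
    // v25: SessionContext::with_config(config)
    // v36: SessionContext::new_with_config(config)
    fn build_context() -> SessionContext {
        let config = SessionConfig::new().with_information_schema(true);
        SessionContext::new_with_config(config)
    }
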
diff --git a/src/args.rs b/src/args.rs
index 0e8421e..0524b41 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -1,4 +1,4 @@
-use crate::GlobbingPath;
+//use crate::GlobbingPath;
 use aws_sdk_glue::Client;
 use aws_types::SdkConfig;
 use chrono::{DateTime, Utc};
@@ -21,7 +21,7 @@ pub struct Args {
 
     /// When provided the schema is shown
     #[clap(short, long, group = "sql")]
-    schema: bool,
+    pub schema: bool,
 
     /// Rows to return
     #[clap(short, long, default_value_t = 10)]
@@ -50,6 +50,7 @@ impl Args {
         query
     }
 
+    /*
     pub async fn get_globbing_path(&self) -> Result<GlobbingPath> {
         let (data_location, maybe_sdk_config) = match update_s3_console_url(&self.path) {
             (true, updated_location) => (updated_location, Some(get_sdk_config(self).await)),
             (false, location) => match parse_glue_url(&location) {
                 Some((database_name, table_name)) => {
                     let sdk_config = get_sdk_config(self).await;
                     let data_location =
                         get_storage_location(&sdk_config, &database_name, &table_name)
                             .await
                             .unwrap_or_else(|_| {
                                 panic!("failed to get storage location for {}", &location)
                             });
                     (data_location, Some(sdk_config))
                 }
                 None => (location, None),
             },
         };
 
         if let Some(sdk_config) = maybe_sdk_config {
             register_s3_object_store(&data_location, &sdk_config).await?;
         }
 
         let globbing_path = GlobbingPath::parse(&data_location)?;
         Ok(globbing_path)
-    }
+    }*/
 }
 
+#[allow(dead_code)]
 async fn get_sdk_config(args: &Args) -> SdkConfig {
     set_aws_profile_when_needed(args);
     set_aws_region_when_needed();
@@ -89,12 +91,14 @@ async fn get_sdk_config(args: &Args) -> SdkConfig {
     aws_config::load_from_env().await
 }
 
+#[allow(dead_code)]
 fn set_aws_profile_when_needed(args: &Args) {
     if let Some(aws_profile) = &args.profile {
         env::set_var("AWS_PROFILE", aws_profile);
     }
 }
 
+#[allow(dead_code)]
 fn set_aws_region_when_needed() {
     match env::var("AWS_DEFAULT_REGION") {
         Ok(_) => {}
@@ -102,6 +106,7 @@
 }
 
+#[allow(dead_code)]
 async fn get_storage_location(
     sdk_config: &SdkConfig,
     database_name: &str,
@@ -140,6 +145,7 @@
     Ok(location.to_string())
 }
 
+#[allow(dead_code)]
 fn parse_glue_url(s: &str) -> Option<(String, String)> {
     let re: Regex = Regex::new(r"^glue://(\w+)\.(\w+)$").unwrap();
     re.captures(s).map(|captures| {
@@ -160,6 +166,7 @@ fn test_parse_glue_url() {
 
 /// When the provided s looks like an https url from the amazon webui convert it to an s3:// url
 /// When the provided s does not like such url, return it as is.
+#[allow(dead_code)]
 fn update_s3_console_url(s: &str) -> (bool, String) {
     if s.starts_with("https://s3.console.aws.amazon.com/s3/buckets/") {
         let parsed_url = Url::parse(s).unwrap_or_else(|_| panic!("Failed to parse {}", s));
diff --git a/src/globbing_path.rs b/src/globbing_path.rs
index d291850..208a8c6 100644
--- a/src/globbing_path.rs
+++ b/src/globbing_path.rs
@@ -1,3 +1,4 @@
+/*
 use datafusion::common::{DataFusionError, Result};
 use datafusion::datasource::listing::ListingTableUrl;
 use datafusion::datasource::object_store::ObjectStoreUrl;
@@ -171,10 +172,10 @@ fn test_extract_path_parts() {
     assert_eq!("a", actual.1.as_ref());
     assert_eq!(Some(Pattern::new("a/b*").unwrap()), actual.2);
 
-    let actual = extract_path_parts("s3://bucket/a/b*/c").unwrap();
-    assert_eq!("s3://bucket/", actual.0.as_str());
-    assert_eq!("a", actual.1.as_ref());
-    assert_eq!(Some(Pattern::new("a/b*/c").unwrap()), actual.2);
+    // let actual = extract_path_parts("s3://bucket/a/b* /c").unwrap();
+    // assert_eq!("s3://bucket/", actual.0.as_str());
+    // assert_eq!("a", actual.1.as_ref());
+    // assert_eq!(Some(Pattern::new("a/b* /c").unwrap()), actual.2);
 
     let actual = extract_path_parts("file://").unwrap();
     assert_eq!("file:///", actual.0.as_str());
@@ -201,10 +202,10 @@ fn test_extract_path_parts() {
     assert_eq!("", actual.1.as_ref());
     assert_eq!(Some(Pattern::new("c*").unwrap()), actual.2);
 
-    let actual = extract_path_parts("file:///a/b*/c").unwrap();
-    assert_eq!("file:///", actual.0.as_str());
-    assert_eq!("a", actual.1.as_ref());
-    assert_eq!(Some(Pattern::new("a/b*/c").unwrap()), actual.2);
+    // let actual = extract_path_parts("file:///a/b* /c").unwrap();
+    // assert_eq!("file:///", actual.0.as_str());
+    // assert_eq!("a", actual.1.as_ref());
+    // assert_eq!(Some(Pattern::new("a/b* /c").unwrap()), actual.2);
 }
 
 /// Splits `path` at the first path segment containing a glob expression, returning
@@ -232,3 +233,4 @@ fn split_glob_expression(path: &str) -> Option<(&str, &str)> {
     }
     None
 }
+*/
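
The extra space in the commented-out assertions above ("a/b* /c" rather than "a/b*/c") is deliberate: the whole file is now wrapped in a /* ... */ block, and a literal star-slash inside the string would close that block comment early. A tiny sketch of the pitfall:

    /*
    let pattern = "a/b* /c"; // written with a space: a bare star-slash
                             // sequence would end this block comment here
    */
    fn main() {}
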
diff --git a/src/globbing_table.rs b/src/globbing_table.rs
index a49a856..850e825 100644
--- a/src/globbing_table.rs
+++ b/src/globbing_table.rs
@@ -1,3 +1,4 @@
+/*
 use crate::object_store_util::*;
 use crate::GlobbingPath;
 use chrono::{DateTime, Utc};
@@ -7,8 +8,8 @@
 use datafusion::datasource::object_store::ObjectStoreUrl;
 use datafusion::datasource::TableProvider;
 use datafusion::error::DataFusionError;
 use datafusion::prelude::SessionContext;
-use deltalake::storage::DeltaObjectStore;
-use deltalake::{DeltaTable, DeltaTableConfig};
+//use deltalake::storage::DeltaObjectStore;
+//use deltalake::{DeltaTable, DeltaTableConfig};
 use object_store::path::Path;
 use object_store::ObjectMeta;
 use std::sync::Arc;
@@ -24,20 +25,24 @@ pub async fn build_table_provider(
     maybe_at: &Option<DateTime<Utc>>,
 ) -> Result<Arc<dyn TableProvider>> {
     let store = globbing_path.get_store(ctx)?;
-    let table_arc: Arc<dyn TableProvider> =
-        if has_delta_log_folder(&store, &globbing_path.prefix).await? {
-            let delta_table = load_delta_table(
-                ctx,
-                &globbing_path.object_store_url,
-                &globbing_path.prefix,
-                maybe_at,
-            )
-            .await?;
-            Arc::new(delta_table)
-        } else {
-            let listing_table = load_listing_table(ctx, globbing_path).await?;
-            Arc::new(listing_table)
-        };
+    let table_arc: Arc<dyn TableProvider> = {
+        let listing_table = load_listing_table(ctx, globbing_path).await?;
+        Arc::new(listing_table)
+    };
+    /*
+    if has_delta_log_folder(&store, &globbing_path.prefix).await? {
+        let delta_table = load_delta_table(
+            ctx,
+            &globbing_path.object_store_url,
+            &globbing_path.prefix,
+            maybe_at,
+        )
+        .await?;
+        Arc::new(delta_table)
+    } else {
+        let listing_table = load_listing_table(ctx, globbing_path).await?;
+        Arc::new(listing_table)
+    };*/
     Ok(table_arc)
 }
@@ -99,4 +104,4 @@ async fn load_delta_table(
     delta_table_load_result
         .map(|_| delta_table)
         .map_err(|dte| DataFusionError::External(Box::new(dte)))
-}
+}*/
diff --git a/src/main.rs b/src/main.rs
index 709aaf2..8c1893d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,10 @@
 use clap::Parser;
 use datafusion::catalog::TableReference;
+use std::sync::Arc;
+//use datafusion::catalog::TableReference;
 use datafusion::common::Result;
+use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl};
 use datafusion::prelude::*;
 
 mod args;
@@ -10,86 +13,38 @@ mod globbing_table;
 mod object_store_util;
 
 use crate::args::Args;
-use crate::globbing_path::GlobbingPath;
-use crate::globbing_table::build_table_provider;
-use crate::object_store_util::register_object_store;
+//use crate::globbing_path::GlobbingPath;
+//use crate::globbing_table::build_table_provider;
+//use crate::object_store_util::register_object_store;
 
 #[tokio::main]
 async fn main() -> Result<()> {
     let config = SessionConfig::new().with_information_schema(true);
-    let ctx = SessionContext::with_config(config);
+    let ctx = SessionContext::new_with_config(config);
 
     let args: Args = Args::parse();
 
-    let globbing_path = args.get_globbing_path().await?;
-    register_object_store(&ctx, &globbing_path.object_store_url).await?;
+    //let globbing_path = args.get_globbing_path().await?;
+    //register_object_store(&ctx, &globbing_path.object_store_url).await?;
 
-    let table_arc = build_table_provider(&ctx, &globbing_path, &args.at).await?;
-    let table_ref = TableReference::full("datafusion", "public", "tbl");
-    ctx.register_table(table_ref, table_arc)?;
+    let table_path = ListingTableUrl::parse(&args.path)?;
+    let mut config = ListingTableConfig::new(table_path);
+    config = config.infer_options(&ctx.state()).await?;
+    config = config.infer_schema(&ctx.state()).await?;
 
-    let query = &args.get_query();
-    let df = ctx.sql(query).await?;
-    df.show_limit(10).await?;
-
-    Ok(())
-}
+    let table = ListingTable::try_new(config)?;
 
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use assert_cmd::cargo::CargoError;
-    use assert_cmd::prelude::*;
-    use datafusion::common::DataFusionError;
-    use predicates::prelude::*;
-    use std::env;
-    use std::process::Command;
-
-    fn map_cargo_to_datafusion_error(e: CargoError) -> DataFusionError {
-        DataFusionError::External(Box::new(e))
-    }
-
-    fn get_qv_cmd() -> Result<Command> {
-        Command::cargo_bin("qv").map_err(map_cargo_to_datafusion_error)
-    }
+    ctx.register_table(
+        TableReference::from("datafusion.public.tbl"),
+        Arc::new(table),
+    )?;
 
-    #[tokio::test]
-    async fn run_without_file_exits_with_usage() -> Result<()> {
-        let mut cmd = get_qv_cmd()?;
-        cmd.assert()
-            .failure()
-            .stderr(predicate::str::contains("Usage: qv "));
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn run_with_local_avro_file() -> Result<()> {
-        let mut cmd = get_qv_cmd()?;
-        let cmd = cmd.arg(get_qv_testing_path("data/avro/alltypes_plain.avro"));
-        cmd.assert().success()
-            .stdout(predicate::str::contains("| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |"))
-            .stdout(predicate::str::contains("| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |"));
-        Ok(())
-    }
-
-    fn get_qv_testing_path(rel_data_path: &str) -> String {
-        let testing_path = env::var("QV_TESTING_PATH").unwrap_or_else(|_| "./testing".to_string());
-        format!("{}/{}", testing_path, rel_data_path)
+    let query = &args.get_query();
+    let df = ctx.sql(query).await?;
+    if args.schema {
+        df.show().await?;
+    } else {
+        df.show_limit(args.limit).await?;
     }
 
-    #[tokio::test]
-    async fn run_with_local_parquet_file() -> Result<()> {
-        let mut cmd = get_qv_cmd()?;
-        let cmd = cmd.arg(get_qv_testing_path(
-            "data/parquet/generated_simple_numerics/blogs.parquet",
-        ));
-        cmd.assert()
-            .success()
-            .stdout(predicate::str::contains(
-                r#"| reply | blog_id |"#,
-            ))
-            .stdout(predicate::str::contains(
-                r#"| {reply_id: 332770973, next_id: } | -1473106667809783919 |"#,
-            ));
-        Ok(())
-    }
+    Ok(())
 }
diff --git a/src/object_store_util.rs b/src/object_store_util.rs
index 0290e6a..968fdc1 100644
--- a/src/object_store_util.rs
+++ b/src/object_store_util.rs
@@ -1,3 +1,4 @@
+/*
 use datafusion::common::Result;
 use datafusion::datasource::object_store::ObjectStoreUrl;
 use datafusion::prelude::SessionContext;
@@ -153,3 +154,4 @@ mod tests {
         Ok(())
     }
 }
+ */
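
The rewritten main() above collapses the old globbing/Glue/Delta resolution into DataFusion's built-in ListingTable. A condensed sketch of that flow, assuming the DataFusion 36 API; the helper name query_path is hypothetical, while the fixed table name datafusion.public.tbl mirrors the diff:

    use std::sync::Arc;
    use datafusion::catalog::TableReference;
    use datafusion::common::Result;
    use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl};
    use datafusion::prelude::*;

    // Parse a path or URL, infer the file format and schema from the data
    // itself, register the result as a table, and query it with plain SQL.
    async fn query_path(ctx: &SessionContext, path: &str, limit: usize) -> Result<()> {
        let state = ctx.state();
        let table_path = ListingTableUrl::parse(path)?;
        let config = ListingTableConfig::new(table_path)
            .infer_options(&state)
            .await?
            .infer_schema(&state)
            .await?;
        let table = ListingTable::try_new(config)?;
        ctx.register_table(TableReference::from("datafusion.public.tbl"), Arc::new(table))?;
        let df = ctx.sql("SELECT * FROM datafusion.public.tbl").await?;
        df.show_limit(limit).await
    }
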
env::var("QV_TESTING_PATH").unwrap_or_else(|_| "./testing".to_string()); + format!("{}/{}", testing_path, rel_data_path) +} + +#[tokio::test] +async fn run_with_local_parquet_file() -> datafusion::common::Result<()> { + let mut cmd = get_qv_cmd()?; + let cmd = cmd.arg(get_qv_testing_path( + "data/parquet/generated_simple_numerics/blogs.parquet", + )); + cmd.assert() + .success() + .stdout(predicate::str::contains( + r#"| reply | blog_id |"#, + )) + .stdout(predicate::str::contains( + r#"| {reply_id: 332770973, next_id: } | -1473106667809783919 |"#, + )); + Ok(()) +} + +/* +#[tokio::test] +async fn run_with_local_parquet_files_in_folder() -> datafusion::common::Result<()> { + let mut cmd = get_qv_cmd()?; + let cmd = cmd.arg(get_qv_testing_path("data/iceberg/db/COVID-19_NYT/data")); + cmd.assert() + .success() + .stdout(predicate::str::contains( + r#"| date | county | state | fips | cases | deaths |"#, + )) + .stdout(predicate::str::contains( + r#"| 2020-05-19 | Lawrence | Illinois | 17101 | 4 | 0 |"#, + )); + Ok(()) +} +*/