diff --git a/datafusion/core/src/physical_plan/file_format/parquet.rs b/datafusion/core/src/physical_plan/file_format/parquet.rs index 61d2e5badb6b..270271d43ebe 100644 --- a/datafusion/core/src/physical_plan/file_format/parquet.rs +++ b/datafusion/core/src/physical_plan/file_format/parquet.rs @@ -900,6 +900,8 @@ pub async fn plan_to_parquet( #[cfg(test)] mod tests { + // See also `parquet_exec` integration test + use super::*; use crate::config::ConfigOptions; use crate::datasource::file_format::parquet::test_util::store_parquet; diff --git a/datafusion/core/tests/parquet/repeat_much.snappy.parquet b/datafusion/core/tests/parquet/data/repeat_much.snappy.parquet similarity index 100% rename from datafusion/core/tests/parquet/repeat_much.snappy.parquet rename to datafusion/core/tests/parquet/data/repeat_much.snappy.parquet diff --git a/datafusion/core/tests/parquet/test_binary.parquet b/datafusion/core/tests/parquet/data/test_binary.parquet similarity index 100% rename from datafusion/core/tests/parquet/test_binary.parquet rename to datafusion/core/tests/parquet/data/test_binary.parquet diff --git a/datafusion/core/tests/parquet_filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs similarity index 99% rename from datafusion/core/tests/parquet_filter_pushdown.rs rename to datafusion/core/tests/parquet/filter_pushdown.rs index 54b7d8d169e4..657f00d0cea5 100644 --- a/datafusion/core/tests/parquet_filter_pushdown.rs +++ b/datafusion/core/tests/parquet/filter_pushdown.rs @@ -272,7 +272,7 @@ async fn single_file_small_data_pages() { // TestCase::new(&test_parquet_file) // .with_name("selective") - // // predicagte is chosen carefully to prune pages 0, 1, 2, 3, 4 + // // predicate is chosen carefully to prune pages 0, 1, 2, 3, 4 // // pod = 'iadnalqpdzthpifrvewossmpqibgtsuin' // .with_filter(col("pod").eq(lit("iadnalqpdzthpifrvewossmpqibgtsuin"))) // .with_pushdown_expected(PushdownExpected::Some) @@ -291,7 +291,7 @@ async fn single_file_small_data_pages() { // page 5: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[min: 1970-01-01T00:00:00.000000000, max: 1970-01-01T00:00:00.005330944, num_nulls not defined] CRC:[none] SZ:12601 VC:7739 TestCase::new(&test_parquet_file) .with_name("selective") - // predicagte is chosen carefully to prune pages + // predicate is chosen carefully to prune pages 1, 2, 4, and 5 // time > 1970-01-01T00:00:00.004300000 .with_filter(col("time").gt(lit_timestamp_nano(4300000))) .with_pushdown_expected(PushdownExpected::Some) diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs new file mode 100644 index 000000000000..00ca670e3d23 --- /dev/null +++ b/datafusion/core/tests/parquet/mod.rs @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Parquet integration tests +mod filter_pushdown; +mod page_pruning; +mod row_group_pruning; diff --git a/datafusion/core/tests/parquet_page_index_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs similarity index 100% rename from datafusion/core/tests/parquet_page_index_pruning.rs rename to datafusion/core/tests/parquet/page_pruning.rs diff --git a/datafusion/core/tests/parquet_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs similarity index 100% rename from datafusion/core/tests/parquet_pruning.rs rename to datafusion/core/tests/parquet/row_group_pruning.rs diff --git a/datafusion/core/tests/parquet_exec.rs b/datafusion/core/tests/parquet_exec.rs new file mode 100644 index 000000000000..43ceb615a062 --- /dev/null +++ b/datafusion/core/tests/parquet_exec.rs @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Run all tests that are found in the `parquet` directory +mod parquet; diff --git a/datafusion/core/tests/sql/order.rs b/datafusion/core/tests/sql/order.rs index e6c88e0a16d8..8f08612459c3 100644 --- a/datafusion/core/tests/sql/order.rs +++ b/datafusion/core/tests/sql/order.rs @@ -214,7 +214,7 @@ async fn sort_empty() -> Result<()> { #[tokio::test] async fn sort_with_lots_of_repetition_values() -> Result<()> { let ctx = SessionContext::new(); - let filename = "tests/parquet/repeat_much.snappy.parquet"; + let filename = "tests/parquet/data/repeat_much.snappy.parquet"; ctx.register_parquet("rep", filename, ParquetReadOptions::default()) .await?; diff --git a/datafusion/core/tests/sql/parquet.rs b/datafusion/core/tests/sql/parquet.rs index 2777e8c29455..7a0db41f1984 100644 --- a/datafusion/core/tests/sql/parquet.rs +++ b/datafusion/core/tests/sql/parquet.rs @@ -53,7 +53,7 @@ async fn fixed_size_binary_columns() { let ctx = SessionContext::new(); ctx.register_parquet( "t0", - "tests/parquet/test_binary.parquet", + "tests/parquet/data/test_binary.parquet", ParquetReadOptions::default(), ) .await