apache · alamb · Dec 5, 2022 · Dec 4, 2022 · Dec 4, 2022 · Dec 4, 2022
diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md
@@ -23,7 +23,21 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/s
 
 #### Running tests
 
-`cargo test -p datafusion --test sqllogictests`
+```shell
+cargo test -p datafusion --test sqllogictests
+```
+
+Run tests with debug logging enabled:
+
+```shell
+RUST_LOG=debug cargo test -p datafusion --test sqllogictests
+```
+
+Run only the tests in `information_schema.slt`:
+
+```shell
+cargo test -p datafusion --test sqllogictests -- information_schema.slt
+```
 
 #### sqllogictests
 

diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -19,7 +19,8 @@ use async_trait::async_trait;
 use datafusion::arrow::csv::WriterBuilder;
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::prelude::{SessionConfig, SessionContext};
-use std::path::Path;
+use log::info;
+use std::path::{Path, PathBuf};
 use std::time::Duration;
 
 use sqllogictest::TestError;
@@ -70,56 +71,96 @@ pub async fn main() -> Result<()> {
 #[tokio::main]
 #[cfg(not(target_family = "windows"))]
 pub async fn main() -> Result<()> {
-    let paths = std::fs::read_dir(TEST_DIRECTORY).unwrap();
+    // Enable logging (e.g. set RUST_LOG=debug to see debug logs)
+    env_logger::init();
 
-    // run each file using its own new SessionContext
+    // run each file using its own new DB
     //
     // Note: can't use tester.run_parallel_async()
     // as that will reuse the same SessionContext
     //
     // We could run these tests in parallel eventually if we wanted.
 
-    for path in paths {
-        // TODO better error handling
-        let path = path.unwrap().path();
+    let files = get_test_files();
+    info!("Running test files {:?}", files);
 
-        run_file(&path).await?;
+    for path in files {
+        println!("Running: {}", path.display());
+
+        let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
+
+        let ctx = context_for_test_file(&file_name).await;
+
+        let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
+        tester.run_file_async(path).await?;
     }
 
     Ok(())
 }
 
-/// Run the tests in the specified `.slt` file
-async fn run_file(path: &Path) -> Result<()> {
-    println!("Running: {}", path.display());
-
-    let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
-
-    let ctx = context_for_test_file(&file_name).await;
+/// Gets a list of test files to execute. If any arguments were passed
+/// to the program, treat them as cargo test filters (substring match on filenames)
+fn get_test_files() -> Vec<PathBuf> {
+    info!("Test directory: {}", TEST_DIRECTORY);
+
+    let args: Vec<_> = std::env::args().collect();
+
+    // treat args after the first as filters to run (substring matching)
+    let filters = if !args.is_empty() {
+        args.iter()
+            .skip(1)
+            .map(|arg| arg.as_str())
+            .collect::<Vec<_>>()
+    } else {
+        vec![]
+    };
+
+    // default to all files in test directory filtering based on name
+    std::fs::read_dir(TEST_DIRECTORY)
+        .unwrap()
+        .map(|path| path.unwrap().path())
+        .filter(|path| check_test_file(&filters, path.as_path()))
+        .collect()
+}
 
-    let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
-    tester.run_file_async(path).await?;
+/// Because this test can be run as a cargo test, commands like
+///
+/// ```shell
+/// cargo test foo
+/// ```
+///
+/// Will end up passing `foo` as a command line argument.
+///
+/// To be compatible with this, treat the command line arguments as
+/// filters and do a substring match of each filter against the path.
+/// Returns true if this path should be run
+fn check_test_file(filters: &[&str], path: &Path) -> bool {
+    if filters.is_empty() {
+        return true;
+    }
 
-    Ok(())
+    // otherwise check if any filter matches
+    let path_str = path.to_string_lossy();
+    filters.iter().any(|filter| path_str.contains(filter))
 }
 
 /// Create a SessionContext, configured for the specific test
 async fn context_for_test_file(file_name: &str) -> SessionContext {
     match file_name {
         "aggregate.slt" => {
-            println!("Registering aggregate tables");
+            info!("Registering aggregate tables");
             let ctx = SessionContext::new();
             setup::register_aggregate_tables(&ctx).await;
             ctx
         }
         "information_schema.slt" => {
-            println!("Enabling information schema");
+            info!("Enabling information schema");
             SessionContext::with_config(
                 SessionConfig::new().with_information_schema(true),
             )
         }
         _ => {
-            println!("Using default SessionContex");
+            info!("Using default SessionContex");
             SessionContext::new()
         }
     }