Skip to content

Commit

Permalink
Normalize datafusion configuration names (#4596)
Browse files Browse the repository at this point in the history
Co-authored-by: yangzhong <[email protected]>
  • Loading branch information
yahoNanJing and kyotoYaho authored Dec 14, 2022
1 parent f8a3d58 commit a5cf577
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 38 deletions.
88 changes: 52 additions & 36 deletions datafusion/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,37 +27,25 @@ use std::env;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

/// Configuration option "datafusion.execution.target_partitions"
pub const OPT_TARGET_PARTITIONS: &str = "datafusion.execution.target_partitions";

/*-************************************
* Catalog related
**************************************/
/// Configuration option "datafusion.catalog.create_default_catalog_and_schema"
pub const OPT_CREATE_DEFAULT_CATALOG_AND_SCHEMA: &str =
"datafusion.catalog.create_default_catalog_and_schema";

/// Configuration option "datafusion.catalog.information_schema"
pub const OPT_INFORMATION_SCHEMA: &str = "datafusion.catalog.information_schema";

/// Configuration option "datafusion.optimizer.repartition_joins"
pub const OPT_REPARTITION_JOINS: &str = "datafusion.optimizer.repartition_joins";

/// Configuration option "datafusion.optimizer.repartition_aggregations"
pub const OPT_REPARTITION_AGGREGATIONS: &str =
"datafusion.optimizer.repartition_aggregations";

/// Configuration option "datafusion.optimizer.repartition_windows"
pub const OPT_REPARTITION_WINDOWS: &str = "datafusion.optimizer.repartition_windows";

/// Configuration option "datafusion.execuction_collect_statistics"
pub const OPT_COLLECT_STATISTICS: &str = "datafusion.execuction_collect_statistics";

/// Configuration option "datafusion.optimizer.filter_null_join_keys"
pub const OPT_FILTER_NULL_JOIN_KEYS: &str = "datafusion.optimizer.filter_null_join_keys";

/// Configuration option "datafusion.explain.logical_plan_only"
pub const OPT_EXPLAIN_LOGICAL_PLAN_ONLY: &str = "datafusion.explain.logical_plan_only";
/// Configuration option "datafusion.catalog.location"
///
/// Location scanned to load tables for `default` schema
pub const OPT_CATALOG_LOCATION: &str = "datafusion.catalog.location";

/// Configuration option "datafusion.explain.physical_plan_only"
pub const OPT_EXPLAIN_PHYSICAL_PLAN_ONLY: &str = "datafusion.explain.physical_plan_only";
/// Configuration option "datafusion.catalog.type"
///
/// Type of `TableProvider` to use when loading `default` schema
pub const OPT_CATALOG_TYPE: &str = "datafusion.catalog.type";

/*-************************************
* Execution related
**************************************/
/// Configuration option "datafusion.execution.batch_size"
pub const OPT_BATCH_SIZE: &str = "datafusion.execution.batch_size";

Expand All @@ -68,9 +56,25 @@ pub const OPT_COALESCE_BATCHES: &str = "datafusion.execution.coalesce_batches";
pub const OPT_COALESCE_TARGET_BATCH_SIZE: &str =
"datafusion.execution.coalesce_target_batch_size";

/// Configuration option "datafusion.execution.collect_statistics"
pub const OPT_COLLECT_STATISTICS: &str = "datafusion.execution.collect_statistics";

/// Configuration option "datafusion.execution.target_partitions"
pub const OPT_TARGET_PARTITIONS: &str = "datafusion.execution.target_partitions";

/// Configuration option "datafusion.execution.time_zone"
pub const OPT_TIME_ZONE: &str = "datafusion.execution.time_zone";

/*-************************************
* Execution parquet related
**************************************/
/// Configuration option "datafusion.execution.parquet.enable_page_index"
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
"datafusion.execution.parquet.enable_page_index";

/// Configuration option "datafusion.execution.parquet.pruning"
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";

/// Configuration option "datafusion.execution.parquet.pushdown_filters"
pub const OPT_PARQUET_PUSHDOWN_FILTERS: &str =
"datafusion.execution.parquet.pushdown_filters";
Expand All @@ -79,33 +83,45 @@ pub const OPT_PARQUET_PUSHDOWN_FILTERS: &str =
pub const OPT_PARQUET_REORDER_FILTERS: &str =
"datafusion.execution.parquet.reorder_filters";

/// Configuration option "datafusion.execution.parquet.enable_page_index"
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
"datafusion.execution.parquet.enable_page_index";

/// Configuration option "datafusion.execution.parquet.pruning"
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";

/// Configuration option "datafusion.execution.parquet.skip_metadata"
pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata";

/// Configuration option "datafusion.execution.parquet.metadata_size_hint"
pub const OPT_PARQUET_METADATA_SIZE_HINT: &str =
"datafusion.execution.parquet.metadata_size_hint";

/*-************************************
* Explain related
**************************************/
/// Configuration option "datafusion.explain.logical_plan_only"
pub const OPT_EXPLAIN_LOGICAL_PLAN_ONLY: &str = "datafusion.explain.logical_plan_only";

/// Configuration option "datafusion.explain.physical_plan_only"
pub const OPT_EXPLAIN_PHYSICAL_PLAN_ONLY: &str = "datafusion.explain.physical_plan_only";

/*-************************************
* Optimizer related
**************************************/
/// Configuration option "datafusion.optimizer.filter_null_join_keys"
pub const OPT_FILTER_NULL_JOIN_KEYS: &str = "datafusion.optimizer.filter_null_join_keys";

/// Configuration option "datafusion.optimizer.repartition_aggregations"
pub const OPT_REPARTITION_AGGREGATIONS: &str =
"datafusion.optimizer.repartition_aggregations";

/// Configuration option "datafusion.optimizer.repartition_joins"
pub const OPT_REPARTITION_JOINS: &str = "datafusion.optimizer.repartition_joins";

/// Configuration option "datafusion.optimizer.repartition_windows"
pub const OPT_REPARTITION_WINDOWS: &str = "datafusion.optimizer.repartition_windows";

/// Configuration option "datafusion.optimizer.skip_failed_rules"
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
"datafusion.optimizer.skip_failed_rules";

/// Configuration option "datafusion.optimizer.max_passes"
pub const OPT_OPTIMIZER_MAX_PASSES: &str = "datafusion.optimizer.max_passes";

/// Location scanned to load tables for `default` schema
pub const OPT_CATALOG_LOCATION: &str = "datafusion.catalog.location";

/// Type of `TableProvider` to use when loading `default` schema
pub const OPT_CATALOG_TYPE: &str = "datafusion.catalog.type";

/// Configuration option "datafusion.optimizer.top_down_join_key_reordering"
pub const OPT_TOP_DOWN_JOIN_KEY_REORDERING: &str =
"datafusion.optimizer.top_down_join_key_reordering";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ datafusion.catalog.create_default_catalog_and_schema true
datafusion.catalog.information_schema true
datafusion.catalog.location NULL
datafusion.catalog.type NULL
datafusion.execuction_collect_statistics false
datafusion.execution.batch_size 8192
datafusion.execution.coalesce_batches true
datafusion.execution.coalesce_target_batch_size 4096
datafusion.execution.collect_statistics false
datafusion.execution.parquet.enable_page_index false
datafusion.execution.parquet.metadata_size_hint NULL
datafusion.execution.parquet.pruning true
Expand Down
2 changes: 1 addition & 1 deletion docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.catalog.information_schema | Boolean | false | Should DataFusion provide access to `information_schema` virtual tables for displaying schema information |
| datafusion.catalog.location | Utf8 | NULL | Location scanned to load tables for `default` schema, defaults to None |
| datafusion.catalog.type | Utf8 | NULL | Type of `TableProvider` to use when loading `default` schema. Defaults to None |
| datafusion.execuction_collect_statistics | Boolean | false | Should DataFusion repartition data using the partitions keys to execute window functions in parallel using the provided `target_partitions` level |
| datafusion.execution.batch_size | UInt64 | 8192 | Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption. |
| datafusion.execution.coalesce_batches | Boolean | true | When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting 'datafusion.execution.coalesce_target_batch_size'. |
| datafusion.execution.coalesce_target_batch_size | UInt64 | 4096 | Target batch size when coalescing batches. Used in conjunction with the configuration setting 'datafusion.execution.coalesce_batches'. |
| datafusion.execution.collect_statistics | Boolean | false | Should DataFusion collect statistics after listing files |
| datafusion.execution.parquet.enable_page_index | Boolean | false | If true, uses parquet data page level metadata (Page Index) statistics to reduce the number of rows decoded. |
| datafusion.execution.parquet.metadata_size_hint | UInt64 | NULL | If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: one read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer. |
| datafusion.execution.parquet.pruning | Boolean | true | If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file. |
Expand Down

0 comments on commit a5cf577

Please sign in to comment.