Skip to content

Commit

Permalink
Consolidate remaining parquet config options into ConfigOptions
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Oct 20, 2022
1 parent 6d44791 commit 308a442
Show file tree
Hide file tree
Showing 12 changed files with 175 additions and 105 deletions.
5 changes: 3 additions & 2 deletions datafusion-examples/examples/flight_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,12 @@ impl FlightService for FlightServiceImpl {
) -> Result<Response<SchemaResult>, Status> {
let request = request.into_inner();

let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
let ctx = SessionContext::new();
let format = Arc::new(ParquetFormat::new(ctx.config_options()));
let listing_options = ListingOptions::new(format);
let table_path =
ListingTableUrl::parse(&request.path[0]).map_err(to_tonic_err)?;

let ctx = SessionContext::new();
let schema = listing_options
.infer_schema(&ctx.state(), &table_path)
.await
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/examples/parquet_sql_multiple_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async fn main() -> Result<()> {
let testdata = datafusion::test_util::parquet_test_data();

// Configure listing options
let file_format = ParquetFormat::default().with_enable_pruning(true);
let file_format = ParquetFormat::new(ctx.config_options());
let listing_options = ListingOptions {
file_extension: FileType::PARQUET.get_ext(),
format: Arc::new(file_format),
Expand Down
32 changes: 32 additions & 0 deletions datafusion/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ pub const OPT_PARQUET_REORDER_FILTERS: &str =
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
"datafusion.execution.parquet.enable_page_index";

/// Configuration option "datafusion.execution.parquet.pruning"
///
/// Boolean option. When true, the parquet reader attempts to skip entire
/// row groups based on the predicate in the query.
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";

/// Configuration option "datafusion.execution.parquet.skip_metadata"
///
/// Boolean option. When true, the parquet reader skips the optional embedded
/// metadata that may be present in the file schema. This can help avoid schema
/// conflicts when querying multiple parquet files whose schemas contain
/// compatible types but different metadata.
pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata";

/// Configuration option "datafusion.execution.parquet.metadata_size_hint"
///
/// Optional setting, registered with a null (`None`) value. When specified,
/// the parquet reader tries to fetch the last `size_hint` bytes of the parquet
/// file optimistically. When not specified, two reads are required: one for
/// the 8-byte parquet footer and a second driven by the metadata length
/// encoded in that footer.
pub const OPT_PARQUET_METADATA_SIZE_HINT: &str =
"datafusion.execution.parquet.metadata_size_hint";

/// Configuration option "datafusion.optimizer.skip_failed_rules"
///
/// Key of a boolean option registered in `BuiltInConfigs`; it governs how the
/// logical plan optimizer reacts when set to true.
// NOTE(review): the full description of this option is cut off in this view —
// confirm the exact semantics against the registration in `BuiltInConfigs`.
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
"datafusion.optimizer.skip_failed_rules";
Expand Down Expand Up @@ -237,6 +247,28 @@ impl BuiltInConfigs {
to reduce the number of rows decoded.",
false,
),
ConfigDefinition::new_bool(
OPT_PARQUET_ENABLE_PRUNING,
"If true, the parquet reader attempts to skip entire row groups based \
on the predicate in the query.",
true,
),
ConfigDefinition::new_bool(
OPT_PARQUET_SKIP_METADATA,
"If true, the parquet reader skip the optional embedded metadata that may be in \
the file Schema. This setting can help avoid schema conflicts when querying \
multiple parquet files with schemas containing compatible types but different metadata.",
true,
),
// The size hint is a byte count (see the description below), so it is
// registered as an unsigned integer rather than a boolean. The null value
// (`None`) means "no hint": the reader then falls back to the two-read path.
ConfigDefinition::new(
    OPT_PARQUET_METADATA_SIZE_HINT,
    "If specified, the parquet reader will try and fetch the last `size_hint` \
    bytes of the parquet file optimistically. If not specified, two read are required: \
    One read to fetch the 8-byte parquet footer and \
    another to fetch the metadata length encoded in the footer.",
    DataType::UInt64,
    ScalarValue::UInt64(None),
),
ConfigDefinition::new_bool(
OPT_OPTIMIZER_SKIP_FAILED_RULES,
"When set to true, the logical plan optimizer will produce warning \
Expand Down
Loading

0 comments on commit 308a442

Please sign in to comment.