Skip to content

Commit 2e4452a

Browse files
committed
[doc] typos and clarity
1 parent d69fc9a commit 2e4452a

File tree

10 files changed

+20
-23
lines changed

10 files changed

+20
-23
lines changed

ballista/rust/client/src/context.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ impl BallistaContext {
239239
/// Create a DataFrame from a SQL statement.
240240
///
241241
/// This method is `async` because queries of type `CREATE EXTERNAL TABLE`
242-
/// might require the schema to be infered.
242+
/// might require the schema to be inferred.
243243
pub async fn sql(&self, sql: &str) -> Result<Arc<dyn DataFrame>> {
244244
let mut ctx = {
245245
let state = self.state.lock().unwrap();

ballista/rust/core/src/serde/physical_plan/to_proto.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
261261
.as_ref()
262262
.ok_or_else(|| {
263263
BallistaError::General(
264-
"projection in CsvExec dosn not exist.".to_owned(),
264+
"projection in CsvExec does not exist.".to_owned(),
265265
)
266266
})?
267267
.iter()
@@ -320,7 +320,7 @@ impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
320320
.as_ref()
321321
.ok_or_else(|| {
322322
BallistaError::General(
323-
"projection in AvroExec dosn not exist.".to_owned(),
323+
"projection in AvroExec does not exist.".to_owned(),
324324
)
325325
})?
326326
.iter()

datafusion/src/datasource/file_format/mod.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ use std::fmt;
2727
use std::sync::Arc;
2828

2929
use crate::arrow::datatypes::SchemaRef;
30-
use crate::datasource::{create_max_min_accs, get_col_stats};
3130
use crate::error::Result;
3231
use crate::logical_plan::Expr;
3332
use crate::physical_plan::{ExecutionPlan, Statistics};
@@ -44,15 +43,15 @@ pub struct PhysicalPlanConfig {
4443
pub object_store: Arc<dyn ObjectStore>,
4544
/// Schema before projection
4645
pub schema: SchemaRef,
47-
/// Partitioned fields to process in the executor
46+
/// List of files to be processed, grouped into partitions
4847
pub files: Vec<Vec<PartitionedFile>>,
49-
/// Estimated overall statistics of source plan
48+
/// Estimated overall statistics of the plan, taking `filters` into account
5049
pub statistics: Statistics,
5150
/// Columns on which to project the data
5251
pub projection: Option<Vec<usize>>,
5352
/// The maximum number of records per arrow column
5453
pub batch_size: usize,
55-
/// The filters that where pushed down to this execution plan
54+
/// The filters that were pushed down to this execution plan
5655
pub filters: Vec<Expr>,
5756
/// The minimum number of records required from this source plan
5857
pub limit: Option<usize>,

datafusion/src/datasource/file_format/parquet.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ use parquet::file::statistics::Statistics as ParquetStatistics;
3636

3737
use super::FileFormat;
3838
use super::PhysicalPlanConfig;
39-
use super::{create_max_min_accs, get_col_stats};
4039
use crate::arrow::datatypes::{DataType, Field};
4140
use crate::datasource::object_store::{ObjectReader, ObjectReaderStream};
41+
use crate::datasource::{create_max_min_accs, get_col_stats};
4242
use crate::error::DataFusionError;
4343
use crate::error::Result;
4444
use crate::logical_plan::combine_filters;

datafusion/src/datasource/listing.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pub struct ListingOptions {
4646
pub file_extension: String,
4747
/// The file format
4848
pub format: Arc<dyn FileFormat>,
49-
/// The expected partition column names.
49+
/// The expected partition column names in the folder structure.
5050
/// For example `Vec["a", "b"]` means that the first two levels of
5151
/// partitioning are expected to be named "a" and "b":
5252
/// - If there is a third level of partitioning it will be ignored.
@@ -55,11 +55,11 @@ pub struct ListingOptions {
5555
/// TODO implement case where partitions.len() > 0
5656
pub partitions: Vec<String>,
5757
/// Set true to try to guess statistics from the files.
58-
/// This can add a lot of overhead as it requires files to
59-
/// be opened and partially parsed.
58+
/// This can add a lot of overhead as it will usually require files
59+
/// to be opened and at least partially parsed.
6060
pub collect_stat: bool,
61-
/// Group files to avoid that the number of partitions
62-
/// exceeds this limit
61+
/// Group files to avoid that the number of partitions exceeds
62+
/// this limit
6363
pub target_partitions: usize,
6464
}
6565

@@ -80,8 +80,8 @@ impl ListingOptions {
8080
}
8181
}
8282

83-
/// Infer the schema of the files at the given uri, including the partitioning
84-
/// columns.
83+
/// Infer the schema of the files at the given path on the provided object store.
84+
/// The inferred schema should include the partitioning columns.
8585
///
8686
/// This method will not be called by the table itself but before creating it.
8787
/// This way when creating the logical plan we can decide to resolve the schema

datafusion/src/datasource/mod.rs

-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ use std::pin::Pin;
4040
/// if the optional `limit` is provided, includes only sufficient files
4141
/// needed to read up to `limit` number of rows
4242
/// TODO fix case where `num_rows` and `total_byte_size` are not defined (stat should be None instead of Some(0))
43-
/// TODO move back to crate::datasource::mod.rs once legacy cleaned up
4443
pub async fn get_statistics_with_limit(
4544
all_files: impl Stream<Item = Result<(PartitionedFile, Statistics)>>,
4645
schema: SchemaRef,
@@ -126,7 +125,6 @@ pub async fn get_statistics_with_limit(
126125
#[derive(Debug, Clone)]
127126
/// A single file that should be read, along with its schema, statistics
128127
/// and partition column values that need to be appended to each row.
129-
/// TODO move back to crate::datasource::mod.rs once legacy cleaned up
130128
pub struct PartitionedFile {
131129
/// Path for the file (e.g. URL, filesystem path, etc)
132130
pub file_meta: FileMeta,
@@ -159,7 +157,6 @@ impl std::fmt::Display for PartitionedFile {
159157

160158
#[derive(Debug, Clone)]
161159
/// A collection of files that should be read in a single task
162-
/// TODO move back to crate::datasource::mod.rs once legacy cleaned up
163160
pub struct FilePartition {
164161
/// The index of the partition among all partitions
165162
pub index: usize,

datafusion/src/datasource/object_store/local.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,13 @@ async fn list_all(prefix: String) -> Result<FileMetaStream> {
152152
}
153153
}
154154

155-
/// Create a stream of `ObjectReader` by opening each file in the `files` vector
155+
/// Create a stream of `ObjectReader` by converting each file in the `files` vector
156+
/// into instances of `LocalFileReader`
156157
pub fn local_object_reader_stream(files: Vec<String>) -> ObjectReaderStream {
157158
Box::pin(futures::stream::iter(files).map(|f| Ok(local_object_reader(f))))
158159
}
159160

160-
/// Helper method to convert a file location to an ObjectReader
161+
/// Helper method to convert a file location to a `LocalFileReader`
161162
pub fn local_object_reader(file: String) -> Arc<dyn ObjectReader> {
162163
LocalFileSystem
163164
.file_reader(local_file_meta(file).sized_file)

datafusion/src/datasource/object_store/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ pub enum ListEntry {
7373
#[derive(Debug, Clone)]
7474
pub struct SizedFile {
7575
/// Path of the file. It is relative to the current object
76-
/// store (it does not specify the xx:// scheme).
76+
/// store (it does not specify the `xx://` scheme).
7777
pub path: String,
7878
/// File size in total
7979
pub size: u64,

datafusion/src/execution/context.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ impl ExecutionContext {
183183
/// Creates a dataframe that will execute a SQL query.
184184
///
185185
/// This method is `async` because queries of type `CREATE EXTERNAL TABLE`
186-
/// might require the schema to be infered.
186+
/// might require the schema to be inferred.
187187
pub async fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>> {
188188
let plan = self.create_logical_plan(sql)?;
189189
match plan {

datafusion/src/physical_plan/expressions/binary.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ pub fn binary_operator_data_type(
484484
rhs_type: &DataType,
485485
) -> Result<DataType> {
486486
// validate that it is possible to perform the operation on incoming types.
487-
// (or the return datatype cannot be infered)
487+
// (or the return datatype cannot be inferred)
488488
let common_type = common_binary_type(lhs_type, op, rhs_type)?;
489489

490490
match op {

0 commit comments

Comments
 (0)