Commit 57f30bb

Add support for show_col_types for edition 1 parser
`read_table()` is one of the functions that does not yet have a vroom
equivalent, so it still uses the first edition parser. When we added
support for `show_col_types` we didn't port it back to the first edition
parser, so the argument was missing from `read_table()` and from the
`read_delim_chunked()` family of functions.

Fixes #1331
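
As a quick illustration of what this enables (a hedged sketch; the file name below is hypothetical), the edition 1 readers now honour `show_col_types` the same way the vroom-backed readers do:

```r
library(readr)

# read_table() goes through the edition 1 parser; with this commit the
# argument is accepted and FALSE suppresses the column specification message.
df <- read_table("measurements.txt", show_col_types = FALSE)

# Left unset, the readr.show_col_types option is consulted; otherwise the
# spec is shown only when col_types was not supplied.
df <- read_table("measurements.txt")
```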
jimhester committed Nov 22, 2021
1 parent 0cf3379 commit 57f30bb
Showing 6 changed files with 49 additions and 45 deletions.
1 change: 1 addition & 0 deletions NEWS.md
@@ -1,5 +1,6 @@
 # readr (development version)

+* `read_table()` and edition 1 parsers gain support for `show_col_types()` (#1331)
 * Fix buffer overflow when trying to parse an integer from a field that is over 64 characters long (#1326)

 # readr 2.1.0
26 changes: 20 additions & 6 deletions R/read_delim.R
@@ -169,7 +169,8 @@ read_delim <- function(file, delim = NULL, quote = '"',
 return(read_delimited(file, tokenizer,
 col_names = col_names, col_types = col_types,
 locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
-comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
+comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 ))
 }
 if (!missing(quoted_na)) {
@@ -230,7 +231,8 @@ read_csv <- function(file,
 read_delimited(file, tokenizer,
 col_names = col_names, col_types = col_types,
 locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
-comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
+comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 )
 )
 }
@@ -300,7 +302,8 @@ read_csv2 <- function(file,
 return(read_delimited(file, tokenizer,
 col_names = col_names, col_types = col_types,
 locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
-comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
+comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 ))
 }
 vroom::vroom(file,
@@ -349,7 +352,8 @@ read_tsv <- function(file, col_names = TRUE, col_types = NULL,
 return(read_delimited(file, tokenizer,
 col_names = col_names, col_types = col_types,
 locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
-comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
+comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 ))
 }

@@ -386,9 +390,17 @@ read_tokens <- function(data, tokenizer, col_specs, col_names, locale_, n_max, p
 read_tokens_(data, tokenizer, col_specs, col_names, locale_, n_max, progress)
 }

+should_show_col_types <- function(has_col_types, show_col_types) {
+if (is.null(show_col_types)) {
+return(isTRUE(!has_col_types))
+}
+isTRUE(show_col_types)
+}
+
 read_delimited <- function(file, tokenizer, col_names = TRUE, col_types = NULL,
 locale = default_locale(), skip = 0, skip_empty_rows = TRUE, skip_quote = TRUE,
-comment = "", n_max = Inf, guess_max = min(1000, n_max), progress = show_progress()) {
+comment = "", n_max = Inf, guess_max = min(1000, n_max), progress = show_progress(),
+show_col_types = should_show_types()) {
 name <- source_name(file)
 # If connection needed, read once.
 file <- standardise_path(file)
@@ -420,7 +432,9 @@ read_delimited <- function(file, tokenizer, col_names = TRUE, col_types = NULL,

 ds <- datasource(data, skip = spec$skip, skip_empty_rows = skip_empty_rows, comment = comment, skip_quote = skip_quote)

-if (is.null(col_types) && !inherits(ds, "source_string") && !is_testing()) {
+has_col_types <- !is.null(col_types)
+
+if (((is.null(show_col_types) && !has_col_types) || isTRUE(show_col_types)) && !inherits(ds, "source_string") && !is_testing()) {
 show_cols_spec(spec)
 }

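For reference, the decision logic of the new `should_show_col_types()` helper can be summarised with a few illustrative calls (these are not part of the diff):

```r
# show_col_types left NULL ("unset"): show the spec only when no col_types were given
should_show_col_types(has_col_types = FALSE, show_col_types = NULL)   # TRUE
should_show_col_types(has_col_types = TRUE,  show_col_types = NULL)   # FALSE

# An explicit TRUE or FALSE always wins, regardless of col_types
should_show_col_types(has_col_types = TRUE,  show_col_types = TRUE)   # TRUE
should_show_col_types(has_col_types = FALSE, show_col_types = FALSE)  # FALSE
```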
25 changes: 18 additions & 7 deletions R/read_delim_chunked.R
@@ -68,6 +68,7 @@ read_delim_chunked <- function(file, callback, delim = NULL, chunk_size = 10000,
 comment = "", trim_ws = FALSE,
 skip = 0, guess_max = chunk_size,
 progress = show_progress(),
+show_col_types = should_show_types(),
 skip_empty_rows = TRUE) {
 tokenizer <- tokenizer_delim(delim,
 quote = quote,
@@ -79,7 +80,8 @@ read_delim_chunked <- function(file, callback, delim = NULL, chunk_size = 10000,
 callback = callback, chunk_size = chunk_size, tokenizer = tokenizer,
 col_names = col_names, col_types = col_types, locale = locale, skip = skip,
 skip_empty_rows = skip_empty_rows, comment = comment, guess_max = guess_max,
-progress = progress
+progress = progress,
+show_col_types = show_col_types
 )
 }

@@ -89,7 +91,9 @@ read_csv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
 locale = default_locale(), na = c("", "NA"),
 quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
 skip = 0, guess_max = chunk_size,
-progress = show_progress(), skip_empty_rows = TRUE) {
+progress = show_progress(),
+show_col_types = should_show_types(),
+skip_empty_rows = TRUE) {
 tokenizer <- tokenizer_csv(
 na = na, quoted_na = quoted_na, quote = quote,
 comment = comment, trim_ws = trim_ws, skip_empty_rows = skip_empty_rows
@@ -98,7 +102,8 @@ read_csv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
 callback = callback, chunk_size = chunk_size,
 tokenizer = tokenizer, col_names = col_names, col_types = col_types, locale = locale,
 skip = skip, skip_empty_rows = skip_empty_rows, comment = comment,
-guess_max = guess_max, progress = progress
+guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 )
 }

@@ -108,7 +113,9 @@ read_csv2_chunked <- function(file, callback, chunk_size = 10000, col_names = TR
 locale = default_locale(), na = c("", "NA"),
 quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
 skip = 0, guess_max = chunk_size,
-progress = show_progress(), skip_empty_rows = TRUE) {
+progress = show_progress(),
+show_col_types = should_show_types(),
+skip_empty_rows = TRUE) {
 tokenizer <- tokenizer_delim(
 delim = ";", na = na, quoted_na = quoted_na,
 quote = quote, comment = comment, trim_ws = trim_ws,
@@ -118,7 +125,8 @@ read_csv2_chunked <- function(file, callback, chunk_size = 10000, col_names = TR
 callback = callback, chunk_size = chunk_size,
 tokenizer = tokenizer, col_names = col_names, col_types = col_types, locale = locale,
 skip = skip, skip_empty_rows = skip_empty_rows, comment = comment,
-guess_max = guess_max, progress = progress
+guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 )
 }

@@ -128,7 +136,9 @@ read_tsv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
 locale = default_locale(), na = c("", "NA"),
 quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
 skip = 0, guess_max = chunk_size,
-progress = show_progress(), skip_empty_rows = TRUE) {
+progress = show_progress(),
+show_col_types = should_show_types(),
+skip_empty_rows = TRUE) {
 tokenizer <- tokenizer_tsv(
 na = na, quoted_na = quoted_na, quote = quote,
 comment = comment, trim_ws = trim_ws, skip_empty_rows = skip_empty_rows
@@ -137,6 +147,7 @@ read_tsv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
 callback = callback, chunk_size = chunk_size,
 tokenizer = tokenizer, col_names = col_names, col_types = col_types, locale = locale,
 skip = skip, skip_empty_rows = skip_empty_rows, comment = comment,
-guess_max = guess_max, progress = progress
+guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 )
 }
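
A hedged usage sketch for the chunked readers (the CSV path is hypothetical; `SideEffectChunkCallback` is one of readr's existing callback classes):

```r
library(readr)

# Report the size of each chunk without keeping the data.
row_counter <- SideEffectChunkCallback$new(function(chunk, pos) {
  message("chunk starting at row ", pos, ": ", nrow(chunk), " rows")
})

# With this commit, show_col_types = FALSE suppresses the column
# specification message for the edition 1 chunked readers as well.
read_csv_chunked("large-file.csv", callback = row_counter,
                 chunk_size = 5000, show_col_types = FALSE)
```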
7 changes: 5 additions & 2 deletions R/read_log.R
@@ -10,10 +10,13 @@
 #' read_log(readr_example("example.log"))
 read_log <- function(file, col_names = FALSE, col_types = NULL,
 trim_ws = TRUE,
-skip = 0, n_max = Inf, progress = show_progress()) {
+skip = 0, n_max = Inf,
+show_col_types = should_show_types(),
+progress = show_progress()) {
 tokenizer <- tokenizer_log(trim_ws = trim_ws)
 read_delimited(file, tokenizer,
 col_names = col_names, col_types = col_types,
-skip = skip, n_max = n_max, progress = progress
+skip = skip, n_max = n_max, progress = progress,
+show_col_types = show_col_types
 )
 }
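
Since `read_log()` also routes through `read_delimited()`, it gains the same argument; a small sketch using the example file referenced in the roxygen comment above:

```r
library(readr)

# example.log ships with readr; FALSE silences the guessed column specification.
logs <- read_log(readr_example("example.log"), show_col_types = FALSE)
head(logs)
```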
4 changes: 3 additions & 1 deletion R/read_table.R
@@ -34,6 +34,7 @@ read_table <- function(file, col_names = TRUE, col_types = NULL,
 locale = default_locale(), na = "NA", skip = 0,
 n_max = Inf, guess_max = min(n_max, 1000),
 progress = show_progress(), comment = "",
+show_col_types = should_show_types(),
 skip_empty_rows = TRUE) {
 tokenizer <- tokenizer_ws(
 na = na, comment = comment,
@@ -42,7 +43,8 @@ read_table <- function(file, col_names = TRUE, col_types = NULL,
 read_delimited(file, tokenizer,
 col_names = col_names, col_types = col_types,
 locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
-skip_quote = FALSE, comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
+skip_quote = FALSE, comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
+show_col_types = show_col_types
 )
 }

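The default used throughout, `should_show_types()`, is not added by this commit; it already exists in readr for the edition 2 readers and maps the `readr.show_col_types` option onto the argument. Roughly (a sketch, not the verbatim implementation):

```r
should_show_types <- function() {
  # TRUE/FALSE from the option wins; anything else means "unset" (NULL), in
  # which case read_delimited() shows the spec only when col_types is NULL.
  opt <- getOption("readr.show_col_types", default = NA)
  if (isTRUE(opt)) {
    TRUE
  } else if (identical(opt, FALSE)) {
    FALSE
  } else {
    NULL
  }
}
```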
31 changes: 2 additions & 29 deletions src/cpp11.cpp
@@ -3,6 +3,7 @@


 #include "cpp11/declarations.hpp"
+#include <R_ext/Visibility.h>

 // CollectorGuess.cpp
 std::string collectorGuess(const cpp11::strings& input, const cpp11::list& locale_, bool guessInteger);
@@ -197,34 +198,6 @@ extern "C" SEXP _readr_write_file_raw_(SEXP x, SEXP connection) {
 }

 extern "C" {
-/* .Call calls */
-extern SEXP _readr_collectorGuess(SEXP, SEXP, SEXP);
-extern SEXP _readr_count_fields_(SEXP, SEXP, SEXP);
-extern SEXP _readr_dim_tokens_(SEXP, SEXP);
-extern SEXP _readr_guess_header_(SEXP, SEXP, SEXP);
-extern SEXP _readr_guess_types_(SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_melt_tokens_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_melt_tokens_chunked_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_parse_vector_(SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_read_connection_(SEXP, SEXP, SEXP);
-extern SEXP _readr_read_file_(SEXP, SEXP);
-extern SEXP _readr_read_file_raw_(SEXP);
-extern SEXP _readr_read_lines_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_read_lines_chunked_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_read_lines_raw_(SEXP, SEXP, SEXP);
-extern SEXP _readr_read_lines_raw_chunked_(SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_read_tokens_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_read_tokens_chunked_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_stream_delim_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_tokenize_(SEXP, SEXP, SEXP);
-extern SEXP _readr_type_convert_col(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_utctime_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_whitespaceColumns(SEXP, SEXP, SEXP);
-extern SEXP _readr_write_file_(SEXP, SEXP);
-extern SEXP _readr_write_file_raw_(SEXP, SEXP);
-extern SEXP _readr_write_lines_(SEXP, SEXP, SEXP, SEXP);
-extern SEXP _readr_write_lines_raw_(SEXP, SEXP, SEXP);
-
 static const R_CallMethodDef CallEntries[] = {
 {"_readr_collectorGuess", (DL_FUNC) &_readr_collectorGuess, 3},
 {"_readr_count_fields_", (DL_FUNC) &_readr_count_fields_, 3},
@@ -256,7 +229,7 @@ static const R_CallMethodDef CallEntries[] = {
 };
 }

-extern "C" void R_init_readr(DllInfo* dll){
+extern "C" attribute_visible void R_init_readr(DllInfo* dll){
 R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
 R_useDynamicSymbols(dll, FALSE);
 R_forceSymbols(dll, TRUE);
