Skip to content

Commit

Permalink
Add num_threads argument to functions
Browse files Browse the repository at this point in the history
Fixes #1201
  • Loading branch information
jimhester committed Apr 30, 2021
1 parent 95f4fc3 commit 77885d5
Show file tree
Hide file tree
Showing 14 changed files with 130 additions and 24 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ export(read_table2)
export(read_tsv)
export(read_tsv_chunked)
export(readr_example)
export(readr_threads)
export(should_show_types)
export(show_progress)
export(spec)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

* `write_file()` now forces its argument before opening the output file (#1158)

* All `read_*()` and `write_*()` functions gain a `num_threads` argument to control the number of processing threads they use (#1201)

## Additional features and fixes

* `read_*()` functions gain a `show_col_types` argument; if set to `FALSE`, this turns off showing the column types unconditionally.
Expand Down
13 changes: 9 additions & 4 deletions R/lines.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ read_lines <- function(file, skip = 0, skip_empty_rows = FALSE, n_max = Inf,
locale = default_locale(),
na = character(),
lazy = TRUE,
num_threads = readr_threads(),
progress = show_progress()) {
if (edition_first()) {
if (is.infinite(n_max)) {
Expand All @@ -58,13 +59,15 @@ read_lines <- function(file, skip = 0, skip_empty_rows = FALSE, n_max = Inf,
lifecycle::deprecate_soft("2.0.0", "readr::read_lines(skip_empty_rows = )")
}

vroom::vroom_lines(file, skip = skip, locale = locale, n_max = n_max, progress = progress, altrep = lazy, na = na)
vroom::vroom_lines(file, skip = skip, locale = locale, n_max = n_max, progress = progress, altrep = lazy, na = na, num_threads = num_threads)
}

#' @export
#' @rdname read_lines
read_lines_raw <- function(file, skip = 0,
n_max = -1L, progress = show_progress()) {
n_max = -1L,
num_threads = readr_threads(),
progress = show_progress()) {
if (empty_file(file)) {
return(list())
}
Expand All @@ -77,7 +80,9 @@ read_lines_raw <- function(file, skip = 0,
#' @return `write_lines()` returns `x`, invisibly.
#' @export
#' @rdname read_lines
write_lines <- function(x, file, sep = "\n", na = "NA", append = FALSE, path = deprecated()) {
write_lines <- function(x, file, sep = "\n", na = "NA", append = FALSE,
num_threads = readr_threads(),
path = deprecated()) {
is_raw <- is.list(x) && inherits(x[[1]], "raw")

if (is_raw || edition_first()) {
Expand Down Expand Up @@ -105,7 +110,7 @@ write_lines <- function(x, file, sep = "\n", na = "NA", append = FALSE, path = d
return(invisible(x))
}

vroom::vroom_write_lines(as.character(x), file, eol = sep, na = na, append = append)
vroom::vroom_write_lines(as.character(x), file, eol = sep, na = na, append = append, num_threads = num_threads)

invisible(x)
}
16 changes: 11 additions & 5 deletions R/read_delim.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ NULL
#' setting option `readr.show_progress` to `FALSE`.
#' @param lazy Read values lazily? By default the file is initially only
#' indexed. The actual values are read lazily on-demand when accessed.
#' @param num_threads The number of processing threads to use for initial
#' parsing and lazy reading of data.
#' @return A [tibble()]. If there are parsing problems, a warning tells you
#' how many, and you can retrieve the details with [problems()].
#' @export
Expand Down Expand Up @@ -115,6 +117,7 @@ read_delim <- function(file, delim = NULL, quote = '"',
na = c("", "NA"), quoted_na = TRUE,
comment = "", trim_ws = FALSE,
skip = 0, n_max = Inf, guess_max = min(1000, n_max),
num_threads = readr_threads(),
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE, lazy = TRUE) {
Expand Down Expand Up @@ -142,7 +145,7 @@ read_delim <- function(file, delim = NULL, quote = '"',
vroom::vroom(file, delim = delim, col_names = col_names, col_types = col_types,
skip = skip, n_max = n_max, na = na, quote = quote, comment = comment, trim_ws = trim_ws,
escape_double = escape_double, escape_backslash = escape_backslash, locale = locale, guess_max = guess_max,
progress = progress, altrep = lazy, show_col_types = show_col_types)
progress = progress, altrep = lazy, show_col_types = show_col_types, num_threads = num_threads)
}

#' @rdname read_delim
Expand All @@ -151,6 +154,7 @@ read_csv <- function(file, col_names = TRUE, col_types = NULL,
locale = default_locale(), na = c("", "NA"),
quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
skip = 0, n_max = Inf, guess_max = min(1000, n_max),
num_threads = readr_threads(),
progress = show_progress(), show_col_types = should_show_types(), skip_empty_rows = TRUE, lazy = TRUE) {
if (edition_first()) {
tokenizer <- tokenizer_csv(na = na, quoted_na = quoted_na, quote = quote,
Expand All @@ -170,7 +174,7 @@ read_csv <- function(file, col_names = TRUE, col_types = NULL,
skip = skip, n_max = n_max, na = na, quote = quote, comment = comment, trim_ws = trim_ws,
escape_double = TRUE, escape_backslash = FALSE, locale = locale, guess_max = guess_max,
show_col_types = show_col_types,
progress = progress, altrep = lazy)
progress = progress, altrep = lazy, num_threads = num_threads)
}

#' @rdname read_delim
Expand All @@ -180,6 +184,7 @@ read_csv2 <- function(file, col_names = TRUE, col_types = NULL,
na = c("", "NA"), quoted_na = TRUE, quote = "\"",
comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
guess_max = min(1000, n_max), progress = show_progress(),
num_threads = readr_threads(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE, lazy = TRUE) {

Expand All @@ -200,7 +205,7 @@ read_csv2 <- function(file, col_names = TRUE, col_types = NULL,
skip = skip, n_max = n_max, na = na, quote = quote, comment = comment, trim_ws = trim_ws,
escape_double = TRUE, escape_backslash = FALSE, locale = locale, guess_max = guess_max,
show_col_types = show_col_types,
progress = progress, altrep = lazy)
progress = progress, altrep = lazy, num_threads = num_threads)
}

#' @rdname read_delim
Expand All @@ -210,7 +215,8 @@ read_tsv <- function(file, col_names = TRUE, col_types = NULL,
na = c("", "NA"), quoted_na = TRUE, quote = "\"",
comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
guess_max = min(1000, n_max), progress = show_progress(),
show_col_types = should_show_types(),
num_threads = readr_threads(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE, lazy = TRUE) {
tokenizer <- tokenizer_tsv(na = na, quoted_na = quoted_na, quote = quote,
comment = comment, trim_ws = trim_ws, skip_empty_rows = skip_empty_rows)
Expand All @@ -223,7 +229,7 @@ read_tsv <- function(file, col_names = TRUE, col_types = NULL,
vroom::vroom(file, delim = "\t", col_names = col_names,
col_types = col_types, locale = locale, skip = skip, comment = comment,
n_max = n_max, guess_max = guess_max, progress = progress,
show_col_types = show_col_types, altrep = lazy)
show_col_types = show_col_types, altrep = lazy, num_threads = num_threads)
}

# Helper functions for reading from delimited files ----------------------------
Expand Down
5 changes: 3 additions & 2 deletions R/read_fwf.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ read_fwf <- function(file, col_positions = fwf_empty(file, skip, n = guess_max),
locale = default_locale(), na = c("", "NA"),
comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
guess_max = min(n_max, 1000), progress = show_progress(),
show_col_types = should_show_types(),
num_threads = readr_threads(),
show_col_types = should_show_types(),
lazy = TRUE, skip_empty_rows = TRUE) {
if (edition_first()) {
ds <- datasource(file, skip = skip, skip_empty_rows = skip_empty_rows)
Expand Down Expand Up @@ -78,7 +79,7 @@ read_fwf <- function(file, col_positions = fwf_empty(file, skip, n = guess_max),
vroom::vroom_fwf(file, col_positions = col_positions, col_types = col_types,
locale = locale, na = na, comment = comment, trim_ws = trim_ws, skip = skip,
n_max = n_max, guess_max = guess_max, show_col_types = show_col_types,
progress = progress, altrep = lazy)
progress = progress, altrep = lazy, num_threads = num_threads)
}

#' @rdname read_fwf
Expand Down
22 changes: 22 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,28 @@ is_integerish <- function(x) {
floor(x) == x
}

#' Determine how many threads readr should use when processing
#'
#' The number of threads returned can be set by
#' - The global option `readr.num_threads`
#' - The environment variable `VROOM_THREADS`
#' - The value of [parallel::detectCores()]
#'
#' The sources are consulted in the order listed above and the first one that
#' is available is used. When the option `readr.num_threads` is not set, the
#' value obtained from `VROOM_THREADS` or [parallel::detectCores()] is stored
#' in that option, so later calls reuse it.
#'
#' If the resulting value is missing, is not a single number, or is not
#' positive, `1L` is returned.
#'
#' @return A single positive integer.
#' @export
readr_threads <- function() {
  res <- getOption("readr.num_threads")

  if (is.null(res)) {
    # `Sys.getenv()` falls back to the detected core count when
    # `VROOM_THREADS` is unset. A non-numeric value is coerced to `NA` here
    # (and replaced by 1 below), so the coercion warning is only noise.
    res <- suppressWarnings(
      as.integer(Sys.getenv("VROOM_THREADS", parallel::detectCores()))
    )
    options("readr.num_threads" = res)
  } else {
    # The option is user supplied and may be a double or a string; normalise
    # it so the return type does not depend on where the value came from.
    res <- suppressWarnings(as.integer(res))
  }

  # Check the length first: `||` needs scalar operands.
  if (length(res) != 1L || is.na(res) || res <= 0L) {
    res <- 1L
  }

  res
}

#' @export
`[.spec_tbl_df` <- function(x, ...) {
attr(x, "spec") <- NULL
Expand Down
37 changes: 25 additions & 12 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#' Unix style newlines, or `"\r\n"` for Windows style newlines.
#' @param path \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}
#' @return `write_*()` returns the input `x` invisibly.
#' @inheritParams read_delim
#' @references Florian Loitsch, Printing Floating-Point Numbers Quickly and
#' Accurately with Integers, PLDI '10,
#' <http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf>
Expand All @@ -67,7 +68,9 @@
#'
#' \dontshow{setwd(.old_wd)}
write_delim <- function(x, file, delim = " ", na = "NA", append = FALSE,
col_names = !append, quote_escape = "double", eol = "\n", path = deprecated()) {
col_names = !append, quote_escape = "double", eol = "\n",
num_threads = readr_threads(),
path = deprecated()) {
if (is_present(path)) {
deprecate_warn("1.4.0", "write_delim(path = )", "write_delim(file = )")
file <- path
Expand All @@ -84,27 +87,31 @@ write_delim <- function(x, file, delim = " ", na = "NA", append = FALSE,
return(invisible(x_out))
}
vroom::vroom_write(x, file, delim = delim, col_names = col_names, append = append,
na = na, eol = eol, escape = quote_escape)
na = na, eol = eol, escape = quote_escape, num_threads = num_threads)

invisible(x_out)
}

#' @rdname write_delim
#' @export
write_csv <- function(x, file, na = "NA", append = FALSE, col_names = !append,
quote_escape = "double", eol = "\n", path = deprecated()) {
quote_escape = "double", eol = "\n",
num_threads = readr_threads(),
path = deprecated()) {
if (is_present(path)) {
deprecate_warn("1.4.0", "write_csv(path = )", "write_csv(file = )")
file <- path
}
write_delim(x, file, delim = ",", na = na, append = append,
col_names = col_names, quote_escape = quote_escape, eol = eol)
col_names = col_names, quote_escape = quote_escape, eol = eol, num_threads = num_threads)
}

#' @rdname write_delim
#' @export
write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
quote_escape = "double", eol = "\n", path = deprecated()) {
quote_escape = "double", eol = "\n",
num_threads = readr_threads(),
path = deprecated()) {
if (is_present(path)) {
deprecate_warn("1.4.0", "write_csv2(path = )", "write_csv2(file = )")
file <- path
Expand All @@ -113,7 +120,7 @@ write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
x_out <- x
x <- change_decimal_separator(x, decimal_mark = ",")
write_delim(x, file, delim = ";", na = na, append = append,
col_names = col_names, quote_escape = quote_escape, eol = eol)
col_names = col_names, quote_escape = quote_escape, eol = eol, num_threads = num_threads)

invisible(x_out)
}
Expand All @@ -122,7 +129,9 @@ write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
#' @export
write_excel_csv <- function(x, file, na = "NA", append = FALSE,
col_names = !append, delim = ",", quote_escape = "double",
eol = "\n", path = deprecated()) {
eol = "\n",
num_threads = readr_threads(),
path = deprecated()) {
if (is_present(path)) {
deprecate_warn("1.4.0", "write_excel_csv(path = )", "write_excel_csv(file = )")
file <- path
Expand All @@ -143,7 +152,7 @@ write_excel_csv <- function(x, file, na = "NA", append = FALSE,
return(invisible(x_out))
}
vroom::vroom_write(x, file, delim, col_names = col_names, append = append,
na = na, bom = !append, eol = eol
na = na, bom = !append, eol = eol, num_threads = num_threads
)

invisible(x_out)
Expand All @@ -153,7 +162,9 @@ write_excel_csv <- function(x, file, na = "NA", append = FALSE,
#' @export
write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,
col_names = !append, delim = ";", quote_escape = "double",
eol = "\n", path = deprecated()) {
eol = "\n",
num_threads = readr_threads(),
path = deprecated()) {
if (is_present(path)) {
deprecate_warn("1.4.0", "write_excel_csv2(path = )", "write_excel_csv2(file = )")
file <- path
Expand All @@ -170,7 +181,7 @@ write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,

x[] <- lapply(x, output_column)
write_excel_csv(x, file, na, append, col_names, delim, quote_escape = quote_escape,
eol = eol
eol = eol, num_threads = num_threads
)

invisible(x_out)
Expand All @@ -179,14 +190,16 @@ write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,
#' @rdname write_delim
#' @export
write_tsv <- function(x, file, na = "NA", append = FALSE, col_names = !append,
quote_escape = "double", eol = "\n", path = deprecated()) {
quote_escape = "double", eol = "\n",
num_threads = readr_threads(),
path = deprecated()) {
if (is_present(path)) {
deprecate_warn("1.4.0", "write_tsv(path = )", "write_tsv(file = )")
file <- path
}

write_delim(x, file, delim = '\t', na = na, append = append, col_names =
col_names, quote_escape = quote_escape, eol = eol
col_names, quote_escape = quote_escape, eol = eol, num_threads = num_threads
)
}

Expand Down
7 changes: 7 additions & 0 deletions man/read_delim.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_fwf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 77885d5

Please sign in to comment.