Add num_threads argument to functions

Fixes #1201
tidyverse · Apr 30, 2021 · 77885d5 · 77885d5
1 parent 95f4fc3
commit 77885d5
Show file tree

Hide file tree

Showing 14 changed files with 130 additions and 24 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -110,6 +110,7 @@ export(read_table2)
 export(read_tsv)
 export(read_tsv_chunked)
 export(readr_example)
+export(readr_threads)
 export(should_show_types)
 export(show_progress)
 export(spec)

diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,8 @@
 
 * `write_file()` now forces its argument before opening the output file (#1158)
 
+* All `read_*()` and `write_*()` functions gain a `num_threads` argument to control the number of processing threads they use (#1201)
+
 ## Additional features and fixes
 
 * `read_*()` functions gain a `show_col_types` argument, if set to `FALSE` this turns off showing the column types unconditionally.

diff --git a/R/lines.R b/R/lines.R
@@ -42,6 +42,7 @@ read_lines <- function(file, skip = 0, skip_empty_rows = FALSE, n_max = Inf,
                        locale = default_locale(),
                        na = character(),
                        lazy = TRUE,
+                       num_threads = readr_threads(),
                        progress = show_progress()) {
   if (edition_first()) {
     if (is.infinite(n_max)) {
@@ -58,13 +59,15 @@ read_lines <- function(file, skip = 0, skip_empty_rows = FALSE, n_max = Inf,
     lifecycle::deprecate_soft("2.0.0", "readr::read_lines(skip_empty_rows = )")
   }
 
-  vroom::vroom_lines(file, skip = skip, locale = locale, n_max = n_max, progress = progress, altrep = lazy, na = na)
+  vroom::vroom_lines(file, skip = skip, locale = locale, n_max = n_max, progress = progress, altrep = lazy, na = na, num_threads = num_threads)
 }
 
 #' @export
 #' @rdname read_lines
 read_lines_raw <- function(file, skip = 0,
-                           n_max = -1L, progress = show_progress()) {
+                           n_max = -1L,
+                           num_threads = readr_threads(),
+                           progress = show_progress()) {
   if (empty_file(file)) {
     return(list())
   }
@@ -77,7 +80,9 @@ read_lines_raw <- function(file, skip = 0,
 #' @return `write_lines()` returns `x`, invisibly.
 #' @export
 #' @rdname read_lines
-write_lines <- function(x, file, sep = "\n", na = "NA", append = FALSE, path = deprecated()) {
+write_lines <- function(x, file, sep = "\n", na = "NA", append = FALSE,
+  num_threads = readr_threads(),
+  path = deprecated()) {
   is_raw <- is.list(x) && inherits(x[[1]], "raw")
 
   if (is_raw || edition_first()) {
@@ -105,7 +110,7 @@ write_lines <- function(x, file, sep = "\n", na = "NA", append = FALSE, path = d
     return(invisible(x))
   }
 
-  vroom::vroom_write_lines(as.character(x), file, eol = sep, na = na, append = append)
+  vroom::vroom_write_lines(as.character(x), file, eol = sep, na = na, append = append, num_threads = num_threads)
 
   invisible(x)
 }
diff --git a/R/read_delim.R b/R/read_delim.R
@@ -71,6 +71,8 @@ NULL
 #'   setting option `readr.show_progress` to `FALSE`.
 #' @param lazy Read values lazily? By default the file is initially only
 #'   indexed. The actual values are read lazily on-demand when accessed.
+#' @param num_threads The number of processing threads to use for initial
+#'   parsing and lazy reading of data.
 #' @return A [tibble()]. If there are parsing problems, a warning tells you
 #'   how many, and you can retrieve the details with [problems()].
 #' @export
@@ -115,6 +117,7 @@ read_delim <- function(file, delim = NULL, quote = '"',
                        na = c("", "NA"), quoted_na = TRUE,
                        comment = "", trim_ws = FALSE,
                        skip = 0, n_max = Inf, guess_max = min(1000, n_max),
+                       num_threads = readr_threads(),
                        progress = show_progress(),
                        show_col_types = should_show_types(),
                        skip_empty_rows = TRUE, lazy = TRUE) {
@@ -142,7 +145,7 @@ read_delim <- function(file, delim = NULL, quote = '"',
   vroom::vroom(file, delim = delim, col_names = col_names, col_types = col_types,
     skip = skip, n_max = n_max, na = na, quote = quote, comment = comment, trim_ws = trim_ws,
     escape_double = escape_double, escape_backslash = escape_backslash, locale = locale, guess_max = guess_max,
-    progress = progress, altrep = lazy, show_col_types = show_col_types)
+    progress = progress, altrep = lazy, show_col_types = show_col_types, num_threads = num_threads)
 }
 
 #' @rdname read_delim
@@ -151,6 +154,7 @@ read_csv <- function(file, col_names = TRUE, col_types = NULL,
                      locale = default_locale(), na = c("", "NA"),
                      quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
                      skip = 0, n_max = Inf, guess_max = min(1000, n_max),
+                     num_threads = readr_threads(),
                      progress = show_progress(), show_col_types = should_show_types(), skip_empty_rows = TRUE, lazy = TRUE) {
   if (edition_first()) {
   tokenizer <- tokenizer_csv(na = na, quoted_na = quoted_na, quote = quote,
@@ -170,7 +174,7 @@ read_csv <- function(file, col_names = TRUE, col_types = NULL,
     skip = skip, n_max = n_max, na = na, quote = quote, comment = comment, trim_ws = trim_ws,
     escape_double = TRUE, escape_backslash = FALSE, locale = locale, guess_max = guess_max,
     show_col_types = show_col_types,
-    progress = progress, altrep = lazy)
+    progress = progress, altrep = lazy, num_threads = num_threads)
 }
 
 #' @rdname read_delim
@@ -180,6 +184,7 @@ read_csv2 <- function(file, col_names = TRUE, col_types = NULL,
                       na = c("", "NA"), quoted_na = TRUE, quote = "\"",
                       comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
                       guess_max = min(1000, n_max), progress = show_progress(),
+                      num_threads = readr_threads(),
                       show_col_types = should_show_types(),
                       skip_empty_rows = TRUE, lazy = TRUE) {
 
@@ -200,7 +205,7 @@ read_csv2 <- function(file, col_names = TRUE, col_types = NULL,
     skip = skip, n_max = n_max, na = na, quote = quote, comment = comment, trim_ws = trim_ws,
     escape_double = TRUE, escape_backslash = FALSE, locale = locale, guess_max = guess_max,
     show_col_types = show_col_types,
-    progress = progress, altrep = lazy)
+    progress = progress, altrep = lazy, num_threads = num_threads)
 }
 
 #' @rdname read_delim
@@ -210,7 +215,8 @@ read_tsv <- function(file, col_names = TRUE, col_types = NULL,
                      na = c("", "NA"), quoted_na = TRUE, quote = "\"",
                      comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
                      guess_max = min(1000, n_max), progress = show_progress(),
-                      show_col_types = should_show_types(),
+                     num_threads = readr_threads(),
+                     show_col_types = should_show_types(),
                      skip_empty_rows = TRUE, lazy = TRUE) {
   tokenizer <- tokenizer_tsv(na = na, quoted_na = quoted_na, quote = quote,
     comment = comment, trim_ws = trim_ws, skip_empty_rows = skip_empty_rows)
@@ -223,7 +229,7 @@ read_tsv <- function(file, col_names = TRUE, col_types = NULL,
   vroom::vroom(file, delim = "\t", col_names = col_names,
     col_types = col_types, locale = locale, skip = skip, comment = comment,
     n_max = n_max, guess_max = guess_max, progress = progress,
-    show_col_types = show_col_types, altrep = lazy)
+    show_col_types = show_col_types, altrep = lazy, num_threads = num_threads)
 }
 
 # Helper functions for reading from delimited files ----------------------------

diff --git a/R/read_fwf.R b/R/read_fwf.R
@@ -37,7 +37,8 @@ read_fwf <- function(file, col_positions = fwf_empty(file, skip, n = guess_max),
                      locale = default_locale(), na = c("", "NA"),
                      comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
                      guess_max = min(n_max, 1000), progress = show_progress(),
-                      show_col_types = should_show_types(),
+                     num_threads = readr_threads(),
+                     show_col_types = should_show_types(),
                      lazy = TRUE, skip_empty_rows = TRUE) {
   if (edition_first()) {
     ds <- datasource(file, skip = skip, skip_empty_rows = skip_empty_rows)
@@ -78,7 +79,7 @@ read_fwf <- function(file, col_positions = fwf_empty(file, skip, n = guess_max),
   vroom::vroom_fwf(file, col_positions = col_positions, col_types = col_types,
     locale = locale, na = na, comment = comment, trim_ws = trim_ws, skip = skip,
     n_max = n_max, guess_max = guess_max, show_col_types = show_col_types,
-    progress = progress, altrep = lazy)
+    progress = progress, altrep = lazy, num_threads = num_threads)
 }
 
 #' @rdname read_fwf

diff --git a/R/utils.R b/R/utils.R
@@ -47,6 +47,28 @@ is_integerish <- function(x) {
   floor(x) == x
 }
 
+#' Determine how many threads readr should use when processing
+#'
+#' The value is resolved from the first of these sources that is available:
+#' - The global option `readr.num_threads`
+#' - The environment variable `VROOM_THREADS`
+#' - The value of [parallel::detectCores()]
+#'
+#' When the option is unset, the resolved value is stored in
+#' `readr.num_threads`, so subsequent calls reuse it.
+#'
+#' @return A single number of threads. `1L` is returned when the resolved
+#'   value is missing, not positive, or not of length one.
+#' @export
+readr_threads <- function() {
+  res <- getOption("readr.num_threads")
+
+  if (is.null(res)) {
+    # `Sys.getenv()` returns a character value, so coerce it; a value that is
+    # not a number becomes `NA` and is handled by the fallback below.
+    res <- as.integer(Sys.getenv("VROOM_THREADS", parallel::detectCores()))
+    # Store the resolved value in the option so later calls reuse it.
+    options("readr.num_threads" = res)
+  }
+
+  # Fall back to a single thread for unusable values; the length check keeps
+  # the scalar `||` condition valid for zero-length or vector option values.
+  if (length(res) != 1L || is.na(res) || res <= 0) {
+    res <- 1L
+  }
+
+  res
+}
+
 #' @export
 `[.spec_tbl_df` <- function(x, ...) {
   attr(x, "spec") <- NULL

diff --git a/R/write.R b/R/write.R
@@ -46,6 +46,7 @@
 #'   Unix style newlines, or `"\r\n"` for Windows style newlines.
 #' @param path \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}
 #' @return `write_*()` returns the input `x` invisibly.
+#' @inheritParams read_delim
 #' @references Florian Loitsch, Printing Floating-Point Numbers Quickly and
 #' Accurately with Integers, PLDI '10,
 #' <http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf>
@@ -67,7 +68,9 @@
 #'
 #' \dontshow{setwd(.old_wd)}
 write_delim <- function(x, file, delim = " ", na = "NA", append = FALSE,
-                        col_names = !append, quote_escape = "double", eol = "\n", path = deprecated()) {
+                        col_names = !append, quote_escape = "double", eol = "\n",
+                        num_threads = readr_threads(),
+                        path = deprecated()) {
   if (is_present(path)) {
     deprecate_warn("1.4.0", "write_delim(path = )", "write_delim(file = )")
     file <- path
@@ -84,27 +87,31 @@ write_delim <- function(x, file, delim = " ", na = "NA", append = FALSE,
     return(invisible(x_out))
   }
   vroom::vroom_write(x, file, delim = delim, col_names = col_names, append = append,
-    na = na, eol = eol, escape = quote_escape)
+    na = na, eol = eol, escape = quote_escape, num_threads = num_threads)
 
   invisible(x_out)
 }
 
 #' @rdname write_delim
 #' @export
 write_csv <- function(x, file, na = "NA", append = FALSE, col_names = !append,
-                      quote_escape = "double", eol = "\n", path = deprecated()) {
+                      quote_escape = "double", eol = "\n",
+                      num_threads = readr_threads(),
+                      path = deprecated()) {
   if (is_present(path)) {
     deprecate_warn("1.4.0", "write_csv(path = )", "write_csv(file = )")
     file <- path
   }
   write_delim(x, file, delim = ",", na = na, append = append,
-    col_names = col_names, quote_escape = quote_escape, eol = eol)
+    col_names = col_names, quote_escape = quote_escape, eol = eol, num_threads = num_threads)
 }
 
 #' @rdname write_delim
 #' @export
 write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
-                       quote_escape = "double", eol = "\n", path = deprecated()) {
+                       quote_escape = "double", eol = "\n",
+                       num_threads = readr_threads(),
+                       path = deprecated()) {
   if (is_present(path)) {
     deprecate_warn("1.4.0", "write_csv2(path = )", "write_csv2(file = )")
     file <- path
@@ -113,7 +120,7 @@ write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
   x_out <- x
   x <- change_decimal_separator(x, decimal_mark = ",")
   write_delim(x, file, delim = ";", na = na, append = append,
-    col_names = col_names, quote_escape = quote_escape, eol = eol)
+    col_names = col_names, quote_escape = quote_escape, eol = eol, num_threads = num_threads)
 
   invisible(x_out)
 }
@@ -122,7 +129,9 @@ write_csv2 <- function(x, file, na = "NA", append = FALSE, col_names = !append,
 #' @export
 write_excel_csv <- function(x, file, na = "NA", append = FALSE,
                             col_names = !append, delim = ",", quote_escape = "double",
-                            eol = "\n", path = deprecated()) {
+                            eol = "\n",
+                            num_threads = readr_threads(),
+                            path = deprecated()) {
   if (is_present(path)) {
     deprecate_warn("1.4.0", "write_excel_csv(path = )", "write_excel_csv(file = )")
     file <- path
@@ -143,7 +152,7 @@ write_excel_csv <- function(x, file, na = "NA", append = FALSE,
     return(invisible(x_out))
   }
   vroom::vroom_write(x, file, delim, col_names = col_names, append = append,
-    na = na, bom = !append, eol = eol
+    na = na, bom = !append, eol = eol, num_threads = num_threads
   )
 
   invisible(x_out)
@@ -153,7 +162,9 @@ write_excel_csv <- function(x, file, na = "NA", append = FALSE,
 #' @export
 write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,
                              col_names = !append, delim = ";", quote_escape = "double",
-                             eol = "\n", path = deprecated()) {
+                             eol = "\n",
+                             num_threads = readr_threads(),
+                             path = deprecated()) {
   if (is_present(path)) {
     deprecate_warn("1.4.0", "write_excel_csv2(path = )", "write_excel_csv2(file = )")
     file <- path
@@ -170,7 +181,7 @@ write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,
 
   x[] <- lapply(x, output_column)
   write_excel_csv(x, file, na, append, col_names, delim, quote_escape = quote_escape,
-    eol = eol
+    eol = eol, num_threads = num_threads
   )
 
   invisible(x_out)
@@ -179,14 +190,16 @@ write_excel_csv2 <- function(x, file, na = "NA", append = FALSE,
 #' @rdname write_delim
 #' @export
 write_tsv <- function(x, file, na = "NA", append = FALSE, col_names = !append,
-                      quote_escape = "double", eol = "\n", path = deprecated()) {
+                      quote_escape = "double", eol = "\n",
+                      num_threads = readr_threads(),
+                      path = deprecated()) {
   if (is_present(path)) {
     deprecate_warn("1.4.0", "write_tsv(path = )", "write_tsv(file = )")
     file <- path
   }
 
   write_delim(x, file, delim = '\t', na = na, append = append, col_names =
-              col_names, quote_escape = quote_escape, eol = eol
+              col_names, quote_escape = quote_escape, eol = eol, num_threads = num_threads
   )
 }
 

diff --git a/man/read_delim.Rd b/man/read_delim.Rd
diff --git a/man/read_fwf.Rd b/man/read_fwf.Rd