Skip to content

Commit

Permalink
Fix #415 (#420)
Browse files Browse the repository at this point in the history
* Fix #415

* Basic support for `import_list()` with tar formats

However, using import_list() to import archive files generated with export_list(archive = x)
raises error if x is just "*.tar.gz" (when though the files inside the
archive are ".csv"). The archive must be named like "*.csv.tar.gz".

Slightly annonying.

* Update tests

* Update test again

* Update doc
  • Loading branch information
chainsawriot authored May 20, 2024
1 parent d5775f3 commit 4f53898
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 21 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Bug fixes

- Fix #412, prevent double usage of `which` for archive formats
- Fix #415, both `import_list()` and `export_list()` support tar archives.

# rio 1.0.2

Expand Down
16 changes: 13 additions & 3 deletions R/compression.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,12 @@ parse_archive <- function(file, which, file_type, ...) {
return(.parse_rutils(filename = file, file_type = file_type))
}
if (file_type == "zip") {
file_list <- utils::unzip(file, list = TRUE)$Name
extract_func <- utils::unzip
}
if (file_type %in% c("tar", "tar.gz", "tar.bz2")) {
file_list <- utils::untar(file, list = TRUE)
extract_func <- utils::untar
}

file_list <- .list_archive(file, file_type)
d <- tempfile()
dir.create(d)

Expand All @@ -97,6 +95,18 @@ parse_archive <- function(file, which, file_type, ...) {
return(file.path(d, which))
}

.list_archive <- function(file, file_type = c("zip", "tar", "tar.gz", "tar.bz2")) {
## just a simple wrapper to unify the interface of utils::unzip and utils::untar
file_type <- match.arg(file_type)
if (file_type == "zip") {
file_list <- utils::unzip(file, list = TRUE)$Name
}
if (file_type %in% c("tar", "tar.gz", "tar.bz2")) {
file_list <- utils::untar(file, list = TRUE)
}
return(file_list)
}

.compress_rutils <- function(filename, cfile, ext, remove = TRUE, FUN = gzfile) {
## Caution: Please note that remove = TRUE by default, it will delete `filename`!
if (ext == "bzip2") {
Expand Down
8 changes: 6 additions & 2 deletions R/export_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,12 @@
export_list <- function(x, file, archive = "", ...) {
.check_file(file, single_only = FALSE)
archive_format <- find_compress(archive)
supported_archive_formats <- c("zip", "tar", "tar.gz", "tar.bz2")
if (!is.na(archive_format$compress) && !archive_format$compress %in% supported_archive_formats) {
stop("'archive' is specified but format is not supported. Only zip and tar formats are supported.", call. = FALSE)
}
if (inherits(x, "data.frame")) {
stop("'x' must be a list. Perhaps you want export()?")
stop("'x' must be a list. Perhaps you want export()?", call. = FALSE)
}

outfiles <- .create_outfiles(file, x)
Expand All @@ -68,7 +72,7 @@ export_list <- function(x, file, archive = "", ...) {
}
if (!is.na(archive_format$compress)) {
.create_directory_if_not_exists(archive)
compress_out(archive, outfiles_normalized)
compress_out(archive, outfiles_normalized, type = archive_format$compress)
unlink(outfiles_normalized)
return(invisible(archive))
}
Expand Down
15 changes: 7 additions & 8 deletions R/import_list.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' @title Import list of data frames
#' @description Use [import()] to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, zipped directory in a zip file, or HTML file)
#' @param file A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip file, or HTML file), or a vector of file paths for multiple files to be imported.
#' @description Use [import()] to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, compressed directory in a zip file or tar archive, or HTML file)
#' @param file A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip file, tar archive, or HTML file), or a vector of file paths for multiple files to be imported.
#' @param which If `file` is a single file path, this specifies which objects should be extracted (passed to [import()]'s `which` argument). Ignored otherwise.
#' @param rbind A logical indicating whether to pass the import list of data frames through [data.table::rbindlist()].
#' @param rbind_label If `rbind = TRUE`, a character string specifying the name of a column to add to the data frame indicating its source file.
Expand All @@ -11,7 +11,7 @@
#' @inheritSection import Which
#' @inherit import references
#' @return If `rbind=FALSE` (the default), a list of a data frames. Otherwise, that list is passed to [data.table::rbindlist()] with `fill = TRUE` and returns a data frame object of class set by the `setclass` argument; if this operation fails, the list is returned.
#' @details When file is a vector of file paths and any files are missing, those files are ignored (with warnings) and this function will not raise any error.
#' @details When file is a vector of file paths and any files are missing, those files are ignored (with warnings) and this function will not raise any error. For compressed files, the file name must also contain information about the file format of all compressed files, e.g. `files.csv.zip` for this function to work.
#' @examples
#' ## For demo, a temp. file path is created with the file extension .xlsx
#' xlsx_file <- tempfile(fileext = ".xlsx")
Expand Down Expand Up @@ -89,7 +89,8 @@ import_list <- function(file, setclass = getOption("rio.import.class", "data.fra
if (get_info(file)$format == "rdata") {
return(.import.rio_rdata(file = file, .return_everything = TRUE, ...))
}
if (!get_info(file)$format %in% c("html", "xlsx", "xls", "zip")) {
archive_format <- find_compress(file)
if (!get_info(file)$format %in% c("html", "xlsx", "xls") && !archive_format$compress %in% c("zip", "tar", "tar.gz", "tar.bz2")) {
which <- 1
whichnames <- NULL
}
Expand Down Expand Up @@ -118,16 +119,14 @@ import_list <- function(file, setclass = getOption("rio.import.class", "data.fra
whichnames <- whichnames[which]
}
}
if (get_info(file)$format %in% c("zip")) {
if (archive_format$compress %in% c("zip", "tar", "tar.gz", "tar.bz2")) {
whichnames <- .list_archive(file, archive_format$compress)
if (missing(which)) {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
which <- seq_along(whichnames)
names(which) <- .strip_exts(whichnames)
} else if (is.character(which)) {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
whichnames <- whichnames[whichnames %in% which]
} else {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
names(which) <- .strip_exts(whichnames)
}
}
Expand Down
6 changes: 3 additions & 3 deletions man/import_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 1 addition & 5 deletions tests/testthat/test_compress.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,7 @@ test_that("Prevent the reuse of `which` for zip and tar", {
withr::with_tempfile("data_path", fileext = paste0(".xlsx.", format), code = {
rio::export(list(some_iris = head(iris)), data_path)
expect_error(import(data_path), NA)
if (format == "zip") {
raw_file <- utils::unzip(data_path, list = TRUE)$Name[1]
} else {
raw_file <- utils::untar(data_path, list = TRUE)
}
raw_file <- .list_archive(data_path, find_compress(data_path)$compress)[1]
expect_error(import(data_path, which = raw_file), NA)
expect_error(suppressWarnings(import(data_path, which = "some_iris")))
})
Expand Down
12 changes: 12 additions & 0 deletions tests/testthat/test_export_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ test_that("export_list() works", {
})
})

test_that("archive formats, #415", {
withr::with_tempdir({
mylist <- list(mtcars3 = mtcars[1:10, ], mtcars2 = mtcars[11:20, ], mtcars1 = mtcars[21:32, ])
expect_error(export_list(mylist, file = paste0("file_", 1:3, ".csv"), archive = "archive.csv.gz"), "specified but format is not supported")
expect_error(export_list(mylist, file = paste0("file_", 1:3, ".csv"), archive = "archive.csv.bz2"), "specified but format is not supported")
expect_error(export_list(mylist, file = paste0("file_", 1:3, ".csv"), archive = "archive.csv.zip"), NA)
expect_error(export_list(mylist, file = paste0("file_", 1:3, ".csv"), archive = "archive.csv.tar"), NA)
expect_error(export_list(mylist, file = paste0("file_", 1:3, ".csv"), archive = "archive.csv.tar.gz"), NA)
expect_error(export_list(mylist, file = paste0("file_", 1:3, ".csv"), archive = "archive.csv.tar.bz2"), NA)
})
})

test_that("List length of one, #385", {
withr::with_tempdir({
example1 <- list("iris" = iris)
Expand Down
27 changes: 27 additions & 0 deletions tests/testthat/test_import_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,33 @@ test_that("Import single file from zip via import_list()", {
})
})

test_that("Import multiple files from zip via import_list()", {
withr::with_tempfile("data_file", fileext = ".csv.zip", code = {
mylist <- list(mtcars3 = mtcars[1:10, ], mtcars2 = mtcars[11:20, ], mtcars1 = mtcars[21:32, ])
expect_error(export_list(mylist, file = paste0("mtcars", 1:3, ".csv"), archive = data_file), NA)
expect_error(res <- import_list(data_file), NA)
expect_true(is.list(res))
expect_equal(length(res), 3)
expect_true(is.data.frame(res[[1]]))
expect_true(is.data.frame(res[[2]]))
expect_true(is.data.frame(res[[3]]))
})
})

test_that("Import multiple files from zip via import_list()", {
skip_if(getRversion() <= "4.0")
withr::with_tempfile("data_file", fileext = ".csv.tar.gz", code = {
mylist <- list(mtcars3 = mtcars[1:10, ], mtcars2 = mtcars[11:20, ], mtcars1 = mtcars[21:32, ])
expect_error(export_list(mylist, file = paste0("mtcars", 1:3, ".csv"), archive = data_file), NA)
expect_error(res <- import_list(data_file), NA)
expect_true(is.list(res))
expect_equal(length(res), 3)
expect_true(is.data.frame(res[[1]]))
expect_true(is.data.frame(res[[2]]))
expect_true(is.data.frame(res[[3]]))
})
})

test_that("Using setclass in import_list()", {
withr::with_tempfile("data_file", fileext = ".rds", code = {
export(mtcars, data_file)
Expand Down

0 comments on commit 4f53898

Please sign in to comment.