diff --git a/DESCRIPTION b/DESCRIPTION index 7b29f2a..c867238 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,10 @@ Depends: R (>= 3.1) Imports: datasets, - vroom + vroom, + httr, + tibble, + utils Suggests: dplyr, ggplot2, diff --git a/R/calculate_taxes.R b/R/calculate_taxes.R index 4ddd6f7..66c8b5a 100644 --- a/R/calculate_taxes.R +++ b/R/calculate_taxes.R @@ -107,6 +107,9 @@ create_dataset_for_taxsim <- function(.data) { #' @param return_all_information Boolean (TRUE or FALSE). Whether to return all information from TAXSIM (TRUE), #' or only key information (FALSE). Returning all information returns 42 columns of output, while only #' returning key information returns 9 columns. It is faster to download results with only key information. +#' @param interface String indicating which NBER TAXSIM interface to use. Should be one of: +#' - 'ssh': Uses SSH to connect to taxsimssh.nber.org. Your system must already have SSH installed. +#' - 'http': Uses CURL to connect to https://taxsim.nber.org/uptest/webfile.cgi. Approximate max file size: 1000 rows. #' #' @section Formatting your data: #' @@ -168,7 +171,7 @@ create_dataset_for_taxsim <- function(.data) { #' Journal of Policy Analysis and Management vol 12 no 1, Winter 1993, pages 189-194. 
#' #' @export -taxsim_calculate_taxes <- function(.data, marginal_tax_rates = 'Wages', return_all_information = FALSE) { +taxsim_calculate_taxes <- function(.data, marginal_tax_rates = 'Wages', return_all_information = FALSE, interface = "ssh") { # save input ID numbers as object, so we can make sure the output ID numbers are the same input_s <- .data$taxsimid @@ -192,25 +195,55 @@ taxsim_calculate_taxes <- function(.data, marginal_tax_rates = 'Wages', return_a # save csv file of data set to a temp folder to_taxsim_tmp_filename <- tempfile(pattern = 'upload_', fileext = ".csv") - vroom::vroom_write(.data, to_taxsim_tmp_filename, delim = ",", progress = FALSE) - from_taxsim_tmp_filename <- tempfile(pattern = 'download_', fileext = ".csv") - # try uploading and downloading via ssh stop_error_message <- paste0( - "There was a problem in trying to retrieve your data.\n", - "Either we could not connect to the TAXSIM server or your data is not in the proper format.\n", - "You can try manually uploading the data to TAXSIM as an avenue of troubleshooting.\n", - "See the following address for more information: https://www.shaneorr.io/r/usincometaxes/articles/send-data-to-taxsim.html" + "There was a problem in trying to retrieve your data.\n", + "Either we could not connect to the TAXSIM server or your data is not in the proper format.\n", + "You can try manually uploading the data to TAXSIM as an avenue of troubleshooting.\n", + "See the following address for more information: https://www.shaneorr.io/r/usincometaxes/articles/send-data-to-taxsim.html" ) - std_error_filename <- tempfile(pattern = 'std_error_', fileext = ".txt") - known_hosts_file <- paste0(tempdir(), '/known_hosts') + if (interface == "ssh") { - from_taxsim <- tryCatch( - error = function(cnd) stop(stop_error_message, call. 
= FALSE), - import_data_ssh(to_taxsim_tmp_filename, from_taxsim_tmp_filename, std_error_filename, known_hosts_file, idtl) - ) + vroom::vroom_write(.data, to_taxsim_tmp_filename, delim = ",", progress = FALSE) + + # try uploading and downloading via ssh + std_error_filename <- tempfile(pattern = 'std_error_', fileext = ".txt") + known_hosts_file <- paste0(tempdir(), '/known_hosts') + + from_taxsim <- tryCatch( + error = function(cnd) stop(stop_error_message, call. = FALSE), + import_data_ssh(to_taxsim_tmp_filename, from_taxsim_tmp_filename, std_error_filename, known_hosts_file, idtl) + ) + + } else if (interface == "http") { + + # convert input data to string + data_string <- vroom::vroom_format(.data, delim = ",") + + # remove trailing newline character - causes error with TAXSIM + # and write to file + cat(sub(x = data_string, "(\r\n|\n)$", ""), + file = to_taxsim_tmp_filename) + + # create http post and send to NBER; a failed connection or an HTTP error status raises the same error as the ssh branch + http_response <- tryCatch( + error = function(cnd) stop(stop_error_message, call. = FALSE), + httr::stop_for_status(httr::POST(url = "https://taxsim.nber.org/uptest/webfile.cgi", body = list(txpydata.raw = httr::upload_file(to_taxsim_tmp_filename))))) + + # extract response body as text, decoded as UTF-8 + response_text <- httr::content(http_response, as = 'text', encoding = 'UTF-8') + + # convert text to a tibble to match vroom format + from_taxsim <- tibble::tibble( + utils::read.table(text = response_text, + header = TRUE, + sep = ",")) + + } else { + stop("Invalid value for `interface` argument.") + } message("Connected to TAXSIM server and downloaded tax data.") diff --git a/man/taxsim_calculate_taxes.Rd b/man/taxsim_calculate_taxes.Rd index 39c7026..14456a9 100644 --- a/man/taxsim_calculate_taxes.Rd +++ b/man/taxsim_calculate_taxes.Rd @@ -7,7 +7,8 @@ taxsim_calculate_taxes( .data, marginal_tax_rates = "Wages", - return_all_information = FALSE + return_all_information = FALSE, + interface = "ssh" ) } \arguments{ @@ -20,6 +21,10 @@ This data set will be sent to TAXSIM. Data frame must have specified column name \item{return_all_information}{Boolean (TRUE or FALSE). 
Whether to return all information from TAXSIM (TRUE), or only key information (FALSE). Returning all information returns 42 columns of output, while only returning key information returns 9 columns. It is faster to download results with only key information.} + +\item{interface}{String indicating which NBER TAXSIM interface to use. Should be one of: +- 'ssh': Uses SSH to connect to taxsimssh.nber.org. Your system must already have SSH installed. +- 'http': Uses CURL to connect to https://taxsim.nber.org/uptest/webfile.cgi. Approximate max file size: 1000 rows.} } \value{ The output data set contains all the information returned by \href{http://taxsim.nber.org/taxsim35/}{TAXSIM 35}, diff --git a/tests/testthat/test-calculate_taxes.R b/tests/testthat/test-calculate_taxes.R index 2569a02..c38dd01 100644 --- a/tests/testthat/test-calculate_taxes.R +++ b/tests/testthat/test-calculate_taxes.R @@ -79,3 +79,30 @@ test_that("All states work", { expect_equal(nrow(taxsim_output), 50) }) + +test_that("All interface options return same values", { + # send the same 50-state input through both interfaces and compare the results + states <- state.abb + + id_nums <- seq_along(states) + + taxsim_input <- data.frame( + taxsimid = id_nums, + mstat = 2, + year = 2018, + pwages = 50000, + state = states + ) + + ssh_results <- taxsim_calculate_taxes(taxsim_input, + return_all_information = TRUE, + interface = 'ssh') + + http_results <- taxsim_calculate_taxes(taxsim_input, + return_all_information = TRUE, + interface = 'http') + # all.equal() returns a character description (never FALSE) on mismatch, so wrap it in isTRUE() + expect(isTRUE(all.equal(ssh_results, http_results)), + failure_message = "HTTP results do not match SSH results.") +}) +