diff --git a/DESCRIPTION b/DESCRIPTION index fcb8852b..668323bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: covidregionaldata Title: Subnational Data for COVID-19 Epidemiology -Version: 0.9.2.3000 +Version: 0.9.2.4000 Authors@R: c(person(given = "Joseph", family = "Palmer", diff --git a/NAMESPACE b/NAMESPACE index 35d1f272..7c85cb04 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -61,6 +61,7 @@ importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,group_by_at) +importFrom(dplyr,group_vars) importFrom(dplyr,if_else) importFrom(dplyr,lag) importFrom(dplyr,last_col) diff --git a/NEWS.md b/NEWS.md index 16faffd2..8062c9c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,7 @@ This release is currently under development ## Bug fixes +- Fixed a bug in `fill_empty_dates_with_na()` caused by changes made in version `1.2.0` of `tidyr`. - Fixed a bug in the data sourced from Germany so that instead of treating it as a line list of individuals it is treated as a relatively finely resolved count data which needs to be summed up (by @sbfnk). # covidregionaldata 0.9.2 diff --git a/R/processing.R b/R/processing.R index 75b63ce1..81c5ec4f 100644 --- a/R/processing.R +++ b/R/processing.R @@ -51,16 +51,21 @@ set_negative_values_to_zero <- function(data) { #' @family compulsory_processing #' @concept compulsory_processing #' @importFrom tidyr complete full_seq nesting -#' @importFrom dplyr starts_with +#' @importFrom dplyr starts_with group_by ungroup group_vars #' @importFrom rlang !!! syms fill_empty_dates_with_na <- function(data) { regions <- select(data, starts_with("level_")) %>% names() + + groups <- group_vars(data) + data <- data %>% + ungroup() %>% complete( date = full_seq(data$date, period = 1), nesting(!!!syms(regions)) - ) + ) %>% + group_by(across(.cols = all_of(groups))) return(data) } @@ -258,9 +263,9 @@ process_internal <- function(clean_data, level, group_vars, "hosp_new", "hosp_total", "tested_new", "tested_total" )), everything() - ) %>% - arrange(.data$date, all_of(group_vars_standard[1])) + ) } + dat <- ungroup(dat) if (localise) { diff --git a/R/shared-methods.R b/R/shared-methods.R index 62a88ba8..0433831e 100644 --- a/R/shared-methods.R +++ b/R/shared-methods.R @@ -640,7 +640,8 @@ CountryDataClass <- R6::R6Class("CountryDataClass", if (!is.null(self$target_regions)) { self$target_regions <- countryname( self$target_regions, - destination = "country.name.en" + destination = "country.name.en", + warn = FALSE ) if (all(is.na(self$target_regions))) { stop("No countries found with target names") diff --git a/tests/testthat/custom_data/ecdc.rds b/tests/testthat/custom_data/ecdc.rds index 9e6c2294..6edff880 100644 Binary files a/tests/testthat/custom_data/ecdc.rds and b/tests/testthat/custom_data/ecdc.rds differ diff --git a/tests/testthat/custom_data/mexico_level_1_snap.rds b/tests/testthat/custom_data/mexico_level_1_snap.rds index cc89f66d..71abefd0 100644 Binary files a/tests/testthat/custom_data/mexico_level_1_snap.rds and b/tests/testthat/custom_data/mexico_level_1_snap.rds differ diff --git a/tests/testthat/custom_data/mexico_level_2_snap.rds b/tests/testthat/custom_data/mexico_level_2_snap.rds index b6dfc25b..8b021b02 100644 Binary files a/tests/testthat/custom_data/mexico_level_2_snap.rds and b/tests/testthat/custom_data/mexico_level_2_snap.rds differ diff --git a/tests/testthat/custom_data/who.rds b/tests/testthat/custom_data/who.rds index 14b102ee..6f004077 100644 Binary files a/tests/testthat/custom_data/who.rds and b/tests/testthat/custom_data/who.rds differ diff --git a/tests/testthat/custom_tests/mock_data.R b/tests/testthat/custom_tests/mock_data.R index 4b09b694..006cffdf 100644 --- a/tests/testthat/custom_tests/mock_data.R +++ b/tests/testthat/custom_tests/mock_data.R @@ -207,12 +207,16 @@ get_expected_data_for_fill_empty_dates_with_na_test <- function() { # full data is data with all dates/regions + some NAs in the cases column expected_data <- data.frame(expand.grid(dates, regions)) colnames(expected_data) <- c("date", "level_1_region") + expected_data$date <- as.Date(expected_data$date) expected_data$level_1_region <- as.character(expected_data$level_1_region) expected_data <- expected_data %>% dplyr::arrange(date, level_1_region) %>% dplyr::left_join(region_codes, by = c("level_1_region" = "region")) expected_data$cases <- c(1:5, rep(NA, 4), 10:12) + expected_data <- dplyr::select( + expected_data, date, level_1_region, level_1_region_code, everything() + ) return(dplyr::tibble(expected_data)) } @@ -234,10 +238,16 @@ get_expected_data_for_complete_cumulative_columns_test <- function() { partial_data <- expected_data[-c(6:9), ] # manually add cumulative cases to get expected data - full_data_with_cum_cases_filled <- covidregionaldata:::fill_empty_dates_with_na(partial_data) - full_data_with_cum_cases_filled <- dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date) - full_data_with_cum_cases_filled <- cbind(full_data_with_cum_cases_filled, as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15))) + full_data_with_cum_cases_filled <- partial_data %>% + covidregionaldata:::fill_empty_dates_with_na() + full_data_with_cum_cases_filled <- + dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date) + full_data_with_cum_cases_filled <- + cbind( + full_data_with_cum_cases_filled, + as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15)) + ) colnames(full_data_with_cum_cases_filled)[5] <- "cases_total" - return(dplyr::tibble(full_data_with_cum_cases_filled)) + return(full_data_with_cum_cases_filled) } diff --git a/tests/testthat/test-processing.R b/tests/testthat/test-processing.R index 92f6410a..e13ce6ad 100644 --- a/tests/testthat/test-processing.R +++ b/tests/testthat/test-processing.R @@ -80,19 +80,28 @@ test_that("fill_empty_dates_with_na fills empty dates with NA", { expected_data <- get_expected_data_for_fill_empty_dates_with_na_test() # partial data deletes some rows (i.e. gets rid of some dates - all the ones # with NA in cases) + expected_data <- expected_data + partial_data <- expected_data[-c(6:9), ] - expect_equal(fill_empty_dates_with_na(partial_data), expected_data) + expect_equal( + fill_empty_dates_with_na(partial_data), + expected_data + ) expected_data <- dplyr::mutate( expected_data, level_2_region = level_1_region, level_2_region_code = level_1_region_code ) %>% + group_by(level_2_region, level_2_region_code) %>% select( - date, level_2_region, level_2_region_code, - level_1_region, level_1_region_code, cases + date, level_1_region, level_1_region_code, + level_2_region, level_2_region_code, cases ) partial_data <- expected_data[-c(6:9), ] - expect_equal(fill_empty_dates_with_na(partial_data), expected_data) + expect_equal( + fill_empty_dates_with_na(partial_data), + expected_data + ) }) test_that("complete_cumulative_columns works", { diff --git a/tests/testthat/test-regional-datasets.R b/tests/testthat/test-regional-datasets.R index 4a6061ca..f3f865ec 100644 --- a/tests/testthat/test-regional-datasets.R +++ b/tests/testthat/test-regional-datasets.R @@ -19,40 +19,51 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { download <- getOption("testDownload") } - # get datasets for testing - sources <- get_available_datasets() %>% - dplyr::filter(.data$type %in% - c("national", "regional")) %>% - dplyr::select( - source = class, - level_1_region, level_2_region, level_3_region - ) %>% - tidyr::pivot_longer( - cols = -source, - names_to = "level", - values_to = "regions" - ) %>% - dplyr::mutate( - level = stringr::str_split(level, "_"), - level = purrr::map_chr(level, ~ .[2]) - ) %>% - tidyr::drop_na(regions) - - # filter out target datasets if (!is.null(source_of_interest)) { - sources <- sources %>% - dplyr::filter(source %in% source_of_interest) + test_regions <- TRUE + }else{ + test_regions <- FALSE + } + if (!is.null(getOption("testRegions"))) { + test_regions <- getOption("testRegions") } - # apply tests to each data source in turn - sources %>% - dplyr::rowwise() %>% - dplyr::group_split() %>% - purrr::walk( - ~ test_regional_dataset( - source = .$source[[1]], - level = .$level[[1]], - download = download + if (test_regions) { + # get datasets for testing + sources <- get_available_datasets() %>% + dplyr::filter(.data$type %in% + c("national", "regional")) %>% + dplyr::select( + source = class, + level_1_region, level_2_region, level_3_region + ) %>% + tidyr::pivot_longer( + cols = -source, + names_to = "level", + values_to = "regions" + ) %>% + dplyr::mutate( + level = stringr::str_split(level, "_"), + level = purrr::map_chr(level, ~ .[2]) + ) %>% + tidyr::drop_na(regions) + + # filter out target datasets + if (!is.null(source_of_interest)) { + sources <- sources %>% + dplyr::filter(source %in% source_of_interest) + } + + # apply tests to each data source in turn + sources %>% + dplyr::rowwise() %>% + dplyr::group_split() %>% + purrr::walk( + ~ test_regional_dataset( + source = .$source[[1]], + level = .$level[[1]], + download = download + ) ) - ) + } }