Merge branch 'master' into fix-netherlands-430-remove-hospitalisation

RichardMN · Feb 5, 2022 · d1b7997 · d1b7997
2 parents c048eb8 + f54b220
commit d1b7997
Show file tree

Hide file tree

Showing 14 changed files with 111 additions and 68 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: covidregionaldata
 Title: Subnational Data for COVID-19 Epidemiology
-Version: 0.9.2.3000
+Version: 0.9.2.4000
 Authors@R:
     c(person(given = "Joseph",
              family = "Palmer",
@@ -98,6 +98,7 @@ Imports:
     R6,
     readxl,
     rlang,
+    stringi,
     stringr,
     tidyr (>= 1.0.0),
     vroom,

diff --git a/NAMESPACE b/NAMESPACE
@@ -61,6 +61,7 @@ importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
+importFrom(dplyr,group_vars)
 importFrom(dplyr,if_else)
 importFrom(dplyr,lag)
 importFrom(dplyr,last_col)
@@ -116,12 +117,12 @@ importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,syms)
-importFrom(stringr,str_conv)
+importFrom(stringi,stri_trans_general)
+importFrom(stringi,stri_trim_both)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_to_sentence)
 importFrom(stringr,str_to_title)
-importFrom(stringr,str_trim)
 importFrom(tidyr,complete)
 importFrom(tidyr,drop_na)
 importFrom(tidyr,fill)

diff --git a/NEWS.md b/NEWS.md
@@ -15,6 +15,7 @@ This release is currently under development
 
 ## Bug fixes
 
+- Fixed a bug in `fill_empty_dates_with_na()` caused by changes made in version `1.2.0` of `tidyr`. 
 - Fixed a bug in the data sourced from Germany so that instead of treating it as a line list of individuals it is treated as a relatively finely resolved count data which needs to be summed up (by @sbfnk).
 
 # covidregionaldata 0.9.2

diff --git a/R/Vietnam.R b/R/Vietnam.R
@@ -64,7 +64,8 @@ Vietnam <- R6::R6Class("Vietnam",
     #' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
     #' @importFrom tidyr replace_na drop_na separate
     #' @importFrom purrr map
-    #' @importFrom stringr str_conv str_trim str_to_title str_replace_all
+    #' @importFrom stringr str_to_title str_replace_all
+    #' @importFrom stringi stri_trans_general stri_trim_both
     #' @importFrom lubridate dmy
     clean_common = function() {
       # The first three elements of self$data$raw are the data
@@ -77,31 +78,33 @@ Vietnam <- R6::R6Class("Vietnam",
           function(x) as_tibble(unlist(x),
                                 rownames = "date")),
         function(y) {
-          y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
+          separate(y, date, sep = "[.]+", into = c(NA, "province", "date"))
         }
       )
-      self$data$clean <- full_join(
-        full_join(
-          flat_all$case_by_time, flat_all$death_by_time,
-          by = c("province", "date"),
-          suffix = c(".cases", ".deaths"),
-          copy = TRUE
-        ),
-        flat_all$recovered_by_time,
-        by = c("province", "date"),
-        suffix = c("", ".recovered"),
-        copy = TRUE
-      ) %>%
+      index_cols <- bind_rows(
+        select(flat_all$case_by_time, "date", "province"),
+        select(flat_all$death_by_time, "date", "province"),
+        select(flat_all$recovered_by_time, "date", "province")) %>%
+        unique()
+
+      self$data$clean <- index_cols %>%
+        left_join(rename(flat_all$case_by_time, cases_total = value),
+                  by = c("province", "date") ) %>%
+        left_join(rename(flat_all$death_by_time, deaths_total = value),
+                  by = c("province", "date") ) %>%
+        left_join(rename(flat_all$recovered_by_time, recovered_total = value),
+                  by = c("province", "date") ) %>%
         # The api uses integer codes for provinces which do not
         # line up with ISO 3166-2 (some of which are not numbers)
         # so we use this as a temporary code to line names up
         # with data.
         select(
           ncsc_region_code = province,
           date,
-          cases_total = value.cases,
-          deaths_total = value.deaths,
-          recovered_total = value) %>%
+          cases_total,
+          deaths_total,
+          recovered_total
+          ) %>%
         mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
         left_join(
           self$data$raw$provinces %>%
@@ -119,8 +122,8 @@ Vietnam <- R6::R6Class("Vietnam",
         #
         #tidyr::drop_na(date, region_name) %>%
         mutate(
-          level_1_region = str_conv(level_1_region, "ASCII"),
-          level_1_region = str_trim(level_1_region, side = "both"),
+          level_1_region = stri_trans_general(level_1_region, "ASCII"),
+          level_1_region = stri_trim_both(level_1_region),
           level_1_region = str_replace_all(level_1_region,
                                            "\\(.*\\)|-| ", ""),
           level_1_region = str_to_title(level_1_region),

diff --git a/R/processing.R b/R/processing.R
@@ -51,16 +51,21 @@ set_negative_values_to_zero <- function(data) {
 #' @family compulsory_processing
 #' @concept compulsory_processing
 #' @importFrom tidyr complete full_seq nesting
-#' @importFrom dplyr starts_with
+#' @importFrom dplyr starts_with group_by ungroup group_vars
 #' @importFrom rlang !!! syms
 fill_empty_dates_with_na <- function(data) {
   regions <- select(data, starts_with("level_")) %>%
     names()
+
+  groups <- group_vars(data)
+
   data <- data %>%
+    ungroup() %>%
     complete(
       date = full_seq(data$date, period = 1),
       nesting(!!!syms(regions))
-    )
+    ) %>%
+    group_by(across(.cols = all_of(groups)))
   return(data)
 }
 
@@ -258,9 +263,9 @@ process_internal <- function(clean_data, level, group_vars,
           "hosp_new", "hosp_total", "tested_new", "tested_total"
         )),
         everything()
-      ) %>%
-      arrange(.data$date, all_of(group_vars_standard[1]))
+      )
   }
+
   dat <- ungroup(dat)
 
   if (localise) {

diff --git a/R/shared-methods.R b/R/shared-methods.R
@@ -640,7 +640,8 @@ CountryDataClass <- R6::R6Class("CountryDataClass",
         if (!is.null(self$target_regions)) {
           self$target_regions <- countryname(
             self$target_regions,
-            destination = "country.name.en"
+            destination = "country.name.en",
+            warn = FALSE
           )
           if (all(is.na(self$target_regions))) {
             stop("No countries found with target names")

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -1,8 +1,9 @@
 url: https://epiforecasts.io/covidregionaldata/
 template:
   bootstrap: 5
+  package: preferably
   params:
-    bootswatch: lumen
+    toggle: manual
     docsearch:
       api_key: 721a43acf6af66699c04bd8b2af75ff1
       index_name: epiforecasts

diff --git a/tests/testthat/custom_data/ecdc.rds b/tests/testthat/custom_data/ecdc.rds
diff --git a/tests/testthat/custom_data/mexico_level_1_snap.rds b/tests/testthat/custom_data/mexico_level_1_snap.rds
diff --git a/tests/testthat/custom_data/mexico_level_2_snap.rds b/tests/testthat/custom_data/mexico_level_2_snap.rds
diff --git a/tests/testthat/custom_data/who.rds b/tests/testthat/custom_data/who.rds
diff --git a/tests/testthat/custom_tests/mock_data.R b/tests/testthat/custom_tests/mock_data.R
@@ -207,12 +207,16 @@ get_expected_data_for_fill_empty_dates_with_na_test <- function() {
   # full data is data with all dates/regions + some NAs in the cases column
   expected_data <- data.frame(expand.grid(dates, regions))
   colnames(expected_data) <- c("date", "level_1_region")
+
   expected_data$date <- as.Date(expected_data$date)
   expected_data$level_1_region <- as.character(expected_data$level_1_region)
   expected_data <- expected_data %>%
     dplyr::arrange(date, level_1_region) %>%
     dplyr::left_join(region_codes, by = c("level_1_region" = "region"))
   expected_data$cases <- c(1:5, rep(NA, 4), 10:12)
+  expected_data <- dplyr::select(
+    expected_data, date, level_1_region, level_1_region_code, everything()
+  )
   return(dplyr::tibble(expected_data))
 }
 
@@ -234,10 +238,16 @@ get_expected_data_for_complete_cumulative_columns_test <- function() {
   partial_data <- expected_data[-c(6:9), ]
 
   # manually add cumulative cases to get expected data
-  full_data_with_cum_cases_filled <- covidregionaldata:::fill_empty_dates_with_na(partial_data)
-  full_data_with_cum_cases_filled <- dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date)
-  full_data_with_cum_cases_filled <- cbind(full_data_with_cum_cases_filled, as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15)))
+  full_data_with_cum_cases_filled <- partial_data %>%
+    covidregionaldata:::fill_empty_dates_with_na()
+  full_data_with_cum_cases_filled <-
+    dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date)
+  full_data_with_cum_cases_filled <-
+   cbind(
+     full_data_with_cum_cases_filled,
+     as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15))
+   )
   colnames(full_data_with_cum_cases_filled)[5] <- "cases_total"
 
-  return(dplyr::tibble(full_data_with_cum_cases_filled))
+  return(full_data_with_cum_cases_filled)
 }
diff --git a/tests/testthat/test-processing.R b/tests/testthat/test-processing.R
@@ -80,19 +80,28 @@ test_that("fill_empty_dates_with_na fills empty dates with NA", {
   expected_data <- get_expected_data_for_fill_empty_dates_with_na_test()
   # partial data deletes some rows (i.e. gets rid of some dates - all the ones
   # with NA in cases)
+  expected_data <- expected_data
+
   partial_data <- expected_data[-c(6:9), ]
-  expect_equal(fill_empty_dates_with_na(partial_data), expected_data)
+  expect_equal(
+    fill_empty_dates_with_na(partial_data),
+    expected_data
+  )
   expected_data <- dplyr::mutate(
     expected_data,
     level_2_region = level_1_region,
     level_2_region_code = level_1_region_code
   ) %>%
+    group_by(level_2_region, level_2_region_code) %>%
     select(
-      date, level_2_region, level_2_region_code,
-      level_1_region, level_1_region_code, cases
+      date, level_1_region, level_1_region_code,
+      level_2_region, level_2_region_code, cases
     )
   partial_data <- expected_data[-c(6:9), ]
-  expect_equal(fill_empty_dates_with_na(partial_data), expected_data)
+  expect_equal(
+    fill_empty_dates_with_na(partial_data),
+    expected_data
+  )
 })
 
 test_that("complete_cumulative_columns works", {

diff --git a/tests/testthat/test-regional-datasets.R b/tests/testthat/test-regional-datasets.R
@@ -19,40 +19,51 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
     download <- getOption("testDownload")
   }
 
-  # get datasets for testing
-  sources <- get_available_datasets() %>%
-    dplyr::filter(.data$type %in%
-      c("national", "regional")) %>%
-    dplyr::select(
-      source = class,
-      level_1_region, level_2_region, level_3_region
-    ) %>%
-    tidyr::pivot_longer(
-      cols = -source,
-      names_to = "level",
-      values_to = "regions"
-    ) %>%
-    dplyr::mutate(
-      level = stringr::str_split(level, "_"),
-      level = purrr::map_chr(level, ~ .[2])
-    ) %>%
-    tidyr::drop_na(regions)
-
-  # filter out target datasets
   if (!is.null(source_of_interest)) {
-    sources <- sources %>%
-      dplyr::filter(source %in% source_of_interest)
+    test_regions <- TRUE
+  }else{
+    test_regions <- FALSE
+  }
+  if (!is.null(getOption("testRegions"))) {
+    test_regions <- getOption("testRegions")
   }
 
-  # apply tests to each data source in turn
-  sources %>%
-    dplyr::rowwise() %>%
-    dplyr::group_split() %>%
-    purrr::walk(
-      ~ test_regional_dataset(
-        source = .$source[[1]],
-        level = .$level[[1]],
-        download = download
+  if (test_regions) {
+    # get datasets for testing
+    sources <- get_available_datasets() %>%
+      dplyr::filter(.data$type %in%
+        c("national", "regional")) %>%
+      dplyr::select(
+        source = class,
+        level_1_region, level_2_region, level_3_region
+      ) %>%
+      tidyr::pivot_longer(
+        cols = -source,
+        names_to = "level",
+        values_to = "regions"
+      ) %>%
+      dplyr::mutate(
+        level = stringr::str_split(level, "_"),
+        level = purrr::map_chr(level, ~ .[2])
+      ) %>%
+      tidyr::drop_na(regions)
+
+    # filter out target datasets
+    if (!is.null(source_of_interest)) {
+      sources <- sources %>%
+        dplyr::filter(source %in% source_of_interest)
+    }
+
+    # apply tests to each data source in turn
+    sources %>%
+      dplyr::rowwise() %>%
+      dplyr::group_split() %>%
+      purrr::walk(
+        ~ test_regional_dataset(
+          source = .$source[[1]],
+          level = .$level[[1]],
+          download = download
+        )
       )
-    )
+  }
 }