Skip to content

Commit

Permalink
Merge branch 'master' into fix-netherlands-430-remove-hospitalisation
Browse files Browse the repository at this point in the history
  • Loading branch information
seabbs authored Feb 5, 2022
2 parents c048eb8 + f54b220 commit d1b7997
Show file tree
Hide file tree
Showing 14 changed files with 111 additions and 68 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: covidregionaldata
Title: Subnational Data for COVID-19 Epidemiology
Version: 0.9.2.3000
Version: 0.9.2.4000
Authors@R:
c(person(given = "Joseph",
family = "Palmer",
Expand Down Expand Up @@ -98,6 +98,7 @@ Imports:
R6,
readxl,
rlang,
stringi,
stringr,
tidyr (>= 1.0.0),
vroom,
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_vars)
importFrom(dplyr,if_else)
importFrom(dplyr,lag)
importFrom(dplyr,last_col)
Expand Down Expand Up @@ -116,12 +117,12 @@ importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,syms)
importFrom(stringr,str_conv)
importFrom(stringi,stri_trans_general)
importFrom(stringi,stri_trim_both)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_sentence)
importFrom(stringr,str_to_title)
importFrom(stringr,str_trim)
importFrom(tidyr,complete)
importFrom(tidyr,drop_na)
importFrom(tidyr,fill)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This release is currently under development

## Bug fixes

- Fixed a bug in `fill_empty_dates_with_na()` caused by changes made in version `1.2.0` of `tidyr`.
- Fixed a bug in the data sourced from Germany so that instead of treating it as a line list of individuals it is treated as a relatively finely resolved count data which needs to be summed up (by @sbfnk).

# covidregionaldata 0.9.2
Expand Down
41 changes: 22 additions & 19 deletions R/Vietnam.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ Vietnam <- R6::R6Class("Vietnam",
#' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
#' @importFrom tidyr replace_na drop_na separate
#' @importFrom purrr map
#' @importFrom stringr str_conv str_trim str_to_title str_replace_all
#' @importFrom stringr str_to_title str_replace_all
#' @importFrom stringi stri_trans_general stri_trim_both
#' @importFrom lubridate dmy
clean_common = function() {
# The first three elements of self$data$raw are the data
Expand All @@ -77,31 +78,33 @@ Vietnam <- R6::R6Class("Vietnam",
function(x) as_tibble(unlist(x),
rownames = "date")),
function(y) {
y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
separate(y, date, sep = "[.]+", into = c(NA, "province", "date"))
}
)
self$data$clean <- full_join(
full_join(
flat_all$case_by_time, flat_all$death_by_time,
by = c("province", "date"),
suffix = c(".cases", ".deaths"),
copy = TRUE
),
flat_all$recovered_by_time,
by = c("province", "date"),
suffix = c("", ".recovered"),
copy = TRUE
) %>%
index_cols <- bind_rows(
select(flat_all$case_by_time, "date", "province"),
select(flat_all$death_by_time, "date", "province"),
select(flat_all$recovered_by_time, "date", "province")) %>%
unique()

self$data$clean <- index_cols %>%
left_join(rename(flat_all$case_by_time, cases_total = value),
by = c("province", "date") ) %>%
left_join(rename(flat_all$death_by_time, deaths_total = value),
by = c("province", "date") ) %>%
left_join(rename(flat_all$recovered_by_time, recovered_total = value),
by = c("province", "date") ) %>%
# The api uses integer codes for provinces which do not
# line up with ISO 3166-2 (some of which are not numbers)
# so we use this as a temporary code to line names up
# with data.
select(
ncsc_region_code = province,
date,
cases_total = value.cases,
deaths_total = value.deaths,
recovered_total = value) %>%
cases_total,
deaths_total,
recovered_total
) %>%
mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
left_join(
self$data$raw$provinces %>%
Expand All @@ -119,8 +122,8 @@ Vietnam <- R6::R6Class("Vietnam",
#
#tidyr::drop_na(date, region_name) %>%
mutate(
level_1_region = str_conv(level_1_region, "ASCII"),
level_1_region = str_trim(level_1_region, side = "both"),
level_1_region = stri_trans_general(level_1_region, "ASCII"),
level_1_region = stri_trim_both(level_1_region),
level_1_region = str_replace_all(level_1_region,
"\\(.*\\)|-| ", ""),
level_1_region = str_to_title(level_1_region),
Expand Down
13 changes: 9 additions & 4 deletions R/processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,21 @@ set_negative_values_to_zero <- function(data) {
#' @family compulsory_processing
#' @concept compulsory_processing
#' @importFrom tidyr complete full_seq nesting
#' @importFrom dplyr starts_with
#' @importFrom dplyr starts_with group_by ungroup group_vars
#' @importFrom rlang !!! syms
fill_empty_dates_with_na <- function(data) {
regions <- select(data, starts_with("level_")) %>%
names()

groups <- group_vars(data)

data <- data %>%
ungroup() %>%
complete(
date = full_seq(data$date, period = 1),
nesting(!!!syms(regions))
)
) %>%
group_by(across(.cols = all_of(groups)))
return(data)
}

Expand Down Expand Up @@ -258,9 +263,9 @@ process_internal <- function(clean_data, level, group_vars,
"hosp_new", "hosp_total", "tested_new", "tested_total"
)),
everything()
) %>%
arrange(.data$date, all_of(group_vars_standard[1]))
)
}

dat <- ungroup(dat)

if (localise) {
Expand Down
3 changes: 2 additions & 1 deletion R/shared-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,8 @@ CountryDataClass <- R6::R6Class("CountryDataClass",
if (!is.null(self$target_regions)) {
self$target_regions <- countryname(
self$target_regions,
destination = "country.name.en"
destination = "country.name.en",
warn = FALSE
)
if (all(is.na(self$target_regions))) {
stop("No countries found with target names")
Expand Down
3 changes: 2 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
url: https://epiforecasts.io/covidregionaldata/
template:
bootstrap: 5
package: preferably
params:
bootswatch: lumen
toggle: manual
docsearch:
api_key: 721a43acf6af66699c04bd8b2af75ff1
index_name: epiforecasts
Expand Down
Binary file modified tests/testthat/custom_data/ecdc.rds
Binary file not shown.
Binary file modified tests/testthat/custom_data/mexico_level_1_snap.rds
Binary file not shown.
Binary file modified tests/testthat/custom_data/mexico_level_2_snap.rds
Binary file not shown.
Binary file modified tests/testthat/custom_data/who.rds
Binary file not shown.
18 changes: 14 additions & 4 deletions tests/testthat/custom_tests/mock_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,16 @@ get_expected_data_for_fill_empty_dates_with_na_test <- function() {
# full data is data with all dates/regions + some NAs in the cases column
expected_data <- data.frame(expand.grid(dates, regions))
colnames(expected_data) <- c("date", "level_1_region")

expected_data$date <- as.Date(expected_data$date)
expected_data$level_1_region <- as.character(expected_data$level_1_region)
expected_data <- expected_data %>%
dplyr::arrange(date, level_1_region) %>%
dplyr::left_join(region_codes, by = c("level_1_region" = "region"))
expected_data$cases <- c(1:5, rep(NA, 4), 10:12)
expected_data <- dplyr::select(
expected_data, date, level_1_region, level_1_region_code, everything()
)
return(dplyr::tibble(expected_data))
}

Expand All @@ -234,10 +238,16 @@ get_expected_data_for_complete_cumulative_columns_test <- function() {
partial_data <- expected_data[-c(6:9), ]

# manually add cumulative cases to get expected data
full_data_with_cum_cases_filled <- covidregionaldata:::fill_empty_dates_with_na(partial_data)
full_data_with_cum_cases_filled <- dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date)
full_data_with_cum_cases_filled <- cbind(full_data_with_cum_cases_filled, as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15)))
full_data_with_cum_cases_filled <- partial_data %>%
covidregionaldata:::fill_empty_dates_with_na()
full_data_with_cum_cases_filled <-
dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date)
full_data_with_cum_cases_filled <-
cbind(
full_data_with_cum_cases_filled,
as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15))
)
colnames(full_data_with_cum_cases_filled)[5] <- "cases_total"

return(dplyr::tibble(full_data_with_cum_cases_filled))
return(full_data_with_cum_cases_filled)
}
17 changes: 13 additions & 4 deletions tests/testthat/test-processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,28 @@ test_that("fill_empty_dates_with_na fills empty dates with NA", {
expected_data <- get_expected_data_for_fill_empty_dates_with_na_test()
# partial data deletes some rows (i.e. gets rid of some dates - all the ones
# with NA in cases)
expected_data <- expected_data

partial_data <- expected_data[-c(6:9), ]
expect_equal(fill_empty_dates_with_na(partial_data), expected_data)
expect_equal(
fill_empty_dates_with_na(partial_data),
expected_data
)
expected_data <- dplyr::mutate(
expected_data,
level_2_region = level_1_region,
level_2_region_code = level_1_region_code
) %>%
group_by(level_2_region, level_2_region_code) %>%
select(
date, level_2_region, level_2_region_code,
level_1_region, level_1_region_code, cases
date, level_1_region, level_1_region_code,
level_2_region, level_2_region_code, cases
)
partial_data <- expected_data[-c(6:9), ]
expect_equal(fill_empty_dates_with_na(partial_data), expected_data)
expect_equal(
fill_empty_dates_with_na(partial_data),
expected_data
)
})

test_that("complete_cumulative_columns works", {
Expand Down
75 changes: 43 additions & 32 deletions tests/testthat/test-regional-datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,40 +19,51 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
download <- getOption("testDownload")
}

# get datasets for testing
sources <- get_available_datasets() %>%
dplyr::filter(.data$type %in%
c("national", "regional")) %>%
dplyr::select(
source = class,
level_1_region, level_2_region, level_3_region
) %>%
tidyr::pivot_longer(
cols = -source,
names_to = "level",
values_to = "regions"
) %>%
dplyr::mutate(
level = stringr::str_split(level, "_"),
level = purrr::map_chr(level, ~ .[2])
) %>%
tidyr::drop_na(regions)

# filter out target datasets
if (!is.null(source_of_interest)) {
sources <- sources %>%
dplyr::filter(source %in% source_of_interest)
test_regions <- TRUE
}else{
test_regions <- FALSE
}
if (!is.null(getOption("testRegions"))) {
test_regions <- getOption("testRegions")
}

# apply tests to each data source in turn
sources %>%
dplyr::rowwise() %>%
dplyr::group_split() %>%
purrr::walk(
~ test_regional_dataset(
source = .$source[[1]],
level = .$level[[1]],
download = download
if (test_regions) {
# get datasets for testing
sources <- get_available_datasets() %>%
dplyr::filter(.data$type %in%
c("national", "regional")) %>%
dplyr::select(
source = class,
level_1_region, level_2_region, level_3_region
) %>%
tidyr::pivot_longer(
cols = -source,
names_to = "level",
values_to = "regions"
) %>%
dplyr::mutate(
level = stringr::str_split(level, "_"),
level = purrr::map_chr(level, ~ .[2])
) %>%
tidyr::drop_na(regions)

# filter out target datasets
if (!is.null(source_of_interest)) {
sources <- sources %>%
dplyr::filter(source %in% source_of_interest)
}

# apply tests to each data source in turn
sources %>%
dplyr::rowwise() %>%
dplyr::group_split() %>%
purrr::walk(
~ test_regional_dataset(
source = .$source[[1]],
level = .$level[[1]],
download = download
)
)
)
}
}

0 comments on commit d1b7997

Please sign in to comment.