diff --git a/.github/workflows/pr-checklist.yaml b/.github/workflows/pr-checklist.yaml index 3c292ca4..b3e982c9 100644 --- a/.github/workflows/pr-checklist.yaml +++ b/.github/workflows/pr-checklist.yaml @@ -14,4 +14,4 @@ jobs: issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: '👋 Thanks for opening this pull request! Can you please run through the following checklist before requesting review (ticking as complete or if not relevant). \n\n - [ ] Read our [contribution guidelines](https://github.com/epiforecasts/covidregionaldata/wiki/Contributing) if you have not already done so. \n- [ ] If you have altered an existing class please run the tests locally (using `devtools::load_all(); devtools::test()`) first setting `options(testDownload=TRUE, testSource=class-name)` and report your findings. \n- [ ] If you have added a new data class please run the tests locally for that class (using `devtools::load_all(); devtools::test()`). \n- [ ] Check your code passes our CI checks and review any style and code coverage warnings. \n- [ ] Comment with details if unable to get our CI checks to pass or unable to remove all warnings. \n\nThank you again for the contribution. If making large scale changes consider using our `pre-commit` hooks (see the [contributing guide](https://github.com/epiforecasts/covidregionaldata/wiki/Contributing)) to more easily comply with our guidelines.'}) + body: '👋 Thanks for opening this pull request! Can you please run through the following checklist before requesting review (ticking as complete or if not relevant). \n\n - [ ] Read our [contribution guidelines](https://github.com/epiforecasts/covidregionaldata/wiki/Contributing) if you have not already done so. \n- [ ] If you have altered an existing class please run the tests locally (using `devtools::load_all(); devtools::test()`) first setting `options(testDownload=TRUE, testSource=class-name)` and report your findings. \n- [ ] If you have added a new data class please run the tests locally for that class (using `devtools::load_all(); devtools::test()`). \n- [ ] Check your code passes our CI checks and review any style and code coverage warnings. \n- [ ] Comment with details if unable to get our CI checks to pass or unable to remove all warnings. \n- [ ] Update the [news](https://github.com/epiforecasts/covidregionaldata/blob/master/NEWS.md) file with information on your changes (crediting yourself at the same time) \n\nThank you again for the contribution. If making large scale changes consider using our `pre-commit` hooks (see the [contributing guide](https://github.com/epiforecasts/covidregionaldata/wiki/Contributing)) to more easily comply with our guidelines.'}) diff --git a/DESCRIPTION b/DESCRIPTION index 20e44186..22712616 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -93,7 +93,6 @@ Imports: jsonlite, lifecycle, lubridate, - magrittr, memoise, purrr, R6, @@ -101,11 +100,8 @@ Imports: rlang, stringi, stringr, - tibble, tidyr (>= 1.0.0), - tidyselect, vroom, - withr, xml2 Suggests: ggplot2, diff --git a/NAMESPACE b/NAMESPACE index 5cdf4ccd..35d1f272 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,12 +48,14 @@ importFrom(countrycode,countrycode) importFrom(countrycode,countryname) importFrom(dplyr,"%>%") importFrom(dplyr,across) +importFrom(dplyr,all_of) importFrom(dplyr,arrange) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_rows) importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,do) +importFrom(dplyr,ends_with) importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,full_join) @@ -78,6 +80,7 @@ importFrom(dplyr,summarise) importFrom(dplyr,tally) importFrom(dplyr,tibble) importFrom(dplyr,transmute) +importFrom(dplyr,tribble) importFrom(dplyr,ungroup) importFrom(dplyr,vars) importFrom(httr,GET) @@ -95,10 +98,10 @@ importFrom(lubridate,month) importFrom(lubridate,year) importFrom(lubridate,ymd) importFrom(lubridate,ymd_hms) -importFrom(magrittr,"%>%") importFrom(memoise,cache_filesystem) importFrom(memoise,memoise) importFrom(purrr,compact) +importFrom(purrr,keep) importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(purrr,map_lgl) @@ -113,17 +116,12 @@ importFrom(rlang,"!!") importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,syms) -importFrom(stringi,stri_replace_all) -importFrom(stringi,stri_trans_general) -importFrom(stringi,stri_trim_both) +importFrom(stringr,str_conv) importFrom(stringr,str_detect) importFrom(stringr,str_replace_all) importFrom(stringr,str_to_sentence) importFrom(stringr,str_to_title) -importFrom(tibble,add_column) -importFrom(tibble,as_tibble) -importFrom(tibble,tibble) -importFrom(tibble,tribble) +importFrom(stringr,str_trim) importFrom(tidyr,complete) importFrom(tidyr,drop_na) importFrom(tidyr,fill) @@ -133,13 +131,8 @@ importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) importFrom(tidyr,replace_na) importFrom(tidyr,separate) -importFrom(tidyselect,all_of) -importFrom(tidyselect,ends_with) -importFrom(tidyselect,starts_with) -importFrom(tidyselect,vars_select_helpers) importFrom(utils,download.file) importFrom(utils,untar) importFrom(vroom,vroom) -importFrom(withr,with_envvar) importFrom(xml2,xml_find_first) importFrom(xml2,xml_text) diff --git a/NEWS.md b/NEWS.md index 61e9897b..4a270315 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,8 +9,9 @@ This release is currently under development ## Other changes +* Change the data source for Switzerland to draw data from the Swiss Federal Office of Public Health (FOPH) * Updated the package logo to include the newly supported data sets. - +* Reduced the number of package dependencies (@bisaloo and @RichardMN) ## Bug fixes - Fixed a bug in the data sourced from Germany so that instead of treating it as a line list of individuals it is treated as a relatively finely resolved count data which needs to be summed up (by @sbfnk). diff --git a/R/Belgium.R b/R/Belgium.R index 24b9775d..8207eb74 100644 --- a/R/Belgium.R +++ b/R/Belgium.R @@ -57,13 +57,13 @@ Belgium <- R6::R6Class("Belgium", source_url = "https://epistat.wiv-isp.be/covid/", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble tribble + #' @importFrom dplyr tibble tribble set_region_codes = function() { - self$codes_lookup$`1` <- tibble::tibble( + self$codes_lookup$`1` <- tibble( level_1_region_code = c("BE-BRU", "BE-VLG", "BE-WAL"), level_1_region = c("Brussels", "Flanders", "Wallonia") ) - self$codes_lookup$`2` <- tibble::tribble( + self$codes_lookup$`2` <- tribble( ~level_2_region_code, ~level_2_region, ~level_1_region_code, "BE-VAN", "Antwerpen", "BE-VLG", "BE-WBR", "BrabantWallon", "BE-WAL", @@ -82,8 +82,7 @@ Belgium <- R6::R6Class("Belgium", #' @description Downloads data from source and (for Belgium) #' applies an initial data patch. - #' @importFrom dplyr select mutate filter bind_rows - #' @importFrom tibble tribble + #' @importFrom dplyr select mutate filter bind_rows tribble download = function() { # do the actual downloading using the parent download method super$download() @@ -92,7 +91,7 @@ Belgium <- R6::R6Class("Belgium", # For now, we filter out the broken lines and replace them # with the following data shim - fixed_lines <- tibble::tribble( + fixed_lines <- tribble( ~DATE, ~PROVINCE, ~REGION, ~AGEGROUP, ~SEX, ~CASES, "2020-04-22", "Limburg", "Flanders", "50-59", "F", 10, "2021-02-17", "VlaamsBrabant", "Flanders", "10-19", "M", 12 diff --git a/R/Brazil.R b/R/Brazil.R index 6be4ef3a..ba47b462 100644 --- a/R/Brazil.R +++ b/R/Brazil.R @@ -47,7 +47,7 @@ Brazil <- R6::R6Class("Brazil", source_url = "https://github.com/wcota/covid19br/blob/master/README.en.md", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tribble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup <- tibble( state_name = c( diff --git a/R/Canada.R b/R/Canada.R index c6e3bb4c..b0036a39 100644 --- a/R/Canada.R +++ b/R/Canada.R @@ -44,7 +44,7 @@ Canada <- R6::R6Class("Canada", source_url = "https://open.canada.ca/data/en/dataset/261c32ab-4cfd-4f81-9dea-7b64065690dc", # nolint #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { canada_codes <- tibble( code = c( diff --git a/R/Colombia.R b/R/Colombia.R index c1a3154c..b920f1d9 100644 --- a/R/Colombia.R +++ b/R/Colombia.R @@ -41,7 +41,6 @@ Colombia <- R6::R6Class("Colombia", source_url = "https://github.com/danielcs88/colombia_covid-19/", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble #' @importFrom dplyr mutate set_region_codes = function() { self$codes_lookup$`1` <- covidregionaldata::colombia_codes diff --git a/R/Cuba.R b/R/Cuba.R index b286652a..f6c13c4c 100644 --- a/R/Cuba.R +++ b/R/Cuba.R @@ -42,7 +42,7 @@ Cuba <- R6::R6Class("Cuba", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c( diff --git a/R/Estonia.R b/R/Estonia.R index 564601b8..0cf17a55 100644 --- a/R/Estonia.R +++ b/R/Estonia.R @@ -43,7 +43,7 @@ Estonia <- R6::R6Class("Estonia", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c("EE-37", diff --git a/R/France.R b/R/France.R index 643f6536..5c0fa701 100644 --- a/R/France.R +++ b/R/France.R @@ -50,7 +50,6 @@ France <- R6::R6Class("France", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble #' @importFrom dplyr select set_region_codes = function() { self$codes_lookup$`1` <- france_codes %>% diff --git a/R/Germany.R b/R/Germany.R index 3108357a..67d047c5 100644 --- a/R/Germany.R +++ b/R/Germany.R @@ -44,8 +44,7 @@ Germany <- R6::R6Class("Germany", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble - #' @importFrom dplyr mutate + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c( diff --git a/R/India.R b/R/India.R index 56903124..a1040db3 100644 --- a/R/India.R +++ b/R/India.R @@ -37,7 +37,7 @@ India <- R6::R6Class("India", source_url = "https://www.covid19india.org", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c( diff --git a/R/Italy.R b/R/Italy.R index 30a5b731..18fd963e 100644 --- a/R/Italy.R +++ b/R/Italy.R @@ -42,7 +42,7 @@ Italy <- R6::R6Class("Italy", source_url = "https://github.com/pcm-dpc/COVID-19/blob/master/README_EN.md", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c( diff --git a/R/JHU.R b/R/JHU.R index 4d303593..23e2925c 100644 --- a/R/JHU.R +++ b/R/JHU.R @@ -84,7 +84,6 @@ JHU <- R6::R6Class("JHU", # rename to country name source_url = "https://github.com/CSSEGISandData/COVID-19/", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble set_region_codes = function() { self$codes_lookup$`1` <- JHU_codes }, diff --git a/R/Lithuania.R b/R/Lithuania.R index a1b891fe..db931df0 100644 --- a/R/Lithuania.R +++ b/R/Lithuania.R @@ -184,8 +184,7 @@ Lithuania <- R6::R6Class("Lithuania", #' @description Common data cleaning for both levels #' # nolint start - #' @importFrom dplyr mutate group_by summarise if_else filter select bind_rows rename left_join everything across lead - #' @importFrom tidyselect all_of + #' @importFrom dplyr mutate group_by summarise if_else filter select bind_rows rename left_join everything across lead all_of #' @importFrom lubridate as_date # nolint end clean_common = function() { @@ -244,7 +243,7 @@ Lithuania <- R6::R6Class("Lithuania", # or qualitative) sum_cols <- names(select( self$data$raw$main, - "population":tidyselect::last_col() + "population":dplyr::last_col() )) sum_cols <- sum_cols[!grepl("prc|map_colors", sum_cols)] @@ -345,7 +344,6 @@ Lithuania <- R6::R6Class("Lithuania", #' provided by the source at the level 2 (municipality) regional level. #' #' @importFrom dplyr group_by summarise ungroup full_join across if_else - #' @importFrom tidyselect vars_select_helpers clean_level_1 = function() { self$data$clean <- self$data$clean %>% group_by( @@ -354,7 +352,7 @@ Lithuania <- R6::R6Class("Lithuania", ) %>% summarise( across( - tidyselect::vars_select_helpers$where(is.numeric), + where(is.numeric), sum ) ) %>% diff --git a/R/Mexico.R b/R/Mexico.R index 4c09e291..a94ddfb6 100644 --- a/R/Mexico.R +++ b/R/Mexico.R @@ -54,7 +54,6 @@ Mexico <- R6::R6Class("Mexico", source_url = "https://datos.covid-19.conacyt.mx", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble #' @importFrom dplyr select set_region_codes = function() { self$codes_lookup$`1` <- covidregionaldata::mexico_codes %>% diff --git a/R/Netherlands.R b/R/Netherlands.R index 94a4fc55..a4afdf05 100644 --- a/R/Netherlands.R +++ b/R/Netherlands.R @@ -46,7 +46,6 @@ Netherlands <- R6::R6Class("Netherlands", source_url = "https://data.rivm.nl/covid-19/", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble set_region_codes = function() { }, diff --git a/R/SouthAfrica.R b/R/SouthAfrica.R index 152ca767..47463cf7 100644 --- a/R/SouthAfrica.R +++ b/R/SouthAfrica.R @@ -41,7 +41,7 @@ SouthAfrica <- R6::R6Class("SouthAfrica", source_url = "https://github.com/dsfsi/covid19za", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c( diff --git a/R/Switzerland.R b/R/Switzerland.R index bc296b90..020b7f5b 100644 --- a/R/Switzerland.R +++ b/R/Switzerland.R @@ -2,31 +2,6 @@ #' @description Information for downloading, cleaning #' and processing COVID-19 region data for Switzerland #' -#' @section Liechtenstein: -#' Liechtenstein is not a canton of Switzerland, but is presented in the -#' source data as a peer of Swiss cantons and assigned the two letter code -#' `FL`. `covidregionaldata` modifies this and presents the region code -#' for Liechtenstein as `FL-FL`, consistent with the Swiss ISO 3166-2 codes -#' which are of the form `CH-BE`, `CH-ZH`, `CH-VD`, ... -#' -#' If you do not wish to work with Liechtenstein -#' data, filter out on this code. Note that this is labelled as a ISO 3166-2 -#' code but Liechtenstein's real ISO 3166-2 codes refer to sub-national -#' regions. -#' -#' @section Additional data: -#' -#' In addition to the standard `covidregionaldata` columns provided, -#' the OpenDataZH source data provides other figures for ICU occupancy, -#' number of patients on ventilators, and the how many individuals are -#' isolated or quarantined. These columns are passed through unchanged. - -#' Further detail on them can be found at -# nolint start -#' \url{https://github.com/openZH/covid_19/#swiss-cantons-and-principality-of-liechtenstein-unified-dataset} -#' @source \url{https://github.com/openZH/covid_19/} -# nolint end -#' #' @export #' @concept dataset #' @family subnational @@ -48,12 +23,12 @@ Switzerland <- R6::R6Class("Switzerland", supported_region_names = list("1" = "canton"), #' @field supported_region_codes A list of region codes in order of level. supported_region_codes = list("1" = "iso_3166_2"), - #' @field common_data_urls List of named links to raw data. - # nolint start + #' @field common_data_urls List of named links to raw data. This url links + #' to a JSON file which provides the addresses for the most recently-updated + #' CSV files, which are then downloaded. common_data_urls = list( - "main" = "https://github.com/openZH/covid_19/raw/master/COVID19_Fallzahlen_CH_total_v2.csv" + "main" = "https://www.covid19.admin.ch/api/data/context" ), - # nolint end #' @field source_data_cols existing columns within the raw data source_data_cols = c( "hosp_new", @@ -63,13 +38,13 @@ Switzerland <- R6::R6Class("Switzerland", "tested_total" ), #' @field source_text Plain text description of the source of the data - source_text = "Open Data, Canton of Zurich", + source_text = "Swiss Federal Office of Public Health FOPH", #' @field source_url Website address for explanation/introduction of the #' data - source_url = "https://github.com/openZH/covid_19/", + source_url = "https://www.covid19.admin.ch/en/overview", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup$`1` <- tibble( code = c( @@ -94,19 +69,71 @@ Switzerland <- R6::R6Class("Switzerland", ) }, + #' @description Download function to get raw data. Downloads + #' the updated list of CSV files using `download_JSON`, filters + #' that to identify the required CSV files, then uses the parent + #' method `download` to download the CSV files. + #' @importFrom purrr keep + download = function() { + message_verbose( + self$verbose, + paste0("Downloading updated URLs from ", self$common_data_urls$main)) + + super$download_JSON() + + self$data_urls <- + self$data$raw$main$data$sources$individual$csv$daily %>% + keep(names(.) %in% c("cases", "test", "death", "hosp")) + + super$download() + }, + #' @description Switzerland specific state level data cleaning - #' @importFrom dplyr select filter mutate left_join rename + #' @importFrom dplyr select filter mutate left_join rename full_join #' @importFrom lubridate as_date ymd #' @importFrom rlang .data #' clean_common = function() { - self$data$clean <- self$data$raw[["main"]] %>% - select(-time, -source) %>% + cases <- self$data$raw$cases %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + cases_new = entries, + cases_total = sumTotal) + hosp <- self$data$raw$hosp %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + hosp_new = entries, + hosp_total = sumTotal) + deaths <- self$data$raw$death %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + deaths_new = entries, + deaths_total = sumTotal) + tests <- self$data$raw$test %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + # note that the data has entries_pos and entries_neg and we're + # currently not using it. + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + tested_new = entries, + tested_total = sumTotal) + + self$data$clean <- + full_join(cases, deaths, by = c("date", "level_1_region_code")) %>% + full_join(hosp, by = c("date", "level_1_region_code")) %>% + full_join(tests, by = c("date", "level_1_region_code")) %>% mutate( level_1_region_code = if_else( - .data$abbreviation_canton_and_fl == "FL", + .data$level_1_region_code == "FL", "FL-FL", - paste0("CH-", .data$abbreviation_canton_and_fl) + paste0("CH-", .data$level_1_region_code) ), date = as_date(ymd(.data$date)) ) %>% @@ -114,15 +141,7 @@ Switzerland <- R6::R6Class("Switzerland", self$codes_lookup$`1`, by = c("level_1_region_code" = "code") ) %>% - select(-abbreviation_canton_and_fl) %>% - rename( - level_1_region = .data$region, - cases_total = .data$ncumul_conf, - deaths_total = .data$ncumul_deceased, - hosp_new = .data$new_hosp, - recovered_total = .data$ncumul_released, - tested_total = .data$ncumul_tested - ) + rename(level_1_region = region) } ) ) diff --git a/R/UK.R b/R/UK.R index 05d1d55a..547f4da2 100644 --- a/R/UK.R +++ b/R/UK.R @@ -450,8 +450,7 @@ UK <- R6::R6Class("UK", #' Section 2, "2. Estimated new hospital cases" #' @importFrom lubridate year month #' @importFrom readxl read_excel cell_limits - #' @importFrom tibble as_tibble - #' @importFrom dplyr mutate select %>% group_by summarise left_join + #' @importFrom dplyr mutate select %>% group_by summarise left_join as_tibble #' @importFrom tidyr pivot_longer #' @param clean_data Cleaned UK covid-19 data #' @param nhs_data NHS region data diff --git a/R/USA.R b/R/USA.R index a00b9ece..4ae7a0cf 100644 --- a/R/USA.R +++ b/R/USA.R @@ -49,7 +49,7 @@ USA <- R6::R6Class("USA", source_url = "https://github.com/nytimes/covid-19-data", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tribble + #' @importFrom dplyr tibble set_region_codes = function() { self$codes_lookup <- usa_codes <- tibble( level_1_region_code = c( diff --git a/R/Vietnam.R b/R/Vietnam.R index 12758396..44062df8 100644 --- a/R/Vietnam.R +++ b/R/Vietnam.R @@ -47,7 +47,6 @@ Vietnam <- R6::R6Class("Vietnam", source_url = "https://covid19.ncsc.gov.vn", #' @description Set up a table of region codes for clean data - #' @importFrom tibble tibble set_region_codes = function() { self$codes_lookup$`1` <- covidregionaldata::vietnam_codes }, @@ -65,8 +64,7 @@ Vietnam <- R6::R6Class("Vietnam", #' @importFrom dplyr filter select mutate rename tibble as_tibble full_join #' @importFrom tidyr replace_na drop_na separate #' @importFrom purrr map - #' @importFrom stringi stri_trans_general stri_trim_both stri_replace_all - #' @importFrom stringr str_to_title str_replace_all + #' @importFrom stringr str_conv str_trim str_to_title str_replace_all #' @importFrom lubridate dmy clean_common = function() { # The first three elements of self$data$raw are the data @@ -121,8 +119,8 @@ Vietnam <- R6::R6Class("Vietnam", # #tidyr::drop_na(date, region_name) %>% mutate( - level_1_region = stri_trans_general(level_1_region, "latin-ascii"), - level_1_region = stri_trim_both(level_1_region), + level_1_region = str_conv(level_1_region, "ASCII"), + level_1_region = str_trim(level_1_region, side = "both"), level_1_region = str_replace_all(level_1_region, "\\(.*\\)|-| ", ""), level_1_region = str_to_title(level_1_region), diff --git a/R/get_available_datasets.R b/R/get_available_datasets.R index 90f828e1..171fded0 100644 --- a/R/get_available_datasets.R +++ b/R/get_available_datasets.R @@ -18,7 +18,6 @@ #' @family interface #' @importFrom rlang .data #' @importFrom dplyr select bind_rows filter -#' @importFrom tibble as_tibble #' @export #' @examples #' # see all available datasets diff --git a/R/get_linelist.R b/R/get_linelist.R index b0d6a179..6b5f48ca 100644 --- a/R/get_linelist.R +++ b/R/get_linelist.R @@ -24,7 +24,6 @@ #' @importFrom lifecycle deprecate_warn #' @importFrom dplyr if_else select mutate filter #' @importFrom lubridate dmy -#' @importFrom tibble as_tibble #' @importFrom utils download.file untar #' @examples #' \dontrun{ diff --git a/R/processing.R b/R/processing.R index 66887605..23a0b2ad 100644 --- a/R/processing.R +++ b/R/processing.R @@ -7,7 +7,7 @@ #' @return A tibble with relevant NA columns added #' @family compulsory_processing #' @concept compulsory_processing -#' @importFrom tibble tibble add_column +#' @importFrom dplyr mutate #' @importFrom rlang !!! add_extra_na_cols <- function(data) { expected_col_names <- c( @@ -17,7 +17,7 @@ add_extra_na_cols <- function(data) { new_cols <- rep(list(NA_real_), length(expected_col_names)) names(new_cols) <- expected_col_names - data <- add_column( + data <- mutate( data, !!!new_cols[!(names(new_cols) %in% names(data))] ) @@ -50,9 +50,8 @@ set_negative_values_to_zero <- function(data) { #' @return A tibble with rows of NAs added. #' @family compulsory_processing #' @concept compulsory_processing -#' @importFrom tibble tibble #' @importFrom tidyr complete full_seq nesting -#' @importFrom tidyselect starts_with +#' @importFrom dplyr starts_with #' @importFrom rlang !!! syms fill_empty_dates_with_na <- function(data) { regions <- select(data, starts_with("level_")) %>% @@ -76,7 +75,7 @@ fill_empty_dates_with_na <- function(data) { #' @family compulsory_processing #' @concept compulsory_processing #' @importFrom tidyr fill -#' @importFrom tidyselect all_of +#' @importFrom dplyr all_of complete_cumulative_columns <- function(data) { cumulative_col_names <- c( "deaths_total", "cases_total", "recovered_total", @@ -99,11 +98,9 @@ complete_cumulative_columns <- function(data) { #' @return A data frame with extra columns if required #' @family compulsory_processing #' @concept compulsory_processing -#' @importFrom dplyr mutate group_by_at arrange vars starts_with lag +#' @importFrom dplyr mutate group_by_at arrange vars starts_with lag ends_with #' @importFrom purrr walk2 #' @importFrom tidyr replace_na -#' @importFrom tidyselect ends_with -#' @importFrom tibble tibble #' @importFrom rlang !! := calculate_columns_from_existing_data <- function(data) { possible_counts <- c("cases", "deaths", "hosp", "recovered", "tested") @@ -150,7 +147,6 @@ calculate_columns_from_existing_data <- function(data) { #' @param data A data table #' @return A data table, totalled up #' @importFrom dplyr left_join group_by summarise select arrange -#' @importFrom tibble tibble #' @family optional_processing #' @concept optional_processing totalise_data <- function(data) { @@ -223,10 +219,9 @@ run_optional_processing_fns <- function(data, process_fns) { #' processing steps #' @concept utility #' @family processing -#' @importFrom dplyr do group_by_at across ungroup select everything arrange +#' @importFrom dplyr do group_by_at across ungroup select everything arrange all_of #' @importFrom dplyr rename #' @importFrom tidyr drop_na -#' @importFrom tidyselect all_of #' @importFrom rlang !!! process_internal <- function(clean_data, level, group_vars, totals = FALSE, localise = TRUE, diff --git a/R/shared-methods.R b/R/shared-methods.R index 3b413380..62a88ba8 100644 --- a/R/shared-methods.R +++ b/R/shared-methods.R @@ -351,8 +351,7 @@ DataClass <- R6::R6Class( #' field. #' @param level A character string indicating the level to filter at. #' Defaults to using the `filter_level` field if not specified - #' @importFrom tidyselect all_of - #' @importFrom dplyr select filter pull + #' @importFrom dplyr select filter pull all_of available_regions = function(level) { if (is.null(self$data$clean)) { stop("Data must first be cleaned using the clean method") @@ -505,7 +504,7 @@ DataClass <- R6::R6Class( #' @description Create a table of summary information for the data set #' being processed. - #' @importFrom tibble tibble + #' @importFrom dplyr tibble #' @return Returns a single row summary tibble containing the origin of the #' data source, class, level 1 and 2 region names, the type of data, #' the urls of the raw data and the columns present in the raw data. diff --git a/R/utils.R b/R/utils.R index da992a3d..80083d40 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,19 +1,6 @@ -#' Pipe operator -#' -#' @description See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -#' @name %>% -#' @rdname pipe -#' @keywords internal +#' @importFrom dplyr %>% #' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @param lhs A value or the magrittr placeholder. -#' @param rhs A function call using the magrittr semantics. -#' @return The result of calling `rhs(lhs)`. -NULL - -#' @importFrom rlang .data -rlang::`.data` +dplyr::`%>%` #' Custom CSV reading function #' @@ -26,8 +13,7 @@ rlang::`.data` #' @return A data table #' @importFrom memoise memoise cache_filesystem #' @importFrom vroom vroom -#' @importFrom tibble tibble -#' @importFrom withr with_envvar +#' @importFrom dplyr tibble #' @concept utility csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) { read_csv_fun <- vroom @@ -39,14 +25,35 @@ csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) { } if (verbose) { message("Downloading data from ", file) - data <- read_csv_fun(file, ..., guess_max = guess_max) + data <- read_csv_fun(file, progress = TRUE, ..., guess_max = guess_max) } else { - with_envvar( - new = c("VROOM_SHOW_PROGRESS" = "false"), - data <- suppressWarnings( - suppressMessages( - read_csv_fun(file, ..., guess_max = guess_max) - ) + data <- suppressWarnings( + suppressMessages( + read_csv_fun(file, progress = FALSE, ..., guess_max = guess_max) + ) + ) + } + return(tibble(data)) +} + +#' Custom JSON reading function +#' +#' @description Checks for use of memoise and then uses vroom::vroom. +#' @param file A URL or filepath to a JSON +#' @param ... extra parameters to be passed to jsonlite::fromJSON +#' @inheritParams message_verbose +#' @return A data table +#' @importFrom dplyr tibble +#' @importFrom jsonlite fromJSON +#' @concept utility +json_reader <- function(file, verbose = FALSE, ...) { + if (verbose) { + message("Downloading data from ", file) + data <- fromJSON(file, ...) + } else { + data <- suppressWarnings( + suppressMessages( + fromJSON(file, ...) ) ) } @@ -320,3 +327,7 @@ make_new_data_source <- function(source, type = "subnational", ) make_github_workflow(source) } + +# Hack to work around the fact that `where()` is not exported +# (https://github.com/r-lib/tidyselect/issues/201) +utils::globalVariables("where") diff --git a/README.Rmd b/README.Rmd index feac3205..a42bda15 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( [![R-CMD-check](https://github.com/epiforecasts/covidregionaldata/workflows/R-CMD-check/badge.svg)](https://github.com/epiforecasts/covidregionaldata/actions) [![Codecov test coverage](https://codecov.io/gh/epiforecasts/covidregionaldata/branch/master/graph/badge.svg)](https://codecov.io/gh/epiforecasts/covidregionaldata?branch=master) [![Data status](https://img.shields.io/badge/Data-status-lightblue.svg?style=flat)](https://epiforecasts.io/covidregionaldata/articles/supported-countries.html) [![metacran downloads](http://cranlogs.r-pkg.org/badges/grand-total/covidregionaldata?color=ff69b4)](https://cran.r-project.org/package=covidregionaldata) -[![MIT license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/epiforecasts/covidregionaldata/blob/master/LICENSE.md/) [![GitHub contributors](https://img.shields.io/github/contributors/epiforecasts/covidregionaldata)](https://github.com/epiforecasts/covidregionaldata/graphs/contributors) [![Discord](https://img.shields.io/discord/864828485981306890?logo=Discord)](https://discord.gg/9YPDDADVt3) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-yellow.svg)](https://makeapullrequest.com/) [![GitHub commits](https://img.shields.io/github/commits-since/epiforecasts/covidregionaldata/0.9.2.svg?color=orange)](https://GitHub.com/epiforecasts/covidregionaldata/commit/master/) +[![MIT license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/epiforecasts/covidregionaldata/blob/master/LICENSE.md/) [![GitHub contributors](https://img.shields.io/github/contributors/epiforecasts/covidregionaldata)](https://github.com/epiforecasts/covidregionaldata/graphs/contributors) [![Discord](https://img.shields.io/discord/864828485981306890?logo=Discord)](https://discord.gg/9YPDDADVt3) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-yellow.svg)](https://makeapullrequest.com/) [![GitHub commits](https://img.shields.io/github/commits-since/epiforecasts/covidregionaldata/0.9.2.svg?color=orange)](https://GitHub.com/epiforecasts/covidregionaldata/commit/master/) [![JOSS](https://joss.theoj.org/papers/10.21105/joss.03290/status.svg)](https://doi.org/10.21105/joss.03290) [![Zenodo](https://zenodo.org/badge/271601189.svg)](https://zenodo.org/badge/latestdoi/271601189) diff --git a/_pkgdown.yml b/_pkgdown.yml index 27a7fa1e..da51ab70 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: epiforecasts.io/covidregionaldata/ +url: https://epiforecasts.io/covidregionaldata/ template: bootstrap: 5 params: diff --git a/inst/WORDLIST b/inst/WORDLIST index 5a6b0080..abf97ef6 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -46,6 +46,7 @@ filepath flavio Flavio fns +FOPH fromJSON gb geocode diff --git a/man/Switzerland.Rd b/man/Switzerland.Rd index c5ab1201..3f942b05 100644 --- a/man/Switzerland.Rd +++ b/man/Switzerland.Rd @@ -3,38 +3,10 @@ \name{Switzerland} \alias{Switzerland} \title{Switzerland Class for downloading, cleaning and processing notification data} -\source{ -\url{https://github.com/openZH/covid_19/} -} \description{ Information for downloading, cleaning and processing COVID-19 region data for Switzerland } -\section{Liechtenstein}{ - -Liechtenstein is not a canton of Switzerland, but is presented in the -source data as a peer of Swiss cantons and assigned the two letter code -\code{FL}. \code{covidregionaldata} modifies this and presents the region code -for Liechtenstein as \code{FL-FL}, consistent with the Swiss ISO 3166-2 codes -which are of the form \code{CH-BE}, \code{CH-ZH}, \code{CH-VD}, ... - -If you do not wish to work with Liechtenstein -data, filter out on this code. Note that this is labelled as a ISO 3166-2 -code but Liechtenstein's real ISO 3166-2 codes refer to sub-national -regions. -} - -\section{Additional data}{ - - -In addition to the standard \code{covidregionaldata} columns provided, -the OpenDataZH source data provides other figures for ICU occupancy, -number of patients on ventilators, and the how many individuals are -isolated or quarantined. These columns are passed through unchanged. -Further detail on them can be found at -\url{https://github.com/openZH/covid_19/#swiss-cantons-and-principality-of-liechtenstein-unified-dataset} -} - \examples{ \dontrun{ region <- Switzerland$new(verbose = TRUE, steps = TRUE, get = TRUE) @@ -80,7 +52,9 @@ Subnational data sources \item{\code{supported_region_codes}}{A list of region codes in order of level.} -\item{\code{common_data_urls}}{List of named links to raw data.} +\item{\code{common_data_urls}}{List of named links to raw data. This url links +to a JSON file which provides the addresses for the most recently-updated +CSV files, which are then downloaded.} \item{\code{source_data_cols}}{existing columns within the raw data} @@ -95,6 +69,7 @@ data} \subsection{Public methods}{ \itemize{ \item \href{#method-set_region_codes}{\code{Switzerland$set_region_codes()}} +\item \href{#method-download}{\code{Switzerland$download()}} \item \href{#method-clean_common}{\code{Switzerland$clean_common()}} \item \href{#method-clone}{\code{Switzerland$clone()}} } @@ -104,7 +79,6 @@ data} \itemize{ \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-available_regions}{\code{covidregionaldata::DataClass$available_regions()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-clean}{\code{covidregionaldata::DataClass$clean()}}\out{} -\item \out{}\href{../../covidregionaldata/html/DataClass.html#method-download}{\code{covidregionaldata::DataClass$download()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-download_JSON}{\code{covidregionaldata::DataClass$download_JSON()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-filter}{\code{covidregionaldata::DataClass$filter()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-get}{\code{covidregionaldata::DataClass$get()}}\out{} @@ -125,6 +99,19 @@ Set up a table of region codes for clean data \if{html}{\out{
}}\preformatted{Switzerland$set_region_codes()}\if{html}{\out{
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-download}{}}} +\subsection{Method \code{download()}}{ +Download function to get raw data. Downloads +the updated list of CSV files using \code{download_JSON}, filters +that to identify the required CSV files, then uses the parent +method \code{download} to download the CSV files. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Switzerland$download()}\if{html}{\out{
}} +} + } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/pipe.Rd b/man/pipe.Rd deleted file mode 100644 index 5fa90fe3..00000000 --- a/man/pipe.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{\%>\%} -\alias{\%>\%} -\title{Pipe operator} -\usage{ -lhs \%>\% rhs -} -\arguments{ -\item{lhs}{A value or the magrittr placeholder.} - -\item{rhs}{A function call using the magrittr semantics.} -} -\value{ -The result of calling \code{rhs(lhs)}. -} -\description{ -See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -} -\keyword{internal} diff --git a/man/reexports.Rd b/man/reexports.Rd index 3b0d9eb6..bd44fd4a 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -3,7 +3,7 @@ \docType{import} \name{reexports} \alias{reexports} -\alias{.data} +\alias{\%>\%} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -11,6 +11,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{rlang}{\code{\link[rlang:tidyeval-data]{.data}}} + \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} }} diff --git a/tests/testthat/custom_tests/mock_data.R b/tests/testthat/custom_tests/mock_data.R index 7262ead6..4b09b694 100644 --- a/tests/testthat/custom_tests/mock_data.R +++ b/tests/testthat/custom_tests/mock_data.R @@ -15,7 +15,7 @@ get_expected_data_for_get_regional_data_tests_only_level_1_regions <- function() dates <- c("2020-01-31", "2020-02-01", "2020-02-02", "2020-02-03", "2020-02-04", "2020-02-05") provinces <- c("Northland", "Eastland", "Southland", "Westland", "Virginia") ## Fake region codes - region_codes <- tibble::tibble(iso_3166_2 = c("NO", "EA", "SO", "WE", "VA"), region = provinces) + region_codes <- dplyr::tibble(iso_3166_2 = c("NO", "EA", "SO", "WE", "VA"), region = provinces) expected_data_for_provinces <- list() for (i in 1:length(provinces)) { @@ -87,7 +87,7 @@ get_expected_data_for_get_regional_data_tests_only_level_1_regions <- function() ) %>% dplyr::arrange(date, province) - return(tibble::tibble(expected_data)) + return(dplyr::tibble(expected_data)) } get_input_data_for_get_regional_data_tests_only_level_1_regions <- function() { @@ -109,7 +109,7 @@ get_expected_totals_data_for_get_regional_data_tests_only_level_1_regions <- fun colnames(totals_data) <- c("province", "iso_3166_2", "cases_total", "deaths_total", "recovered_total", "hosp_total", "tested_total") totals_data <- totals_data %>% dplyr::arrange(-cases_total) - return(tibble::tibble(totals_data)) + return(dplyr::tibble(totals_data)) } @@ -117,7 +117,7 @@ get_expected_totals_data_for_get_regional_data_tests_only_level_1_regions <- fun get_input_data_for_get_regional_data_tests_with_level_2_regions <- function() { data <- get_input_data_for_get_regional_data_tests_only_level_1_regions() colnames(data)[2] <- "level_2_region" - regions_table <- tibble::tibble( + regions_table <- dplyr::tibble( level_2_region = c("Northland", "Eastland", "Southland", "Westland", "Virginia"), level_1_region = c("Oneland", "Oneland", "Twoland", "Twoland", "USA") ) @@ -137,11 +137,11 @@ get_expected_data_for_get_regional_data_tests_with_level_2_regions <- function() data <- get_expected_data_for_get_regional_data_tests_only_level_1_regions() data <- data[, -3] data$region <- rep(c("Oneland", "Oneland", "Twoland", "USA", "Twoland"), 6) - region_codes <- tibble::tibble( + region_codes <- dplyr::tibble( iso_3166_2 = c("ON", "TW", "US"), region = c("Oneland", "Twoland", "USA") ) - level_2_region_codes <- tibble::tibble( + level_2_region_codes <- dplyr::tibble( iso_3166_2_province = c("NO", "EA", "SO", "WE", "VA"), region = c( "Northland", "Eastland", "Southland", @@ -168,11 +168,11 @@ get_expected_totals_data_for_get_regional_data_tests_with_level_2_regions <- fun data <- data[, -2] data$region <- c("Oneland", "USA", "Twoland", "Twoland", "Oneland") - region_codes <- tibble::tibble( + region_codes <- dplyr::tibble( iso_3166_2 = c("ON", "TW", "US"), region = c("Oneland", "Twoland", "USA") ) - level_2_region_codes <- tibble::tibble( + level_2_region_codes <- dplyr::tibble( iso_3166_2_province = c("NO", "EA", "SO", "WE", "VA"), region = c( "Northland", "Eastland", "Southland", @@ -188,7 +188,7 @@ get_expected_totals_data_for_get_regional_data_tests_with_level_2_regions <- fun recovered_total, hosp_total, tested_total ) - return(tibble::tibble(data)) + return(dplyr::tibble(data)) } @@ -199,7 +199,7 @@ get_expected_data_for_fill_empty_dates_with_na_test <- function() { dates <- c("2020-01-31", "2020-02-01", "2020-02-02", "2020-02-03") regions <- c("Northland", "Eastland", "Wisconsin") - region_codes <- tibble::tibble( + region_codes <- dplyr::tibble( region = regions, level_1_region_code = c("NO", "EA", "WI") ) @@ -213,7 +213,7 @@ get_expected_data_for_fill_empty_dates_with_na_test <- function() { dplyr::arrange(date, level_1_region) %>% dplyr::left_join(region_codes, by = c("level_1_region" = "region")) expected_data$cases <- c(1:5, rep(NA, 4), 10:12) - return(tibble::tibble(expected_data)) + return(dplyr::tibble(expected_data)) } get_input_data_for_complete_cumulative_columns_test <- function() { @@ -239,5 +239,5 @@ get_expected_data_for_complete_cumulative_columns_test <- function() { full_data_with_cum_cases_filled <- cbind(full_data_with_cum_cases_filled, as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15))) colnames(full_data_with_cum_cases_filled)[5] <- "cases_total" - return(tibble::tibble(full_data_with_cum_cases_filled)) + return(dplyr::tibble(full_data_with_cum_cases_filled)) } diff --git a/tests/testthat/test-csv_reader.R b/tests/testthat/test-csv_reader.R index c611980a..bc166e27 100644 --- a/tests/testthat/test-csv_reader.R +++ b/tests/testthat/test-csv_reader.R @@ -1,5 +1,5 @@ test_path <- "custom_data/mtcars.csv" -target <- tibble::as_tibble(head(mtcars)) +target <- dplyr::as_tibble(head(mtcars)) test_that("csv_reader can read in a simple dataset", { test <- csv_reader(test_path) diff --git a/tests/testthat/test-processing.R b/tests/testthat/test-processing.R index e40cbc08..10fdcc3c 100644 --- a/tests/testthat/test-processing.R +++ b/tests/testthat/test-processing.R @@ -12,31 +12,31 @@ test_that("default functions are called", { "add_extra_na_cols", function(x) dplyr::mutate(x, A = A + 2), ) - x <- tibble::tibble(A = c(1, 2, 3)) - expected <- tibble::tibble("A" = c(4, 5, 6)) + x <- dplyr::tibble(A = c(1, 2, 3)) + expected <- dplyr::tibble("A" = c(4, 5, 6)) expect_identical(expected, run_default_processing_fns(x)) }) test_that("optional functions can be empty", { - x <- tibble::tibble(A = c(1, 2, 3)) + x <- dplyr::tibble(A = c(1, 2, 3)) expect_identical(x, run_optional_processing_fns(x, c())) expect_identical(x, run_optional_processing_fns(x)) expect_identical(x, run_optional_processing_fns(x, NULL)) }) test_that("optional functions run", { - x <- tibble::tibble(A = c(1, 2, 3)) + x <- dplyr::tibble(A = c(1, 2, 3)) process_fns <- c(function(x) { return(dplyr::mutate(x, A = A^2)) }) expect_identical( - tibble::tibble(A = c(1, 4, 9)), + dplyr::tibble(A = c(1, 4, 9)), run_optional_processing_fns(x, process_fns) ) }) test_that("calculate_columns_from_existing_data returns correct results", { - input_data <- tibble::tibble( + input_data <- dplyr::tibble( "date" = seq.Date(as.Date("2020-01-01"), as.Date("2020-01-07"), by = 1), "level_1_region" = c(rep("A", 4), rep("B", 3)), "cases_new" = c(0, 1, NA_integer_, 1, 1, 1, 1), @@ -68,10 +68,10 @@ test_that("add_extra_na_cols is working", { test_that("set_negative_values_to_zero works", { dates <- c(rep(Sys.Date(), 100)) values <- 49:-50 - df <- tibble::tibble(date = dates, cases_total = values) + df <- dplyr::tibble(date = dates, cases_total = values) colnames(df) <- c("date", "cases_total") - df_expected <- tibble::tibble(date = dates, cases_total = c(49:0, rep(0, 50))) + df_expected <- dplyr::tibble(date = dates, cases_total = c(49:0, rep(0, 50))) df_actual <- set_negative_values_to_zero(df) expect_equal(df_actual, df_expected) })