diff --git a/NAMESPACE b/NAMESPACE index 5cdf4ccd..737bd862 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -99,6 +99,7 @@ importFrom(magrittr,"%>%") importFrom(memoise,cache_filesystem) importFrom(memoise,memoise) importFrom(purrr,compact) +importFrom(purrr,keep) importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(purrr,map_lgl) diff --git a/NEWS.md b/NEWS.md index 17b3e22f..39790cf5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,10 +5,11 @@ This release is currently under development ## New data sets * Support for level 1 region data in Estonia (thanks to @RichardMN). See `?Estonia` for details. -* Support for level 1 region data in Vietnam (thanks to @biocyberman). See `?Vietname` for details. +* Support for level 1 region data in Vietnam (thanks to @biocyberman). See `?Vietnam` for details. ## Other changes +* Changed the data source for Switzerland to draw data from the Swiss Federal Office of Public Health (FOPH). * Updated the package logo to include the newly supported data sets. ## Bug fixes diff --git a/R/Switzerland.R b/R/Switzerland.R index bc296b90..a5d43415 100644 --- a/R/Switzerland.R +++ b/R/Switzerland.R @@ -2,31 +2,6 @@ #' @description Information for downloading, cleaning #' and processing COVID-19 region data for Switzerland #' -#' @section Liechtenstein: -#' Liechtenstein is not a canton of Switzerland, but is presented in the -#' source data as a peer of Swiss cantons and assigned the two letter code -#' `FL`. `covidregionaldata` modifies this and presents the region code -#' for Liechtenstein as `FL-FL`, consistent with the Swiss ISO 3166-2 codes -#' which are of the form `CH-BE`, `CH-ZH`, `CH-VD`, ... -#' -#' If you do not wish to work with Liechtenstein -#' data, filter out on this code. Note that this is labelled as a ISO 3166-2 -#' code but Liechtenstein's real ISO 3166-2 codes refer to sub-national -#' regions. 
-#' -#' @section Additional data: -#' -#' In addition to the standard `covidregionaldata` columns provided, -#' the OpenDataZH source data provides other figures for ICU occupancy, -#' number of patients on ventilators, and the how many individuals are -#' isolated or quarantined. These columns are passed through unchanged. - -#' Further detail on them can be found at -# nolint start -#' \url{https://github.com/openZH/covid_19/#swiss-cantons-and-principality-of-liechtenstein-unified-dataset} -#' @source \url{https://github.com/openZH/covid_19/} -# nolint end -#' #' @export #' @concept dataset #' @family subnational @@ -48,12 +23,12 @@ Switzerland <- R6::R6Class("Switzerland", supported_region_names = list("1" = "canton"), #' @field supported_region_codes A list of region codes in order of level. supported_region_codes = list("1" = "iso_3166_2"), - #' @field common_data_urls List of named links to raw data. - # nolint start + #' @field common_data_urls List of named links to raw data. This url links + #' to a JSON file which provides the addresses for the most recently-updated + #' CSV files, which are then downloaded. 
common_data_urls = list( - "main" = "https://github.com/openZH/covid_19/raw/master/COVID19_Fallzahlen_CH_total_v2.csv" + "main" = "https://www.covid19.admin.ch/api/data/context" ), - # nolint end #' @field source_data_cols existing columns within the raw data source_data_cols = c( "hosp_new", @@ -63,10 +38,10 @@ Switzerland <- R6::R6Class("Switzerland", "tested_total" ), #' @field source_text Plain text description of the source of the data - source_text = "Open Data, Canton of Zurich", + source_text = "Swiss Federal Office of Public Health FOPH", #' @field source_url Website address for explanation/introduction of the #' data - source_url = "https://github.com/openZH/covid_19/", + source_url = "https://www.covid19.admin.ch/en/overview", #' @description Set up a table of region codes for clean data #' @importFrom tibble tibble @@ -94,19 +69,71 @@ Switzerland <- R6::R6Class("Switzerland", ) }, + #' @description Download function to get raw data. Downloads + #' the updated list of CSV files using `download_JSON`, filters + #' that to identify the required CSV files, then uses the parent + #' method `download` to download the CSV files. + #' @importFrom purrr keep + download = function() { + message_verbose( + self$verbose, + paste0("Downloading updated URLs from ", self$common_data_urls$main)) + + super$download_JSON() + + self$data_urls <- + self$data$raw$main$data$sources$individual$csv$daily %>% + keep(names(.) 
%in% c("cases", "test", "death", "hosp")) + + super$download() + }, + #' @description Switzerland specific state level data cleaning - #' @importFrom dplyr select filter mutate left_join rename + #' @importFrom dplyr select filter mutate left_join rename full_join #' @importFrom lubridate as_date ymd #' @importFrom rlang .data #' clean_common = function() { - self$data$clean <- self$data$raw[["main"]] %>% - select(-time, -source) %>% + cases <- self$data$raw$cases %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + cases_new = entries, + cases_total = sumTotal) + hosp <- self$data$raw$hosp %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + hosp_new = entries, + hosp_total = sumTotal) + deaths <- self$data$raw$death %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + deaths_new = entries, + deaths_total = sumTotal) + tests <- self$data$raw$test %>% + filter(geoRegion != "CH", geoRegion != "CHFL", datum_unit == "day") %>% + # note that the data has entries_pos and entries_neg and we're + # currently not using it. 
+ select(geoRegion, datum, entries, sumTotal) %>% + rename(level_1_region_code = geoRegion, + date = datum, + tested_new = entries, + tested_total = sumTotal) + + self$data$clean <- + full_join(cases, deaths, by = c("date", "level_1_region_code")) %>% + full_join(hosp, by = c("date", "level_1_region_code")) %>% + full_join(tests, by = c("date", "level_1_region_code")) %>% mutate( level_1_region_code = if_else( - .data$abbreviation_canton_and_fl == "FL", + .data$level_1_region_code == "FL", "FL-FL", - paste0("CH-", .data$abbreviation_canton_and_fl) + paste0("CH-", .data$level_1_region_code) ), date = as_date(ymd(.data$date)) ) %>% @@ -114,15 +141,7 @@ Switzerland <- R6::R6Class("Switzerland", self$codes_lookup$`1`, by = c("level_1_region_code" = "code") ) %>% - select(-abbreviation_canton_and_fl) %>% - rename( - level_1_region = .data$region, - cases_total = .data$ncumul_conf, - deaths_total = .data$ncumul_deceased, - hosp_new = .data$new_hosp, - recovered_total = .data$ncumul_released, - tested_total = .data$ncumul_tested - ) + rename(level_1_region = region) } ) ) diff --git a/data/all_country_data.rda b/data/all_country_data.rda index 0d177cf3..5e83bf89 100644 Binary files a/data/all_country_data.rda and b/data/all_country_data.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index e3fb521e..4423c8e6 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -48,6 +48,7 @@ filepath flavio Flavio fns +FOPH fromJSON gb geocode diff --git a/man/Switzerland.Rd b/man/Switzerland.Rd index c5ab1201..3f942b05 100644 --- a/man/Switzerland.Rd +++ b/man/Switzerland.Rd @@ -3,38 +3,10 @@ \name{Switzerland} \alias{Switzerland} \title{Switzerland Class for downloading, cleaning and processing notification data} -\source{ -\url{https://github.com/openZH/covid_19/} -} \description{ Information for downloading, cleaning and processing COVID-19 region data for Switzerland } -\section{Liechtenstein}{ - -Liechtenstein is not a canton of Switzerland, but is presented in the -source data 
as a peer of Swiss cantons and assigned the two letter code -\code{FL}. \code{covidregionaldata} modifies this and presents the region code -for Liechtenstein as \code{FL-FL}, consistent with the Swiss ISO 3166-2 codes -which are of the form \code{CH-BE}, \code{CH-ZH}, \code{CH-VD}, ... - -If you do not wish to work with Liechtenstein -data, filter out on this code. Note that this is labelled as a ISO 3166-2 -code but Liechtenstein's real ISO 3166-2 codes refer to sub-national -regions. -} - -\section{Additional data}{ - - -In addition to the standard \code{covidregionaldata} columns provided, -the OpenDataZH source data provides other figures for ICU occupancy, -number of patients on ventilators, and the how many individuals are -isolated or quarantined. These columns are passed through unchanged. -Further detail on them can be found at -\url{https://github.com/openZH/covid_19/#swiss-cantons-and-principality-of-liechtenstein-unified-dataset} -} - \examples{ \dontrun{ region <- Switzerland$new(verbose = TRUE, steps = TRUE, get = TRUE) @@ -80,7 +52,9 @@ Subnational data sources \item{\code{supported_region_codes}}{A list of region codes in order of level.} -\item{\code{common_data_urls}}{List of named links to raw data.} +\item{\code{common_data_urls}}{List of named links to raw data. 
This url links +to a JSON file which provides the addresses for the most recently-updated +CSV files, which are then downloaded.} \item{\code{source_data_cols}}{existing columns within the raw data} @@ -95,6 +69,7 @@ data} \subsection{Public methods}{ \itemize{ \item \href{#method-set_region_codes}{\code{Switzerland$set_region_codes()}} +\item \href{#method-download}{\code{Switzerland$download()}} \item \href{#method-clean_common}{\code{Switzerland$clean_common()}} \item \href{#method-clone}{\code{Switzerland$clone()}} } @@ -104,7 +79,6 @@ data} \itemize{ \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="available_regions">}\href{../../covidregionaldata/html/DataClass.html#method-available_regions}{\code{covidregionaldata::DataClass$available_regions()}}\out{</span>} \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="clean">}\href{../../covidregionaldata/html/DataClass.html#method-clean}{\code{covidregionaldata::DataClass$clean()}}\out{</span>} -\item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="download">}\href{../../covidregionaldata/html/DataClass.html#method-download}{\code{covidregionaldata::DataClass$download()}}\out{</span>} \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="download_JSON">}\href{../../covidregionaldata/html/DataClass.html#method-download_JSON}{\code{covidregionaldata::DataClass$download_JSON()}}\out{</span>} \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="filter">}\href{../../covidregionaldata/html/DataClass.html#method-filter}{\code{covidregionaldata::DataClass$filter()}}\out{</span>} \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="get">}\href{../../covidregionaldata/html/DataClass.html#method-get}{\code{covidregionaldata::DataClass$get()}}\out{</span>} @@ -125,6 +99,19 @@ 
Set up a table of region codes for clean data \if{html}{\out{<div class="r">}}\preformatted{Switzerland$set_region_codes()}\if{html}{\out{</div>}} } +} +\if{html}{\out{<hr>}} +\if{html}{\out{<a id="method-download"></a>}} +\if{latex}{\out{\hypertarget{method-download}{}}} +\subsection{Method \code{download()}}{ +Download function to get raw data. Downloads +the updated list of CSV files using \code{download_JSON}, filters +that to identify the required CSV files, then uses the parent +method \code{download} to download the CSV files. +\subsection{Usage}{ +\if{html}{\out{<div class="r">}}\preformatted{Switzerland$download()}\if{html}{\out{</div>}} +} + } \if{html}{\out{<hr>}} \if{html}{\out{<a id="method-clean_common"></a>}} diff --git a/tests/testthat/custom_data/Switzerland_level_1.rds b/tests/testthat/custom_data/Switzerland_level_1.rds index 0206b40f..0c0fc1be 100644 Binary files a/tests/testthat/custom_data/Switzerland_level_1.rds and b/tests/testthat/custom_data/Switzerland_level_1.rds differ