Skip to content

Commit

Permalink
Merge pull request epiforecasts#413 from biocyberman/vietnam
Browse files Browse the repository at this point in the history
Add subnational data for Vietnam
  • Loading branch information
seabbs authored Sep 27, 2021
2 parents c59154d + d1232f1 commit 73784b7
Show file tree
Hide file tree
Showing 49 changed files with 647 additions and 49 deletions.
3 changes: 2 additions & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ RUN install2.r --error --skipinstalled --repos ${CRAN} --ncpus -1 \
RUN apt-get update \
&& export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends libgdal-dev \
libudunits2-dev libharfbuzz-dev libfribidi-dev
libudunits2-dev libharfbuzz-dev libfribidi-dev \
libjq-dev libprotobuf-dev

# install dependencies
COPY DESCRIPTION /tmp/package/DESCRIPTION
Expand Down
4 changes: 3 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
"extensions": [
"ikuyadeu.r",
"reditorsupport.r-lsp",
"shan.code-settings-sync"
"shan.code-settings-sync",
"searking.preview-vscode",
"tht13.html-preview-vscode"
],

// Use 'forwardPorts' to make a list of ports inside the container available locally.
Expand Down
48 changes: 48 additions & 0 deletions .github/workflows/Vietnam.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Workflow that installs the package and runs the regional dataset test file
# with the "testSource" option set to "Vietnam".
on:
schedule:
# Standard five-field cron expression: minute 36, hour 12, every day.
- cron: '36 12 * * *'
# Also allow the workflow to be triggered manually.
workflow_dispatch:

name: Vietnam

jobs:
Vietnam:
runs-on: macOS-latest
env:
# Expose the workflow token to the steps below under the name GITHUB_PAT.
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@v1

# Writes the dependency snapshot and the R version to files under .github/;
# both files are hashed to build the cache key in the next step.
- name: Query dependencies
run: |
install.packages('remotes')
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

# Cache key = runner OS + hash of the R version file + hash of the dependency
# snapshot; restore-keys falls back to the OS + R version prefix.
- name: Cache R packages
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install dependencies
run: |
install.packages(c("remotes"))
remotes::install_deps(dependencies = TRUE)
install.packages("devtools")
shell: Rscript {0}

- name: Install package
run: R CMD INSTALL .

# NOTE(review): the "testDownload" and "testSource" options are presumably
# read by tests/testthat/test-regional-datasets.R to enable live downloads and
# restrict the run to the Vietnam class - confirm against that test file.
- name: Test dataset
run: |
options("testDownload" = TRUE)
options("testSource" = "Vietnam")
devtools::load_all()
testthat::test_file("tests/testthat/test-regional-datasets.R", reporter = c("summary", "fail"))
shell: Rscript {0}
8 changes: 7 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: covidregionaldata
Title: Subnational Data for COVID-19 Epidemiology
Version: 0.9.2.2000
Version: 0.9.2.3000
Authors@R:
c(person(given = "Joseph",
family = "Palmer",
Expand Down Expand Up @@ -55,6 +55,10 @@ Authors@R:
family = "Gruson",
role = "ctb",
comment = c(ORCID = "0000-0002-4094-1476")),
person(given = "Vang",
family = "Le",
role = "ctb",
comment = c(URL = "https://github.com/biocyberman")),
person(given = "Sebastian",
family = "Funk",
role = "aut",
Expand Down Expand Up @@ -86,6 +90,7 @@ Imports:
countrycode (>= 1.2.0),
dplyr,
httr,
jsonlite,
lifecycle,
lubridate,
magrittr,
Expand All @@ -94,6 +99,7 @@ Imports:
R6,
readxl,
rlang,
stringi,
stringr,
tibble,
tidyr (>= 1.0.0),
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export(SouthAfrica)
export(Switzerland)
export(UK)
export(USA)
export(Vietnam)
export(WHO)
export(expect_clean_cols)
export(expect_columns_contain_data)
Expand All @@ -48,6 +49,7 @@ importFrom(countrycode,countryname)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_rows)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
Expand All @@ -74,13 +76,15 @@ importFrom(dplyr,slice_tail)
importFrom(dplyr,starts_with)
importFrom(dplyr,summarise)
importFrom(dplyr,tally)
importFrom(dplyr,tibble)
importFrom(dplyr,transmute)
importFrom(dplyr,ungroup)
importFrom(dplyr,vars)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(httr,status_code)
importFrom(jsonlite,fromJSON)
importFrom(lifecycle,deprecate_warn)
importFrom(lifecycle,deprecated)
importFrom(lifecycle,is_present)
Expand Down Expand Up @@ -109,6 +113,9 @@ importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,syms)
importFrom(stringi,stri_replace_all)
importFrom(stringi,stri_trans_general)
importFrom(stringi,stri_trim_both)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_sentence)
Expand All @@ -125,6 +132,7 @@ importFrom(tidyr,nesting)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,replace_na)
importFrom(tidyr,separate)
importFrom(tidyselect,all_of)
importFrom(tidyselect,ends_with)
importFrom(tidyselect,starts_with)
Expand Down
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ This release is currently under development

## New data sets

- Support for level 1 region data in Estonia (thanks to @RichardMN). See `?Estonia` for details.
- Support for level 1 region data in Estonia (thanks to @RichardMN). See `?Estonia` for details.
- Support for level 1 region data in Vietnam (thanks to @biocyberman). See `?Vietnam` for details.

# covidregionaldata 0.9.2

This release adds support for the Covid19 Data Hub which includes Google and Apple mobility data amongst a large range of other data sets, data from the European Commission's Joint Research Centre which is at both the regional and national level, and individual sources for regional data from several countries. Package updates have been made in line with a software review at the [Journal of Open Source Software](https://github.com/openjournals/joss-reviews/issues/3290). Finally, this release exposes more of the testing infrastructure to users and adds a package hexsticker.
This release adds support for the Covid19 Data Hub which includes Google and Apple mobility data amongst a large range of other data sets, data from the European Commission's Joint Research Centre which is at both the regional and national level, and individual sources for regional data from several countries. Package updates have been made in line with a software review at the [Journal of Open Source Software](https://github.com/openjournals/joss-reviews/issues/3290). Finally, this release exposes more of the testing infrastructure to users and adds a package hexsticker.

Thanks to @joseph-palmer, @RichardMN, and @kathsherratt for contributions towards this release.

Expand Down
137 changes: 137 additions & 0 deletions R/Vietnam.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#' Vietnam Class for downloading, cleaning and processing
#' notification data
#'
#' @description Information for downloading, cleaning
#'  and processing covid-19 region data for Vietnam.
#'
#' @source \url{https://covid.ncsc.gov.vn}
#' @export
#' @concept dataset
#' @family subnational
#' @examples
#' \dontrun{
#' region <- Vietnam$new(verbose = TRUE, steps = TRUE, get = TRUE)
#' region$return()
#' }
Vietnam <- R6::R6Class("Vietnam",
  inherit = DataClass,
  public = list(

    # Core Attributes (amend each parameter for country specific information)
    #' @field origin name of country to fetch data for
    origin = "Vietnam",
    #' @field supported_levels List of supported levels.
    supported_levels = list("1"),
    #' @field supported_region_names List of region names in order of level.
    supported_region_names = list("1" = "region"),
    #' @field supported_region_codes List of region codes in order of level.
    supported_region_codes = list("1" = "iso_3166_2"),
    #' @field common_data_urls List of named links to raw data.
    common_data_urls = list(
      # nolint start
      "case_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time",
      "death_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time",
      "recovered_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time",
      "provinces" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces"
      # nolint end
    ),
    #' @field source_data_cols existing columns within the raw data
    source_data_cols = c(
      "cases_total", "deaths_total", "recovered_total"
    ),
    #' @field source_text Plain text description of the source of the data
    source_text =
      "Public COVID-19 for Vietnam, curated by NCSC's COVID-19 team",
    #' @field source_url Website address for explanation/introduction of the
    #' data
    source_url = "https://covid.ncsc.gov.vn",

    #' @description Set up a table of region codes for clean data
    #' @importFrom tibble tibble
    set_region_codes = function() {
      self$codes_lookup$`1` <- covidregionaldata::vietnam_codes
    },

    #' @description Download function to get raw data. Uses the
    #' parent class JSON-specific method for downloads.
    download = function() {
      super$download_JSON()
    },

    #' @description Provincial Level Data cleaning. Flattens the cases,
    #' deaths and recovered time series into one table keyed by province
    #' and date, attaches province names from the `provinces` table,
    #' normalises those names and joins the level 1 region code lookup.
    #' The result is stored in `self$data$clean`.
    #'
    #' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
    #' @importFrom dplyr left_join
    #' @importFrom tidyr replace_na drop_na separate
    #' @importFrom purrr map
    #' @importFrom stringi stri_trans_general stri_trim_both stri_replace_all
    #' @importFrom stringr str_to_title str_replace_all
    #' @importFrom lubridate dmy
    clean_common = function() {
      # The three time-series tables share one layout and are processed
      # identically. They are selected by name (not by position) so the
      # result does not depend on the order of the URL list; every later
      # access in this method is by name as well.
      series_names <- c("case_by_time", "death_by_time", "recovered_by_time")
      data_inputs <- self$data$raw[series_names]

      # unlist() yields a named vector whose names are dot-joined paths;
      # the names become a column which is split on "." into a discarded
      # prefix, the province identifier and the date.
      # NOTE(review): presumably the JSON is nested province -> date ->
      # value; confirm against the API response.
      flat_all <- map(
        map(
          data_inputs,
          function(x) as_tibble(unlist(x), rownames = "date")
        ),
        function(y) {
          y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
        }
      )

      self$data$clean <- full_join(
        full_join(
          flat_all$case_by_time, flat_all$death_by_time,
          by = c("province", "date"),
          suffix = c(".cases", ".deaths"),
          copy = TRUE
        ),
        # After the first join the only remaining column called `value`
        # is the recovered series, so no suffix is applied to it.
        flat_all$recovered_by_time,
        by = c("province", "date"),
        suffix = c("", ".recovered"),
        copy = TRUE
      ) %>%
        # The api uses integer codes for provinces which do not
        # line up with ISO 3166-2 (some of which are not numbers)
        # so we use this as a temporary code to line names up
        # with data.
        select(
          ncsc_region_code = province,
          date,
          cases_total = value.cases,
          deaths_total = value.deaths,
          recovered_total = value
        ) %>%
        mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
        left_join(
          self$data$raw$provinces %>%
            select(ncsc_region_code = id, level_1_region = name),
          by = c("ncsc_region_code")
        ) %>%
        select(-ncsc_region_code) %>%
        mutate(
          date = dmy(date),
          cases_total = as.numeric(cases_total),
          deaths_total = as.numeric(deaths_total),
          recovered_total = as.numeric(recovered_total),
          level_1_region = str_replace_all(
            level_1_region,
            "TP HCM", "Hochiminh"
          )
        ) %>%
        # Normalise province names (strip diacritics, parenthesised text,
        # hyphens and spaces, then title-case) before joining on them.
        mutate(
          level_1_region = stri_trans_general(level_1_region, "latin-ascii"),
          level_1_region = stri_trim_both(level_1_region),
          level_1_region = str_replace_all(
            level_1_region,
            "\\(.*\\)|-| ", ""
          ),
          level_1_region = str_to_title(level_1_region),
          level_1_region = replace_na(level_1_region, "Unknown")
        ) %>%
        left_join(
          self$codes_lookup$`1`,
          by = c("level_1_region" = "level_1_region")
        )
    }
  )
)
6 changes: 6 additions & 0 deletions R/datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
#' @return A tibble of region codes and related information.
"france_codes"

#' Region Codes for Vietnam Dataset.
#'
#' @description The region codes for Viet Nam. Used by the `Vietnam` class
#' as its level 1 region code lookup, joined to the cleaned data on
#' `level_1_region`.
#' @return A tibble of region codes and related information.
"vietnam_codes"

#' Region Codes for JHU Dataset. Taken from the region codes provided as
#' part of the WHO dataset.
#'
Expand Down
14 changes: 14 additions & 0 deletions R/shared-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,20 @@ DataClass <- R6::R6Class(
)
},

#' @description Fetch every entry of `data_urls` as JSON and store the
#' results in `data$raw` as a list that keeps the names of `data_urls`.
#' Intended as a drop-in replacement for `download` that sub-classes can
#' call from their own `download` method. Stops with an error when
#' `data_urls` is empty.
#' @importFrom purrr map
download_JSON = function() {
  has_no_urls <- length(self$data_urls) == 0
  if (has_no_urls) {
    stop("No data to download as data_urls is empty")
  }
  # Each URL is read with json_reader, honouring the object's verbosity.
  self$data$raw <- map(
    self$data_urls,
    json_reader,
    verbose = self$verbose
  )
},

#' @description Cleans raw data (corrects format, converts column types,
#' etc). Works on raw data and so should be called after
#' \href{#method-download}{\code{download()}}
Expand Down
3 changes: 2 additions & 1 deletion R/test-DataClass.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ test_download <- function(DataClass_obj, download, snapshot_path) {
walk(DataClass_obj$data$raw, function(data) {
testthat::expect_s3_class(data, "data.frame")
testthat::expect_true(nrow(data) > 0)
testthat::expect_true(ncol(data) >= 2)
testthat::expect_true(ncol(data) >= 2
|| typeof(data[[1]]) == "list")
})
}
)
Expand Down
24 changes: 24 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,30 @@ csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) {
return(tibble(data))
}

#' Custom JSON reading function
#'
#' @description Reads JSON from a URL or file path using
#'  `jsonlite::fromJSON()` and wraps the parsed result in a tibble. When
#'  `verbose` is `TRUE` the source being read is reported with a message;
#'  otherwise messages and warnings raised while reading are suppressed.
#' @param file A URL or filepath to a JSON
#' @param ... extra parameters to be passed to jsonlite::fromJSON
#' @inheritParams message_verbose
#' @return A tibble wrapping the parsed JSON.
#' @importFrom tibble tibble
#' @importFrom jsonlite fromJSON
#' @concept utility
json_reader <- function(file, verbose = FALSE, ...) {
  if (verbose) {
    message("Downloading data from ", file)
    data <- fromJSON(file, ...)
  } else {
    # Quiet mode: hide both warnings and messages emitted by the reader.
    data <- suppressWarnings(
      suppressMessages(
        fromJSON(file, ...)
      )
    )
  }
  tibble(data)
}

#' Wrapper for message
#'
#' @description A wrapper for `message` that only prints output when
Expand Down
Loading

0 comments on commit 73784b7

Please sign in to comment.