Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 9c35694
Author: Richard Martin-Nielsen <[email protected]>
Date:   Sun Sep 19 21:23:51 2021 +0300

    lint cleaning

commit 26cd8ad
Author: Richard Martin-Nielsen <[email protected]>
Date:   Sun Sep 19 21:19:07 2021 +0300

    Remove prefixes in vietnam_codes.R

commit 2c20067
Author: Richard Martin-Nielsen <[email protected]>
Date:   Sun Sep 19 20:02:55 2021 +0300

    JSON-reading implementation of Vietnam

    Uses the download_JSON method for downloading. Adjustment to the DataClass tests to deal with tables of lists. Addition of stringi to imported packages, and of fromJSON and jsonlite to the WORDLIST. Tests run and data stored.

commit f0cec88
Merge: 5f7e82e 5681a2c
Author: Richard Martin-Nielsen <[email protected]>
Date:   Sun Sep 19 19:02:02 2021 +0300

    Merge branch 'json-reader-vietnam' into pr/413

commit 5f7e82e
Merge: b54bd03 abf4cdd
Author: Richard Martin-Nielsen <[email protected]>
Date:   Sun Sep 19 18:56:41 2021 +0300

    Merge branch 'json-reader' into pr/413

commit 5681a2c
Author: Richard Martin-Nielsen <[email protected]>
Date:   Fri Sep 17 22:04:01 2021 +0300

    Fixing final glitches in clean_common

commit 3aeddbe
Author: Richard Martin-Nielsen <[email protected]>
Date:   Fri Sep 17 21:40:09 2021 +0300

    Closer to a tidy version of cleaning Vietnam data

commit 7bd4fb0
Author: Richard Martin-Nielsen <[email protected]>
Date:   Thu Sep 16 21:43:23 2021 +0300

    Using JSON reader to download Vietnam code

commit abf4cdd
Author: Richard Martin-Nielsen <[email protected]>
Date:   Thu Sep 16 20:24:28 2021 +0300

    Initial implementation of a generic json_reader function and download_JSON method

commit b54bd03
Author: Richard Martin-Nielsen <[email protected]>
Date:   Thu Sep 16 19:57:23 2021 +0300

    Reformatting of code

commit ce5de11
Merge: 0f0cafb 29070dd
Author: Richard Martin-Nielsen <[email protected]>
Date:   Thu Sep 16 19:29:49 2021 +0300

    Merge branch 'vietnam' of https://github.com/biocyberman/covidregionaldata into pr/413

commit 29070dd
Author: biocyberman <[email protected]>
Date:   Wed Sep 15 21:36:40 2021 +0200

    Refactor code for PR epiforecasts#413

commit 7ca169a
Author: biocyberman <[email protected]>
Date:   Mon Sep 13 19:09:18 2021 +0200

    Change data source for Vietnam

commit 7edc95c
Author: biocyberman <[email protected]>
Date:   Mon Sep 13 00:31:35 2021 +0200

    Update with cases_death from 5F team

commit cd25e34
Author: biocyberman <[email protected]>
Date:   Thu Sep 2 21:17:11 2021 +0200

    Add empty locations to Unknown

commit abcd78a
Author: biocyberman <[email protected]>
Date:   Sat Aug 28 23:29:05 2021 +0200

    Add preliminary support for Vietnam subnational data

commit 0f0cafb
Author: Vang Le-Quy <vlequy@viko01>
Date:   Mon Sep 13 19:09:18 2021 +0200

    Change data source for Vietnam

commit a66539f
Author: Vang Le-Quy <vlequy@viko01>
Date:   Mon Sep 13 00:31:35 2021 +0200

    Update with cases_death from 5F team

commit 4f57d0f
Author: Vang Le-Quy <vlequy@viko01>
Date:   Thu Sep 2 21:17:11 2021 +0200

    Add empty locations to Unknown

commit 8996977
Author: Vang Le-Quy <vlequy@viko01>
Date:   Sat Aug 28 23:29:05 2021 +0200

    Add preliminary support for Vietnam subnational data
  • Loading branch information
RichardMN committed Sep 19, 2021
1 parent aa85f36 commit c3084fd
Show file tree
Hide file tree
Showing 44 changed files with 566 additions and 45 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/Vietnam.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Workflow that runs the regional-dataset tests for the Vietnam data source.
on:
schedule:
# Daily at 12:36 (GitHub Actions evaluates cron schedules in UTC).
- cron: '36 12 * * *'
# Also allows the workflow to be triggered manually.
workflow_dispatch:

name: Vietnam

jobs:
Vietnam:
runs-on: macOS-latest
env:
# Token made available to R tooling (e.g. remotes) as GITHUB_PAT.
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@v1

# Writes the dependency list and the R version to files under .github/
# so that they can be hashed into the cache key below.
- name: Query dependencies
run: |
install.packages('remotes')
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

# Cache key combines runner OS, R version and the dependency snapshot;
# restore-keys falls back to the OS + R version prefix.
- name: Cache R packages
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install dependencies
run: |
install.packages(c("remotes"))
remotes::install_deps(dependencies = TRUE)
install.packages("devtools")
shell: Rscript {0}

- name: Install package
run: R CMD INSTALL .

# Sets the testDownload and testSource options before running the
# regional dataset tests, selecting "Vietnam" as the source under test.
- name: Test dataset
run: |
options("testDownload" = TRUE)
options("testSource" = "Vietnam")
devtools::load_all()
testthat::test_file("tests/testthat/test-regional-datasets.R", reporter = c("summary", "fail"))
shell: Rscript {0}
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Imports:
countrycode (>= 1.2.0),
dplyr,
httr,
jsonlite,
lifecycle,
lubridate,
magrittr,
Expand All @@ -94,13 +95,14 @@ Imports:
R6,
readxl,
rlang,
stringi,
stringr,
tibble,
tidyr (>= 1.0.0),
tidyselect,
vroom,
withr,
xml2
xml2,
Suggests:
ggplot2,
ggspatial,
Expand All @@ -121,4 +123,4 @@ Encoding: UTF-8
Language: en-gb
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export(SouthAfrica)
export(Switzerland)
export(UK)
export(USA)
export(Vietnam)
export(WHO)
export(expect_clean_cols)
export(expect_columns_contain_data)
Expand All @@ -47,6 +48,7 @@ importFrom(countrycode,countryname)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_rows)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
Expand All @@ -73,12 +75,14 @@ importFrom(dplyr,slice_tail)
importFrom(dplyr,starts_with)
importFrom(dplyr,summarise)
importFrom(dplyr,tally)
importFrom(dplyr,tibble)
importFrom(dplyr,ungroup)
importFrom(dplyr,vars)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(httr,status_code)
importFrom(jsonlite,fromJSON)
importFrom(lifecycle,deprecate_warn)
importFrom(lifecycle,deprecated)
importFrom(lifecycle,is_present)
Expand Down Expand Up @@ -107,6 +111,9 @@ importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,syms)
importFrom(stringi,stri_replace_all)
importFrom(stringi,stri_trans_general)
importFrom(stringi,stri_trim_both)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_sentence)
Expand All @@ -123,6 +130,7 @@ importFrom(tidyr,nesting)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,replace_na)
importFrom(tidyr,separate)
importFrom(tidyselect,all_of)
importFrom(tidyselect,ends_with)
importFrom(tidyselect,starts_with)
Expand Down
137 changes: 137 additions & 0 deletions R/Vietnam.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#' Vietnam Class for downloading, cleaning and processing
#' notification data
#'
#' @description Information for downloading, cleaning
#'  and processing covid-19 region data for Vietnam.
#'
#' @source \url{https://covid.ncsc.gov.vn}
#' @export
#' @concept dataset
#' @family subnational
#' @examples
#' \dontrun{
#' region <- Vietnam$new(verbose = TRUE, steps = TRUE, get = TRUE)
#' region$return()
#' }
Vietnam <- R6::R6Class("Vietnam",
  inherit = DataClass,
  public = list(

    # Core Attributes (amend each parameter for country specific information)
    #' @field origin name of country to fetch data for
    origin = "Vietnam",
    #' @field supported_levels List of supported levels.
    supported_levels = list("1"),
    #' @field supported_region_names List of region names in order of level.
    supported_region_names = list("1" = "region"),
    #' @field supported_region_codes List of region codes in order of level.
    supported_region_codes = list("1" = "iso_3166_2"),
    #' @field common_data_urls List of named links to raw data.
    common_data_urls = list(
      # nolint start
      "case_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time",
      "death_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time",
      "recovered_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time",
      "provinces" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces"
      # nolint end
    ),
    #' @field source_data_cols existing columns within the raw data
    source_data_cols = c(
      "cases_total", "deaths_total", "recovered_total"
    ),
    #' @field source_text Plain text description of the source of the data
    source_text =
      "Public COVID-19 for Vietnam, curated by NCSC's COVID-19 team",
    #' @field source_url Website address for explanation/introduction of the
    #' data
    source_url = "https://covid.ncsc.gov.vn",

    #' @description Set up a table of region codes for clean data. Stores
    #' the packaged `vietnam_codes` table as the level 1 lookup.
    #' @importFrom tibble tibble
    set_region_codes = function() {
      self$codes_lookup$`1` <- covidregionaldata::vietnam_codes
    },

    #' @description Download function to get raw data. Uses the
    #' parent class JSON-specific method for downloads.
    download = function() {
      super$download_JSON()
    },

    #' @description Provincial Level Data cleaning. Flattens the three
    #' by-time tables (cases, deaths, recovered), joins them on province
    #' and date, replaces the API's province id with the province name,
    #' normalises that name and attaches the region codes from
    #' `codes_lookup`. The result is stored in `data$clean`.
    #'
    #' @importFrom dplyr filter select mutate rename tibble as_tibble
    #' @importFrom dplyr full_join left_join
    #' @importFrom tidyr replace_na drop_na separate
    #' @importFrom purrr map
    #' @importFrom stringi stri_trans_general stri_trim_both stri_replace_all
    #' @importFrom stringr str_to_title str_replace_all
    #' @importFrom lubridate dmy
    clean_common = function() {
      # Pick the three time-series tables by name rather than by position
      # so the result does not depend on the ordering of `data_urls`.
      series_names <- c("case_by_time", "death_by_time", "recovered_by_time")
      flat_all <- map(
        self$data$raw[series_names],
        function(series) {
          # unlist() flattens the nested JSON into a named vector; the
          # dot-separated names are split into province and date, and the
          # leading name component is discarded.
          as_tibble(unlist(series), rownames = "date") %>%
            separate(date, sep = "[.]+", into = c(NA, "province", "date"))
        }
      )

      cases_deaths <- full_join(
        flat_all$case_by_time, flat_all$death_by_time,
        by = c("province", "date"),
        suffix = c(".cases", ".deaths")
      )
      # Only `value` from the recovered table is new at this point, so it
      # keeps its plain name and is renamed in the select() below.
      all_series <- full_join(
        cases_deaths, flat_all$recovered_by_time,
        by = c("province", "date"),
        suffix = c("", ".recovered")
      )

      # The api uses integer codes for provinces which do not
      # line up with ISO 3166-2 (some of which are not numbers)
      # so we use this as a temporary code to line names up
      # with data.
      province_names <- self$data$raw$provinces %>%
        select(ncsc_region_code = id, level_1_region = name)

      self$data$clean <- all_series %>%
        select(
          ncsc_region_code = province,
          date,
          cases_total = value.cases,
          deaths_total = value.deaths,
          recovered_total = value
        ) %>%
        mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
        left_join(province_names, by = "ncsc_region_code") %>%
        select(-ncsc_region_code) %>%
        mutate(
          date = dmy(date),
          cases_total = as.numeric(cases_total),
          deaths_total = as.numeric(deaths_total),
          recovered_total = as.numeric(recovered_total),
          # Normalise province names step by step so they can be matched
          # against `level_1_region` in the codes lookup.
          level_1_region = str_replace_all(
            level_1_region, "TP HCM", "Hochiminh"
          ),
          level_1_region = stri_trans_general(level_1_region, "latin-ascii"),
          level_1_region = stri_trim_both(level_1_region),
          level_1_region = str_replace_all(
            level_1_region, "\\(.*\\)|-| ", ""
          ),
          level_1_region = str_to_title(level_1_region),
          level_1_region = replace_na(level_1_region, "Unknown")
        ) %>%
        left_join(self$codes_lookup$`1`, by = "level_1_region")
    }
  )
)
6 changes: 6 additions & 0 deletions R/datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
#' @return A tibble of region codes and related information.
"france_codes"

#' Region Codes for Vietnam Dataset.
#'
#' @description The region codes for Viet Nam
#' @return A tibble of region codes and related information.
"vietnam_codes"

#' Region Codes for JHU Dataset. Taken from the region codes provided as
#' part of the WHO dataset.
#'
Expand Down
14 changes: 14 additions & 0 deletions R/shared-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,20 @@ DataClass <- R6::R6Class(
)
},

#' @description JSON counterpart of `download`: reads every entry of
#' `data_urls` with `json_reader` and stores the results in `data$raw`
#' as a list named after the `data_urls` entries. Intended to be called
#' from a sub-class `download` method in place of the default.
#' @importFrom purrr map
download_JSON = function() {
  has_urls <- length(self$data_urls) > 0
  if (!has_urls) {
    stop("No data to download as data_urls is empty")
  }
  self$data$raw <- map(
    self$data_urls,
    function(url) json_reader(url, verbose = self$verbose)
  )
},

#' @description Cleans raw data (corrects format, converts column types,
#' etc). Works on raw data and so should be called after
#' \href{#method-download}{\code{download()}}
Expand Down
3 changes: 2 additions & 1 deletion R/test-DataClass.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ test_download <- function(DataClass_obj, download, snapshot_path) {
walk(DataClass_obj$data$raw, function(data) {
testthat::expect_s3_class(data, "data.frame")
testthat::expect_true(nrow(data) > 0)
testthat::expect_true(ncol(data) >= 2)
testthat::expect_true(ncol(data) >= 2
|| typeof(data[[1]]) == "list")
})
}
)
Expand Down
24 changes: 24 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,30 @@ csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) {
return(tibble(data))
}

#' Custom JSON reading function
#'
#' @description Reads JSON with `jsonlite::fromJSON` and wraps the parsed
#' result in a tibble. When `verbose` is `TRUE` the source is announced with
#' a message and any warnings or messages raised while reading are shown;
#' otherwise they are suppressed.
#' @param file A URL or filepath to a JSON
#' @param ... extra parameters to be passed to jsonlite::fromJSON
#' @inheritParams message_verbose
#' @return A tibble built from the parsed JSON. NOTE(review): when the JSON
#' does not parse to a data frame (e.g. nested lists) the result is
#' presumably a tibble holding a list column - confirm against callers.
#' @importFrom tibble tibble
#' @importFrom jsonlite fromJSON
#' @concept utility
json_reader <- function(file, verbose = FALSE, ...) {
  if (verbose) {
    message("Downloading data from ", file)
    data <- fromJSON(file, ...)
  } else {
    # Quiet mode: hide parser chatter but still let errors propagate.
    data <- suppressWarnings(suppressMessages(fromJSON(file, ...)))
  }
  tibble(data)
}

#' Wrapper for message
#'
#' @description A wrapper for `message` that only prints output when
Expand Down
Loading

0 comments on commit c3084fd

Please sign in to comment.