Squashed commit of the following:

commit 9c35694 Author: Richard Martin-Nielsen <[email protected]> Date: Sun Sep 19 21:23:51 2021 +0300 lint cleaning commit 26cd8ad Author: Richard Martin-Nielsen <[email protected]> Date: Sun Sep 19 21:19:07 2021 +0300 Remove prefixes in vietnam_codes.R commit 2c20067 Author: Richard Martin-Nielsen <[email protected]> Date: Sun Sep 19 20:02:55 2021 +0300 JSON-reading implementation of Vietnam Uses download_json method for downloading. Adjustment to the DataClass tests to deal with tables of lists. Addition of stringi to imported packages, and of fromJSON and jsonlite to the WORDLIST. Tests run and data stored. commit f0cec88 Merge: 5f7e82e 5681a2c Author: Richard Martin-Nielsen <[email protected]> Date: Sun Sep 19 19:02:02 2021 +0300 Merge branch 'json-reader-vietnam' into pr/413 commit 5f7e82e Merge: b54bd03 abf4cdd Author: Richard Martin-Nielsen <[email protected]> Date: Sun Sep 19 18:56:41 2021 +0300 Merge branch 'json-reader' into pr/413 commit 5681a2c Author: Richard Martin-Nielsen <[email protected]> Date: Fri Sep 17 22:04:01 2021 +0300 Fixing final glitches in clean_common commit 3aeddbe Author: Richard Martin-Nielsen <[email protected]> Date: Fri Sep 17 21:40:09 2021 +0300 Closer to a tidy version of cleaning Vietnam data commit 7bd4fb0 Author: Richard Martin-Nielsen <[email protected]> Date: Thu Sep 16 21:43:23 2021 +0300 Using JSON reader to download Vietnam code commit abf4cdd Author: Richard Martin-Nielsen <[email protected]> Date: Thu Sep 16 20:24:28 2021 +0300 Initial implementation of a generic json_reader function and download_JSON method commit b54bd03 Author: Richard Martin-Nielsen <[email protected]> Date: Thu Sep 16 19:57:23 2021 +0300 Reformatting of code commit ce5de11 Merge: 0f0cafb 29070dd Author: Richard Martin-Nielsen <[email protected]> Date: Thu Sep 16 19:29:49 2021 +0300 Merge branch 'vietnam' of https://github.com/biocyberman/covidregionaldata into pr/413 commit 29070dd Author: biocyberman <[email protected]> Date: Wed Sep 15 
21:36:40 2021 +0200 Refactor code for PR epiforecasts#413 commit 7ca169a Author: biocyberman <[email protected]> Date: Mon Sep 13 19:09:18 2021 +0200 Change data source for Vietnam commit 7edc95c Author: biocyberman <[email protected]> Date: Mon Sep 13 00:31:35 2021 +0200 Update with cases_death from 5F team commit cd25e34 Author: biocyberman <[email protected]> Date: Thu Sep 2 21:17:11 2021 +0200 Add empty locations to Unknown commit abcd78a Author: biocyberman <[email protected]> Date: Sat Aug 28 23:29:05 2021 +0200 Add preliminary support for Vietnam subnational data commit 0f0cafb Author: Vang Le-Quy <vlequy@viko01> Date: Mon Sep 13 19:09:18 2021 +0200 Change data source for Vietnam commit a66539f Author: Vang Le-Quy <vlequy@viko01> Date: Mon Sep 13 00:31:35 2021 +0200 Update with cases_death from 5F team commit 4f57d0f Author: Vang Le-Quy <vlequy@viko01> Date: Thu Sep 2 21:17:11 2021 +0200 Add empty locations to Unknown commit 8996977 Author: Vang Le-Quy <vlequy@viko01> Date: Sat Aug 28 23:29:05 2021 +0200 Add preliminary support for Vietnam subnational data
RichardMN · Sep 19, 2021 · c3084fd · c3084fd
1 parent aa85f36
commit c3084fd
Show file tree

Hide file tree

Showing 44 changed files with 566 additions and 45 deletions.
diff --git a/.github/workflows/Vietnam.yaml b/.github/workflows/Vietnam.yaml
@@ -0,0 +1,48 @@
+on:
+  schedule:
+    - cron: '36 12 * * *' # every day at 12:36 (GitHub Actions cron uses UTC)
+  workflow_dispatch: # also allow the workflow to be started manually
+
+name: Vietnam
+
+jobs:
+  Vietnam:
+    runs-on: macOS-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-r@v1
+
+      - name: Query dependencies # writes the two files hashed in the cache key
+        run: |
+          install.packages('remotes')
+          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
+          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
+        shell: Rscript {0}
+
+      - name: Cache R packages # keyed on OS, R version and dependency snapshot
+        uses: actions/cache@v2
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
+
+      - name: Install dependencies
+        run: |
+          install.packages(c("remotes"))
+          remotes::install_deps(dependencies = TRUE)
+          install.packages("devtools")
+        shell: Rscript {0}
+
+      - name: Install package
+        run: R CMD INSTALL .
+
+      - name: Test dataset # sets testDownload/testSource, then runs the tests
+        run: |
+          options("testDownload" = TRUE)
+          options("testSource" = "Vietnam")
+          devtools::load_all()
+          testthat::test_file("tests/testthat/test-regional-datasets.R", reporter = c("summary", "fail"))
+        shell: Rscript {0}
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -86,6 +86,7 @@ Imports:
     countrycode (>= 1.2.0),
     dplyr,
     httr,
+    jsonlite,
     lifecycle,
     lubridate,
     magrittr,
@@ -94,13 +95,14 @@ Imports:
     R6,
     readxl,
     rlang,
+    stringi,
     stringr,
     tibble,
     tidyr (>= 1.0.0),
     tidyselect,
     vroom,
     withr,
-    xml2
+    xml2,
 Suggests:
     ggplot2,
     ggspatial,
@@ -121,4 +123,4 @@ Encoding: UTF-8
 Language: en-gb
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -24,6 +24,7 @@ export(SouthAfrica)
 export(Switzerland)
 export(UK)
 export(USA)
+export(Vietnam)
 export(WHO)
 export(expect_clean_cols)
 export(expect_columns_contain_data)
@@ -47,6 +48,7 @@ importFrom(countrycode,countryname)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
+importFrom(dplyr,as_tibble)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,count)
 importFrom(dplyr,distinct)
@@ -73,12 +75,14 @@ importFrom(dplyr,slice_tail)
 importFrom(dplyr,starts_with)
 importFrom(dplyr,summarise)
 importFrom(dplyr,tally)
+importFrom(dplyr,tibble)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
 importFrom(httr,GET)
 importFrom(httr,POST)
 importFrom(httr,content)
 importFrom(httr,status_code)
+importFrom(jsonlite,fromJSON)
 importFrom(lifecycle,deprecate_warn)
 importFrom(lifecycle,deprecated)
 importFrom(lifecycle,is_present)
@@ -107,6 +111,9 @@ importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,syms)
+importFrom(stringi,stri_replace_all)
+importFrom(stringi,stri_trans_general)
+importFrom(stringi,stri_trim_both)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_to_sentence)
@@ -123,6 +130,7 @@ importFrom(tidyr,nesting)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
+importFrom(tidyr,separate)
 importFrom(tidyselect,all_of)
 importFrom(tidyselect,ends_with)
 importFrom(tidyselect,starts_with)

diff --git a/R/Vietnam.R b/R/Vietnam.R
@@ -0,0 +1,137 @@
+#' Vietnam Class for downloading, cleaning and processing
+#' notification data
+#'
+#' @description Information for downloading, cleaning
+#'  and processing covid-19 region data for Vietnam.
+#'
+#' @source \url{https://covid.ncsc.gov.vn}
+#' @export
+#' @concept dataset
+#' @family subnational
+#' @examples
+#' \dontrun{
+#' region <- Vietnam$new(verbose = TRUE, steps = TRUE, get = TRUE)
+#' region$return()
+#' }
+Vietnam <- R6::R6Class("Vietnam",
+  inherit = DataClass,
+  public = list(
+
+    # Core attributes describing the Vietnam data source and its levels
+    #' @field origin name of country to fetch data for
+    origin = "Vietnam",
+    #' @field supported_levels List of supported levels.
+    supported_levels = list("1"),
+    #' @field supported_region_names List of region names in order of level.
+    supported_region_names = list("1" = "region"),
+    #' @field supported_region_codes List of region codes in order of level.
+    supported_region_codes = list("1" = "iso_3166_2"),
+    #' @field common_data_urls List of named links to raw data.
+    common_data_urls = list(
+      # nolint start
+      "case_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time",
+      "death_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time",
+      "recovered_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time",
+      "provinces" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces"
+      # nolint end
+    ),
+    #' @field source_data_cols existing columns within the raw data
+    source_data_cols = c(
+      "cases_total", "deaths_total", "recovered_total"
+    ),
+    #' @field source_text Plain text description of the source of the data
+    source_text =
+      "Public COVID-19 for Vietnam, curated by NCSC's COVID-19 team",
+    #' @field source_url Website address for explanation/introduction of the
+    #' data
+    source_url = "https://covid.ncsc.gov.vn",
+
+    #' @description Set up a table of region codes for clean data
+    #' @importFrom tibble tibble
+    set_region_codes = function() {
+      self$codes_lookup$`1` <- covidregionaldata::vietnam_codes
+    },
+
+    #' @description Download function to get raw data. Uses the
+    #' parent class JSON-specific method for downloads.
+    download = function() {
+      super$download_JSON()
+    },
+
+    #' @description Provincial level data cleaning. Flattens the three
+    #' downloaded time series, joins them by province and date, and adds
+    #' region names and the codes from `codes_lookup`.
+    #'
+    #' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
+    #' @importFrom tidyr replace_na drop_na separate
+    #' @importFrom purrr map
+    #' @importFrom stringi stri_trans_general stri_trim_both stri_replace_all
+    #' @importFrom stringr str_to_title str_replace_all
+    #' @importFrom lubridate dmy
+    clean_common = function() {
+      # The first three elements of self$data$raw are the time series
+      # tables (cases, deaths, recovered); they share a layout, so each is
+      # flattened to long form (province, date, value) the same way.
+      data_inputs <- self$data$raw[1:3]
+      flat_all <- map(
+        map(
+          data_inputs,
+          function(x) as_tibble(unlist(x),
+                                rownames = "date")),
+        function(y) {
+          y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
+        }
+      )
+      self$data$clean <- full_join(
+        full_join(
+          flat_all$case_by_time, flat_all$death_by_time,
+          by = c("province", "date"),
+          suffix = c(".cases", ".deaths"),
+          copy = TRUE
+        ),
+        flat_all$recovered_by_time,
+        by = c("province", "date"),
+        suffix = c("", ".recovered"),
+        copy = TRUE
+      ) %>%
+        # The api uses integer codes for provinces which do not
+        # line up with ISO 3166-2 (some of which are not numbers)
+        # so we use this as a temporary code to line names up
+        # with data.
+        select(
+          ncsc_region_code = province,
+          date,
+          cases_total = value.cases,
+          deaths_total = value.deaths,
+          recovered_total = value) %>%
+        mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
+        left_join(
+          self$data$raw$provinces %>%
+            select(ncsc_region_code = id, level_1_region = name),
+          by = c("ncsc_region_code")) %>%
+        select(-ncsc_region_code) %>%
+        mutate(
+          date = dmy(date),
+          cases_total = as.numeric(cases_total),
+          deaths_total = as.numeric(deaths_total),
+          recovered_total = as.numeric(recovered_total),
+          level_1_region = str_replace_all(level_1_region,
+                                        "TP HCM", "Hochiminh"),
+        ) %>%
+        # NOTE(review): the drop_na(date, region_name) step is disabled, so
+        # rows with a missing date or region are kept (see "Unknown" below).
+        mutate(
+          level_1_region = stri_trans_general(level_1_region, "latin-ascii"),
+          level_1_region = stri_trim_both(level_1_region),
+          level_1_region = str_replace_all(level_1_region,
+                                           "\\(.*\\)|-| ", ""),
+          level_1_region = str_to_title(level_1_region),
+          level_1_region = replace_na(level_1_region, "Unknown")
+        ) %>%
+        left_join(
+          self$codes_lookup$`1`,
+          by = c("level_1_region" = "level_1_region")
+        )
+    }
+  )
+)
diff --git a/R/datasets.R b/R/datasets.R
@@ -33,6 +33,12 @@
 #' @return A tibble of region codes and related information.
 "france_codes"
 
+#' Region Codes for Vietnam Dataset.
+#'
+#' @description The region codes for Viet Nam, used as the level 1 lookup.
+#' @return A tibble of region codes and related information.
+"vietnam_codes"
+
 #' Region Codes for JHU Dataset. Taken from the region codes provided as
 #' part of the WHO dataset.
 #'

diff --git a/R/shared-methods.R b/R/shared-methods.R
@@ -302,6 +302,20 @@ DataClass <- R6::R6Class(
       )
     },
 
+    #' @description Download raw JSON data from each entry of `data_urls`
+    #' with `json_reader()` and store the resulting list in `data$raw`.
+    #' Errors if `data_urls` is empty. Designed as a drop-in replacement
+    #' for `download` so it can be used in sub-classes.
+    #' @importFrom purrr map
+    download_JSON = function() {
+      if (length(self$data_urls) == 0) {
+        stop("No data to download as data_urls is empty")
+      }
+      self$data$raw <- map(self$data_urls, json_reader,
+        verbose = self$verbose
+      )
+    },
+
     #' @description Cleans raw data (corrects format, converts column types,
     #' etc). Works on raw data and so should be called after
     #' \href{#method-download}{\code{download()}}

diff --git a/R/test-DataClass.R b/R/test-DataClass.R
@@ -102,7 +102,8 @@ test_download <- function(DataClass_obj, download, snapshot_path) {
         walk(DataClass_obj$data$raw, function(data) {
           testthat::expect_s3_class(data, "data.frame")
           testthat::expect_true(nrow(data) > 0)
-          testthat::expect_true(ncol(data) >= 2)
+          testthat::expect_true(ncol(data) >= 2
+                                || typeof(data[[1]]) == "list")
         })
       }
     )

diff --git a/R/utils.R b/R/utils.R
@@ -53,6 +53,30 @@ csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) {
   return(tibble(data))
 }
 
+#' Custom JSON reading function
+#'
+#' @description Reads JSON via jsonlite::fromJSON and returns it as a tibble.
+#' @param file A URL or filepath to a JSON
+#' @param ... extra parameters to be passed to jsonlite::fromJSON
+#' @inheritParams message_verbose
+#' @return A tibble built from the parsed JSON
+#' @importFrom tibble tibble
+#' @importFrom jsonlite fromJSON
+#' @concept utility
+json_reader <- function(file, verbose = FALSE, ...) {
+  if (verbose) {
+    message("Downloading data from ", file)
+    data <- fromJSON(file, ...)
+  } else {
+    data <- suppressWarnings(
+      suppressMessages(
+        fromJSON(file, ...)
+      )
+    )
+  }
+  return(tibble(data))
+}
+
 #' Wrapper for message
 #'
 #' @description A wrapper for `message` that only prints output when