epiforecasts · seabbs · Sep 27, 2021 · Aug 28, 2021 · Sep 2, 2021 · Sep 12, 2021
diff --git a/.github/workflows/Vietnam.yaml b/.github/workflows/Vietnam.yaml
@@ -0,0 +1,48 @@
+on:
+  schedule:
+    - cron: '36 12 * * *'  # every day at 12:36 (GitHub evaluates cron in UTC)
+  workflow_dispatch:  # also allow the workflow to be started manually
+
+name: Vietnam
+
+jobs:
+  Vietnam:
+    runs-on: macOS-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-r@v1
+
+      - name: Query dependencies
+        run: |
+          install.packages('remotes')
+          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
+          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
+        shell: Rscript {0}
+
+      - name: Cache R packages
+        uses: actions/cache@v2
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
+
+      - name: Install dependencies
+        run: |
+          install.packages(c("remotes"))
+          remotes::install_deps(dependencies = TRUE)
+          install.packages("devtools")
+        shell: Rscript {0}
+
+      - name: Install package
+        run: R CMD INSTALL .
+
+      - name: Test dataset  # sets testSource = "Vietnam" before running the regional tests
+        run: |
+          options("testDownload" = TRUE)
+          options("testSource" = "Vietnam")
+          devtools::load_all()
+          testthat::test_file("tests/testthat/test-regional-datasets.R", reporter = c("summary", "fail"))
+        shell: Rscript {0}
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -86,6 +86,7 @@ Imports:
     countrycode (>= 1.2.0),
     dplyr,
     httr,
+    jsonlite,
     lifecycle,
     lubridate,
     magrittr,
@@ -94,13 +95,14 @@ Imports:
     R6,
     readxl,
     rlang,
+    stringi,
     stringr,
     tibble,
     tidyr (>= 1.0.0),
     tidyselect,
     vroom,
     withr,
-    xml2
+    xml2,
 Suggests:
     ggplot2,
     ggspatial,
@@ -121,4 +123,4 @@ Encoding: UTF-8
 Language: en-gb
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -24,6 +24,7 @@ export(SouthAfrica)
 export(Switzerland)
 export(UK)
 export(USA)
+export(Vietnam)
 export(WHO)
 export(expect_clean_cols)
 export(expect_columns_contain_data)
@@ -47,6 +48,7 @@ importFrom(countrycode,countryname)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
+importFrom(dplyr,as_tibble)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,count)
 importFrom(dplyr,distinct)
@@ -73,12 +75,14 @@ importFrom(dplyr,slice_tail)
 importFrom(dplyr,starts_with)
 importFrom(dplyr,summarise)
 importFrom(dplyr,tally)
+importFrom(dplyr,tibble)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
 importFrom(httr,GET)
 importFrom(httr,POST)
 importFrom(httr,content)
 importFrom(httr,status_code)
+importFrom(jsonlite,fromJSON)
 importFrom(lifecycle,deprecate_warn)
 importFrom(lifecycle,deprecated)
 importFrom(lifecycle,is_present)
@@ -107,6 +111,9 @@ importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,syms)
+importFrom(stringi,stri_replace_all)
+importFrom(stringi,stri_trans_general)
+importFrom(stringi,stri_trim_both)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_to_sentence)
@@ -123,6 +130,7 @@ importFrom(tidyr,nesting)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
+importFrom(tidyr,separate)
 importFrom(tidyselect,all_of)
 importFrom(tidyselect,ends_with)
 importFrom(tidyselect,starts_with)

diff --git a/R/Vietnam.R b/R/Vietnam.R
@@ -0,0 +1,137 @@
+#' Vietnam Class for downloading, cleaning and processing
+#' notification data
+#'
+#' @description Information for downloading, cleaning
+#'  and processing COVID-19 region data for Vietnam.
+#'
+#' @source \url{https://covid.ncsc.gov.vn}
+#' @export
+#' @concept dataset
+#' @family subnational
+#' @examples
+#' \dontrun{
+#' region <- Vietnam$new(verbose = TRUE, steps = TRUE, get = TRUE)
+#' region$return()
+#' }
+Vietnam <- R6::R6Class("Vietnam",
+  inherit = DataClass,
+  public = list(
+
+    # Core attributes describing the Vietnam data source
+    #' @field origin name of country to fetch data for
+    origin = "Vietnam",
+    #' @field supported_levels List of supported levels.
+    supported_levels = list("1"),
+    #' @field supported_region_names List of region names in order of level.
+    supported_region_names = list("1" = "region"),
+    #' @field supported_region_codes List of region codes in order of level.
+    supported_region_codes = list("1" = "iso_3166_2"),
+    #' @field common_data_urls List of named links to raw data.
+    common_data_urls = list(
+      # nolint start
+      "case_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time",
+      "death_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time",
+      "recovered_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time",
+      "provinces" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces"
+      # nolint end
+    ),
+    #' @field source_data_cols existing columns within the raw data
+    source_data_cols = c(
+      "cases_total", "deaths_total", "recovered_total"
+    ),
+    #' @field source_text Plain text description of the source of the data
+    source_text =
+      "Public COVID-19 for Vietnam, curated by NCSC's COVID-19 team",
+    #' @field source_url Website address for explanation/introduction of the
+    #' data
+    source_url = "https://covid.ncsc.gov.vn",
+
+    #' @description Set up a table of region codes for clean data
+    #' @importFrom tibble tibble
+    set_region_codes = function() {
+      self$codes_lookup$`1` <- covidregionaldata::vietnam_codes
+    },
+
+    #' @description Download function to get raw data. Uses the
+    #' parent class JSON-specific method for downloads.
+    download = function() {
+      super$download_JSON()
+    },
+
+    #' @description Provincial level data cleaning. Joins the case, death
+    #' and recovered series by province and date, then attaches province
+    #' names and the region codes held in `codes_lookup`.
+    #'
+    #' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
+    #' @importFrom tidyr replace_na drop_na separate
+    #' @importFrom purrr map
+    #' @importFrom stringi stri_trans_general stri_trim_both stri_replace_all
+    #' @importFrom stringr str_to_title str_replace_all
+    #' @importFrom lubridate dmy
+    clean_common = function() {
+      # The first three elements of self$data$raw are the case, death and
+      # recovered tables. Each is flattened with unlist() and the resulting
+      # element names are split on "." into a province code and a date.
+      data_inputs <- self$data$raw[1:3]
+      flat_all <- map(
+        map(
+          data_inputs,
+          function(x) as_tibble(unlist(x),
+                                rownames = "date")),
+        function(y) {
+          y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
+        }
+      )
+      self$data$clean <- full_join(
+        full_join(
+          flat_all$case_by_time, flat_all$death_by_time,
+          by = c("province", "date"),
+          suffix = c(".cases", ".deaths"),
+          copy = TRUE
+        ),
+        flat_all$recovered_by_time,
+        by = c("province", "date"),
+        suffix = c("", ".recovered"),
+        copy = TRUE
+      ) %>%
+        # The api uses integer codes for provinces which do not
+        # line up with ISO 3166-2 (some of which are not numbers)
+        # so we use this as a temporary code to line names up
+        # with data.
+        select(
+          ncsc_region_code = province,
+          date,
+          cases_total = value.cases,
+          deaths_total = value.deaths,
+          recovered_total = value) %>%
+        mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
+        left_join(
+          self$data$raw$provinces %>%
+            select(ncsc_region_code = id, level_1_region = name),
+          by = c("ncsc_region_code")) %>%
+        select( -ncsc_region_code ) %>%
+        mutate(
+          date = dmy(date),
+          cases_total = as.numeric(cases_total),
+          deaths_total = as.numeric(deaths_total),
+          recovered_total = as.numeric(recovered_total),
+          level_1_region = str_replace_all(level_1_region,
+                                        "TP HCM", "Hochiminh"),
+        ) %>%
+        # Normalise province names (ASCII transliteration, no brackets,
+        # hyphens or spaces, title case) before joining to the codes lookup
+        mutate(
+          level_1_region = stri_trans_general(level_1_region, "latin-ascii"),
+          level_1_region = stri_trim_both(level_1_region),
+          level_1_region = str_replace_all(level_1_region,
+                                           "\\(.*\\)|-| ", ""),
+          level_1_region = str_to_title(level_1_region),
+          level_1_region = replace_na(level_1_region, "Unknown")
+        ) %>%
+        left_join(
+          self$codes_lookup$`1`,
+          by = c("level_1_region" = "level_1_region")
+        )
+    }
+  )
+)
diff --git a/R/datasets.R b/R/datasets.R
@@ -33,6 +33,12 @@
 #' @return A tibble of region codes and related information.
 "france_codes"
 
+#' Region Codes for Vietnam Dataset.
+#'
+#' @description The region codes for Viet Nam
+#' @return A tibble of region codes and related information.
+"vietnam_codes"
+
 #' Region Codes for JHU Dataset. Taken from the region codes provided as
 #' part of the WHO dataset.
 #'

diff --git a/R/shared-methods.R b/R/shared-methods.R
@@ -302,6 +302,20 @@ DataClass <- R6::R6Class(
       )
     },
 
+    #' @description Download raw JSON data from `data_urls` with
+    #' `json_reader()` and store the results in `data$raw`, one element per
+    #' entry of `data_urls`. Stops with an error if `data_urls` is empty.
+    #' Intended for sub-classes to call from their own `download()` method.
+    #' @importFrom purrr map
+    download_JSON = function() {
+      if (length(self$data_urls) == 0) {
+        stop("No data to download as data_urls is empty")
+      }
+      self$data$raw <- map(self$data_urls, json_reader,
+        verbose = self$verbose
+      )
+    },
+
     #' @description Cleans raw data (corrects format, converts column types,
     #' etc). Works on raw data and so should be called after
     #' \href{#method-download}{\code{download()}}

diff --git a/R/test-DataClass.R b/R/test-DataClass.R
@@ -102,7 +102,8 @@ test_download <- function(DataClass_obj, download, snapshot_path) {
         walk(DataClass_obj$data$raw, function(data) {
           testthat::expect_s3_class(data, "data.frame")
           testthat::expect_true(nrow(data) > 0)
-          testthat::expect_true(ncol(data) >= 2)
+          testthat::expect_true(ncol(data) >= 2
+                                || typeof(data[[1]])=="list")
         })
       }
     )

diff --git a/R/utils.R b/R/utils.R
@@ -53,6 +53,30 @@ csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) {
   return(tibble(data))
 }
 
+#' Custom JSON reading function
+#'
+#' @description Reads JSON with jsonlite::fromJSON and returns it in a tibble.
+#' @param file A URL or filepath to a JSON file
+#' @param ... extra parameters to be passed to jsonlite::fromJSON
+#' @inheritParams message_verbose
+#' @return A tibble built from the parsed JSON
+#' @importFrom tibble tibble
+#' @importFrom jsonlite fromJSON
+#' @concept utility
+json_reader <- function(file, verbose = FALSE, ...) {
+  if (verbose) {
+    message("Downloading data from ", file)
+    data <- fromJSON(file, ...)
+  } else {  # quiet mode: hide warnings and messages raised by fromJSON
+    data <- suppressWarnings(
+      suppressMessages(
+        fromJSON(file, ...)
+      )
+    )
+  }
+  return(tibble(data))
+}
+
 #' Wrapper for message
 #'
 #' @description A wrapper for `message` that only prints output when