Skip to content

Commit

Permalink
JSON-reading implementation of Vietnam
Browse files Browse the repository at this point in the history
Uses download_json method for downloading. Adjustment to the DataClass tests to deal with tables of lists. Addition of stringi to imported packages, and of fromJSON and jsonlinte to the WORDLIST. Tests run and data stored.
  • Loading branch information
RichardMN committed Sep 19, 2021
1 parent f0cec88 commit 2c20067
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 73 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Imports:
R6,
readxl,
rlang,
stringi,
stringr,
tibble,
tidyr (>= 1.0.0),
Expand Down
128 changes: 57 additions & 71 deletions R/Vietnam.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,82 +28,35 @@ Vietnam <- R6::R6Class("Vietnam",
supported_region_codes = list("1" = "iso_3166_2"),
#' @field common_data_urls List of named links to raw data.
common_data_urls = list(
# nolint start
"case_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time",
"death_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time",
"recovered_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time",
"provinces" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces"
# nolint end
),
#' @field source_data_cols existing columns within the raw data
source_data_cols = c(
"cases_total", "deaths_total", "recovered_total"
),
#' @field source_text Plain text description of the source of the data
source_text = "Public COVID-19 for Vietnam, curated by NCSC's COVID-19 team",
source_text =
"Public COVID-19 for Vietnam, curated by NCSC's COVID-19 team",
#' @field source_url Website address for explanation/introduction of the
#' data
source_url = "https://covid.ncsc.gov.vn", # nolint
source_url = "https://covid.ncsc.gov.vn",

#' @description Set up a table of region codes for clean data
#' @importFrom tibble tibble
set_region_codes = function() {
self$codes_lookup$`1` <- covidregionaldata::vietnam_codes
},

#' @description download function to get raw data
#' @importFrom tidyr replace_na drop_na
#' @importFrom lubridate dmy
#' @importFrom jsonlite fromJSON
download = function(){
#' @description Download function to get raw data. Uses the
#' parent class JSON-specific method for downloads.
download = function() {
super$download_JSON()
},
# function() {
# bundles_urls <- list(
# "case_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time",
# "death_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time",
# "recovered_by_time" = "https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time"
# )
# Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 4) # Fix VROOM error
# provines_url <- "https://covid.ncsc.gov.vn/api/v3/covid/provinces"
# bundles <- names(bundles_urls)
# provines_data <- fromJSON(provines_url)
#
# get_bundles_data <- function(bundles) {
# bundles_data <- list()
# for (bundle in bundles) {
# url <- paste0("https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=", bundle)
# data <- fromJSON(url)
# bundles_data <- c(bundles_data, setNames(list(data), bundle))
# }
# bundles_data
# }
#
# bundles_data <- get_bundles_data(bundles)
#
# get_province <- function(id, data) {
# row_dat <- provines_data[(id <- id), ]
# death_by_time <- do.call(cbind, data$death_by_time[id])
# case_by_time <- do.call(cbind, data$case_by_time[id])
# recovered_by_time <- do.call(cbind, data$recovered_by_time[id])
# if (!identical(row.names(death_by_time), row.names(death_by_time))) {
# stop("Dates on case_by_time and death_by_time do not match!")
# }
# df <- tibble(
# date = dmy(row.names(case_by_time)),
# id = row_dat$id,
# name = row_dat$name,
# case_by_time = case_by_time,
# death_by_time = death_by_time,
# recovered_by_time = recovered_by_time
# )
# df
# }
#
# df <- do.call(rbind, lapply(provines_data$id, function(id) {
# get_province(id, bundles_data)
# }))
# names(df) <- c("date", "id", "region_name", "cases_total", "deaths_total", "recovered_total")
# self$data$raw[["main"]] <- df
# },

#' @description Provincial Level Data
#' cleaning
Expand All @@ -116,31 +69,64 @@ Vietnam <- R6::R6Class("Vietnam",
#' @importFrom stringr str_to_title str_replace_all
#' @importFrom lubridate dmy
clean_common = function() {
# The first three elements of self$data$raw are the data
# tables downloaded and so these can be processed identically
#
data_inputs <- self$data$raw[1:3]
flat_all <- map(map(vn_data_inputs, function(x) as_tibble(unlist(x), rownames="date")),
function(x) {x %>% separate(date, sep="[.]+", into=c(NA, "province", "date"))})
flat_all <- map(
map(
data_inputs,
function(x) as_tibble(unlist(x),
rownames = "date")),
function(y) {
y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
}
)
self$data$clean <- full_join(
full_join(
vn_flat_all$case_by_time, vn_flat_all$death_by_time, by=c("province","date"),suffix=c(".cases", ".deaths"),copy=TRUE),
vn_flat_all$recovered_by_time, by=c("province","date"), suffix=c("",".recovered"), copy=TRUE) %>%
select(level_1_region_code=province, date, cases_total = value.cases, deaths_total=value.deaths, recovered_total = value) %>%
mutate(level_1_region_code = as.numeric(level_1_region_code)) %>%
left_join(self$data$raw$provinces%>%select(level_1_region_code=id,level_1_region=name), by=c("level_1_region_code")) %>%
#select(date, region_name, cases_total, deaths_total, recovered_total) %>%
flat_all$case_by_time, flat_all$death_by_time,
by = c("province", "date"),
suffix = c(".cases", ".deaths"),
copy = TRUE
),
flat_all$recovered_by_time,
by = c("province", "date"),
suffix = c("", ".recovered"),
copy = TRUE
) %>%
# The api uses integer codes for provinces which do not
# line up with ISO 3166-2 (some of which are not numbers)
# so we use this as a temporary code to line names up
# with data.
select(
ncsc_region_code = province,
date,
cases_total = value.cases,
deaths_total = value.deaths,
recovered_total = value) %>%
mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
left_join(
self$data$raw$provinces %>%
select(ncsc_region_code = id, level_1_region = name),
by = c("ncsc_region_code")) %>%
select( -ncsc_region_code ) %>%
mutate(
date = dmy(date),
cases_total = as.numeric(cases_total),
deaths_total = as.numeric(deaths_total),
recovered_total = as.numeric(recovered_total),
region_name = stringr::str_replace_all(region_name, "TP HCM", "Hochiminh"),
level_1_region = str_replace_all(level_1_region,
"TP HCM", "Hochiminh"),
) %>%
tidyr::drop_na(date, region_name) %>%
rename(level_1_region = region_name) %>%
#
#tidyr::drop_na(date, region_name) %>%
mutate(
level_1_region = stringi::stri_trans_general(level_1_region, "latin-ascii"),
level_1_region = stringi::stri_trim_both(level_1_region),
level_1_region = stringr::str_replace_all(level_1_region, "\\(.*\\)|-| ", ""),
level_1_region = stringr::str_to_title(level_1_region),
level_1_region = tidyr::replace_na(level_1_region, "Unknown")
level_1_region = stri_trans_general(level_1_region, "latin-ascii"),
level_1_region = stri_trim_both(level_1_region),
level_1_region = str_replace_all(level_1_region,
"\\(.*\\)|-| ", ""),
level_1_region = str_to_title(level_1_region),
level_1_region = replace_na(level_1_region, "Unknown")
) %>%
left_join(
self$codes_lookup$`1`,
Expand Down
3 changes: 2 additions & 1 deletion R/test-DataClass.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ test_download <- function(DataClass_obj, download, snapshot_path) {
walk(DataClass_obj$data$raw, function(data) {
testthat::expect_s3_class(data, "data.frame")
testthat::expect_true(nrow(data) > 0)
testthat::expect_true(ncol(data) >= 2)
testthat::expect_true(ncol(data) >= 2
|| typeof(data[[1]])=="list")
})
}
)
Expand Down
Binary file modified data/all_country_data.rda
Binary file not shown.
2 changes: 2 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Estados
etc
filepath
fns
fromJSON
geocode
geocoding
github
Expand All @@ -50,6 +51,7 @@ iso
JHU
jrc
JRC
jsonlite
jure
Kreise
Landkreis
Expand Down
3 changes: 2 additions & 1 deletion man/Vietnam.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/testthat/custom_data/Vietnam_level_1.rds
Binary file not shown.

0 comments on commit 2c20067

Please sign in to comment.