Skip to content

Commit

Permalink
Merge pull request epiforecasts#448 from RichardMN/fix-vietnam-447-switch-to-stringi
Browse files Browse the repository at this point in the history

Refactor Vietnam cleaning code and change string conversion function - fixes epiforecasts#447
  • Loading branch information
seabbs authored Feb 5, 2022
2 parents 1ebcc74 + 9a10f04 commit f54b220
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 21 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Imports:
R6,
readxl,
rlang,
stringi,
stringr,
tidyr (>= 1.0.0),
vroom,
Expand Down
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,12 @@ importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,syms)
importFrom(stringr,str_conv)
importFrom(stringi,stri_trans_general)
importFrom(stringi,stri_trim_both)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_sentence)
importFrom(stringr,str_to_title)
importFrom(stringr,str_trim)
importFrom(tidyr,complete)
importFrom(tidyr,drop_na)
importFrom(tidyr,fill)
Expand Down
41 changes: 22 additions & 19 deletions R/Vietnam.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ Vietnam <- R6::R6Class("Vietnam",
#' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
#' @importFrom tidyr replace_na drop_na separate
#' @importFrom purrr map
#' @importFrom stringr str_conv str_trim str_to_title str_replace_all
#' @importFrom stringr str_to_title str_replace_all
#' @importFrom stringi stri_trans_general stri_trim_both
#' @importFrom lubridate dmy
clean_common = function() {
# The first three elements of self$data$raw are the data
Expand All @@ -77,31 +78,33 @@ Vietnam <- R6::R6Class("Vietnam",
function(x) as_tibble(unlist(x),
rownames = "date")),
function(y) {
y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
separate(y, date, sep = "[.]+", into = c(NA, "province", "date"))
}
)
self$data$clean <- full_join(
full_join(
flat_all$case_by_time, flat_all$death_by_time,
by = c("province", "date"),
suffix = c(".cases", ".deaths"),
copy = TRUE
),
flat_all$recovered_by_time,
by = c("province", "date"),
suffix = c("", ".recovered"),
copy = TRUE
) %>%
index_cols <- bind_rows(
select(flat_all$case_by_time, "date", "province"),
select(flat_all$death_by_time, "date", "province"),
select(flat_all$recovered_by_time, "date", "province")) %>%
unique()

self$data$clean <- index_cols %>%
left_join(rename(flat_all$case_by_time, cases_total = value),
by = c("province", "date") ) %>%
left_join(rename(flat_all$death_by_time, deaths_total = value),
by = c("province", "date") ) %>%
left_join(rename(flat_all$recovered_by_time, recovered_total = value),
by = c("province", "date") ) %>%
# The api uses integer codes for provinces which do not
# line up with ISO 3166-2 (some of which are not numbers)
# so we use this as a temporary code to line names up
# with data.
select(
ncsc_region_code = province,
date,
cases_total = value.cases,
deaths_total = value.deaths,
recovered_total = value) %>%
cases_total,
deaths_total,
recovered_total
) %>%
mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
left_join(
self$data$raw$provinces %>%
Expand All @@ -119,8 +122,8 @@ Vietnam <- R6::R6Class("Vietnam",
#
#tidyr::drop_na(date, region_name) %>%
mutate(
level_1_region = str_conv(level_1_region, "ASCII"),
level_1_region = str_trim(level_1_region, side = "both"),
level_1_region = stri_trans_general(level_1_region, "ASCII"),
level_1_region = stri_trim_both(level_1_region),
level_1_region = str_replace_all(level_1_region,
"\\(.*\\)|-| ", ""),
level_1_region = str_to_title(level_1_region),
Expand Down

0 comments on commit f54b220

Please sign in to comment.