Merge pull request epiforecasts#448 from RichardMN/fix-vietnam-447-switch-to-stringi

Refactor Vietnam cleaning code and change string conversion function - fixes epiforecasts#447
RichardMN · Feb 5, 2022 · f54b220 · f54b220
2 parents 1ebcc74 + 9a10f04
commit f54b220
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 21 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -98,6 +98,7 @@ Imports:
     R6,
     readxl,
     rlang,
+    stringi,
     stringr,
     tidyr (>= 1.0.0),
     vroom,

diff --git a/NAMESPACE b/NAMESPACE
@@ -117,12 +117,12 @@ importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,syms)
-importFrom(stringr,str_conv)
+importFrom(stringi,stri_trans_general)
+importFrom(stringi,stri_trim_both)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_to_sentence)
 importFrom(stringr,str_to_title)
-importFrom(stringr,str_trim)
 importFrom(tidyr,complete)
 importFrom(tidyr,drop_na)
 importFrom(tidyr,fill)

diff --git a/R/Vietnam.R b/R/Vietnam.R
@@ -64,7 +64,8 @@ Vietnam <- R6::R6Class("Vietnam",
     #' @importFrom dplyr filter select mutate rename tibble as_tibble full_join
     #' @importFrom tidyr replace_na drop_na separate
     #' @importFrom purrr map
-    #' @importFrom stringr str_conv str_trim str_to_title str_replace_all
+    #' @importFrom stringr str_to_title str_replace_all
+    #' @importFrom stringi stri_trans_general stri_trim_both
     #' @importFrom lubridate dmy
     clean_common = function() {
       # The first three elements of self$data$raw are the data
@@ -77,31 +78,33 @@ Vietnam <- R6::R6Class("Vietnam",
           function(x) as_tibble(unlist(x),
                                 rownames = "date")),
         function(y) {
-          y %>% separate(date, sep = "[.]+", into = c(NA, "province", "date"))
+          separate(y, date, sep = "[.]+", into = c(NA, "province", "date"))
         }
       )
-      self$data$clean <- full_join(
-        full_join(
-          flat_all$case_by_time, flat_all$death_by_time,
-          by = c("province", "date"),
-          suffix = c(".cases", ".deaths"),
-          copy = TRUE
-        ),
-        flat_all$recovered_by_time,
-        by = c("province", "date"),
-        suffix = c("", ".recovered"),
-        copy = TRUE
-      ) %>%
+      index_cols <- bind_rows(
+        select(flat_all$case_by_time, "date", "province"),
+        select(flat_all$death_by_time, "date", "province"),
+        select(flat_all$recovered_by_time, "date", "province")) %>%
+        unique()
+
+      self$data$clean <- index_cols %>%
+        left_join(rename(flat_all$case_by_time, cases_total = value),
+                  by = c("province", "date") ) %>%
+        left_join(rename(flat_all$death_by_time, deaths_total = value),
+                  by = c("province", "date") ) %>%
+        left_join(rename(flat_all$recovered_by_time, recovered_total = value),
+                  by = c("province", "date") ) %>%
         # The api uses integer codes for provinces which do not
         # line up with ISO 3166-2 (some of which are not numbers)
         # so we use this as a temporary code to line names up
         # with data.
         select(
           ncsc_region_code = province,
           date,
-          cases_total = value.cases,
-          deaths_total = value.deaths,
-          recovered_total = value) %>%
+          cases_total,
+          deaths_total,
+          recovered_total
+          ) %>%
         mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
         left_join(
           self$data$raw$provinces %>%
@@ -119,8 +122,8 @@ Vietnam <- R6::R6Class("Vietnam",
         #
         #tidyr::drop_na(date, region_name) %>%
         mutate(
-          level_1_region = str_conv(level_1_region, "ASCII"),
-          level_1_region = str_trim(level_1_region, side = "both"),
+          level_1_region = stri_trans_general(level_1_region, "ASCII"),
+          level_1_region = stri_trim_both(level_1_region),
           level_1_region = str_replace_all(level_1_region,
                                            "\\(.*\\)|-| ", ""),
           level_1_region = str_to_title(level_1_region),
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -98,6 +98,7 @@ Imports:
         R6,
         readxl,
         rlang,
+        stringi,
         stringr,
         tidyr (>= 1.0.0),
         vroom,
@@ Expand Down @@