From 01572c1a2d4f4dd116705b4567168dca5412d852 Mon Sep 17 00:00:00 2001 From: Richard Martin-Nielsen Date: Wed, 3 Nov 2021 22:20:47 +0200 Subject: [PATCH] Initial adjustment for new data source Switch to using Socrata api to download case list from Colombian open data source. Aggregation to level 1 not yet working. --- NAMESPACE | 2 + R/Colombia.R | 98 ++++++++++++++++++++++++++++++++++--------------- man/Colombia.Rd | 30 ++++++++++++++- 3 files changed, 99 insertions(+), 31 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5cdf4ccd..22eb3cbe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(test_download) export(test_processing) export(test_return) importFrom(R6,R6Class) +importFrom(RSocrata,read.socrata) importFrom(countrycode,countrycode) importFrom(countrycode,countryname) importFrom(dplyr,"%>%") @@ -90,6 +91,7 @@ importFrom(lifecycle,deprecated) importFrom(lifecycle,is_present) importFrom(lubridate,as_date) importFrom(lubridate,dmy) +importFrom(lubridate,dmy_hms) importFrom(lubridate,mdy) importFrom(lubridate,month) importFrom(lubridate,year) diff --git a/R/Colombia.R b/R/Colombia.R index c1a3154c..1f41f223 100644 --- a/R/Colombia.R +++ b/R/Colombia.R @@ -3,6 +3,7 @@ #' and processing COVID-19 region data for Colombia #' # nolint start +#' @source \url{https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr} #' @source \url{https://github.com/danielcs88/colombia_covid-19/} # nolint end #' @export @@ -21,24 +22,30 @@ Colombia <- R6::R6Class("Colombia", #' @field origin name of origin to fetch data for origin = "Colombia", #' @field supported_levels A list of supported levels. - supported_levels = list("1"), + supported_levels = list("1", "2"), #' @field supported_region_names A list of region names in order of level. - supported_region_names = list("1" = "departamento"), + supported_region_names = list( + "1" = "departamento", + "2" = "municipio" + ), #' @field supported_region_codes A list of region codes in order of level. - supported_region_codes = list("1" = "iso_3166_2"), + supported_region_codes = list( + "1" = "iso_3166_2", + "2" = "codigo_municipio" + ), #' @field common_data_urls List of named links to raw data. # nolint start common_data_urls = list( - "main" = "https://raw.githubusercontent.com/danielcs88/colombia_covid-19/master/datos/cronologia.csv" + "main" = "https://www.datos.gov.co/resource/gt2j-8ykr.csv?$select=fecha_diagnostico,departamento_nom,ciudad_municipio_nom,ciudad_municipio" ), # nolint end #' @field source_data_cols existing columns within the raw data - source_data_cols = c("cases_total"), + source_data_cols = c("cases_new"), #' @field source_text Plain text description of the source of the data - source_text = "Daniel C\u00e1rdenas", + source_text = "Datos abiertos Colombia (Colombia open data)", #' @field source_url Website address for explanation/introduction of the #' data - source_url = "https://github.com/danielcs88/colombia_covid-19/", + source_url = "https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr", # nolint #' @description Set up a table of region codes for clean data #' @importFrom tibble tibble @@ -47,38 +54,71 @@ Colombia <- R6::R6Class("Colombia", self$codes_lookup$`1` <- covidregionaldata::colombia_codes }, - #' @description Colombia specific state level data cleaning - #' @importFrom dplyr select mutate - #' @importFrom lubridate ymd + #' @description Colombia specific download using Socrata API + #' @importFrom RSocrata read.socrata + download = function () { + message_verbose(self$verbose, + "Downloading Colombia data. This may take a while.") + self$data$raw$main <- read.socrata(self$data_urls[["main"]]) + }, + + #' @description Colombia specific data cleaning + #' @importFrom dplyr select mutate rename summarise group_by + #' @importFrom lubridate dmy_hms as_date #' @importFrom stringr str_replace_all str_to_sentence str_to_title #' @importFrom rlang .data #' clean_common = function() { self$data$clean <- self$data$raw[["main"]] %>% - select( - date = .data$fecha, - level_1_region = .data$departamento, - cases_total = .data$casos - ) %>% - mutate( - date = ymd(.data$date), - level_1_region = iconv(.data$level_1_region, - from = "UTF-8", - to = "ASCII//TRANSLIT" - ), - level_1_region = str_replace_all(.data$level_1_region, " D.C.", ""), - level_1_region = str_replace_all( - .data$level_1_region, - "San Andres y Providencia", - "San Andres, Providencia y Santa Catalina" - ), - level_1_region = str_to_sentence(.data$level_1_region), - level_1_region = str_to_title(.data$level_1_region) + rename( + date = .data$fecha_diagnostico, + level_1_region = .data$departamento_nom, + level_2_region = .data$ciudad_municipio_nom, + level_2_region_code = .data$ciudad_municipio ) %>% + group_by(date, level_1_region, level_2_region, level_2_region_code) %>% + summarise(cases_new = n(), .groups = "drop") %>% + mutate(date = as_date(dmy_hms(date)), + level_1_region = iconv(.data$level_1_region, + from = "UTF-8", + to = "ASCII//TRANSLIT" + ), + level_1_region = + str_replace_all(.data$level_1_region, + c(" D.C." = "", + "San Andres y Providencia" + = "San Andres, Providencia y Santa Catalina", + "Norte Santander" = "Norte De Santander" + ) + ), + level_1_region = str_to_sentence(.data$level_1_region), + level_1_region = str_to_title(.data$level_1_region)) %>% left_join( self$codes_lookup$`1`, by = c("level_1_region" = "level_1_region") ) + }, + + #' @description Colombia Specific Department Level Data Cleaning + #' + #' Aggregates data to the level 1 (department) regional level. Data is + #' provided by the source at the level 2 (municipality) regional level. + #' + #' @importFrom dplyr group_by summarise ungroup full_join across if_else + #' @importFrom tidyselect vars_select_helpers + clean_level_1 = function() { + self$data$clean <- self$data$clean %>% + group_by( + .data$date, + .data$level_1_region, .data$level_1_region_code + ) %>% + summarise( + across( + tidyselect::vars_select_helpers$where(is.numeric), + sum + ) + ) %>% + ungroup() } ) ) diff --git a/man/Colombia.Rd b/man/Colombia.Rd index d1f88abb..8fbbdf6d 100644 --- a/man/Colombia.Rd +++ b/man/Colombia.Rd @@ -4,6 +4,8 @@ \alias{Colombia} \title{Colombia Class for downloading, cleaning and processing notification data} \source{ +\url{https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr} + \url{https://github.com/danielcs88/colombia_covid-19/} } \description{ @@ -70,7 +72,9 @@ data} \subsection{Public methods}{ \itemize{ \item \href{#method-set_region_codes}{\code{Colombia$set_region_codes()}} +\item \href{#method-download}{\code{Colombia$download()}} \item \href{#method-clean_common}{\code{Colombia$clean_common()}} +\item \href{#method-clean_level_1}{\code{Colombia$clean_level_1()}} \item \href{#method-clone}{\code{Colombia$clone()}} } } @@ -79,7 +83,6 @@ data} \itemize{ \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-available_regions}{\code{covidregionaldata::DataClass$available_regions()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-clean}{\code{covidregionaldata::DataClass$clean()}}\out{} -\item \out{}\href{../../covidregionaldata/html/DataClass.html#method-download}{\code{covidregionaldata::DataClass$download()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-download_JSON}{\code{covidregionaldata::DataClass$download_JSON()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-filter}{\code{covidregionaldata::DataClass$filter()}}\out{} \item \out{}\href{../../covidregionaldata/html/DataClass.html#method-get}{\code{covidregionaldata::DataClass$get()}}\out{} @@ -100,16 +103,39 @@ Set up a table of region codes for clean data \if{html}{\out{
}}\preformatted{Colombia$set_region_codes()}\if{html}{\out{
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-download}{}}} +\subsection{Method \code{download()}}{ +Colombia specific download using Socrata API +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Colombia$download()}\if{html}{\out{
}} +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-clean_common}{}}} \subsection{Method \code{clean_common()}}{ -Colombia specific state level data cleaning +Colombia specific data cleaning \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Colombia$clean_common()}\if{html}{\out{
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clean_level_1}{}}} +\subsection{Method \code{clean_level_1()}}{ +Colombia Specific Department Level Data Cleaning + +Aggregates data to the level 1 (department) regional level. Data is +provided by the source at the level 2 (municipality) regional level. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Colombia$clean_level_1()}\if{html}{\out{
}} +} + } \if{html}{\out{
}} \if{html}{\out{}}