diff --git a/.gitignore b/.gitignore index ad1fdd6..4486d0e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,3 @@ vignettes/*.R inst/doc data-raw README.Rmd -data-raw/LoadTestdata.R diff --git a/DESCRIPTION b/DESCRIPTION index 9ccb1f7..9c2ec47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: PHEindicatormethods Type: Package -Version: 2.0.1 +Version: 2.0.2 Title: Common Public Health Statistics and their Confidence Intervals Description: Functions to calculate commonly used public health statistics and their confidence intervals using methods approved for use in the production @@ -21,12 +21,13 @@ Description: Functions to calculate commonly used public health statistics and Silcocks PBS et al (2001) . Low and Low (2004) . Authors@R: c(person("Anderson", "Georgina", email = "georgina.anderson@dhsc.gov.uk", role = c("aut", "cre")), - person("Fox", "Sebastian", email = "sebastian.fox@dhsc.gov.uk", role = c("ctb")), + person("Fox", "Sebastian", role = c("ctb")), person("Francis", "Matthew", role = c("ctb")), person("Fryers", "Paul", email = "paul.fryers@dhsc.gov.uk", role = c("ctb")), person("Clegg", "Emma", role = c("ctb")), person("Westermann", "Annabel", email = "annabel.westermann@dhsc.gov.uk", role = c("ctb")), - person("Woolner", "Joshua", role = c("ctb")) + person("Woolner", "Joshua", role = c("ctb")), + person("Fellows", "Charlotte", email = "charlotte.fellows@dhsc.gov.uk", role = c("ctb")) ) BugReports: https://github.com/ukhsa-collaboration/PHEindicatormethods/issues Depends: R (>= 3.1.0) @@ -46,8 +47,9 @@ Suggests: knitr, readxl, rmarkdown, - testthat, + testthat (>= 3.0.0), withr Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 VignetteBuilder: knitr +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index b67170c..d3d10ed 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(rlang,":=") importFrom(rlang,.data) +importFrom(rlang,.env) importFrom(rlang,quo_name) importFrom(rlang,quo_text) importFrom(rlang,sym) diff --git a/NEWS.md b/NEWS.md index e71c17d..fd64fec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +## PHEindicatormethods v2.0.2 +* Amended phe_quantile function so it will not produce quantiles when the number of small areas within a group is less than the number of quantiles requested. A warning will be generated when quantiles cannot be produced for this reason. +* removed the highergeog argument from phe_quantile function, previously soft-deprecated in v1.2.0. +* `phe_sii` amended to allow data to be transformed prior to calculation of the SII, and to allow the intercept value to be output. + ## PHEindicatormethods v2.0.1 * `calculate_ISRate` and `calculate_ISRatio` updated so observed events can be passed as total without age breakdowns * amended GitHub referencing as code repository now owned and hosted by UKHSA-collaboration not PublicHealthEngland diff --git a/R/Proportions.R b/R/Proportions.R index 2dc1bef..2ac4828 100644 --- a/R/Proportions.R +++ b/R/Proportions.R @@ -153,7 +153,7 @@ phe_proportion <- function(data, x, n, type="full", confidence=0.95, multiplier= uppercl = wilson_upper(({{ x }}),({{ n }}),confidence) * multiplier, confidence = paste(confidence*100,"%",sep=""), method = "Wilson") |> - relocate(.data$statistic, .after = confidence) + relocate("statistic", .after = "confidence") } diff --git a/R/Quantiles.R b/R/Quantiles.R index 0440796..d46b367 100644 --- a/R/Quantiles.R +++ b/R/Quantiles.R @@ -1,48 +1,54 @@ -# ------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ #' Assign Quantiles using phe_quantile #' #' Assigns data to quantiles based on numeric data rankings. #' -#' @param data a data frame containing the quantitative data to be assigned to quantiles. -#' If pre-grouped, separate sets of quantiles will be assigned for each grouping set; -#' unquoted string; no default -#' @param values field name from data containing the numeric values to rank data by and assign quantiles from; -#' unquoted string; no default -#' @param highergeog deprecated - functionality replaced by pre-grouping the input data frame -#' @param nquantiles the number of quantiles to separate each grouping set into; numeric; default=10L -#' @param invert whether the quantiles should be directly (FALSE) or inversely (TRUE) related to the numerical value order; -#' logical (to apply same value to all grouping sets) OR unquoted string referencing field name from data -#' that stores logical values for each grouping set; default = TRUE (ie highest values assigned to quantile 1) -#' @param inverttype whether the invert argument has been specified as a logical value or a field name from data; -#' quoted string "field" or "logical"; default = "logical" -#' @param type defines whether to include metadata columns in output to reference the arguments passed; -#' can be "standard" or "full"; quoted string; default = "full" +#' @param data a data frame containing the quantitative data to be assigned to +#' quantiles. If pre-grouped, separate sets of quantiles will be assigned for +#' each grouping set; unquoted string; no default +#' @param values field name from data containing the numeric values to rank data +#' by and assign quantiles from; unquoted string; no default +#' @param nquantiles the number of quantiles to separate each grouping set into; +#' numeric; default=10L +#' @param invert whether the quantiles should be directly (FALSE) or inversely +#' (TRUE) related to the numerical value order; logical (to apply same value +#' to all grouping sets) OR unquoted string referencing field name from data +#' that stores logical values for each grouping set; default = TRUE (ie +#' highest values assigned to quantile 1) +#' @param inverttype whether the invert argument has been specified as a logical +#' value or a field name from data; quoted string "field" or "logical"; +#' default = "logical" +#' @param type defines whether to include metadata columns in output to +#' reference the arguments passed; can be "standard" or "full"; quoted string; +#' default = "full" #' #' @import dplyr #' @importFrom rlang sym quo_name #' @export #' -#' @return When type = "full", returns the original data.frame with quantile (quantile value), -#' nquantiles (number of quantiles requested), groupvars (grouping sets quantiles assigned within) -#' and invert (indicating direction of quantile assignment) fields appended. -#' +#' @return When type = "full", returns the original data.frame with quantile +#' (quantile value), nquantiles (number of quantiles requested), groupvars +#' (grouping sets quantiles assigned within) and invert (indicating direction +#' of quantile assignment) fields appended. #' -#' @section Notes: See [PHE Technical Guide - Assigning Deprivation Quintiles](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods) for methodology. -#' In particular, note that this function strictly applies the algorithm defined but some manual -#' review, and potentially adjustment, is advised in some cases where multiple small areas with equal rank -#' fall across a natural quantile boundary. +#' @section Notes: See [OHID Technical Guide - Assigning Deprivation Categories](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods) +#' for methodology. In particular, note that this function strictly applies +#' the algorithm defined but some manual review, and potentially adjustment, +#' is advised in some cases where multiple small areas with equal rank fall +#' across a natural quantile boundary. #' #' @examples -#' -#' df <- data.frame(region = as.character(rep(c("Region1","Region2","Region3","Region4"), each=250)), -#' smallarea = as.character(paste0("Area",seq_along(1:1000))), -#' vals = as.numeric(sample(200, 1000, replace = TRUE)), -#' stringsAsFactors=FALSE) +#' df <- data.frame( +#' region = as.character(rep(c("Region1","Region2","Region3","Region4"), +#' each=250)), +#' smallarea = as.character(paste0("Area",seq_along(1:1000))), +#' vals = as.numeric(sample(200, 1000, replace = TRUE)), +#' stringsAsFactors = FALSE) #' #' # assign small areas to deciles across whole data frame #' phe_quantile(df, vals) #' -#' # assign small area to deciles within regions by pre-grouping the input data frame +#' # assign small areas to deciles within regions by pre-grouping the data frame #' library(dplyr) #' df_grp <- df %>% group_by(region) #' phe_quantile(df_grp, vals) @@ -53,36 +59,34 @@ #' @family PHEindicatormethods package functions # ------------------------------------------------------------------------------------------------- - # create phe_quantile function using PHE method -phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L, - invert=TRUE, inverttype = "logical", type = "full") { +phe_quantile <- function(data, + values, + nquantiles = 10L, + invert = TRUE, + inverttype = "logical", + type = "full") { # check required arguments present - if (missing(data)|missing(values)) { - stop("function phe_quantile requires at least 2 arguments: data and values") + if (missing(data) | missing(values)) { + stop(paste0("function phe_quantile requires at least 2 arguments: ", + "data and values")) } - # give useful error if deprecated highergeog argument used - if (!missing(highergeog)) { - stop("highergeog argument is deprecated - pregroup input dataframe to replace this functionality") - } - - # check invert is valid and append to data - if (!(inverttype %in% c("logical","field"))) { + if (!(inverttype %in% c("logical", "field"))) { stop("valid values for inverttype are logical and field") } else if (inverttype == "logical") { - if (!(invert %in% c(TRUE,FALSE))) { + if (!(invert %in% c(TRUE, FALSE))) { stop("invert expressed as a logical must equal TRUE or FALSE") } - data <- mutate(data,invert_calc = invert) + data <- mutate(data, invert_calc = invert) } else if (inverttype == "field") { if (deparse(substitute(invert)) %in% colnames(data)) { - data <- mutate(data,invert_calc = {{ invert }}) + data <- mutate(data, invert_calc = {{ invert }}) } else stop("invert is not a field name from data") } @@ -93,34 +97,54 @@ phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L, } #check all invert values are identical within groups - if (!n_groups(data) == nrow(unique(select(data,"invert_calc")))) { - stop("invert field values must take the same logical value for each data grouping set") + if (!n_groups(data) == nrow(unique(select(data, "invert_calc")))) { + stop(paste0("invert field values must take the same logical value for ", + "each data grouping set")) } - # assign quantiles + # assign quantiles - unless number of small areas within a group is less + # than number of quantiles requested. Ignore areas with no value present phe_quantile <- data %>% - mutate(naflag = if_else(is.na({{ values }}),0,1)) %>% - add_count(name = "na_flag", wt = .data$naflag) %>% - mutate(adj_value = if_else(.data$invert_calc == TRUE, max({{ values }}, na.rm=TRUE)-{{ values }},{{ values }}), - rank = rank(.data$adj_value, ties.method="min", na.last = "keep"), - quantile = floor((nquantiles+1)-ceiling(((.data$na_flag+1)-rank)/(.data$na_flag/nquantiles))), - quantile = if_else(.data$quantile == 0, 1, .data$quantile)) %>% - select(!c("naflag", "na_flag", "adj_value", "rank")) %>% + mutate(num_small_areas = sum(!is.na({{ values }})), + rank = case_when( + .data$invert_calc == TRUE ~ rank(- {{ values }}, + ties.method = "min", + na.last = "keep"), + .default = rank({{ values }}, + ties.method = "min", + na.last = "keep") + ), + quantile = if_else(.data$num_small_areas < nquantiles, + NA_real_, + floor((nquantiles + 1) - ceiling(((.data$num_small_areas + 1) - rank) / + (.data$num_small_areas / nquantiles) + ) + ) + ), + quantile = if_else(.data$quantile == 0, 1, .data$quantile) + ) %>% + select(!c("num_small_areas", "rank")) %>% mutate(nquantiles= nquantiles, - groupvars = paste0(group_vars(data),collapse = ", "), + groupvars = paste0(group_vars(data), collapse = ", "), qinverted = if_else(.data$invert_calc == TRUE, "lowest quantile represents highest values", "lowest quantile represents lowest values")) + # warn if any groups had too few snall areas with values to assign quantiles + if (nrow(filter(phe_quantile, all(is.na(.data$quantile)))) > 0 + ) { + warning(paste0("One or more groups had too few small areas with values to ", + "allow quantiles to be assigned")) + } # remove columns if not required based on value of type argument if (type == "standard") { phe_quantile <- phe_quantile %>% - select(!c("nquantiles", "groupvars", "qinverted", "invert_calc")) + select(!c("nquantiles", "groupvars", "qinverted", "invert_calc")) } else { phe_quantile <- phe_quantile %>% - select(!c("invert_calc")) + select(!c("invert_calc")) } diff --git a/R/SII_function.R b/R/SII_function.R index 3a2f3a7..c84a5df 100644 --- a/R/SII_function.R +++ b/R/SII_function.R @@ -41,69 +41,102 @@ #' of the fitted value at \code{x=1,Y1} and the fitted value at \code{x=0,Y0}. #' which can be calculated as: \code{RII = (Y0 + SII)/Y0} #' -#' @section Function arguments: +#' @section Transformations: #' -#' The indicator type can be specified via the \code{value_type} parameter. -#' Transformations can be applied to the indicator value and its confidence -#' limits before calculating the standard error in cases where the confidence -#' interval around the indicator value is likely to be non-symmetric. This is a -#' log transformation for rates, and logit for proportions. If the standard -#' error is supplied directly to the function from the input dataset, this is -#' used instead of calculating one from the indicator confidence limits. +#' The indicator type can be specified as 1 (rate), 2 (proportion) or 0 (other), +#' using the \code{value_type} parameter. This setting determines the data +#' transformations that will be applied in the following two parts of the +#' method. +#' +#' Use in conjunction with the \code{transform} parameter in calculation of the +#' SII: It is recommended that rates and proportions are transformed prior to +#' calculation of the SII by setting the \code{transform} parameter to TRUE for +#' these indicator types. This will perform a log transformation for rates, or +#' logit for proportions, and return outputs transformed back to the original +#' units of the indicator. These transformations are recommended to improve the +#' linearity between the indicator values and the quantile, which is an +#' assumption of the method. A user-provided standard error will not be accepted +#' when the \code{transform} parameter is set to TRUE as the confidence limits +#' are required for this transformation. +#' +#' Use in calculation of the standard error: Rates and proportions, and their +#' confidence limits, are transformed prior to calculation of the standard error +#' for each quantile. This is because it is assumed that the confidence interval +#' around the indicator value is non-symmetric for these indicator types. Note +#' that this transformation is not controlled by the \code{transform} parameter +#' and is applied based on the value of the \code{value_type} parameter only. A +#' user-provided standard error will not be accepted when the \code{transform} +#' parameter is set to TRUE as the confidence limits are required for this +#' transformation. #' #' @section Warning: #' #' The SII calculation assumes a linear relationship between indicator value and -#' quantile, and small populations within quantiles can make it unstable. This +#' quantile. Where this is not the case the transform option should be considered. +#' Small populations within quantiles can make the SII unstable. This #' function does not include checks for linearity or stability; it is the user's #' responsibility to ensure the input data is suitable for the SII calculation. #' -#' @section Notes: -#' -#' This function is using nest and unnest functions from tidyr version 1.0.0. -#' -#' @param data data.frame containing the required input fields, pre-grouped if an SII is required for -#' each subgroup; unquoted string; no default -#' @param quantile field name within data that contains the quantile label (e.g. decile). The number -#' of quantiles should be between 5 and 100; unquoted string; no default -#' @param population field name within data that contains the quantile populations (ie, denominator). -#' Non-zero populations are required for all quantiles to calculate SII for an area; -#' unquoted string; no default -#' @param x (for indicators that are proportions) field name within data that contains -#' the members of the population with the attribute of interest (ie, numerator). This will be -#' divided by population to calculate a proportion as the indicator value -#' (if value field is not provided); unquoted string; no default -#' @param value field name within data that contains the indicator value (this does not need to be supplied -#' for proportions if count and population are given); unquoted string; no default -#' @param value_type indicates the indicator type (1 = rate, 2 = proportion, 0 = other); -#' integer; default 0 -#' @param lower_cl field name within data that contains 95 percent lower confidence limit -#' of indicator value (to calculate standard error of indicator value). This field is needed -#' if the se field is not supplied; unquoted string; no default -#' @param upper_cl field name within data that contains 95 percent upper confidence limit -#' of indicator value (to calculate standard error of indicator value). This field is needed -#' if the se field is not supplied; unquoted string; no default -#' @param se field name within data that contains the standard error of the indicator -#' value. If not supplied, this will be calculated from the 95 percent lower and upper confidence -#' limits (i.e. one or the other of these fields must be supplied); unquoted string; no default -#' @param multiplier factor to multiply the SII and SII confidence limits by (e.g. set to 100 to return -#' prevalences on a percentage scale between 0 and 100). If the multiplier is negative, the -#' inverse of the RII is taken to account for the change in polarity; numeric; default 1; -#' @param repetitions number of random samples to perform to return confidence interval of SII (and RII). -#' Minimum is 1000, no maximum (though the more repetitions, the longer the run time); -#' numeric; default 100,000 -#' @param confidence confidence level used to calculate the lower and upper confidence limits of SII, -#' expressed as a number between 0.9 and 1, or 90 and 100. It can be a vector of 0.95 and 0.998, -#' for example, to output both 95 percent and 99.8 percent CIs; numeric; default 0.95 -#' @param rii option to return the Relative Index of Inequality (RII) with associated confidence limits -#' as well as the SII; logical; default FALSE -#' @param reliability_stat option to carry out the SII confidence interval simulation 10 times instead -#' of once and return the Mean Average Difference between the first and subsequent samples (as a -#' measure of the amount of variation). Warning: this will significantly increase run time of the -#' function and should first be tested on a small number of repetitions; logical; default FALSE -#' @param type "full" output includes columns in the output dataset specifying the parameters the user -#' has input to the function (value_type, multiplier, CI_confidence, CI_method); character string -#' either "full" or "standard"; default "full" +#' @param data data.frame containing the required input fields, pre-grouped if +#' an SII is required for each subgroup; unquoted string; no default +#' @param quantile field name within data that contains the quantile label (e.g. +#' decile). The number of quantiles should be between 5 and 100; unquoted +#' string; no default +#' @param population field name within data that contains the quantile +#' populations (ie, denominator). Non-zero populations are required for all +#' quantiles to calculate SII for an area; unquoted string; no default +#' @param x (for indicators that are proportions) field name within data that +#' contains the members of the population with the attribute of interest (ie, +#' numerator). This will be divided by population to calculate a proportion as +#' the indicator value (if value field is not provided); unquoted string; no +#' default +#' @param value field name within data that contains the indicator value (this +#' does not need to be supplied for proportions if count and population are +#' given); unquoted string; no default +#' @param value_type indicates the indicator type (1 = rate, 2 = proportion, 0 = +#' other). The \code{value_type} argument is used to determine whether data should +#' be transformed prior to calculation of the standard error and/or SII. See +#' the \code{Tansformations} section for full details; integer; default 0 +#' @param transform option to transform input rates or proportions prior to +#' calculation of the SII. See the \code{Transformations} section for full +#' details; logical; default FALSE +#' @param lower_cl field name within data that contains 95 percent lower +#' confidence limit of indicator value (to calculate standard error of +#' indicator value). This field is needed if the se field is not supplied; +#' unquoted string; no default +#' @param upper_cl field name within data that contains 95 percent upper +#' confidence limit of indicator value (to calculate standard error of +#' indicator value). This field is needed if the se field is not supplied; +#' unquoted string; no default +#' @param se field name within data that contains the standard error of the +#' indicator value. If not supplied, this will be calculated from the 95 +#' percent lower and upper confidence limits (i.e. one or the other of these +#' fields must be supplied); unquoted string; no default +#' @param multiplier factor to multiply the SII and SII confidence limits by +#' (e.g. set to 100 to return prevalences on a percentage scale between 0 and +#' 100). If the multiplier is negative, the inverse of the RII is taken to +#' account for the change in polarity; numeric; default 1; +#' @param repetitions number of random samples to perform to return confidence +#' interval of SII (and RII). Minimum is 1000, no maximum (though the more +#' repetitions, the longer the run time); numeric; default 100,000 +#' @param confidence confidence level used to calculate the lower and upper +#' confidence limits of SII, expressed as a number between 0.9 and 1, or 90 +#' and 100. It can be a vector of 0.95 and 0.998, for example, to output both +#' 95 percent and 99.8 percent CIs; numeric; default 0.95 +#' @param rii option to return the Relative Index of Inequality (RII) with +#' associated confidence limits as well as the SII; logical; default FALSE +#' @param intercept option to return the intercept value of the regression line +#' (y value where x=0); logical; default FALSE +#' @param reliability_stat option to carry out the SII confidence interval +#' simulation 10 times instead of once and return the Mean Average Difference +#' between the first and subsequent samples (as a measure of the amount of +#' variation). Warning: this will significantly increase run time of the +#' function and should first be tested on a small number of repetitions; +#' logical; default FALSE +#' @param type "full" output includes columns in the output dataset specifying +#' the parameters the user has input to the function (value_type, multiplier, +#' CI_confidence, CI_method); character string either "full" or "standard"; +#' default "full" #' #' @references #' (1) Low A & Low A. Measuring the gap: quantifying and comparing local health inequalities. @@ -116,6 +149,7 @@ #' @importFrom tidyr nest unnest spread #' @importFrom stats rnorm qnorm lm #' @importFrom tidyselect where +#' @importFrom rlang := .data .env #' #' @examples #' library(dplyr) @@ -159,13 +193,15 @@ #' rii = TRUE, #' type = "standard") #' -#' # multiple confidence intervals +#' # multiple confidence intervals, log transforming the data if they are rates #' phe_sii(group_by(data, area), #' decile, #' population, -#' value_type = 0, +#' value_type = 1, +#' transform = TRUE, #' value = value, -#' se = StandardError, +#' lower_cl = lowerCL, +#' upper_cl = upperCL, #' confidence = c(0.95, 0.998), #' repetitions = 10000, #' rii = TRUE, @@ -177,11 +213,13 @@ #' the inputted data.frame. #' #' @family PHEindicatormethods package functions +# ------------------------------------------------------------------------------------------------- phe_sii <- function(data, quantile, population, # compulsory fields x = NULL, # optional fields value = NULL, value_type = 0, + transform = FALSE, lower_cl = NULL, upper_cl = NULL, se = NULL, @@ -189,6 +227,7 @@ phe_sii <- function(data, quantile, population, # compulsory fields repetitions = 100000, confidence = 0.95, rii = FALSE, + intercept = FALSE, reliability_stat = FALSE, type = "full") { @@ -212,7 +251,18 @@ phe_sii <- function(data, quantile, population, # compulsory fields if (repetitions < 1000) { stop("number of repetitions must be 1000 or greater. Default is 100,000") } - + # if transform is true then value type must be rate or proportion + if (transform == TRUE & value_type == 0) { + stop("value_type should be 1 or 2 when transform is true") + } + # if transform is true then se cannot be provided + if (transform == TRUE & !(missing(se))) { + stop("function phe_sii requires se to be missing when transform is true") + } + # if transform is true then upper and lower cls must be provided + if (transform == TRUE & (missing(upper_cl) | missing(lower_cl))) { + stop("function phe_sii requires lower_cl and upper_cl fields when transform is true") + } # check on confidence limit requirements if (any(confidence < 0.9) | (any(confidence > 1) & any(confidence < 90)) | any(confidence > 100)) { stop("all confidence levels must be between 90 and 100 or between 0.9 and 1") @@ -374,7 +424,7 @@ phe_sii <- function(data, quantile, population, # compulsory fields pops_prep <- mutate(pops_prep, value = {{ x }} / {{ population }}) } - # Transform value, lower and upper confidence limits if value is a rate or proportion + # Transform value if value is a rate or proportion pops_prep <- pops_prep %>% mutate(value = ifelse(value_type == 1, log(.data$value), @@ -389,11 +439,11 @@ phe_sii <- function(data, quantile, population, # compulsory fields pops_prep <- pops_prep %>% mutate(lower_cl = ifelse(value_type == 0, {{ lower_cl }}, ifelse(value_type == 1, log({{ lower_cl }}), - ifelse(value_type == 2, log({{ lower_cl }}/(1-{{ lower_cl }})), + ifelse(value_type == 2, log({{ lower_cl }} / (1 - {{ lower_cl }})), NA))), upper_cl = ifelse(value_type == 0, {{ upper_cl }}, ifelse(value_type == 1, log({{ upper_cl }}), - ifelse(value_type == 2, log({{ upper_cl }}/(1-{{ upper_cl }})), + ifelse(value_type == 2, log({{ upper_cl }} / (1 - {{ upper_cl }})), NA)))) } @@ -415,7 +465,8 @@ phe_sii <- function(data, quantile, population, # compulsory fields b_vals = FindXValues({{ population }}, no_quantiles)) # Calculate sqrt(a), bsqrt(a) and un-transformed y value for regression - pops_prep_ab <- pops_prep_ab %>% + if(transform == FALSE) { + pops_prep_ab <- pops_prep_ab %>% group_by(!!! syms(c(grouping_variables, rlang::quo_text(quantile)))) %>% mutate(sqrt_a = sqrt(.data$a_vals), b_sqrt_a = .data$b_vals * .data$sqrt_a, @@ -424,6 +475,14 @@ phe_sii <- function(data, quantile, population, # compulsory fields exp(.data$value) / (1 + exp(.data$value)), .data$value)), yvals = .data$sqrt_a * .data$value_transform) + } else { + pops_prep_ab <- pops_prep_ab %>% + group_by(!!! syms(c(grouping_variables, rlang::quo_text(quantile)))) %>% + mutate(sqrt_a = sqrt(.data$a_vals), + b_sqrt_a = .data$b_vals * .data$sqrt_a, + value_transform = .data$value, + yvals = .data$sqrt_a * .data$value_transform) + } # calculate confidence interval for SII via simulation # Repeat this 10 times to get a "variability" measure if requested @@ -452,98 +511,348 @@ phe_sii <- function(data, quantile, population, # compulsory fields .data$sqrt_a, .data$b_sqrt_a, rii, + transform, reliability_stat))) } else { sim_CI <- pops_prep_ab %>% group_by(!!! syms(grouping_variables)) %>% - tidyr::nest() %>% - mutate(CI_params = purrr::map(data, ~ SimulationFunc(data = ., - .data$value, - value_type, - .data$se_calc, - repetitions, - confidence, - multiplier, - .data$sqrt_a, - .data$b_sqrt_a, - rii, - reliability_stat))) + tidyr::nest() %>% + mutate(CI_params = purrr::map(data, ~ SimulationFunc(data = ., + .data$value, + value_type, + .data$se_calc, + repetitions, + confidence, + multiplier, + .data$sqrt_a, + .data$b_sqrt_a, + rii, + transform, + reliability_stat))) } - # Unnest confidence limits and reliability measures in a data frame for joining - sim_CI <- sim_CI %>% - select(!c("data")) %>% - tidyr::unnest("CI_params") - - # Perform regression to calculate SII and extract model parameters + # Perform regression to calculate SII and extract model parameters popsSII_model <- popsSII_model %>% - # perform linear model - mutate(model = purrr::map(data, function(df) - stats::lm(yvals ~ sqrt_a + b_sqrt_a - 1, data = df))) %>% - # extract model coefficients - mutate(model = purrr::map(.data$model, broom::tidy)) %>% - tidyr::unnest("model") %>% - # remove unecessary fields - select(!c("std.error", "statistic", "p.value")) %>% - # create columns for each parameter - tidyr::pivot_wider(names_from = "term", - values_from = "estimate") %>% - # Extract SII and RII values + # perform linear model + mutate(model = purrr::map(data, function(df) + stats::lm(yvals ~ sqrt_a + b_sqrt_a - 1, data = df))) %>% + # extract model coefficients + mutate(model = purrr::map(.data$model, broom::tidy)) %>% + tidyr::unnest("model") %>% + # remove unecessary fields + select(!c("std.error", "statistic", "p.value")) %>% + # create columns for each parameter + tidyr::pivot_wider(names_from = "term", + values_from = "estimate") + + + # Format results according to whether transform = T/F + + if(transform == FALSE) { #no anti-transform needed + # Extract SII and RII values + popsSII_model <- popsSII_model %>% mutate(sii = multiplier * .data$b_sqrt_a, - rii = (.data$sqrt_a + .data$b_sqrt_a)/.data$sqrt_a) %>% + rii = (.data$sqrt_a + .data$b_sqrt_a)/.data$sqrt_a, + intercept = .data$sqrt_a) %>% # Take inverse of RII if multiplier is negative - mutate(rii = ifelse(multiplier < 0, - 1/rii, - rii)) %>% + mutate(rii = ifelse(multiplier < 0, 1 / rii, rii)) %>% + # Select fields to keep + select(all_of(grouping_variables), "sii", "rii", "intercept") + + # join on dataset with SII/ RII confidence limits + # Get CIs from first round of reps + # Unnest confidence limits in a data frame for joining + + sim_CI_rep1 <- sim_CI %>% + select(!c("data")) %>% + tidyr::unnest("CI_params") |> + slice_head(n = 1) + + if (length(grouping_variables) > 0) { + # (grouped dataset) + popsSII_model <- popsSII_model %>% + left_join(sim_CI_rep1, by = grouping_variables) + } else { + # ungrouped dataset + popsSII_model <- popsSII_model %>% + cbind(sim_CI_rep1) + } + + # Add reliability stats + + if (isTRUE(reliability_stat)) { + + sim_CI <- rename(sim_CI, "CI_calcs" = "CI_params") + + reliabity_stats <- calc_reliability( + CI_data = sim_CI, + confidence = confidence, + rii = rii + ) + + if (length(grouping_variables) > 0) { + # (grouped dataset) + popsSII_model <- popsSII_model %>% + left_join(reliabity_stats, by = grouping_variables) + } else { + # ungrouped dataset + popsSII_model <- popsSII_model %>% + cbind(reliabity_stats) + } + + } + + } else { + + popsSII_model <- popsSII_model %>% + mutate(sii = .data$b_sqrt_a, + intercept = .data$sqrt_a) %>% # Select fields to keep - select(all_of(grouping_variables), "sii", "rii") + select(all_of(grouping_variables), "sii", "intercept") + + #Do calculations that can be done outside of loop as they don't need the CI fields + if (value_type == 1) {#anti-log needed + + popsSII_model <- popsSII_model %>% + mutate(xequals1 = .data$intercept + .data$sii, + xequalshalf = (.data$intercept + .data$xequals1) / 2, + antilogintercept = exp(.data$intercept), + antilogxequals1 = exp(.data$xequals1), + multiplier = .env$multiplier, + sii = (.data$antilogxequals1 - .data$antilogintercept) * .data$multiplier, + rii = if_else( + .data$multiplier < 0, + 1 / (.data$antilogxequals1 / .data$antilogintercept), + .data$antilogxequals1 / .data$antilogintercept + ), + intercept = .data$antilogintercept * abs(.data$multiplier)) + + } else if (value_type == 2) {#anti-logit needed + + popsSII_model <- popsSII_model %>% + mutate(xequals1 = .data$intercept + .data$sii, + xequalshalf = (.data$intercept + .data$xequals1) / 2, + antilogintercept = exp(.data$intercept) / (1 + exp(.data$intercept)), + antilogxequals1 = exp(.data$xequals1) / (1 + exp(.data$xequals1)), + multiplier = .env$multiplier, + sii = (.data$antilogxequals1 - .data$antilogintercept) * .data$multiplier, + rii = if_else( + multiplier < 0, + 1 / (.data$antilogxequals1 / .data$antilogintercept), + .data$antilogxequals1 / .data$antilogintercept + ), + intercept = .data$antilogintercept * abs(.data$multiplier)) + } - # join on dataset with confidence limits and reliability stats - if (length(grouping_variables) > 0) { - # (grouped dataset) - popsSII_model <- popsSII_model %>% - left_join(sim_CI, by = grouping_variables) - } else { - # ungrouped dataset - popsSII_model <- popsSII_model %>% - cbind(sim_CI) - } + popsSII_model_CIs <- popsSII_model |> + select(all_of(grouping_variables), "xequalshalf") + popsSII_model <- popsSII_model |> + select(all_of(grouping_variables), "sii", "rii", "intercept") - # Part 3 - Choose and format output fields -------------------------------- + # join on dataset with confidence limits + if (length(grouping_variables) > 0) { + # (grouped dataset) + popsSII_model_CIs <- popsSII_model_CIs %>% + left_join(sim_CI, by = grouping_variables) + } else { + # ungrouped dataset + popsSII_model_CIs <- popsSII_model_CIs %>% + cbind(sim_CI) + } - # Remove reliability stat columns (if not requested by user) - if (reliability_stat == FALSE) { - popsSII_model <- popsSII_model %>% - select(-contains("MAD")) - } + # Calculate SII and RII for each rep + + popsSII_model_CIs <- popsSII_model_CIs |> + mutate( + CI_calcs = purrr::map2(.data$CI_params, .data$xequalshalf, function(data, xequalshalf) { + + map(confidence, function(conf) { + + conf_formatted <- + gsub("\\.", "_", formatC(conf * 100, format = "f", digits = 1)) + + selected_data <- data %>% + select(contains(conf_formatted)) |> + select(contains("sii")) |> + rename( + "sii_lower" = contains("sii_lower"), + "sii_upper" = contains("sii_upper") + ) + + if (value_type == 1) { + + SII_calculations <- selected_data %>% + mutate(interceptlcl = xequalshalf - (.data$sii_lower / 2), + interceptucl = xequalshalf - (.data$sii_upper / 2), + xequals1lcl = xequalshalf + (.data$sii_lower / 2), + xequals1ucl = xequalshalf + (.data$sii_upper / 2), + multiplier = .env$multiplier, + sii_lower = if_else(.data$multiplier < 1, (exp(.data$xequals1ucl) - exp(.data$interceptucl)) * .data$multiplier, + (exp(.data$xequals1lcl) - exp(.data$interceptlcl)) * .data$multiplier), + sii_upper = if_else(.data$multiplier < 1, (exp(.data$xequals1lcl) - exp(.data$interceptlcl)) * .data$multiplier, + (exp(.data$xequals1ucl) - exp(.data$interceptucl)) * .data$multiplier) + ) + + if (isTRUE(rii)) { + SII_calculations <- SII_calculations |> + mutate( + rii_lower = if_else( + .data$multiplier < 1, + 1 / (exp(.data$xequals1ucl) / exp(.data$interceptucl) + ), + exp(.data$xequals1lcl) / exp(.data$interceptlcl)), + rii_upper = if_else( + .data$multiplier < 1, + 1 / (exp(.data$xequals1lcl) / exp(.data$interceptlcl)), + exp(.data$xequals1ucl) / exp(.data$interceptucl) + ) + ) + } + + } else if (value_type == 2) { + + SII_calculations <- selected_data %>% + mutate(interceptlcl = xequalshalf - (.data$sii_lower / 2), + interceptucl = xequalshalf - (.data$sii_upper / 2), + xequals1lcl = xequalshalf + (.data$sii_lower / 2), + xequals1ucl = xequalshalf + (.data$sii_upper / 2), + multiplier = .env$multiplier, + sii_lower = if_else( + .data$multiplier < 0, + ((exp(.data$xequals1ucl) / (1 + exp(.data$xequals1ucl))) - (exp(.data$interceptucl) / (1 + exp(.data$interceptucl)))) * .data$multiplier, + ((exp(.data$xequals1lcl) / (1 + exp(.data$xequals1lcl))) - (exp(.data$interceptlcl) / (1 + exp(.data$interceptlcl)))) * .data$multiplier + ), + sii_upper = if_else( + .data$multiplier < 0, + ((exp(.data$xequals1lcl) / (1 + exp(.data$xequals1lcl))) - (exp(.data$interceptlcl) / (1 + exp(.data$interceptlcl)))) * .data$multiplier, + ((exp(.data$xequals1ucl) / (1 + exp(.data$xequals1ucl))) - (exp(.data$interceptucl)/(1 + exp(.data$interceptucl)))) * .data$multiplier + ) + ) + + if (isTRUE(rii)) { + + SII_calculations <- SII_calculations %>% + mutate( + rii_lower = if_else( + multiplier < 0, + 1 / ((exp(.data$xequals1ucl) / (1 + exp(.data$xequals1ucl))) / (exp(.data$interceptucl) / (1 + exp(.data$interceptucl)))), + ((exp(.data$xequals1lcl)/(1 + exp(.data$xequals1lcl))) / (exp(.data$interceptlcl)/(1 + exp(.data$interceptlcl))))), + rii_upper = if_else( + .data$multiplier < 0, + 1 / ((exp(.data$xequals1lcl) / (1 + exp(.data$xequals1lcl))) / (exp(.data$interceptlcl)/(1 + exp(.data$interceptlcl)))), + ((exp(.data$xequals1ucl) / (1 + exp(.data$xequals1ucl))) / (exp(.data$interceptucl)/(1 + exp(.data$interceptucl)))) + ) + ) + } + + } + + SII_calculations <- SII_calculations |> + select(any_of(contains(c("sii_lower", "sii_upper", + "rii_lower", "rii_upper")))) |> + rename_with(.fn = \(x) paste0(x, conf_formatted)) + + } + ) |> + bind_cols() + } + ) + ) |> + select(all_of(grouping_variables), "CI_calcs") + + # Add CIs to model + # join on dataset with SII/ RII confidence limits + # Get CIs from first round of reps + # Unnest confidence limits in a data frame for joining + + CI_rep1 <- popsSII_model_CIs %>% + select(all_of(grouping_variables), "CI_calcs") %>% + tidyr::unnest("CI_calcs") |> + slice_head(n = 1) + + if (length(grouping_variables) > 0) { + # (grouped dataset) + popsSII_model <- popsSII_model %>% + left_join(CI_rep1, by = grouping_variables) + } else { + # ungrouped dataset + popsSII_model <- popsSII_model %>% + cbind(CI_rep1) + } - # Remove RII columns (if not requested by user) - if(rii == FALSE) { - popsSII_model <- popsSII_model %>% - select(!contains("rii")) - } + # Add reliability stats + if (isTRUE(reliability_stat)) { + + reliabity_stats <- calc_reliability( + CI_data = popsSII_model_CIs, + confidence = confidence, + rii = rii + ) + + if (length(grouping_variables) > 0) { + # (grouped dataset) + popsSII_model <- popsSII_model %>% + left_join(reliabity_stats, by = grouping_variables) + } else { + # ungrouped dataset + popsSII_model <- popsSII_model %>% + cbind(reliabity_stats) + } - # Add metadata columns to output dataset (if requested by user) - if(type == "full") { + } + + } + + # Part 3 - Choose and format output fields -------------------------------- + + # Remove reliability stat columns (if not requested by user) + if (reliability_stat == FALSE) { + popsSII_model <- popsSII_model %>% + select(-contains("mad")) + } + + # Remove RII columns (if not requested by user) + if(rii == FALSE) { + popsSII_model <- popsSII_model %>% + select(!contains("rii")) + } + + # Remove intercept columns (if not requested by user) + if(intercept == FALSE) { + popsSII_model <- popsSII_model %>% + select(!"intercept") + } - popsSII_model <- popsSII_model %>% + # Move intercept to last column of dataframe + if(intercept == TRUE) { + popsSII_model <- popsSII_model %>% + select(!"intercept", "intercept") + } - mutate(indicator_type = ifelse(value_type == 0, "normal", - ifelse(value_type == 1, - "rate", "proportion")), - multiplier = multiplier, - CI_confidence = paste0(confidence * 100, "%", - collapse = ", "), - CI_method = paste0("simulation ", repetitions, " reps")) + # Add metadata columns to output dataset (if requested by user) + if (type == "full") { - } + popsSII_model <- popsSII_model %>% + + mutate(indicator_type = if_else(value_type == 0, "normal", + if_else(value_type == 1, + "rate", "proportion")), + multiplier = multiplier, + transform = if_else(transform == TRUE & value_type == 1, "log", + if_else(transform == TRUE & value_type == 2, "logit", + "none")), + CI_confidence = paste0(confidence * 100, "%", + collapse = ", "), + CI_method = paste0("simulation ", repetitions, " reps")) + + } # return output dataset - return(popsSII_model) + return(popsSII_model) } diff --git a/R/sysdata.rda b/R/sysdata.rda index 53a4df9..a76d5cf 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/utils.R b/R/utils.R index bfc2e3d..737b097 100644 --- a/R/utils.R +++ b/R/utils.R @@ -292,14 +292,14 @@ FindXValues <- function(xvals, no_quantiles){ # ------------------------------------------------------------------------------------------------- #' SimulationFunc #' -#' Function to simulate SII range through random sampling of the indicator value +#' Function to simulate SII and RII range through random sampling of the indicator value #' for each quantile, based on the associated mean and standard error #' -#' @return returns lower and upper SII confidence limits according to user +#' @return returns lower and upper SII and RII confidence limits according to user #' specified confidence #' -#' @param data data.frame containing the data to calculate SII confidence limits -#' from; unquoted string; no default +#' @param data data.frame containing the data to calculate SII and RII +#' confidence limits from; unquoted string; no default #' @param value field name within data that contains the indicator value; unquoted #' string; no default #' @param value_type indicates the indicator type (1 = rate, 2 = proportion, 0 = other); @@ -337,6 +337,7 @@ SimulationFunc <- function(data, sqrt_a, b_sqrt_a, rii = FALSE, + transform = FALSE, reliability_stat = FALSE) { # Use NSE on input fields - apply quotes @@ -349,7 +350,11 @@ SimulationFunc <- function(data, confidence_value <- confidence + ((1 - confidence) / 2) # Set 10x no. of reps if reliability stats requested - no_reps <- ifelse(reliability_stat == TRUE, 10*repeats, repeats) + no_reps <- if (reliability_stat == TRUE) { + 10 * repeats + } else { + repeats + } # Take random samples from a normal distribution with mean as the indicator value # sd. as the associated standard error. Store results in a @@ -357,10 +362,10 @@ SimulationFunc <- function(data, yvals <- matrix(stats::rnorm(no_reps*length(pull(data, !!value)), pull(data, !!value), pull(data, !!se)), ncol = no_reps) # Retransform y values for rates (1) and proportions (2) - if (value_type == 1) { + if (value_type == 1 & transform == FALSE) { yvals_transformed <- exp(yvals) - } else if (value_type == 2){ - yvals_transformed <- exp(yvals)/(1+exp(yvals)) + } else if (value_type == 2 & transform == FALSE) { + yvals_transformed <- exp(yvals)/(1 + exp(yvals)) } else { yvals_transformed <- yvals } @@ -372,15 +377,12 @@ SimulationFunc <- function(data, # Calculate inverse of (m*transpose (m))*m to use in calculation # of least squares estimate of regression parameters # Ref: https://onlinecourses.science.psu.edu/stat501/node/38 - invm_m <- solve((m)%*%t(m))%*%m + invm_m <- solve((m) %*% t(m)) %*% m # Multiply transformed yvals matrix element-wise by sqrta - this weights the sampled # yvals by a measure of population final_yvals <- yvals_transformed*pull(data, !!sqrt_a) - # Prepare empty numeric vector to hold parameter results - sloperesults <- numeric(no_reps) - # Define function to matrix multiply invm_m by a vector matrix_mult <- function(x) {invm_m %*% x} @@ -395,25 +397,27 @@ SimulationFunc <- function(data, # RII only calculations if (rii == TRUE) { - # Calculate the RII - RII_results <- (params_bsqrta + params_sqrta)/params_sqrta + # Calculate the RII + RII_results <- (params_bsqrta + params_sqrta)/params_sqrta - # Split simulated SII/RIIs into 10 samples if reliability stats requested - if (reliability_stat == TRUE) { - SII_results <- matrix(params_bsqrta, ncol = 10) - RII_results <- matrix(RII_results, ncol = 10) - } else { - SII_results <- matrix(params_bsqrta, ncol = 1) - RII_results <- matrix(RII_results, ncol = 1) - } + # Split simulated SII/RIIs into 10 samples if reliability stats requested + if (reliability_stat == TRUE) { + SII_results <- matrix(params_bsqrta, ncol = 10) + RII_results <- matrix(RII_results, ncol = 10) + } else { + SII_results <- matrix(params_bsqrta, ncol = 1) + RII_results <- matrix(RII_results, ncol = 1) + } - # Apply multiplicative factor to RII - if(multiplier < 0) { - RII_results <- 1/RII_results - } + # Apply multiplicative factor to RII if transform = FALSE + if (multiplier < 0 & transform == FALSE) { + RII_results <- 1/RII_results + } else { + RII_results <- RII_results + } - # Order simulated RIIs from lowest to highest - sortresults_RII <- apply(RII_results, 2, sort, decreasing = FALSE) + # Order simulated RIIs from lowest to highest + sortresults_RII <- apply(RII_results, 2, sort, decreasing = FALSE) } else { @@ -425,8 +429,12 @@ SimulationFunc <- function(data, } } - # Apply multiplicative factor to SII - SII_results <- multiplier * SII_results + # Apply multiplicative factor to SII if transform = FALSE + if (transform == FALSE) { + SII_results <- SII_results * multiplier + } else { + SII_results <- SII_results + } # Order simulated SIIs from lowest to highest sortresults_SII <- apply(SII_results, 2, sort, decreasing = FALSE) @@ -435,116 +443,98 @@ SimulationFunc <- function(data, # as confidence limits # position of lower percentile - pos_lower <- round(repeats*(1-confidence_value), digits=0) + pos_lower <- round(repeats * (1 - confidence_value), digits = 0) # position of upper percentile - pos_upper <- round(repeats*confidence_value, digits=0) + pos_upper <- round(repeats * confidence_value, digits = 0) # Combine position indexes for SII CLs pos <- rbind(pos_lower, pos_upper) + colnames(pos) <- formatC(confidence * 100, format = "f", digits = 1) - # Extract SII confidence limits from critical percentiles of first column of samples - SII_lower_cls <- sortresults_SII[pos_lower, 1] - SII_upper_cls <- sortresults_SII[pos_upper, 1] - - # Define column names (adding in confidence level) - names(SII_lower_cls) <- paste0("sii_lower", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), - "cl") - names(SII_upper_cls) <- paste0("sii_upper", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), - "cl") - - # Combine lower and upper SII CLs - SII_cls <- t(c(SII_lower_cls, SII_upper_cls)) - - # CASE 1 - Calculate RII CLs if requested - if(rii == TRUE) { - - # Extract RII confidence limits from critical percentiles of first column of samples - RII_lower_cls <- sortresults_RII[pos_lower, 1] - RII_upper_cls <- sortresults_RII[pos_upper, 1] - - # Define column names (adding in confidence level) - names(RII_lower_cls) <- paste0("rii_lower", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), - "cl") - names(RII_upper_cls) <- paste0("rii_upper", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), - "cl") - - # Combine lower and upper RII CLs - RII_cls <- t(c(RII_lower_cls, RII_upper_cls)) - - if (reliability_stat == TRUE) { - - # Calculate variability by taking the absolute difference between each of the lower/upper - # limits in the additional 9 sample sets and the initial lower/upper limits - SII_diffs <- apply(pos, 2, function(x) { # run function over each column of "pos" (i.e. for each confidence) - - diffs_sample_original <- t(apply(sortresults_SII[c(x[1], x[2]),], 1, function(y) abs(y - y[1]))) - - # Calculate mean absolute difference over all 18 differences - sii_mad <- mean(diffs_sample_original[, 2:10]) - }) - - RII_diffs <- apply(pos, 2, function(x) { # run function over each column of "pos" (i.e. for each confidence) + results_SII <- data.frame(sortresults_SII[pos, , drop = FALSE]) + names(results_SII) <- paste0("Rep_", seq_along(results_SII)) + rownames(results_SII) <- paste0( + rep(c("sii_lower", "sii_upper"), length(confidence)), + rep(paste0(gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), "cl"), each = 2) + ) - diffs_sample_original <- t(apply(sortresults_RII[c(x[1], x[2]),], 1, function(y) abs(y - y[1]))) - - # Calculate mean absolute difference over all 18 differences - rii_mad <- mean(diffs_sample_original[, 2:10]) - }) - - # Define column names (adding in confidence level) - names(SII_diffs) <- paste0("sii_mad", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1))) - names(RII_diffs) <- paste0("rii_mad", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1))) - - # Return SII confidence limits from first of the 10 samples, plus the - # reliability measures - results <- cbind(SII_cls, RII_cls, t(SII_diffs), t(RII_diffs)) - - - } else { - # Return SII and RII confidence limits from single sample taken - results <- cbind(SII_cls, RII_cls) - } - - # CASE 2 - Return SII stats only - } else { + if (isFALSE(rii)) { + results <- data.frame(t(results_SII)) + } - if (reliability_stat == TRUE) { + if (isTRUE(rii)) { + results_RII <- data.frame(sortresults_RII[pos, , drop = FALSE]) + names(results_RII) <- paste0("Rep_", seq_along(results_RII)) + rownames(results_RII) <- paste0( + rep(c("rii_lower", "rii_upper"), length(confidence)), + rep(paste0(gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), "cl"), each = 2) + ) - # Calculate variability by taking the absolute difference between each of the lower/upper - # limits in the additional 9 sample sets and the initial lower/upper limits - SII_diffs <- apply(pos, 2, function(x) { # run function over each column of "pos" (i.e. for each confidence) + results <- data.frame(t(bind_rows(results_SII, results_RII))) + } - diffs_sample_original <- t(apply(sortresults_SII[c(x[1], x[2]),], 1, function(y) abs(y - y[1]))) + results - # Calculate mean absolute difference over all 18 differences - sii_MAD <- mean(diffs_sample_original[, 2:10]) - }) +} - # Define column names (adding in confidence level) - names(SII_diffs) <- paste0("sii_mad", - gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1))) - # Return SII confidence limits from first of the 10 samples, plus the - # reliability measures - results <- cbind(SII_cls, t(SII_diffs)) +# ------------------------------------------------------------------------------ +#' SII reliability stats +#' @param CI_data A nested dataframe containing the SII and RII CIs for each rep +#' @param confidence Confidence level used to calculate the lower and upper confidence limits of SII; +#' numeric between 0.5 and 0.9999 or 50 and 99.99; default 0.95 +#' @param rii Option to return the Relative Index of Inequality (RII) with +#' associated confidence limits +#' +#' @return a data frame +#' +#' @noRd +# ------------------------------------------------------------------------------ - } else { - # Return SII confidence limits only from single sample taken - results <- SII_cls - } - } +calc_reliability <- function(CI_data, + confidence, + rii) { + + groups <- group_vars(CI_data) + + reliabity_stats <- CI_data %>% + mutate( + reliabity_stats_data = purrr::map(.data$CI_calcs, function(x){ + # Calculate mean average difference in SII and RII from first rep + diffs_sample_original <- x |> + mutate(across(everything(), function(y) {abs(y - y[1])})) |> + slice(-1) + + map(confidence, function(conf) { + conf_formatted <- + gsub("\\.", "_", formatC(conf * 100, format = "f", digits = 1)) + + if (isTRUE(rii)) { + diffs_sample_original |> + select(contains(conf_formatted)) |> + summarise( + "sii_mad{conf_formatted}" := mean(c_across(contains("sii"))), + "rii_mad{conf_formatted}" := mean(c_across(contains("rii"))) + ) + } else { + diffs_sample_original |> + select(contains(conf_formatted)) |> + summarise( + "sii_mad{conf_formatted}" := mean(c_across(contains("sii"))) + ) + } + + }) |> + bind_cols() - return(data.frame(results)) + } + ) + ) |> + select(all_of(groups), "reliabity_stats_data") |> + unnest("reliabity_stats_data") } - # ------------------------------------------------------------------------------ #' Poisson Function for funnel plots for ratios and rates #' diff --git a/README.Rmd b/README.Rmd index 84b36d2..cf4dd09 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,9 @@ knitr::opts_chunk$set( # PHEindicatormethods -This is an R package to support analysts in the execution of statistical methods approved for use in the production of PHE indicators such as those presented via Fingertips. It provides functions for the generation of Proportions, Rates, DSRs, ISRs, Means, Life Expectancy and Slope Index of Inequality including confidence intervals for these statistics, and a function for assigning data to quantiles. +This is an R package to support analysts in the execution of statistical methods approved for use in the production of PHE indicators such as those presented via [Fingertips](https://fingertips.phe.org.uk/). It provides functions for the generation of Proportions, Rates, DSRs, ISRs, Means, Life Expectancy and Slope Index of Inequality including confidence intervals for these statistics, and a function for assigning data to quantiles. + +In October 2021 Public Health England (PHE) was disbanded and as a result this package is now owned by the Department of Health and Social Care. It will continue to be supported and to prevent breaking changes there are currently no immediate plans to rename the package or its functions in light of this organisational change. Any feedback would be appreciated and can be provided using the Issues section of the [PHEindicatormethods GitHub repository](https://github.com/ukhsa-collaboration/PHEindicatormethods). diff --git a/README.md b/README.md index 2904234..e32e600 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,17 @@ This is an R package to support analysts in the execution of statistical methods approved for use in the production of PHE indicators such as -those presented via Fingertips. It provides functions for the generation -of Proportions, Rates, DSRs, ISRs, Means, Life Expectancy and Slope -Index of Inequality including confidence intervals for these statistics, -and a function for assigning data to quantiles. +those presented via [Fingertips](https://fingertips.phe.org.uk/). It +provides functions for the generation of Proportions, Rates, DSRs, ISRs, +Means, Life Expectancy and Slope Index of Inequality including +confidence intervals for these statistics, and a function for assigning +data to quantiles. + +In October 2021 Public Health England (PHE) was disbanded and as a +result this package is now owned by the Department of Health and Social +Care. It will continue to be supported and to prevent breaking changes +there are currently no immediate plans to rename the package or its +functions in light of this organisational change. Any feedback would be appreciated and can be provided using the Issues section of the [PHEindicatormethods GitHub diff --git a/man/phe_quantile.Rd b/man/phe_quantile.Rd index 4fd83ad..ae25583 100644 --- a/man/phe_quantile.Rd +++ b/man/phe_quantile.Rd @@ -7,7 +7,6 @@ phe_quantile( data, values, - highergeog = NULL, nquantiles = 10L, invert = TRUE, inverttype = "logical", @@ -15,53 +14,59 @@ phe_quantile( ) } \arguments{ -\item{data}{a data frame containing the quantitative data to be assigned to quantiles. -If pre-grouped, separate sets of quantiles will be assigned for each grouping set; -unquoted string; no default} +\item{data}{a data frame containing the quantitative data to be assigned to +quantiles. If pre-grouped, separate sets of quantiles will be assigned for +each grouping set; unquoted string; no default} -\item{values}{field name from data containing the numeric values to rank data by and assign quantiles from; -unquoted string; no default} +\item{values}{field name from data containing the numeric values to rank data +by and assign quantiles from; unquoted string; no default} -\item{highergeog}{deprecated - functionality replaced by pre-grouping the input data frame} +\item{nquantiles}{the number of quantiles to separate each grouping set into; +numeric; default=10L} -\item{nquantiles}{the number of quantiles to separate each grouping set into; numeric; default=10L} +\item{invert}{whether the quantiles should be directly (FALSE) or inversely +(TRUE) related to the numerical value order; logical (to apply same value +to all grouping sets) OR unquoted string referencing field name from data +that stores logical values for each grouping set; default = TRUE (ie +highest values assigned to quantile 1)} -\item{invert}{whether the quantiles should be directly (FALSE) or inversely (TRUE) related to the numerical value order; -logical (to apply same value to all grouping sets) OR unquoted string referencing field name from data -that stores logical values for each grouping set; default = TRUE (ie highest values assigned to quantile 1)} +\item{inverttype}{whether the invert argument has been specified as a logical +value or a field name from data; quoted string "field" or "logical"; +default = "logical"} -\item{inverttype}{whether the invert argument has been specified as a logical value or a field name from data; -quoted string "field" or "logical"; default = "logical"} - -\item{type}{defines whether to include metadata columns in output to reference the arguments passed; -can be "standard" or "full"; quoted string; default = "full"} +\item{type}{defines whether to include metadata columns in output to +reference the arguments passed; can be "standard" or "full"; quoted string; +default = "full"} } \value{ -When type = "full", returns the original data.frame with quantile (quantile value), -nquantiles (number of quantiles requested), groupvars (grouping sets quantiles assigned within) -and invert (indicating direction of quantile assignment) fields appended. +When type = "full", returns the original data.frame with quantile +(quantile value), nquantiles (number of quantiles requested), groupvars +(grouping sets quantiles assigned within) and invert (indicating direction +of quantile assignment) fields appended. } \description{ Assigns data to quantiles based on numeric data rankings. } \section{Notes}{ - See \href{https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods}{PHE Technical Guide - Assigning Deprivation Quintiles} for methodology. -In particular, note that this function strictly applies the algorithm defined but some manual -review, and potentially adjustment, is advised in some cases where multiple small areas with equal rank -fall across a natural quantile boundary. + See \href{https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods}{OHID Technical Guide - Assigning Deprivation Categories} +for methodology. In particular, note that this function strictly applies +the algorithm defined but some manual review, and potentially adjustment, +is advised in some cases where multiple small areas with equal rank fall +across a natural quantile boundary. } \examples{ - -df <- data.frame(region = as.character(rep(c("Region1","Region2","Region3","Region4"), each=250)), - smallarea = as.character(paste0("Area",seq_along(1:1000))), - vals = as.numeric(sample(200, 1000, replace = TRUE)), - stringsAsFactors=FALSE) +df <- data.frame( + region = as.character(rep(c("Region1","Region2","Region3","Region4"), + each=250)), + smallarea = as.character(paste0("Area",seq_along(1:1000))), + vals = as.numeric(sample(200, 1000, replace = TRUE)), + stringsAsFactors = FALSE) # assign small areas to deciles across whole data frame phe_quantile(df, vals) -# assign small area to deciles within regions by pre-grouping the input data frame +# assign small areas to deciles within regions by pre-grouping the data frame library(dplyr) df_grp <- df \%>\% group_by(region) phe_quantile(df_grp, vals) diff --git a/man/phe_sii.Rd b/man/phe_sii.Rd index af91ec2..5fe355a 100644 --- a/man/phe_sii.Rd +++ b/man/phe_sii.Rd @@ -11,6 +11,7 @@ phe_sii( x = NULL, value = NULL, value_type = 0, + transform = FALSE, lower_cl = NULL, upper_cl = NULL, se = NULL, @@ -18,67 +19,88 @@ phe_sii( repetitions = 1e+05, confidence = 0.95, rii = FALSE, + intercept = FALSE, reliability_stat = FALSE, type = "full" ) } \arguments{ -\item{data}{data.frame containing the required input fields, pre-grouped if an SII is required for -each subgroup; unquoted string; no default} - -\item{quantile}{field name within data that contains the quantile label (e.g. decile). The number -of quantiles should be between 5 and 100; unquoted string; no default} - -\item{population}{field name within data that contains the quantile populations (ie, denominator). -Non-zero populations are required for all quantiles to calculate SII for an area; +\item{data}{data.frame containing the required input fields, pre-grouped if +an SII is required for each subgroup; unquoted string; no default} + +\item{quantile}{field name within data that contains the quantile label (e.g. +decile). The number of quantiles should be between 5 and 100; unquoted +string; no default} + +\item{population}{field name within data that contains the quantile +populations (ie, denominator). Non-zero populations are required for all +quantiles to calculate SII for an area; unquoted string; no default} + +\item{x}{(for indicators that are proportions) field name within data that +contains the members of the population with the attribute of interest (ie, +numerator). This will be divided by population to calculate a proportion as +the indicator value (if value field is not provided); unquoted string; no +default} + +\item{value}{field name within data that contains the indicator value (this +does not need to be supplied for proportions if count and population are +given); unquoted string; no default} + +\item{value_type}{indicates the indicator type (1 = rate, 2 = proportion, 0 = +other). The \code{value_type} argument is used to determine whether data should +be transformed prior to calculation of the standard error and/or SII. See +the \code{Tansformations} section for full details; integer; default 0} + +\item{transform}{option to transform input rates or proportions prior to +calculation of the SII. See the \code{Transformations} section for full +details; logical; default FALSE} + +\item{lower_cl}{field name within data that contains 95 percent lower +confidence limit of indicator value (to calculate standard error of +indicator value). This field is needed if the se field is not supplied; unquoted string; no default} -\item{x}{(for indicators that are proportions) field name within data that contains -the members of the population with the attribute of interest (ie, numerator). This will be -divided by population to calculate a proportion as the indicator value -(if value field is not provided); unquoted string; no default} - -\item{value}{field name within data that contains the indicator value (this does not need to be supplied -for proportions if count and population are given); unquoted string; no default} - -\item{value_type}{indicates the indicator type (1 = rate, 2 = proportion, 0 = other); -integer; default 0} - -\item{lower_cl}{field name within data that contains 95 percent lower confidence limit -of indicator value (to calculate standard error of indicator value). This field is needed -if the se field is not supplied; unquoted string; no default} - -\item{upper_cl}{field name within data that contains 95 percent upper confidence limit -of indicator value (to calculate standard error of indicator value). This field is needed -if the se field is not supplied; unquoted string; no default} - -\item{se}{field name within data that contains the standard error of the indicator -value. If not supplied, this will be calculated from the 95 percent lower and upper confidence -limits (i.e. one or the other of these fields must be supplied); unquoted string; no default} - -\item{multiplier}{factor to multiply the SII and SII confidence limits by (e.g. set to 100 to return -prevalences on a percentage scale between 0 and 100). If the multiplier is negative, the -inverse of the RII is taken to account for the change in polarity; numeric; default 1;} - -\item{repetitions}{number of random samples to perform to return confidence interval of SII (and RII). -Minimum is 1000, no maximum (though the more repetitions, the longer the run time); -numeric; default 100,000} - -\item{confidence}{confidence level used to calculate the lower and upper confidence limits of SII, -expressed as a number between 0.9 and 1, or 90 and 100. It can be a vector of 0.95 and 0.998, -for example, to output both 95 percent and 99.8 percent CIs; numeric; default 0.95} - -\item{rii}{option to return the Relative Index of Inequality (RII) with associated confidence limits -as well as the SII; logical; default FALSE} - -\item{reliability_stat}{option to carry out the SII confidence interval simulation 10 times instead -of once and return the Mean Average Difference between the first and subsequent samples (as a -measure of the amount of variation). Warning: this will significantly increase run time of the -function and should first be tested on a small number of repetitions; logical; default FALSE} +\item{upper_cl}{field name within data that contains 95 percent upper +confidence limit of indicator value (to calculate standard error of +indicator value). This field is needed if the se field is not supplied; +unquoted string; no default} -\item{type}{"full" output includes columns in the output dataset specifying the parameters the user -has input to the function (value_type, multiplier, CI_confidence, CI_method); character string -either "full" or "standard"; default "full"} +\item{se}{field name within data that contains the standard error of the +indicator value. If not supplied, this will be calculated from the 95 +percent lower and upper confidence limits (i.e. one or the other of these +fields must be supplied); unquoted string; no default} + +\item{multiplier}{factor to multiply the SII and SII confidence limits by +(e.g. set to 100 to return prevalences on a percentage scale between 0 and +100). If the multiplier is negative, the inverse of the RII is taken to +account for the change in polarity; numeric; default 1;} + +\item{repetitions}{number of random samples to perform to return confidence +interval of SII (and RII). Minimum is 1000, no maximum (though the more +repetitions, the longer the run time); numeric; default 100,000} + +\item{confidence}{confidence level used to calculate the lower and upper +confidence limits of SII, expressed as a number between 0.9 and 1, or 90 +and 100. It can be a vector of 0.95 and 0.998, for example, to output both +95 percent and 99.8 percent CIs; numeric; default 0.95} + +\item{rii}{option to return the Relative Index of Inequality (RII) with +associated confidence limits as well as the SII; logical; default FALSE} + +\item{intercept}{option to return the intercept value of the regression line +(y value where x=0); logical; default FALSE} + +\item{reliability_stat}{option to carry out the SII confidence interval +simulation 10 times instead of once and return the Mean Average Difference +between the first and subsequent samples (as a measure of the amount of +variation). Warning: this will significantly increase run time of the +function and should first be tested on a small number of repetitions; +logical; default FALSE} + +\item{type}{"full" output includes columns in the output dataset specifying +the parameters the user has input to the function (value_type, multiplier, +CI_confidence, CI_method); character string either "full" or "standard"; +default "full"} } \value{ The SII with lower and upper confidence limits for each subgroup of @@ -126,33 +148,46 @@ of the fitted value at \code{x=1,Y1} and the fitted value at \code{x=0,Y0}. which can be calculated as: \code{RII = (Y0 + SII)/Y0} } -\section{Function arguments}{ - - -The indicator type can be specified via the \code{value_type} parameter. -Transformations can be applied to the indicator value and its confidence -limits before calculating the standard error in cases where the confidence -interval around the indicator value is likely to be non-symmetric. This is a -log transformation for rates, and logit for proportions. If the standard -error is supplied directly to the function from the input dataset, this is -used instead of calculating one from the indicator confidence limits. +\section{Transformations}{ + + +The indicator type can be specified as 1 (rate), 2 (proportion) or 0 (other), +using the \code{value_type} parameter. This setting determines the data +transformations that will be applied in the following two parts of the +method. + +Use in conjunction with the \code{transform} parameter in calculation of the +SII: It is recommended that rates and proportions are transformed prior to +calculation of the SII by setting the \code{transform} parameter to TRUE for +these indicator types. This will perform a log transformation for rates, or +logit for proportions, and return outputs transformed back to the original +units of the indicator. These transformations are recommended to improve the +linearity between the indicator values and the quantile, which is an +assumption of the method. A user-provided standard error will not be accepted +when the \code{transform} parameter is set to TRUE as the confidence limits +are required for this transformation. + +Use in calculation of the standard error: Rates and proportions, and their +confidence limits, are transformed prior to calculation of the standard error +for each quantile. This is because it is assumed that the confidence interval +around the indicator value is non-symmetric for these indicator types. Note +that this transformation is not controlled by the \code{transform} parameter +and is applied based on the value of the \code{value_type} parameter only. A +user-provided standard error will not be accepted when the \code{transform} +parameter is set to TRUE as the confidence limits are required for this +transformation. } \section{Warning}{ The SII calculation assumes a linear relationship between indicator value and -quantile, and small populations within quantiles can make it unstable. This +quantile. Where this is not the case the transform option should be considered. +Small populations within quantiles can make the SII unstable. This function does not include checks for linearity or stability; it is the user's responsibility to ensure the input data is suitable for the SII calculation. } -\section{Notes}{ - - -This function is using nest and unnest functions from tidyr version 1.0.0. -} - \examples{ library(dplyr) @@ -195,13 +230,15 @@ phe_sii(group_by(data, area), rii = TRUE, type = "standard") -# multiple confidence intervals +# multiple confidence intervals, log transforming the data if they are rates phe_sii(group_by(data, area), decile, population, - value_type = 0, + value_type = 1, + transform = TRUE, value = value, - se = StandardError, + lower_cl = lowerCL, + upper_cl = upperCL, confidence = c(0.95, 0.998), repetitions = 10000, rii = TRUE, diff --git a/tests/testthat.R b/tests/testthat.R index f2329b6..faa6cca 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,3 +1,11 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + library(testthat) library(PHEindicatormethods) diff --git a/tests/testthat/testByars.R b/tests/testthat/testByars.R index da696df..e041cdb 100644 --- a/tests/testthat/testByars.R +++ b/tests/testthat/testByars.R @@ -1,22 +1,20 @@ -context("test_byars") - #test calculations test_that("byars_lower calculate correctly",{ expect_equal(data.frame(lowercl = byars_lower(c(65,1045,1))), - data.frame(slice(test_BW,1:3)[3]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,1:3)[3]),ignore_attr=TRUE, info="test default") expect_equal(data.frame(lowercl = byars_lower(c(65,1045,10),confidence=99.8)), - data.frame(slice(test_BW,4:6)[3]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,4:6)[3]),ignore_attr=TRUE, info="test default") expect_equal(data.frame(lowercl = byars_upper(c(65,1045,1))), - data.frame(slice(test_BW,1:3)[4]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,1:3)[4]),ignore_attr=TRUE, info="test default") expect_equal(data.frame(lowercl = byars_upper(c(65,1045,10),confidence=0.998)), - data.frame(slice(test_BW,4:6)[4]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,4:6)[4]),ignore_attr=TRUE, info="test default") expect_equal(data.frame(lowercl = byars_upper(c(65,1045,10),confidence=99.8)), - data.frame(slice(test_BW,4:6)[4]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,4:6)[4]),ignore_attr=TRUE, info="test default") }) diff --git a/tests/testthat/testDSRs.R b/tests/testthat/testDSRs.R index cc9ddf7..c696e05 100644 --- a/tests/testthat/testDSRs.R +++ b/tests/testthat/testDSRs.R @@ -1,80 +1,78 @@ -context("test_phe_dsr") - #test calculations test_that("dsrs and CIs calculate correctly",{ expect_equal(data.frame(select(phe_dsr(test_multiarea, count, pop),1:6)), data.frame(test_DSR_results[5:7,1:6]), - check.attributes=FALSE, check.names=FALSE,info="test default") + ignore_attr = TRUE,info="test default") expect_equal(data.frame(select(phe_dsr(test_multiarea, count, pop, confidence = c(0.95,0.998)),1:8,10:11)), data.frame(test_DSR_results[5:7,]), - check.attributes=FALSE, check.names=FALSE,info="test full output with 2 CIs") + ignore_attr = TRUE,info="test full output with 2 CIs") expect_equal(data.frame(phe_dsr(test_DSR_1976, count, pop, stdpop = test_DSR_1976$esp1976, type="standard")), select(slice(test_DSR_results,8),2:6), - check.attributes=FALSE, check.names=FALSE,info="test with user specified vector") + ignore_attr = TRUE,info="test with user specified vector") expect_equal(data.frame(phe_dsr(test_DSR_1976, count, pop, stdpop = esp1976, stdpoptype="field", type="standard")), data.frame(select(slice(test_DSR_results,8),2:6)), - check.attributes=FALSE, check.names=FALSE,info="test with user specified stdpop by col name") + ignore_attr = TRUE,info="test with user specified stdpop by col name") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, type="standard", stdpop = c(5000, 5500, 5500, 5500, 6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500, 6000, 5500, 5000, 4000, 2500, 1500, 1000))), data.frame(select(slice(test_DSR_results,5:7),1:6)), - check.attributes=FALSE, check.names=FALSE,info="test stdpop as specified vector") + ignore_attr = TRUE,info="test stdpop as specified vector") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, stdpop = esp2013, type="standard")), data.frame(select(slice(test_DSR_results,5:7),1:6)), - check.attributes=FALSE, check.names=FALSE,info="test standard") + ignore_attr = TRUE,info="test standard") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, confidence = c(0.95,0.998), type="standard")), data.frame(select(slice(test_DSR_results,5:7),1:8)), - check.attributes=FALSE, check.names=FALSE,info="test standard 2CIs") + ignore_attr = TRUE,info="test standard 2CIs") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, type="value")), data.frame(select(slice(test_DSR_results,5:7),1,4)), - check.attributes=FALSE, check.names=FALSE,info="test value") + ignore_attr = TRUE,info="test value") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, confidence = c(0.95,0.998), type="value")), data.frame(select(slice(test_DSR_results,5:7),1,4)), - check.attributes=FALSE, check.names=FALSE,info="test value 2CIs") + ignore_attr = TRUE,info="test value 2CIs") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, type="lower")), data.frame(select(slice(test_DSR_results,5:7),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test lower") + ignore_attr = TRUE,info="test lower") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, confidence = c(0.95,0.998), type="lower")), data.frame(select(slice(test_DSR_results,5:7),1,5,7)), - check.attributes=FALSE, check.names=FALSE,info="test lower 2 CIs") + ignore_attr = TRUE,info="test lower 2 CIs") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, type="upper")), data.frame(select(slice(test_DSR_results,5:7),1,6)), - check.attributes=FALSE, check.names=FALSE,info="test upper") + ignore_attr = TRUE,info="test upper") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, confidence = c(0.95,0.998), type="upper")), data.frame(select(slice(test_DSR_results,5:7),1,6,8)), - check.attributes=FALSE, check.names=FALSE,info="test upper 2 CIs") + ignore_attr = TRUE,info="test upper 2 CIs") expect_equal(data.frame(select(phe_dsr(test_multiarea, count, pop, stdpop = esp2013, confidence = 99.8),1:6,8:9)), data.frame(select(slice(test_DSR_results,5:7),1:4,7:10)), - check.attributes=FALSE, check.names=FALSE,info="test confidence") + ignore_attr = TRUE,info="test confidence") expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, stdpop = esp2013, multiplier=10000, type="standard")), data.frame(select(slice(test_DSR_results,1:3),1:6)), - check.attributes=FALSE, check.names=FALSE,info="test multiplier") + ignore_attr = TRUE,info="test multiplier") expect_equal(data.frame(select(ungroup(phe_dsr(test_multiarea, count, pop, confidence = c(0.95, 0.998))),9)), data.frame(confidence = c("95%, 99.8%","95%, 99.8%","95%, 99.8%"), stringsAsFactors=FALSE), - check.attributes=FALSE, check.names=FALSE,info="test 2 CIs metadata") + ignore_attr = TRUE,info="test 2 CIs metadata") expect_equal(data.frame(select(ungroup(phe_dsr(test_multiarea, count, pop)),7)), data.frame(confidence = c("95%", "95%", "95%"), stringsAsFactors=FALSE), - check.attributes=FALSE, check.names=FALSE,info="test 95% CI metadata") + ignore_attr = TRUE,info="test 95% CI metadata") expect_equal(data.frame(select(ungroup(phe_dsr(test_multiarea, count, pop, confidence = 0.998)),7)), data.frame(confidence = c("99.8%", "99.8%", "99.8%"), stringsAsFactors=FALSE), - check.attributes=FALSE, check.names=FALSE,info="test 99.8% CI metadata") + ignore_attr = TRUE,info="test 99.8% CI metadata") }) diff --git a/tests/testthat/testISRates.R b/tests/testthat/testISRates.R index e74fb9f..e4e517f 100644 --- a/tests/testthat/testISRates.R +++ b/tests/testthat/testISRates.R @@ -4,104 +4,104 @@ test_that("isrates and CIs calculate correctly",{ expect_equal(data.frame(select(calculate_ISRate(select(test_ISR_ownref,-refcount,-refpop), count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop),1:7,9:10)), data.frame(select(slice(test_ISR_results,1:3),1:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default") + ignore_attr = TRUE,info="test default") expect_equal(data.frame(select(calculate_ISRate(select(test_ISR_ownref,-count, -refcount,-refpop), total_count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, observed_totals = test_ISR_lookup), 1:7,9:10)), data.frame(select(slice(test_ISR_results,1:3),1:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default with observed_totals") + ignore_attr = TRUE,info="test default with observed_totals") expect_equal(data.frame(select(calculate_ISRate(select(test_ISR_ownref,-refcount,-refpop), count, pop, confidence = c(0.95,0.998), x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop),1:9,11:12)), data.frame(slice(test_ISR_results,1:3)), - check.attributes=FALSE, check.names=FALSE,info="test full output with 2 CIs") + ignore_attr = TRUE,info="test full output with 2 CIs") expect_equal(data.frame(select(calculate_ISRate(test_ISR_ownref, count, pop, refcount, refpop, refpoptype="field", type="standard"), 1:7)), data.frame(select(slice(test_ISR_results,1:3),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test default with own ref data by col name") + ignore_attr = TRUE,info="test default with own ref data by col name") expect_equal(data.frame(calculate_ISRate(select(test_ISR_ownref,-refcount,-refpop), count, pop, test_ISR_ownref$refcount[1:19], test_ISR_ownref$refpop[1:19], type="standard")), data.frame(select(slice(test_ISR_results,1:3),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test default with own ref data as vector") + ignore_attr = TRUE,info="test default with own ref data as vector") expect_equal(data.frame(calculate_ISRate(test_err2, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="standard")), data.frame(select(slice(test_ISR_results,13:14),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test zero population") + ignore_attr = TRUE,info="test zero population") expect_equal(data.frame(calculate_ISRate(select(test_ISR_ownref,-refcount,-refpop), count, pop, type="standard", x_ref = c(10303,2824,NA,3615,3641,3490,3789,3213,3031,2771,3089,3490,3595,4745,5514,7125,5694,6210,5757), n_ref = c(50520,57173,60213,54659,44345,50128,62163,67423,62899,55463,60479,49974,44140,40888,37239,30819,18136,15325,13918))), data.frame(select(slice(test_ISR_results,1:3),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test ref as specified vector") + ignore_attr = TRUE,info="test ref as specified vector") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="standard")), data.frame(select(slice(test_ISR_results,1:3),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test standard") + ignore_attr = TRUE,info="test standard") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95,0.998), type="standard")), data.frame(select(slice(test_ISR_results,1:3),1:9)), - check.attributes=FALSE, check.names=FALSE,info="test standard 2 CIs") + ignore_attr = TRUE,info="test standard 2 CIs") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="value")), data.frame(select(slice(test_ISR_results,1:3),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test value") + ignore_attr = TRUE,info="test value") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95,0.998), type="value")), data.frame(select(slice(test_ISR_results,1:3),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test value 2 CIs") + ignore_attr = TRUE,info="test value 2 CIs") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="lower")), data.frame(select(slice(test_ISR_results,1:3),1,6)), - check.attributes=FALSE, check.names=FALSE,info="test lower") + ignore_attr = TRUE,info="test lower") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95,0.998), type="lower")), data.frame(select(slice(test_ISR_results,1:3),1,6,8)), - check.attributes=FALSE, check.names=FALSE,info="test lower 2 CIs") + ignore_attr = TRUE,info="test lower 2 CIs") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop,type="upper")), data.frame(select(slice(test_ISR_results,1:3),1,7)), - check.attributes=FALSE, check.names=FALSE,info="test upper") + ignore_attr = TRUE,info="test upper") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95,0.998), type="upper")), data.frame(select(slice(test_ISR_results,1:3),1,7,9)), - check.attributes=FALSE, check.names=FALSE,info="test upper 2 CIs") + ignore_attr = TRUE,info="test upper 2 CIs") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop,confidence = 99.8)), data.frame(select(slice(test_ISR_results,1:3),1:5,8:9)), - check.attributes=FALSE, check.names=FALSE,info="test confidence") + ignore_attr = TRUE,info="test confidence") expect_equal(data.frame(calculate_ISRate(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, multiplier=1000)), data.frame(select(slice(test_ISR_results,4:6),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test multiplier") + ignore_attr = TRUE,info="test multiplier") expect_equal(data.frame(select(calculate_ISRate(select(test_ISR_ownref,-refcount,-refpop), count, pop, confidence = c(0.95,0.998), x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop),1:7)), data.frame(select(slice(test_ISR_results,1:3),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test two CIS 95%") + ignore_attr = TRUE,info="test two CIS 95%") expect_equal(data.frame(select(calculate_ISRate(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop,confidence = c(0.95, 0.998)),1:5,8,9)), data.frame(select(slice(test_ISR_results,1:3),1:5,8:9)), - check.attributes=FALSE, check.names=FALSE,info="test two CIs 99.8") + ignore_attr = TRUE,info="test two CIs 99.8") expect_equal(data.frame(select(ungroup(calculate_ISRate(select(test_ISR_ownref,-refcount,-refpop), count, pop, confidence = c(0.95,0.998), x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop)),10)), data.frame(confidence = rep("95%, 99.8%",3), stringsAsFactors=FALSE), - check.attributes=FALSE, check.names=FALSE,info="test two CIS metadata") + ignore_attr = TRUE,info="test two CIS metadata") }) diff --git a/tests/testthat/testISRatios.R b/tests/testthat/testISRatios.R index 38e9256..2d10170 100644 --- a/tests/testthat/testISRatios.R +++ b/tests/testthat/testISRatios.R @@ -4,33 +4,33 @@ test_that("isratios and CIs calculate correctly",{ expect_equal(data.frame(select(calculate_ISRatio(select(test_ISR_ownref,-refcount,-refpop), count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop),1:6,8:9)), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default") + ignore_attr = TRUE,info="test default") expect_equal(data.frame(select(calculate_ISRatio(select(test_ISR_ownref,-count, -refcount,-refpop), total_count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, observed_totals = test_ISR_lookup), 1:6,8:9)), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default with observed_totals") + ignore_attr = TRUE,info="test default with observed_totals") expect_equal(data.frame(select(calculate_ISRatio(select(test_ISR_ownref,-refcount,-refpop), count, pop, confidence = c(0.95,0.998), x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop),1:8,10:11)), data.frame(select(slice(test_ISR_results,7:9),1:3,5:11)), - check.attributes=FALSE, check.names=FALSE,info="test full output 2 CIs") + ignore_attr = TRUE,info="test full output 2 CIs") expect_equal(data.frame(select(calculate_ISRatio(test_ISR_ownref, count, pop, refcount, refpop, refpoptype="field"), 1:6,8:9)), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default with own ref data by col name") + ignore_attr = TRUE,info="test default with own ref data by col name") expect_equal(data.frame(select(calculate_ISRatio(select(test_ISR_ownref,-refcount,-refpop), count, pop, test_ISR_ownref$refcount[1:19], test_ISR_ownref$refpop[1:19]),1:6,8:9)), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default with own ref data as vector") + ignore_attr = TRUE,info="test default with own ref data as vector") expect_equal(data.frame(calculate_ISRatio(test_err2, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, refvalue=100)), data.frame(select(slice(test_ISR_results,15:16),1:3,5:7)), - check.attributes=FALSE, check.names=FALSE,info="test n = 0") + ignore_attr = TRUE,info="test n = 0") expect_equal(data.frame(calculate_ISRatio(select(test_ISR_ownref,-refcount,-refpop), count, pop, type="standard", x_ref = c(10303,2824,NA,3615,3641,3490,3789,3213,3031,2771, @@ -38,78 +38,78 @@ test_that("isratios and CIs calculate correctly",{ n_ref = c(50520,57173,60213,54659,44345,50128,62163,67423, 62899,55463,60479,49974,44140,40888,37239,30819,18136,15325,13918))), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7)), - check.attributes=FALSE, check.names=FALSE,info="test ref as specified vector") + ignore_attr = TRUE,info="test ref as specified vector") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="standard")), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7)), - check.attributes=FALSE, check.names=FALSE,info="test standard") + ignore_attr = TRUE,info="test standard") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95, 0.998), type="standard")), data.frame(select(slice(test_ISR_results,7:9),1:3,5:9)), - check.attributes=FALSE, check.names=FALSE,info="test standard 2 CIs") + ignore_attr = TRUE,info="test standard 2 CIs") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="value")), data.frame(select(slice(test_ISR_results,7:9),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test value") + ignore_attr = TRUE,info="test value") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95, 0.998), type="value")), data.frame(select(slice(test_ISR_results,7:9),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test value 2 CIs") + ignore_attr = TRUE,info="test value 2 CIs") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, type="lower")), data.frame(select(slice(test_ISR_results,7:9),1,6)), - check.attributes=FALSE, check.names=FALSE,info="test lower") + ignore_attr = TRUE,info="test lower") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95, 0.998), type="lower")), data.frame(select(slice(test_ISR_results,7:9),1,6,8)), - check.attributes=FALSE, check.names=FALSE,info="test lower 2 CIs") + ignore_attr = TRUE,info="test lower 2 CIs") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop,type="upper")), data.frame(select(slice(test_ISR_results,7:9),1,7)), - check.attributes=FALSE, check.names=FALSE,info="test upper") + ignore_attr = TRUE,info="test upper") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, confidence = c(0.95, 0.998),type="upper")), data.frame(select(slice(test_ISR_results,7:9),1,7,9)), - check.attributes=FALSE, check.names=FALSE,info="test upper 2 CIs") + ignore_attr = TRUE,info="test upper 2 CIs") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop,confidence = 99.8)), data.frame(select(slice(test_ISR_results,7:9),1:3,5,8:9)), - check.attributes=FALSE, check.names=FALSE,info="test confidence") + ignore_attr = TRUE,info="test confidence") expect_equal(data.frame(calculate_ISRatio(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop, refvalue=100)), data.frame(select(slice(test_ISR_results,10:12),1:3,5:7)), - check.attributes=FALSE, check.names=FALSE,info="test refvalue") + ignore_attr = TRUE,info="test refvalue") expect_equal(data.frame(select(calculate_ISRatio(select(test_ISR_ownref,-refcount,-refpop), count, pop, confidence = c(0.95,0.998), x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop),1:6)), data.frame(select(slice(test_ISR_results,7:9),1:3,5:7)), - check.attributes=FALSE, check.names=FALSE,info="test two CIS 95%") + ignore_attr = TRUE,info="test two CIS 95%") expect_equal(data.frame(select(calculate_ISRatio(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop,confidence = c(0.95, 0.998)),1:4,7,8)), data.frame(select(slice(test_ISR_results,7:9),1:3,5,8:9)), - check.attributes=FALSE, check.names=FALSE,info="test two CIs 99.8") + ignore_attr = TRUE,info="test two CIs 99.8") expect_equal(data.frame(select(ungroup(calculate_ISRatio(select(test_ISR_ownref,-refcount,-refpop), count, pop, confidence = c(0.95,0.998), x_ref = test_ISR_refdata$refcount, n_ref = test_ISR_refdata$refpop)),9)), data.frame(confidence = rep("95%, 99.8%",3), stringsAsFactors=FALSE), - check.attributes=FALSE, check.names=FALSE,info="test two CIS metadata") + ignore_attr = TRUE,info="test two CIS metadata") }) diff --git a/tests/testthat/testLifeExpectancy.R b/tests/testthat/testLifeExpectancy.R index 4f0f879..afa07cf 100644 --- a/tests/testthat/testLifeExpectancy.R +++ b/tests/testthat/testLifeExpectancy.R @@ -1,5 +1,3 @@ -context("test_phe_life_expectancy") - # dps to test to n <- 4 diff --git a/tests/testthat/testMeans.R b/tests/testthat/testMeans.R index 664afaa..c800e6a 100644 --- a/tests/testthat/testMeans.R +++ b/tests/testthat/testMeans.R @@ -1,62 +1,60 @@ -context("test_phe_mean") - #test calculations test_that("means and CIs calculate correctly",{ expect_equal(data.frame(select(phe_mean(test_Mean,values),1:6,8:9)), data.frame(select(slice(test_Mean_results,3),2:7,10:11)), - check.attributes=FALSE, check.names=FALSE,info="test default") + ignore_attr = TRUE,info="test default") expect_equal(data.frame(select(phe_mean(test_Mean,values, confidence = c(0.95,0.998)),1:8,10:11)), data.frame(select(slice(test_Mean_results,3),2:11)), - check.attributes=FALSE, check.names=FALSE,info="test full output 2 CIs") + ignore_attr = TRUE,info="test full output 2 CIs") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, type="standard")), data.frame(select(slice(test_Mean_results,1:2),1:7)), - check.attributes=FALSE, check.names=FALSE,info="test grouped & standard") + ignore_attr = TRUE,info="test grouped & standard") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, confidence = c(0.95,0.998), type="standard")), data.frame(select(slice(test_Mean_results,1:2),1:9)), - check.attributes=FALSE, check.names=FALSE,info="test grouped & standard 2 CIs") + ignore_attr = TRUE,info="test grouped & standard 2 CIs") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, type="value")), data.frame(select(slice(test_Mean_results,1:2),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test value") + ignore_attr = TRUE,info="test value") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, confidence = c(0.95,0.998), type="value")), data.frame(select(slice(test_Mean_results,1:2),1,5)), - check.attributes=FALSE, check.names=FALSE,info="test value 2 CIs") + ignore_attr = TRUE,info="test value 2 CIs") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, type="lower")), data.frame(select(slice(test_Mean_results,1:2),1,6)), - check.attributes=FALSE, check.names=FALSE,info="test lower") + ignore_attr = TRUE,info="test lower") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, confidence = c(0.95,0.998), type="lower")), data.frame(select(slice(test_Mean_results,1:2),1,6,8)), - check.attributes=FALSE, check.names=FALSE,info="test lower 2 CIs") + ignore_attr = TRUE,info="test lower 2 CIs") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, type="upper")), data.frame(select(slice(test_Mean_results,1:2),1,7)), - check.attributes=FALSE, check.names=FALSE,info="test upper") + ignore_attr = TRUE,info="test upper") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, confidence = c(0.95,0.998), type="upper")), data.frame(select(slice(test_Mean_results,1:2),1,7,9)), - check.attributes=FALSE, check.names=FALSE,info="test upper 2 CIs") + ignore_attr = TRUE,info="test upper 2 CIs") expect_equal(data.frame(phe_mean(test_Mean_Grp,values, confidence = 99.8, type="standard")), data.frame(select(slice(test_Mean_results,1:2),1:5,8:9)), - check.attributes=FALSE, check.names=FALSE,info="test confidence") + ignore_attr = TRUE,info="test confidence") expect_equal(data.frame(select(phe_mean(test_Mean,values, confidence = c(0.95, 0.998)),1:6)), data.frame(select(slice(test_Mean_results,3),2:7)), - check.attributes=FALSE, check.names=FALSE,info="test two CIS 95%") + ignore_attr = TRUE,info="test two CIS 95%") expect_equal(data.frame(select(phe_mean(test_Mean_Grp,values, confidence = c(0.95, 0.998)),1:5,8,9)), data.frame(select(slice(test_Mean_results,1:2),1:5,8:9)), - check.attributes=FALSE, check.names=FALSE,info="test two CIS 99.8%") + ignore_attr = TRUE,info="test two CIS 99.8%") expect_equal(data.frame(select(phe_mean(test_Mean,values, confidence = c(0.95, 0.998)),9)), data.frame(confidence = "95%, 99.8%", stringsAsFactors=FALSE), - check.attributes=FALSE, check.names=FALSE,info="test two CIS metadata") + ignore_attr = TRUE,info="test two CIS metadata") }) diff --git a/tests/testthat/testProportions.R b/tests/testthat/testProportions.R index 4cf62cb..651c2f8 100644 --- a/tests/testthat/testProportions.R +++ b/tests/testthat/testProportions.R @@ -1,92 +1,89 @@ -context("test_phe_proportion") - - # test calculations test_that("proportions and CIs calculate correctly",{ expect_equal(data.frame(select(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator),1:6,8:9)), data.frame(select(slice(test_Prop,1:8),1:6,9:10)), - check.attributes=FALSE, check.names=FALSE, info="test default") + ignore_attr = TRUE, info="test default") expect_equal(data.frame(select(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, confidence = c(0.95,0.998)),1:8,10:11)), data.frame(select(slice(test_Prop,1:8),1:10)), - check.attributes=FALSE, check.names=FALSE, info="test full output 2 CIs") + ignore_attr = TRUE, info="test full output 2 CIs") expect_equal(data.frame(select(phe_proportion(slice(test_Prop,9:16)[1:3], Numerator, Denominator, multiplier = 100, type="full"),1:6,8:9)), data.frame(select(slice(test_Prop,9:16),1:6,9:10)), - check.attributes=FALSE, check.names=FALSE, info="test full, percentage") + ignore_attr = TRUE, info="test full, percentage") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, multiplier = 1, type="standard")), data.frame(select(slice(test_Prop,1:8),1:6)), - check.attributes=FALSE, check.names=FALSE, info="test standard") + ignore_attr = TRUE, info="test standard") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, multiplier = 1, confidence = c(0.95,0.998), type="standard")), data.frame(select(slice(test_Prop,1:8),1:8)), - check.attributes=FALSE, check.names=FALSE, info="test standard 2 CIs") + ignore_attr = TRUE, info="test standard 2 CIs") expect_equal(data.frame(select(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, type="full", confidence=99.8), 1:6,8:9)), data.frame(select(slice(test_Prop,1:8),1:4,7:10)), - check.attributes=FALSE, check.names=FALSE, info="test confidence") + ignore_attr = TRUE, info="test confidence") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, type="value")), data.frame(select(slice(test_Prop,1:8),1:4)), - check.attributes=FALSE, check.names=FALSE, info="test value") + ignore_attr = TRUE, info="test value") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, confidence = c(0.95,0.998), type="value")), data.frame(select(slice(test_Prop,1:8),1:4)), - check.attributes=FALSE, check.names=FALSE, info="test value 2 CIs") + ignore_attr = TRUE, info="test value 2 CIs") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, type="lower")), data.frame(select(slice(test_Prop,1:8),1:3,5)), - check.attributes=FALSE, check.names=FALSE, info="test lower") + ignore_attr = TRUE, info="test lower") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, confidence = c(0.95,0.998), type="lower")), data.frame(select(slice(test_Prop,1:8),1:3,5,7)), - check.attributes=FALSE, check.names=FALSE, info="test lower 2 CIs") + ignore_attr = TRUE, info="test lower 2 CIs") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, type="upper")), data.frame(select(slice(test_Prop,1:8),1:3,6)), - check.attributes=FALSE, check.names=FALSE, info="test upper") + ignore_attr = TRUE, info="test upper") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, confidence = c(0.95,0.998), type="upper")), data.frame(select(slice(test_Prop,1:8),1:3,6,8)), - check.attributes=FALSE, check.names=FALSE, info="test upper 2 CIs") + ignore_attr = TRUE, info="test upper 2 CIs") expect_equal(arrange(data.frame(select(phe_proportion(filter(test_Prop,Area %in% c("Area09","Area10","Area11"))[1:3], Numerator, Denominator, type="full"),1:6,8:9)), Area), arrange(data.frame(select(filter(test_Prop,Area %in% c("Area09","Area10","Area11")),1:6,9:10)), Area), - check.attributes=FALSE, check.names=FALSE, info="test NAs") + ignore_attr = TRUE, info="test NAs") expect_equal(arrange(data.frame(phe_proportion(slice(test_Prop_g,1:8)[1:3], Numerator, Denominator, type="standard")), Area), arrange(select(data.frame(test_Prop_g_results[1:6]),1:6),Area), - check.attributes=FALSE, check.names=FALSE, info="test grouped") + ignore_attr = TRUE, info="test grouped") expect_equal(data.frame(phe_proportion(slice(test_Prop,1:8)[1:3], Numerator, Denominator, confidence = c(0.95, 0.998)))[1:6], data.frame(select(slice(test_Prop,1:8),1:9))[1:6], - check.attributes=FALSE, check.names=FALSE, info="test two CIs 95%") + ignore_attr = TRUE, info="test two CIs 95%") expect_equal(data.frame(select(phe_proportion(slice(test_Prop,9:16)[1:3], Numerator, Denominator, multiplier = 100, confidence = c(0.95, 0.998)),1:4,7,8)), data.frame(select(slice(test_Prop,9:16),1:4,7:8)), - check.attributes=FALSE, check.names=FALSE, info="test two CIs 99.8%") + ignore_attr = TRUE, info="test two CIs 99.8%") expect_equal(data.frame(select(phe_proportion(slice(test_Prop,9:16)[1:3], Numerator, Denominator, confidence = c(0.95, 0.998)),9)), data.frame(confidence = rep("95%, 99.8%",8), stringsAsFactors = FALSE), - check.attributes=FALSE, check.names=FALSE, info="test two CIs, metadata") + ignore_attr = TRUE, info="test two CIs, metadata") expect_equal(data.frame(phe_proportion(slice(test_Prop_g_results,1:8)[1:3], Numerator, Denominator, confidence = NA)), data.frame(select(slice(test_Prop_g_results_no_CI,1:8),1:4,8)), - check.attributes=FALSE, check.names=FALSE, info="test no CIs") + ignore_attr = TRUE, info="test no CIs") }) diff --git a/tests/testthat/testQuantiles.R b/tests/testthat/testQuantiles.R index 1076c6e..c9b6e32 100644 --- a/tests/testthat/testQuantiles.R +++ b/tests/testthat/testQuantiles.R @@ -1,5 +1,3 @@ -context("test_phe_quantiles") - # test grouped df field df1 <- test_quantiles_g %>% filter(GroupSet == "IndSexReg") %>% group_by(IndSexRef, ParentCode) @@ -17,42 +15,60 @@ df4 <- test_quantiles_ug %>% filter(GroupSet == "None") # test grouped df5 <- df4 %>% group_by(GroupSet) +# test data where all values are NA +df6 <- df2 |> filter(!AreaCode %in% c("E06000053", "E09000001"))|> + mutate(Value = case_when(ParentCode == "E12000006" ~ NA_real_, + TRUE ~ Value)) #test calculations test_that("quantiles calculate correctly",{ - expect_equal(data.frame(phe_quantile(df1,Value, - invert = Polarity, inverttype = "field")[15]), - rename(df1,quantile = QuantileInGrp)[14], - check.attributes=FALSE, check.names=FALSE,info="test grouped df field") - - expect_equal(data.frame(phe_quantile(df2,Value, - invert = FALSE))[15:18], - data.frame(tibble(quantile = df2$QuantileInGrp, - nquantiles = 10L, - groupvars = "IndSexRef, ParentCode", - qinverted = "lowest quantile represents lowest values")), - check.attributes=FALSE, check.names=FALSE,info="test grouped df logical") - - expect_equal(phe_quantile(df3, Value, - invert = Polarity, inverttype = "field", nquantiles = 7L)[15], - rename(df3,quantile = QuantileInGrp)[14],check.attributes=FALSE, - check.names=FALSE,info="test ungrouped df field") - expect_equal(phe_quantile(df5, Value, nquantiles = 4L)[15], + suppressWarnings({ + # within-region deciles for multiple indicators + expect_equal(data.frame(phe_quantile(df1,Value, + invert = Polarity, inverttype = "field")[15]), + rename(df1,quantile = QuantileInGrp)[14], + ignore_attr = TRUE,info="test grouped df field") + # within-region deciles for multiple indicators + expect_equal(data.frame(phe_quantile(df2,Value, + invert = FALSE))[15:18], + data.frame(tibble(quantile = df2$QuantileInGrp, + nquantiles = 10L, + groupvars = "IndSexRef, ParentCode", + qinverted = "lowest quantile represents lowest values")), + ignore_attr = TRUE,info="test grouped df logical") + + expect_equal(phe_quantile(df3, Value, + invert = Polarity, inverttype = "field", nquantiles = 7L)[15], + rename(df3,quantile = QuantileInGrp)[14], + ignore_attr = TRUE,info="test ungrouped df field") + + expect_equal(phe_quantile(df5, Value, nquantiles = 4L)[15], + rename(df4,quantile = QuantileInGrp)[14], + ignore_attr = TRUE,info="test nquantiles") + + expect_equal(phe_quantile(df4, Value, nquantiles = 4L)[15], + rename(df4,quantile = QuantileInGrp)[14], + ignore_attr = TRUE,info="test ungrouped df logical nohighergeog") + + expect_equal(phe_quantile(df4, Value, nquantiles = 4L, type="standard")[15], rename(df4,quantile = QuantileInGrp)[14], - check.attributes=FALSE, check.names=FALSE,info="test nquantiles") + ignore_attr = TRUE,info="test ungrouped df logical nohighergeog") - expect_equal(phe_quantile(df4, Value, nquantiles = 4L)[15], - rename(df4,quantile = QuantileInGrp)[14], - check.attributes=FALSE, check.names=FALSE,info="test ungrouped df logical nohighergeog") + }) +}) - expect_equal(phe_quantile(df4, Value, nquantiles = 4L, type="standard")[15], - rename(df4,quantile = QuantileInGrp)[14], - check.attributes=FALSE, check.names=FALSE,info="test ungrouped df logical nohighergeog") +#test warnings +test_that("quantiles - warnings are generated when too few small areas for number of quantiles",{ + expect_warning(data.frame(phe_quantile(df2, Value, invert = FALSE)), + "One or more groups had too few small areas with values to allow quantiles to be assigned", + info="warning too few small areas") + expect_warning(data.frame(phe_quantile(df6, Value, invert = FALSE)), + "One or more groups had too few small areas with values to allow quantiles to be assigned", + info="warning too few small areas") }) - #test error handling test_that("quantiles - errors are generated when invalid arguments are used",{ expect_error(phe_quantile(test_quantiles_g), @@ -73,8 +89,4 @@ test_that("quantiles - errors are generated when invalid arguments are used",{ expect_error(phe_quantile(test_quantiles_fail, Value, invert = Pol, inverttype = "field"), "invert is not a field name from data",info="error invert not valid field name") - expect_error(phe_quantile(df1, Value, highergeog = ParentCode), - "highergeog argument is deprecated - pregroup input dataframe to replace this functionality", - info="error invert not valid field name") - }) diff --git a/tests/testthat/testRates.R b/tests/testthat/testRates.R index dbd0fd7..b959a8d 100644 --- a/tests/testthat/testRates.R +++ b/tests/testthat/testRates.R @@ -1,86 +1,83 @@ -context("test_phe_rate") - - #test calculations test_that("rates and CIs calculate correctly",{ expect_equal(data.frame(select(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator),1:6,8:9)), data.frame(select(slice(test_Rate,9:16),1:6,9:10)), - check.attributes=FALSE, check.names=FALSE, info="test default") + ignore_attr = TRUE, info="test default") expect_equal(data.frame(select(phe_rate(slice(test_Rate,9:16)[1:3], Numerator,Denominator, confidence = c(0.95,0.998)),1:8,10:11)), data.frame(select(slice(test_Rate,9:16),1:10)), - check.attributes=FALSE, check.names=FALSE, info="test full output 2 CIs") + ignore_attr = TRUE, info="test full output 2 CIs") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, type="standard")), data.frame(select(slice(test_Rate,9:16),1:6)), - check.attributes=FALSE, check.names=FALSE, info="test standard") + ignore_attr = TRUE, info="test standard") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence = c(0.95,0.998), type="standard")), data.frame(select(slice(test_Rate,9:16),1:8)), - check.attributes=FALSE, check.names=FALSE, info="test standard 2 CIs") + ignore_attr = TRUE, info="test standard 2 CIs") expect_equal(data.frame(phe_rate(slice(test_Rate,20)[1:3],Numerator,Denominator, type="standard")), data.frame(select(slice(test_Rate,20),1:6)), - check.attributes=FALSE, check.names=FALSE, info="test num > denom") + ignore_attr = TRUE, info="test num > denom") expect_equal(data.frame(select(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence=99.8),1:6,8:9)), data.frame(select(slice(test_Rate,9:16),1:4,7:10)), - check.attributes=FALSE, check.names=FALSE, info="test confidence") + ignore_attr = TRUE, info="test confidence") expect_equal(data.frame(select(phe_rate(slice(test_Rate,1:8)[1:3],Numerator,Denominator, multiplier=100),1:6,8:9)), data.frame(select(slice(test_Rate,1:8),1:6,9:10)), - check.attributes=FALSE, check.names=FALSE, info="test multiplier") + ignore_attr = TRUE, info="test multiplier") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, type="value")), data.frame(select(slice(test_Rate,9:16),1:4)), - check.attributes=FALSE, check.names=FALSE, info="test value") + ignore_attr = TRUE, info="test value") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence = c(0.95,0.998), type="value")), data.frame(select(slice(test_Rate,9:16),1:4)), - check.attributes=FALSE, check.names=FALSE, info="test value 2 CIs") + ignore_attr = TRUE, info="test value 2 CIs") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, type="lower")), data.frame(select(slice(test_Rate,9:16),1:3,5)), - check.attributes=FALSE, check.names=FALSE, info="test lower") + ignore_attr = TRUE, info="test lower") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence = c(0.95,0.998), type="lower")), data.frame(select(slice(test_Rate,9:16),1:3,5,7)), - check.attributes=FALSE, check.names=FALSE, info="test lower 2 CIs") + ignore_attr = TRUE, info="test lower 2 CIs") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, type="upper")), data.frame(select(slice(test_Rate,9:16),1:3,6)), - check.attributes=FALSE, check.names=FALSE, info="test upper") + ignore_attr = TRUE, info="test upper") expect_equal(data.frame(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence = c(0.95,0.998), type="upper")), data.frame(select(slice(test_Rate,9:16),1:3,6,8)), - check.attributes=FALSE, check.names=FALSE, info="test upper 2 CIs") + ignore_attr = TRUE, info="test upper 2 CIs") expect_equal(data.frame(select(phe_rate(slice(test_Rate,17:19)[1:3], Numerator,Denominator, type="full"),1:6,8:9)), data.frame(select(slice(test_Rate,17:19),1:6,9:10)), - check.attributes=FALSE, check.names=FALSE, info="test NAs") + ignore_attr = TRUE, info="test NAs") expect_equal(arrange(data.frame(phe_rate(slice(test_Rate_g,1:8)[1:3], Numerator, Denominator)),Area), arrange(data.frame(select(test_Rate_g_results,1:9)),Area), - check.attributes=FALSE, check.names=FALSE, info="test grouped") + ignore_attr = TRUE, info="test grouped") expect_equal(data.frame(select(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence = c(0.95, 0.998)),1:6)), data.frame(select(slice(test_Rate,9:16),1:6)), - check.attributes=FALSE, check.names=FALSE, info="test two CIs 95%") + ignore_attr = TRUE, info="test two CIs 95%") expect_equal(data.frame(select(phe_rate(slice(test_Rate,25:32)[1:3],Numerator,Denominator, confidence = c(0.95, 0.998)),1:4,7,8)), data.frame(select(slice(test_Rate,25:32),1:6)), - check.attributes=FALSE, check.names=FALSE, info="test two CIs 99.8%") + ignore_attr = TRUE, info="test two CIs 99.8%") expect_equal(data.frame(select(phe_rate(slice(test_Rate,9:16)[1:3],Numerator,Denominator, confidence = c(0.95, 0.998)),9)), data.frame(confidence = rep("95%, 99.8%",8), stringsAsFactors = FALSE), - check.attributes=FALSE, check.names=FALSE, info="test two CIs metadata") + ignore_attr = TRUE, info="test two CIs metadata") }) diff --git a/tests/testthat/testSII.R b/tests/testthat/testSII.R index 926c778..1670a61 100644 --- a/tests/testthat/testSII.R +++ b/tests/testthat/testSII.R @@ -1,5 +1,3 @@ -context("test_phe_SII") - # 1) Test calculations ---------------------------------------------------- # Expect SII value to match exactly, and confidence limits to be within # given tolerance @@ -58,7 +56,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(1,11), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test default with SE provided", tolerance = tol) # test same calculation, supplying upper and lower CLs rather than SE @@ -72,9 +70,24 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(1,11), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test default with CLs provided", tolerance = tol) + # test same calculation, outputting intercept value + expect_equal(data.frame(phe_sii(SII_test_grouped[1:20, 3:13], + Quantile, Population, + value_type = 0, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + intercept = TRUE, # Intercept set to true + type = "standard")), + data.frame(SII_test_grouped[c(246,256), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test default with intercept", tolerance = tol) + # test function on ungrouped dataset SII_test_data <- ungroup(SII_test_grouped) @@ -88,7 +101,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[1, c(16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test on ungrouped data", tolerance = tol) # test SII calculation at 99% confidence (inputted as decimal) @@ -103,7 +116,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), # SII confidence changed data.frame(SII_test_grouped[c(21,31), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test at 99% confidence (decimal)", tolerance = tol) # test SII calculation at 99% confidence (inputted as %) @@ -117,7 +130,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), # SII confidence changed data.frame(SII_test_grouped[c(21,31), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test at 99% confidence (%)", tolerance = tol) # test SII calculation on multiple confidence intervals (inputted as %) @@ -130,8 +143,8 @@ test_that("SII and confidence limits calculate correctly",{ repetitions = no_reps, rii = TRUE, type = "standard")), # SII confidence changed - data.frame(SII_test_grouped[c(226,236), c(3:5,16:18,22,19,23,20,24,21,25)]), - check.attributes=FALSE, check.names=FALSE, + data.frame(SII_test_grouped[c(226,236), c(3:5,16:18,19,22,23,20,21,24,25)]), + ignore_attr = TRUE, info="test at 95 and 99% confidence (%)", tolerance = tol) # test SII calculation on 100,000 repetitions @@ -144,7 +157,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), # No. repetitions changed data.frame(SII_test_grouped[c(41,51), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test on 10000 repetitions", tolerance = tol) # test SII calculation on quintiles instead of deciles @@ -157,7 +170,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard"))[, 1:4], # only have SII available from Excel tool data.frame(SII_test_grouped[c(61,66), c(3:5,16)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test on quintiles", tolerance = tol) # ***************************** @@ -174,7 +187,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(71,81), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test rate with SE provided", tolerance = tol) # test same calculation, supplying upper and lower CLs (before transformation) @@ -189,9 +202,107 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(71,81), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test rate with CLs provided", tolerance = tol) +# test same calculation, outputting intercept value + expect_equal(data.frame(phe_sii(SII_test_grouped[71:90, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + intercept = TRUE, # Intercept set to true + type = "standard")), + data.frame(SII_test_grouped[c(266,276), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test rate with intercept", tolerance = tol) + + # test calculation with log transformation of values, + expect_equal(data.frame(phe_sii(SII_test_grouped[306:325, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(306,316), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test rate with transformation", tolerance = tol) + + # test calculation with log transformation of values and negative multiplier, + expect_equal(data.frame(phe_sii(SII_test_grouped[386:405, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + multiplier = -1, # Negative multiplier + rii = TRUE, + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(386,396), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test rate with transformation and negative multiplier", tolerance = tol) + + # test calculation with log transformation of values and 98% confidence intervals, + expect_equal(data.frame(phe_sii(SII_test_grouped[406:425, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + confidence = 98, #98% CIs + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(406,416), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test rate with transformation and 98% cis", tolerance = tol) + + # test calculation with log transformation of values and multiple confidence intervals, + expect_equal(data.frame(phe_sii(SII_test_grouped[366:385, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + confidence = c(95, 98), #95 and 98% CIs + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(366,376), c(3:5,16:26)]), + ignore_attr = TRUE, + info="test rate with transformation and multiple cis", tolerance = tol) + + # test calculation with log transformation of values without RII, + expect_equal(data.frame(phe_sii(SII_test_grouped[426:445, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = FALSE, + intercept = FALSE, + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(426,436), c(3:5,16, 18:19)]), + ignore_attr = TRUE, + info="test rate with transformation without rii", tolerance = tol) + # *********************************** # *** PROPORTION (value_type = 2) *** @@ -207,7 +318,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(91,101), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with SE provided", tolerance = tol) # test same calculation, supplying upper and lower CLs (before transformation) @@ -222,7 +333,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(91,101), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with CLs provided", tolerance = tol) # test same calculation, supplying count instead of value @@ -236,9 +347,24 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(91,101), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with count provided", tolerance = tol) + # test same calculation, outputting intercept value + expect_equal(data.frame(phe_sii(SII_test_grouped[91:110, 3:13], + Quantile, Population, + value_type = 1, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + intercept = TRUE, # Intercept set to true + type = "standard")), + data.frame(SII_test_grouped[c(286,296), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test proportion with intercept", tolerance = tol) + # test SII calculation with POSITIVE multiplier and RII expect_equal(data.frame(phe_sii(SII_test_grouped[111:130, 3:13], Quantile, Population, @@ -251,7 +377,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(111,121), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with positive multiplier and RII", tolerance = tol) # test SII calculation with POSITIVE multiplier without RII @@ -266,7 +392,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = FALSE, type = "standard")), data.frame(SII_test_grouped[c(111,121), c(3:5,16,18,19)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with positive multiplier without RII", tolerance = tol) # test SII calculation with NEGATIVE multiplier and RII @@ -281,7 +407,7 @@ test_that("SII and confidence limits calculate correctly",{ rii = TRUE, type = "standard")), data.frame(SII_test_grouped[c(131,141), c(3:5,16:21)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with negative multiplier and RII", tolerance = tol) # test SII calculation with NEGATIVE multiplier without RII @@ -296,8 +422,91 @@ test_that("SII and confidence limits calculate correctly",{ rii = FALSE, type = "standard")), data.frame(SII_test_grouped[c(131,141), c(3:5,16,18,19)]), - check.attributes=FALSE, check.names=FALSE, + ignore_attr = TRUE, info="test proportion with negative multiplier without RII", tolerance = tol) + + # test calculation with logit transformation of values, + expect_equal(data.frame(phe_sii(SII_test_grouped[326:345, 3:13], + Quantile, Population, + value_type = 2, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = TRUE, + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(326,336), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test proportion with transformation", tolerance = tol) + + # test calculation with logit transformation of values and negative multiplier, + expect_equal(data.frame(phe_sii(SII_test_grouped[346:365, 3:13], + Quantile, Population, + value_type = 2, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + multiplier = -1, # Multiplier set to -1 + rii = TRUE, + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(346,356), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test proportion with transformation and negative multiplier", tolerance = tol) + + # test calculation with logit transformation of values and 98% ci, + expect_equal(data.frame(phe_sii(SII_test_grouped[466:485, 3:13], + Quantile, Population, + value_type = 2, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + confidence = 98, + rii = TRUE, + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(466,476), c(3:5,16:21,26)]), + ignore_attr = TRUE, + info="test proportion with transformation and 98% cis", tolerance = tol) + + # test calculation with logit transformation of values and 95% and 98% ci, + expect_equal(data.frame(phe_sii(SII_test_grouped[486:505, 3:13], + Quantile, Population, + value_type = 2, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + confidence = c(95, 98), + rii = TRUE, + intercept = TRUE, # Intercept set to true + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(486,496), c(3:5,16:26)]), + ignore_attr = TRUE, + info="test proportion with transformation and 95% and 98% cis", tolerance = tol) + + # test calculation with logit transformation of values and 95% and 98% ci, + expect_equal(data.frame(phe_sii(SII_test_grouped[446:465, 3:13], + Quantile, Population, + value_type = 2, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + repetitions = no_reps, + rii = FALSE, + transform = TRUE, # Log transformation set to true + type = "standard")), + data.frame(SII_test_grouped[c(446,456), c(3:5,16, 18:19)]), + ignore_attr = TRUE, + info="test proportion with transformation and no rii", tolerance = tol) + }) @@ -423,6 +632,27 @@ test_that("errors are generated when invalid parameters are entered",{ confidence = 135), "all confidence levels must be between 90 and 100 or between 0.9 and 1", info = "invalid confidence limit pc - too high") + # transform set to true with indicator type =0 + expect_error(phe_sii(SII_test_grouped[1:20, 3:13], + Quantile, + Population, + value = Value, + lower_cl = LowerCL, # CLs supplied + upper_cl = UpperCL, + transform = TRUE, + value_type = 0), + "value_type should be 1 or 2 when transform is true", + info = "value_type should be 1 or 2 when transform is true") + # transform set to true and se provided + expect_error(phe_sii(SII_test_grouped[1:20, 3:13], + Quantile, + Population, + value = Value, + se = StandardError, + transform = TRUE, + value_type = 1), + "se to be missing when transform is true", + info = "se to be missing when transform is true") }) @@ -603,7 +833,7 @@ test_that("dimensions are correct when reliaibility stats requested",{ repetitions = 1000, reliability_stat = TRUE, type = "full")), - c(2,11)) + c(2,12)) # check dimensions with default type = "full" WITHOUT reliability stats expect_equal(dim(phe_sii(SII_test_grouped[1:20, 3:13], @@ -615,7 +845,7 @@ test_that("dimensions are correct when reliaibility stats requested",{ repetitions = 1000, reliability_stat = FALSE, type = "full")), - c(2,10)) + c(2,11)) # Tests WITH RII @@ -685,7 +915,7 @@ test_that("dimensions are correct when reliaibility stats requested",{ rii = TRUE, reliability_stat = TRUE, type = "full")), - c(2,15)) + c(2,16)) # check dimensions with default type = "full" WITHOUT reliability stats expect_equal(dim(phe_sii(SII_test_grouped[1:20, 3:13], @@ -698,6 +928,6 @@ test_that("dimensions are correct when reliaibility stats requested",{ rii = TRUE, reliability_stat = FALSE, type = "full")), - c(2,13)) + c(2,14)) }) diff --git a/tests/testthat/testWilson.R b/tests/testthat/testWilson.R index 4ad3963..2a3b1f3 100644 --- a/tests/testthat/testWilson.R +++ b/tests/testthat/testWilson.R @@ -1,28 +1,26 @@ -context("test_wilson") - #test calculations test_that("wilson_lower calculate correctly",{ expect_equal(data.frame(lowercl = wilson_lower(c(65,1045,0.445),c(100,5000,1))), - data.frame(slice(test_BW,7:9)[3]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,7:9)[3]),ignore_attr = TRUE, info="test default") expect_equal(data.frame(lowercl = wilson_lower(c(65,1045,0.445),c(100,5000,1),confidence=99.8)), - data.frame(slice(test_BW,10:12)[3]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,10:12)[3]),ignore_attr = TRUE, info="test default") expect_equal(data.frame(lowercl = wilson_lower(c(65,1045,0.445),c(100,5000,1),confidence=1)), - data.frame(uppercl = 0),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(uppercl = 0),ignore_attr = TRUE, info="test default") expect_equal(data.frame(uppercl = wilson_upper(c(65,1045,0.445),c(100,5000,1))), - data.frame(slice(test_BW,7:9)[4]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,7:9)[4]),ignore_attr = TRUE, info="test default") expect_equal(data.frame(uppercl = wilson_upper(c(65,1045,0.445),c(100,5000,1),confidence=0.998)), - data.frame(slice(test_BW,10:12)[4]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,10:12)[4]),ignore_attr = TRUE, info="test default") expect_equal(data.frame(uppercl = wilson_upper(c(65,1045,0.445),c(100,5000,1),confidence=1)), - data.frame(uppercl = 1),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(uppercl = 1),ignore_attr = TRUE, info="test default") expect_equal(data.frame(uppercl = wilson_upper(c(65,1045,0.445),c(100,5000,1),confidence=99.8)), - data.frame(slice(test_BW,10:12)[4]),check.attributes=FALSE, check.names=FALSE, info="test default") + data.frame(slice(test_BW,10:12)[4]),ignore_attr = TRUE, info="test default") }) diff --git a/tests/testthat/testdata_Quantiles.xlsx b/tests/testthat/testdata_Quantiles.xlsx index 7c4aa56..ae11cf5 100644 Binary files a/tests/testthat/testdata_Quantiles.xlsx and b/tests/testthat/testdata_Quantiles.xlsx differ diff --git a/tests/testthat/testdata_SII.xlsx b/tests/testthat/testdata_SII.xlsx index 11c9148..21e390e 100644 Binary files a/tests/testthat/testdata_SII.xlsx and b/tests/testthat/testdata_SII.xlsx differ diff --git a/vignettes/WorkedExamples_phe_sii.Rmd b/vignettes/WorkedExamples_phe_sii.Rmd index 81420fc..e193bfd 100644 --- a/vignettes/WorkedExamples_phe_sii.Rmd +++ b/vignettes/WorkedExamples_phe_sii.Rmd @@ -1,6 +1,6 @@ --- title: "Worked examples for phe_sii function" -author: "Emma Clegg" +author: "Emma Clegg and Georgina Anderson" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: @@ -41,7 +41,7 @@ The user can also specify the indicator type from "0 - default", "1 - rate" or " The example below calculates the SII on some life expectancy data. This is assumed to have symmetric confidence intervals around the indicator values, so default standard error calculations would be done (involving no prior transformations). -The relevant fields in the input dataset are specified for the arguments `quantile`, `population` and `value`. `value_type` is kept equal to 0 (default), and the number of repetitions set to 100 for faster running of the function as a demonstration. +The relevant fields in the input dataset are specified for the arguments `quantile`, `population` and `value`. `value_type` is kept equal to 0 (default), and the number of repetitions set to 1000 for faster running of the function as a demonstration. The standard error (`se`) has been provided here in the input dataset, meaning this will be used directly and lower/upper 95% confidence limits of the indicator values are not needed. @@ -75,7 +75,7 @@ Note that some areas are missing quantiles in the dataset, and these are subsequ ## Example 2 - rate -The example below calculates both the SII and RII on Directly Standardised Rate (DSR) data. The `value_type` argument is set to 1 to specify this indicator is a rate; this means a log transformation will be applied to the `value`, `lower_cl` and `upper_cl` fields before calculating the standard error. +The example below calculates both the SII and RII on Directly Standardised Rate (DSR) data. The `value_type` argument is set to 1 to specify this indicator is a rate; this means a log transformation will be applied to the `value`, `lower_cl` and `upper_cl` fields before calculating the standard error. The `transform` argument is set to TRUE because rates do not show a linear relationship across the quantiles so a log transformation will be applied before calculating the SII and then reverted once the SII has been calculated to ensure the SII is given in the original units. As the number of repetitions is not specified, the function will run on the default 100,000. To return the RII, the `rii` argument is set to TRUE. @@ -94,6 +94,7 @@ DSR_data_SII <- DSR_data %>% value_type = 1, # specifies indicator is a rate lower_cl = lowercl, upper_cl = uppercl, + transform = TRUE, rii = TRUE, # returns RII as well as SII (default is FALSE) reliability_stat = TRUE) # returns reliability stats (default is FALSE) @@ -107,7 +108,7 @@ knitr::kable(DSR_data_SII) This example calculates the SII for a prevalence indicator. Proportions need to be between 0 and 1 - this formatting is done in the `mutate` command below, before passing the grouped dataset to the `phe_sii` function. -The `value_type` argument is set to 2 to specify the indicator is a proportion, and a logit transformation is applied to the `value`, `lower_cl` and `upper_cl` fields. +The `value_type` argument is set to 2 to specify the indicator is a proportion, and a logit transformation is applied to the `value`, `lower_cl` and `upper_cl` fields before calculating the standard error. The `transform` argument is set to TRUE because proportions do not show a linear relationship across the quantiles so a logit transformation will be applied before calculating the SII and then reverted once the SII has been calculated to ensure the SII is given in the original units. The function will again run on the default 100,000 reps, and neither the RII or MAD values will be returned. @@ -132,6 +133,7 @@ prevalence_SII <- prevalence_data %>% value_type = 2, # specifies indicator is a proportion lower_cl = LCL, upper_cl = UCL, + transform = TRUE, multiplier = -100) # negative multiplier to scale SII outputs # View first 10 rows of results