Soft deprecate expect_similar(). Close #18

socialresearchcentre · Sep 13, 2021 · d2b1d1c · d2b1d1c
1 parent 99cba59
commit d2b1d1c
Show file tree

Hide file tree

Showing 6 changed files with 72 additions and 77 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,8 @@
 
 * Soft deprecated `context_data()` (#43). `context_data()` is just a wrapper for `set_testdata()`, which has a much more intuitive name.
 
+* Soft deprecated `expect_similar()` (#18). It was a silly way of comparing data frames and we're better off making something new.
+
 # testdat 0.2.0
 
 In addition to minor updates and bug fixes, this release does three main things:

diff --git a/R/deprec-expect.R b/R/deprec-expect.R
@@ -33,6 +33,49 @@ expect_join <- function(data2, by = NULL, not = FALSE, flt = TRUE, data = get_te
   expect_subset(data2 = data2, by = by, not = not, flt = !!flt, data = data)
 }
 
+#' @export
+#' @rdname expect-deprec
+#' @inheritParams datacomp-expectations
+#' @param var2 an unquoted variable name from data2
+#' @param flt2 a filter specifying a subset of data2 to test
+#' @param threshold the maximum proportional difference allowed between the two
+#'   categories
+#' @param min the minimum number of responses for a category to allow
+#'   comparison. This avoidmall categories raising spurious errors
+expect_similar <- function(var, data2, var2, flt = TRUE, flt2 = flt,
+                           threshold = 0.05, min = 100, data = get_testdata()) {
+  signal_soft_deprecated("`expect_similar()` is soft-deprecated as of testdat 0.3.0.")
+  act <- quasi_label(enquo(data))
+  act$var_desc   <- as_label_vars(enquo(var))
+  act$data2_desc <- as_label(enquo(data2))
+  act$var2_desc  <- as_label_vars(enquo(var2))
+  act$flt_desc   <- as_label_flt(enquo(flt))
+  act$flt2_desc  <- as_label_flt(enquo(flt2))
+
+  var <- enquo(var)
+  var2 <- enquo(var2)
+  data_tb  <- data  %>% group_by(!!var)  %>% summarise(freq = n())
+  data2_tb <- data2 %>% group_by(!!var2) %>% summarise(freq = n())
+
+  by_var <- structure(as_name(var2), names = as_name(var))
+  act$result <-
+    left_join(data_tb, data2_tb, by = by_var) %>%
+    mutate(prop_diff = abs(.data$freq.x - .data$freq.y) / .data$freq.x,
+           pass = .data$prop_diff < threshold | .data$freq.x < min)
+
+  expect_custom(
+    all(act$result$pass, na.rm = TRUE),
+    glue("{act$lab} has {sum(!act$result$pass, na.rm = TRUE)} \\
+          values breaking the {threshold} similarity threshold for variable \\
+          `{act$var_desc}`
+          Values: {glue::glue_collapse(act$result %>% filter(!pass) %>% pull(!!var), ', ')}
+          Filter: {act$flt_desc}"),
+    table = act$result
+  )
+
+  invisible(act$result$pass)
+}
+
 #' Filter data to expectation result
 #'
 #' \Sexpr[results=rd, stage=render]{testdat:::lifecycle("defunct")}

diff --git a/R/expect-datacomp.R b/R/expect-datacomp.R
@@ -48,46 +48,6 @@
 #'
 NULL
 
-#' @export
-#' @rdname datacomp-expectations
-#' @param var2 an unquoted variable name from data2
-#' @param flt2 a filter specifying a subset of data2 to test
-#' @param threshold the maximum proportional difference allowed between the two
-#'   categories
-#' @param min the minimum number of responses for a category to allow
-#'   comparison. This avoidmall categories raising spurious errors
-expect_similar <- function(var, data2, var2, flt = TRUE, flt2 = flt,
-                           threshold = 0.05, min = 100, data = get_testdata()) {
-  act <- quasi_label(enquo(data))
-  act$var_desc   <- as_label_vars(enquo(var))
-  act$data2_desc <- as_label(enquo(data2))
-  act$var2_desc  <- as_label_vars(enquo(var2))
-  act$flt_desc   <- as_label_flt(enquo(flt))
-  act$flt2_desc  <- as_label_flt(enquo(flt2))
-
-  var <- enquo(var)
-  var2 <- enquo(var2)
-  data_tb  <- data  %>% group_by(!!var)  %>% summarise(freq = n())
-  data2_tb <- data2 %>% group_by(!!var2) %>% summarise(freq = n())
-
-  by_var <- structure(as_name(var2), names = as_name(var))
-  act$result <-
-    left_join(data_tb, data2_tb, by = by_var) %>%
-    mutate(prop_diff = abs(.data$freq.x - .data$freq.y) / .data$freq.x,
-           pass = .data$prop_diff < threshold | .data$freq.x < min)
-
-  expect_custom(
-    all(act$result$pass, na.rm = TRUE),
-    glue("{act$lab} has {sum(!act$result$pass, na.rm = TRUE)} \\
-          values breaking the {threshold} similarity threshold for variable \\
-          `{act$var_desc}`
-          Values: {glue::glue_collapse(act$result %>% filter(!pass) %>% pull(!!var), ', ')}
-          Filter: {act$flt_desc}"),
-    table = act$result
-  )
-
-  invisible(act$result$pass)
-}
 
 #' @importFrom tidyselect vars_select
 #' @export

diff --git a/man/datacomp-expectations.Rd b/man/datacomp-expectations.Rd
diff --git a/man/expect-deprec.Rd b/man/expect-deprec.Rd
diff --git a/tests/testthat/test-expect_datacomp.R b/tests/testthat/test-expect_datacomp.R
@@ -1,11 +1,3 @@
-test_that("expect_similar", {
-  df1 <- data.frame(key = 1:1000, binom_50 = c(rep(0, 500), rep(1, 500)))
-  df2 <- data.frame(key = 1:1000, binom_51 = c(rep(0, 490), rep(1, 510)))
-  df3 <- data.frame(key = 1:1000, binom_80 = c(rep(0, 200), rep(1, 800)))
-  expect_success(expect_similar(binom_50, df2, binom_51, data = df1))
-  expect_failure(expect_similar(binom_50, df3, binom_80, data = df1))
-})
-
 test_that("expect_valmatch", {
   df1 <- data.frame(key = 1:10, a = rep(1:5, 2), b = rep(0:1, 5), c = rep(c("c", "C"), 5), d = 1:10)
   df2 <- data.frame(key = 1:10, a = rep(1:5, 2), b = rep(0:4, 2), c = rep(c("c", "C"), 5))