# \donttest{
# \dontshow{
options("jlmerclusterperm.nthreads" = 2)
-jlmerclusterperm_setup(verbose = FALSE)
+jlmerclusterperm_setup(cache_dir = tempdir(), verbose = FALSE)
julia_progress(show = FALSE)
# }
diff --git a/docs/search.json b/docs/search.json
index 24af39b..3a25921 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -1 +1 @@
-[{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/asynchronous-cpa.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"Asynchronous CPA","text":"Minimal example data CPA specification: Example CPA (1000 simulations): details actual CPA skipped ’s focus vignette, example readme just nsim.","code":"library(jlmerclusterperm) jlmerclusterperm_setup(verbose = FALSE) chickweights <- ChickWeight chickweights$Time <- as.integer(factor(chickweights$Time)) chickweights_spec <- make_jlmer_spec( formula = weight ~ 1 + Diet, data = chickweights, subject = \"Chick\", time = \"Time\" ) set_rng_state(123L) CPA <- clusterpermute( chickweights_spec, threshold = 2.5, nsim = 1000, progress = FALSE )"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/asynchronous-cpa.html","id":"basic-idea","dir":"Articles","previous_headings":"","what":"Basic idea","title":"Asynchronous CPA","text":"basic idea behind asynchronous CPA strategy start background R process whose sole job send instructions CPA Julia. ’s asynchronous running CPA way block interactive R session. Since work done Julia anyways, parallelization virtually impact performance evaluating R code. future package allows asynchronous evaluation R code. ’s big package implementing complex topic. can read project futureverse just show bare minimum . advanced users, note ’re use parallelization asynchronous properties (non-blocking evaluation R code background process). recommended start multiple R processes running CPA Julia session shared already multithreaded (can spare cores, set options(\"jlmerclusterperm.nthreads\") calling jlmerclusterperm_setup()). Later updates jlmerclusterperm may wrap workflow principled way, now vignette serves minimally working example asynchronously running CPA.","code":""},{"path":[]},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/asynchronous-cpa.html","id":"setup-for-async-cpa","dir":"Articles","previous_headings":"Walkthrough","what":"Setup for async CPA","title":"Asynchronous CPA","text":"Three things order workflow: Load future package Initialize multisession future Grab options jlmerclusterperm package environment Please treat .jlmerclusterperm internal variable read-object - ’s unexported meant manipulated.","code":"library(future) plan(multisession) pkgopts <- as.list(jlmerclusterperm:::.jlmerclusterperm)"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/asynchronous-cpa.html","id":"creating-the-future-object","dir":"Articles","previous_headings":"Walkthrough","what":"Creating the future object","title":"Asynchronous CPA","text":"start creating special object class using future::future(). can simply pass clusterpermute() code first argument future(), slight modification ensure background process connects Julia session. template follows. First, make pkgopts object (defined ) available future (via globals argument). , inside future expression ensure background process shares Julia session (via list2env(...)). future object replicating 1000-simulation CPA look like following: future created, immediately starts executing code background process. important evaluate future object (f) directly. Instead, query future::resolved() - simply tells whether background evaluation completed : , use loop show background CPA non-blocking. evaluating R code interactive session simultaneously CPA running: reach end loop 2 seconds, approximately long CPA took complete (confirm next section). point future completed result available collection. 
Although R code just crude progress alert using Sys.sleep(), can freely evaluate R code except functions call Julia. can make plots, clean data, write analyses, etc., just don’t run another CPA one already running Julia process shared.","code":"# Not run future( { list2env(pkgopts, jlmerclusterperm:::.jlmerclusterperm) ## Your CPA code below ## }, globals = structure(TRUE, add = \"pkgopts\") ) f <- future( { list2env(as.list(pkgopts), jlmerclusterperm:::.jlmerclusterperm) ## Your CPA code below ## set_rng_state(123L) clusterpermute( chickweights_spec, threshold = 2.5, nsim = 1000 ) }, globals = structure(TRUE, add = \"pkgopts\") ) resolved(f) #> [1] FALSE i <- 0 while (!resolved(f)) { Sys.sleep(0.5) i <<- i + 1 cat(sprintf(\"Elapsed: %.01fs\", i / 2), \"\\n\") } #> Elapsed: 0.5s #> Elapsed: 1.0s #> Elapsed: 1.5s #> Elapsed: 2.0s resolved(f) #> [1] TRUE"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/asynchronous-cpa.html","id":"collecting-the-results","dir":"Articles","previous_headings":"Walkthrough","what":"Collecting the results","title":"Asynchronous CPA","text":"can collect output background process future::value(). first, double check make sure background process indeed finished evaluating: ’s confirmed, can use value() collect results future assign variable inspection: Note value() also prints messages encountered executing CPA code. see specify progress = FALSE clusterpermute() call passed future. purposes, just note adding times messages similar saw loop (2 seconds). output asynchronous CPA (CPA_async) identical initial CPA ran beginning (CPA) ran default seed 1 RNG counter value 123L: separate vignette covers Julia RNG. practice, may want use random seed (via set_rng_seed()) background CPA.","code":"resolved(f) #> [1] TRUE CPA_async <- value(f) #> Connecting to Julia TCP server at localhost:11984 ... #> ℹ Detecting empirical clusters and calculating cluster-mass statistics. #> ✔ Detecting empirical clusters and calculating cluster-mass statistics. [71ms] #> #> ℹ Sampling cluster-mass statistics from a bootstrapped null distribution. #> ✔ Sampling cluster-mass statistics from a bootstrapped null distribution. [1.7s] #> #> ℹ Calculating the probability of the observed cluster-mass statistics. #> ✔ Calculating the probability of the observed cluster-mass statistics. [36ms] #> identical(CPA_async, CPA) #> [1] TRUE"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"background","dir":"Articles","previous_headings":"","what":"Background","title":"de Carvalho et al. 2021","text":"data comes eye-tracking study de Carvalho, Dautriche, Fiévet, & Christophe (2021) “Toddlers exploit referential syntactic cues flexibly adapt interpretation novel verb meanings.” article reproduces expands Experiment 2 analysis, used CPA (t-tests) conduct pairwise comparison three conditions (relevant Figure 7 paper ). data comes mostly ready CPA straight study’s OSF repository. collapsible chunk contains code prepare data reproduce figure. Data cleaning Code Figure 7 plots data original study (close reproduce ). 
E2_data_agg dataframe following four columns: Subject: Unique identifier subjects Condition: -participant factor variable three levels (\"Intransitive\", \"RightDislocated\", \"Transitive\") Time: continuous measure time 0-8000ms 50ms intervals Prop: Proportion looks target (averaged across trials within condition, subject) following reproduction Figure 7 original paper: original study used pairwise comparisons analyze relationship three conditions. smaller figures (B, C, D) plot following relationships: (B) looks target \"Transitive\" compared \"RightDislocated\" (C) looks target \"RightDislocated\" compared \"Intransitive\" (D) looks target \"Transitive\" compared \"Intransitive\" rest vignette organized follows: First, replicate individual pairwise CPAs original paper. Next, consider parsimonious analysis reformulates research hypothesis choice regression contrast. Lastly, build model (2) consider issues around model diagnostics complexity (linear vs. logistic; fixed vs. mixed) context CPA Load package start Julia instance jlmerclusterperm_setup() proceeding.","code":"library(dplyr) library(forcats) filepath <- \"https://osf.io/download/var2h/?view_only=2a06746ba360446e9857df73307d21be\" E2_data_raw <- readr::read_delim(filepath) E2_data <- E2_data_raw %>% filter(away != 1) %>% mutate(Target = as.integer(bin_2P == 1)) %>% mutate(Condition = fct_recode( factor(Condition, levels = c(\"intrans\", \"pros\", \"trans\")), \"Intransitive\" = \"intrans\", \"RightDislocated\" = \"pros\", \"Transitive\" = \"trans\" )) %>% select(Subject, Trial, Condition, Time, Target) E2_data_agg <- E2_data %>% group_by(Subject, Condition, Time) %>% summarize(Prop = mean(Target), .groups = \"drop\") # Unaggregated trial-level data of 1s and 0s E2_data #> # A tibble: 69,123 × 5 #> Subject Trial Condition Time Target #> #> 1 200.asc 0 RightDislocated 0 0 #> 2 200.asc 0 RightDislocated 400 0 #> 3 200.asc 0 RightDislocated 450 1 #> 4 200.asc 0 RightDislocated 500 1 #> 5 200.asc 0 RightDislocated 550 1 #> 6 200.asc 0 RightDislocated 600 1 #> 7 200.asc 0 RightDislocated 650 1 #> 8 200.asc 0 RightDislocated 700 1 #> 9 200.asc 0 RightDislocated 750 1 #> 10 200.asc 0 RightDislocated 800 1 #> # ℹ 69,113 more rows # Aggregated subject-mean proportions data used in original study E2_data_agg #> # A tibble: 11,540 × 4 #> Subject Condition Time Prop #> #> 1 200.asc RightDislocated 0 0.5 #> 2 200.asc RightDislocated 50 1 #> 3 200.asc RightDislocated 100 1 #> 4 200.asc RightDislocated 150 1 #> 5 200.asc RightDislocated 200 1 #> 6 200.asc RightDislocated 250 0.5 #> 7 200.asc RightDislocated 300 0.5 #> 8 200.asc RightDislocated 350 0.4 #> 9 200.asc RightDislocated 400 0.286 #> 10 200.asc RightDislocated 450 0.429 #> # ℹ 11,530 more rows library(ggplot2) make_fig7_plot <- function(conditions) { E2_data %>% group_by(Condition, Time) %>% summarize( Prop = mean(Target), se = sqrt(var(Target) / n()), lower = Prop - se, upper = Prop + se, .groups = \"drop\" ) %>% filter(Condition %in% conditions) %>% ggplot(aes(Time, Prop, color = Condition, fill = Condition)) + geom_hline(aes(yintercept = .5)) + geom_ribbon( aes(ymin = lower, ymax = upper), alpha = .2, show.legend = FALSE, ) + geom_line(linewidth = 1) + scale_color_manual( aesthetics = c(\"color\", \"fill\"), values = setNames(scales::hue_pal()(3)[c(2, 1, 3)], levels(E2_data$Condition)) ) + scale_y_continuous(limits = c(.2, .8), oob = scales::oob_keep) + labs(y = NULL, x = NULL) + theme_minimal() + theme(axis.title.y = element_text(angle = 0, vjust = .5, hjust = 0)) } fig7_comparisons 
<- list( \"A\" = c(\"Intransitive\", \"RightDislocated\", \"Transitive\"), \"B\" = c(\"RightDislocated\", \"Transitive\"), \"C\" = c(\"Intransitive\", \"RightDislocated\"), \"D\" = c(\"Intransitive\", \"Transitive\") ) fig7 <- lapply(fig7_comparisons, make_fig7_plot) # Figure 7 combined plots library(patchwork) p_top <- fig7$A + scale_x_continuous(breaks = scales::breaks_width(1000)) + theme(legend.position = \"bottom\") p_bot <- (fig7$B + fig7$C + fig7$D) & guides(color = guide_none()) fig7_combined <- p_top / guide_area() / p_bot + plot_layout(guides = \"collect\", heights = c(3, .1, 1)) + plot_annotation(tag_levels = \"A\") E2_data_agg #> # A tibble: 11,540 × 4 #> Subject Condition Time Prop #> #> 1 200.asc RightDislocated 0 0.5 #> 2 200.asc RightDislocated 50 1 #> 3 200.asc RightDislocated 100 1 #> 4 200.asc RightDislocated 150 1 #> 5 200.asc RightDislocated 200 1 #> 6 200.asc RightDislocated 250 0.5 #> 7 200.asc RightDislocated 300 0.5 #> 8 200.asc RightDislocated 350 0.4 #> 9 200.asc RightDislocated 400 0.286 #> 10 200.asc RightDislocated 450 0.429 #> # ℹ 11,530 more rows fig7_combined library(jlmerclusterperm) jlmerclusterperm_setup(verbose = FALSE)"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"replicating-the-pairwise-cpas","dir":"Articles","previous_headings":"","what":"Replicating the pairwise CPAs","title":"de Carvalho et al. 2021","text":"three conditions experiment levels Condition factor variable: begin replication \"Transitive\" vs. \"RightDislocated\" comparison shown Figure 7B apply logic two pairwise comparisons 7C 7D.","code":"levels(E2_data_agg$Condition) #> [1] \"Intransitive\" \"RightDislocated\" \"Transitive\""},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"transitive-vs--rightdislocated","dir":"Articles","previous_headings":"Replicating the pairwise CPAs","what":"Transitive vs. RightDislocated","title":"de Carvalho et al. 2021","text":"reproduced Figure 7B compares \"Transitive\" \"RightDislocated\" conditions. paper (caption Figure 7; emphasis mine) reports: transitive right-dislocated conditions differed second repetition novel verbs (~6400 ms onset test trials end trials). now replicate analysis. First, prepare specification object. Two things note : express original t-test regression model Condition predictor drop third, unused condition data factor representation Next, fit global model sanity check structure model output. get one estimate ConditionTransitive positive coefficient, ’d expect: Finally, call clusterpermute() threshold = 1.5 (original study) simulate 100 permutations: detect largest empirical cluster spanning 6400-8000ms reported original paper. 
converges around p=0.02 separate 10,000-simulation run (shown ).","code":"fig7$B spec_7B <- make_jlmer_spec( formula = Prop ~ Condition, data = E2_data_agg %>% filter(Condition %in% c(\"Transitive\", \"RightDislocated\")) %>% mutate(Condition = droplevels(Condition)), # or forcats::fct_drop() subject = \"Subject\", time = \"Time\" ) spec_7B #> ── jlmer specification ───────────────────────────────────────── ── #> Formula: Prop ~ 1 + ConditionTransitive #> Predictors: #> Condition: ConditionTransitive #> Groupings: #> Subject: Subject #> Trial: #> Time: Time #> Data: #> # A tibble: 7,688 × 4 #> Prop ConditionTransitive Subject Time #> #> 1 0.5 0 200.asc 0 #> 2 1 0 200.asc 50 #> 3 1 0 200.asc 100 #> # ℹ 7,685 more rows #> ──────────────────────────────────────────────────────────────────────────────── jlmer(spec_7B) #> #> ────────────────────────────────────────────────────────────────────────────────── #> Coef. Std. Error z Pr(>|z|) Lower 95% Upper 95% #> ────────────────────────────────────────────────────────────────────────────────── #> (Intercept) 0.592999 0.00364424 162.72 <1e-99 0.585856 0.600141 #> ConditionTransitive 0.0569017 0.0051591 11.03 <1e-27 0.04679 0.0670133 #> ────────────────────────────────────────────────────────────────────────────────── clusterpermute(spec_7B, threshold = 1.5, nsim = 100L, progress = FALSE) #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 1.5) ──────────── ── #> ConditionTransitive (n = 100) #> Mean (SD): -2.674 (27.83) #> Coverage intervals: 95% [-61.740, 46.439] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 1.5) ──────────────────────── ── #> ConditionTransitive #> [300, 1150]: 40.643 (p=0.1287) #> [2700, 2750]: -4.068 (p=0.9010) #> [6150, 6200]: 3.249 (p=0.9505) #> [6400, 8000]: 88.742 (p=0.0198) #> ────────────────────────────────────────────────────────────────────────────────"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"rightdislocated-vs--intransitive","dir":"Articles","previous_headings":"Replicating the pairwise CPAs","what":"RightDislocated vs. Intransitive","title":"de Carvalho et al. 2021","text":"reproduced Figure 7C compares \"RightDislocated\" \"Intransitive\" conditions. paper reports: intransitive right-dislocated conditions differed first repetition novel verbs (2100 ms 3500 ms beginning test trials). repeat CPA procedure pairwise comparison: largest empirical cluster detect spans 2150-3650ms, slightly different cluster reported original paper (2100-3500ms). 
relatively less “extreme” cluster converges around p=0.05 10,000-simulation run.","code":"fig7$C spec_7C <- make_jlmer_spec( formula = Prop ~ Condition, data = E2_data_agg %>% filter(Condition %in% c(\"RightDislocated\", \"Intransitive\")) %>% mutate(Condition = droplevels(Condition)), subject = \"Subject\", time = \"Time\" ) clusterpermute(spec_7C, threshold = 1.5, nsim = 100L, progress = FALSE) #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 1.5) ──────────── ── #> ConditionRightDislocated (n = 100) #> Mean (SD): 0.619 (33.26) #> Coverage intervals: 95% [-63.734, 67.326] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 1.5) ──────────────────────── ── #> ConditionRightDislocated #> [150, 200]: 3.562 (p=0.9010) #> [2150, 3650]: 72.842 (p=0.0297) #> [4550, 5050]: 23.836 (p=0.3960) #> [5150, 5350]: 8.589 (p=0.7723) #> [5700, 5750]: 3.332 (p=0.9208) #> ────────────────────────────────────────────────────────────────────────────────"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"transitive-vs--intransitive","dir":"Articles","previous_headings":"Replicating the pairwise CPAs","what":"Transitive vs. Intransitive","title":"de Carvalho et al. 2021","text":"reproduced Figure 7D compares \"Transitive\" \"Intransitive\" conditions. paper reports: transitive intransitive conditions differed slightly offset first sentence test trials (4500 ms beginning test trials end trials). repeat CPA procedure pairwise comparison: largest empirical cluster detect spans 4600-8000ms, slightly different cluster reported original paper (4500-8000ms). converges around p=0.001 separate 10,000-simulation run.","code":"fig7$D spec_7D <- make_jlmer_spec( formula = Prop ~ Condition, data = E2_data_agg %>% filter(Condition %in% c(\"Transitive\", \"Intransitive\")) %>% mutate(Condition = droplevels(Condition)), subject = \"Subject\", time = \"Time\" ) clusterpermute(spec_7D, threshold = 1.5, nsim = 100L, progress = FALSE) #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 1.5) ──────────── ── #> ConditionTransitive (n = 100) #> Mean (SD): 2.024 (41.73) #> Coverage intervals: 95% [-70.510, 88.501] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 1.5) ──────────────────────── ── #> ConditionTransitive #> [300, 1000]: 25.928 (p=0.4356) #> [1300, 1400]: 4.808 (p=0.8713) #> [2800, 2900]: 5.356 (p=0.8218) #> [3000, 4300]: 52.638 (p=0.2079) #> [4600, 8000]: 172.635 (p=0.0198) #> ────────────────────────────────────────────────────────────────────────────────"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"expressed-as-regression-contrasts","dir":"Articles","previous_headings":"","what":"Expressed as regression contrasts","title":"de Carvalho et al. 2021","text":"now consider parsimonious analysis translates research hypothesis contrast coding avoid multiple testing. Specifically, exploit fact original paper specifies hypothesis Intransitive < RightDislocated < Transitive.","code":""},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"helmert-deviation-coding","dir":"Articles","previous_headings":"Expressed as regression contrasts","what":"Helmert (deviation) coding","title":"de Carvalho et al. 
2021","text":"Testing ordinal relationship levels category require possible pairwise comparisons; instead, can approximated via Helmert coding (.k.. deviance coding) K levels expressed K-1 contrasts, contrast successively comparing level vs. average previous (typically lower) levels. Critically, Helmert contrasts orthogonal, can test simultaneously single model. data, test ordinal relationship Intransitive < RightDislocated < Transitive via two contrasts: \"RightDislocated\" vs. \"Intransitive\" \"Transitive\" vs. average \"RightDislocated\" \"Intransitive\" corresponding numerical coding following: practice, Helmert contrasts often standardized deviations expressed unit 1. also comparison \"RightDislocated\" vs. \"Intransitive\" expressed 1 unit RvsI comparison \"Transitive\" vs. average \"RightDislocated\" \"Intransitive\" expressed 1 unit TvsRI: contrast matrix, make new column original data called ConditionHelm copying Condition column, apply contrasts new column: Lastly, build new specification making use full data. , predict Prop ConditionHelm estimate effect contrasts single model. sanity check, fit global model - expect estimate contrast indeed find RvsI TvsRI output positive coefficients. suggests ordinal relationship three conditions hold least globally. Note coefficient RvsI contrast exactly pairwise model using spec_7C earlier, also compared \"RightDislocated\" \"Intransitive\": spec_helm two conditions RvsI coded -0.5 0.5, spec_7C’s treatment coding coded 0 1. Since express difference \"Intransitive\" \"RightDislocated\" unit +1, two coefficients equal magnitude sign.","code":"condition_helm <- contr.helmert(3) colnames(condition_helm) <- c(\"RvsI\", \"TvsRI\") condition_helm #> RvsI TvsRI #> 1 -1 -1 #> 2 1 -1 #> 3 0 2 condition_helm[, 1] <- condition_helm[, 1] / 2 condition_helm[, 2] <- condition_helm[, 2] / 3 condition_helm #> RvsI TvsRI #> 1 -0.5 -0.3333333 #> 2 0.5 -0.3333333 #> 3 0.0 0.6666667 E2_data_agg$ConditionHelm <- E2_data_agg$Condition contrasts(E2_data_agg$ConditionHelm) <- condition_helm # For pretty-printing as fractions MASS::fractions(contrasts(E2_data_agg$ConditionHelm)) #> RvsI TvsRI #> Intransitive -1/2 -1/3 #> RightDislocated 1/2 -1/3 #> Transitive 0 2/3 spec_helm <- make_jlmer_spec( formula = Prop ~ ConditionHelm, data = E2_data_agg, subject = \"Subject\", time = \"Time\" ) spec_helm #> ── jlmer specification ───────────────────────────────────────── ── #> Formula: Prop ~ 1 + ConditionHelmRvsI + ConditionHelmTvsRI #> Predictors: #> ConditionHelm: ConditionHelmRvsI, ConditionHelmTvsRI #> Groupings: #> Subject: Subject #> Trial: #> Time: Time #> Data: #> # A tibble: 11,540 × 5 #> Prop ConditionHelmRvsI ConditionHelmTvsRI Subject Time #> #> 1 0.5 0.5 -0.333 200.asc 0 #> 2 1 0.5 -0.333 200.asc 50 #> 3 1 0.5 -0.333 200.asc 100 #> # ℹ 11,537 more rows #> ──────────────────────────────────────────────────────────────────────────────── jlmer(spec_helm) %>% tidy(effects = \"fixed\") #> # A tibble: 3 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.591 0.00215 275. 0 #> 2 ConditionHelmRvsI 0.0640 0.00526 12.2 5.67e-34 #> 3 ConditionHelmTvsRI 0.0889 0.00457 19.5 2.02e-84 jlmer(spec_7C) %>% tidy(effects = \"fixed\") #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.529 0.00379 140. 
0 #> 2 ConditionRightDislocated 0.0640 0.00536 11.9 8.30e-33"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"interpreting-cpa-results","dir":"Articles","previous_headings":"Expressed as regression contrasts","what":"Interpreting CPA results","title":"de Carvalho et al. 2021","text":"proceed normal clusterpermute() using new spec_helm: ’s summary find CPA_helm: RvsI clusters similar detected previous “partial” CPA spec_7C. slight differences cluster-mass due part due Helmert-coded model simultaneously estimating TvsRI contrast. importantly, CPA_helm detects largest cluster \"RightDislocated\" \"Intransitive\" (2150-3650ms). converges around p=0.05 10,000-simulation run. TvsRI clusters new, largest cluster predictor spans 5800ms-8000ms. converges around p=0.01 separate 10,000-simulation run. cluster effectively captures region relationship Transitive > (RightDislocated & Intransitive) emerges robust. Essentially, TvsRI comparison line \"Transitive\" invisible line runs lines \"Intransitive\" \"RightDislocated\". conclude visualizing clusters two Helmert-coded terms, annotated empirical data. “invisible” line RI Helmert coding drawn dashed line.","code":"reset_rng_state() CPA_helm <- clusterpermute(spec_helm, threshold = 1.5, nsim = 100L, progress = FALSE) CPA_helm #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 1.5) ──────────── ── #> ConditionHelmRvsI (n = 100) #> Mean (SD): 4.019 (28.80) #> Coverage intervals: 95% [-45.805, 67.827] #> ConditionHelmTvsRI (n = 100) #> Mean (SD): 1.213 (24.59) #> Coverage intervals: 95% [-50.218, 47.462] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 1.5) ──────────────────────── ── #> ConditionHelmRvsI #> [150, 200]: 3.654 (p=0.8911) #> [2150, 3650]: 71.141 (p=0.0297) #> [4550, 5350]: 36.043 (p=0.2079) #> [5700, 5750]: 3.328 (p=0.9010) #> ConditionHelmTvsRI #> [300, 1050]: 35.863 (p=0.1881) #> [3500, 3700]: 8.348 (p=0.6436) #> [3850, 4250]: 15.985 (p=0.4455) #> [4800, 5600]: 36.143 (p=0.1782) #> [5800, 8000]: 123.631 (p=0.0099) #> ──────────────────────────────────────────────────────────────────────────────── fig7A_v2 <- fig7$A + geom_line( aes(Time, Prop, linetype = \"RI\"), inherit.aes = FALSE, data = . %>% filter(Condition %in% c(\"RightDislocated\", \"Intransitive\")) %>% group_by(Time) %>% summarize(Prop = mean(Prop)), ) + scale_linetype_manual(values = \"41\", guide = guide_legend(\"ConditionHelm\")) + guides(x = guide_none(\"\")) clusters_annotation <- tidy(CPA_helm$empirical_clusters) %>% mutate(contrast = gsub(\".*([TR])vs([RI]+)\", \"\\\\1 vs. \\\\2\", predictor)) %>% ggplot(aes(y = fct_rev(contrast))) + geom_segment( aes( x = start, xend = end, yend = contrast, color = pvalue < 0.05 ), linewidth = 8 ) + scale_color_manual(values = c(\"grey70\", \"steelblue\")) + scale_y_discrete() + scale_x_continuous(n.breaks = 9, limits = range(E2_data_agg$Time)) + theme_minimal() + theme( axis.title = element_blank(), axis.text.y = element_text(face = \"bold\"), panel.border = element_rect(fill = NA), panel.grid.major.y = element_blank() ) fig7A_v2 / clusters_annotation + plot_layout(heights = c(4, 1))"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"model-complexity","dir":"Articles","previous_headings":"","what":"Model complexity","title":"de Carvalho et al. 
2021","text":"wrap case study considering complex CPA uses logistic mixed effects models trial-level data fixations target (1s 0s).","code":""},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"logistic-mixed-model","dir":"Articles","previous_headings":"Model complexity","what":"Logistic mixed model","title":"de Carvalho et al. 2021","text":"un-aggregated trial-level data use section E2_data, comes initial data preparation code chunk: apply Helmert/deviation-coded contrast matrix: specification object E2_data, add trial = \"Trial\" predict Target instead Prop. also add -subject random intercept formula: , CPA family = \"binomial\": now visualize results CPA_helm_complex CPA_helm side side: results largely , except largest, significant cluster identified TvsRI extends much complex CPA simple CPA. examine difference next.","code":"E2_data #> # A tibble: 69,123 × 5 #> Subject Trial Condition Time Target #> #> 1 200.asc 0 RightDislocated 0 0 #> 2 200.asc 0 RightDislocated 400 0 #> 3 200.asc 0 RightDislocated 450 1 #> 4 200.asc 0 RightDislocated 500 1 #> 5 200.asc 0 RightDislocated 550 1 #> 6 200.asc 0 RightDislocated 600 1 #> 7 200.asc 0 RightDislocated 650 1 #> 8 200.asc 0 RightDislocated 700 1 #> 9 200.asc 0 RightDislocated 750 1 #> 10 200.asc 0 RightDislocated 800 1 #> # ℹ 69,113 more rows E2_data$ConditionHelm <- E2_data$Condition contrasts(E2_data$ConditionHelm) <- condition_helm MASS::fractions(contrasts(E2_data$ConditionHelm)) #> RvsI TvsRI #> Intransitive -1/2 -1/3 #> RightDislocated 1/2 -1/3 #> Transitive 0 2/3 spec_helm_complex <- make_jlmer_spec( formula = Target ~ ConditionHelm + (1 | Subject), data = E2_data, subject = \"Subject\", trial = \"Trial\", time = \"Time\" ) spec_helm_complex #> ── jlmer specification ───────────────────────────────────────── ── #> Formula: Target ~ 1 + ConditionHelmRvsI + ConditionHelmTvsRI + (1 | Subject) #> Predictors: #> ConditionHelm: ConditionHelmRvsI, ConditionHelmTvsRI #> Groupings: #> Subject: Subject #> Trial: Trial #> Time: Time #> Data: #> # A tibble: 69,123 × 6 #> Target ConditionHelmRvsI ConditionHelmTvsRI Subject Trial Time #> #> 1 0 0.5 -0.333 200.asc 0 0 #> 2 0 0.5 -0.333 200.asc 0 400 #> 3 1 0.5 -0.333 200.asc 0 450 #> # ℹ 69,120 more rows #> ──────────────────────────────────────────────────────────────────────────────── reset_rng_state() CPA_helm_complex <- clusterpermute( spec_helm_complex, family = \"binomial\", threshold = 1.5, nsim = 100, progress = FALSE ) CPA_helm_complex #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 1.5) ──────────── ── #> ConditionHelmRvsI (n = 100) #> Mean (SD): 1.791 (31.12) #> Coverage intervals: 95% [-60.956, 80.602] #> ConditionHelmTvsRI (n = 100) #> Mean (SD): 2.704 (28.90) #> Coverage intervals: 95% [-54.636, 62.925] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 1.5) ──────────────────────── ── #> ConditionHelmRvsI #> [2100, 3450]: 69.563 (p=0.0495) #> [4200, 5050]: 40.460 (p=0.1584) #> [5150, 5350]: 8.379 (p=0.7228) #> ConditionHelmTvsRI #> [350, 800]: 19.358 (p=0.4158) #> [900, 1050]: 7.519 (p=0.6535) #> [1250, 1350]: 4.980 (p=0.7228) #> [3100, 4450]: 56.770 (p=0.0792) #> [4650, 8000]: 181.165 (p=0.0099) #> ──────────────────────────────────────────────────────────────────────────────── clusters_annotation2 <- tidy(CPA_helm_complex$empirical_clusters) %>% mutate(contrast = gsub(\".*([TR])vs([RI]+)\", \"\\\\1 vs. 
\\\\2\", predictor)) %>% ggplot(aes(y = fct_rev(contrast))) + geom_segment( aes( x = start, xend = end, yend = contrast, color = pvalue < 0.05 ), linewidth = 8 ) + scale_color_manual(values = c(\"grey70\", \"steelblue\")) + scale_y_discrete() + scale_x_continuous(n.breaks = 9, limits = range(E2_data$Time)) + theme_minimal() + theme( legend.position = \"bottom\", axis.title = element_blank(), axis.text.y = element_text(face = \"bold\"), panel.border = element_rect(fill = NA), panel.grid.major.y = element_blank() ) clusters_annotation / clusters_annotation2 & guides(color = guide_none()) & plot_annotation(tag_levels = list(c(\"Simple\", \"Complex\")))"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/deCarvalho-et-al-2021.html","id":"comparison-of-cpas","dir":"Articles","previous_headings":"Model complexity","what":"Comparison of CPAs","title":"de Carvalho et al. 2021","text":"Looking timewise statistics computed simple vs. complex CPA tells us . figure plots information calls compute_timewise_statistics(): Whereas largest cluster starts emerge 5800ms CPA_helm, emerges much earlier 4650ms CPA_helm_complex. zoom region around 5800ms, see timewise statistics T vs. RI CPA_helm suddenly dip 1.5 threshold 5750ms: CPA better? existence dips spikes indicate problem, ’s consistent expectation simple CPA less robust variance. can inspect time-point model 5750ms two CPAs fitting : ’s standard way comparing goodness fit linear fixed-effects model logistic mixed-effects model fitted different data. complex model outperforms simple model classic metrics inspect glance(). doesn’t come surprise, differences largely driven number observations (nobs). Determining appropriate degree model complexity CPA beyond scope vignette, pursue discussion . Instead, conclude old wisdom: chain strong weakest link.","code":"# Compute the timewise statistics from the CPA specifications empirical_statistics <- bind_rows( tidy(compute_timewise_statistics(spec_helm)), tidy(compute_timewise_statistics(spec_helm_complex, family = \"binomial\")), .id = \"spec\" ) %>% mutate( CPA = c(\"Simple\", \"Complex\")[as.integer(spec)], Contrast = gsub(\".*([TR])vs([RI]+)\", \"\\\\1 vs. \\\\2\", predictor) ) # Time series plot of the statistics, with a line for each Helmert contrasts empirical_statistics_fig <- ggplot(empirical_statistics, aes(time, statistic)) + geom_line(aes(color = Contrast, linetype = CPA), linewidth = 1, alpha = .7) + geom_hline(yintercept = c(-1.5, 1.5), linetype = 2) + theme_classic() empirical_statistics_fig empirical_statistics_fig + geom_vline(xintercept = 5750) + coord_cartesian(xlim = 5750 + c(-500, 500), ylim = c(1, 2.5)) jlmer_simple_5750 <- to_jlmer( formula = Prop ~ ConditionHelm, data = E2_data_agg %>% filter(Time == 5750) ) jlmer_complex_5750 <- to_jlmer( formula = Target ~ ConditionHelm + (1 | Subject), family = \"binomial\", data = E2_data %>% filter(Time == 5750) ) glance(jlmer_simple_5750) #> # A tibble: 1 × 8 #> nobs df sigma logLik AIC BIC deviance df.residual #> #> 1 72 4 0.223 7.51 -7.02 2.08 3.42 68 glance(jlmer_complex_5750) #> # A tibble: 1 × 8 #> nobs df sigma logLik AIC BIC deviance df.residual #> #> 1 448 4 NA -301. 609. 626. 601. 444"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/eyetrackingR-comparison.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"Comparison to eyetrackingR","text":"following collapsed code chunk runs code “Prerequisites” section original eyetrackingR vignette: pick response_time dataframe created. 
stage, data filtered track loss aggregated subject time bins, among things. fuller description experiment research hypothesis can found original eyetrackingR vignette. purposes exercise, note following minimal facts analysis: hypothesis Prop ~ Target. t-test used estimate effect Target Prop Target experiment condition. two-level factor (\"Animate\" vs. \"Inanimate\") Prop response variable. continuous measure proportion (0-1) looks area interest ParticipantName column identifies participant TimeBin represents time, binned 100ms windows Additionally, keep following columns necessary metadata eyetrackingR approach, though can dropped jlmerclusterperm approach. Time continuous, raw (.e., un-binned) measure time AOI identity area interest simplicity, subset response_time include columns interest cluster-based permutation analysis. Lastly, replicate plot original vignette:","code":"set.seed(42) library(\"Matrix\") library(\"lme4\") library(\"ggplot2\") library(\"eyetrackingR\") data(\"word_recognition\") data <- make_eyetrackingr_data(word_recognition, participant_column = \"ParticipantName\", trial_column = \"Trial\", time_column = \"TimeFromTrialOnset\", trackloss_column = \"TrackLoss\", aoi_columns = c(\"Animate\", \"Inanimate\"), treat_non_aoi_looks_as_missing = TRUE ) # subset to response window post word-onset response_window <- subset_by_window(data, window_start_time = 15500, window_end_time = 21000, rezero = FALSE ) # analyze amount of trackloss by subjects and trials (trackloss <- trackloss_analysis(data = response_window)) #> # A tibble: 155 × 6 #> ParticipantName Trial Samples TracklossSamples TracklossForTrial #> #> 1 ANCAT139 FamiliarBottle 330 161 0.488 #> 2 ANCAT18 FamiliarBird 330 74 0.224 #> 3 ANCAT18 FamiliarBottle 330 43 0.130 #> 4 ANCAT18 FamiliarCow 330 159 0.482 #> 5 ANCAT18 FamiliarDog 330 95 0.288 #> 6 ANCAT18 FamiliarHorse 330 165 0.5 #> 7 ANCAT18 FamiliarSpoon 330 95 0.288 #> 8 ANCAT22 FamiliarBird 330 14 0.0424 #> 9 ANCAT22 FamiliarBottle 330 8 0.0242 #> 10 ANCAT22 FamiliarDog 330 55 0.167 #> # ℹ 145 more rows #> # ℹ 1 more variable: TracklossForParticipant # remove trials with > 25% of trackloss response_window_clean <- clean_by_trackloss( data = response_window, trial_prop_thresh = .25 ) # create Target condition column response_window_clean$Target <- as.factor(ifelse(test = grepl(\"(Spoon|Bottle)\", response_window_clean$Trial), yes = \"Inanimate\", no = \"Animate\" )) response_time <- make_time_sequence_data(response_window_clean, time_bin_size = 100, predictor_columns = c(\"Target\"), aois = \"Animate\", summarize_by = \"ParticipantName\" ) response_time <- response_time %>% select( Prop, Target, TimeBin, ParticipantName, Time, AOI ) dplyr::glimpse(response_time) #> Rows: 2,970 #> Columns: 6 #> $ Prop NaN, NaN, NaN, NaN, NaN, NaN, 0, 0, 1, 1, 1, 1, 1, 1, … #> $ Target Animate, Animate, Animate, Animate, Animate, Animate, … #> $ TimeBin 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,… #> $ ParticipantName ANCAT18, ANCAT18, ANCAT18, ANCAT18, ANCAT18, ANCAT18, … #> $ Time 15500, 15600, 15700, 15800, 15900, 16000, 16100, 16200… #> $ AOI \"Animate\", \"Animate\", \"Animate\", \"Animate\", \"Animate\",… # visualize timecourse plot(response_time, predictor_column = \"Target\") + theme_light() + coord_cartesian(ylim = c(0, 1)) #> Warning: Removed 37 rows containing non-finite values (`stat_summary()`). 
#> Removed 37 rows containing non-finite values (`stat_summary()`)."},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/eyetrackingR-comparison.html","id":"cpa-in-eyetrackingr","dir":"Articles","previous_headings":"","what":"CPA in {eyetrackingR}","title":"Comparison to eyetrackingR","text":"Continuing response_time dataframe, jump “Bootstrapped cluster-based permutation analysis” original vignette. original vignette uses following heuristic choosing threshold: choose threshold outside scope article - simply use value approaches. eyetrackingR, CPA conducted two steps: Prepare data CPA make_time_cluster_data(): step computes timewise statistics data identifies empirical clusters, can inspected plot() summary() method: Run permutation test cluster data analyze_time_clusters(): simulates null distribution cluster-mass statistics. output, printed, essentially output summary(df_timeclust) p-values (Probability column) null distribution (extremety empirical clusters context) can visualized plot() method:","code":"num_sub <- length(unique((response_window_clean$ParticipantName))) threshold_t <- qt(p = 1 - .05 / 2, df = num_sub - 1) threshold_t #> [1] 2.055529 df_timeclust <- make_time_cluster_data(response_time, test = \"t.test\", paired = TRUE, predictor_column = \"Target\", threshold = threshold_t ) plot(df_timeclust) summary(df_timeclust) #> Test Type: t.test #> Predictor: Target #> Formula: Pair(Prop[Target == \"Animate\"], Prop[Target == \"Inanimate\"]) ~ 1 #> Summary of Clusters ====== #> Cluster Direction SumStatistic StartTime EndTime #> 1 1 Positive 132.29900 16100 19300 #> 2 2 Positive 42.31067 19400 20800 system.time({ clust_analysis <- analyze_time_clusters( df_timeclust, within_subj = TRUE, paired = TRUE, samples = 150, quiet = TRUE ) }) #> user system elapsed #> 19.48 0.14 36.24 clust_analysis #> Test Type: t.test #> Predictor: Target #> Formula: Pair(Prop[Target == \"Animate\"], Prop[Target == \"Inanimate\"]) ~ 1 #> Null Distribution ====== #> Mean: -1.4815 #> 2.5%: -31.1268 #> 97.5%: 17.9841 #> Summary of Clusters ====== #> Cluster Direction SumStatistic StartTime EndTime Probability #> 1 1 Positive 132.29900 16100 19300 0.000000000 #> 2 2 Positive 42.31067 19400 20800 0.006666667 plot(clust_analysis)"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/eyetrackingR-comparison.html","id":"cpa-in-jlmerclusterperm","dir":"Articles","previous_headings":"","what":"CPA in {jlmerclusterperm}","title":"Comparison to eyetrackingR","text":"First, set jlmerclusterperm. comparability, use single thread: {jlmerclusterpm}, CPA can also conducted two steps: Make specification object make_jlmer_spec() first specify formula, data, grouping columns data. Instead paired t-test, specify linear model formula Prop ~ Target. output prepares data CPA subsetting applying contrast coding schemes, among things: Run permutation test specification using clusterpermute() kinds information returned: different pieces information available inspection using tidy(), returns dataframe underlying summary: contrast eyetrackingR, jlmerclusterperm provide custom plot() method. However, kinds plots can replicated lines ggplot code: Note sign clusters (originally) flipped two approaches, trivial difference simply reflects default choice baseline t.text() vs. regression contrasts. can addressed setting contrasts prior running CPA (see related issue another article). 
Lastly, side--side comparison results:","code":"library(jlmerclusterperm) options(\"jlmerclusterperm.nthreads\" = 1) system.time({ # Note the overhead for initializing the Julia session jlmerclusterperm_setup() }) #> user system elapsed #> 0.04 0.01 26.13 spec <- make_jlmer_spec( formula = Prop ~ Target, data = response_time, subject = \"ParticipantName\", trial = \"Target\", time = \"TimeBin\" ) #> ! Dropping 37 rows with missing values. spec #> ── jlmer specification ───────────────────────────────────────── ── #> Formula: Prop ~ 1 + TargetInanimate #> Predictors: #> Target: TargetInanimate #> Groupings: #> Subject: ParticipantName #> Trial: Target #> Time: TimeBin #> Data: #> # A tibble: 2,933 × 5 #> Prop TargetInanimate ParticipantName Target TimeBin #> #> 1 0 0 ANCAT18 Animate 161 #> 2 0 0 ANCAT18 Animate 162 #> 3 1 0 ANCAT18 Animate 163 #> # ℹ 2,930 more rows #> ──────────────────────────────────────────────────────────────────────────────── system.time({ cpa <- clusterpermute(spec, threshold = threshold_t, nsim = 150) }) #> user system elapsed #> 0.05 0.00 16.29 cpa #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 2.05552943864287) ─────────────────────── #> TargetInanimate (n = 150) #> Mean (SD): -1.009 (11.03) #> Coverage intervals: 95% [-25.856, 27.183] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 2.05552943864287) ─────────── ── #> TargetInanimate #> [161, 191]: -130.192 (p=0.0066) #> [194, 209]: -45.791 (p=0.0066) #> ──────────────────────────────────────────────────────────────────────────────── null_distribution <- tidy(cpa$null_cluster_dists) null_distribution #> # A tibble: 150 × 6 #> predictor start end length sum_statistic sim #> #> 1 TargetInanimate NA NA NA 0 001 #> 2 TargetInanimate 208 209 2 5.09 002 #> 3 TargetInanimate 197 200 4 -8.94 003 #> 4 TargetInanimate 205 206 2 -4.32 004 #> 5 TargetInanimate NA NA NA 0 005 #> 6 TargetInanimate 193 196 4 -10.8 006 #> 7 TargetInanimate 187 193 7 19.3 007 #> 8 TargetInanimate 177 180 4 9.34 008 #> 9 TargetInanimate 159 161 3 -7.57 009 #> 10 TargetInanimate NA NA NA 0 010 #> # ℹ 140 more rows empirical_clusters <- tidy(cpa$empirical_clusters) empirical_clusters #> # A tibble: 2 × 7 #> predictor id start end length sum_statistic pvalue #> #> 1 TargetInanimate 1 161 191 31 -130. 0.00662 #> 2 TargetInanimate 2 194 209 16 -45.8 0.00662 # We flip the sign of the statistic here for visual equivalence cpa_plot <- null_distribution %>% ggplot(aes(-sum_statistic)) + geom_histogram( aes(y = after_stat(density)), binwidth = 3 ) + geom_density() + geom_vline( aes(xintercept = -sum_statistic, color = id), data = empirical_clusters, linetype = 2, linewidth = 1 ) cpa_plot library(patchwork) plot(clust_analysis) + cpa_plot"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/eyetrackingR-comparison.html","id":"performance","dir":"Articles","previous_headings":"","what":"Performance","title":"Comparison to eyetrackingR","text":"jlmerclusterperm scales easily higher number simulations, especially fixed-effects models: eyetrackingR option parallelization, limited support platform dependent. 
contrast, jlmerclusterperm leverages Julia’s built-multi-threading support performant consistent: 10,000 simulations 7 threads:","code":"system.time({ clusterpermute(spec, threshold = threshold_t, nsim = 1000) }) #> user system elapsed #> 0.00 0.04 12.34 options(\"jlmerclusterperm.nthreads\" = 7) system.time({ jlmerclusterperm_setup(restart = TRUE, verbose = FALSE) }) #> user system elapsed #> 0.06 0.01 22.89 system.time({ clusterpermute(spec, threshold = threshold_t, nsim = 10000) }) #> user system elapsed #> 0.10 0.16 52.57"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"background","dir":"Articles","previous_headings":"","what":"Background","title":"Garrison et al. 2020","text":"data comes eye-tracking study Garrison, Baudet, Breitfield, Aberman, & Bergelson (2020) “Familiarity plays small role noun comprehension 12-18 months”, way peekbankr corpus package. original study used growth curve model analyze children’s looks object familiar (“”) vs. unfamiliar (“”) different ages (median-split “younger” “older” groups). replicating specific analysis . Instead, follow Katie Von Holzen’s tutorial used dataset conduct simpler, cluster-based permutation analysis (CPA) differences two age groups proportion looks target. tutorial used eyetrackingR package data preparation CPA, vignette use dplyr jlmerclusterperm tasks, respectively. following set-code reads data prepares analysis: data ts_data_agg ’ll use replicate tutorial just 4 columns relevant cluster-permutation analysis: subject_id: Unique identifier subjects age: -participant factor variable (\"Younger\", \"Older\") time: continuous measure time 0-3975ms 25ms intervals prop: -subject proportion looks target object following replicates figure tutorial introduces cluster-permutation analysis:","code":"library(dplyr) remotes::install_github(\"langcog/peekbankr\", quiet = TRUE) library(peekbankr) # Load data aoi_timepoints <- get_aoi_timepoints(dataset_name = \"garrison_bergelson_2020\") administrations <- get_administrations(dataset_name = \"garrison_bergelson_2020\") # Pre-processing ## Filter for age groups of interest ps_data <- aoi_timepoints %>% left_join(administrations, by = \"administration_id\") %>% filter(!between(age, 14, 16)) %>% mutate(age_binned = ifelse(age < 14, \"Younger\", \"Older\")) ## Filter for time window of interest ts_window <- ps_data %>% filter(t_norm >= 0, t_norm < 4000) ## Identify trials to exclude (trackloss in >25% of samples) to_exclude <- ts_window %>% group_by(subject_id, trial_id) %>% summarize(prop = mean(aoi == \"missing\"), .groups = \"drop\") %>% filter(prop > 0.25) ## Exclude disqualifying trials and keep information relevant for CPA ts_data <- ts_window %>% anti_join(to_exclude, by = c(\"subject_id\", \"trial_id\")) %>% mutate( target = aoi == \"target\", missing = aoi == \"missing\", age = factor(age_binned, c(\"Younger\", \"Older\")) ) %>% select(subject_id, age, trial_id, time = t_norm, target, missing) # Data of subject mean proportions of fixations to target ts_data_agg <- ts_data %>% group_by(subject_id, age, time) %>% summarize(prop = sum(target) / sum(!missing), .groups = \"drop\") # Data of trial-level fixations to target (0s and 1s) ts_data_trials <- ts_data %>% filter(!missing) %>% mutate(target = as.integer(target)) %>% select(subject_id, age, trial_id, time, target) ts_data_agg #> # A tibble: 2,560 × 4 #> subject_id age time prop #> #> 1 1413 Older 0 0.7 #> 2 1413 Older 25 0.733 #> 3 1413 Older 50 0.7 #> 4 1413 Older 75 0.7 #> 5 1413 Older 
100 0.677 #> 6 1413 Older 125 0.677 #> 7 1413 Older 150 0.645 #> 8 1413 Older 175 0.645 #> 9 1413 Older 200 0.581 #> 10 1413 Older 225 0.548 #> # ℹ 2,550 more rows library(ggplot2) tutorial_fig <- ggplot(ts_data_agg, aes(x = time, y = prop, color = age)) + stat_smooth(method = \"loess\", span = 0.1, se = TRUE, aes(fill = age), alpha = 0.3) + theme_bw() + labs(y = \"Prop. of Target Looks\") + geom_hline(yintercept = .5, color = \"black\") + geom_vline(xintercept = 0, color = \"black\") + coord_cartesian(ylim = c(0, 1)) + theme( legend.position = \"bottom\", legend.title = element_blank() ) tutorial_fig"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"outline","dir":"Articles","previous_headings":"","what":"Outline","title":"Garrison et al. 2020","text":"case study vignette showcases four features CPA jlmerclusterperm: Prepping data CPA using make_jlmer_spec() CPA one go clusterpermute() modular, step--step approach CPA Using logistic regression trial-level data 1s 0s Load package start Julia instance jlmerclusterperm_setup() proceeding.","code":"library(jlmerclusterperm) jlmerclusterperm_setup(verbose = FALSE)"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"a-prepping-a-specification-object","dir":"Articles","previous_headings":"","what":"A) Prepping a specification object","title":"Garrison et al. 2020","text":"Conducting cluster-based permutation analysis jlmerclusterm starts creating special specification object, compiles information necessary CPA. function make_jlmer_spec() returns specification object class minimally requires two arguments: formula: R regression model formula data: data frame example, model prop using age predictor ts_data_agg data: printed display includes four pieces information specification object: Formula: model formula Julia syntax. , looks similar formula provided, contrasts “spelled ” (age becomes ageOlder) Predictors: list predictors form ( original : expanded ). case, one predictor age expanded ageOlder ’s treatment coded “Younger” reference level. Groupings: specified call make_jlmer_spec(). empty simple_spec provided formula data. Data: transformed dataframe whose columns terms expanded formula. Note default dummy coding applied discrete variable age - now ageOlder 0 “Younger” 1 “Older”. start bare specification object , lacks parts CPA, can use jlmerclusterperm’s simple interface Julia regression models. function jlmer() takes specification object input returns Julia regression model. model represents “global” model fitted entire data, collapsing across time. equivalent lm() model specifications: raise tangent jlmer() recommend sanity-checking design output global model prior using model specifications CPA. regression models fit Julia “hood”, want make sure output comparable expect R. global model can also tell upper bound model complexity. example, global model singular fit, time-wise models fitted subsets data likely . Functions CPA also take object require information time (calculate time-wise statistics) subject/trial (bootstrapped permutation). ts_data_agg data trial-level information summarized subject, just leave . trial argument can omitted object time sample/bin uniquely identified subject. 
Otherwise, trial column data uniquely identifies time values within subject (example, column items counterbalanced designed participant sees every item one variants).","code":"simple_spec <- make_jlmer_spec(formula = prop ~ 1 + age, data = ts_data_agg) simple_spec #> ── jlmer specification ───────────────────────────────────────── ── #> Formula: prop ~ 1 + ageOlder #> Predictors: #> age: ageOlder #> Data: #> # A tibble: 2,560 × 2 #> prop ageOlder #> #> 1 0.7 1 #> 2 0.733 1 #> 3 0.7 1 #> # ℹ 2,557 more rows #> ──────────────────────────────────────────────────────────────────────────────── global_jmod <- jlmer(simple_spec) tidy(global_jmod) #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.523 0.00387 135. 0 #> 2 ageOlder 0.0984 0.00548 18.0 3.45e-72 library(broom) # for the `tidy()` method for `lm()` global_rmod <- lm(formula = prop ~ 1 + age, data = ts_data_agg) tidy(global_rmod) #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.523 0.00387 135. 0 #> 2 ageOlder 0.0984 0.00548 18.0 4.45e-68 ts_data_agg_spec <- make_jlmer_spec( formula = prop ~ 1 + age, data = ts_data_agg, subject = \"subject_id\", time = \"time\", trial = NULL # This is the default value ) ts_data_agg_spec #> ── jlmer specification ───────────────────────────────────────── ── #> Formula: prop ~ 1 + ageOlder #> Predictors: #> age: ageOlder #> Groupings: #> Subject: subject_id #> Trial: #> Time: time #> Data: #> # A tibble: 2,560 × 4 #> prop ageOlder subject_id time #> #> 1 0.7 1 1413 0 #> 2 0.733 1 1413 25 #> 3 0.7 1 1413 50 #> # ℹ 2,557 more rows #> ────────────────────────────────────────────────────────────────────────────────"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"b-cpa-with-clusterpermute","dir":"Articles","previous_headings":"","what":"B) CPA with clusterpermute()","title":"Garrison et al. 2020","text":"clusterpermute() function runs CPA one go, using information object. addition specification object, must also supply (t-value) threshold number simulations run: result list two elements: object holds information distribution bootstrapped cluster-mass statistics random permutations data: object holds per-predictor information clusters detected, p-values significance testing null: output explained next section, presented purposes simply note results similar reported Katie Von Holzen’s tutorial, copied . 
minor numerical differences due stochastic nature permutation test ’re comparing t-tests R regression models Julia.","code":"CPA_agg <- clusterpermute( ts_data_agg_spec, threshold = 2, nsim = 100, progress = FALSE # Suppress printing of progress for vignette ) CPA_agg #> $null_cluster_dists #> ── Null cluster-mass distribution (t > 2) ────────────── ── #> ageOlder (n = 100) #> Mean (SD): -0.844 (29.71) #> Coverage intervals: 95% [-46.664, 71.698] #> ──────────────────────────────────────────────────────────────────────────────── #> #> $empirical_clusters #> ── Empirical clusters (t > 2) ────────────────────────── ── #> ageOlder #> [850, 1525]: 74.724 (p=0.0396) #> [2675, 2775]: 11.017 (p=0.4356) #> [3500, 3900]: 46.295 (p=0.0990) #> ──────────────────────────────────────────────────────────────────────────────── #> ## Summary of Clusters ====== #> ## Cluster Direction SumStatistic StartTime EndTime Probability #> ## 1 1 Positive 74.723687 850 1550 0.051 #> ## 2 2 Positive 6.905622 2725 2800 0.405 #> ## 3 3 Positive 46.294644 3500 3925 0.111"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"c-a-step-by-step-approach","dir":"Articles","previous_headings":"","what":"C) A step-by-step approach","title":"Garrison et al. 2020","text":"clusterpermute() function showcased consists five parts, also standalone functions package: compute_timewise_statistics() extract_empirical_clusters() permute_timewise_statistics() extract_null_cluster_dists() calculate_clusters_pvalues() functions correspond algorithmic steps statistical test. clusterpermute() obviates need think individual components, knowing gives greater flexibility procedure. section walks CPA step--step using functions.","code":""},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"empirical-clusters","dir":"Articles","previous_headings":"C) A step-by-step approach","what":"1) Empirical clusters","title":"Garrison et al. 2020","text":"compute_timewise_statistics() function takes object returns timewise statistics form matrix row predictor column time point: example see 25ms, t-value age -0.127. consistent figure tutorial replicated show two lines mostly overlapping. Just demonstration purposes, use to_jlmer() quickly fit model using formula just data 25ms. see t-value age model exactly saw : construct empirical clusters timewise statistics. Clusters defined continuous span statistics sign whose absolute value passes certain threshold. extract_empirical_clusters() function takes timewise statistics threshold just : t-value threshold 2, detect three clusters data main effect age (ageOlder). numbers brackets show span cluster number right show sum t-values cluster (.k.. cluster-mass statistic). can collect object data frame tidy(): add figure : point know whether size clusters observe (un)likely emerge chance. 
calculate probability, need simulate null distribution.","code":"empirical_statistics <- compute_timewise_statistics(ts_data_agg_spec) dim(empirical_statistics) #> [1] 1 160 empirical_statistics[1, 1:5, drop = FALSE] # First 5 time points #> Time #> Predictor 0 25 50 75 100 #> ageOlder 0.1166025 -0.1273179 -0.2387778 -0.4022095 -0.2804397 to_jlmer(prop ~ 1 + age, data = filter(ts_data_agg, time == 25)) %>% tidy() %>% filter(term == \"ageOlder\") %>% pull(statistic) #> [1] -0.1273179 empirical_clusters <- extract_empirical_clusters(empirical_statistics, threshold = 2) empirical_clusters #> ── Empirical clusters (t > 2) ────────────────────────── ── #> ageOlder #> [850, 1525]: 74.724 #> [2675, 2775]: 11.017 #> [3500, 3900]: 46.295 #> ──────────────────────────────────────────────────────────────────────────────── empirical_clusters_df <- tidy(empirical_clusters) empirical_clusters_df #> # A tibble: 3 × 6 #> predictor id start end length sum_statistic #> #> 1 ageOlder 1 850 1525 28 74.7 #> 2 ageOlder 2 2675 2775 5 11.0 #> 3 ageOlder 3 3500 3900 17 46.3 tutorial_fig + geom_segment( aes(x = start, xend = end, y = -Inf, yend = -Inf), linewidth = 10, color = \"steelblue\", inherit.aes = FALSE, data = empirical_clusters_df ) + geom_text( aes( y = -Inf, x = start + (end - start) / 2, label = paste(\"Cluster\", id) ), vjust = -2, inherit.aes = FALSE, data = empirical_clusters_df )"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"null-distribution","dir":"Articles","previous_headings":"C) A step-by-step approach","what":"2) Null distribution","title":"Garrison et al. 2020","text":"permute_timewise_statistics() function takes specification object number bootstrapped permutation data simulate (via nsim argument). returned 3-dimensional array like output compute_timewise_statistics() except additional dimension representing simulation: permutation algorithm preserves grouping structures data specified specification object. example, testing -subject predictor like age, age groups participants random swapped preserving temporal structure data. null distribution collection largest cluster-mass statistic simulated data. clusters detected simulation, contributes cluster-mass zero null. use extract_null_cluster_dists() construct null distribution, using threshold value 2 allow comparison empirical clusters: printed, returned object shows summary statistics. 
can use tidy() collect samples null data frame: null distribution can plotted like : Now left test probability observing cluster-mass statistics extreme detected clusters empirical_clusters, using null_cluster_dists reference.","code":"null_statistics <- permute_timewise_statistics(ts_data_agg_spec, nsim = 100) # simulation by time by predictor dim(null_statistics) #> [1] 100 160 1 # First 5 time points of the first three simulations null_statistics[1:3, 1:5, 1, drop = FALSE] #> , , Predictor = ageOlder #> #> Time #> Sim 0 25 50 75 100 #> 001 -0.1037696 0.1387838 0.4583545 0.4337538 0.69136613 #> 002 1.4358330 1.2745381 1.2985888 0.9111359 1.16401311 #> 003 0.2545470 0.3885999 0.3374939 0.3200497 0.05603617 null_cluster_dists <- extract_null_cluster_dists(null_statistics, threshold = 2) null_cluster_dists #> ── Null cluster-mass distribution (t > 2) ────────────── ── #> ageOlder (n = 100) #> Mean (SD): -6.150 (40.29) #> Coverage intervals: 95% [-114.564, 74.425] #> ──────────────────────────────────────────────────────────────────────────────── null_cluster_dists_df <- tidy(null_cluster_dists) null_cluster_dists_df #> # A tibble: 100 × 6 #> predictor start end length sum_statistic sim #> #> 1 ageOlder NA NA NA 0 001 #> 2 ageOlder NA NA NA 0 002 #> 3 ageOlder 2175 2225 3 -6.37 003 #> 4 ageOlder NA NA NA 0 004 #> 5 ageOlder 3425 3475 3 -6.67 005 #> 6 ageOlder NA NA NA 0 006 #> 7 ageOlder NA NA NA 0 007 #> 8 ageOlder NA NA NA 0 008 #> 9 ageOlder NA NA NA 0 009 #> 10 ageOlder 1525 2475 39 -114. 010 #> # ℹ 90 more rows plot( density(null_cluster_dists_df$sum_statistic), main = \"Null distribution of cluster-mass statistics\" )"},{"path":"https://yjunechoe.github.io/jlmerclusterperm/articles/Garrison-et-al-2020.html","id":"significance-test","dir":"Articles","previous_headings":"C) A step-by-step approach","what":"3) Significance test","title":"Garrison et al. 2020","text":"calculate_clusters_pvalues() function computes p-value empirical cluster returns augmented