diff --git a/.github/workflows/cron.yaml b/.github/workflows/cron.yaml new file mode 100644 index 00000000..3b2a0dd7 --- /dev/null +++ b/.github/workflows/cron.yaml @@ -0,0 +1,31 @@ + + + +on: + schedule: + - cron: '0 4 1,15 * *' + workflow_dispatch: + +name: CRON + +jobs: + r-cmd: + name: R CMD Check 🧬 + uses: insightsengineering/r.pkg.template/.github/workflows/build-check-install.yaml@main + secrets: + REPO_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + additional-env-vars: | + CMDSTAN=/root/.cmdstan + CMDSTAN_PATH=/root/.cmdstan + CMDSTANR_NO_VER_CHECK=true + JMPOST_CACHE_DIR=${{ github.workspace }}/.cache + JMPOST_FULL_TEST=TRUE + additional-caches: | + ${{ github.workspace }}/.cache + + + + + + diff --git a/R/DataLongitudinal.R b/R/DataLongitudinal.R index 13b155b9..d68f1caa 100644 --- a/R/DataLongitudinal.R +++ b/R/DataLongitudinal.R @@ -92,11 +92,11 @@ harmonise.DataLongitudinal <- function(object, subject_var, subject_ord, ...) { ) assert_that( all(data[[subject_var]] %in% subject_ord), - msg = "There are subjects `longitudinal` that are not present in `subjects`" + msg = "There are subjects in `longitudinal` that are not present in `subjects`" ) assert_that( all(subject_ord %in% data[[subject_var]]), - msg = "There are subjects `subjects` that are not present in `longitudinal`" + msg = "There are subjects in `subjects` that are not present in `longitudinal`" ) data[[subject_var]] <- factor( as.character(data[[subject_var]]), @@ -169,7 +169,7 @@ as_stan_list.DataLongitudinal <- function(object, subject_var, ...) { assert_factor(df[[subject_var]]) mat_sld_index <- stats::model.matrix( - stats::as.formula(paste("~", subject_var)), + stats::as.formula(paste("~ -1 + ", subject_var)), data = df ) |> t() diff --git a/R/DataSurvival.R b/R/DataSurvival.R index be0945bb..f4d1c16d 100644 --- a/R/DataSurvival.R +++ b/R/DataSurvival.R @@ -159,11 +159,11 @@ harmonise.DataSurvival <- function(object, subject_var, subject_ord, ...) { ) assert_that( all(data[[subject_var]] %in% subject_ord), - msg = "There are subjects `survival` that are not present in `subjects`" + msg = "There are subjects in `survival` that are not present in `subjects`" ) assert_that( all(subject_ord %in% data[[subject_var]]), - msg = "There are subjects `subjects` that are not present in `survival`" + msg = "There are subjects in `subjects` that are not present in `survival`" ) data[[subject_var]] <- factor( diff --git a/R/JointModel.R b/R/JointModel.R index 205e395e..20d634de 100755 --- a/R/JointModel.R +++ b/R/JointModel.R @@ -121,7 +121,9 @@ write_stan.JointModel <- function(object, file_path) { #' @rdname compileStanModel #' @export compileStanModel.JointModel <- function(object) { - x <- compileStanModel(object@stan) + stanObject <- object@stan + stanObject@generated_quantities <- "" + x <- compileStanModel(stanObject) invisible(x) } @@ -161,9 +163,7 @@ sampleStanModel.JointModel <- function(object, data, ...) { args[["init"]] <- function() values_initial_expanded } - stanObject <- object@stan - stanObject@generated_quantities <- "" - model <- compileStanModel(stanObject) + model <- compileStanModel(object) results <- do.call(model$sample, args) .JointModelSamples( diff --git a/R/JointModelSamples.R b/R/JointModelSamples.R index b4038513..ac932e96 100644 --- a/R/JointModelSamples.R +++ b/R/JointModelSamples.R @@ -76,7 +76,7 @@ as_print_string.JointModelSamples <- function(object, indent = 1, ...) { sizes <- vapply( object@results$metadata()[["stan_variable_sizes"]], \(x) { - if (length(x) == 1 & x == 1) return("") + if (length(x) == 1 && x == 1) return("") paste0("[", paste(x, collapse = ", "), "]") }, character(1) diff --git a/R/LongitudinalGSF.R b/R/LongitudinalGSF.R index 11caaf4c..f0dd0e97 100755 --- a/R/LongitudinalGSF.R +++ b/R/LongitudinalGSF.R @@ -26,47 +26,95 @@ NULL #' @param mu_bsld (`Prior`)\cr for the mean baseline value `mu_bsld`. #' @param mu_ks (`Prior`)\cr for the mean shrinkage rate `mu_ks`. #' @param mu_kg (`Prior`)\cr for the mean growth rate `mu_kg`. -#' @param mu_phi (`Prior`)\cr for the mean shrinkage proportion `mu_phi`. +#' #' @param omega_bsld (`Prior`)\cr for the baseline value standard deviation `omega_bsld`. #' @param omega_ks (`Prior`)\cr for the shrinkage rate standard deviation `omega_ks`. #' @param omega_kg (`Prior`)\cr for the growth rate standard deviation `omega_kg`. -#' @param omega_phi (`Prior`)\cr for the shrinkage proportion standard deviation `omega_phi`. +#' #' @param sigma (`Prior`)\cr for the variance of the longitudinal values `sigma`. #' +#' @param a_phi (`Prior`)\cr for the alpha parameter for the fraction of cells that respond to treatment. +#' @param b_phi (`Prior`)\cr for the beta parameter for the fraction of cells that respond to treatment. +#' +#' @param psi_bsld (`Prior`)\cr for the baseline value random effect `psi_bsld`. Only used in the +#' centered parameterization to set the initial value. +#' @param psi_ks (`Prior`)\cr for the shrinkage rate random effect `psi_ks`. Only used in the +#' centered parameterization to set the initial value. +#' @param psi_kg (`Prior`)\cr for the growth rate random effect `psi_kg`. Only used in the +#' centered parameterization to set the initial value. +#' @param psi_phi (`Prior`)\cr for the shrinkage proportion random effect `psi_phi`. Only used in the +#' centered parameterization to set the initial value. +#' +#' @param centered (`logical`)\cr whether to use the centered parameterization. +#' #' @export LongitudinalGSF <- function( - mu_bsld = prior_lognormal(log(55), 5, init = 55), - mu_ks = prior_lognormal(log(0.1), 0.5, init = 0.1), - mu_kg = prior_lognormal(log(0.1), 1, init = 0.1), - mu_phi = prior_beta(2, 8, init = 0.2), - omega_bsld = prior_lognormal(log(0.1), 1, init = 0.1), - omega_ks = prior_lognormal(log(0.1), 1, init = 0.1), - omega_kg = prior_lognormal(log(0.1), 1, init = 0.1), - omega_phi = prior_lognormal(log(0.1), 1, init = 0.1), - sigma = prior_lognormal(log(0.1), 0.8, init = 0.1) + + mu_bsld = prior_normal(log(60), 1, init = 60), + mu_ks = prior_normal(log(0.5), 1, init = 0.5), + mu_kg = prior_normal(log(0.3), 1, init = 0.3), + + omega_bsld = prior_lognormal(log(0.2), 1, init = 0.2), + omega_ks = prior_lognormal(log(0.2), 1, init = 0.2), + omega_kg = prior_lognormal(log(0.2), 1, init = 0.2), + + a_phi = prior_lognormal(log(5), 1, init = 5), + b_phi = prior_lognormal(log(5), 1, init = 5), + + sigma = prior_lognormal(log(0.1), 1, init = 0.1), + + psi_bsld = prior_none(init = 60), + psi_ks = prior_none(init = 0.5), + psi_kg = prior_none(init = 0.5), + psi_phi = prior_none(init = 0.5), + + centered = FALSE ) { - eta_prior <- prior_std_normal() + + gsf_model <- StanModule(decorated_render( + .x = paste0(read_stan("lm-gsf/model.stan"), collapse = "\n"), + centered = centered + )) + + parameters <- list( + Parameter(name = "lm_gsf_mu_bsld", prior = mu_bsld, size = "n_studies"), + Parameter(name = "lm_gsf_mu_ks", prior = mu_ks, size = "n_arms"), + Parameter(name = "lm_gsf_mu_kg", prior = mu_kg, size = "n_arms"), + + Parameter(name = "lm_gsf_omega_bsld", prior = omega_bsld, size = 1), + Parameter(name = "lm_gsf_omega_ks", prior = omega_ks, size = 1), + Parameter(name = "lm_gsf_omega_kg", prior = omega_kg, size = 1), + + Parameter(name = "lm_gsf_a_phi", prior = a_phi, size = "n_arms"), + Parameter(name = "lm_gsf_b_phi", prior = b_phi, size = "n_arms"), + Parameter(name = "lm_gsf_psi_phi", prior = psi_phi, size = "Nind"), + + Parameter(name = "lm_gsf_sigma", prior = sigma, size = 1) + ) + + assert_flag(centered) + parameters_extra <- if (centered) { + list( + Parameter(name = "lm_gsf_psi_bsld", prior = psi_bsld, size = "Nind"), + Parameter(name = "lm_gsf_psi_ks", prior = psi_ks, size = "Nind"), + Parameter(name = "lm_gsf_psi_kg", prior = psi_kg, size = "Nind") + ) + } else { + list( + Parameter(name = "lm_gsf_eta_tilde_bsld", prior = prior_std_normal(), size = "Nind"), + Parameter(name = "lm_gsf_eta_tilde_ks", prior = prior_std_normal(), size = "Nind"), + Parameter(name = "lm_gsf_eta_tilde_kg", prior = prior_std_normal(), size = "Nind") + ) + } + parameters <- append(parameters, parameters_extra) + x <- LongitudinalModel( name = "Generalized Stein-Fojo", stan = merge( - StanModule("lm-gsf/model.stan"), + gsf_model, StanModule("lm-gsf/functions.stan") ), - parameters = ParameterList( - Parameter(name = "lm_gsf_mu_bsld", prior = mu_bsld, size = "n_studies"), - Parameter(name = "lm_gsf_mu_ks", prior = mu_ks, size = "n_arms"), - Parameter(name = "lm_gsf_mu_kg", prior = mu_kg, size = "n_arms"), - Parameter(name = "lm_gsf_mu_phi", prior = mu_phi, size = "n_arms"), - Parameter(name = "lm_gsf_omega_bsld", prior = omega_bsld, size = 1), - Parameter(name = "lm_gsf_omega_ks", prior = omega_ks, size = 1), - Parameter(name = "lm_gsf_omega_kg", prior = omega_kg, size = 1), - Parameter(name = "lm_gsf_omega_phi", prior = omega_phi, size = 1), - Parameter(name = "lm_gsf_sigma", prior = sigma, size = 1), - Parameter(name = "lm_gsf_eta_tilde_bsld", prior = eta_prior, size = "Nind"), - Parameter(name = "lm_gsf_eta_tilde_ks", prior = eta_prior, size = "Nind"), - Parameter(name = "lm_gsf_eta_tilde_kg", prior = eta_prior, size = "Nind"), - Parameter(name = "lm_gsf_eta_tilde_phi", prior = eta_prior, size = "Nind") - ) + parameters = do.call(ParameterList, parameters) ) .LongitudinalGSF(x) } diff --git a/R/Prior.R b/R/Prior.R index 1ad61bed..851d15ab 100755 --- a/R/Prior.R +++ b/R/Prior.R @@ -94,9 +94,12 @@ setValidity( #' @family Prior-internal #' @export as.character.Prior <- function(x, ...) { + + parameters_rounded <- lapply(x@parameters, round, 5) + do.call( glue::glue, - append(x@display, x@parameters) + append(x@display, parameters_rounded) ) } diff --git a/R/StanModule.R b/R/StanModule.R index 65a2a3ad..9c2f71fb 100755 --- a/R/StanModule.R +++ b/R/StanModule.R @@ -433,10 +433,9 @@ as_stan_fragments <- function(x, stan_blocks = STAN_BLOCKS) { #' @keywords internal #' @export as_print_string.StanModule <- function(object, indent = 1, ...) { - slots <- getSlots("StanModule") - slots <- slots[!names(slots) %in% c("priors", "inits")] - components <- Filter(\(block) all(slot(object, block) != ""), names(slots)) - + slots <- names(getSlots("StanModule")) + slots <- slots[!slots %in% c("priors", "inits")] + components <- Filter(\(block) paste(slot(object, block), collapse = "") != "", slots) template <- c( "StanModule Object with components:", paste(" ", components) diff --git a/R/simulations_gsf.R b/R/simulations_gsf.R index 526b0fab..659c0a05 100644 --- a/R/simulations_gsf.R +++ b/R/simulations_gsf.R @@ -49,12 +49,12 @@ gsf_dsld <- function(time, b, s, g, phi) { #' @param sigma (`number`)\cr the variance of the longitudinal values. #' @param mu_s (`numeric`)\cr the mean shrinkage rates for the two treatment arms. #' @param mu_g (`numeric`)\cr the mean growth rates for the two treatment arms. -#' @param mu_phi (`numeric`)\cr the mean shrinkage proportions for the two treatment arms. #' @param mu_b (`numeric`)\cr the mean baseline values for the two treatment arms. #' @param omega_b (`number`)\cr the baseline value standard deviation. #' @param omega_s (`number`)\cr the shrinkage rate standard deviation. #' @param omega_g (`number`)\cr the growth rate standard deviation. -#' @param omega_phi (`number`)\cr the shrinkage proportion standard deviation. +#' @param a_phi (`number`)\cr the alpha parameter for the fraction of cells that respond to treatment. +#' @param b_phi (`number`)\cr the beta parameter for the fraction of cells that respond to treatment. #' @param link_dsld (`number`)\cr the link coefficient for the derivative contribution. #' @param link_ttg (`number`)\cr the link coefficient for the time-to-growth contribution. #' @param link_identity (`number`)\cr the link coefficient for the SLD Identity contribution. @@ -65,14 +65,14 @@ gsf_dsld <- function(time, b, s, g, phi) { #' @export sim_lm_gsf <- function( sigma = 0.01, - mu_s = c(3, 4), - mu_g = c(0.2, 0.3), - mu_phi = c(0.1, 0.2), - mu_b = 50, - omega_b = 0.135, - omega_s = 0.15, - omega_g = 0.225, - omega_phi = 0.75, + mu_s = c(0.6, 0.4), + mu_g = c(0.25, 0.35), + mu_b = 60, + a_phi = c(4, 6), + b_phi = c(4, 6), + omega_b = 0.2, + omega_s = 0.2, + omega_g = 0.2, link_dsld = 0, link_ttg = 0, link_identity = 0 @@ -80,26 +80,24 @@ sim_lm_gsf <- function( function(lm_base) { assert_that( - length(unique(lm_base$study)) == 1, + length(unique(lm_base$study)) == length(mu_b), length(mu_b) == 1, length(sigma) == 1, length(mu_s) == length(unique(lm_base$arm)), length(mu_s) == length(mu_g), - length(mu_s) == length(mu_phi), - length(c(omega_b, omega_s, omega_g, omega_phi)) == 4 + length(mu_s) == length(a_phi), + length(mu_s) == length(b_phi), + length(c(omega_b, omega_s, omega_g)) == 3 ) baseline_covs <- lm_base |> dplyr::distinct(.data$pt, .data$arm, .data$study) |> - dplyr::mutate(arm_n = as.numeric(factor(as.character(.data$arm)))) |> - dplyr::mutate(eta_b = stats::rnorm(dplyr::n(), 0, 1)) |> - dplyr::mutate(eta_s = stats::rnorm(dplyr::n(), 0, 1)) |> - dplyr::mutate(eta_g = stats::rnorm(dplyr::n(), 0, 1)) |> - dplyr::mutate(eta_phi = stats::rnorm(dplyr::n(), 0, 1)) |> - dplyr::mutate(psi_b = exp(log(mu_b) + .data$eta_b * omega_b)) |> - dplyr::mutate(psi_s = exp(log(mu_s[.data$arm_n]) + .data$eta_s * omega_s)) |> - dplyr::mutate(psi_g = exp(log(mu_g[.data$arm_n]) + .data$eta_g * omega_g)) |> - dplyr::mutate(psi_phi = stats::plogis(stats::qlogis(mu_phi[.data$arm_n]) + .data$eta_phi * omega_phi)) + dplyr::mutate(study_idx = as.numeric(factor(as.character(.data$study)))) |> + dplyr::mutate(arm_idx = as.numeric(factor(as.character(.data$arm)))) |> + dplyr::mutate(psi_b = stats::rlnorm(dplyr::n(), log(mu_b[.data$study_idx]), omega_b)) |> + dplyr::mutate(psi_s = stats::rlnorm(dplyr::n(), log(mu_s[.data$arm_idx]), omega_s)) |> + dplyr::mutate(psi_g = stats::rlnorm(dplyr::n(), log(mu_g[.data$arm_idx]), omega_g)) |> + dplyr::mutate(psi_phi = stats::rbeta(dplyr::n(), a_phi[.data$arm_idx], b_phi[.data$arm_idx])) lm_dat <- lm_base |> dplyr::select(!dplyr::all_of(c("study", "arm"))) |> diff --git a/README.md b/README.md index 5462cdad..3f4b9b4a 100755 --- a/README.md +++ b/README.md @@ -78,6 +78,10 @@ library(jmpost) #> Registered S3 method overwritten by 'GGally': #> method from #> +.gg ggplot2 +#> Registered S3 methods overwritten by 'ggpp': +#> method from +#> heightDetails.titleGrob ggplot2 +#> widthDetails.titleGrob ggplot2 set.seed(321) sim_data <- simulate_joint_data( lm_fun = sim_lm_random_slope(), diff --git a/design/debug-gsf/A/model.R b/design/debug-gsf/A/model.R new file mode 100644 index 00000000..0362a551 --- /dev/null +++ b/design/debug-gsf/A/model.R @@ -0,0 +1,138 @@ + +set.seed(3130) + +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + +################################# +# +# Setup test data +# + +n <- 200 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + bsld = rnorm(n, 60, 10), + g = 0.2, + s = 0.4, + phi = 0.6, + sigma = 0.02 +) + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 6, by = 0.3) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, bsld, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + bsld = baseline$bsld, + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + ks = 0.4, + kg = 0.2, + phi = 0.6, + sigma = 0.01 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "A", "model.stan")), + exe_file = file.path(here("local", "models", "model_A")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + refresh = 200, + iter_warmup = 500, + iter_sampling = 500, + init = lapply(1:nchains, \(...) init_vals) +) + +pars <- c("ks", "kg", "phi", "sigma") + +fit$summary(pars) + + +################################# +# +# Diagnostics +# + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(500, log(60), 0.6) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.5) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + + + + + diff --git a/design/debug-gsf/A/model.md b/design/debug-gsf/A/model.md new file mode 100644 index 00000000..d6a88ecf --- /dev/null +++ b/design/debug-gsf/A/model.md @@ -0,0 +1,40 @@ + +## Model Specification + + + +$$ +y_{ij} \sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) +$$ + +$$ +SLD_{ij} = b_i \Big [ (\phi e^{-st_{j}}) + (1-\phi) \cdot e^{gt_j} \Big] +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter (this is a known parameter and not estimated by the model) +- $s$ is the shrinkage parameter +- $g$ is the growth parameter +- $\phi$ is the proportion of responding cells + + +Priors: +$$ +\begin{align*} +\phi &\sim \text{Beta}(3, 3) \\ +s &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +g &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +\end{align*} +$$ + + + + + + + + diff --git a/design/debug-gsf/A/model.stan b/design/debug-gsf/A/model.stan new file mode 100644 index 00000000..c70cc3ca --- /dev/null +++ b/design/debug-gsf/A/model.stan @@ -0,0 +1,74 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + real psi_ks, + real psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[Nind] bsld; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real ks; + real kg; + real phi; + real sigma; +} + + +model { + vector[N] Ypred; + + ks ~ lognormal(log(0.4), 0.5); + kg ~ lognormal(log(0.4), 0.5); + phi ~ beta(3, 3); + + Ypred = sld( + Tobs, + bsld[ind_index], + ks, + kg, + rep_vector(phi, N) + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} diff --git a/design/debug-gsf/B/model.R b/design/debug-gsf/B/model.R new file mode 100644 index 00000000..c80c4ac6 --- /dev/null +++ b/design/debug-gsf/B/model.R @@ -0,0 +1,139 @@ + +set.seed(3130) + +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + +################################# +# +# Setup test data +# + +n <- 200 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + bsld = rnorm(n, 60, 10), + g = rlnorm(n, log(0.2), 0.3), + s = rlnorm(n, log(0.7), 0.3), + phi = rbeta(n, 1.5, 1.5), + sigma = 0.03 +) + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 5, by = 0.3) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, bsld, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + bsld = baseline$bsld, + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + ks = rep(0.3, n), + kg = rep(0.6, n), + phi = rep(0.5, n), + sigma = 0.01 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "B", "model.stan")), + exe_file = file.path(here("local", "models", "model_B")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + init = lapply(1:nchains, \(...) init_vals), + refresh = 200, + iter_warmup = 500, + iter_sampling = 500 +) + +pars <- c("ks_mu", "kg_mu", "phi_mu", "sigma") +fit$summary(variables = pars) + + + + +################################# +# +# Diagnostics +# + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(500, log(60), 0.8) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.6) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + + + + + + \ No newline at end of file diff --git a/design/debug-gsf/B/model.md b/design/debug-gsf/B/model.md new file mode 100644 index 00000000..05e9fac0 --- /dev/null +++ b/design/debug-gsf/B/model.md @@ -0,0 +1,42 @@ + +## Model Specification + + + +$$ +\begin{align*} +y_{ij} &\sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) \\ \\ +SLD_{ij} &= b_i \Big [ (\phi_i e^{-s_it_{j}}) + (1-\phi_i) \cdot e^{g_it_j} \Big] \\ \\ +\end{align*} +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter (this is a known parameter and not estimated by the model) +- $s_i$ is the shrinkage parameter +- $g_i$ is the growth parameter +- $\phi_i$ is the proportion of responding cells + + + +Priors: +$$ +\begin{align*} +\phi_i &\sim \text{Beta}(3, 3) \\ +s_i &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +g_i &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +\end{align*} +$$ + +That is to say that $\phi_i$, $s_i$, and $g_i$ are being modeled as independent parameters per each subject. + + + + + + + + diff --git a/design/debug-gsf/B/model.stan b/design/debug-gsf/B/model.stan new file mode 100644 index 00000000..54d95a19 --- /dev/null +++ b/design/debug-gsf/B/model.stan @@ -0,0 +1,80 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[Nind] bsld; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + vector[Nind] ks; + vector[Nind] kg; + vector[Nind] phi; + real sigma; +} + + +model { + vector[N] Ypred; + + ks ~ lognormal(log(0.4), 0.6); + kg ~ lognormal(log(0.4), 0.6); + phi ~ beta(3, 3); + + Ypred = sld( + Tobs, + bsld[ind_index], + ks[ind_index], + kg[ind_index], + phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} + +generated quantities { + real ks_mu = mean(ks); + real kg_mu = mean(kg); + real phi_mu = mean(phi); +} diff --git a/design/debug-gsf/C/model.R b/design/debug-gsf/C/model.R new file mode 100644 index 00000000..89659606 --- /dev/null +++ b/design/debug-gsf/C/model.R @@ -0,0 +1,135 @@ + + +set.seed(3130) +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + +################################# +# +# Setup test data +# + +n <- 200 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + bsld = rnorm(n, 60, 10), + g = 0.2, + s = 0.4, + phi = 0.5, + sigma = 0.02 +) + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 6, by = 0.3) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, bsld, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +selected_pts <- sample(baseline$pt, 10) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + bsld = baseline$bsld, + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + ks = 0.4, + kg = 0.2, + sigma = 0.01 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "C", "model.stan")), + exe_file = file.path(here("local", "models", "model_C")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + init = lapply(1:nchains, \(...) init_vals), + parallel_chains = nchains, + chains = nchains, + refresh = 200, + iter_warmup = 500, + iter_sampling = 500 +) +pars <- c("ks", "kg", "sigma") + +fit$summary(pars) + + +################################# +# +# Diagnostics +# + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(500, log(60), 0.6) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.5) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + + + + + diff --git a/design/debug-gsf/C/model.md b/design/debug-gsf/C/model.md new file mode 100644 index 00000000..a8fc1b85 --- /dev/null +++ b/design/debug-gsf/C/model.md @@ -0,0 +1,38 @@ + +## Model Specification + + + +$$ +y_{ij} \sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) +$$ + +$$ +SLD_{ij} = b_i \Big [ \frac{1}{2}e^{-st_{j}} + \frac{1}{2}e^{gt_j} \Big] +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter (this is a known parameter and not estimated by the model) +- $s$ is the shrinkage parameter +- $g$ is the growth parameter + + +Priors: +$$ +\begin{align*} +s &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +g &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +\end{align*} +$$ + + + + + + + + diff --git a/design/debug-gsf/C/model.stan b/design/debug-gsf/C/model.stan new file mode 100644 index 00000000..381dbcd2 --- /dev/null +++ b/design/debug-gsf/C/model.stan @@ -0,0 +1,72 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + real psi_ks, + real psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[Nind] bsld; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real ks; + real kg; + real sigma; +} + + +model { + vector[N] Ypred; + + ks ~ lognormal(log(0.4), 0.5); + kg ~ lognormal(log(0.4), 0.5); + + Ypred = sld( + Tobs, + bsld[ind_index], + ks, + kg, + rep_vector(0.5, N) + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} diff --git a/design/debug-gsf/D/model.R b/design/debug-gsf/D/model.R new file mode 100644 index 00000000..32509fb1 --- /dev/null +++ b/design/debug-gsf/D/model.R @@ -0,0 +1,223 @@ + +set.seed(5130) + +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + +################################# +# +# Setup test data +# + +n <- 300 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + b = rlnorm(n, log(60), 1), + g = rlnorm(n, log(0.2), 0.3), + s = rlnorm(n, log(0.6), 0.3), + phi = rbeta(n, 1.5, 1.5), + sigma = 0.03 +) + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 5, by = 0.3) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, b, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + ks = rep(0.6, n), + kg = rep(0.2, n), + phi = rep(0.5, n), + kb = rep(60, n), + sigma = 0.03 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "D", "model.stan")), + exe_file = file.path(here("local", "models", "model_D")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + init = lapply(1:nchains, \(...) init_vals), + refresh = 200, + iter_warmup = 1500, + iter_sampling = 1500 +) +baseline |> summarise(across(c("b", "g", "s", "phi"), mean)) +pars <- c("ks_mu", "kg_mu", "phi_mu", "b_mu", "sigma") +fit$summary(variables = pars) + + + + +################################# +# +# Diagnostics +# + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(500, log(60), 1) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.6) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + + + + + + + +####################################### +# +# +# JMPOST +# +# + ks ~ lognormal(log(0.6), 0.3); + kg ~ lognormal(log(0.2), 0.3); + kb ~ lognormal(log(60), 1); + phi ~ beta(2, 2); + + +devtools::load_all() + +jm <- JointModel( + longitudinal = LongitudinalGSF( + + mu_bsld = prior_lognormal(log(60), 1), + mu_ks = prior_lognormal(log(0.6), 0.3), + mu_kg = prior_lognormal(log(0.6), 0.3), + mu_phi = prior_beta(2, 2), + + omega_bsld = prior_lognormal(log(0.5), 0.6), + omega_ks = prior_lognormal(log(0.5), 0.6), + omega_kg = prior_lognormal(log(0.5), 0.6), + omega_phi = prior_lognormal(log(0.5), 0.6), + + sigma = prior_lognormal(log(0.03), 0.6) + ) +) + +# Create local file with stan code for debugging purposes ONLY +write_stan(jm, "local/debug.stan") + + +dat_os <- baseline |> + mutate(arm = "A", study = "A") + + + +## Prepare data for sampling +jdat <- DataJoint( + subject = DataSubject( + data = dat_os, + subject = "pt", + arm = "arm", + study = "study" + ), + longitudinal = DataLongitudinal( + data = dat_lm, + formula = sld ~ time, + threshold = 5 + ) +) + +## Sample from JointModel + +mp <- sampleStanModel( + jm, + data = jdat, + iter_sampling = 500, + iter_warmup = 1000, + chains = 1, + parallel_chains = 1 +) + + +baseline |> summarise(across(c("b", "g", "s", "phi"), mean)) +vars <- c( + "lm_gsf_mu_bsld", # + "lm_gsf_mu_phi", # + "lm_gsf_mu_kg", # + "lm_gsf_mu_ks", # + "lm_gsf_sigma", # + "lm_gsf_omega_bsld", # + "lm_gsf_omega_kg", # + "lm_gsf_omega_phi", # + "lm_gsf_omega_ks" # +) +mp@results$summary(vars) + + + diff --git a/design/debug-gsf/D/model.md b/design/debug-gsf/D/model.md new file mode 100644 index 00000000..40fa88a5 --- /dev/null +++ b/design/debug-gsf/D/model.md @@ -0,0 +1,43 @@ + +## Model Specification + + + +$$ +\begin{align*} +y_{ij} &\sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) \\ \\ +SLD_{ij} &= b_i \Big [ (\phi_i e^{-s_it_{j}}) + (1-\phi_i) \cdot e^{g_it_j} \Big] \\ \\ +\end{align*} +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter +- $s_i$ is the shrinkage parameter +- $g_i$ is the growth parameter +- $\phi_i$ is the proportion of responding cells + + + +Priors: +$$ +\begin{align*} +\phi_i &\sim \text{Beta}(3, 3) \\ +s_i &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +g_i &\sim \text{LogNormal}\big(\log(0.4),\ 0.5\big) \\ +b_i &\sim \text{LogNormal}\big(\log(60),\ 1\big) +\end{align*} +$$ + +That is to say that $\phi_i$, $s_i$, $b_i$ and $g_i$ are being modeled as independent parameters per each subject. + + + + + + + + diff --git a/design/debug-gsf/D/model.stan b/design/debug-gsf/D/model.stan new file mode 100644 index 00000000..f304744f --- /dev/null +++ b/design/debug-gsf/D/model.stan @@ -0,0 +1,82 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + vector[Nind] ks; + vector[Nind] kg; + vector[Nind] kb; + vector[Nind] phi; + real sigma; +} + + +model { + vector[N] Ypred; + + ks ~ lognormal(log(0.6), 0.3); + kg ~ lognormal(log(0.2), 0.3); + kb ~ lognormal(log(60), 1); + phi ~ beta(2, 2); + + Ypred = sld( + Tobs, + kb[ind_index], + ks[ind_index], + kg[ind_index], + phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} + +generated quantities { + real ks_mu = mean(ks); + real kg_mu = mean(kg); + real phi_mu = mean(phi); + real b_mu = mean(kb); +} diff --git a/design/debug-gsf/E/model.R b/design/debug-gsf/E/model.R new file mode 100644 index 00000000..c2897615 --- /dev/null +++ b/design/debug-gsf/E/model.R @@ -0,0 +1,168 @@ + + +set.seed(32130) +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(1000, log(60), 0.05) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.6) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + +################################# +# +# Setup test data +# + +logit <- binomial()$linkfun +inv_logit <- binomial()$linkinv + +n <- 200 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + + mu_b = 60, + mu_s = 0.6, + mu_g = 0.2, + mu_phi = 0.5, + + eta_b = rnorm(n, 0, 0.5), + eta_s = rnorm(n, 0, 0.3), + eta_g = rnorm(n, 0, 0.3), + eta_phi = rnorm(n, 0, 0.4), + + b = exp(log(mu_b) + eta_b), + g = exp(log(mu_g) + eta_g), + s = exp(log(mu_s) + eta_s), + phi = inv_logit(logit(mu_phi) + eta_phi), + + sigma = 0.05 +) + +pdat <- baseline |> + select(b, g, s, phi) |> + gather("key", "var") + +ggplot(data = pdat, aes(x = var)) + + geom_density() + + theme_bw() + + facet_wrap(~key, scales = "free") + + + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 7, by = 0.3) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, b, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + eta_b = rep(0, n), + eta_s = rep(0, n), + eta_g = rep(0, n), + eta_phi = rep(0, n), + mu_b = 60, + mu_s = 0.6, + mu_g = 0.2, + mu_phi = 0.5, + sigma = 0.05 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "E", "model.stan")), + exe_file = file.path(here("local", "models", "model_E")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + init = lapply(1:nchains, \(...) init_vals), + refresh = 200, + iter_warmup = 300, + iter_sampling = 500 +) +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) +pars <- c( + "mu_b", "mu_s", "mu_g", "mu_phi", "sigma" +) +fit$summary(variables = pars) + + + + +################################# +# +# Diagnostics +# + +pars <- c( + "mu_b", "mu_s", "mu_g", "mu_phi" +) + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) diff --git a/design/debug-gsf/E/model.md b/design/debug-gsf/E/model.md new file mode 100644 index 00000000..28ec1e49 --- /dev/null +++ b/design/debug-gsf/E/model.md @@ -0,0 +1,52 @@ + +## Model Specification + + + +$$ +\begin{align*} +y_{ij} &\sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) \\ \\ +SLD_{ij} &= b_i \Big [ (\phi_i e^{-s_it_{j}}) + (1-\phi_i) \cdot e^{g_it_j} \Big] \\ \\ +b_i =& \exp(\log(\mu_b) + \eta_{bi})\\ +s_i =& \exp(\log(\mu_s) + \eta_{si})\\ +g_i =& \exp(\log(\mu_s) + \eta_{gi}) \\ +\phi_i =& \text{logit}^{-1}(\text{logit}(\mu_\phi) + \eta_{\phi i}) \\ +\end{align*} +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter +- $s_i$ is the shrinkage parameter +- $g_i$ is the growth parameter +- $\phi_i$ is the proportion of responding cells + + + +Priors: +$$ +\begin{align*} +\mu_b &\sim \text{LogNormal}\big(\log(60),\ 0.5\big) \\ +\mu_s &\sim \text{LogNormal}\big(\log(0.6),\ 0.3\big) \\ +\mu_g &\sim \text{LogNormal}\big(\log(0.2),\ 0.3\big) \\ +\mu_\phi &\sim \text{Beta}(3, 3) \\ \\ + +\eta_{bi} & \sim N(0, 1) \\ +\eta_{si} & \sim N(0, 0.3) \\ +\eta_{gi} & \sim N(0, 0.3) \\ +\eta_{\phi i} & \sim N(0, 2) \\\\ +\sigma &\sim \text{LogNormal}\big(\log(0.3),\ 0.2\big) +\end{align*} +$$ + + + + + + + + + diff --git a/design/debug-gsf/E/model.stan b/design/debug-gsf/E/model.stan new file mode 100644 index 00000000..ce200186 --- /dev/null +++ b/design/debug-gsf/E/model.stan @@ -0,0 +1,102 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real mu_b; + real mu_s; + real mu_g; + real mu_phi; + + vector[Nind] eta_b; + vector[Nind] eta_s; + vector[Nind] eta_g; + vector[Nind] eta_phi; + + real sigma; +} + + +transformed parameters { + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + vector[Nind] ind_phi; + + ind_b = exp(log(mu_b) + eta_b); + ind_s = exp(log(mu_s) + eta_s); + ind_g = exp(log(mu_g) + eta_g); + ind_phi = inv_logit(logit(mu_phi) + eta_phi); +} + + +model { + vector[N] Ypred; + + + mu_b ~ lognormal(log(60), 0.05); + mu_s ~ lognormal(log(0.6), 0.05); + mu_g ~ lognormal(log(0.2), 0.05); + mu_phi ~ beta(1, 1); + + eta_b ~ normal(0, 0.5); + eta_s ~ normal(0, 0.3); + eta_g ~ normal(0, 0.3); + eta_phi ~ normal(0, 0.4); + + sigma ~ lognormal(log(0.05), 0.05); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index], + ind_phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} diff --git a/design/debug-gsf/F/model.R b/design/debug-gsf/F/model.R new file mode 100644 index 00000000..6ef82d07 --- /dev/null +++ b/design/debug-gsf/F/model.R @@ -0,0 +1,179 @@ + + +set.seed(32130) +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + + +################################# +# +# Checks +# + +x <- rlnorm(1000, log(60), 1) +plot(density(x)) + +x <- rlnorm(1000, log(60), 0.5) +plot(density(x)) + + + +x <- rlnorm(500, log(100), 0.2) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + +x <- rlnorm(1000, log(0.6), 0.4) +plot(density(x)) + + +x <- rbeta(1000, 3, 3) +plot(density(x)) +x <- rlnorm(1000, log(3), 0.4) +plot(density(x)) + + +################################# +# +# Setup test data +# + + +n <- 200 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + + mu_b = 60, + mu_s = 0.6, + mu_g = 0.25, + + sigma_b = 0.4, + sigma_s = 0.2, + sigma_g = 0.2, + + a_phi = 2, + b_phi = 2, + + b = rlnorm(n, log(mu_b), sigma_b), + g = rlnorm(n, log(mu_g), sigma_g), + s = rlnorm(n, log(mu_s), sigma_s), + phi = rbeta(n, a_phi, b_phi), + + sigma = 0.05 +) + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 7, by = 0.3) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, b, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + ind_b = rep(60, n), + ind_s = rep(0.6, n), + ind_g = rep(0.25, n), + ind_phi = rep(0.5, n), + + mu_b = 80, + mu_s = 0.5, + mu_g = 0.2, + + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + + a_phi = 2, + b_phi = 2, + sigma = 0.05 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "F", "model.stan")), + exe_file = file.path(here("local", "models", "model_F")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + init = lapply(1:nchains, \(...) init_vals), + refresh = 200, + iter_warmup = 500, + iter_sampling = 1000 +) +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) +pars <- c( + "mu_b", "mu_s", "mu_g", "a_phi", "b_phi", + "sigma", "b_mean", "s_mean", "g_mean", "phi_mean" +) +fit$summary(variables = pars) + + + + +################################# +# +# Diagnostics +# + + +pars <- c( + "mu_b", "mu_s", "mu_g", "a_phi", "b_phi", + "sigma" +) + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + +mcmc_acf(samps, pars =pars) + + + diff --git a/design/debug-gsf/F/model.md b/design/debug-gsf/F/model.md new file mode 100644 index 00000000..dbe92874 --- /dev/null +++ b/design/debug-gsf/F/model.md @@ -0,0 +1,58 @@ + +## Model Specification + + + +$$ +\begin{align*} +y_{ij} &\sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) \\ \\ +SLD_{ij} &= b_i \Big [ (\phi_i e^{-s_it_{j}}) + (1-\phi_i) \cdot e^{g_it_j} \Big] \\ \\ +b_i &\sim \text{LogNormal}(\mu_b,\ \sigma_b) \\ +s_i &\sim \text{LogNormal}(\mu_s,\ \sigma_s) \\ +g_i &\sim \text{LogNormal}(\mu_g,\ \sigma_g) \\ +\phi_i &\sim \text{Beta}(a_\phi,\ b_\phi) \\ +\end{align*} +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter +- $s_i$ is the shrinkage parameter +- $g_i$ is the growth parameter¦ +- $\phi_i$ is the proportion of responding cells + + + +Priors: +$$ +\begin{align*} + +\mu_b &\sim \text{LogNormal}(\log(60),\ 0.4) \\ +\mu_s &\sim \text{LogNormal}(\log(0.6),\ 0.4) \\ +\mu_g &\sim \text{LogNormal}(\log(0.25),\ 0.4) \\ +\\ +\sigma_b &\sim \text{LogNormal}(\log(0.4),\ 0.3) \\ +\sigma_s &\sim \text{LogNormal}(\log(0.2),\ 0.3) \\ +\sigma_g &\sim \text{LogNormal}(\log(0.2),\ 0.3) \\ +\\ + +a_\phi &\sim \text{LogNormal}(\log(2),\ 0.3) \\ +b_\phi &\sim \text{LogNormal}(\log(2),\ 0.3) \\ +\\ + +\sigma &\sim \text{LogNormal}(\log(0.05),\ 0.3) + +\end{align*} +$$ + + + + + + + + + diff --git a/design/debug-gsf/F/model.stan b/design/debug-gsf/F/model.stan new file mode 100644 index 00000000..6c14c5e3 --- /dev/null +++ b/design/debug-gsf/F/model.stan @@ -0,0 +1,110 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real mu_b; + real mu_s; + real mu_g; + + real sigma_b; + real sigma_s; + real sigma_g; + + real a_phi; + real b_phi; + + real sigma; + + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + vector[Nind] ind_phi; + +} + +model { + vector[N] Ypred; + + + mu_b ~ lognormal(log(60), 0.4); + mu_s ~ lognormal(log(0.6), 0.4); + mu_g ~ lognormal(log(0.25), 0.4); + + sigma_b ~ lognormal(log(0.4), 0.3); + sigma_s ~ lognormal(log(0.2), 0.3); + sigma_g ~ lognormal(log(0.2), 0.3); + + a_phi ~ lognormal(log(2), 0.3); + b_phi ~ lognormal(log(2), 0.3); + + sigma ~ lognormal(log(0.05), 0.3); + + ind_b ~ lognormal(log(mu_b), sigma_b); + ind_s ~ lognormal(log(mu_s), sigma_s); + ind_g ~ lognormal(log(mu_g), sigma_g); + ind_phi ~ beta(a_phi, b_phi); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index], + ind_phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} + + + +generated quantities { + real b_mean = mu_b * exp((sigma_b^2)/2); + real s_mean = mu_s * exp((sigma_s^2)/2); + real g_mean = mu_g * exp((sigma_g^2)/2); + real phi_mean = a_phi / (a_phi + b_phi); +} diff --git a/design/debug-gsf/G/model.R b/design/debug-gsf/G/model.R new file mode 100644 index 00000000..10a76cea --- /dev/null +++ b/design/debug-gsf/G/model.R @@ -0,0 +1,181 @@ + + +set.seed(32130) +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g) { + b * (exp(-s * time) + exp(g * time) - 1) +} + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(1000, log(60), 0.05) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.6) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + +################################# +# +# Setup test data +# + + +n <- 80 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + + mu_b = log(60), + mu_s = log(0.6), + mu_g = log(0.2), + + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + + LB = rnorm(n, mu_b, sigma_b), + LS = rnorm(n, mu_s, sigma_s), + LG = rnorm(n, mu_g, sigma_g), + + b = exp(LB), + s = exp(LS), + g = exp(LG), + + sigma = 5 +) + +pdat <- baseline |> + select(b, g, s) |> + gather("key", "var") + +ggplot(data = pdat, aes(x = var)) + + geom_density(fill = "#828282") + + theme_bw() + + facet_wrap(~key, scales = "free") + + + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 6, length.out = 5) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, b, s, g)) |> + mutate(sld = rnorm(nrow(grid_df), msld, sigma)) |> + select(pt, b, s, g, sigma, time, msld, sld) + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + +baseline |> summarise(across(c("b", "g", "s", "sigma"), mean)) + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + + LB = rep(log(60), n), + LS = rep(log(0.6), n), + LG = rep(log(0.2), n), + + mu_b = log(60), + mu_s = log(0.6), + mu_g = log(0.2), + + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + + sigma = 5 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "G", "model.stan")), + exe_file = file.path(here("local", "models", "model_G")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + init = lapply(1:nchains, \(...) init_vals), + refresh = 200, + iter_warmup = 400, + iter_sampling = 600 +) +baseline |> summarise(across(c("b", "g", "s", "sigma"), mean)) +pars <- c( + "mu_b", "mu_s", "mu_g", + "sigma_b", "sigma_s", "sigma_g", "sigma", + "b_mean", "s_mean", "g_mean" +) +fit$summary(variables = pars) + + + + +################################# +# +# Diagnostics +# + +pars <- c( + "mu_b", "mu_s", "mu_g", + "sigma_b", "sigma_s", "sigma_g", "sigma", + "b_mean", "s_mean", "g_mean" +) + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + diff --git a/design/debug-gsf/G/model.md b/design/debug-gsf/G/model.md new file mode 100644 index 00000000..980375e0 --- /dev/null +++ b/design/debug-gsf/G/model.md @@ -0,0 +1,64 @@ + +## Model Specification + + + +$$ +\begin{align*} +y_{ij} &\sim N(SLD_{ij}, \sigma) \\ \\ + + +SLD_{ij} &= b_i \Big [ + e^{-s_it_{j}} + + e^{g_it_j} - + 1 +\Big] \\ \\ + + +b_i =& \exp(LB_i) \\ +s_i =& \exp(KS_i) \\ +g_i =& \exp(LG_i) \\ +\\ + +LB_i &\sim N(\mu_b, \sigma_b) \\ +LS_i &\sim N(\mu_s, \sigma_s) \\ +LG_i &\sim N(\mu_g, \sigma_g) \\ + +\end{align*} +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter +- $s_i$ is the shrinkage parameter +- $g_i$ is the growth parameter +- $\phi_i$ is the proportion of responding cells + + + +Priors: +$$ +\begin{align*} +\mu_b &\sim \text{LogNormal}\big(\log(60),\ 0.5\big) \\ +\mu_s &\sim \text{LogNormal}\big(\log(0.6),\ 0.3\big) \\ +\mu_g &\sim \text{LogNormal}\big(\log(0.2),\ 0.3\big) \\ + \\ +\sigma_{b} &\sim \text{LogNormal}\big(\log(0.3),\ 0.2\big) \\ +\sigma_{s} &\sim \text{LogNormal}\big(\log(0.3),\ 0.2\big) \\ +\sigma_{g} &\sim \text{LogNormal}\big(\log(0.3),\ 0.2\big) \\ +\\ +\sigma &\sim \text{LogNormal}\big(\log(0.3),\ 0.2\big) +\end{align*} +$$ + + + + + + + + + diff --git a/design/debug-gsf/G/model.stan b/design/debug-gsf/G/model.stan new file mode 100644 index 00000000..6d322a9b --- /dev/null +++ b/design/debug-gsf/G/model.stan @@ -0,0 +1,84 @@ + +functions { + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg + ) { + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* ( exp(- psi_ks .* time) + exp(psi_kg .* time) - 1) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real mu_b; + real mu_s; + real mu_g; + + real sigma_b; + real sigma_s; + real sigma_g; + + vector[Nind] LB; + vector[Nind] LS; + vector[Nind] LG; + + real sigma; +} + + +transformed parameters { + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + + ind_b = exp(LB); + ind_s = exp(LS); + ind_g = exp(LG); +} + + +model { + vector[N] Ypred; + + mu_b ~ normal(log(60), 2); + mu_s ~ normal(log(0.6), 1); + mu_g ~ normal(log(0.2), 1); + + sigma_b ~ lognormal(log(0.3), 0.5); + sigma_s ~ lognormal(log(0.3), 0.5); + sigma_g ~ lognormal(log(0.3), 0.5); + + sigma ~ lognormal(log(5), 0.5); + + LB ~ normal(mu_b, sigma_b); + LS ~ normal(mu_s, sigma_s); + LG ~ normal(mu_g, sigma_g); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index] + ); + + Yobs ~ normal(Ypred, sigma); +} + +generated quantities { + real b_mean = exp(mu_b + (sigma_b^2)/2); + real s_mean = exp(mu_s + (sigma_s^2)/2); + real g_mean = exp(mu_g + (sigma_g^2)/2); +} diff --git a/design/debug-gsf/H/model.R b/design/debug-gsf/H/model.R new file mode 100644 index 00000000..458a6e93 --- /dev/null +++ b/design/debug-gsf/H/model.R @@ -0,0 +1,225 @@ + + +set.seed(32130) +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(1000, log(60), 1) +plot(density(x)) + + +x <- rlnorm(1000, log(0.2), 1) +plot(density(x)) + + +x <- rbeta(1000, 3, 3) +plot(density(x)) + + + + + +################################# +# +# Setup test data +# + +logit <- binomial()$linkfun +inv_logit <- binomial()$linkinv + +n <- 200 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + + mu_b = log(60), + mu_s = log(0.6), + mu_g = log(0.2), + mu_phi = logit(0.5), + + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + sigma_phi = 0.3, + + eta_b = rnorm(n, 0, 1), + eta_s = rnorm(n, 0, 1), + eta_g = rnorm(n, 0, 1), + eta_phi = rnorm(n, 0, 1), + + b = exp(mu_b + eta_b * sigma_b), + g = exp(mu_g + eta_g * sigma_s), + s = exp(mu_s + eta_s * sigma_g), + phi = inv_logit(mu_phi + eta_phi * sigma_phi), + + sigma = 0.05 +) + +pdat <- baseline |> + select(b, g, s, phi) |> + gather("key", "var") + +ggplot(data = pdat, aes(x = var)) + + geom_density() + + theme_bw() + + facet_wrap(~key, scales = "free") + + + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 6, length.out = 9) +) |> + filter(time != 0) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, b, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) + + +################################# +# +# Model fitting +# + +stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + Yobs = dat_lm$sld, + Tobs = dat_lm$time +) +init_vals <- list( + raw_b = rep(0, n), + raw_s = rep(0, n), + raw_g = rep(0, n), + raw_phi = rep(0, n), + mu_b = log(60), + mu_s = log(0.6), + mu_g = log(0.2), + mu_phi = log(0.5), + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + sigma_phi = 0.3, + sigma = 0.05 +) + + +mod <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "H", "model.stan")), + exe_file = file.path(here("local", "models", "model_H")) +) +nchains <- 3 +fit <- mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + init = lapply(1:nchains, \(...) init_vals), + refresh = 200, + iter_warmup = 300, + iter_sampling = 500 +) + +pars <- c( "sigma", "pop_mean_b", "pop_mean_g", "pop_mean_s") + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) +fit$summary(variables = pars) +fit$summary() + + + +# 12 seconds +# ess ~100 + +################################# +# +# Diagnostics +# + +pars <- c( + "mu_b", "mu_s", "mu_g", "mu_phi" +) + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + + + + + + + + + + + + + + + + + + + + +# A tibble: 4 × 10 + variable mean median sd mad q5 q95 rhat ess_bulk + +1 sigma 0.0513 0.0513 0.00110 0.00112 0.0495 0.0531 1.00 1184. +2 pop_mean_b 63.2 63.1 1.76 1.77 60.6 66.4 1.03 76.6 +3 pop_mean_g 0.197 0.197 0.00618 0.00637 0.187 0.207 1.03 157. +4 pop_mean_s 0.599 0.599 0.0484 0.0500 0.521 0.679 1.00 659. +# ℹ 1 more variable: ess_tail +> fit$summary() +# A tibble: 1,613 × 10 + variable mean median sd mad q5 q95 rhat ess_bulk + + 1 lp__ -3.22e+3 -3.22e+3 28.1 28.7 -3.27e+3 -3.18e+3 1.01 187. + 2 mu_b 4.09e+0 4.09e+0 0.0269 0.0265 4.05e+0 4.14e+0 1.03 80.4 + 3 mu_s -5.19e-1 -5.16e-1 0.0822 0.0843 -6.57e-1 -3.89e-1 1.00 672. + 4 mu_g -1.67e+0 -1.67e+0 0.0328 0.0333 -1.72e+0 -1.62e+0 1.02 180. + 5 mu_phi -1.20e-1 -1.20e-1 0.0467 0.0459 -1.97e-1 -4.02e-2 1.00 708. + 6 sigma_b 3.31e-1 3.29e-1 0.0174 0.0171 3.04e-1 3.61e-1 1.04 118. + 7 sigma_s 7.48e-2 6.86e-2 0.0373 0.0370 2.35e-2 1.44e-1 1.02 297. + 8 sigma_g 2.93e-1 2.92e-1 0.0161 0.0162 2.68e-1 3.20e-1 1.02 151. + 9 sigma_phi 5.11e-1 2.84e-1 0.709 0.261 5.20e-2 1.68e+0 1.00 3256. +10 raw_b[1] 3.76e-1 3.68e-1 0.228 0.226 1.84e-2 7.44e-1 1.01 455. \ No newline at end of file diff --git a/design/debug-gsf/H/model.md b/design/debug-gsf/H/model.md new file mode 100644 index 00000000..04c607da --- /dev/null +++ b/design/debug-gsf/H/model.md @@ -0,0 +1,60 @@ + +## Model Specification + + + +$$ +\begin{align*} +y_{ij} &\sim N(SLD_{ij}, \sigma \cdot SLD_{ij}) \\ \\ +SLD_{ij} &= b_i \Big [ (\phi_i e^{-s_it_{j}}) + (1-\phi_i) \cdot e^{g_it_j} \Big] \\ \\ +b_i =& \exp(\mu_b + \eta_{bi} \cdot \sigma_b)\\ +s_i =& \exp(\mu_s + \eta_{si} \cdot \sigma_s)\\ +g_i =& \exp(\mu_s + \eta_{gi} \cdot \sigma_g) \\ +\phi_i =& \text{logit}^{-1}(\mu_\phi + \eta_{\phi i}\cdot \sigma_\phi) \\ +\end{align*} +$$ + +Where: +- $i$ is the subject index +- $j$ is the time index +- $y_{ij}$ is the observed SLD value +- $SLD_{ij}$ is the expected SLD value +- $b_i$ is the baseline parameter +- $s_i$ is the shrinkage parameter +- $g_i$ is the growth parameter +- $\phi_i$ is the proportion of responding cells + + + +Priors: +$$ +\begin{align*} +\mu_b &\sim \text{Normal}\big(\log(60),\ 3\big) \\ +\mu_s &\sim \text{Normal}\big(\log(0.6),\ 3\big) \\ +\mu_g &\sim \text{Normal}\big(\log(0.2),\ 3\big) \\ +\mu_\phi &\sim \text{Normal}\big(0,\ 3\big) \\ \\ + + +\sigma_{b} &\sim \text{LogNormal(log(0.3), 0.5)} \\ +\sigma_{s} &\sim \text{LogNormal(log(0.3), 0.5)} \\ +\sigma_{g} &\sim \text{LogNormal(log(0.3), 0.5)} \\ +\sigma_{\phi} &\sim \text{LogNormal(log(0.3), 0.5)} \\ +\\ + + +\eta_{bi} & \sim N(0, 1) \\ +\eta_{si} & \sim N(0, 1) \\ +\eta_{gi} & \sim N(0, 1) \\ +\eta_{\phi i} & \sim N(0, 1) \\ \\ +\sigma &\sim \text{LogNormal}\big(\log(0.3),\ 0.2\big) +\end{align*} +$$ + + + + + + + + + diff --git a/design/debug-gsf/H/model.stan b/design/debug-gsf/H/model.stan new file mode 100644 index 00000000..5a3b96cd --- /dev/null +++ b/design/debug-gsf/H/model.stan @@ -0,0 +1,118 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real mu_b; + real mu_s; + real mu_g; + real mu_phi; + + real sigma_b; + real sigma_s; + real sigma_g; + real sigma_phi; + + vector[Nind] raw_b; + vector[Nind] raw_s; + vector[Nind] raw_g; + vector[Nind] raw_phi; + + real sigma; +} + + +transformed parameters { + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + vector[Nind] ind_phi; + + ind_b = exp(mu_b + raw_b * sigma_b); + ind_s = exp(mu_s + raw_s * sigma_s); + ind_g = exp(mu_g + raw_g * sigma_g); + ind_phi = inv_logit(mu_phi + raw_s * sigma_g); +} + + +model { + vector[N] Ypred; + + mu_b ~ normal(log(60), 1.2); + mu_s ~ normal(log(0.7), 1.2); + mu_g ~ normal(log(0.3), 1.2); + mu_phi ~ normal(0, 1); + + raw_b ~ std_normal(); + raw_s ~ std_normal(); + raw_g ~ std_normal(); + raw_phi ~ std_normal(); + + sigma_b ~ lognormal(log(0.3), 1); + sigma_s ~ lognormal(log(0.3), 1); + sigma_g ~ lognormal(log(0.3), 1); + sigma_phi ~ lognormal(log(0.3), 1); + + sigma ~ lognormal(log(0.05), 0.2); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index], + ind_phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} + +generated quantities { + real pop_mean_b = exp(mu_b + sigma_b^2/2); + real pop_mean_s = exp(mu_s + sigma_s^2/2); + real pop_mean_g = exp(mu_g + sigma_g^2/2); +} + diff --git a/design/debug-gsf/compare/mod_central.stan b/design/debug-gsf/compare/mod_central.stan new file mode 100644 index 00000000..a300b858 --- /dev/null +++ b/design/debug-gsf/compare/mod_central.stan @@ -0,0 +1,108 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + vector[Nind] ind_phi; + + real mu_b; + real mu_s; + real mu_g; + + real sigma_b; + real sigma_s; + real sigma_g; + + real a_phi; + real b_phi; + + real sigma; +} + + +model { + vector[N] Ypred; + + ind_b ~ lognormal(mu_b, sigma_b); + ind_s ~ lognormal(mu_s, sigma_s); + ind_g ~ lognormal(mu_g, sigma_g); + ind_phi ~ beta(a_phi, b_phi); + + mu_b ~ normal(log(60), 1); + mu_s ~ normal(log(0.6), 1); + mu_g ~ normal(log(0.2), 1); + + sigma_b ~ normal(log(0.3), 1); + sigma_s ~ normal(log(0.3), 1); + sigma_g ~ normal(log(0.3), 1); + + a_phi ~ lognormal(log(2), 1); + b_phi ~ lognormal(log(2), 1); + + sigma ~ lognormal(log(0.05), 0.2); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index], + ind_phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} + + +generated quantities { + real pop_mean_b = exp(mu_b + sigma_b^2/2); + real pop_mean_s = exp(mu_s + sigma_s^2/2); + real pop_mean_g = exp(mu_g + sigma_g^2/2); +} diff --git a/design/debug-gsf/compare/mod_dcent_lg.stan b/design/debug-gsf/compare/mod_dcent_lg.stan new file mode 100644 index 00000000..b27f7bbd --- /dev/null +++ b/design/debug-gsf/compare/mod_dcent_lg.stan @@ -0,0 +1,120 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real mu_b; + real mu_s; + real mu_g; + + real sigma_b; + real sigma_s; + real sigma_g; + + vector[Nind] raw_b; + vector[Nind] raw_s; + vector[Nind] raw_g; + + vector[Nind] ind_phi; + real a_phi; + real b_phi; + + real sigma; +} + + +transformed parameters { + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + + ind_b = exp(log(mu_b) + raw_b * sigma_b); + ind_s = exp(log(mu_s) + raw_s * sigma_s); + ind_g = exp(log(mu_g) + raw_g * sigma_g); + + +} + + +model { + vector[N] Ypred; + + mu_b ~ lognormal(log(60), 1); + mu_s ~ lognormal(log(0.7), 1); + mu_g ~ lognormal(log(0.3), 1); + + raw_b ~ std_normal(); + raw_s ~ std_normal(); + raw_g ~ std_normal(); + + sigma_b ~ lognormal(log(0.3), 1); + sigma_s ~ lognormal(log(0.3), 1); + sigma_g ~ lognormal(log(0.3), 1); + + sigma ~ lognormal(log(0.05), 1); + + ind_phi ~ beta(a_phi, b_phi); + a_phi ~ lognormal(log(2), 1); + b_phi ~ lognormal(log(2), 1); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index], + ind_phi[ind_index] + ); + + Yobs ~ normal(Ypred, Ypred .* sigma); +} + +generated quantities { + real pop_mean_b = mu_b * exp(sigma_b^2/2); + real pop_mean_s = mu_s * exp(sigma_s^2/2); + real pop_mean_g = mu_g * exp(sigma_g^2/2); +} + diff --git a/design/debug-gsf/compare/mod_dcent_nl.stan b/design/debug-gsf/compare/mod_dcent_nl.stan new file mode 100644 index 00000000..ef01bf8b --- /dev/null +++ b/design/debug-gsf/compare/mod_dcent_nl.stan @@ -0,0 +1,122 @@ + +functions { + // Vectorized ifelse() similar as in R. + vector ifelse(array[] int condition, vector yes, vector no) { + vector[num_elements(yes)] result; + for (i in 1:num_elements(yes)) { + result[i] = condition[i] ? yes[i] : no[i]; + } + return result; + } + + // Vectorized negative predicate. Basically returns ifelse(x < 0, 1, 0). + array[] int is_negative(vector x) { + array[num_elements(x)] int result; + for (i in 1:num_elements(x)) { + result[i] = x[i] < 0.0; + } + return result; + } + + vector sld( + vector time, + vector psi_bsld, + vector psi_ks, + vector psi_kg, + vector psi_phi + ) { + vector[num_elements(time)] psi_phi_mod = ifelse( + is_negative(time), + zeros_vector(num_elements(time)), + psi_phi + ); + vector[num_elements(time)] result = fmin( + 8000.0, + psi_bsld .* (psi_phi_mod .* exp(- psi_ks .* time) + (1 - psi_phi_mod) .* exp(psi_kg .* time)) + ); + return result; + } +} + +data { + int N; + int Nind; + vector[N] Yobs; + vector[N] Tobs; + array[N] int ind_index; +} + +parameters { + real mu_b; + real mu_s; + real mu_g; + + real sigma_b; + real sigma_s; + real sigma_g; + + vector[Nind] raw_b; + vector[Nind] raw_s; + vector[Nind] raw_g; + + real sigma; + + vector[Nind] ind_phi; + real a_phi; + real b_phi; + +} + + +transformed parameters { + vector[Nind] ind_b; + vector[Nind] ind_s; + vector[Nind] ind_g; + + ind_b = exp(mu_b + raw_b * sigma_b); + ind_s = exp(mu_s + raw_s * sigma_s); + ind_g = exp(mu_g + raw_g * sigma_g); +} + + +model { + vector[N] Ypred; + + mu_b ~ normal(log(60), 1); + mu_s ~ normal(log(0.7), 1); + mu_g ~ normal(log(0.3), 1); + + + raw_b ~ std_normal(); + raw_s ~ std_normal(); + raw_g ~ std_normal(); + + + sigma_b ~ lognormal(log(0.3), 1); + sigma_s ~ lognormal(log(0.3), 1); + sigma_g ~ lognormal(log(0.3), 1); + + ind_phi ~ beta(a_phi, b_phi); + a_phi ~ lognormal(log(2), 1); + b_phi ~ lognormal(log(2), 1); + + + sigma ~ lognormal(log(0.05), 1); + + Ypred = sld( + Tobs, + ind_b[ind_index], + ind_s[ind_index], + ind_g[ind_index], + ind_phi[ind_index] + ); + + target += normal_lpdf(Yobs | Ypred, Ypred .* sigma); +} + +generated quantities { + real pop_mean_b = exp(mu_b + sigma_b^2/2); + real pop_mean_s = exp(mu_s + sigma_s^2/2); + real pop_mean_g = exp(mu_g + sigma_g^2/2); +} + diff --git a/design/debug-gsf/compare/model.R b/design/debug-gsf/compare/model.R new file mode 100644 index 00000000..4fb41284 --- /dev/null +++ b/design/debug-gsf/compare/model.R @@ -0,0 +1,381 @@ + + +set.seed(32130) +library(dplyr) +library(tibble) +library(tidyr) +library(cmdstanr) +library(bayesplot) +library(ggplot2) +library(here) + + +gsf_sld <- function(time, b, s, g, phi) { + b * (phi * exp(-s * time) + (1 - phi) * exp(g * time)) +} + + +################################# +# +# Check Prior densitys +# + + +x <- rlnorm(1000, log(60), 0.05) +plot(density(x)) + + +x <- rlnorm(500, log(0.4), 0.6) +plot(density(x)) + + +x <- rbeta(500, 3, 3) +plot(density(x)) + + + + + +################################# +# +# Setup test data +# + +n <- 50 +baseline <- tibble( + pt = factor(sprintf("pt_%04i", 1:n)), + b = rlnorm(n, log(70), 0.6), + s = rlnorm(n, log(0.6), 0.6), + g = rlnorm(n, log(0.2), 0.6), + phi = rbeta(n, 2, 2), + sigma = 0.08 +) + + +grid_df <- tidyr::expand_grid( + pt = baseline$pt, + time = seq(0, 5, length.out = 4) +) + +dat_lm <- grid_df |> + left_join(baseline, by = "pt") |> + mutate(msld = gsf_sld(time, b, s, g, phi)) |> + mutate(sld = rnorm(nrow(grid_df), msld, msld * sigma)) + + +################################# +# +# Data Diagnositics (does it look sensible) +# + + +pdat <- baseline |> + select(b, g, s, phi) |> + gather("key", "var") + +ggplot(data = pdat, aes(x = var)) + + geom_density() + + theme_bw() + + facet_wrap(~key, scales = "free") + +selected_pts <- sample(baseline$pt, 8) +ggplot( + data = dat_lm |> filter(pt %in% selected_pts), + aes(x = time, y = sld, group = pt, color = pt) +) + + geom_point() + + geom_line() + + theme_bw() + + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) + + +################################# +# +# Model fitting - Common Settings +# + + +get_samples <- function(mod, init_vals) { + stan_data <- list( + N = nrow(dat_lm), + Nind = nrow(baseline), + ind_index = as.numeric(dat_lm$pt), + Yobs = dat_lm$sld, + Tobs = dat_lm$time + ) + nchains <- 3 + + mod$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + refresh = 200, + iter_warmup = 600, + init = lapply(1:nchains, \(...) init_vals), + iter_sampling = 600 + ) +} + + + +################################# +# +# Model fitting - Centralised +# + + +init_vals <- list( + ind_b = rep(60, n), + ind_s = rep(0.6, n), + ind_g = rep(0.2, n), + ind_phi = rep(0.5, n), + mu_b = 60, + mu_s = 0.6, + mu_g = 0.2, + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + a_phi = 2, + b_phi = 2, + sigma = 0.05 +) + + +mod_central <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "compare", "mod_central.stan")), + exe_file = file.path(here("local", "models", "mod_central")) +) + +fit_central <- get_samples(mod_central, init_vals) + +pars_cent <- c( "sigma", "pop_mean_b", "pop_mean_g", "pop_mean_s") + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) +fit_central$summary(variables = pars_cent) + + + + + + +################################# +# +# Model fitting - De-Centralised no-log +# + + +init_vals <- list( + raw_b = rep(0, n), + raw_s = rep(0, n), + raw_g = rep(0, n), + mu_b = log(60), + mu_s = log(0.6), + mu_g = log(0.2), + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + ind_phi = rep(0.5, n), + a_phi = 2, + b_phi = 2, + sigma = 0.05 +) + + +mod_dcent_nl <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "compare", "mod_dcent_nl.stan")), + exe_file = file.path(here("local", "models", "mod_dcent_nl")) +) + +fit_dcent_nl <- get_samples(mod_dcent_nl, init_vals) + +pars_dcent_nl <- c("sigma", "pop_mean_b", "pop_mean_g", "pop_mean_s") + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) +fit_dcent_nl$summary(variables = pars_dcent_nl) + +fit_dcent_nl + + + + +################################# +# +# Model fitting - De-Centralised log +# + + +init_vals <- list( + raw_b = rep(0, n), + raw_s = rep(0, n), + raw_g = rep(0, n), + mu_b = 60, + mu_s = 0.6, + mu_g = 0.2, + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + ind_phi = rep(0.5, n), + a_phi = 2, + b_phi = 2, + sigma = 0.05 +) + + +mod_dcent_lg <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "compare", "mod_dcent_lg.stan")), + exe_file = file.path(here("local", "models", "mod_dcent_lg")) +) + +fit_dcent_lg <- get_samples(mod_dcent_lg, init_vals) + +pars_dcent_lg <- c("sigma", "pop_mean_b", "pop_mean_g", "pop_mean_s") + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) +fit_dcent_lg$summary(variables = pars_dcent_lg) + + + + + + +# 12 seconds +# ess ~100 + +################################# +# +# Diagnostics +# + +pars <- c( + "mu_b", "mu_s", "mu_g", "mu_phi" +) + + +lp_cp <- log_posterior(fit) +np_cp <- nuts_params(fit) + +samps <- fit$draws() +mcmc_pairs(samps, np = np_cp, pars = pars) +mcmc_trace(samps, pars = pars) + + + + + + + + + + + + + + + + +################################# +# +# Applying model to package sim data +# + +devtools::load_all(export = FALSE, path = "../../..") + +set.seed(4513) +jlist <- simulate_joint_data( + .debug = TRUE, + n_arm = c(50), + times = seq(0, 3, by = (1/365)/2), + lambda_cen = 1 / 9000, + beta_cat = c( + "A" = 0, + "B" = -0.1, + "C" = 0.5 + ), + beta_cont = 0.3, + lm_fun = sim_lm_gsf( + sigma = 0.01, + mu_s = c(0.6), + mu_g = c(0.3), + mu_b = 60, + omega_b = 0.2, + omega_s = 0.2, + omega_g = 0.2, + a_phi = 6, + b_phi = 8 + ), + os_fun = sim_os_exponential( + lambda = 1 / (400 / 365) + ) +) + +set.seed(333) +select_times <- sample(jlist$os$time, 5) +# select_times <- seq(1, 2000, by = 30) + +dlm <- jlist$lm |> + dplyr::filter(time %in% select_times) |> + dplyr::arrange(time, pt) |> + dplyr::mutate(time = time) + + +dlm |> + summarise(across(c("psi_b", "psi_g", "psi_s", "psi_phi"), mean)) + + +mod_dcent_nl <- cmdstan_model( + stan_file = file.path(here("design", "debug-gsf", "compare", "mod_dcent_nl.stan")), + exe_file = file.path(here("local", "models", "mod_dcent_nl")) +) + +init_vals <- list( + raw_b = rep(0, length(unique(dlm$pt))), + raw_s = rep(0, length(unique(dlm$pt))), + raw_g = rep(0, length(unique(dlm$pt))), + mu_b = log(60), + mu_s = log(0.6), + mu_g = log(0.2), + sigma_b = 0.3, + sigma_s = 0.3, + sigma_g = 0.3, + ind_phi = rep(0.5, length(unique(dlm$pt))), + a_phi = 2, + b_phi = 2, + sigma = 0.05 +) + + +stan_data <- list( + N = nrow(dlm), + Nind = length(unique(dlm$pt)), + ind_index = as.numeric(factor(dlm$pt)), + Yobs = dlm$sld, + Tobs = dlm$time +) +nchains <- 3 + +res <- mod_dcent_nl$sample( + data = stan_data, + chains = nchains, + parallel_chains = nchains, + refresh = 200, + iter_warmup = 4000, + iter_sampling = 10000, + init = lapply(1:nchains, \(...) init_vals), +) + +res +res$metadata() + + + + +fit_dcent_nl <- get_samples(mod_dcent_nl, init_vals) + +pars_dcent_nl <- c("sigma", "pop_mean_b", "pop_mean_g", "pop_mean_s") + +baseline |> summarise(across(c("b", "g", "s", "phi", "sigma"), mean)) + + + diff --git a/design/tests/gsf-no-link.R b/design/tests/gsf-no-link.R index 6fc55f99..d33393d0 100644 --- a/design/tests/gsf-no-link.R +++ b/design/tests/gsf-no-link.R @@ -6,7 +6,7 @@ library(cmdstanr) # devtools::install_git("https://github.com/stan-dev/cmdstanr") -devtools::document() +# devtools::document() devtools::load_all(export_all = FALSE) options("jmpost.cache.dir" = file.path("local", "models")) @@ -15,11 +15,41 @@ options("jmpost.cache.dir" = file.path("local", "models")) + ## Generate Test data with known parameters ## (based on internal data, time in years) +# set.seed(7143) +# jlist <- simulate_joint_data( +# .debug = TRUE, +# n_arm = c(70, 90), +# times = seq(0, 3, by = (1/365)/2), +# lambda_cen = 1 / 9000, +# beta_cat = c( +# "A" = 0, +# "B" = -0.1, +# "C" = 0.5 +# ), +# beta_cont = 0.3, +# lm_fun = sim_lm_gsf( +# sigma = 0.01, +# mu_s = c(0.6), +# mu_g = c(0.3), +# mu_b = 60, +# omega_b = 0.2, +# omega_s = 0.2, +# omega_g = 0.2, +# a_phi = 6, +# b_phi = 8 +# ), +# os_fun = sim_os_exponential( +# lambda = 1 / (400 / 365) +# ) +# ) +set.seed(7143) jlist <- simulate_joint_data( - n_arm = c(80, 80), - times = seq(0, 4, by = (1/365)/2), + .debug = TRUE, + n_arm = c(85, 100), + times = seq(0, 3, by = (1/365)/2), lambda_cen = 1 / 9000, beta_cat = c( "A" = 0, @@ -27,65 +57,31 @@ jlist <- simulate_joint_data( "C" = 0.5 ), beta_cont = 0.3, - lm_fun = sim_lm_gsf( - sigma = 0.005, - mu_s = c(0.25, 0.35), - mu_g = c(0.15, 0.25), - mu_phi = c(0.4, 0.6), - mu_b = 60, - omega_b = 0.1, - omega_s = 0.1, - omega_g = 0.1, - omega_phi = 0.2 - ), - os_fun = sim_os_exponential( - lambda = 1 / 400 - ) + lm_fun = sim_lm_gsf(link_dsld = 0.1, link_ttg = 0.2), + os_fun = sim_os_exponential(1 / (400 / 365)) ) +set.seed(333) +select_times <- sample(jlist$lm$time, 5) -## Generate Test data with known parameters -## (based on Kerioui time in days) -jlist <- simulate_joint_data( - n_arm = c(80), - times = seq(0, 900), - lambda_cen = 1 / 9000, - beta_cat = c( - "A" = 0, - "B" = -0.1, - "C" = 0.5 - ), - beta_cont = 0.3, - lm_fun = sim_lm_gsf( - sigma = 0.025, - mu_s = c(0.007), - mu_g = c(0.001), - mu_phi = c(0.2), - mu_b = 60, - omega_b = 0.51, - omega_s = 0.51, - omega_g = 0.51, - omega_phi = 0.51 - ), - os_fun = sim_os_exponential( - lambda = 1 / 400 - ) -) ## Extract data to individual datasets dat_os <- jlist$os -select_times <- seq(1, 600, by = 50) -# select_times <- seq(1, 2000, by = 30) dat_lm <- jlist$lm |> dplyr::filter(time %in% select_times) |> dplyr::arrange(time, pt) |> dplyr::mutate(time = time) +dat_lm |> + dplyr::group_by(arm) |> + dplyr::summarise(across(c("psi_b", "psi_g", "psi_s", "psi_phi"), mean)) + + # mean(dat_os$time) # mean(dat_os$event) @@ -97,23 +93,33 @@ ggplot(data = dat_lm |> dplyr::filter(pt %in% pnam)) + geom_line(aes(x = time, y = sld, col =pt, group =pt)) + theme_bw() +param <- dat_lm |> + select(arm, psi_b, psi_s, psi_g, psi_phi) |> + gather("KEY", "VAR", -arm) + +ggplot(data = param, aes(x = VAR, group = arm, col = arm)) + + geom_density() + + theme_bw() + + facet_wrap(~KEY, scales = "free") jm <- JointModel( longitudinal = LongitudinalGSF( - - mu_bsld = prior_lognormal(log(60), 0.6), - mu_ks = prior_lognormal(log(0.007), 0.6), - mu_kg = prior_lognormal(log(0.001), 0.6), - mu_phi = prior_beta(7, 10), - - omega_bsld = prior_lognormal(log(0.5), 0.6), - omega_ks = prior_lognormal(log(0.5), 0.6), - omega_kg = prior_lognormal(log(0.5), 0.6), - omega_phi = prior_lognormal(log(0.5), 0.6), - - sigma = prior_lognormal(log(0.03), 0.6) - ) + mu_bsld = prior_normal(log(60), 1), + mu_ks = prior_normal(log(0.6), 1), + mu_kg = prior_normal(log(0.3), 1), + omega_bsld = prior_lognormal(log(0.2), 1), + omega_ks = prior_lognormal(log(0.2), 1), + omega_kg = prior_lognormal(log(0.2), 1), + a_phi = prior_lognormal(log(6), 1), + b_phi = prior_lognormal(log(8), 1), + sigma = prior_lognormal(log(0.01), 1), + centered = FALSE + ), + survival = SurvivalExponential( + lambda = prior_lognormal(log(1 / (400 / 365)), 1) + ), + link = LinkGSF() ) @@ -133,41 +139,55 @@ jdat <- DataJoint( arm = "arm", study = "study" ), + survival = DataSurvival( + data = dat_os, + formula = Surv(time, event) ~ cov_cat + cov_cont + ), longitudinal = DataLongitudinal( data = dat_lm, formula = sld ~ time, - threshold = 5 + threshold = -999 ) ) + +# jmpost:::initialValues(jm) + + ## Sample from JointModel mp <- sampleStanModel( jm, data = jdat, - iter_sampling = 500, - iter_warmup = 1000, - chains = 1, - parallel_chains = 1 + iter_warmup = 600, + iter_sampling = 1000, + refresh = 200, + chains = 3, + parallel_chains = 3 ) +summary_post <- function(model, vars, exp = FALSE) { + dat <- model$summary( + vars, + mean = mean, + q01 = \(x) purrr::set_names(quantile(x, 0.01), ""), + q99 = \(x) purrr::set_names(quantile(x, 0.99), ""), + rhat = posterior::rhat, + ess_bulk = posterior::ess_bulk, + ess_tail = posterior::ess_tail + ) + if (exp) { + dat$q01 <- dat$q01 |> exp() + dat$q99 <- dat$q99 |> exp() + dat$mean <- dat$mean |> exp() + } + dat +} -vars <- c( - "lm_gsf_mu_bsld", # 60 - "lm_gsf_mu_phi", # 0.4 0.6 - "lm_gsf_mu_kg", # 0.15 0.2 - "lm_gsf_mu_ks", # 0.2 0.25 - "lm_gsf_sigma", # 0.003 - "lm_gsf_omega_bsld", # 0.1 - "lm_gsf_omega_kg", # 0.1 - "lm_gsf_omega_phi", # 0.1 - "lm_gsf_omega_ks" # 0.1 -) - - +summary_post(mp@results, c("lm_gsf_mu_bsld", "lm_gsf_mu_kg", "lm_gsf_mu_ks"), TRUE) +summary_post(mp@results, c("lm_gsf_beta", "lm_gsf_gamma", "lm_gsf_a_phi", "lm_gsf_b_phi")) -mp@results$summary(vars) diff --git a/inst/WORDLIST b/inst/WORDLIST index a3219574..ccdcf4df 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -111,4 +111,7 @@ le lt gt mathbb +gk +xk +LogNormal AST diff --git a/inst/stan/lm-gsf/model.stan b/inst/stan/lm-gsf/model.stan index 40b2cf22..cdb35f5b 100755 --- a/inst/stan/lm-gsf/model.stan +++ b/inst/stan/lm-gsf/model.stan @@ -7,25 +7,31 @@ parameters{ // Source - lm-gsf/model.stan // - // Population parameters. - vector[n_studies] lm_gsf_mu_bsld; - vector[n_arms] lm_gsf_mu_ks; - vector[n_arms] lm_gsf_mu_kg; - vector[n_arms] lm_gsf_mu_phi; + vector[n_studies] lm_gsf_mu_bsld; + vector[n_arms] lm_gsf_mu_ks; + vector[n_arms] lm_gsf_mu_kg; real lm_gsf_omega_bsld; real lm_gsf_omega_ks; real lm_gsf_omega_kg; - real lm_gsf_omega_phi; - // Standard deviation for RUV. - real lm_gsf_sigma; - - // Random effects. +{% if centered -%} + vector[Nind] lm_gsf_psi_bsld; + vector[Nind] lm_gsf_psi_ks; + vector[Nind] lm_gsf_psi_kg; +{% else -%} vector[Nind] lm_gsf_eta_tilde_bsld; vector[Nind] lm_gsf_eta_tilde_ks; vector[Nind] lm_gsf_eta_tilde_kg; - vector[Nind] lm_gsf_eta_tilde_phi; +{%- endif -%} + + // Phi Parameters + vector[Nind] lm_gsf_psi_phi; + vector[n_arms] lm_gsf_a_phi; + vector[n_arms] lm_gsf_b_phi; + + // Standard deviation of the error term + real lm_gsf_sigma; } @@ -38,22 +44,17 @@ transformed parameters{ // Source - lm-gsf/model.stan // - // Non-centered reparametrization for hierarchical models. - vector[Nind] lm_gsf_psi_bsld = exp( - log(lm_gsf_mu_bsld[pt_study_index]) + lm_gsf_eta_tilde_bsld * lm_gsf_omega_bsld - ); - - vector[Nind] lm_gsf_psi_ks = exp( - log(lm_gsf_mu_ks[pt_arm_index]) + lm_gsf_eta_tilde_ks * lm_gsf_omega_ks +{% if not centered -%} + vector[Nind] lm_gsf_psi_bsld = exp( + lm_gsf_mu_bsld[pt_study_index] + (lm_gsf_eta_tilde_bsld * lm_gsf_omega_bsld) ); - - vector[Nind] lm_gsf_psi_kg = exp( - log(lm_gsf_mu_kg[pt_arm_index]) + lm_gsf_eta_tilde_kg * lm_gsf_omega_kg + vector[Nind] lm_gsf_psi_ks = exp( + lm_gsf_mu_ks[pt_arm_index] + (lm_gsf_eta_tilde_ks * lm_gsf_omega_ks) ); - - vector[Nind] lm_gsf_psi_phi = inv_logit( - logit(lm_gsf_mu_phi[pt_arm_index]) + lm_gsf_eta_tilde_phi * lm_gsf_omega_phi + vector[Nind] lm_gsf_psi_kg = exp( + lm_gsf_mu_kg[pt_arm_index] + (lm_gsf_eta_tilde_kg * lm_gsf_omega_kg) ); +{%- endif -%} vector[Nta_total] Ypred; @@ -65,8 +66,6 @@ transformed parameters{ lm_gsf_psi_phi[ind_index] ); - - // Reverse implementation from Rstan helper function log_lik += csr_matrix_times_vector( Nind, Nta_obs_y, @@ -97,6 +96,19 @@ transformed parameters{ } +model { + // + // Source - lm-gsf/model.stan + // +{% if centered %} + lm_gsf_psi_bsld ~ lognormal(lm_gsf_mu_bsld[pt_study_index], lm_gsf_omega_bsld); + lm_gsf_psi_ks ~ lognormal(lm_gsf_mu_ks[pt_arm_index], lm_gsf_omega_ks); + lm_gsf_psi_kg ~ lognormal(lm_gsf_mu_kg[pt_arm_index], lm_gsf_omega_kg); +{%- endif -%} + lm_gsf_psi_phi ~ beta(lm_gsf_a_phi[pt_arm_index], lm_gsf_b_phi[pt_arm_index]); +} + + generated quantities { // // Source - lm-gsf/model.stan diff --git a/man/LongitudinalGSF-class.Rd b/man/LongitudinalGSF-class.Rd index 078286e5..bb384114 100644 --- a/man/LongitudinalGSF-class.Rd +++ b/man/LongitudinalGSF-class.Rd @@ -8,15 +8,20 @@ \title{\code{LongitudinalGSF}} \usage{ LongitudinalGSF( - mu_bsld = prior_lognormal(log(55), 5, init = 55), - mu_ks = prior_lognormal(log(0.1), 0.5, init = 0.1), - mu_kg = prior_lognormal(log(0.1), 1, init = 0.1), - mu_phi = prior_beta(2, 8, init = 0.2), - omega_bsld = prior_lognormal(log(0.1), 1, init = 0.1), - omega_ks = prior_lognormal(log(0.1), 1, init = 0.1), - omega_kg = prior_lognormal(log(0.1), 1, init = 0.1), - omega_phi = prior_lognormal(log(0.1), 1, init = 0.1), - sigma = prior_lognormal(log(0.1), 0.8, init = 0.1) + mu_bsld = prior_normal(log(60), 1, init = 60), + mu_ks = prior_normal(log(0.5), 1, init = 0.5), + mu_kg = prior_normal(log(0.3), 1, init = 0.3), + omega_bsld = prior_lognormal(log(0.2), 1, init = 0.2), + omega_ks = prior_lognormal(log(0.2), 1, init = 0.2), + omega_kg = prior_lognormal(log(0.2), 1, init = 0.2), + a_phi = prior_lognormal(log(5), 1, init = 5), + b_phi = prior_lognormal(log(5), 1, init = 5), + sigma = prior_lognormal(log(0.1), 1, init = 0.1), + psi_bsld = prior_none(init = 60), + psi_ks = prior_none(init = 0.5), + psi_kg = prior_none(init = 0.5), + psi_phi = prior_none(init = 0.5), + centered = FALSE ) } \arguments{ @@ -26,17 +31,31 @@ LongitudinalGSF( \item{mu_kg}{(\code{Prior})\cr for the mean growth rate \code{mu_kg}.} -\item{mu_phi}{(\code{Prior})\cr for the mean shrinkage proportion \code{mu_phi}.} - \item{omega_bsld}{(\code{Prior})\cr for the baseline value standard deviation \code{omega_bsld}.} \item{omega_ks}{(\code{Prior})\cr for the shrinkage rate standard deviation \code{omega_ks}.} \item{omega_kg}{(\code{Prior})\cr for the growth rate standard deviation \code{omega_kg}.} -\item{omega_phi}{(\code{Prior})\cr for the shrinkage proportion standard deviation \code{omega_phi}.} +\item{a_phi}{(\code{Prior})\cr for the alpha parameter for the fraction of cells that respond to treatment.} + +\item{b_phi}{(\code{Prior})\cr for the beta parameter for the fraction of cells that respond to treatment.} \item{sigma}{(\code{Prior})\cr for the variance of the longitudinal values \code{sigma}.} + +\item{psi_bsld}{(\code{Prior})\cr for the baseline value random effect \code{psi_bsld}. Only used in the +centered parameterization to set the initial value.} + +\item{psi_ks}{(\code{Prior})\cr for the shrinkage rate random effect \code{psi_ks}. Only used in the +centered parameterization to set the initial value.} + +\item{psi_kg}{(\code{Prior})\cr for the growth rate random effect \code{psi_kg}. Only used in the +centered parameterization to set the initial value.} + +\item{psi_phi}{(\code{Prior})\cr for the shrinkage proportion random effect \code{psi_phi}. Only used in the +centered parameterization to set the initial value.} + +\item{centered}{(\code{logical})\cr whether to use the centered parameterization.} } \description{ This class extends the general \code{\link{LongitudinalModel}} class for using the diff --git a/man/sim_lm_gsf.Rd b/man/sim_lm_gsf.Rd index 1407b823..1ce262f2 100644 --- a/man/sim_lm_gsf.Rd +++ b/man/sim_lm_gsf.Rd @@ -6,14 +6,14 @@ \usage{ sim_lm_gsf( sigma = 0.01, - mu_s = c(3, 4), - mu_g = c(0.2, 0.3), - mu_phi = c(0.1, 0.2), - mu_b = 50, - omega_b = 0.135, - omega_s = 0.15, - omega_g = 0.225, - omega_phi = 0.75, + mu_s = c(0.6, 0.4), + mu_g = c(0.25, 0.35), + mu_b = 60, + a_phi = c(4, 6), + b_phi = c(4, 6), + omega_b = 0.2, + omega_s = 0.2, + omega_g = 0.2, link_dsld = 0, link_ttg = 0, link_identity = 0 @@ -26,18 +26,18 @@ sim_lm_gsf( \item{mu_g}{(\code{numeric})\cr the mean growth rates for the two treatment arms.} -\item{mu_phi}{(\code{numeric})\cr the mean shrinkage proportions for the two treatment arms.} - \item{mu_b}{(\code{numeric})\cr the mean baseline values for the two treatment arms.} +\item{a_phi}{(\code{number})\cr the alpha parameter for the fraction of cells that respond to treatment.} + +\item{b_phi}{(\code{number})\cr the beta parameter for the fraction of cells that respond to treatment.} + \item{omega_b}{(\code{number})\cr the baseline value standard deviation.} \item{omega_s}{(\code{number})\cr the shrinkage rate standard deviation.} \item{omega_g}{(\code{number})\cr the growth rate standard deviation.} -\item{omega_phi}{(\code{number})\cr the shrinkage proportion standard deviation.} - \item{link_dsld}{(\code{number})\cr the link coefficient for the derivative contribution.} \item{link_ttg}{(\code{number})\cr the link coefficient for the time-to-growth contribution.} diff --git a/tests/testthat/_snaps/JointModel.md b/tests/testthat/_snaps/JointModel.md index ea4b0ee8..f2fd02ab 100644 --- a/tests/testthat/_snaps/JointModel.md +++ b/tests/testthat/_snaps/JointModel.md @@ -63,19 +63,19 @@ Longitudinal: Generalized Stein-Fojo Longitudinal Model with parameters: - lm_gsf_mu_bsld ~ lognormal(mu = 4.00733318523247, sigma = 5) - lm_gsf_mu_ks ~ lognormal(mu = -2.30258509299405, sigma = 0.5) - lm_gsf_mu_kg ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_mu_phi ~ beta(a = 2, b = 8) - lm_gsf_omega_bsld ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_ks ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_kg ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_phi ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_sigma ~ lognormal(mu = -2.30258509299405, sigma = 0.8) + lm_gsf_mu_bsld ~ normal(mu = 4.09434, sigma = 1) + lm_gsf_mu_ks ~ normal(mu = -0.69315, sigma = 1) + lm_gsf_mu_kg ~ normal(mu = -1.20397, sigma = 1) + lm_gsf_omega_bsld ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_omega_ks ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_omega_kg ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_a_phi ~ lognormal(mu = 1.60944, sigma = 1) + lm_gsf_b_phi ~ lognormal(mu = 1.60944, sigma = 1) + lm_gsf_psi_phi ~ + lm_gsf_sigma ~ lognormal(mu = -2.30259, sigma = 1) lm_gsf_eta_tilde_bsld ~ std_normal() lm_gsf_eta_tilde_ks ~ std_normal() lm_gsf_eta_tilde_kg ~ std_normal() - lm_gsf_eta_tilde_phi ~ std_normal() Link: No Link diff --git a/tests/testthat/_snaps/LongitudinalGSF.md b/tests/testthat/_snaps/LongitudinalGSF.md index 4161de3c..a6afe95d 100644 --- a/tests/testthat/_snaps/LongitudinalGSF.md +++ b/tests/testthat/_snaps/LongitudinalGSF.md @@ -6,41 +6,41 @@ Output Generalized Stein-Fojo Longitudinal Model with parameters: - lm_gsf_mu_bsld ~ lognormal(mu = 4.00733318523247, sigma = 5) - lm_gsf_mu_ks ~ lognormal(mu = -2.30258509299405, sigma = 0.5) - lm_gsf_mu_kg ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_mu_phi ~ beta(a = 2, b = 8) - lm_gsf_omega_bsld ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_ks ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_kg ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_phi ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_sigma ~ lognormal(mu = -2.30258509299405, sigma = 0.8) + lm_gsf_mu_bsld ~ normal(mu = 4.09434, sigma = 1) + lm_gsf_mu_ks ~ normal(mu = -0.69315, sigma = 1) + lm_gsf_mu_kg ~ normal(mu = -1.20397, sigma = 1) + lm_gsf_omega_bsld ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_omega_ks ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_omega_kg ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_a_phi ~ lognormal(mu = 1.60944, sigma = 1) + lm_gsf_b_phi ~ lognormal(mu = 1.60944, sigma = 1) + lm_gsf_psi_phi ~ + lm_gsf_sigma ~ lognormal(mu = -2.30259, sigma = 1) lm_gsf_eta_tilde_bsld ~ std_normal() lm_gsf_eta_tilde_ks ~ std_normal() lm_gsf_eta_tilde_kg ~ std_normal() - lm_gsf_eta_tilde_phi ~ std_normal() --- Code - x <- LongitudinalGSF(sigma = prior_normal(0, 1), mu_phi = prior_gamma(2, 1)) + x <- LongitudinalGSF(sigma = prior_normal(0, 1), mu_kg = prior_gamma(2, 1)) print(x) Output Generalized Stein-Fojo Longitudinal Model with parameters: - lm_gsf_mu_bsld ~ lognormal(mu = 4.00733318523247, sigma = 5) - lm_gsf_mu_ks ~ lognormal(mu = -2.30258509299405, sigma = 0.5) - lm_gsf_mu_kg ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_mu_phi ~ gamma(alpha = 2, beta = 1) - lm_gsf_omega_bsld ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_ks ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_kg ~ lognormal(mu = -2.30258509299405, sigma = 1) - lm_gsf_omega_phi ~ lognormal(mu = -2.30258509299405, sigma = 1) + lm_gsf_mu_bsld ~ normal(mu = 4.09434, sigma = 1) + lm_gsf_mu_ks ~ normal(mu = -0.69315, sigma = 1) + lm_gsf_mu_kg ~ gamma(alpha = 2, beta = 1) + lm_gsf_omega_bsld ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_omega_ks ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_omega_kg ~ lognormal(mu = -1.60944, sigma = 1) + lm_gsf_a_phi ~ lognormal(mu = 1.60944, sigma = 1) + lm_gsf_b_phi ~ lognormal(mu = 1.60944, sigma = 1) + lm_gsf_psi_phi ~ lm_gsf_sigma ~ normal(mu = 0, sigma = 1) lm_gsf_eta_tilde_bsld ~ std_normal() lm_gsf_eta_tilde_ks ~ std_normal() lm_gsf_eta_tilde_kg ~ std_normal() - lm_gsf_eta_tilde_phi ~ std_normal() diff --git a/tests/testthat/_snaps/SurvivalLoglogistic.md b/tests/testthat/_snaps/SurvivalLoglogistic.md index 80cdacd9..d0cd51f6 100644 --- a/tests/testthat/_snaps/SurvivalLoglogistic.md +++ b/tests/testthat/_snaps/SurvivalLoglogistic.md @@ -6,7 +6,7 @@ Output Log-Logistic Survival Model with parameters: - sm_logl_lambda ~ lognormal(mu = -2.30258509299405, sigma = 5) + sm_logl_lambda ~ lognormal(mu = -2.30259, sigma = 5) sm_logl_p ~ gamma(alpha = 2, beta = 5) beta_os_cov ~ normal(mu = 0, sigma = 5) @@ -20,7 +20,7 @@ Output Log-Logistic Survival Model with parameters: - sm_logl_lambda ~ lognormal(mu = -2.30258509299405, sigma = 5) + sm_logl_lambda ~ lognormal(mu = -2.30259, sigma = 5) sm_logl_p ~ cauchy(mu = 0, sigma = 1) beta_os_cov ~ gamma(alpha = 3, beta = 4) diff --git a/tests/testthat/helper-setup.R b/tests/testthat/helper-setup.R index 7e0c5458..837985aa 100644 --- a/tests/testthat/helper-setup.R +++ b/tests/testthat/helper-setup.R @@ -4,3 +4,8 @@ CACHE_DIR <- if (Sys.getenv("JMPOST_CACHE_DIR") == "") { } else { Sys.getenv("JMPOST_CACHE_DIR") } + + +is_full_test <- function() { + toupper(Sys.getenv("JMPOST_FULL_TEST")) == "TRUE" +} diff --git a/tests/testthat/test-LongitudinalGSF.R b/tests/testthat/test-LongitudinalGSF.R index 46d6397e..948b7679 100644 --- a/tests/testthat/test-LongitudinalGSF.R +++ b/tests/testthat/test-LongitudinalGSF.R @@ -5,19 +5,89 @@ test_that("LongitudinalGSF works as expected with default arguments", { expect_s4_class(result, "LongitudinalGSF") }) -test_that("LongitudinalGSF works as expected with a single study", { - set.seed(123) - jlist <- suppressMessages(simulate_joint_data( - times = 0:2000, - lm_fun = sim_lm_random_slope(), - os_fun = sim_os_exponential(1 / 200) + + +test_that("Print method for LongitudinalGSF works as expected", { + + expect_snapshot({ + x <- LongitudinalGSF() + print(x) + }) + + expect_snapshot({ + x <- LongitudinalGSF( + sigma = prior_normal(0, 1), + mu_kg = prior_gamma(2, 1) + ) + print(x) + }) +}) + + +test_that("Centralised parameterisation compiles without issues", { + jm <- JointModel(longitudinal = LongitudinalGSF(centered = TRUE)) + expect_false(any( + c("lm_gsf_eta_tilde_kg", "lm_gsf_eta_tilde_bsld") %in% names(jm@parameters) + )) + expect_true(all( + c("lm_gsf_psi_kg", "lm_gsf_psi_bsld") %in% names(jm@parameters) )) + compileStanModel(jm) +}) + + +test_that("Non-Centralised parameterisation compiles without issues", { + jm <- JointModel(longitudinal = LongitudinalGSF(centered = FALSE)) + expect_true(all( + c("lm_gsf_eta_tilde_kg", "lm_gsf_eta_tilde_bsld") %in% names(jm@parameters) + )) + expect_false(any( + c("lm_gsf_psi_kg", "lm_gsf_psi_bsld") %in% names(jm@parameters) + )) + compileStanModel(jm) +}) + + +test_that("Can recover known distributional parameters from a full GSF joint model", { + + skip_if_not(is_full_test()) + + set.seed(7143) + jlist <- simulate_joint_data( + .debug = TRUE, + n_arm = c(80, 100), + times = seq(0, 3, by = (1 / 365) / 2), + lambda_cen = 1 / 9000, + beta_cat = c( + "A" = 0, + "B" = -0.1, + "C" = 0.5 + ), + beta_cont = 0.3, + lm_fun = sim_lm_gsf( + sigma = 0.01, + mu_s = c(0.6, 0.4), + mu_g = c(0.25, 0.35), + mu_b = 60, + a_phi = c(4, 6), + b_phi = c(4, 6), + omega_b = 0.2, + omega_s = 0.2, + omega_g = 0.2, + link_dsld = 0.1, + link_ttg = 0.2, + link_identity = 0 + ), + os_fun = sim_os_exponential(1 / (400 / 365)) + ) + + set.seed(333) + select_times <- sample(jlist$lm$time, 12) dat_os <- jlist$os dat_lm <- jlist$lm |> - dplyr::filter(time %in% c(0, 50, 100, 150, 200, 250, 300)) |> - dplyr::arrange(time, pt) |> - dplyr::filter(observed) + dplyr::filter(time %in% select_times) |> + dplyr::arrange(time, pt) jdat <- DataJoint( subject = DataSubject( @@ -37,37 +107,78 @@ test_that("LongitudinalGSF works as expected with a single study", { ) jm <- JointModel( - link = LinkGSF(), - longitudinal = LongitudinalGSF(), - survival = SurvivalExponential() + longitudinal = LongitudinalGSF( + mu_bsld = prior_normal(log(60), 1), + mu_ks = prior_normal(log(0.6), 1), + mu_kg = prior_normal(log(0.3), 1), + omega_bsld = prior_lognormal(log(0.2), 1), + omega_ks = prior_lognormal(log(0.2), 1), + omega_kg = prior_lognormal(log(0.2), 1), + a_phi = prior_lognormal(log(6), 1), + b_phi = prior_lognormal(log(8), 1), + sigma = prior_lognormal(log(0.01), 1), + centered = TRUE + ), + survival = SurvivalExponential( + lambda = prior_lognormal(log(1 / (400 / 365)), 1) + ), + link = LinkGSF( + link_gsf_dsld(), + link_gsf_ttg() + ) ) mp <- suppressWarnings(sampleStanModel( jm, data = jdat, - iter_sampling = 75, - iter_warmup = 75, - chains = 1, - refresh = 0, - parallel_chains = 1 + iter_warmup = 400, + iter_sampling = 800, + chains = 2, + refresh = 200, + parallel_chains = 2 )) - expect_s4_class(mp, "JointModelSamples") - expect_true(mp@results$summary("lm_gsf_mu_bsld")$ess_tail > 5) -}) + summary_post <- function(model, vars, exp = FALSE) { + no_name_quant <- \(...) { + x <- quantile(...) + names(x) <- NULL + x + } + dat <- model$summary( + vars, + mean = mean, + q01 = \(x) no_name_quant(x, 0.01), + q99 = \(x) no_name_quant(x, 0.99), + rhat = posterior::rhat, + ess_bulk = posterior::ess_bulk, + ess_tail = posterior::ess_tail + ) + if (exp) { + dat$q01 <- dat$q01 |> exp() + dat$q99 <- dat$q99 |> exp() + dat$mean <- dat$mean |> exp() + } + dat + } -test_that("Print method for LongitudinalGSF works as expected", { + dat <- summary_post( + mp@results, + c("lm_gsf_mu_bsld", "lm_gsf_mu_ks", "lm_gsf_mu_kg"), + TRUE + ) - expect_snapshot({ - x <- LongitudinalGSF() - print(x) - }) + true_values <- c(60, 0.6, 0.4, 0.25, 0.35) + expect_true(all(dat$q01 <= true_values)) + expect_true(all(dat$q99 >= true_values)) + expect_true(all(dat$ess_bulk > 100)) - expect_snapshot({ - x <- LongitudinalGSF( - sigma = prior_normal(0, 1), - mu_phi = prior_gamma(2, 1) - ) - print(x) - }) + dat <- summary_post( + mp@results, + c("lm_gsf_beta", "lm_gsf_gamma", "lm_gsf_a_phi", "lm_gsf_b_phi", "sm_exp_lambda") + ) + + true_values <- c(0.1, 0.2, 4, 8, 4, 8, 1 / (1 / (400 / 365))) + expect_true(all(dat$q01 <= true_values)) + expect_true(all(dat$q99 >= true_values)) + expect_true(all(dat$ess_bulk > 100)) }) diff --git a/tests/testthat/test-simulations.R b/tests/testthat/test-simulations.R index 6f1efefc..2f521ad1 100644 --- a/tests/testthat/test-simulations.R +++ b/tests/testthat/test-simulations.R @@ -13,12 +13,12 @@ test_that("sim_lm_gsf works as expected", { sigma = 0.003, mu_s = c(0.2, 0.25), mu_g = c(0.15, 0.2), - mu_phi = c(0.4, 0.6), mu_b = 60, omega_b = 0.1, omega_s = 0.1, omega_g = 0.1, - omega_phi = 0.2, + a_phi = c(4, 6), + b_phi = c(4, 6), link_dsld = 0.5, link_ttg = 1 )) @@ -81,12 +81,12 @@ test_that("simulate_joint_data works as expected", { sigma = 0.003, mu_s = c(0.2, 0.25), mu_g = c(0.15, 0.2), - mu_phi = c(0.4, 0.6), mu_b = 60, omega_b = 0.1, omega_s = 0.1, omega_g = 0.1, - omega_phi = 0.2, + a_phi = c(4, 6), + b_phi = c(4, 6), link_dsld = 0, link_ttg = 0 ), diff --git a/vignettes/statistical-specification.Rmd b/vignettes/statistical-specification.Rmd index 1a430790..3da57fc5 100644 --- a/vignettes/statistical-specification.Rmd +++ b/vignettes/statistical-specification.Rmd @@ -31,6 +31,52 @@ Please note that this document is currently a work-in-progress and does not contain complete information for this package yet. + + +# Longitudinal Model Specification + + +## Generalized Stein-Fojo (GSF) Model + +$$ +\begin{align*} +y_{ij} &\sim \mathcal{N}(SLD_{ij}, SLD_{ij}^2 \sigma^2) \\ \\ +SLD_{ij} &=b_{i} +\left[ + \phi_i e^{-s_{i}t_{ij}}+ + (1-\phi_{i}) e^ {g_{i}t_{ij}} +\right] \\ \\ +b_i &\sim \text{LogNormal}(\mu_{bl(i)}, \omega_b) \\ +s_i &\sim \text{LogNormal}(\mu_{sk(i)}, \omega_s) \\ +g_i &\sim \text{LogNormal}(\mu_{gk(i)}, \omega_g) \\ +\phi &\sim \text{Beta}(a_{\phi k(i)}, b_{\phi k(i)}) +\end{align*} +$$ + +Where: + +* $i$ is the subject index +* $j$ is the visit index +* $y_{ij}$ is the observed tumour measurements +* $SLD_{ij}$ is the expected sum of longest diameter for subject $i$ at time point $j$ +* $t_{ij}$ is the time since first treatment for subject $i$ at visit $j$ +* $b_i$ is the subject baseline SLD measurement +* $s_i$ is the subject kinetics shrinkage parameter +* $g_i$ is the subject kinetics tumour growth parameter +* $\phi_i$ is the subject proportion of cells affected by the treatment +* $k(i)$ is the treatment arm index for subject $i$ +* $l(i)$ is the study index for subject $i$ +* $\mu_{\theta k(i)}$ is the population mean for parameter $\theta$ in group $k(i)$ +* $\omega_{\theta}$ is the population parameter for parameter $\theta$. +* $\eta_{\theta i}$ is a random effects offset on parameter $\theta$ for subject $i$ + + + + + +# Post-Processing + + ## Brier Score The Brier Score is used to measure a models predictive performance. In the case of Survival Models @@ -65,5 +111,9 @@ the censoring distribution). Both of these default options can be changed if req + + + + # References