diff --git a/DESCRIPTION b/DESCRIPTION
index a7cce07c..641b779e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: greta
 Type: Package
 Title: Simple and Scalable Statistical Modelling in R
-Version: 0.2.3
-Date: 2018-01-23
+Version: 0.2.4
+Date: 2018-02-20
 Authors@R: person("Nick", "Golding", role = c("aut", "cre"),
     email = "nick.golding.research@gmail.com")
-Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://www.tensorflow.org> for more information).
+Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://www.tensorflow.org> for more information).
 License: Apache License 2.0
 URL: https://github.com/greta-dev/greta
 BugReports: https://github.com/greta-dev/greta/issues
@@ -40,6 +40,7 @@ Collate:
     'inference.R'
     'install_tensorflow.R'
     'internals.R'
+    'calculate.R'
 Imports:
     R6,
     tensorflow,
diff --git a/NAMESPACE b/NAMESPACE
index f027caf5..973e47e1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -98,6 +98,7 @@ export(bernoulli)
 export(beta)
 export(beta_binomial)
 export(binomial)
+export(calculate)
 export(categorical)
 export(cauchy)
 export(chi_squared)
@@ -116,6 +117,7 @@ export(hypergeometric)
 export(icauchit)
 export(icloglog)
 export(ilogit)
+export(imultilogit)
 export(install_tensorflow)
 export(inverse_gamma)
 export(iprobit)
diff --git a/NEWS b/NEWS
index d94d2089..20f8f935 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,33 @@
+### greta 0.2.4
+================
+
+Fixes:
+
+* improved error checking/messages in `model()`, `%*%`
+* switched docs and examples to always use `<-` for assignment
+* fixed the `n_cores` argument to `model()`
+
+New functionality:
+
+* added a `calculate()` function to compute the values of greta arrays conditional on provided values for others
+* added `imultilogit()` transform
+* added a `chains` argument to `mcmc()`
+* improved HMC self-tuning, including a diagonal Euclidean metric
+
+### greta 0.2.3
+================
+
+Fixes:
+
+* fixed breaking change in extraDistr API (caused test errors on CRAN builds)
+* added dontrun statements to pass CRAN checks on winbuilder
+* fixed breaking change in tensorflow API (1-based indexing)
+
+New functionality:
+
+* added `cumsum()` and `cumprod()` functions
+
+
 ### greta 0.2.2
 ================
diff --git a/R/calculate.R b/R/calculate.R
new file mode 100644
index 00000000..f3e53416
--- /dev/null
+++ b/R/calculate.R
@@ -0,0 +1,219 @@
+# calculating values outside inference
+
+#' @name calculate
+#' @title calculate greta arrays given fixed values
+#' @description Calculate the values that greta arrays would take, given
+#' temporary values for the greta arrays on which they depend, and return them
+#' as numeric R arrays. This can be used to check the behaviour of your model
+#' or make predictions to new data after model fitting.
+#'
+#' @param target a greta array for which to calculate the value
+#' @param values a named list giving temporary values of the greta arrays with
+#' which \code{target} is connected, or an \code{mcmc.list} object returned by
+#' \code{\link{mcmc}}.
+#'
+#' @return A numeric R array with the same dimensions as \code{target}, giving
+#' the values it would take conditioned on the fixed values given by
+#' \code{values}.
+#'
+#' @details The greta arrays named in \code{values} need not be variables, they
+#' can also be other operations or even data.
+#'
+#' At present, if \code{values} is a named list it must contain values for
+#' \emph{all} of the variable greta arrays with which \code{target} is
+#' connected, even if values are given for intermediate operations, or the
+#' target doesn't depend on the variable. That may be relaxed in a future
+#' release.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # define a variable greta array, and another that is calculated from it
+#' # then calculate what value y would take for different values of x
+#' x <- normal(0, 1, dim = 3)
+#' a <- lognormal(0, 1)
+#' y <- sum(x ^ 2) + a
+#' calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))
+#'
+#'
+#' # define a model
+#' alpha <- normal(0, 1)
+#' beta <- normal(0, 1)
+#' sigma <- lognormal(1, 0.1)
+#' mu <- alpha + iris$Petal.Length * beta
+#' distribution(iris$Petal.Width) <- normal(mu, sigma)
+#' m <- model(alpha, beta, sigma)
+#'
+#' # calculate intermediate greta arrays, given some parameter values
+#' calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5))
+#' calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5))
+#'
+#'
+#' # fit the model then calculate samples at a new greta array
+#' draws <- mcmc(m, n_samples = 500)
+#' petal_length_plot <- seq(min(iris$Petal.Length),
+#'                          max(iris$Petal.Length),
+#'                          length.out = 100)
+#' mu_plot <- alpha + petal_length_plot * beta
+#' mu_plot_draws <- calculate(mu_plot, draws)
+#'
+#' # plot the draws
+#' mu_est <- colMeans(mu_plot_draws[[1]])
+#' plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]]))
+#' apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8))
+#' lines(mu_est ~ petal_length_plot, lwd = 2)
+#' }
+#'
+#'
+calculate <- function (target, values) {
+
+  target_name <- deparse(substitute(target))
+
+  if (!inherits(target, "greta_array"))
+    stop ("'target' is not a greta array")
+
+  if (inherits(values, "mcmc.list"))
+    calculate_mcmc.list(target, target_name, values)
+  else
+    calculate_list(target, values)
+
+}
+
+calculate_mcmc.list <- function (target, target_name, values) {
+
+  model_info <- attr(values, "model_info")
+
+  if (is.null(model_info)) {
+    stop ("value is an mcmc.list object, but is not associated with any ",
+          "model information, perhaps it wasn't created with ",
+          "greta::mcmc() ?",
+          call. = FALSE)
+  }
+
+  # copy and refresh the dag
+  dag <- model_info$model$dag$clone()
+  dag$tf_environment <- new.env()
+  dag$tf_graph <- tf$Graph()
+
+  # extend the dag to include this node, as the target
+  dag$build_dag(list(target))
+  self <- dag  # mock for scoping
+  dag$define_tf(log_density = TRUE, gradients = TRUE)
+
+  dag$target_nodes <- list(target$node)
+  names(dag$target_nodes) <- target_name
+
+  example_values <- dag$trace_values()
+  n_trace <- length(example_values)
+
+  # raw draws are either an attribute, or this object
+  model_info <- attr(values, "model_info")
+  draws <- model_info$raw_draws
+
+  # trace the target for each draw in each chain
+  trace <- list()
+  for (i in seq_along(draws)) {
+
+    samples <- apply(draws[[i]],
+                     1,
+                     function (x) {
+                       dag$send_parameters(x)
+                       dag$trace_values()
+                     })
+
+    samples <- as.matrix(samples)
+
+    if (ncol(samples) != n_trace)
+      samples <- t(samples)
+
+    colnames(samples) <- names(example_values)
+
+    trace[[i]] <- coda::mcmc(samples)
+
+  }
+
+  trace <- coda::mcmc.list(trace)
+  attr(trace, "model_info") <- model_info
+  # return this
+  return (trace)
+
+}
+
+calculate_list <- function (target, values) {
+
+  # get the values and their names
+  names <- names(values)
+  stopifnot(length(names) == length(values))
+
+  # get the corresponding greta arrays
+  fixed_greta_arrays <- lapply(names,
+                               get,
+                               envir = parent.frame(n = 2))
+
+  # make sure that's what they are
+  are_greta_arrays <- vapply(fixed_greta_arrays,
+                             inherits,
+                             "greta_array",
+                             FUN.VALUE = FALSE)
+
+  stopifnot(all(are_greta_arrays))
+
+  # make sure the values have the correct dimensions
+  values <- mapply(assign_dim,
+                   values,
+                   fixed_greta_arrays,
+                   SIMPLIFY = FALSE)
+
+  all_greta_arrays <- c(fixed_greta_arrays, list(target))
+
+  # define the dag and TF graph
+  dag <- dag_class$new(all_greta_arrays)
+  dag$define_tf(log_density = FALSE, gradients = FALSE)
+
+  # build and send a dict for the fixed values
+  fixed_nodes <- lapply(fixed_greta_arrays,
+                        member,
+                        'node')
+
+  names(values) <- vapply(fixed_nodes,
+                          dag$tf_name,
+                          FUN.VALUE = "")
+
+  # check that all of the variables are set
+  # list of variable tf names
+  variable_nodes <- dag$node_list[dag$node_types == "variable"]
+  variable_names <- vapply(variable_nodes,
+                           dag$tf_name,
+                           FUN.VALUE = "")
+
+  if (!all(variable_names %in% names(values))) {
+    stop ("values have not been provided for all variables",
+          call. = FALSE)
+  }
+
+  # send it to tf
+  assign("eval_list", values, envir = dag$tf_environment)
+  ex <- expression(with_dict <- do.call(dict, eval_list))
+  eval(ex, envir = dag$tf_environment)
+
+  # evaluate the target there
+  ex <- sprintf("sess$run(%s, feed_dict = with_dict)",
+                dag$tf_name(target$node))
+  result <- eval(parse(text = ex),
+                 envir = dag$tf_environment)
+
+  result
+
+}
+
+# coerce value to have the correct dimensions
+assign_dim <- function (value, greta_array) {
+  array <- strip_unknown_class(greta_array$node$value())
+  if (length(array) != length(value)) {
+    stop ("a provided value has different number of elements",
+          " than the greta array", call. = FALSE)
+  }
+  array[] <- value
+  array
+}
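Worth noting before the next file: the all-variables restriction described in the `@details` block above is the main gotcha when calling `calculate()` with a named list. A minimal sketch of how it surfaces, assuming greta 0.2.4 is attached (the commented-out call shows the failure mode; the quoted message is the one raised by `calculate_list()` above):

```r
library(greta)

x <- normal(0, 1, dim = 3)
a <- lognormal(0, 1)
y <- sum(x ^ 2) + a

# fine: every variable y is connected to (x and a) gets a value
calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))

# errors with "values have not been provided for all variables",
# because a is a variable connected to y but is missing from the list
# calculate(y, list(x = c(0.1, 0.2, 0.3)))
```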
diff --git a/R/dag_class.R b/R/dag_class.R
index df88a6d1..5c3d8b9a 100644
--- a/R/dag_class.R
+++ b/R/dag_class.R
@@ -9,6 +9,7 @@ dag_class <- R6Class(
     node_types = NA,
     node_tf_names = NA,
     tf_environment = NA,
+    tf_graph = NA,
    target_nodes = NA,
     parameters_example = NA,
     tf_float = NA,
@@ -27,12 +28,9 @@ dag_class <- R6Class(
       # find the nodes we care about
       self$target_nodes <- lapply(target_greta_arrays, member, 'node')
 
-      # stash the node names, types, and tf names
-      self$node_types <- vapply(self$node_list, node_type, FUN.VALUE = '')
-      self$node_tf_names <- self$make_names()
-
-      # set up the tf environment
+      # set up the tf environment, with a graph
       self$tf_environment <- new.env()
+      self$tf_graph <- tf$Graph()
 
       # stash an example list to relist parameters
       self$parameters_example <- self$example_parameters(flat = FALSE)
@@ -44,6 +42,17 @@ dag_class <- R6Class(
 
     },
 
+    # execute an expression on this dag's tensorflow graph
+    on_graph = function (expr) {
+      with(self$tf_graph$as_default(), expr)
+    },
+
+    # execute an expression in the tensorflow environment
+    tf_run = function (expr) {
+      self$tf_environment$expr <- substitute(expr)
+      self$on_graph(with(self$tf_environment, eval(expr)))
+    },
+
     # return a list of nodes connected to those in the target node list
     build_dag = function (greta_array_list) {
 
@@ -54,6 +63,10 @@ dag_class <- R6Class(
         node$register_family(self)
       }
 
+      # stash the node names, types, and tf names
+      self$node_types <- vapply(self$node_list, node_type, FUN.VALUE = '')
+      self$node_tf_names <- self$make_names()
+
     },
 
     make_names = function () {
@@ -85,7 +98,7 @@ dag_class <- R6Class(
     },
 
     # define tf graph in environment
-    define_tf = function () {
+    define_tf = function (log_density = TRUE, gradients = TRUE) {
 
       # temporarily pass float type info to options, so it can be accessed by
       # nodes on definition, without clunky explicit passing
@@ -107,28 +120,33 @@ dag_class <- R6Class(
               call. = FALSE)
       }
 
-      # define all nodes, node densities and free states in the environment
-      lapply(self$node_list, function (x) x$define_tf(self))
+      # define all nodes, node densities and free states in the environment, and
+      # on the graph
+      self$on_graph(lapply(self$node_list,
+                           function (x) x$define_tf(self)))
 
       # define an overall log density and gradients, plus adjusted versions
-      self$define_joint_density()
-      self$define_gradients()
+      if (log_density)
+        self$on_graph(self$define_joint_density())
+
+      if (gradients)
+        self$on_graph(self$define_gradients())
 
       # use core and compilation options to create a config object
-      with(self$tf_environment,
-           {config <- tf$ConfigProto(intra_op_parallelism_threads = self$n_cores,
-                                     inter_op_parallelism_threads = self$n_cores)})
+      self$tf_environment$n_cores <- self$n_cores
+      self$tf_run(config <- tf$ConfigProto(inter_op_parallelism_threads = n_cores,
+                                           intra_op_parallelism_threads = n_cores))
 
       if (self$compile) {
-        with(self$tf_environment,
-             {py_set_attr(config$graph_options$optimizer_options,
-                          'global_jit_level',
-                          tf$OptimizerOptions$ON_1)})
+        self$tf_run(py_set_attr(config$graph_options$optimizer_options,
+                                'global_jit_level',
+                                tf$OptimizerOptions$ON_1))
       }
 
       # start a session and initialise all variables
-      with(self$tf_environment,
-           {sess <- tf$Session(config = config); sess$run(tf$global_variables_initializer())})
+      self$tf_run(sess <- tf$Session(config = config))
+      self$tf_run(sess$run(tf$global_variables_initializer()))
 
     },
 
@@ -237,8 +255,7 @@ dag_class <- R6Class(
 
       # create a feed dict in the TF environment
       self$tf_environment$parameters <- parameters
-      with(self$tf_environment,
-           parameter_dict <- do.call(dict, parameters))
+      self$tf_run(parameter_dict <- do.call(dict, parameters))
 
     },
 
@@ -246,15 +263,15 @@ dag_class <- R6Class(
     # variable nodes, with or without applying the jacobian adjustment
     log_density = function(adjusted = TRUE) {
 
-      cleanly(with(self$tf_environment,
-                   sess$run(joint_density_adj, feed_dict = parameter_dict)))
+      cleanly(self$tf_run(sess$run(joint_density_adj,
+                                   feed_dict = parameter_dict)))
 
     },
 
     gradients = function (adjusted = TRUE) {
 
-      cleanly(with(self$tf_environment,
-                   sess$run(gradients_adj, feed_dict = parameter_dict)))
+      cleanly(self$tf_run(sess$run(gradients_adj,
+                                   feed_dict = parameter_dict)))
 
     },
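The two new members added here, `on_graph()` and `tf_run()`, are the heart of this change: every tensorflow call now runs on the dag's own graph rather than the process-wide default, which is what lets `calculate()` clone and extend a dag without trampling the model's graph. A stripped-down sketch of the capture-then-evaluate pattern behind `tf_run()`, in plain R with no tensorflow (`run_in` is an illustrative name, not a greta function):

```r
# mimic tf_run(): stash the caller's unevaluated expression in the target
# environment, then evaluate it there, so lookups and assignments both
# happen inside that environment
run_in <- function (env, expr) {
  env$expr <- substitute(expr)
  with(env, eval(expr))
}

e <- new.env()
e$n_cores <- 4L
run_in(e, config <- list(threads = n_cores))  # n_cores found in e
e$config$threads  # 4: the assignment also landed in e
```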
diff --git a/R/distribution.R b/R/distribution.R
index c11ccc9c..90c994ec 100644
--- a/R/distribution.R
+++ b/R/distribution.R
@@ -23,12 +23,12 @@
 #'
 #' # observed data and mean parameter to be estimated
 #' # (explicitly coerce data to a greta array so we can refer to it later)
-#' y = as_data(rnorm(5, 0, 3))
+#' y <- as_data(rnorm(5, 0, 3))
 #'
-#' mu = uniform(-3, 3)
+#' mu <- uniform(-3, 3)
 #'
 #' # define the distribution over y (the model likelihood)
-#' distribution(y) = normal(mu, 1)
+#' distribution(y) <- normal(mu, 1)
 #'
 #' # get the distribution over y
 #' distribution(y)
diff --git a/R/extract_replace_combine.R b/R/extract_replace_combine.R
index e78086c4..dc8ce94b 100644
--- a/R/extract_replace_combine.R
+++ b/R/extract_replace_combine.R
@@ -43,7 +43,7 @@
 #' @examples
 #' \dontrun{
 #'
-#' x = as_data(matrix(1:12, 3, 4))
+#' x <- as_data(matrix(1:12, 3, 4))
 #'
 #' # extract/replace
 #' x[1:3, ]
diff --git a/R/functions.R b/R/functions.R
index f8a1bcec..0dbf9187 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -86,18 +86,18 @@
 #' @examples
 #' \dontrun{
 #'
-#' x = as_data(matrix(1:9, nrow = 3, ncol = 3))
-#' a = log(exp(x))
-#' b = log1p(expm1(x))
-#' c = sign(x - 5)
-#' d = abs(x - 5)
+#' x <- as_data(matrix(1:9, nrow = 3, ncol = 3))
+#' a <- log(exp(x))
+#' b <- log1p(expm1(x))
+#' c <- sign(x - 5)
+#' d <- abs(x - 5)
 #'
-#' e = diag(x)
+#' e <- diag(x)
 #' diag(x) <- e + 1
 #'
-#' z = t(a)
+#' z <- t(a)
 #'
-#' y = sweep(x, 1, e, '-')
+#' y <- sweep(x, 1, e, '-')
 #' }
 NULL
diff --git a/R/greta_model_class.R b/R/greta_model_class.R
index bb1a45a6..67c2a0ad 100644
--- a/R/greta_model_class.R
+++ b/R/greta_model_class.R
@@ -41,20 +41,18 @@ NULL
 #' @return \code{model} - a \code{greta_model} object.
 #'
 #' @examples
-#'
 #' \dontrun{
 #'
 #' # define a simple model
-#' mu = variable()
-#' sigma = lognormal(1, 0.1)
-#' x = rnorm(10)
-#' distribution(x) = normal(mu, sigma)
+#' mu <- variable()
+#' sigma <- lognormal(1, 0.1)
+#' x <- rnorm(10)
+#' distribution(x) <- normal(mu, sigma)
 #'
 #' m <- model(mu, sigma)
 #'
 #' plot(m)
 #' }
-#'
 model <- function (...,
                    precision = c('single', 'double'),
                    n_cores = NULL,
@@ -83,9 +81,6 @@ model <- function (...,
     }
   }
 
-  # flush all tensors from the default graph
-  tf$reset_default_graph()
-
   # nodes required
   target_greta_arrays <- list(...)
 
@@ -104,6 +99,27 @@ model <- function (...,
 
   }
 
+  # check they are greta arrays
+  are_greta_arrays <- vapply(target_greta_arrays,
+                             inherits, "greta_array",
+                             FUN.VALUE = FALSE)
+
+  if (!all(are_greta_arrays)) {
+
+    unexpected_items <- names(target_greta_arrays)[!are_greta_arrays]
+
+    msg <- ifelse(length(unexpected_items) > 1,
+                  "The following objects passed to model() are not greta arrays: ",
+                  "The following object passed to model() is not a greta array: ")
+
+    stop (msg,
+          paste(unexpected_items, collapse = ", "),
+          call. = FALSE)
+
+  }
+
+
   if (length(target_greta_arrays) == 0) {
     stop ('could not find any non-data greta arrays',
           call. = FALSE)
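A hedged sketch of what the new check buys the user (the object names are illustrative, and the error text is assembled by the `ifelse()` above):

```r
library(greta)

mu <- normal(0, 1)
x <- rnorm(10)  # a plain numeric vector, not a greta array

# previously this failed later with an obscure error; now it should fail fast:
# model(mu, x)
# Error: The following object passed to model() is not a greta array: x

m <- model(mu)  # fine: mu is a greta array
```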
diff --git a/R/inference.R b/R/inference.R
index 5a61ee4c..70c24043 100644
--- a/R/inference.R
+++ b/R/inference.R
@@ -17,12 +17,24 @@ stashed_samples <- function () {
 
   if (stashed) {
 
+    stash <- greta_stash$trace_stash
+
     # get them, remove the NAs, and return
-    draws <- greta_stash$trace_stash
-    draws_clean <- na.omit(draws)
+    draws_clean <- na.omit(stash$trace)
     draws_prepped <- prepare_draws(draws_clean)
+    draws_mcmclist <- mcmc.list(draws_prepped)
+
+    # prep the raw model objects
+    model_info <- new.env()
+    raw_clean <- na.omit(stash$raw)
+    raw_prepped <- prepare_draws(raw_clean)
+    model_info$raw_draws <- mcmc.list(raw_prepped)
+    model_info$model <- greta_stash$model
 
-    return (draws_prepped)
+    # add the raw draws as an attribute
+    attr(draws_mcmclist, "model_info") <- model_info
+
+    return (draws_mcmclist)
 
   } else {
 
@@ -36,8 +48,12 @@
 # they abort a run
 greta_stash <- new.env()
 
-stash_trace <- function (trace)
-  assign('trace_stash', trace, envir = greta_stash)
+stash_trace <- function (trace, raw) {
+  assign('trace_stash',
+         list(trace = trace,
+              raw = raw),
+         envir = greta_stash)
+}
 
 #' @rdname inference
 #' @export
@@ -45,22 +61,23 @@
 #' @importFrom utils setTxtProgressBar txtProgressBar
 #'
 #' @param model greta_model object
-#' @param method the method used to sample or optimise values. Currently only
+#' @param method method used to sample or optimise values. Currently only
 #' one method is available for each procedure: \code{hmc} and \code{adagrad}
-#' @param n_samples the number of MCMC samples to draw (after any warm-up, but
+#' @param n_samples number of MCMC samples to draw (after any warm-up, but
 #' before thinning)
-#' @param thin the MCMC thinning rate; every \code{thin} samples is retained,
+#' @param thin MCMC thinning rate; every \code{thin} samples is retained,
 #' the rest are discarded
-#' @param warmup the number of samples to spend warming up the mcmc sampler.
+#' @param warmup number of samples to spend warming up the mcmc sampler.
 #' During this phase the sampler moves toward the highest density area and
 #' tunes sampler hyperparameters.
+#' @param chains number of MCMC chains to run
 #' @param verbose whether to print progress information to the console
 #' @param pb_update how regularly to update the progress bar (in iterations)
 #' @param control an optional named list of hyperparameters and options to
 #' control behaviour of the sampler or optimiser. See Details.
-#' @param initial_values an optional named vector of initial values for the free
-#' parameters in the model. These will be used as the starting point for
-#' sampling/optimisation
+#' @param initial_values an optional vector (or list of vectors, for multiple
+#' chains) of initial values for the free parameters in the model. These will
+#' be used as the starting point for sampling/optimisation.
 #'
 #' @details For \code{mcmc()} if \code{verbose = TRUE}, the progress bar shows
 #' the number of iterations so far and the expected time to complete the phase
@@ -82,29 +99,31 @@ stash_trace <- function (trace)
 #' (via \code{model}) to have double precision, though this will slow down
 #' sampling.
 #'
-#' Currently, the only implemented MCMC procedure is static Hamiltonian
-#' Monte Carlo (\code{method = "hmc"}). During the warmup iterations, the
-#' leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the
-#' sampler efficiency. The \code{control} argument can be used to specify the
-#' initial value for epsilon, along with two other hyperparameters: \code{Lmin}
-#' and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the
-#' upper and lower limits to the number of leapfrog steps per iteration (from
-#' which the number is selected uniformly at random).
+#' Currently, the only implemented MCMC procedure is static Hamiltonian Monte
+#' Carlo (\code{method = "hmc"}). During the warmup iterations, the leapfrog
+#' stepsize hyperparameter \code{epsilon} is tuned to maximise the sampler
+#' efficiency, and the posterior marginal standard deviations are estimated
+#' to tune \code{diag_sd}. The \code{control} argument can be used to specify
+#' the initial value for \code{epsilon}, \code{diag_sd}, and two other
+#' hyperparameters: \code{Lmin} and \code{Lmax}; positive integers (with
+#' \code{Lmax > Lmin}) giving the upper and lower limits to the number of
+#' leapfrog steps per iteration (from which the number is selected uniformly
+#' at random).
 #'
 #' The default control options for HMC are:
-#' \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)}
+#' \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005, diag_sd = 1)}
 #'
-#' @return \code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object that can be analysed using
-#' functions from the coda package. This will contain mcmc samples of the
-#' greta arrays used to create \code{model}.
+#' @return \code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object
+#' that can be analysed using functions from the coda package. This will
+#' contain mcmc samples of the greta arrays used to create \code{model}.
 #'
 #' @examples
 #' \dontrun{
 #' # define a simple model
-#' mu = variable()
-#' sigma = lognormal(1, 0.1)
-#' x = rnorm(10)
-#' distribution(x) = normal(mu, sigma)
+#' mu <- variable()
+#' sigma <- lognormal(1, 0.1)
+#' x <- rnorm(10)
+#' distribution(x) <- normal(mu, sigma)
 #' m <- model(mu, sigma)
 #'
 #' # carry out mcmc on the model
@@ -117,6 +136,7 @@ mcmc <- function (model,
                   n_samples = 1000,
                   thin = 1,
                   warmup = 100,
+                  chains = 1,
                   verbose = TRUE,
                   pb_update = 10,
                   control = list(),
@@ -124,6 +144,9 @@ mcmc <- function (model,
 
   method <- match.arg(method)
 
+  # store the model
+  greta_stash$model <- model
+
   # find variable names to label samples
   target_greta_arrays <- model$target_greta_arrays
   names <- names(target_greta_arrays)
@@ -145,34 +168,72 @@ mcmc <- function (model,
   # get the dag containing the target nodes
   dag <- model$dag
 
+  param <- dag$example_parameters()
+  n_initial <- length(param)
+
   # random starting locations
   if (is.null(initial_values)) {
 
     # try several times
     valid <- FALSE
     attempts <- 1
-    while (!valid & attempts < 10) {
+    while (!valid & attempts < 20) {
 
-      initial_values <- dag$example_parameters()
-      # increase the jitter each time
-      initial_values[] <- rnorm(length(initial_values), 0, 1 + attempts / 5)
+      initial_values <- replicate(chains,
+                                  rnorm(n_initial, 0, 0.5),
+                                  simplify = FALSE)
 
       # test validity of values
-      valid <- valid_parameters(dag, initial_values)
+      valid <- valid_parameters(initial_values, dag)
       attempts <- attempts + 1
 
     }
 
     if (!valid) {
-      stop ('Could not find reasonable starting values after ', attempts,
-            ' attempts. Please specify initial values manually via the ',
-            'initial_values argument to mcmc',
+      stop ("Could not find reasonable starting values after ", attempts,
+            " attempts. Please specify initial values manually via the ",
+            "initial_values argument to mcmc",
            call. = FALSE)
     }
 
   } else {
 
-    if (!valid_parameters(dag, initial_values)) {
+    # if they provided a list, check it
+    if (is.list(initial_values)) {
+
+      n_sets <- length(initial_values)
+
+      if (n_sets != chains) {
+        stop (n_sets, " sets of initial values were provided, but there ",
+              ifelse(chains > 1, "are ", "is only "), chains, " chain",
+              ifelse(chains > 1, "s", ""),
+              call. = FALSE)
+      }
+
+      n_param <- vapply(initial_values, length, FUN.VALUE = 0)
+
+      if (!all(n_param == n_initial)) {
+        stop ("each set of initial values must be a vector of length ",
+              n_initial,
+              call. = FALSE)
+      }
+
+    } else {
+
+      # replicate
+      initial_values <- replicate(chains,
+                                  initial_values,
+                                  simplify = FALSE)
+
+      if (chains > 1) {
+        message ("\nonly one set of initial values was given, ",
+                 "and was used for all chains\n")
+      }
+
+    }
+
+    # check they are valid
+    if (!valid_parameters(initial_values, dag)) {
       stop ('The log density and gradients could not be evaluated at these ',
             'initial values.',
            call. = FALSE)
@@ -185,7 +246,8 @@ mcmc <- function (model,
   con <- switch(method,
                 hmc = list(Lmin = 10,
                            Lmax = 20,
-                           epsilon = 0.005))
+                           epsilon = 0.005,
+                           diag_sd = 1))
 
   # update them with user overrides
   con[names(control)] <- control
@@ -194,6 +256,50 @@ mcmc <- function (model,
   method <- switch(method,
                    hmc = hmc)
 
+  print_chain <- chains > 1
+
+  chains_list <- lapply(seq_len(chains),
+                        run_chain,
+                        dag = dag,
+                        method = method,
+                        n_samples = n_samples,
+                        thin = thin,
+                        warmup = warmup,
+                        chains = chains,
+                        verbose = verbose,
+                        pb_update = pb_update,
+                        control = con,
+                        initial_values = initial_values,
+                        print_chain = print_chain)
+
+  # get raw_draws
+  raw_list <- lapply(chains_list,
+                     function (x) {
+                       attr(x, "model_info")$raw_draws[[1]]
+                     })
+  chains_list <- lapply(chains_list, `[[`, 1)
+
+  model_info <- new.env()
+  model_info$raw_draws <- do.call(mcmc.list, raw_list)
+  model_info$model <- greta_stash$model
+  chains <- do.call(mcmc.list, chains_list)
+  attr(chains, "model_info") <- model_info
+  chains
+
+}
+
+
+run_chain <- function (chain, dag, method, n_samples, thin,
+                       warmup, chains, verbose, pb_update,
+                       control, initial_values, print_chain) {
+
+  if (print_chain) {
+    msg <- sprintf("\nchain %i/%i\n", chain, chains)
+    cat(msg)
+  }
+
+  initial_values_chain <- initial_values[[chain]]
+
   # if warmup is required, do that now and update init
   if (warmup > 0) {
 
@@ -204,18 +310,18 @@
 
     # run it
     warmup_draws <- method(dag = dag,
-                           init = initial_values,
+                           init = initial_values_chain,
                            n_samples = warmup,
                            thin = thin,
                            verbose = verbose,
                            pb = pb_warmup,
                            tune = TRUE,
                            stash = FALSE,
-                           control = con)
+                           control = control)
 
     # use the last draw of the full parameter vector as the init
-    initial_values <- attr(warmup_draws, 'last_x')
-    con <- attr(warmup_draws, 'control')
+    initial_values_chain <- attr(warmup_draws, 'last_x')
+    control <- attr(warmup_draws, 'control')
 
   }
 
@@ -226,27 +332,27 @@
 
   # run the sampler
   draws <- method(dag = dag,
-                  init = initial_values,
+                  init = initial_values_chain,
                   n_samples = n_samples,
                   thin = thin,
                   verbose = verbose,
                   pb = pb_sampling,
                   tune = FALSE,
                   stash = TRUE,
-                  control = con)
+                  control = control)
 
   # if this was successful, trash the stash, prepare and return the draws
   rm('trace_stash', envir = greta_stash)
 
-  prepare_draws(draws)
+  draws
 
 }
 
+
 #' @importFrom coda mcmc mcmc.list
 prepare_draws <- function (draws) {
   # given a matrix of draws returned by the sampler, prepare it and return
   draws_df <- data.frame(draws)
-  draws_mcmc <- coda::mcmc(draws_df)
-  coda::mcmc.list(draws_mcmc)
+  coda::mcmc(draws_df)
 }
 
@@ -286,8 +392,13 @@ opt <- function (model,
                  control = list(),
                  initial_values = NULL) {
 
+  # mock up some names to avoid CRAN-check note
+  optimiser <- joint_density <- sess <- NULL
+
   # get the tensorflow environment
   tfe <- model$dag$tf_environment
+  on_graph <- model$dag$on_graph
+  tf_run <- model$dag$tf_run
 
   # get the method
   method <- match.arg(method)
@@ -304,8 +415,8 @@ opt <- function (model,
   con[names(control)] <- control
 
   # set up optimiser
-  tfe$optimiser <- do.call(optimise_fun, con)
-  with(tfe, train <- optimiser$minimize(-joint_density))
+  on_graph(tfe$optimiser <- do.call(optimise_fun, con))
+  tf_run(train <- optimiser$minimize(-joint_density))
 
   # random initial values if unspecified
   if (is.null(initial_values)) {
@@ -314,16 +425,16 @@ opt <- function (model,
   }
 
   # initialize the variables, then set the ones we care about
-  with(tfe, sess$run(tf$global_variables_initializer()))
+  tf_run(sess$run(tf$global_variables_initializer()))
 
   parameters <- relist_tf(initial_values, model$dag$parameters_example)
   for (i in seq_along(parameters)) {
     variable_name <- paste0(names(parameters)[i], '_free')
     vble <- tfe[[variable_name]]
-    init <- tf$constant(parameters[[i]],
-                        shape = vble$shape,
-                        dtype = tf_float())
-    tmp <- tfe$sess$run(vble$assign(init))
+    on_graph(init <- tf$constant(parameters[[i]],
+                                 shape = vble$shape,
+                                 dtype = tf_float()))
+    . <- on_graph(tfe$sess$run(vble$assign(init)))
   }
 
   diff <- old_obj <- Inf
@@ -331,14 +442,14 @@ opt <- function (model,
   while (it < max_iterations & diff > tolerance) {
     it <- it + 1
-    with(tfe, sess$run(train))
-    obj <- with(tfe, sess$run(-joint_density))
+    tf_run(sess$run(train))
+    obj <- tf_run(sess$run(-joint_density))
     diff <- abs(old_obj - obj)
     old_obj <- obj
   }
 
   list(par = model$dag$trace_values(),
-       value = with(tfe, sess$run(joint_density)),
+       value = tf_run(sess$run(joint_density)),
        iterations = it,
        convergence = ifelse(it < max_iterations, 0, 1))
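A sketch of the new multi-chain interface, assuming the model `m` defined in the roxygen example above (two free parameters, `mu` and the free state of `sigma`, so each set of initial values is a length-two vector):

```r
# one set of initial values per chain; a single vector would instead be
# replicated across chains, with a message
draws <- mcmc(m,
              n_samples = 1000,
              warmup = 100,
              chains = 2,
              initial_values = list(c(0.1, 0.1),
                                    c(-0.1, 0.2)))

# draws is an mcmc.list with one element per chain, so the usual
# multi-chain coda diagnostics now apply
coda::gelman.diag(draws)
```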
diff --git a/R/operators.R b/R/operators.R
index 5e0eeb42..c7b7b625 100644
--- a/R/operators.R
+++ b/R/operators.R
@@ -40,23 +40,23 @@
 #' @examples
 #' \dontrun{
 #'
-#' x = as_data(-1:12)
+#' x <- as_data(-1:12)
 #'
 #' # arithmetic
-#' a = x + 1
-#' b = 2 * x + 3
-#' c = x %% 2
-#' d = x %/% 5
+#' a <- x + 1
+#' b <- 2 * x + 3
+#' c <- x %% 2
+#' d <- x %/% 5
 #'
 #' # logical
-#' e = (x > 1) | (x < 1)
-#' f = e & (x < 2)
-#' g = !f
+#' e <- (x > 1) | (x < 1)
+#' f <- e & (x < 2)
+#' g <- !f
 #'
 #' # relational
-#' h = x < 1
-#' i = (-x) >= x
-#' j = h == x
+#' h <- x < 1
+#' i <- (-x) >= x
+#' j <- h == x
 #' }
 NULL
@@ -145,15 +145,16 @@
   y <- elem_list[[2]]
 
   # check they're matrices
-  if (length(dim(x)) != 2 | length(dim(x)) != 2)
-    stop ('only two-dimensional greta arrays can be matrix-multiplied',
+  if (length(dim(x)) != 2 | length(dim(y)) != 2) {
+    stop ("only two-dimensional greta arrays can be matrix-multiplied",
           call. = FALSE)
+  }
 
   # check the dimensions match
   if (dim(x)[2] != dim(y)[1]) {
-    msg <- sprintf('incompatible dimensions: %s vs %s',
-                   paste0(dim(x), collapse = 'x'),
-                   paste0(dim(y), collapse = 'x'))
+    msg <- sprintf("incompatible dimensions: %s vs %s",
+                   paste0(dim(x), collapse = "x"),
+                   paste0(dim(y), collapse = "x"))
     stop (msg)
   }
diff --git a/R/package.R b/R/package.R
index 66061d33..15b1fb3a 100644
--- a/R/package.R
+++ b/R/package.R
@@ -26,13 +26,13 @@
 #' # a simple Bayesian regression model for the iris data
 #'
 #' # priors
-#' int = normal(0, 5)
-#' coef = normal(0, 3)
-#' sd = lognormal(0, 3)
+#' int <- normal(0, 5)
+#' coef <- normal(0, 3)
+#' sd <- lognormal(0, 3)
 #'
 #' # likelihood
 #' mean <- int + coef * iris$Petal.Length
-#' distribution(iris$Sepal.Length) = normal(mean, sd)
+#' distribution(iris$Sepal.Length) <- normal(mean, sd)
 #'
 #' # build and sample
 #' m <- model(int, coef, sd)
diff --git a/R/probability_distributions.R b/R/probability_distributions.R
index 58c9bb99..4f1fda1a 100644
--- a/R/probability_distributions.R
+++ b/R/probability_distributions.R
@@ -537,7 +537,7 @@ cauchy_distribution <- R6Class (
     s <- parameters$scale
 
     log_prob = function (x)
-      -tf$log(fl(pi) * s * (fl(1) + tf$square((x - loc) / s)))
+      tf$negative(tf$log(fl(pi) * s * (fl(1) + tf$square((x - loc) / s))))
 
     cdf = function (x)
       fl(1 / pi) * tf$atan((x - loc) / s) + fl(0.5)
@@ -1359,39 +1359,39 @@ distribution_classes_module <- module(uniform_distribution,
 #' \dontrun{
 #'
 #' # a uniform parameter constrained to be between 0 and 1
-#' phi = uniform(min = 0, max = 1)
+#' phi <- uniform(min = 0, max = 1)
 #'
 #' # a length-three variable, with each element following a standard normal
 #' # distribution
-#' alpha = normal(0, 1, dim = 3)
+#' alpha <- normal(0, 1, dim = 3)
 #'
 #' # a length-three variable of lognormals
-#' sigma = lognormal(0, 3, dim = 3)
+#' sigma <- lognormal(0, 3, dim = 3)
 #'
 #' # a hierarchical uniform, constrained between alpha and alpha + sigma,
-#' eta = alpha + uniform(0, 1, dim = 3) * sigma
+#' eta <- alpha + uniform(0, 1, dim = 3) * sigma
 #'
 #' # a hierarchical distribution
-#' mu = normal(0, 1)
-#' sigma = lognormal(0, 1)
-#' theta = normal(mu, sigma)
+#' mu <- normal(0, 1)
+#' sigma <- lognormal(0, 1)
+#' theta <- normal(mu, sigma)
 #'
 #' # a vector of 3 variables drawn from the same hierarchical distribution
-#' thetas = normal(mu, sigma, dim = 3)
+#' thetas <- normal(mu, sigma, dim = 3)
 #'
 #' # a matrix of 12 variables drawn from the same hierarchical distribution
-#' thetas = normal(mu, sigma, dim = c(3, 4))
+#' thetas <- normal(mu, sigma, dim = c(3, 4))
 #'
 #' # a multivariate normal variable, with correlation between two elements
 #' Sig <- diag(4)
 #' Sig[3, 4] <- Sig[4, 3] <- 0.6
-#' theta = multivariate_normal(rep(mu, 4), Sig)
+#' theta <- multivariate_normal(rep(mu, 4), Sig)
 #'
 #' # 10 independent replicates of that
-#' theta = multivariate_normal(rep(mu, 4), Sig, dim = 10)
+#' theta <- multivariate_normal(rep(mu, 4), Sig, dim = 10)
 #'
 #' # a Wishart variable with the same covariance parameter
-#' theta = wishart(df = 5, Sigma = Sig)
+#' theta <- wishart(df = 5, Sigma = Sig)
 #'
 #' }
 NULL
diff --git a/R/samplers.R b/R/samplers.R
index bf94dfff..cf10e28d 100644
--- a/R/samplers.R
+++ b/R/samplers.R
@@ -8,18 +8,35 @@ hmc <- function (dag,
                  stash = FALSE,
                  control = list(Lmin = 10,
                                 Lmax = 20,
-                                epsilon = 0.005)) {
+                                epsilon = 0.005,
+                                diag_sd = 1)) {
 
   # unpack options
   Lmin <- control$Lmin
   Lmax <- control$Lmax
   epsilon <- control$epsilon
+  diag_sd <- control$diag_sd
 
-  # tuning parameters
-  accept_group = 50
-  target_acceptance = 0.651
-  kappa = 0.75
-  gamma = 0.1
+  if (tune) {
+
+    # when to start and stop each type of tuning, in fractions of the tuning period
+    epsilon_periods <- list(c(0, 0.1), c(0.4, 1))
+    diag_sd_periods <- list(c(0.1, 0.4))
+
+    # epsilon tuning parameters
+    accept_group <- 50
+    target_acceptance <- 0.651
+    kappa <- 0.75
+    gamma <- 0.1
+
+    # initialise welford accumulator for marginal variance
+    diag_sd_update_rate <- 5
+    welford_m <- 0
+    welford_m2 <- 0
+
+    epsilon_trace <- rep(NA, n_samples)
+
+  }
 
   numerical_rejections <- 0
 
@@ -33,9 +50,6 @@ hmc <- function (dag,
   grad <- dag$gradients()
   logprob <- dag$log_density()
 
-  if (tune)
-    epsilon_trace <- rep(NA, n_samples)
-
   # set up trace store (grab values of target variables from graph to get
   # dimension and names)
   init_trace <- dag$trace_values()
@@ -45,9 +59,15 @@ hmc <- function (dag,
                   ncol = n_target)
   colnames(trace) <- names(init_trace)
 
-  # if anything goes awry, stash the trace so far
+  # also trace the raw values, with the dag attached
+  raw <- matrix(NA,
+                nrow = n_samples %/% thin,
+                ncol = length(x))
+  colnames(raw) <- names(dag$example_parameters())
+
+  # if anything goes awry, stash the trace and raw trace so far
   if (stash)
-    on.exit(stash_trace(trace))
+    on.exit(stash_trace(trace, raw))
 
   # track acceptance
   accept_trace <- rep(0, n_samples)
@@ -55,7 +75,7 @@ hmc <- function (dag,
   # get free parameter dimension
   npar <- length(x)
 
-  accept_count <- 0
+  L <- Lmin:Lmax
 
   # loop through iterations
   for (i in 1:n_samples) {
@@ -68,13 +88,14 @@ hmc <- function (dag,
 
     # start leapfrog steps
     reject <- FALSE
-    # p <- p_old + 0.5 * epsilon * grad
-    n_steps <- base::sample(Lmin:Lmax, 1)
+    n_steps <- ifelse(length(L) == 1, L,
+                      base::sample(L, 1))
+
     for (l in seq_len(n_steps)) {
 
       # step
-      p <- p + 0.5 * epsilon * grad
-      x <- x + epsilon * p
+      p <- p + 0.5 * epsilon * grad * diag_sd
+      x <- x + epsilon * p * diag_sd
 
       # send parameters
       dag$send_parameters(x)
@@ -86,7 +107,7 @@ hmc <- function (dag,
         break()
       }
 
-      p <- p + 0.5 * epsilon * grad
+      p <- p + 0.5 * epsilon * grad * diag_sd
 
     }
 
@@ -108,14 +129,11 @@ hmc <- function (dag,
 
       # acceptance ratio
       logprob <- dag$log_density()
-      log_accept_ratio = logprob - p_prod - logprob_old + p_prod_old
-      log_u = log(runif(1))
+      log_accept_ratio <- logprob - p_prod - logprob_old + p_prod_old
+      log_u <- log(runif(1))
 
       if (log_u < log_accept_ratio) {
 
-        # on acceptance, iterate the counter and leave the parameters in the dag
-        # to be put in the trace
-        accept_count <- accept_count + 1
         accept_trace[i] <- 1
 
       } else {
@@ -135,6 +153,7 @@ hmc <- function (dag,
     if (i %% thin == 0) {
       dag$send_parameters(x)
       trace[i / thin, ] <- dag$trace_values()
+      raw[i / thin, ] <- x
     }
 
     if (verbose)
@@ -143,20 +162,51 @@ hmc <- function (dag,
     # optionally tune epsilon
     if (tune) {
 
-      # acceptance rate over the last accept_group runs
-      start <- max(1, i - accept_group)
-      end <- i
-      accept_rate <- mean(accept_trace[start:end], na.rm = TRUE)
+      adapt_epsilon <- in_periods(i, n_samples, epsilon_periods)
+      if (adapt_epsilon) {
+
+        # acceptance rate over the last accept_group runs
+        start <- max(1, i - accept_group)
+        end <- i
+        accept_rate <- mean(accept_trace[start:end], na.rm = TRUE)
+
+        # decrease the adaptation rate as we go
+        adapt_rate <- min(1, gamma * i ^ (-kappa))
 
-      # decrease the adaptation rate as we go
-      adapt_rate <- min(1, gamma * i ^ (-kappa))
+        # shift epsilon in the right direction, making sure it never goes negative
+        epsilon <- epsilon + pmax(-(epsilon + sqrt(.Machine$double.eps)),
+                                  adapt_rate * (accept_rate - target_acceptance))
 
-      # shift epsilon in the right direction, making sure it never goes negative
-      epsilon <- epsilon + pmax(-(epsilon + sqrt(.Machine$double.eps)),
-                                adapt_rate * (accept_rate - target_acceptance))
+        # keep track of epsilon
+        epsilon_trace[i] <- epsilon
 
-      # keep track of epsilon
-      epsilon_trace[i] <- epsilon
+      }
+
+      # only adapt diag_sd during the middle period of tuning, so that
+      # epsilon can settle in afterwards
+      adapt_diag_sd <- in_periods(i, n_samples, diag_sd_periods)
+      if (adapt_diag_sd) {
+
+        # update welford accumulator for posterior variance
+        n_accepted <- sum(accept_trace)
+
+        # update only if this step was accepted
+        if (accept_trace[i] == 1) {
+          welford_delta <- x - welford_m
+          welford_m <- welford_m + welford_delta / n_accepted
+          welford_m2 <- welford_m2 + welford_delta * (x - welford_m)
+        }
+
+        # if there are samples, and we want to adapt, get the sample posterior
+        # variance and shrink it
+        if (n_accepted > 1 & (i %% diag_sd_update_rate == 0)) {
+          sample_var <- welford_m2 / (n_accepted - 1)
+          var_shrinkage <- 1 / (n_accepted + 5)
+          var_shrunk <- n_accepted * var_shrinkage * sample_var + 5e-3 * var_shrinkage
+          diag_sd <- sqrt(var_shrunk)
+        }
+
+      }
 
     }
 
@@ -167,12 +217,26 @@ hmc <- function (dag,
     start <- floor(n_samples/2)
     end <- n_samples
     control$epsilon <- mean(epsilon_trace[start:end], na.rm = TRUE)
+    control$diag_sd <- diag_sd
   }
 
+  stash_trace(trace, raw)
+  trace <- stashed_samples()
   attr(trace, 'last_x') <- x
   attr(trace, 'control') <- control
   trace
 
 }
 
+# determine whether the sampler is within one of the adaptation periods for a
+# given parameter
+in_periods <- function (i, n_samples, periods) {
+  fraction <- i / n_samples
+  in_period <- vapply(periods, within, fraction, FUN.VALUE = FALSE)
+  any(in_period)
+}
+
+within <- function (period, fraction)
+  fraction >= period[1] & fraction <= period[2]
+
+
 samplers_module <- module(hmc)
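The `diag_sd` adaptation above relies on Welford's online algorithm: the posterior mean and sum of squared deviations are accumulated one accepted sample at a time, so no samples need to be stored. A standalone sketch of the same update rule in plain R (the names mirror the sampler's `welford_m` and `welford_m2`):

```r
# one-pass mean and variance, updated one observation at a time
welford_m <- 0
welford_m2 <- 0
n <- 0

update_welford <- function (x) {
  n <<- n + 1
  welford_delta <- x - welford_m
  welford_m <<- welford_m + welford_delta / n
  welford_m2 <<- welford_m2 + welford_delta * (x - welford_m)
}

set.seed(1)
for (x in rnorm(10000, mean = 2, sd = 3)) update_welford(x)

welford_m                    # running mean, close to 2
sqrt(welford_m2 / (n - 1))   # running sd, close to 3
```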
diff --git a/R/tf_functions.R b/R/tf_functions.R
index 707410b8..0e096da2 100644
--- a/R/tf_functions.R
+++ b/R/tf_functions.R
@@ -212,6 +212,11 @@ tf_icloglog <- function (x)
 tf_icauchit <- function (x)
   fl(1 / pi) * tf$atan(x) + fl(0.5)
 
+tf_imultilogit <- function (x) {
+  zeros <- tf$zeros(shape(nrow(x), 1L), tf_float())
+  latent <- tf$concat(list(x, zeros), 1L)
+  tf$nn$softmax(latent)
+}
 
 # map R's extract and replace syntax to tensorflow, for use in operation nodes
 # the following arguments are required:
@@ -328,6 +333,7 @@ tf_functions_module <- module(tf_as_logical,
                               tf_iprobit,
                               tf_icloglog,
                               tf_icauchit,
+                              tf_imultilogit,
                               tf_extract,
                               tf_recombine,
                               tf_replace,
diff --git a/R/transforms.R b/R/transforms.R
index 84ce8bec..1ee8e598 100644
--- a/R/transforms.R
+++ b/R/transforms.R
@@ -12,27 +12,37 @@
 #' modelling languages. That's because the same syntax has a very different
 #' meaning in R, and can only be applied to objects that are already in
 #' existence. The inverse forms of the common link functions (prefixed with an
-#' 'i') are therefore more likely to useful in modelling, and these are what
-#' are provided here.
+#' 'i') can be used instead.
 #'
-#' The \code{log1pe} inverse link function is equivalent to \code{log(1 + exp(x))},
-#' yielding a positive transformed parameter. Unlike the log transformation,
-#' this transformation is approximately linear for x > 1. i.e. when \eqn{x >
-#' 1}, \eqn{y \approx x}
+#' The \code{log1pe} inverse link function is equivalent to \code{log(1 +
+#' exp(x))}, yielding a positive transformed parameter. Unlike the log
+#' transformation, this transformation is approximately linear for x > 1. i.e.
+#' when \eqn{x > 1}, \eqn{y \approx x}
+#'
+#' \code{imultilogit} expects an n-by-m greta array, and returns an n-by-(m+1)
+#' greta array of positive reals whose rows sum to one. This is equivalent to
+#' adding a final column of 0s and then running the softmax function widely
+#' used in machine learning.
 #'
 #' @examples
 #' \dontrun{
 #'
-#' x = normal(1, 3, dim = 10)
+#' x1 <- normal(1, 3, dim = 10)
 #'
 #' # transformation to the unit interval
-#' p1 <- iprobit(x)
-#' p2 <- ilogit(x)
-#' p3 <- icloglog(x)
-#' p4 <- icauchit(x)
+#' p1 <- iprobit(x1)
+#' p2 <- ilogit(x1)
+#' p3 <- icloglog(x1)
+#' p4 <- icauchit(x1)
 #'
 #' # and to positive reals
-#' y <- log1pe(x)
+#' y <- log1pe(x1)
+#'
+#' # transform from 10x3 to 10x4, where rows are a complete set of
+#' # probabilities
+#' x2 <- normal(1, 3, dim = c(10, 3))
+#' z <- imultilogit(x2)
+#'
 #' }
 NULL
@@ -60,3 +70,25 @@ icauchit <- function (x)
 #' @export
 log1pe <- function (x)
   op('log1pe', x, tf_operation = tf$nn$softplus)
+
+#' @rdname transforms
+#' @export
+imultilogit <- function (x) {
+  dimfun <- function (elem_list) {
+
+    dim <- dim(elem_list[[1]])
+
+    # check it's a matrix
+    if (length(dim) != 2) {
+      stop ("imultilogit expects a 2D greta array",
+            call. = FALSE)
+    }
+
+    dim + c(0, 1)
+
+  }
+
+  op('imultilogit', x,
+     dimfun = dimfun,
+     tf_operation = tf_imultilogit)
+}
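The two headline features of this release combine naturally: `calculate()` gives a quick way to confirm that `imultilogit()` appends a column and returns rows summing to one. A sketch, assuming greta 0.2.4 is attached:

```r
library(greta)

x2 <- normal(1, 3, dim = c(10, 3))
z <- imultilogit(x2)   # a 10 x 4 greta array

# x2 is the only variable z is connected to, so a single value suffices
vals <- matrix(rnorm(30), 10, 3)
p <- calculate(z, list(x2 = vals))

dim(p)      # 10 4
rowSums(p)  # all equal to 1
```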
diff --git a/R/utils.R b/R/utils.R
index 65245516..a0ed88eb 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -335,11 +335,19 @@ cleanly <- function (expr) {
 }
 
 # check whether initial values are valid
-valid_parameters <- function (dag, initial_values) {
+valid_parameters <- function (initial_values, dag) {
+
+  if (is.list(initial_values)) {
+    each_valid <- vapply(initial_values, valid_parameters, dag, FUN.VALUE = FALSE)
+    valid <- all(each_valid)
+    return (valid)
+  }
+
   dag$send_parameters(initial_values)
   ld <- dag$log_density()
   grad <- dag$gradients()
   all(is.finite(c(ld, grad)))
+
 }
 
 # unlist and flatten a list of arrays to a vector row-wise
diff --git a/docs/_site.yml b/docs/_site.yml
index b93e7b3f..7320b05e 100644
--- a/docs/_site.yml
+++ b/docs/_site.yml
@@ -1,9 +1,11 @@
 name: greta
-output_dir: "."
-
+output_dir: '.'
 output:
   html_document:
     css: greta.css
-    toc: true
+    toc: yes
     theme: lumen
     highlight: default
+    lib_dir: site_libs
+    self_contained: no
+
diff --git a/docs/calculate.html b/docs/calculate.html
new file mode 100644
index 00000000..72c3dcb1
--- /dev/null
+++ b/docs/calculate.html
@@ -0,0 +1,452 @@
+calculate greta arrays given fixed values
+
+Description
+
+Calculate the values that greta arrays would take, given temporary values for the greta arrays on which they depend, and return them as numeric R arrays. This can be used to check the behaviour of your model or make predictions to new data after model fitting.
+
+Usage
+
+calculate(target, values)
+
+Arguments
+
+target
+a greta array for which to calculate the value
+
+values
+a named list giving temporary values of the greta arrays with which target is connected, or an mcmc.list object returned by mcmc.
+
+Value
+
+A numeric R array with the same dimensions as target, giving the values it would take conditioned on the fixed values given by values.
+
+Details
+
+The greta arrays named in values need not be variables, they can also be other operations or even data.
+
+At present, if values is a named list it must contain values for all of the variable greta arrays with which target is connected, even if values are given for intermediate operations, or the target doesn’t depend on the variable. That may be relaxed in a future release.
+
+Examples
+
+# define a variable greta array, and another that is calculated from it
+# then calculate what value y would take for different values of x
+x <- normal(0, 1, dim = 3)
+a <- lognormal(0, 1)
+y <- sum(x ^ 2) + a
+calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))
+
+# define a model
+alpha <- normal(0, 1)
+beta <- normal(0, 1)
+sigma <- lognormal(1, 0.1)
+mu <- alpha + iris$Petal.Length * beta
+distribution(iris$Petal.Width) <- normal(mu, sigma)
+m <- model(alpha, beta, sigma)
+
+# calculate intermediate greta arrays, given some parameter values
+calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5))
+calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5))
+
+# fit the model then calculate samples at a new greta array
+draws <- mcmc(m, n_samples = 500)
+petal_length_plot <- seq(min(iris$Petal.Length),
+                         max(iris$Petal.Length),
+                         length.out = 100)
+mu_plot <- alpha + petal_length_plot * beta
+mu_plot_draws <- calculate(mu_plot, draws)
+
+# plot the draws
+mu_est <- colMeans(mu_plot_draws[[1]])
+plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]]))
+apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8))
+lines(mu_est ~ petal_length_plot, lwd = 2)
diff --git a/docs/distribution.html b/docs/distribution.html
index c009085f..dd4c3bc9 100644
--- a/docs/distribution.html
+++ b/docs/distribution.html
@@ -380,12 +380,12 @@
 # observed data and mean parameter to be estimated
 # (explicitly coerce data to a greta array so we can refer to it later)
-y = as_data(rnorm(5, 0, 3))
+y <- as_data(rnorm(5, 0, 3))
 
-mu = uniform(-3, 3)
+mu <- uniform(-3, 3)
 
 # define the distribution over y (the model likelihood)
-distribution(y) = normal(mu, 1)
+distribution(y) <- normal(mu, 1)
 
 # get the distribution over y
 distribution(y)
diff --git a/docs/distributions.html b/docs/distributions.html
index a05c63bc..e21e194d 100644
--- a/docs/distributions.html
+++ b/docs/distributions.html
@@ -762,39 +762,39 @@
 
 # a uniform parameter constrained to be between 0 and 1
-phi = uniform(min = 0, max = 1)
+phi <- uniform(min = 0, max = 1)
 
 # a length-three variable, with each element following a standard normal
 # distribution
-alpha = normal(0, 1, dim = 3)
+alpha <- normal(0, 1, dim = 3)
 
 # a length-three variable of lognormals
-sigma = lognormal(0, 3, dim = 3)
+sigma <- lognormal(0, 3, dim = 3)
 
 # a hierarchical uniform, constrained between alpha and alpha + sigma,
-eta = alpha + uniform(0, 1, dim = 3) * sigma
+eta <- alpha + uniform(0, 1, dim = 3) * sigma
 
 # a hierarchical distribution
-mu = normal(0, 1)
-sigma = lognormal(0, 1)
-theta = normal(mu, sigma)
+mu <- normal(0, 1)
+sigma <- lognormal(0, 1)
+theta <- normal(mu, sigma)
 
 # a vector of 3 variables drawn from the same hierarchical distribution
-thetas = normal(mu, sigma, dim = 3)
+thetas <- normal(mu, sigma, dim = 3)
 
 # a matrix of 12 variables drawn from the same hierarchical distribution
-thetas = normal(mu, sigma, dim = c(3, 4))
+thetas <- normal(mu, sigma, dim = c(3, 4))
 
 # a multivariate normal variable, with correlation between two elements
 Sig <- diag(4)
 Sig[3, 4] <- Sig[4, 3] <- 0.6
-theta = multivariate_normal(rep(mu, 4), Sig)
+theta <- multivariate_normal(rep(mu, 4), Sig)
 
 # 10 independent replicates of that
-theta = multivariate_normal(rep(mu, 4), Sig, dim = 10)
+theta <- multivariate_normal(rep(mu, 4), Sig, dim = 10)
 
 # a Wishart variable with the same covariance parameter
-theta = wishart(df = 5, Sigma = Sig)
+theta <- wishart(df = 5, Sigma = Sig)
diff --git a/docs/example_models.html b/docs/example_models.html
index 3003140f..854d77c0 100644
--- a/docs/example_models.html
+++ b/docs/example_models.html
@@ -367,11 +367,11 @@
 data
 
 greta code
 
-theta = normal(0, 32, dim = 2)
+theta <- normal(0, 32, dim = 2)
 mu <- alpha + beta * Z
-X = normal(mu, sigma)
+X <- normal(mu, sigma)
 p <- ilogit(theta[1] + theta[2] * X)
-distribution(y) = binomial(n, p)
+distribution(y) <- binomial(n, p)
 
 BUGS/JAGS code
 
@@ -436,10 +436,10 @@
 
 data
 
 greta code
 
-alpha_star = normal(0, 32)
-beta = normal(0, 32)
+alpha_star <- normal(0, 32)
+beta <- normal(0, 32)
 p <- ilogit(alpha_star + beta * (x - mean(x)))
-distribution(r) = binomial(n, p)
+distribution(r) <- binomial(n, p)
 
 alpha <- alpha_star - beta * mean(x)
 rhat <- p * n
diff --git a/docs/extract-replace-combine.html b/docs/extract-replace-combine.html
index c7464798..046fd18d 100644
--- a/docs/extract-replace-combine.html
+++ b/docs/extract-replace-combine.html
@@ -407,7 +407,7 @@
 
-x = as_data(matrix(1:12, 3, 4))
+x <- as_data(matrix(1:12, 3, 4))
 
 # extract/replace
 x[1:3, ]
diff --git a/docs/figures/coef_graph.png b/docs/figures/coef_graph.png
index 6572dd57..fce4c8a2 100644
Binary files a/docs/figures/coef_graph.png and b/docs/figures/coef_graph.png differ
diff --git a/docs/figures/likelihood_graph.png b/docs/figures/likelihood_graph.png
index 4af299fc..99d39c96 100644
Binary files a/docs/figures/likelihood_graph.png and b/docs/figures/likelihood_graph.png differ
diff --git a/docs/functions.html b/docs/functions.html
index a6ad8b9e..202aee93 100644
--- a/docs/functions.html
+++ b/docs/functions.html
@@ -425,18 +425,18 @@
 
-x = as_data(matrix(1:9, nrow = 3, ncol = 3))
-a = log(exp(x))
-b = log1p(expm1(x))
-c = sign(x - 5)
-d = abs(x - 5)
+x <- as_data(matrix(1:9, nrow = 3, ncol = 3))
+a <- log(exp(x))
+b <- log1p(expm1(x))
+c <- sign(x - 5)
+d <- abs(x - 5)
 
-e = diag(x)
+e <- diag(x)
 diag(x) <- e + 1
 
-z = t(a)
+z <- t(a)
 
-y = sweep(x, 1, e, '-')
+y <- sweep(x, 1, e, '-')
diff --git a/docs/get_started.html b/docs/get_started.html
index 808e6930..c5d206c7 100644
--- a/docs/get_started.html
+++ b/docs/get_started.html
@@ -418,15 +418,15 @@
 Building a model
 
 y <- as_data(iris$Sepal.Length)
 
 # variables and priors
-int = normal(0, 1)
-coef = normal(0, 3)
-sd = student(3, 0, 1, truncation = c(0, Inf))
+int <- normal(0, 1)
+coef <- normal(0, 3)
+sd <- student(3, 0, 1, truncation = c(0, Inf))
 
 # operations
 mean <- int + coef * x
 
 # likelihood
-distribution(y) = normal(mean, sd)
+distribution(y) <- normal(mean, sd)
 
 # defining the model
 m <- model(int, coef, sd)
@@ -436,7 +436,6 @@
 
 Building a model
 
 # sampling
 draws <- mcmc(m, n_samples = 1000)
-
-Throughout this example, and the rest of greta’s documentation, <- is used when assigning deterministic values, and = when assigning variable or stochastic values. This makes the model code a little bit clearer to read, since = corresponds to the ~ symbol used to define stochastic relationships in BUGS and Stan. However, it doesn’t actually make any difference to the model which assignment operator you use.
 
 Data
 
@@ -507,21 +506,21 @@
 
 data structures
 
 Variables and priors
 
 The second section of the script creates three greta arrays to represent the parameters in our model:
 
-int = normal(0, 1)
-coef = normal(0, 3)
-sd = student(3, 0, 1, truncation = c(0, Inf))
-
-Each of these is a variable greta array, and each is assumed a priori (before fitting to data) to follow a different probability distribution. In other wirds, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions.
+int <- normal(0, 1)
+coef <- normal(0, 3)
+sd <- student(3, 0, 1, truncation = c(0, Inf))
+
+Each of these is a variable greta array, and each is assumed a priori (before fitting to data) to follow a different probability distribution. In other words, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with probability distributions, it will be clearer to first demonstrate the alternative; variables without probability distributions.
 
 variables without probability distributions
 
 If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the variable() function. E.g. in a frequentist version of the model we could create int with:
 
-(int = variable())
+(int <- variable())
 greta array (variable)
 
      [,1]
 [1,]  ?  
 
 variable() has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of lower = -Inf, upper = Inf meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar).
 
 We can create a variable constrained between two values by specifying lower and upper. So we could have created the positive variable sd (the standard deviation of the likelihood) with:
 
-(sd = variable(lower = 0))
+(sd <- variable(lower = 0))
 greta array (variable)
 
      [,1]
@@ -542,7 +541,7 @@
 
 variables with probability distributions
 
 variables with truncated distributions
 
 Some of greta’s probability distributions (those that are continuous and univariate) can be specified as truncated distributions. By modifying the truncation argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do:
 
-(z = normal(0, 1, truncation = c(-1, 1)))
+(z <- normal(0, 1, truncation = c(-1, 1)))
 greta array (variable following a normal distribution)
 
      [,1]
@@ -596,7 +595,7 @@
 
 Extract and replace
 
 NULL
 dim(z[, 1, drop = FALSE])
 [1] 2 1
-
-greta arrays must always have two dimensions, so greta always acts as though drop=FALSE:
+
+greta arrays must always have two dimensions, so greta always acts as though drop = FALSE:
 
 z_greta <- as_data(z)
 dim(z_greta[, 1])
 [1] 2 1
@@ -620,8 +619,8 @@
 
 Functions
 
 Likelihood
 
 So far, we have created greta arrays representing the variables in our model (with prior distributions) and created other greta arrays from them and some fixed, independent data. To perform statistical inference on this model, we also need to link it to some observed dependent data. By comparing the sepal lengths predicted under different parameter values with the observed sepal lengths, we can estimate the most plausible values of those parameters. We do that by defining a likelihood for the observed sepal length data y.
 
 By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we’re trying to work out the parameters of that distribution. In greta we do that with the distribution() assignment function:
 
-distribution(y) = normal(mean, sd)
-
-With the syntax distribution(<lhs>) = <rhs>, we are stating that the data greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case, we’re temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays mean and sd), but then stating that values of that distribution have been observed (y). In this case both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.
+distribution(y) <- normal(mean, sd)
+
+With the syntax distribution(<lhs>) <- <rhs>, we are stating that the data greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case, we’re temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays mean and sd), but then stating that values of that distribution have been observed (y). In this case both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.
 
@@ -644,7 +643,7 @@
 
 Plotting
 
 The fourth type of node (diamonds) represents probability distributions. These have greta arrays as parameters (linked via solid lines), and have other greta arrays as values (linked via dashed lines). Distributions calculate the probability density of the values, given the parameters and their distribution type.
 
-For example, a plot of just the prior distribution over coef (defined as coef = normal(0, 3)) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution’s value, the variable coef:
+For example, a plot of just the prior distribution over coef (defined as coef <- normal(0, 3)) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution’s value, the variable coef:
diff --git a/docs/greta.html b/docs/greta.html
index a2979fe6..32eb0fd0 100644
--- a/docs/greta.html
+++ b/docs/greta.html
@@ -364,13 +364,13 @@
# a simple Bayesian regression model for the iris data # priors -int = normal(0, 5) -coef = normal(0, 3) -sd = lognormal(0, 3) +int <- normal(0, 5) +coef <- normal(0, 3) +sd <- lognormal(0, 3) # likelihood mean <- int + coef * iris$Petal.Length -distribution(iris$Sepal.Length) = normal(mean, sd) +distribution(iris$Sepal.Length) <- normal(mean, sd) # build and sample m <- model(int, coef, sd) diff --git a/docs/index.Rmd b/docs/index.Rmd index 05c915d9..f4d5c78f 100644 --- a/docs/index.Rmd +++ b/docs/index.Rmd @@ -12,7 +12,7 @@ body{ padding-top: 10px; } pre, pre:not([class]){ - font-size: 20px; + font-size: 18px; color: #5a3a78; background-color: #f7f7f7; border: none; @@ -101,7 +101,7 @@ a, a:hover{ ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE, cache = TRUE) -set.seed(2017-06-19) +set.seed(2018-02-17) ```

m <- model(int, coef, sd)
-
draws <- mcmc(m, n_samples = 1000)
+
draws <- mcmc(m, n_samples = 1000, chains = 3)
 bayesplot::mcmc_trace(draws)
-

+


diff --git a/docs/index_files/figure-html/vis-1.png b/docs/index_files/figure-html/vis-1.png
index 4954319d..34022571 100644
Binary files a/docs/index_files/figure-html/vis-1.png and b/docs/index_files/figure-html/vis-1.png differ
diff --git a/docs/inference.html b/docs/inference.html
index 7beb12ad..3c783e32 100644
--- a/docs/inference.html
+++ b/docs/inference.html
@@ -354,7 +354,7 @@

stashed_samples()
 
 mcmc(model, method = c("hmc"), n_samples = 1000, thin = 1, warmup = 100,
-  verbose = TRUE, pb_update = 10, control = list(),
+  chains = 1, verbose = TRUE, pb_update = 10, control = list(),
   initial_values = NULL)
 
 opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06,
@@ -368,19 +368,23 @@ 

method

-the method used to sample or optimise values. Currently only one method is available for each procedure: hmc and adagrad
+method used to sample or optimise values. Currently only one method is available for each procedure: hmc and adagrad

n_samples

-the number of MCMC samples to draw (after any warm-up, but before thinning)
+number of MCMC samples to draw (after any warm-up, but before thinning)

thin

-the MCMC thinning rate; every thin samples is retained, the rest are discarded
+MCMC thinning rate; every thin samples is retained, the rest are discarded

warmup

-the number of samples to spend warming up the mcmc sampler. During this phase the sampler moves toward the highest density area and tunes sampler hyperparameters.
+number of samples to spend warming up the mcmc sampler. During this phase the sampler moves toward the highest density area and tunes sampler hyperparameters.
+

+chains
+

+number of MCMC chains to run

verbose

@@ -396,7 +400,7 @@

initial_values

-an optional named vector of initial values for the free parameters in the model. These will be used as the starting point for sampling/optimisation
+an optional vector (or list of vectors, for multiple chains) of initial values for the free parameters in the model. These will be used as the starting point for sampling/optimisation.

max_iterations

@@ -448,10 +452,10 @@

Occasionally, a proposed set of parameters can cause numerical instability (i.e. the log density or its gradient is NA, Inf or -Inf), normally because the log joint density is so low that it can’t be represented as a floating point number. When this happens, the progress bar will also display the proportion of samples so far that were ‘bad’ (numerically unstable) and therefore rejected. If you’re getting a lot of numerical instability, you might want to manually define starting values to move the sampler into a more reasonable part of the parameter space. Alternatively, you could redefine the model (via model) to have double precision, though this will slow down sampling.
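As a minimal sketch of those two remedies (assuming the single-parameter model m from the example below; the precision argument to model() is an assumption about your installed version of greta):

 draws <- mcmc(m, chains = 2, initial_values = list(0, 0.1))  # one set per chain
 m2 <- model(mu, sigma, precision = "double")                 # double-precision rebuild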

-Currently, the only implemented MCMC procedure is static Hamiltonian Monte Carlo (method = “hmc”). During the warmup iterations, the leapfrog stepsize hyperparameter epsilon is tuned to maximise the sampler efficiency. The control argument can be used to specify the initial value for epsilon, along with two other hyperparameters: Lmin and Lmax; positive integers (with Lmax > Lmin) giving the upper and lower limits to the number of leapfrog steps per iteration (from which the number is selected uniformly at random).
+Currently, the only implemented MCMC procedure is static Hamiltonian Monte Carlo (method = “hmc”). During the warmup iterations, the leapfrog stepsize hyperparameter epsilon is tuned to maximise the sampler efficiency, and the posterior marginal standard deviations are estimated as diag_sd. The control argument can be used to specify the initial value for epsilon, diag_sd, and two other hyperparameters: Lmin and Lmax; positive integers (with Lmax > Lmin) giving the upper and lower limits to the number of leapfrog steps per iteration (from which the number is selected uniformly at random).

-The default control options for HMC are: control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)
+The default control options for HMC are: control = list(Lmin = 10, Lmax = 20, epsilon = 0.005, diag_sd = 1)
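For example, a sketch that shrinks the initial stepsize while keeping the other defaults explicit:

 draws <- mcmc(m, control = list(Lmin = 10, Lmax = 20, epsilon = 0.001, diag_sd = 1))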

Currently, the only implemented optimisation algorithm is Adagrad (method = “adagrad”). The control argument can be used to specify the optimiser hyperparameters: learning_rate (default 0.8), initial_accumulator_value (default 0.1) and use_locking (default TRUE). These are passed directly to TensorFlow’s optimisers; see the TensorFlow docs for more information.
@@ -461,10 +465,10 @@
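For instance, a sketch of tuning the optimiser with the hyperparameter names listed above:

 o <- opt(m, method = "adagrad", max_iterations = 200,
          control = list(learning_rate = 0.5))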

 # define a simple model
-mu = variable()
-sigma = lognormal(1, 0.1)
-x = rnorm(10)
-distribution(x) = normal(mu, sigma)
+mu <- variable()
+sigma <- lognormal(1, 0.1)
+x <- rnorm(10)
+distribution(x) <- normal(mu, sigma)
 m <- model(mu, sigma)
 
 # carry out mcmc on the model
diff --git a/docs/model.html b/docs/model.html
index 8b963f24..95f8c731 100644
--- a/docs/model.html
+++ b/docs/model.html
@@ -407,12 +407,11 @@ 

 
-
 # define a simple model
-mu = variable()
-sigma = lognormal(1, 0.1)
-x = rnorm(10)
-distribution(x) = normal(mu, sigma)
+mu <- variable()
+sigma <- lognormal(1, 0.1)
+x <- rnorm(10)
+distribution(x) <- normal(mu, sigma)
 
 m <- model(mu, sigma)
 
diff --git a/docs/operators.html b/docs/operators.html
index cc4db3c3..bcae4b32 100644
--- a/docs/operators.html
+++ b/docs/operators.html
@@ -390,23 +390,23 @@ 

 
-x = as_data(-1:12)
+x <- as_data(-1:12)
 
 # arithmetic
-a = x + 1
-b = 2 * x + 3
-c = x %% 2
-d = x %/% 5
+a <- x + 1
+b <- 2 * x + 3
+c <- x %% 2
+d <- x %/% 5
 
 # logical
-e = (x > 1) | (x < 1)
-f = e & (x < 2)
-g = !f
+e <- (x > 1) | (x < 1)
+f <- e & (x < 2)
+g <- !f
 
 # relational
-h = x < 1
-i = (-x) >= x
-j = h == x
+h <- x < 1
+i <- (-x) >= x
+j <- h == x
diff --git a/docs/reference-index.html b/docs/reference-index.html
index 38ad1804..b5346466 100644
--- a/docs/reference-index.html
+++ b/docs/reference-index.html
@@ -402,7 +402,7 @@

extract, replace and combine greta arrays

transformation functions for greta arrays

-

iprobit ilogit icloglog icauchit log1pe

+

iprobit ilogit icloglog icauchit log1pe imultilogit



diff --git a/docs/technical_details.html b/docs/technical_details.html
index 016e91b7..d1c37320 100644
--- a/docs/technical_details.html
+++ b/docs/technical_details.html
@@ -430,7 +430,7 @@

Tensors

variables and free states

Hamiltonian Monte Carlo (HMC) requires all of the parameters to be transformed to a continuous scale for sampling. Variable nodes are therefore converted to tensors by first defining a ‘free’ (unconstrained) version of themselves as a tensor, and then applying a transformation function to convert them back to the scale of interest.

-
a = variable(lower = 0)
+
a <- variable(lower = 0)
 class(a$node)
[1] "variable_node" "node"          "R6"           
a$node$tf_from_free
@@ -445,7 +445,7 @@

variables and free states

 } else if (self$constraint == 'both') {
-  y <- tf_ilogit(x) * fl(upper - lower) + fl(lower)
+  y <- tf$nn$sigmoid(x) * fl(upper - lower) + fl(lower)
 } else if (self$constraint == 'low') {
@@ -464,12 +464,12 @@

variables and free states

  y
}
-<environment: 0x121afa638>
+<environment: 0x10b5aef78>

distributions

distribution nodes are node objects just like the others, but they are not directly associated with greta arrays. Instead, greta arrays may have a distribution node in their distribution slot to indicate that their values are assumed to follow that distribution. The distribution node will also be listed as a parent node, and likewise the ‘target node’ will be listed as a child of the distribution. Distribution nodes also have child nodes (data, variables or operations) representing their parameters.

-
b = normal(0, 1)
+
b <- normal(0, 1)
 class(b$node)
[1] "variable_node" "node"          "R6"           
class(b$node$distribution)
diff --git a/docs/transforms.html b/docs/transforms.html
index 12541514..64043a25 100644
--- a/docs/transforms.html
+++ b/docs/transforms.html
@@ -359,7 +359,9 @@

icauchit(x)

-log1pe(x)

+log1pe(x)
+
+imultilogit(x)

Arguments

@@ -371,26 +373,34 @@

Details

-greta does not allow you to state the transformation/link on the left hand side of an assignment, as is common in the BUGS and STAN modelling languages. That’s because the same syntax has a very different meaning in R, and can only be applied to objects that are already in existence. The inverse forms of the common link functions (prefixed with an ‘i’) are therefore more likely to useful in modelling, and these are what are provided here.
+greta does not allow you to state the transformation/link on the left hand side of an assignment, as is common in the BUGS and STAN modelling languages. That’s because the same syntax has a very different meaning in R, and can only be applied to objects that are already in existence. The inverse forms of the common link functions (prefixed with an ‘i’) can be used instead.
+

+

+The log1pe inverse link function is equivalent to log(1 + exp(x)), yielding a positive transformed parameter. Unlike the log transformation, this transformation is approximately linear for x > 1, i.e. when \(x > 1\), \(y \approx x\).

-The log1pe inverse link function is equivalent to log(1 + exp(x)), yielding a positive transformed parameter. Unlike the log transformation, this transformation is approximately linear for x > 1. i.e. when \(x &gt; 1\), \(y \approx x\)
+imultilogit expects an n-by-m greta array, and returns an n-by-(m+1) greta array of positive reals whose rows sum to one. This is equivalent to adding a final column of 0s and then running the softmax function widely used in machine learning.
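To make this concrete, here is a base-R sketch of the same computation (it mirrors the reference implementation in greta’s own test suite): append a column of zeros, exponentiate, and normalise each row.

 r_imultilogit <- function (x) {
   x <- cbind(x, 0)               # append the fixed final column of 0s
   x <- exp(x)                    # exponentiate each score
   sweep(x, 1, rowSums(x), "/")   # normalise rows to sum to one
 }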

Examples

 
- x = normal(1, 3, dim = 10)
+ x1 <- normal(1, 3, dim = 10)
 
  # transformation to the unit interval
- p1 <- iprobit(x)
- p2 <- ilogit(x)
- p3 <- icloglog(x)
- p4 <- icauchit(x)
+ p1 <- iprobit(x1)
+ p2 <- ilogit(x1)
+ p3 <- icloglog(x1)
+ p4 <- icauchit(x1)
 
  # and to positive reals
- y <- log1pe(x)
+ y <- log1pe(x1) + + # transform from 10x3 to 10x4, where rows are a complete set of + # probabilities + x2 <- normal(1, 3, dim = c(10, 3)) + z <- imultilogit(x2) diff --git a/logos/greta_hex.png b/logos/greta_hex.png new file mode 100644 index 00000000..716330cf Binary files /dev/null and b/logos/greta_hex.png differ diff --git a/logos/make_hex.R b/logos/make_hex.R new file mode 100644 index 00000000..364f7feb --- /dev/null +++ b/logos/make_hex.R @@ -0,0 +1,65 @@ +library(hexSticker) +library(magick) +library(sysfonts) +font_add_google("Muli") +attach(greta::.internals$utils$colours) + +# make a hex-shaped mask +hexd <- data.frame(x = 1 + c(rep(-sqrt(3) / 2, 2), 0, rep(sqrt(3) / 2, 2), 0), + y = 1 + c(0.5, -0.5, -1, -0.5, 0.5, 1)) + +x_lim <- range(hexd$x) +y_lim <- range(hexd$y) +x_dim <- abs(diff(x_lim)) +y_dim <- abs(diff(y_lim)) +ex <- 4 +pdf("logos/hex_mask.pdf", width = x_dim * ex, height = y_dim * ex) +par(pty = "s", + xpd = NA, + bg = "black", + mar = rep(0, 4), oma = rep(0, 4)) +plot.new() +plot.window(xlim = x_lim, ylim = y_lim) +polygon(hexd, col = "white") +dev.off() + +# load the hex mask +mask <- image_read("logos/hex_mask.pdf") +mask <- image_transparent(mask, "black") + +# crop and mask header image to the mask (?) +bg <- image_read("logos/greta-header.png") +bg <- image_crop(bg, "498x576+1600+600") +hex_bg <- image_composite(bg, mask, "CopyOpacity") +image_write(hex_bg, path = "logos/hex_bg.pdf") + +# # add the icon and save +# icon <- image_read("logos/icon_on_light_transparent.png") +# icon <- image_resize(icon, "15%") +# hex <- image_composite(hex_bg, icon, offset = "+114+223") + +par(pty = "s", + xpd = NA, + bg = "black", + mar = c(5, 4, 4, 2) + 0.1) + +greta_hex <- sticker("logos/hex_bg.pdf", + s_x = 1, + s_y = 1, + s_width = 1.05, + package = "greta", + p_y = 1.1, + p_family = "Muli", + p_size = 15, + h_fill = greta_col("main"), + h_color = greta_col("main"), + filename = "logos/greta_hex.png") +library(ggplot2) +ggsave(greta_hex, width = 43.9, height = 50.8, + filename = "logos/greta_hex.png", + bg = "transparent", + units = "mm", + dpi = 600) + +file.remove("logos/hex_bg.pdf") +file.remove("logos/hex_mask.pdf") diff --git a/man/calculate.Rd b/man/calculate.Rd new file mode 100644 index 00000000..44fba24b --- /dev/null +++ b/man/calculate.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate.R +\name{calculate} +\alias{calculate} +\title{calculate greta arrays given fixed values} +\usage{ +calculate(target, values) +} +\arguments{ +\item{target}{a greta array for which to calculate the value} + +\item{values}{a named list giving temporary values of the greta arrays with +which \code{target} is connected, or an \code{mcmc.list} object returned by +\code{\link{mcmc}}.} +} +\value{ +A numeric R array with the same dimensions as \code{target}, giving + the values it would take conditioned on the fixed values given by + \code{values}. +} +\description{ +Calculate the values that greta arrays would take, given + temporary values for the greta arrays on which they depend, and return them + as numeric R arrays. This can be used to check the behaviour of your model + or make predictions to new data after model fitting. +} +\details{ +The greta arrays named in \code{values} need not be variables, they + can also be other operations or even data. 
+ + At present, if \code{values} is a named list it must contain values for + \emph{all} of the variable greta arrays with which \code{target} is + connected, even values are given for intermediate operations, or the target + doesn't depend on the variable. That may be relaxed in a future release. +} +\examples{ +\dontrun{ + +# define a variable greta array, and another that is calculated from it +# then calculate what value y would take for different values of x +x <- normal(0, 1, dim = 3) +a <- lognormal(0, 1) +y <- sum(x ^ 2) + a +calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2)) + + +# define a model +alpha <- normal(0, 1) +beta <- normal(0, 1) +sigma <- lognormal(1, 0.1) +mu <- alpha + iris$Petal.Length * beta +distribution(iris$Petal.Width) <- normal(mu, sigma) +m <- model(alpha, beta, sigma) + +# calculate intermediate greta arrays, given some parameter values +calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5)) +calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5)) + + +# fit the model then calculate samples at a new greta array +draws <- mcmc(m, n_samples = 500) +petal_length_plot <- seq(min(iris$Petal.Length), + max(iris$Petal.Length), + length.out = 100) +mu_plot <- alpha + petal_length_plot * beta +mu_plot_draws <- calculate(mu_plot, draws) + +# plot the draws +mu_est <- colMeans(mu_plot_draws[[1]]) +plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]])) +apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8)) +lines(mu_est ~ petal_length_plot, lwd = 2) +} + + +} diff --git a/man/distribution.Rd b/man/distribution.Rd index c3dbd409..6553e745 100644 --- a/man/distribution.Rd +++ b/man/distribution.Rd @@ -33,12 +33,12 @@ The extract method returns the greta array if it has a distribution, # observed data and mean parameter to be estimated # (explicitly coerce data to a greta array so we can refer to it later) -y = as_data(rnorm(5, 0, 3)) +y <- as_data(rnorm(5, 0, 3)) -mu = uniform(-3, 3) +mu <- uniform(-3, 3) # define the distribution over y (the model likelihood) -distribution(y) = normal(mu, 1) +distribution(y) <- normal(mu, 1) # get the distribution over y distribution(y) diff --git a/man/distributions.Rd b/man/distributions.Rd index 12950c97..4b59b616 100644 --- a/man/distributions.Rd +++ b/man/distributions.Rd @@ -194,39 +194,39 @@ The discrete probability distributions (\code{bernoulli}, \dontrun{ # a uniform parameter constrained to be between 0 and 1 -phi = uniform(min = 0, max = 1) +phi <- uniform(min = 0, max = 1) # a length-three variable, with each element following a standard normal # distribution -alpha = normal(0, 1, dim = 3) +alpha <- normal(0, 1, dim = 3) # a length-three variable of lognormals -sigma = lognormal(0, 3, dim = 3) +sigma <- lognormal(0, 3, dim = 3) # a hierarchical uniform, constrained between alpha and alpha + sigma, -eta = alpha + uniform(0, 1, dim = 3) * sigma +eta <- alpha + uniform(0, 1, dim = 3) * sigma # a hierarchical distribution -mu = normal(0, 1) -sigma = lognormal(0, 1) -theta = normal(mu, sigma) +mu <- normal(0, 1) +sigma <- lognormal(0, 1) +theta <- normal(mu, sigma) # a vector of 3 variables drawn from the same hierarchical distribution -thetas = normal(mu, sigma, dim = 3) +thetas <- normal(mu, sigma, dim = 3) # a matrix of 12 variables drawn from the same hierarchical distribution -thetas = normal(mu, sigma, dim = c(3, 4)) +thetas <- normal(mu, sigma, dim = c(3, 4)) # a multivariate normal variable, with correlation between two elements Sig <- diag(4) Sig[3, 4] <- Sig[4, 3] <- 
0.6 -theta = multivariate_normal(rep(mu, 4), Sig) +theta <- multivariate_normal(rep(mu, 4), Sig) # 10 independent replicates of that -theta = multivariate_normal(rep(mu, 4), Sig, dim = 10) +theta <- multivariate_normal(rep(mu, 4), Sig, dim = 10) # a Wishart variable with the same covariance parameter -theta = wishart(df = 5, Sigma = Sig) +theta <- wishart(df = 5, Sigma = Sig) } } diff --git a/man/extract-replace-combine.Rd b/man/extract-replace-combine.Rd index 2a2cfad3..e9662613 100644 --- a/man/extract-replace-combine.Rd +++ b/man/extract-replace-combine.Rd @@ -59,7 +59,7 @@ dim(x) <- value \examples{ \dontrun{ - x = as_data(matrix(1:12, 3, 4)) + x <- as_data(matrix(1:12, 3, 4)) # extract/replace x[1:3, ] diff --git a/man/functions.Rd b/man/functions.Rd index f3b52cfc..a46af747 100644 --- a/man/functions.Rd +++ b/man/functions.Rd @@ -91,17 +91,17 @@ TensorFlow only enables rounding to integers, so \code{round()} will \examples{ \dontrun{ -x = as_data(matrix(1:9, nrow = 3, ncol = 3)) -a = log(exp(x)) -b = log1p(expm1(x)) -c = sign(x - 5) -d = abs(x - 5) +x <- as_data(matrix(1:9, nrow = 3, ncol = 3)) +a <- log(exp(x)) +b <- log1p(expm1(x)) +c <- sign(x - 5) +d <- abs(x - 5) -e = diag(x) +e <- diag(x) diag(x) <- e + 1 -z = t(a) +z <- t(a) -y = sweep(x, 1, e, '-') +y <- sweep(x, 1, e, '-') } } diff --git a/man/greta.Rd b/man/greta.Rd index ff618800..9b14d1d4 100644 --- a/man/greta.Rd +++ b/man/greta.Rd @@ -25,13 +25,13 @@ greta lets you write statistical models interactively in native # a simple Bayesian regression model for the iris data # priors -int = normal(0, 5) -coef = normal(0, 3) -sd = lognormal(0, 3) +int <- normal(0, 5) +coef <- normal(0, 3) +sd <- lognormal(0, 3) # likelihood mean <- int + coef * iris$Petal.Length -distribution(iris$Sepal.Length) = normal(mean, sd) +distribution(iris$Sepal.Length) <- normal(mean, sd) # build and sample m <- model(int, coef, sd) diff --git a/man/inference.Rd b/man/inference.Rd index 48437a17..4bbc8b46 100644 --- a/man/inference.Rd +++ b/man/inference.Rd @@ -10,7 +10,7 @@ stashed_samples() mcmc(model, method = c("hmc"), n_samples = 1000, thin = 1, warmup = 100, - verbose = TRUE, pb_update = 10, control = list(), + chains = 1, verbose = TRUE, pb_update = 10, control = list(), initial_values = NULL) opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06, @@ -19,19 +19,21 @@ opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06, \arguments{ \item{model}{greta_model object} -\item{method}{the method used to sample or optimise values. Currently only +\item{method}{method used to sample or optimise values. Currently only one method is available for each procedure: \code{hmc} and \code{adagrad}} -\item{n_samples}{the number of MCMC samples to draw (after any warm-up, but +\item{n_samples}{number of MCMC samples to draw (after any warm-up, but before thinning)} -\item{thin}{the MCMC thinning rate; every \code{thin} samples is retained, +\item{thin}{MCMC thinning rate; every \code{thin} samples is retained, the rest are discarded} -\item{warmup}{the number of samples to spend warming up the mcmc sampler. +\item{warmup}{number of samples to spend warming up the mcmc sampler. 
During this phase the sampler moves toward the highest density area and tunes sampler hyperparameters.} +\item{chains}{number of MCMC chains to run} + \item{verbose}{whether to print progress information to the console} \item{pb_update}{how regularly to update the progress bar (in iterations)} @@ -39,18 +41,18 @@ tunes sampler hyperparameters.} \item{control}{an optional named list of hyperparameters and options to control behaviour of the sampler or optimiser. See Details.} -\item{initial_values}{an optional named vector of initial values for the free -parameters in the model. These will be used as the starting point for -sampling/optimisation} +\item{initial_values}{an optional vector (or list of vectors, for multiple +chains) of initial values for the free parameters in the model. These will +be used as the starting point for sampling/optimisation.} \item{max_iterations}{the maximum number of iterations before giving up} \item{tolerance}{the numerical tolerance for the solution, the optimiser stops when the (absolute) difference in the joint density between successive iterations drops below this level} } \value{ -\code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object that can be analysed using - functions from the coda package. This will contain mcmc samples of the - greta arrays used to create \code{model}. +\code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object + that can be analysed using functions from the coda package. This will + contain mcmc samples of the greta arrays used to create \code{model}. \code{opt} - a list containing the following named elements: \itemize{ @@ -90,17 +92,19 @@ For \code{mcmc()} if \code{verbose = TRUE}, the progress bar shows (via \code{model}) to have double precision, though this will slow down sampling. - Currently, the only implemented MCMC procedure is static Hamiltonian - Monte Carlo (\code{method = "hmc"}). During the warmup iterations, the - leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the - sampler efficiency. The \code{control} argument can be used to specify the - initial value for epsilon, along with two other hyperparameters: \code{Lmin} - and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the - upper and lower limits to the number of leapfrog steps per iteration (from - which the number is selected uniformly at random). + Currently, the only implemented MCMC procedure is static Hamiltonian Monte + Carlo (\code{method = "hmc"}). During the warmup iterations, the leapfrog + stepsize hyperparameter \code{epsilon} is tuned to maximise the sampler + efficiency, and the posterior marginal standard deviations are estimated + \code{diag_sd}. The \code{control} argument can be used to specify the + initial value for \code{epsilon}, \code{diag_sd}, and two other + hyperparameters: \code{Lmin} and \code{Lmax}; positive integers (with + \code{Lmax > Lmin}) giving the upper and lower limits to the number of + leapfrog steps per iteration (from which the number is selected uniformly + at random). The default control options for HMC are: - \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)} + \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005, diag_sd = 1)} Currently, the only implemented optimisation algorithm is Adagrad (\code{method = "adagrad"}). 
The \code{control} argument can be used to @@ -114,10 +118,10 @@ Currently, the only implemented optimisation algorithm is Adagrad \examples{ \dontrun{ # define a simple model -mu = variable() -sigma = lognormal(1, 0.1) -x = rnorm(10) -distribution(x) = normal(mu, sigma) +mu <- variable() +sigma <- lognormal(1, 0.1) +x <- rnorm(10) +distribution(x) <- normal(mu, sigma) m <- model(mu, sigma) # carry out mcmc on the model diff --git a/man/model.Rd b/man/model.Rd index 145b82ef..471ccd10 100644 --- a/man/model.Rd +++ b/man/model.Rd @@ -64,18 +64,16 @@ The plot method produces a visual representation of the defined \if{latex}{\figure{plotlegend.pdf}{options: width=7cm}} } \examples{ - \dontrun{ # define a simple model -mu = variable() -sigma = lognormal(1, 0.1) -x = rnorm(10) -distribution(x) = normal(mu, sigma) +mu <- variable() +sigma <- lognormal(1, 0.1) +x <- rnorm(10) +distribution(x) <- normal(mu, sigma) m <- model(mu, sigma) plot(m) } - } diff --git a/man/operators.Rd b/man/operators.Rd index 3f6e3555..e9f730c0 100644 --- a/man/operators.Rd +++ b/man/operators.Rd @@ -45,22 +45,22 @@ greta's operators are used just like R's the standard arithmetic, \examples{ \dontrun{ -x = as_data(-1:12) +x <- as_data(-1:12) # arithmetic -a = x + 1 -b = 2 * x + 3 -c = x \%\% 2 -d = x \%/\% 5 +a <- x + 1 +b <- 2 * x + 3 +c <- x \%\% 2 +d <- x \%/\% 5 # logical -e = (x > 1) | (x < 1) -f = e & (x < 2) -g = !f +e <- (x > 1) | (x < 1) +f <- e & (x < 2) +g <- !f # relational -h = x < 1 -i = (-x) >= x -j = h == x +h <- x < 1 +i <- (-x) >= x +j <- h == x } } diff --git a/man/transforms.Rd b/man/transforms.Rd index 84c222e2..111ed2ea 100644 --- a/man/transforms.Rd +++ b/man/transforms.Rd @@ -8,6 +8,7 @@ \alias{icloglog} \alias{icauchit} \alias{log1pe} +\alias{imultilogit} \title{transformation functions for greta arrays} \usage{ iprobit(x) @@ -19,6 +20,8 @@ icloglog(x) icauchit(x) log1pe(x) + +imultilogit(x) } \arguments{ \item{x}{a real-valued (i.e. values ranging from -Inf to Inf) greta array to @@ -34,26 +37,36 @@ greta does not allow you to state the transformation/link on the modelling languages. That's because the same syntax has a very different meaning in R, and can only be applied to objects that are already in existence. The inverse forms of the common link functions (prefixed with an - 'i') are therefore more likely to useful in modelling, and these are what - are provided here. + 'i') can be used instead. + + The \code{log1pe} inverse link function is equivalent to \code{log(1 + + exp(x))}, yielding a positive transformed parameter. Unlike the log + transformation, this transformation is approximately linear for x > 1. i.e. + when \eqn{x > 1}, \eqn{y \approx x} - The \code{log1pe} inverse link function is equivalent to \code{log(1 + exp(x))}, - yielding a positive transformed parameter. Unlike the log transformation, - this transformation is approximately linear for x > 1. i.e. when \eqn{x > - 1}, \eqn{y \approx x} + \code{imultilogit} expects an n-by-m greta array, and returns an n-by-(m+1) + greta array of positive reals whose rows sum to one. This is equivalent + adding a final column of 0s and then running the softmax function widely + used in machine learning. 
} \examples{ \dontrun{ - x = normal(1, 3, dim = 10) + x1 <- normal(1, 3, dim = 10) # transformation to the unit interval - p1 <- iprobit(x) - p2 <- ilogit(x) - p3 <- icloglog(x) - p4 <- icauchit(x) + p1 <- iprobit(x1) + p2 <- ilogit(x1) + p3 <- icloglog(x1) + p4 <- icauchit(x1) # and to positive reals - y <- log1pe(x) + y <- log1pe(x1) + + # transform from 10x3 to 10x4, where rows are a complete set of + # probabilities + x2 <- normal(1, 3, dim = c(10, 3)) + z <- imultilogit(x2) + } } diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 91456f7b..b6c0ed7b 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,10 +1,15 @@ # test functions -library(tensorflow) +library (tensorflow) -# set the seed before running tests +# set the seed and flush the graph before running tests +tf <- tensorflow::tf +tf$reset_default_graph() set.seed(2017-05-01) +quietly <- function (expr) + invisible(capture.output(expr)) + expect_ok <- function (expr) expect_error(expr, NA) @@ -65,8 +70,6 @@ compare_distribution <- function (greta_fun, r_fun, parameters, x) { # define greta distribution, with fixed values - tf$reset_default_graph() - parameters_greta <- parameters # no dim for wishart if (!identical(names(parameters), c('df', 'Sigma'))) @@ -116,8 +119,6 @@ randu <- function (...) { # e.g. check_op(sum, randn(100, 3)) check_op <- function (op, a, b, greta_op = NULL) { - tf$reset_default_graph() - if (is.null(greta_op)) greta_op <- op @@ -168,8 +169,6 @@ with_greta <- function (call, swap = c('x'), swap_scope = 1) { # e.g. check_expr(a[1:3], swap = 'a') check_expr <- function (expr, swap = c('x')) { - tf$reset_default_graph() - call <- substitute(expr) r_out <- eval(expr) @@ -241,8 +240,6 @@ compare_truncated_distribution <- function (greta_fun, # array. 'r_fun' is an r function returning the log density for the same # truncated distribution, taking x as its only argument. 
- tf$reset_default_graph() - require (truncdist) x <- do.call(truncdist::rtrunc, @@ -332,8 +329,7 @@ mock_mcmc <- function (n_samples = 1010) { dinvgamma <- extraDistr::dinvgamma qinvgamma <- extraDistr::qinvgamma -pinvgamma <- function (q, alpha, beta) - ifelse(q < 0, 0, extraDistr::pinvgamma(q, alpha, beta)) +pinvgamma <- extraDistr::pinvgamma dlaplace <- extraDistr::dlaplace plaplace <- extraDistr::plaplace diff --git a/tests/testthat/test_calculate.R b/tests/testthat/test_calculate.R new file mode 100644 index 00000000..85001f5b --- /dev/null +++ b/tests/testthat/test_calculate.R @@ -0,0 +1,122 @@ +context('calculate') + +test_that('calculate works with correct lists', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # unknown variable + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + y_value <- calculate(y, list(a = 3)) + expect_equal(y_value, matrix(c(3, 6))) + + # unknown variable and new data + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + y_value <- calculate(y, list(a = 6, x = c(2, 1))) + expect_equal(y_value, matrix(c(12, 6))) + +}) + +test_that('calculate works with mcmc.list objects', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + samples <- 10 + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + m <- model(y) + draws <- mcmc(m, warmup = 0, n_samples = samples, verbose = FALSE) + + # with an existing greta array + y_values <- calculate(y, draws) + # correct class + expect_s3_class(y_values, "mcmc.list") + # correct dimensions + expect_equal(dim(y_values[[1]]), c(10, 2)) + # all valid values + expect_true(all(is.finite(as.vector(y_values[[1]])))) + + # with a new greta array, based on a different element in the model + new_values <- calculate(a ^ 2, draws) + # correct class + expect_s3_class(new_values, "mcmc.list") + # correct dimensions + expect_equal(dim(new_values[[1]]), c(10, 1)) + # all valid values + expect_true(all(is.finite(as.vector(new_values[[1]])))) + +}) + +test_that('calculate errors nicely if mcmc.list objects aren missing crucial info', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + samples <- 10 + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + m <- model(y) + draws <- mcmc(m, warmup = 0, n_samples = samples, verbose = FALSE) + + # scrub the model info + attr(draws, "model_info") <- NULL + + # it should error nicely + expect_error(calculate(y, draws), + "perhaps it wasn't created with greta") + +}) + +test_that('calculate errors nicely if not all required values are passed', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + + # it should error nicely + expect_error(calculate(y, list(x = c(2, 1))), + "values have not been provided for all variables") + +}) + +test_that('calculate errors nicely if required values have incorrect dimensions', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + + # it should error nicely + expect_error(calculate(y, list(a = c(1, 1))), + "different number of elements than the greta array") + +}) + +test_that('calculate errors nicely if not used on a greta array', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + z <- 1:5 + + + # it should error nicely + expect_error(calculate(z, list(a = c(1, 1))), + "target' is not a greta array") + +}) diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index f41e0a82..6e95f2e7 
100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -47,7 +47,7 @@ test_that('opt accepts initial values', { }) -test_that('rejected mcmc proposals', { +test_that('bad mcmc proposals are rejected', { skip_if_not(check_tf_version()) source('helpers.R') @@ -75,7 +75,13 @@ test_that('rejected mcmc proposals', { distribution(x) = normal(z, 1e-12) m <- model(z) expect_error(mcmc(m, n_samples = 1, warmup = 0), - 'Could not find reasonable starting values after 10 attempts') + 'Could not find reasonable starting values after 20 attempts') + + # proposals that are fine, but rejected anyway (long chain required) + z = normal(0, 1) + m <- model(z) + expect_ok(mcmc(m, n_samples = 5, warmup = 0, verbose = FALSE, + control = list(epsilon = 100, Lmin = 1, Lmax = 1))) }) @@ -88,7 +94,51 @@ test_that('mcmc works with verbosity and warmup', { z = normal(0, 1) distribution(x) = normal(z, 1) m <- model(z) - expect_ok( mcmc(m, n_samples = 50, warmup = 50, verbose = TRUE) ) + quietly(expect_ok( mcmc(m, n_samples = 50, warmup = 50, verbose = TRUE) )) + +}) + +test_that('mcmc works with multiple chains', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(10) + z = normal(0, 1) + distribution(x) = normal(z, 1) + m <- model(z) + + # multiple chains, automatic initial values + quietly(expect_ok( mcmc(m, warmup = 10, n_samples = 10, chains = 2) )) + + # multiple chains, user-specified initial values + quietly(expect_ok( mcmc(m, warmup = 10, n_samples = 10, chains = 2, initial_values = list(1, 2)) )) + +}) + +test_that('mcmc handles initial values nicely', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(10) + z = normal(0, 1) + distribution(x) = normal(z, 1) + m <- model(z) + + # too many sets of initial values + expect_error( mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE, + chains = 2, initial_values = list(1)), + "sets of initial values were provided, but there are") + + # initial values have the wrong length + expect_error( mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE, + chains = 2, initial_values = list(1:2, 2:3)), + "each set of initial values must be a vector of length") + + quietly(expect_message(mcmc(m, warmup = 10, n_samples = 10, + chains = 2, initial_values = 1), + "only one set of was initial values given, and was used for all chains")) }) @@ -140,10 +190,31 @@ test_that('stashed_samples works', { # mock up a stash stash <- greta:::greta_stash - assign('trace_stash', as.matrix(rnorm(17)), envir = stash) + trace_stash <- list(trace = as.matrix(rnorm(17)), + raw = as.matrix(rnorm(17))) + assign('trace_stash', trace_stash, envir = stash) # should convert to an mcmc.list ans <- stashed_samples() expect_s3_class(ans, 'mcmc.list') + # model_info attribute should have raw draws and the model + model_info <- attr(ans, "model_info") + expect_true(inherits(model_info, "environment")) + expect_s3_class(model_info$raw_draws, 'mcmc.list') + expect_true(inherits(model_info$model, "greta_model")) + +}) + + +test_that('model errors nicely', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # model should give a nice error if passed something other than a greta array + a <- 1 + b <- normal(0, a) + expect_error(model(a, b), + "^The following object") }) diff --git a/tests/testthat/test_operators.R b/tests/testthat/test_operators.R index 42e88e26..e7c9cdb3 100644 --- a/tests/testthat/test_operators.R +++ b/tests/testthat/test_operators.R @@ -87,13 +87,19 @@ test_that('random strings of operators work as 
expected', { }) -test_that('%*% errors informatively', { +test_that("%*% errors informatively", { skip_if_not(check_tf_version()) + source('helpers.R') + a <- ones(3, 4) b <- ones(1, 4) + c <- ones(2, 2, 2) expect_error(a %*% b, - 'incompatible dimensions: 3x4 vs 1x4') + "incompatible dimensions: 3x4 vs 1x4") + + expect_error(a %*% c, + "only two-dimensional greta arrays can be matrix-multiplied") }) diff --git a/tests/testthat/test_transformations.R b/tests/testthat/test_transforms.R similarity index 54% rename from tests/testthat/test_transformations.R rename to tests/testthat/test_transforms.R index 14026e88..2916d960 100644 --- a/tests/testthat/test_transformations.R +++ b/tests/testthat/test_transforms.R @@ -7,17 +7,38 @@ test_that('transformations work as expected', { a <- randn(25, 4) + r_icloglog <- function (x) 1 - exp(-1 * exp(x)) r_log1pe <- function (x) log1p(exp(x)) + r_imultilogit <- function (x) { + x <- cbind(x, 0) + x <- exp(x) + x <- sweep(x, 1, rowSums(x), "/") + x + } + check_op(pnorm, a, greta_op = iprobit) check_op(plogis, a, greta_op = ilogit) check_op(r_icloglog, a, greta_op = icloglog) check_op(pcauchy, a, greta_op = icauchit) check_op(r_log1pe, a, greta_op = log1pe) + check_op(r_imultilogit, a, greta_op = imultilogit) + +}) + +test_that("imultilogit errors informatively", { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- ones(3, 4, 3) + + expect_error(imultilogit(x), + "imultilogit expects a 2D greta array") }) diff --git a/vignettes/examples/air.Rmd b/vignettes/examples/air.Rmd index 40e80446..b0acc6a8 100644 --- a/vignettes/examples/air.Rmd +++ b/vignettes/examples/air.Rmd @@ -21,11 +21,11 @@ J <- 3 #### greta code ```{r air_greta} -theta = normal(0, 32, dim = 2) +theta <- normal(0, 32, dim = 2) mu <- alpha + beta * Z -X = normal(mu, sigma) +X <- normal(mu, sigma) p <- ilogit(theta[1] + theta[2] * X) -distribution(y) = binomial(n, p) +distribution(y) <- binomial(n, p) ``` #### BUGS/JAGS code diff --git a/vignettes/examples/beetles.Rmd b/vignettes/examples/beetles.Rmd index be375481..d4f30ec2 100644 --- a/vignettes/examples/beetles.Rmd +++ b/vignettes/examples/beetles.Rmd @@ -17,10 +17,10 @@ N <- 8 #### greta code ```{r beetles_greta} -alpha_star = normal(0, 32) -beta = normal(0, 32) +alpha_star <- normal(0, 32) +beta <- normal(0, 32) p <- ilogit(alpha_star + beta * (x - mean(x))) -distribution(r) = binomial(n, p) +distribution(r) <- binomial(n, p) alpha <- alpha_star - beta * mean(x) rhat <- p * n diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index 2851f9a8..f1ebc962 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -15,7 +15,7 @@ vignette: > %\usepackage[utf8]{inputenc} --- -```{r setup, include=FALSE} +```{r setup, include = FALSE} knitr::opts_chunk$set(echo = TRUE, eval = greta:::check_tf_version("message"), @@ -128,15 +128,15 @@ x <- as_data(iris$Petal.Length) y <- as_data(iris$Sepal.Length) # variables and priors -int = normal(0, 1) -coef = normal(0, 3) -sd = student(3, 0, 1, truncation = c(0, Inf)) +int <- normal(0, 1) +coef <- normal(0, 3) +sd <- student(3, 0, 1, truncation = c(0, Inf)) # operations mean <- int + coef * x # likelihood -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) # defining the model m <- model(int, coef, sd) @@ -148,8 +148,6 @@ plot(m) draws <- mcmc(m, n_samples = 1000) ``` -Throughout this example, and the rest of greta's documentation, `<-` is used when assigning *deterministic* values, and `=` when assigning variable or *stochastic* values. 
This makes the model code a little bit clearer to read, since `=` corresponds to the `~` symbol used to define stochastic relationships in BUGS and Stan. However, it doesn't actually make any difference to the model which assignment operator you use. -
### Data @@ -208,19 +206,19 @@ greta_array(0:1, dim = c(3, 3)) The second section of the script creates three greta arrays to represent the parameters in our model: ```{r variables} -int = normal(0, 1) -coef = normal(0, 3) -sd = student(3, 0, 1, truncation = c(0, Inf)) +int <- normal(0, 1) +coef <- normal(0, 3) +sd <- student(3, 0, 1, truncation = c(0, Inf)) ``` -Each of these is a variable greta array, and each is assumed *a priori* (before fitting to data) to follow a different probability distribution. In other wirds, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions. +Each of these is a variable greta array, and each is assumed *a priori* (before fitting to data) to follow a different probability distribution. In other words, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions. #### variables without probability distributions If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the `variable()` function. E.g. in a frequentist version of the model we could create `int` with: ```{r int_variable} -(int = variable()) +(int <- variable()) ``` `variable()` has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of `lower = -Inf, upper = Inf` meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar). @@ -228,7 +226,7 @@ If we were carrying out a frequentist analysis of this model, we could create va We can create a variable constrained between two values by specifying `lower` and `upper`. So we could have created the positive variable `sd` (the standard deviation of the likelihood) with: ```{r positive_variable} -(sd = variable(lower = 0)) +(sd <- variable(lower = 0)) ``` If we had instead wanted a 2x3 array of positive variables we could have created it like this: @@ -247,7 +245,7 @@ Both `int` and `coef` were given zero-mean normal distributions, which are a com Some of greta's probability distributions (those that are continous and univariate) can be specified as truncated distributions. By modifying the `truncation` argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do: ```{r truncated1} -(z = normal(0, 1, truncation = c(-1, 1))) +(z <- normal(0, 1, truncation = c(-1, 1))) ``` greta will account for this truncation when calculating the density of this distribution; rescaling it to be a valid probability distribution. We can only truncate to within the support of the distribution; e.g. greta will throw an error if we try to truncate a lognormal distribution (which must be positive) to have a lower bound of -1. 
@@ -294,7 +292,7 @@ dim(z[, 1]) dim(z[, 1, drop = FALSE]) ``` -greta arrays must always have two dimensions, so greta always acts as though `drop=FALSE`: +greta arrays must always have two dimensions, so greta always acts as though `drop = FALSE`: ```{r drop_greta} z_greta <- as_data(z) @@ -320,10 +318,10 @@ So far, we have created greta arrays representing the variables in our model (wi By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we're trying to work out the parameters of that distribution. In greta we do that with the `distribution()` assignment function: ```{r likelihood} -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) ``` -With the syntax `distribution() = `, we are stating that the data greta array on the left `` has the same distribution as the greta array on the right ``. In this case, we're temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays `mean` and `sd`), but then stating that values of that distribution have been observed (`y`). In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters. +With the syntax `distribution() <- `, we are stating that the data greta array on the left `` has the same distribution as the greta array on the right ``. In this case, we're temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays `mean` and `sd`), but then stating that values of that distribution have been observed (`y`). In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters.
@@ -334,11 +332,11 @@ Now all of the greta arrays making up the model have been created, we need to co ```{r hidden_model, echo = FALSE} x <- as_data(iris$Petal.Length) y <- as_data(iris$Sepal.Length) -int = normal(0, 1) -coef = normal(0, 3) -sd = student(3, 0, 1, truncation = c(0, Inf)) +int <- normal(0, 1) +coef <- normal(0, 3) +sd <- student(3, 0, 1, truncation = c(0, Inf)) mean <- int + coef * x -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) ``` ```{r define_model} m <- model(int, coef, sd) @@ -354,7 +352,7 @@ greta provides a plot function for greta models to help you visualise and check ```{r plot, eval = FALSE} plot(m) ``` -```{r plot_hidden, echo = FALSE, results='hide'} +```{r plot_hidden, echo = FALSE, results = "hide"} gr <- plot(m) fname <- "figures/full_graph.png" DiagrammeR::export_graph(gr, @@ -374,10 +372,10 @@ Here's a legend for the plot (it's in the [`?greta::model`](https://greta-dev.gi The fourth type of node (diamonds) represents probability distributions. These have greta arrays as parameters (linked via solid lines), and have other greta arrays as *values*(linked via dashed lines). Distributions calculate the probability density of the *values*, given the parameters and their distribution type. -For example, a plot of just the prior distribution over `coef` (defined as `coef = normal(0, 3)`) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution's value, the variable `coef`: +For example, a plot of just the prior distribution over `coef` (defined as `coef <- normal(0, 3)`) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution's value, the variable `coef`: -```{r plot_coef, echo = FALSE, results='hide'} -coef = normal(0, 3) +```{r plot_coef, echo = FALSE, results = "hide"} +coef <- normal(0, 3) m_coef <- model(coef) gr <- plot(m_coef) fname <- "figures/coef_graph.png" @@ -391,11 +389,11 @@ DiagrammeR::export_graph(gr, It's the same for the model likelihood, but this time the distribution's parameters are a variable (`sd`) and the result of an operation (`mean`), and the distribution's value is given by data (the observed sepal lengths `y`): -```{r plot_likelihood, echo = FALSE, results='hide'} -sd = variable() +```{r plot_likelihood, echo = FALSE, results = "hide"} +sd <- variable() y <- as_data(iris$Sepal.Length) mean <- ones(150) -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) m_likelihood <- model(sd) gr <- plot(m_likelihood) idx <- which(gr$nodes_df$label == 'mean\n') @@ -422,7 +420,7 @@ When defining the model, greta combines all of the distributions together to def Now we have a greta model that will give us the joint density for a candidate set of values, so we can use that to carry out inference on the model. 
We do that using an Markov chain Monte Carlo (MCMC) algorithm to sample values of the parameters we're interested in, using the `mcmc()` function: -```{r mcmc, message=FALSE, results='hide', progress = FALSE} +```{r mcmc, message = FALSE, results = "hide", progress = FALSE} draws <- mcmc(m, n_samples = 1000) ``` @@ -441,7 +439,7 @@ mcmc_trace(draws) mcmc_intervals(draws) ``` -```{r mcmcvis, echo = FALSE, message = FALSE, out.width=c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} +```{r mcmcvis, echo = FALSE, message = FALSE, out.width = c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} library (bayesplot) mcmc_trace(draws, facet_args = list(nrow = 3, ncol = 1)) mcmc_intervals(draws) diff --git a/vignettes/technical_details.Rmd b/vignettes/technical_details.Rmd index 778fb1f2..20ff451c 100644 --- a/vignettes/technical_details.Rmd +++ b/vignettes/technical_details.Rmd @@ -97,7 +97,7 @@ z$node$tf Hamiltonian Monte Carlo (HMC) requires all of the parameters to be transformed to a continuous scale for sampling. Variable nodes are therefore converted to tensors by first defining a 'free' (unconstrained) version of themselves as a tensor, and then applying a transformation function to convert them back to the scale of interest. ```{r free_state} -a = variable(lower = 0) +a <- variable(lower = 0) class(a$node) a$node$tf_from_free ``` @@ -107,7 +107,7 @@ a$node$tf_from_free distribution nodes are node objects just like the others, but they are not *directly* associated with greta arrays. Instead, greta arrays may have a distribution node in their `distribution` slot to indicate that their values are assumed to follow that distribution. The distribution node will also be listed as a parent node, and likewise the 'target node' will be listed as a child of the distribution. Distribution nodes also have child nodes (data, variables or operations) representing their parameters. ```{r distributions1} -b = normal(0, 1) +b <- normal(0, 1) class(b$node) class(b$node$distribution) # a is the target of its own distribution