diff --git a/DESCRIPTION b/DESCRIPTION
index a7cce07c..641b779e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: greta
 Type: Package
 Title: Simple and Scalable Statistical Modelling in R
-Version: 0.2.3
-Date: 2018-01-23
+Version: 0.2.4
+Date: 2018-02-20
 Authors@R: person("Nick", "Golding", role = c("aut", "cre"),
     email = "nick.golding.research@gmail.com")
-Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://www.tensorflow.org> for more information).
+Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://www.tensorflow.org> for more information).
 License: Apache License 2.0
 URL: https://github.com/greta-dev/greta
 BugReports: https://github.com/greta-dev/greta/issues
@@ -40,6 +40,7 @@ Collate:
     'inference.R'
     'install_tensorflow.R'
     'internals.R'
+    'calculate.R'
 Imports:
     R6,
     tensorflow,
diff --git a/NAMESPACE b/NAMESPACE
index f027caf5..973e47e1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -98,6 +98,7 @@ export(bernoulli)
 export(beta)
 export(beta_binomial)
 export(binomial)
+export(calculate)
 export(categorical)
 export(cauchy)
 export(chi_squared)
@@ -116,6 +117,7 @@ export(hypergeometric)
 export(icauchit)
 export(icloglog)
 export(ilogit)
+export(imultilogit)
 export(install_tensorflow)
 export(inverse_gamma)
 export(iprobit)
diff --git a/NEWS b/NEWS
index d94d2089..20f8f935 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,33 @@
+### greta 0.2.4
+================
+
+Fixes:
+
+* improved error checking/messages in `model()`, `%*%`
+* switched docs and examples to always use `<-` for assignment
+* fixed the `n_cores` argument to `model()`
+
+New functionality:
+
+* added a `calculate()` function to compute the values of greta arrays conditional on provided values for others
+* added `imultilogit()` transform
+* added a `chains` argument to `mcmc()`
+* improved HMC self-tuning, including a diagonal Euclidean metric
+
+### greta 0.2.3
+================
+
+Fixes:
+
+* fixed breaking change in extraDistr API (caused test errors on CRAN builds)
+* added dontrun statements to pass CRAN checks on winbuilder
+* fixed breaking change in tensorflow API (1-based indexing)
+
+New functionality:
+
+* added `cumsum()` and `cumprod()` functions
+
+
 ### greta 0.2.2
 ================
diff --git a/R/calculate.R b/R/calculate.R
new file mode 100644
index 00000000..f3e53416
--- /dev/null
+++ b/R/calculate.R
@@ -0,0 +1,219 @@
+# calculating values outside inference
+
+#' @name calculate
+#' @title calculate greta arrays given fixed values
+#' @description Calculate the values that greta arrays would take, given
+#' temporary values for the greta arrays on which they depend, and return them
+#' as numeric R arrays. This can be used to check the behaviour of your model
+#' or make predictions to new data after model fitting.
+#'
+#' @param target a greta array for which to calculate the value
+#' @param values a named list giving temporary values of the greta arrays with
+#' which \code{target} is connected, or an \code{mcmc.list} object returned by
+#' \code{\link{mcmc}}.
+#'
+#' @return A numeric R array with the same dimensions as \code{target}, giving
+#' the values it would take conditioned on the fixed values given by
+#' \code{values}.
+#'
+#' @details The greta arrays named in \code{values} need not be variables, they
+#' can also be other operations or even data.
+#'
+#' At present, if \code{values} is a named list it must contain values for
+#' \emph{all} of the variable greta arrays with which \code{target} is
+#' connected, even if values are given for intermediate operations, or the
+#' target doesn't depend on the variable. That may be relaxed in a future
+#' release.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # define a variable greta array, and another that is calculated from it
+#' # then calculate what value y would take for different values of x
+#' x <- normal(0, 1, dim = 3)
+#' a <- lognormal(0, 1)
+#' y <- sum(x ^ 2) + a
+#' calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))
+#'
+#'
+#' # define a model
+#' alpha <- normal(0, 1)
+#' beta <- normal(0, 1)
+#' sigma <- lognormal(1, 0.1)
+#' mu <- alpha + iris$Petal.Length * beta
+#' distribution(iris$Petal.Width) <- normal(mu, sigma)
+#' m <- model(alpha, beta, sigma)
+#'
+#' # calculate intermediate greta arrays, given some parameter values
+#' calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5))
+#' calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5))
+#'
+#'
+#' # fit the model then calculate samples at a new greta array
+#' draws <- mcmc(m, n_samples = 500)
+#' petal_length_plot <- seq(min(iris$Petal.Length),
+#'                          max(iris$Petal.Length),
+#'                          length.out = 100)
+#' mu_plot <- alpha + petal_length_plot * beta
+#' mu_plot_draws <- calculate(mu_plot, draws)
+#'
+#' # plot the draws
+#' mu_est <- colMeans(mu_plot_draws[[1]])
+#' plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]]))
+#' apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8))
+#' lines(mu_est ~ petal_length_plot, lwd = 2)
+#' }
+#'
+#'
+calculate <- function (target, values) {
+
+  target_name <- deparse(substitute(target))
+
+  if (!inherits(target, "greta_array"))
+    stop ("'target' is not a greta array")
+
+  if (inherits(values, "mcmc.list"))
+    calculate_mcmc.list(target, target_name, values)
+  else
+    calculate_list(target, values)
+
+}
+
+calculate_mcmc.list <- function (target, target_name, values) {
+
+  model_info <- attr(values, "model_info")
+
+  if (is.null(model_info)) {
+    stop ("value is an mcmc.list object, but is not associated with any ",
+          "model information, perhaps it wasn't created with ",
+          "greta::mcmc() ?",
+          call. = FALSE)
+  }
+
+  # copy and refresh the dag
+  dag <- model_info$model$dag$clone()
+  dag$tf_environment <- new.env()
+  dag$tf_graph <- tf$Graph()
+
+  # extend the dag to include this node, as the target
+  dag$build_dag(list(target))
+  self <- dag  # mock for scoping
+  dag$define_tf(log_density = TRUE, gradients = TRUE)
+
+  dag$target_nodes <- list(target$node)
+  names(dag$target_nodes) <- target_name
+
+  example_values <- dag$trace_values()
+  n_trace <- length(example_values)
+
+  # raw draws are either an attribute, or this object
+  model_info <- attr(values, "model_info")
+  draws <- model_info$raw_draws
+
+  # trace the target for each draw in each chain
+  trace <- list()
+  for (i in seq_along(draws)) {
+
+    samples <- apply(draws[[i]],
+                     1,
+                     function (x) {
+                       dag$send_parameters(x)
+                       dag$trace_values()
+                     })
+
+    samples <- as.matrix(samples)
+
+    if (ncol(samples) != n_trace)
+      samples <- t(samples)
+
+    colnames(samples) <- names(example_values)
+
+    trace[[i]] <- coda::mcmc(samples)
+
+  }
+
+  trace <- coda::mcmc.list(trace)
+  attr(trace, "model_info") <- model_info
+  # return this
+  return (trace)
+
+}
+
+calculate_list <- function (target, values) {
+
+  # get the values and their names
+  names <- names(values)
+  stopifnot(length(names) == length(values))
+
+  # get the corresponding greta arrays
+  fixed_greta_arrays <- lapply(names,
+                               get,
+                               envir = parent.frame(n = 2))
+
+  # make sure that's what they are
+  are_greta_arrays <- vapply(fixed_greta_arrays,
+                             inherits,
+                             "greta_array",
+                             FUN.VALUE = FALSE)
+
+  stopifnot(all(are_greta_arrays))
+
+  # make sure the values have the correct dimensions
+  values <- mapply(assign_dim,
+                   values,
+                   fixed_greta_arrays,
+                   SIMPLIFY = FALSE)
+
+  all_greta_arrays <- c(fixed_greta_arrays, list(target))
+
+  # define the dag and TF graph
+  dag <- dag_class$new(all_greta_arrays)
+  dag$define_tf(log_density = FALSE, gradients = FALSE)
+
+  # build and send a dict for the fixed values
+  fixed_nodes <- lapply(fixed_greta_arrays,
+                        member,
+                        'node')
+
+  names(values) <- vapply(fixed_nodes,
+                          dag$tf_name,
+                          FUN.VALUE = "")
+
+  # check that all of the variables are set
+  # list of variable tf names
+  variable_nodes <- dag$node_list[dag$node_types == "variable"]
+  variable_names <- vapply(variable_nodes,
+                           dag$tf_name,
+                           FUN.VALUE = "")
+
+  if (!all(variable_names %in% names(values))) {
+    stop ("values have not been provided for all variables",
+          call. = FALSE)
+  }
+
+  # send it to tf
+  assign("eval_list", values, envir = dag$tf_environment)
+  ex <- expression(with_dict <- do.call(dict, eval_list))
+  eval(ex, envir = dag$tf_environment)
+
+  # evaluate the target there
+  ex <- sprintf("sess$run(%s, feed_dict = with_dict)",
+                dag$tf_name(target$node))
+  result <- eval(parse(text = ex),
+                 envir = dag$tf_environment)
+
+  result
+
+}
+
+# coerce value to have the correct dimensions
+assign_dim <- function (value, greta_array) {
+  array <- strip_unknown_class(greta_array$node$value())
+  if (length(array) != length(value)) {
+    stop ("a provided value has different number of elements",
+          " than the greta array", call. = FALSE)
+  }
+  array[] <- value
+  array
+}
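Worth noting before the next file: the all-variables restriction described in the `@details` block above is the main gotcha when calling `calculate()` with a named list. A minimal sketch of how it surfaces, assuming greta 0.2.4 is attached (the commented-out call shows the failure mode; the quoted message is the one raised by `calculate_list()` above):

```r
library(greta)

x <- normal(0, 1, dim = 3)
a <- lognormal(0, 1)
y <- sum(x ^ 2) + a

# fine: every variable y is connected to (x and a) gets a value
calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))

# errors with "values have not been provided for all variables",
# because a is a variable connected to y but is missing from the list
# calculate(y, list(x = c(0.1, 0.2, 0.3)))
```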
diff --git a/R/dag_class.R b/R/dag_class.R
index df88a6d1..5c3d8b9a 100644
--- a/R/dag_class.R
+++ b/R/dag_class.R
@@ -9,6 +9,7 @@ dag_class <- R6Class(
     node_types = NA,
     node_tf_names = NA,
     tf_environment = NA,
+    tf_graph = NA,
    target_nodes = NA,
     parameters_example = NA,
     tf_float = NA,
@@ -27,12 +28,9 @@ dag_class <- R6Class(
       # find the nodes we care about
       self$target_nodes <- lapply(target_greta_arrays, member, 'node')
 
-      # stash the node names, types, and tf names
-      self$node_types <- vapply(self$node_list, node_type, FUN.VALUE = '')
-      self$node_tf_names <- self$make_names()
-
-      # set up the tf environment
+      # set up the tf environment, with a graph
       self$tf_environment <- new.env()
+      self$tf_graph <- tf$Graph()
 
       # stash an example list to relist parameters
       self$parameters_example <- self$example_parameters(flat = FALSE)
@@ -44,6 +42,17 @@ dag_class <- R6Class(
 
     },
 
+    # execute an expression on this dag's tensorflow graph
+    on_graph = function (expr) {
+      with(self$tf_graph$as_default(), expr)
+    },
+
+    # execute an expression in the tensorflow environment
+    tf_run = function (expr) {
+      self$tf_environment$expr <- substitute(expr)
+      self$on_graph(with(self$tf_environment, eval(expr)))
+    },
+
     # return a list of nodes connected to those in the target node list
     build_dag = function (greta_array_list) {
 
@@ -54,6 +63,10 @@ dag_class <- R6Class(
         node$register_family(self)
       }
 
+      # stash the node names, types, and tf names
+      self$node_types <- vapply(self$node_list, node_type, FUN.VALUE = '')
+      self$node_tf_names <- self$make_names()
+
     },
 
     make_names = function () {
@@ -85,7 +98,7 @@ dag_class <- R6Class(
     },
 
     # define tf graph in environment
-    define_tf = function () {
+    define_tf = function (log_density = TRUE, gradients = TRUE) {
 
       # temporarily pass float type info to options, so it can be accessed by
       # nodes on definition, without clunky explicit passing
@@ -107,28 +120,33 @@ dag_class <- R6Class(
               call. = FALSE)
       }
 
-      # define all nodes, node densities and free states in the environment
-      lapply(self$node_list, function (x) x$define_tf(self))
+      # define all nodes, node densities and free states in the environment, and
+      # on the graph
+      self$on_graph(lapply(self$node_list,
+                           function (x) x$define_tf(self)))
 
       # define an overall log density and gradients, plus adjusted versions
-      self$define_joint_density()
-      self$define_gradients()
+      if (log_density)
+        self$on_graph(self$define_joint_density())
+
+      if (gradients)
+        self$on_graph(self$define_gradients())
 
       # use core and compilation options to create a config object
-      with(self$tf_environment,
-           {config <- tf$ConfigProto(intra_op_parallelism_threads = self$n_cores,
-                                     inter_op_parallelism_threads = self$n_cores)})
+      self$tf_environment$n_cores <- self$n_cores
+      self$tf_run(config <- tf$ConfigProto(inter_op_parallelism_threads = n_cores,
+                                           intra_op_parallelism_threads = n_cores))
 
       if (self$compile) {
-        with(self$tf_environment,
-             {py_set_attr(config$graph_options$optimizer_options,
-                          'global_jit_level',
-                          tf$OptimizerOptions$ON_1)})
+        self$tf_run(py_set_attr(config$graph_options$optimizer_options,
+                                'global_jit_level',
+                                tf$OptimizerOptions$ON_1))
       }
 
       # start a session and initialise all variables
-      with(self$tf_environment,
-           {sess <- tf$Session(config = config); sess$run(tf$global_variables_initializer())})
+      self$tf_run(sess <- tf$Session(config = config))
+      self$tf_run(sess$run(tf$global_variables_initializer()))
 
     },
 
@@ -237,8 +255,7 @@ dag_class <- R6Class(
 
       # create a feed dict in the TF environment
       self$tf_environment$parameters <- parameters
-      with(self$tf_environment,
-           parameter_dict <- do.call(dict, parameters))
+      self$tf_run(parameter_dict <- do.call(dict, parameters))
 
     },
 
@@ -246,15 +263,15 @@ dag_class <- R6Class(
     # variable nodes, with or without applying the jacobian adjustment
     log_density = function(adjusted = TRUE) {
 
-      cleanly(with(self$tf_environment,
-                   sess$run(joint_density_adj, feed_dict = parameter_dict)))
+      cleanly(self$tf_run(sess$run(joint_density_adj,
+                                   feed_dict = parameter_dict)))
 
     },
 
     gradients = function (adjusted = TRUE) {
 
-      cleanly(with(self$tf_environment,
-                   sess$run(gradients_adj, feed_dict = parameter_dict)))
+      cleanly(self$tf_run(sess$run(gradients_adj,
+                                   feed_dict = parameter_dict)))
 
     },
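The two new members added here, `on_graph()` and `tf_run()`, are the heart of this change: every tensorflow call now runs on the dag's own graph rather than the process-wide default, which is what lets `calculate()` clone and extend a dag without trampling the model's graph. A stripped-down sketch of the capture-then-evaluate pattern behind `tf_run()`, in plain R with no tensorflow (`run_in` is an illustrative name, not a greta function):

```r
# mimic tf_run(): stash the caller's unevaluated expression in the target
# environment, then evaluate it there, so lookups and assignments both
# happen inside that environment
run_in <- function (env, expr) {
  env$expr <- substitute(expr)
  with(env, eval(expr))
}

e <- new.env()
e$n_cores <- 4L
run_in(e, config <- list(threads = n_cores))  # n_cores found in e
e$config$threads  # 4: the assignment also landed in e
```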
diff --git a/R/distribution.R b/R/distribution.R
index c11ccc9c..90c994ec 100644
--- a/R/distribution.R
+++ b/R/distribution.R
@@ -23,12 +23,12 @@
 #'
 #' # observed data and mean parameter to be estimated
 #' # (explicitly coerce data to a greta array so we can refer to it later)
-#' y = as_data(rnorm(5, 0, 3))
+#' y <- as_data(rnorm(5, 0, 3))
 #'
-#' mu = uniform(-3, 3)
+#' mu <- uniform(-3, 3)
 #'
 #' # define the distribution over y (the model likelihood)
-#' distribution(y) = normal(mu, 1)
+#' distribution(y) <- normal(mu, 1)
 #'
 #' # get the distribution over y
 #' distribution(y)
diff --git a/R/extract_replace_combine.R b/R/extract_replace_combine.R
index e78086c4..dc8ce94b 100644
--- a/R/extract_replace_combine.R
+++ b/R/extract_replace_combine.R
@@ -43,7 +43,7 @@
 #' @examples
 #' \dontrun{
 #'
-#' x = as_data(matrix(1:12, 3, 4))
+#' x <- as_data(matrix(1:12, 3, 4))
 #'
 #' # extract/replace
 #' x[1:3, ]
diff --git a/R/functions.R b/R/functions.R
index f8a1bcec..0dbf9187 100644
--- a/R/functions.R
+++ b/R/functions.R
@@ -86,18 +86,18 @@
 #' @examples
 #' \dontrun{
 #'
-#' x = as_data(matrix(1:9, nrow = 3, ncol = 3))
-#' a = log(exp(x))
-#' b = log1p(expm1(x))
-#' c = sign(x - 5)
-#' d = abs(x - 5)
+#' x <- as_data(matrix(1:9, nrow = 3, ncol = 3))
+#' a <- log(exp(x))
+#' b <- log1p(expm1(x))
+#' c <- sign(x - 5)
+#' d <- abs(x - 5)
 #'
-#' e = diag(x)
+#' e <- diag(x)
 #' diag(x) <- e + 1
 #'
-#' z = t(a)
+#' z <- t(a)
 #'
-#' y = sweep(x, 1, e, '-')
+#' y <- sweep(x, 1, e, '-')
 #' }
 NULL
diff --git a/R/greta_model_class.R b/R/greta_model_class.R
index bb1a45a6..67c2a0ad 100644
--- a/R/greta_model_class.R
+++ b/R/greta_model_class.R
@@ -41,20 +41,18 @@ NULL
 #' @return \code{model} - a \code{greta_model} object.
 #'
 #' @examples
-#'
 #' \dontrun{
 #'
 #' # define a simple model
-#' mu = variable()
-#' sigma = lognormal(1, 0.1)
-#' x = rnorm(10)
-#' distribution(x) = normal(mu, sigma)
+#' mu <- variable()
+#' sigma <- lognormal(1, 0.1)
+#' x <- rnorm(10)
+#' distribution(x) <- normal(mu, sigma)
 #'
 #' m <- model(mu, sigma)
 #'
 #' plot(m)
 #' }
-#'
 model <- function (...,
                    precision = c('single', 'double'),
                    n_cores = NULL,
@@ -83,9 +81,6 @@ model <- function (...,
     }
   }
 
-  # flush all tensors from the default graph
-  tf$reset_default_graph()
-
   # nodes required
   target_greta_arrays <- list(...)
 
@@ -104,6 +99,27 @@ model <- function (...,
 
   }
 
+  # check they are greta arrays
+  are_greta_arrays <- vapply(target_greta_arrays,
+                             inherits, "greta_array",
+                             FUN.VALUE = FALSE)
+
+  if (!all(are_greta_arrays)) {
+
+    unexpected_items <- names(target_greta_arrays)[!are_greta_arrays]
+
+    msg <- ifelse(length(unexpected_items) > 1,
+                  "The following objects passed to model() are not greta arrays: ",
+                  "The following object passed to model() is not a greta array: ")
+
+    stop (msg,
+          paste(unexpected_items, collapse = ", "),
+          call. = FALSE)
+
+  }
+
+
   if (length(target_greta_arrays) == 0) {
     stop ('could not find any non-data greta arrays',
           call. = FALSE)
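A hedged sketch of what the new check buys the user (the object names are illustrative, and the error text is assembled by the `ifelse()` above):

```r
library(greta)

mu <- normal(0, 1)
x <- rnorm(10)  # a plain numeric vector, not a greta array

# previously this failed later with an obscure error; now it should fail fast:
# model(mu, x)
# Error: The following object passed to model() is not a greta array: x

m <- model(mu)  # fine: mu is a greta array
```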
diff --git a/R/inference.R b/R/inference.R
index 5a61ee4c..70c24043 100644
--- a/R/inference.R
+++ b/R/inference.R
@@ -17,12 +17,24 @@ stashed_samples <- function () {
 
   if (stashed) {
 
+    stash <- greta_stash$trace_stash
+
     # get them, remove the NAs, and return
-    draws <- greta_stash$trace_stash
-    draws_clean <- na.omit(draws)
+    draws_clean <- na.omit(stash$trace)
     draws_prepped <- prepare_draws(draws_clean)
+    draws_mcmclist <- mcmc.list(draws_prepped)
+
+    # prep the raw model objects
+    model_info <- new.env()
+    raw_clean <- na.omit(stash$raw)
+    raw_prepped <- prepare_draws(raw_clean)
+    model_info$raw_draws <- mcmc.list(raw_prepped)
+    model_info$model <- greta_stash$model
 
-    return (draws_prepped)
+    # add the raw draws as an attribute
+    attr(draws_mcmclist, "model_info") <- model_info
+
+    return (draws_mcmclist)
 
   } else {
 
@@ -36,8 +48,12 @@
 # they abort a run
 greta_stash <- new.env()
 
-stash_trace <- function (trace)
-  assign('trace_stash', trace, envir = greta_stash)
+stash_trace <- function (trace, raw) {
+  assign('trace_stash',
+         list(trace = trace,
+              raw = raw),
+         envir = greta_stash)
+}
 
 #' @rdname inference
 #' @export
@@ -45,22 +61,23 @@
 #' @importFrom utils setTxtProgressBar txtProgressBar
 #'
 #' @param model greta_model object
-#' @param method the method used to sample or optimise values. Currently only
+#' @param method method used to sample or optimise values. Currently only
 #' one method is available for each procedure: \code{hmc} and \code{adagrad}
-#' @param n_samples the number of MCMC samples to draw (after any warm-up, but
+#' @param n_samples number of MCMC samples to draw (after any warm-up, but
 #' before thinning)
-#' @param thin the MCMC thinning rate; every \code{thin} samples is retained,
+#' @param thin MCMC thinning rate; every \code{thin} samples is retained,
 #' the rest are discarded
-#' @param warmup the number of samples to spend warming up the mcmc sampler.
+#' @param warmup number of samples to spend warming up the mcmc sampler.
 #' During this phase the sampler moves toward the highest density area and
 #' tunes sampler hyperparameters.
+#' @param chains number of MCMC chains to run
 #' @param verbose whether to print progress information to the console
 #' @param pb_update how regularly to update the progress bar (in iterations)
 #' @param control an optional named list of hyperparameters and options to
 #' control behaviour of the sampler or optimiser. See Details.
-#' @param initial_values an optional named vector of initial values for the free
-#' parameters in the model. These will be used as the starting point for
-#' sampling/optimisation
+#' @param initial_values an optional vector (or list of vectors, for multiple
+#' chains) of initial values for the free parameters in the model. These will
+#' be used as the starting point for sampling/optimisation.
 #'
 #' @details For \code{mcmc()} if \code{verbose = TRUE}, the progress bar shows
 #' the number of iterations so far and the expected time to complete the phase
@@ -82,29 +99,31 @@ stash_trace <- function (trace)
 #' (via \code{model}) to have double precision, though this will slow down
 #' sampling.
 #'
-#' Currently, the only implemented MCMC procedure is static Hamiltonian
-#' Monte Carlo (\code{method = "hmc"}). During the warmup iterations, the
-#' leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the
-#' sampler efficiency. The \code{control} argument can be used to specify the
-#' initial value for epsilon, along with two other hyperparameters: \code{Lmin}
-#' and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the
-#' upper and lower limits to the number of leapfrog steps per iteration (from
-#' which the number is selected uniformly at random).
+#' Currently, the only implemented MCMC procedure is static Hamiltonian Monte
+#' Carlo (\code{method = "hmc"}). During the warmup iterations, the leapfrog
+#' stepsize hyperparameter \code{epsilon} is tuned to maximise the sampler
+#' efficiency, and the posterior marginal standard deviations are estimated
+#' to tune \code{diag_sd}. The \code{control} argument can be used to specify
+#' the initial value for \code{epsilon}, \code{diag_sd}, and two other
+#' hyperparameters: \code{Lmin} and \code{Lmax}; positive integers (with
+#' \code{Lmax > Lmin}) giving the upper and lower limits to the number of
+#' leapfrog steps per iteration (from which the number is selected uniformly
+#' at random).
 #'
 #' The default control options for HMC are:
-#' \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)}
+#' \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005, diag_sd = 1)}
 #'
-#' @return \code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object that can be analysed using
-#' functions from the coda package. This will contain mcmc samples of the
-#' greta arrays used to create \code{model}.
+#' @return \code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object
+#' that can be analysed using functions from the coda package. This will
+#' contain mcmc samples of the greta arrays used to create \code{model}.
 #'
 #' @examples
 #' \dontrun{
 #' # define a simple model
-#' mu = variable()
-#' sigma = lognormal(1, 0.1)
-#' x = rnorm(10)
-#' distribution(x) = normal(mu, sigma)
+#' mu <- variable()
+#' sigma <- lognormal(1, 0.1)
+#' x <- rnorm(10)
+#' distribution(x) <- normal(mu, sigma)
 #' m <- model(mu, sigma)
 #'
 #' # carry out mcmc on the model
@@ -117,6 +136,7 @@ mcmc <- function (model,
                   n_samples = 1000,
                   thin = 1,
                   warmup = 100,
+                  chains = 1,
                   verbose = TRUE,
                   pb_update = 10,
                   control = list(),
@@ -124,6 +144,9 @@ mcmc <- function (model,
 
   method <- match.arg(method)
 
+  # store the model
+  greta_stash$model <- model
+
   # find variable names to label samples
   target_greta_arrays <- model$target_greta_arrays
   names <- names(target_greta_arrays)
@@ -145,34 +168,72 @@ mcmc <- function (model,
   # get the dag containing the target nodes
   dag <- model$dag
 
+  param <- dag$example_parameters()
+  n_initial <- length(param)
+
   # random starting locations
   if (is.null(initial_values)) {
 
     # try several times
     valid <- FALSE
     attempts <- 1
-    while (!valid & attempts < 10) {
+    while (!valid & attempts < 20) {
 
-      initial_values <- dag$example_parameters()
-      # increase the jitter each time
-      initial_values[] <- rnorm(length(initial_values), 0, 1 + attempts / 5)
+      initial_values <- replicate(chains,
+                                  rnorm(n_initial, 0, 0.5),
+                                  simplify = FALSE)
 
       # test validity of values
-      valid <- valid_parameters(dag, initial_values)
+      valid <- valid_parameters(initial_values, dag)
       attempts <- attempts + 1
 
     }
 
     if (!valid) {
-      stop ('Could not find reasonable starting values after ', attempts,
-            ' attempts. Please specify initial values manually via the ',
-            'initial_values argument to mcmc',
+      stop ("Could not find reasonable starting values after ", attempts,
+            " attempts. Please specify initial values manually via the ",
+            "initial_values argument to mcmc",
            call. = FALSE)
     }
 
   } else {
 
-    if (!valid_parameters(dag, initial_values)) {
+    # if they provided a list, check it
+    if (is.list(initial_values)) {
+
+      n_sets <- length(initial_values)
+
+      if (n_sets != chains) {
+        stop (n_sets, " sets of initial values were provided, but there ",
+              ifelse(chains > 1, "are ", "is only "), chains, " chain",
+              ifelse(chains > 1, "s", ""),
+              call. = FALSE)
+      }
+
+      n_param <- vapply(initial_values, length, FUN.VALUE = 0)
+
+      if (!all(n_param == n_initial)) {
+        stop ("each set of initial values must be a vector of length ",
+              n_initial,
+              call. = FALSE)
+      }
+
+    } else {
+
+      # replicate
+      initial_values <- replicate(chains,
+                                  initial_values,
+                                  simplify = FALSE)
+
+      if (chains > 1) {
+        message ("\nonly one set of initial values was given, ",
+                 "and was used for all chains\n")
+      }
+
+    }
+
+    # check they are valid
+    if (!valid_parameters(initial_values, dag)) {
       stop ('The log density and gradients could not be evaluated at these ',
             'initial values.',
            call. = FALSE)
@@ -185,7 +246,8 @@ mcmc <- function (model,
   con <- switch(method,
                 hmc = list(Lmin = 10,
                            Lmax = 20,
-                           epsilon = 0.005))
+                           epsilon = 0.005,
+                           diag_sd = 1))
 
   # update them with user overrides
   con[names(control)] <- control
@@ -194,6 +256,50 @@ mcmc <- function (model,
   method <- switch(method,
                    hmc = hmc)
 
+  print_chain <- chains > 1
+
+  chains_list <- lapply(seq_len(chains),
+                        run_chain,
+                        dag = dag,
+                        method = method,
+                        n_samples = n_samples,
+                        thin = thin,
+                        warmup = warmup,
+                        chains = chains,
+                        verbose = verbose,
+                        pb_update = pb_update,
+                        control = con,
+                        initial_values = initial_values,
+                        print_chain = print_chain)
+
+  # get raw_draws
+  raw_list <- lapply(chains_list,
+                     function (x) {
+                       attr(x, "model_info")$raw_draws[[1]]
+                     })
+  chains_list <- lapply(chains_list, `[[`, 1)
+
+  model_info <- new.env()
+  model_info$raw_draws <- do.call(mcmc.list, raw_list)
+  model_info$model <- greta_stash$model
+  chains <- do.call(mcmc.list, chains_list)
+  attr(chains, "model_info") <- model_info
+  chains
+
+}
+
+
+run_chain <- function (chain, dag, method, n_samples, thin,
+                       warmup, chains, verbose, pb_update,
+                       control, initial_values, print_chain) {
+
+  if (print_chain) {
+    msg <- sprintf("\nchain %i/%i\n", chain, chains)
+    cat(msg)
+  }
+
+  initial_values_chain <- initial_values[[chain]]
+
   # if warmup is required, do that now and update init
   if (warmup > 0) {
 
@@ -204,18 +310,18 @@
 
     # run it
     warmup_draws <- method(dag = dag,
-                           init = initial_values,
+                           init = initial_values_chain,
                            n_samples = warmup,
                            thin = thin,
                            verbose = verbose,
                            pb = pb_warmup,
                            tune = TRUE,
                            stash = FALSE,
-                           control = con)
+                           control = control)
 
     # use the last draw of the full parameter vector as the init
-    initial_values <- attr(warmup_draws, 'last_x')
-    con <- attr(warmup_draws, 'control')
+    initial_values_chain <- attr(warmup_draws, 'last_x')
+    control <- attr(warmup_draws, 'control')
 
   }
 
@@ -226,27 +332,27 @@
 
   # run the sampler
   draws <- method(dag = dag,
-                  init = initial_values,
+                  init = initial_values_chain,
                   n_samples = n_samples,
                   thin = thin,
                   verbose = verbose,
                   pb = pb_sampling,
                   tune = FALSE,
                   stash = TRUE,
-                  control = con)
+                  control = control)
 
   # if this was successful, trash the stash, prepare and return the draws
   rm('trace_stash', envir = greta_stash)
 
-  prepare_draws(draws)
+  draws
 
 }
 
+
 #' @importFrom coda mcmc mcmc.list
 prepare_draws <- function (draws) {
   # given a matrix of draws returned by the sampler, prepare it and return
   draws_df <- data.frame(draws)
-  draws_mcmc <- coda::mcmc(draws_df)
-  coda::mcmc.list(draws_mcmc)
+  coda::mcmc(draws_df)
 }
 
@@ -286,8 +392,13 @@ opt <- function (model,
                  control = list(),
                  initial_values = NULL) {
 
+  # mock up some names to avoid CRAN-check note
+  optimiser <- joint_density <- sess <- NULL
+
   # get the tensorflow environment
   tfe <- model$dag$tf_environment
+  on_graph <- model$dag$on_graph
+  tf_run <- model$dag$tf_run
 
   # get the method
   method <- match.arg(method)
@@ -304,8 +415,8 @@ opt <- function (model,
   con[names(control)] <- control
 
   # set up optimiser
-  tfe$optimiser <- do.call(optimise_fun, con)
-  with(tfe, train <- optimiser$minimize(-joint_density))
+  on_graph(tfe$optimiser <- do.call(optimise_fun, con))
+  tf_run(train <- optimiser$minimize(-joint_density))
 
   # random initial values if unspecified
   if (is.null(initial_values)) {
@@ -314,16 +425,16 @@ opt <- function (model,
   }
 
   # initialize the variables, then set the ones we care about
-  with(tfe, sess$run(tf$global_variables_initializer()))
+  tf_run(sess$run(tf$global_variables_initializer()))
 
   parameters <- relist_tf(initial_values, model$dag$parameters_example)
   for (i in seq_along(parameters)) {
     variable_name <- paste0(names(parameters)[i], '_free')
     vble <- tfe[[variable_name]]
-    init <- tf$constant(parameters[[i]],
-                        shape = vble$shape,
-                        dtype = tf_float())
-    tmp <- tfe$sess$run(vble$assign(init))
+    on_graph(init <- tf$constant(parameters[[i]],
+                                 shape = vble$shape,
+                                 dtype = tf_float()))
+    . <- on_graph(tfe$sess$run(vble$assign(init)))
   }
 
   diff <- old_obj <- Inf
@@ -331,14 +442,14 @@ opt <- function (model,
   while (it < max_iterations & diff > tolerance) {
     it <- it + 1
-    with(tfe, sess$run(train))
-    obj <- with(tfe, sess$run(-joint_density))
+    tf_run(sess$run(train))
+    obj <- tf_run(sess$run(-joint_density))
     diff <- abs(old_obj - obj)
     old_obj <- obj
   }
 
   list(par = model$dag$trace_values(),
-       value = with(tfe, sess$run(joint_density)),
+       value = tf_run(sess$run(joint_density)),
        iterations = it,
        convergence = ifelse(it < max_iterations, 0, 1))
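A sketch of the new multi-chain interface, assuming the model `m` defined in the roxygen example above (two free parameters, `mu` and the free state of `sigma`, so each set of initial values is a length-two vector):

```r
# one set of initial values per chain; a single vector would instead be
# replicated across chains, with a message
draws <- mcmc(m,
              n_samples = 1000,
              warmup = 100,
              chains = 2,
              initial_values = list(c(0.1, 0.1),
                                    c(-0.1, 0.2)))

# draws is an mcmc.list with one element per chain, so the usual
# multi-chain coda diagnostics now apply
coda::gelman.diag(draws)
```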
diff --git a/R/operators.R b/R/operators.R
index 5e0eeb42..c7b7b625 100644
--- a/R/operators.R
+++ b/R/operators.R
@@ -40,23 +40,23 @@
 #' @examples
 #' \dontrun{
 #'
-#' x = as_data(-1:12)
+#' x <- as_data(-1:12)
 #'
 #' # arithmetic
-#' a = x + 1
-#' b = 2 * x + 3
-#' c = x %% 2
-#' d = x %/% 5
+#' a <- x + 1
+#' b <- 2 * x + 3
+#' c <- x %% 2
+#' d <- x %/% 5
 #'
 #' # logical
-#' e = (x > 1) | (x < 1)
-#' f = e & (x < 2)
-#' g = !f
+#' e <- (x > 1) | (x < 1)
+#' f <- e & (x < 2)
+#' g <- !f
 #'
 #' # relational
-#' h = x < 1
-#' i = (-x) >= x
-#' j = h == x
+#' h <- x < 1
+#' i <- (-x) >= x
+#' j <- h == x
 #' }
 NULL
@@ -145,15 +145,16 @@
   y <- elem_list[[2]]
 
   # check they're matrices
-  if (length(dim(x)) != 2 | length(dim(x)) != 2)
-    stop ('only two-dimensional greta arrays can be matrix-multiplied',
+  if (length(dim(x)) != 2 | length(dim(y)) != 2) {
+    stop ("only two-dimensional greta arrays can be matrix-multiplied",
           call. = FALSE)
+  }
 
   # check the dimensions match
   if (dim(x)[2] != dim(y)[1]) {
-    msg <- sprintf('incompatible dimensions: %s vs %s',
-                   paste0(dim(x), collapse = 'x'),
-                   paste0(dim(y), collapse = 'x'))
+    msg <- sprintf("incompatible dimensions: %s vs %s",
+                   paste0(dim(x), collapse = "x"),
+                   paste0(dim(y), collapse = "x"))
     stop (msg)
   }
diff --git a/R/package.R b/R/package.R
index 66061d33..15b1fb3a 100644
--- a/R/package.R
+++ b/R/package.R
@@ -26,13 +26,13 @@
 #' # a simple Bayesian regression model for the iris data
 #'
 #' # priors
-#' int = normal(0, 5)
-#' coef = normal(0, 3)
-#' sd = lognormal(0, 3)
+#' int <- normal(0, 5)
+#' coef <- normal(0, 3)
+#' sd <- lognormal(0, 3)
 #'
 #' # likelihood
 #' mean <- int + coef * iris$Petal.Length
-#' distribution(iris$Sepal.Length) = normal(mean, sd)
+#' distribution(iris$Sepal.Length) <- normal(mean, sd)
 #'
 #' # build and sample
 #' m <- model(int, coef, sd)
diff --git a/R/probability_distributions.R b/R/probability_distributions.R
index 58c9bb99..4f1fda1a 100644
--- a/R/probability_distributions.R
+++ b/R/probability_distributions.R
@@ -537,7 +537,7 @@ cauchy_distribution <- R6Class (
     s <- parameters$scale
 
     log_prob = function (x)
-      -tf$log(fl(pi) * s * (fl(1) + tf$square((x - loc) / s)))
+      tf$negative(tf$log(fl(pi) * s * (fl(1) + tf$square((x - loc) / s))))
 
     cdf = function (x)
       fl(1 / pi) * tf$atan((x - loc) / s) + fl(0.5)
@@ -1359,39 +1359,39 @@ distribution_classes_module <- module(uniform_distribution,
 #' \dontrun{
 #'
 #' # a uniform parameter constrained to be between 0 and 1
-#' phi = uniform(min = 0, max = 1)
+#' phi <- uniform(min = 0, max = 1)
 #'
 #' # a length-three variable, with each element following a standard normal
 #' # distribution
-#' alpha = normal(0, 1, dim = 3)
+#' alpha <- normal(0, 1, dim = 3)
 #'
 #' # a length-three variable of lognormals
-#' sigma = lognormal(0, 3, dim = 3)
+#' sigma <- lognormal(0, 3, dim = 3)
 #'
 #' # a hierarchical uniform, constrained between alpha and alpha + sigma,
-#' eta = alpha + uniform(0, 1, dim = 3) * sigma
+#' eta <- alpha + uniform(0, 1, dim = 3) * sigma
 #'
 #' # a hierarchical distribution
-#' mu = normal(0, 1)
-#' sigma = lognormal(0, 1)
-#' theta = normal(mu, sigma)
+#' mu <- normal(0, 1)
+#' sigma <- lognormal(0, 1)
+#' theta <- normal(mu, sigma)
 #'
 #' # a vector of 3 variables drawn from the same hierarchical distribution
-#' thetas = normal(mu, sigma, dim = 3)
+#' thetas <- normal(mu, sigma, dim = 3)
 #'
 #' # a matrix of 12 variables drawn from the same hierarchical distribution
-#' thetas = normal(mu, sigma, dim = c(3, 4))
+#' thetas <- normal(mu, sigma, dim = c(3, 4))
 #'
 #' # a multivariate normal variable, with correlation between two elements
 #' Sig <- diag(4)
 #' Sig[3, 4] <- Sig[4, 3] <- 0.6
-#' theta = multivariate_normal(rep(mu, 4), Sig)
+#' theta <- multivariate_normal(rep(mu, 4), Sig)
 #'
 #' # 10 independent replicates of that
-#' theta = multivariate_normal(rep(mu, 4), Sig, dim = 10)
+#' theta <- multivariate_normal(rep(mu, 4), Sig, dim = 10)
 #'
 #' # a Wishart variable with the same covariance parameter
-#' theta = wishart(df = 5, Sigma = Sig)
+#' theta <- wishart(df = 5, Sigma = Sig)
 #'
 #' }
 NULL
diff --git a/R/samplers.R b/R/samplers.R
index bf94dfff..cf10e28d 100644
--- a/R/samplers.R
+++ b/R/samplers.R
@@ -8,18 +8,35 @@ hmc <- function (dag,
                  stash = FALSE,
                  control = list(Lmin = 10,
                                 Lmax = 20,
-                                epsilon = 0.005)) {
+                                epsilon = 0.005,
+                                diag_sd = 1)) {
 
   # unpack options
   Lmin <- control$Lmin
   Lmax <- control$Lmax
   epsilon <- control$epsilon
+  diag_sd <- control$diag_sd
 
-  # tuning parameters
-  accept_group = 50
-  target_acceptance = 0.651
-  kappa = 0.75
-  gamma = 0.1
+  if (tune) {
+
+    # when to start and stop each type of tuning, in fractions of the tuning period
+    epsilon_periods <- list(c(0, 0.1), c(0.4, 1))
+    diag_sd_periods <- list(c(0.1, 0.4))
+
+    # epsilon tuning parameters
+    accept_group <- 50
+    target_acceptance <- 0.651
+    kappa <- 0.75
+    gamma <- 0.1
+
+    # initialise welford accumulator for marginal variance
+    diag_sd_update_rate <- 5
+    welford_m <- 0
+    welford_m2 <- 0
+
+    epsilon_trace <- rep(NA, n_samples)
+
+  }
 
   numerical_rejections <- 0
 
@@ -33,9 +50,6 @@ hmc <- function (dag,
   grad <- dag$gradients()
   logprob <- dag$log_density()
 
-  if (tune)
-    epsilon_trace <- rep(NA, n_samples)
-
   # set up trace store (grab values of target variables from graph to get
   # dimension and names)
   init_trace <- dag$trace_values()
@@ -45,9 +59,15 @@ hmc <- function (dag,
                   ncol = n_target)
   colnames(trace) <- names(init_trace)
 
-  # if anything goes awry, stash the trace so far
+  # also trace the raw values, with the dag attached
+  raw <- matrix(NA,
+                nrow = n_samples %/% thin,
+                ncol = length(x))
+  colnames(raw) <- names(dag$example_parameters())
+
+  # if anything goes awry, stash the trace and raw trace so far
   if (stash)
-    on.exit(stash_trace(trace))
+    on.exit(stash_trace(trace, raw))
 
   # track acceptance
   accept_trace <- rep(0, n_samples)
@@ -55,7 +75,7 @@ hmc <- function (dag,
   # get free parameter dimension
   npar <- length(x)
 
-  accept_count <- 0
+  L <- Lmin:Lmax
 
   # loop through iterations
   for (i in 1:n_samples) {
@@ -68,13 +88,14 @@ hmc <- function (dag,
 
     # start leapfrog steps
     reject <- FALSE
-    # p <- p_old + 0.5 * epsilon * grad
-    n_steps <- base::sample(Lmin:Lmax, 1)
+    n_steps <- ifelse(length(L) == 1, L,
+                      base::sample(L, 1))
+
     for (l in seq_len(n_steps)) {
 
       # step
-      p <- p + 0.5 * epsilon * grad
-      x <- x + epsilon * p
+      p <- p + 0.5 * epsilon * grad * diag_sd
+      x <- x + epsilon * p * diag_sd
 
       # send parameters
       dag$send_parameters(x)
@@ -86,7 +107,7 @@ hmc <- function (dag,
         break()
       }
 
-      p <- p + 0.5 * epsilon * grad
+      p <- p + 0.5 * epsilon * grad * diag_sd
 
     }
 
@@ -108,14 +129,11 @@ hmc <- function (dag,
 
       # acceptance ratio
       logprob <- dag$log_density()
-      log_accept_ratio = logprob - p_prod - logprob_old + p_prod_old
-      log_u = log(runif(1))
+      log_accept_ratio <- logprob - p_prod - logprob_old + p_prod_old
+      log_u <- log(runif(1))
 
       if (log_u < log_accept_ratio) {
 
-        # on acceptance, iterate the counter and leave the parameters in the dag
-        # to be put in the trace
-        accept_count <- accept_count + 1
         accept_trace[i] <- 1
 
       } else {
@@ -135,6 +153,7 @@ hmc <- function (dag,
     if (i %% thin == 0) {
       dag$send_parameters(x)
       trace[i / thin, ] <- dag$trace_values()
+      raw[i / thin, ] <- x
     }
 
     if (verbose)
@@ -143,20 +162,51 @@ hmc <- function (dag,
     # optionally tune epsilon
     if (tune) {
 
-      # acceptance rate over the last accept_group runs
-      start <- max(1, i - accept_group)
-      end <- i
-      accept_rate <- mean(accept_trace[start:end], na.rm = TRUE)
+      adapt_epsilon <- in_periods(i, n_samples, epsilon_periods)
+      if (adapt_epsilon) {
+
+        # acceptance rate over the last accept_group runs
+        start <- max(1, i - accept_group)
+        end <- i
+        accept_rate <- mean(accept_trace[start:end], na.rm = TRUE)
+
+        # decrease the adaptation rate as we go
+        adapt_rate <- min(1, gamma * i ^ (-kappa))
 
-      # decrease the adaptation rate as we go
-      adapt_rate <- min(1, gamma * i ^ (-kappa))
+        # shift epsilon in the right direction, making sure it never goes negative
+        epsilon <- epsilon + pmax(-(epsilon + sqrt(.Machine$double.eps)),
+                                  adapt_rate * (accept_rate - target_acceptance))
 
-      # shift epsilon in the right direction, making sure it never goes negative
-      epsilon <- epsilon + pmax(-(epsilon + sqrt(.Machine$double.eps)),
-                                adapt_rate * (accept_rate - target_acceptance))
+        # keep track of epsilon
+        epsilon_trace[i] <- epsilon
 
-      # keep track of epsilon
-      epsilon_trace[i] <- epsilon
+      }
+
+      # only adapt diag_sd during the middle period of tuning, so that
+      # epsilon can settle in afterwards
+      adapt_diag_sd <- in_periods(i, n_samples, diag_sd_periods)
+      if (adapt_diag_sd) {
+
+        # update welford accumulator for posterior variance
+        n_accepted <- sum(accept_trace)
+
+        # update only if this step was accepted
+        if (accept_trace[i] == 1) {
+          welford_delta <- x - welford_m
+          welford_m <- welford_m + welford_delta / n_accepted
+          welford_m2 <- welford_m2 + welford_delta * (x - welford_m)
+        }
+
+        # if there are samples, and we want to adapt, get the sample posterior
+        # variance and shrink it
+        if (n_accepted > 1 & (i %% diag_sd_update_rate == 0)) {
+          sample_var <- welford_m2 / (n_accepted - 1)
+          var_shrinkage <- 1 / (n_accepted + 5)
+          var_shrunk <- n_accepted * var_shrinkage * sample_var + 5e-3 * var_shrinkage
+          diag_sd <- sqrt(var_shrunk)
+        }
+
+      }
 
     }
 
@@ -167,12 +217,26 @@ hmc <- function (dag,
     start <- floor(n_samples/2)
     end <- n_samples
     control$epsilon <- mean(epsilon_trace[start:end], na.rm = TRUE)
+    control$diag_sd <- diag_sd
   }
 
+  stash_trace(trace, raw)
+  trace <- stashed_samples()
   attr(trace, 'last_x') <- x
   attr(trace, 'control') <- control
   trace
 
 }
 
+# determine whether the sampler is within one of the adaptation periods for a
+# given parameter
+in_periods <- function (i, n_samples, periods) {
+  fraction <- i / n_samples
+  in_period <- vapply(periods, within, fraction, FUN.VALUE = FALSE)
+  any(in_period)
+}
+
+within <- function (period, fraction)
+  fraction >= period[1] & fraction <= period[2]
+
+
 samplers_module <- module(hmc)
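The `diag_sd` adaptation above relies on Welford's online algorithm: the posterior mean and sum of squared deviations are accumulated one accepted sample at a time, so no samples need to be stored. A standalone sketch of the same update rule in plain R (the names mirror the sampler's `welford_m` and `welford_m2`):

```r
# one-pass mean and variance, updated one observation at a time
welford_m <- 0
welford_m2 <- 0
n <- 0

update_welford <- function (x) {
  n <<- n + 1
  welford_delta <- x - welford_m
  welford_m <<- welford_m + welford_delta / n
  welford_m2 <<- welford_m2 + welford_delta * (x - welford_m)
}

set.seed(1)
for (x in rnorm(10000, mean = 2, sd = 3)) update_welford(x)

welford_m                    # running mean, close to 2
sqrt(welford_m2 / (n - 1))   # running sd, close to 3
```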
diff --git a/R/tf_functions.R b/R/tf_functions.R
index 707410b8..0e096da2 100644
--- a/R/tf_functions.R
+++ b/R/tf_functions.R
@@ -212,6 +212,11 @@ tf_icloglog <- function (x)
 tf_icauchit <- function (x)
   fl(1 / pi) * tf$atan(x) + fl(0.5)
 
+tf_imultilogit <- function (x) {
+  zeros <- tf$zeros(shape(nrow(x), 1L), tf_float())
+  latent <- tf$concat(list(x, zeros), 1L)
+  tf$nn$softmax(latent)
+}
 
 # map R's extract and replace syntax to tensorflow, for use in operation nodes
 # the following arguments are required:
@@ -328,6 +333,7 @@ tf_functions_module <- module(tf_as_logical,
                               tf_iprobit,
                               tf_icloglog,
                               tf_icauchit,
+                              tf_imultilogit,
                               tf_extract,
                               tf_recombine,
                               tf_replace,
diff --git a/R/transforms.R b/R/transforms.R
index 84ce8bec..1ee8e598 100644
--- a/R/transforms.R
+++ b/R/transforms.R
@@ -12,27 +12,37 @@
 #' modelling languages. That's because the same syntax has a very different
 #' meaning in R, and can only be applied to objects that are already in
 #' existence. The inverse forms of the common link functions (prefixed with an
-#' 'i') are therefore more likely to useful in modelling, and these are what
-#' are provided here.
+#' 'i') can be used instead.
 #'
-#' The \code{log1pe} inverse link function is equivalent to \code{log(1 + exp(x))},
-#' yielding a positive transformed parameter. Unlike the log transformation,
-#' this transformation is approximately linear for x > 1. i.e. when \eqn{x >
-#' 1}, \eqn{y \approx x}
+#' The \code{log1pe} inverse link function is equivalent to \code{log(1 +
+#' exp(x))}, yielding a positive transformed parameter. Unlike the log
+#' transformation, this transformation is approximately linear for x > 1. i.e.
+#' when \eqn{x > 1}, \eqn{y \approx x}
+#'
+#' \code{imultilogit} expects an n-by-m greta array, and returns an n-by-(m+1)
+#' greta array of positive reals whose rows sum to one. This is equivalent to
+#' adding a final column of 0s and then running the softmax function widely
+#' used in machine learning.
 #'
 #' @examples
 #' \dontrun{
 #'
-#' x = normal(1, 3, dim = 10)
+#' x1 <- normal(1, 3, dim = 10)
 #'
 #' # transformation to the unit interval
-#' p1 <- iprobit(x)
-#' p2 <- ilogit(x)
-#' p3 <- icloglog(x)
-#' p4 <- icauchit(x)
+#' p1 <- iprobit(x1)
+#' p2 <- ilogit(x1)
+#' p3 <- icloglog(x1)
+#' p4 <- icauchit(x1)
 #'
 #' # and to positive reals
-#' y <- log1pe(x)
+#' y <- log1pe(x1)
+#'
+#' # transform from 10x3 to 10x4, where rows are a complete set of
+#' # probabilities
+#' x2 <- normal(1, 3, dim = c(10, 3))
+#' z <- imultilogit(x2)
+#'
 #' }
 NULL
@@ -60,3 +70,25 @@ icauchit <- function (x)
 #' @export
 log1pe <- function (x)
   op('log1pe', x, tf_operation = tf$nn$softplus)
+
+#' @rdname transforms
+#' @export
+imultilogit <- function (x) {
+  dimfun <- function (elem_list) {
+
+    dim <- dim(elem_list[[1]])
+
+    # check it's a matrix
+    if (length(dim) != 2) {
+      stop ("imultilogit expects a 2D greta array",
+            call. = FALSE)
+    }
+
+    dim + c(0, 1)
+
+  }
+
+  op('imultilogit', x,
+     dimfun = dimfun,
+     tf_operation = tf_imultilogit)
+}
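The two headline features of this release combine naturally: `calculate()` gives a quick way to confirm that `imultilogit()` appends a column and returns rows summing to one. A sketch, assuming greta 0.2.4 is attached:

```r
library(greta)

x2 <- normal(1, 3, dim = c(10, 3))
z <- imultilogit(x2)   # a 10 x 4 greta array

# x2 is the only variable z is connected to, so a single value suffices
vals <- matrix(rnorm(30), 10, 3)
p <- calculate(z, list(x2 = vals))

dim(p)      # 10 4
rowSums(p)  # all equal to 1
```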
diff --git a/R/utils.R b/R/utils.R
index 65245516..a0ed88eb 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -335,11 +335,19 @@ cleanly <- function (expr) {
 }
 
 # check whether initial values are valid
-valid_parameters <- function (dag, initial_values) {
+valid_parameters <- function (initial_values, dag) {
+
+  if (is.list(initial_values)) {
+    each_valid <- vapply(initial_values, valid_parameters, dag, FUN.VALUE = FALSE)
+    valid <- all(each_valid)
+    return (valid)
+  }
+
   dag$send_parameters(initial_values)
   ld <- dag$log_density()
   grad <- dag$gradients()
   all(is.finite(c(ld, grad)))
+
 }
 
 # unlist and flatten a list of arrays to a vector row-wise
diff --git a/docs/_site.yml b/docs/_site.yml
index b93e7b3f..7320b05e 100644
--- a/docs/_site.yml
+++ b/docs/_site.yml
@@ -1,9 +1,11 @@
 name: greta
-output_dir: "."
-
+output_dir: '.'
 output:
   html_document:
     css: greta.css
-    toc: true
+    toc: yes
     theme: lumen
     highlight: default
+    lib_dir: site_libs
+    self_contained: no
+
diff --git a/docs/calculate.html b/docs/calculate.html
new file mode 100644
index 00000000..72c3dcb1
--- /dev/null
+++ b/docs/calculate.html
@@ -0,0 +1,452 @@
+calculate greta arrays given fixed values
+
+Description
+
+Calculate the values that greta arrays would take, given temporary values for the greta arrays on which they depend, and return them as numeric R arrays. This can be used to check the behaviour of your model or make predictions to new data after model fitting.
+
+Usage
+
+calculate(target, values)
+
+Arguments
+
+target
+a greta array for which to calculate the value
+
+values
+a named list giving temporary values of the greta arrays with which target is connected, or an mcmc.list object returned by mcmc.
+
+Value
+
+A numeric R array with the same dimensions as target, giving the values it would take conditioned on the fixed values given by values.
+
+Details
+
+The greta arrays named in values need not be variables, they can also be other operations or even data.
+
+At present, if values is a named list it must contain values for all of the variable greta arrays with which target is connected, even if values are given for intermediate operations, or the target doesn’t depend on the variable. That may be relaxed in a future release.
+
+Examples
+
+# define a variable greta array, and another that is calculated from it
+# then calculate what value y would take for different values of x
+x <- normal(0, 1, dim = 3)
+a <- lognormal(0, 1)
+y <- sum(x ^ 2) + a
+calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))
+
+# define a model
+alpha <- normal(0, 1)
+beta <- normal(0, 1)
+sigma <- lognormal(1, 0.1)
+mu <- alpha + iris$Petal.Length * beta
+distribution(iris$Petal.Width) <- normal(mu, sigma)
+m <- model(alpha, beta, sigma)
+
+# calculate intermediate greta arrays, given some parameter values
+calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5))
+calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5))
+
+# fit the model then calculate samples at a new greta array
+draws <- mcmc(m, n_samples = 500)
+petal_length_plot <- seq(min(iris$Petal.Length),
+                         max(iris$Petal.Length),
+                         length.out = 100)
+mu_plot <- alpha + petal_length_plot * beta
+mu_plot_draws <- calculate(mu_plot, draws)
+
+# plot the draws
+mu_est <- colMeans(mu_plot_draws[[1]])
+plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]]))
+apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8))
+lines(mu_est ~ petal_length_plot, lwd = 2)
diff --git a/docs/distribution.html b/docs/distribution.html
index c009085f..dd4c3bc9 100644
--- a/docs/distribution.html
+++ b/docs/distribution.html
@@ -380,12 +380,12 @@
 # observed data and mean parameter to be estimated
 # (explicitly coerce data to a greta array so we can refer to it later)
-y = as_data(rnorm(5, 0, 3))
+y <- as_data(rnorm(5, 0, 3))
 
-mu = uniform(-3, 3)
+mu <- uniform(-3, 3)
 
 # define the distribution over y (the model likelihood)
-distribution(y) = normal(mu, 1)
+distribution(y) <- normal(mu, 1)
 
 # get the distribution over y
 distribution(y)
diff --git a/docs/distributions.html b/docs/distributions.html
index a05c63bc..e21e194d 100644
--- a/docs/distributions.html
+++ b/docs/distributions.html
@@ -762,39 +762,39 @@
 
 # a uniform parameter constrained to be between 0 and 1
-phi = uniform(min = 0, max = 1)
+phi <- uniform(min = 0, max = 1)
 
 # a length-three variable, with each element following a standard normal
 # distribution
-alpha = normal(0, 1, dim = 3)
+alpha <- normal(0, 1, dim = 3)
 
 # a length-three variable of lognormals
-sigma = lognormal(0, 3, dim = 3)
+sigma <- lognormal(0, 3, dim = 3)
 
 # a hierarchical uniform, constrained between alpha and alpha + sigma,
-eta = alpha + uniform(0, 1, dim = 3) * sigma
+eta <- alpha + uniform(0, 1, dim = 3) * sigma
 
 # a hierarchical distribution
-mu = normal(0, 1)
-sigma = lognormal(0, 1)
-theta = normal(mu, sigma)
+mu <- normal(0, 1)
+sigma <- lognormal(0, 1)
+theta <- normal(mu, sigma)
 
 # a vector of 3 variables drawn from the same hierarchical distribution
-thetas = normal(mu, sigma, dim = 3)
+thetas <- normal(mu, sigma, dim = 3)
 
 # a matrix of 12 variables drawn from the same hierarchical distribution
-thetas = normal(mu, sigma, dim = c(3, 4))
+thetas <- normal(mu, sigma, dim = c(3, 4))
 
 # a multivariate normal variable, with correlation between two elements
 Sig <- diag(4)
 Sig[3, 4] <- Sig[4, 3] <- 0.6
-theta = multivariate_normal(rep(mu, 4), Sig)
+theta <- multivariate_normal(rep(mu, 4), Sig)
 
 # 10 independent replicates of that
-theta = multivariate_normal(rep(mu, 4), Sig, dim = 10)
+theta <- multivariate_normal(rep(mu, 4), Sig, dim = 10)
 
 # a Wishart variable with the same covariance parameter
-theta = wishart(df = 5, Sigma = Sig)
+theta <- wishart(df = 5, Sigma = Sig)
diff --git a/docs/example_models.html b/docs/example_models.html
index 3003140f..854d77c0 100644
--- a/docs/example_models.html
+++ b/docs/example_models.html
@@ -367,11 +367,11 @@
 data
 
 greta code
 
-theta = normal(0, 32, dim = 2)
+theta <- normal(0, 32, dim = 2)
 mu <- alpha + beta * Z
-X = normal(mu, sigma)
+X <- normal(mu, sigma)
 p <- ilogit(theta[1] + theta[2] * X)
-distribution(y) = binomial(n, p)
+distribution(y) <- binomial(n, p)
 
 BUGS/JAGS code
 
@@ -436,10 +436,10 @@
 
 data
 
 greta code
 
-alpha_star = normal(0, 32)
-beta = normal(0, 32)
+alpha_star <- normal(0, 32)
+beta <- normal(0, 32)
 p <- ilogit(alpha_star + beta * (x - mean(x)))
-distribution(r) = binomial(n, p)
+distribution(r) <- binomial(n, p)
 
 alpha <- alpha_star - beta * mean(x)
 rhat <- p * n
diff --git a/docs/extract-replace-combine.html b/docs/extract-replace-combine.html
index c7464798..046fd18d 100644
--- a/docs/extract-replace-combine.html
+++ b/docs/extract-replace-combine.html
@@ -407,7 +407,7 @@
 
-x = as_data(matrix(1:12, 3, 4))
+x <- as_data(matrix(1:12, 3, 4))
 
 # extract/replace
 x[1:3, ]
diff --git a/docs/figures/coef_graph.png b/docs/figures/coef_graph.png
index 6572dd57..fce4c8a2 100644
Binary files a/docs/figures/coef_graph.png and b/docs/figures/coef_graph.png differ
diff --git a/docs/figures/likelihood_graph.png b/docs/figures/likelihood_graph.png
index 4af299fc..99d39c96 100644
Binary files a/docs/figures/likelihood_graph.png and b/docs/figures/likelihood_graph.png differ
diff --git a/docs/functions.html b/docs/functions.html
index a6ad8b9e..202aee93 100644
--- a/docs/functions.html
+++ b/docs/functions.html
@@ -425,18 +425,18 @@
 
-x = as_data(matrix(1:9, nrow = 3, ncol = 3))
-a = log(exp(x))
-b = log1p(expm1(x))
-c = sign(x - 5)
-d = abs(x - 5)
+x <- as_data(matrix(1:9, nrow = 3, ncol = 3))
+a <- log(exp(x))
+b <- log1p(expm1(x))
+c <- sign(x - 5)
+d <- abs(x - 5)
 
-e = diag(x)
+e <- diag(x)
 diag(x) <- e + 1
 
-z = t(a)
+z <- t(a)
 
-y = sweep(x, 1, e, '-')
+y <- sweep(x, 1, e, '-')
diff --git a/docs/get_started.html b/docs/get_started.html
index 808e6930..c5d206c7 100644
--- a/docs/get_started.html
+++ b/docs/get_started.html
@@ -418,15 +418,15 @@
 Building a model
 
 y <- as_data(iris$Sepal.Length)
 
 # variables and priors
-int = normal(0, 1)
-coef = normal(0, 3)
-sd = student(3, 0, 1, truncation = c(0, Inf))
+int <- normal(0, 1)
+coef <- normal(0, 3)
+sd <- student(3, 0, 1, truncation = c(0, Inf))
 
 # operations
 mean <- int + coef * x
 
 # likelihood
-distribution(y) = normal(mean, sd)
+distribution(y) <- normal(mean, sd)
 
 # defining the model
 m <- model(int, coef, sd)
@@ -436,7 +436,6 @@
 
 Building a model
 
 # sampling
 draws <- mcmc(m, n_samples = 1000)
-
-Throughout this example, and the rest of greta’s documentation, <- is used when assigning deterministic values, and = when assigning variable or stochastic values. This makes the model code a little bit clearer to read, since = corresponds to the ~ symbol used to define stochastic relationships in BUGS and Stan. However, it doesn’t actually make any difference to the model which assignment operator you use.
 
 Data
 
@@ -507,21 +506,21 @@
 
 data structures
 
 Variables and priors
 
 The second section of the script creates three greta arrays to represent the parameters in our model:
 
-int = normal(0, 1)
-coef = normal(0, 3)
-sd = student(3, 0, 1, truncation = c(0, Inf))
-
-Each of these is a variable greta array, and each is assumed a priori (before fitting to data) to follow a different probability distribution. In other wirds, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions.
+int <- normal(0, 1)
+coef <- normal(0, 3)
+sd <- student(3, 0, 1, truncation = c(0, Inf))
+
+Each of these is a variable greta array, and each is assumed a priori (before fitting to data) to follow a different probability distribution. In other words, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with probability distributions, it will be clearer to first demonstrate the alternative; variables without probability distributions.
 
 variables without probability distributions
 
 If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the variable() function. E.g. in a frequentist version of the model we could create int with:
 
-(int = variable())
+(int <- variable())
 greta array (variable)
 
      [,1]
 [1,]  ?  
 
 variable() has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of lower = -Inf, upper = Inf meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar).
 
 We can create a variable constrained between two values by specifying lower and upper. So we could have created the positive variable sd (the standard deviation of the likelihood) with:
 
-(sd = variable(lower = 0))
+(sd <- variable(lower = 0))
 greta array (variable)
 
      [,1]
@@ -542,7 +541,7 @@
 
 variables with probability distributions
 
 variables with truncated distributions
 
 Some of greta’s probability distributions (those that are continuous and univariate) can be specified as truncated distributions. By modifying the truncation argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do:
 
-(z = normal(0, 1, truncation = c(-1, 1)))
+(z <- normal(0, 1, truncation = c(-1, 1)))
 greta array (variable following a normal distribution)
 
      [,1]
@@ -596,7 +595,7 @@
 
 Extract and replace
 
 NULL
 dim(z[, 1, drop = FALSE])
 [1] 2 1
-
-greta arrays must always have two dimensions, so greta always acts as though drop=FALSE:
+
+greta arrays must always have two dimensions, so greta always acts as though drop = FALSE:
 
 z_greta <- as_data(z)
 dim(z_greta[, 1])
 [1] 2 1
@@ -620,8 +619,8 @@
 
 Functions
 
 Likelihood
 
 So far, we have created greta arrays representing the variables in our model (with prior distributions) and created other greta arrays from them and some fixed, independent data. To perform statistical inference on this model, we also need to link it to some observed dependent data. By comparing the sepal lengths predicted under different parameter values with the observed sepal lengths, we can estimate the most plausible values of those parameters. We do that by defining a likelihood for the observed sepal length data y.
 
 By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we’re trying to work out the parameters of that distribution. In greta we do that with the distribution() assignment function:
 
-distribution(y) = normal(mean, sd)
-
-With the syntax distribution(<lhs>) = <rhs>, we are stating that the data greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case, we’re temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays mean and sd), but then stating that values of that distribution have been observed (y). In this case both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.
+distribution(y) <- normal(mean, sd)
+
+With the syntax distribution(<lhs>) <- <rhs>, we are stating that the data greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case, we’re temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays mean and sd), but then stating that values of that distribution have been observed (y). In this case both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.
 
@@ -644,7 +643,7 @@
 
 Plotting
 
 The fourth type of node (diamonds) represents probability distributions. These have greta arrays as parameters (linked via solid lines), and have other greta arrays as values (linked via dashed lines). Distributions calculate the probability density of the values, given the parameters and their distribution type.
 
-For example, a plot of just the prior distribution over coef (defined as coef = normal(0, 3)) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution’s value, the variable coef:
+For example, a plot of just the prior distribution over coef (defined as coef <- normal(0, 3)) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution’s value, the variable coef:
diff --git a/docs/greta.html b/docs/greta.html
index a2979fe6..32eb0fd0 100644
--- a/docs/greta.html
+++ b/docs/greta.html
@@ -364,13 +364,13 @@
# a simple Bayesian regression model for the iris data # priors -int = normal(0, 5) -coef = normal(0, 3) -sd = lognormal(0, 3) +int <- normal(0, 5) +coef <- normal(0, 3) +sd <- lognormal(0, 3) # likelihood mean <- int + coef * iris$Petal.Length -distribution(iris$Sepal.Length) = normal(mean, sd) +distribution(iris$Sepal.Length) <- normal(mean, sd) # build and sample m <- model(int, coef, sd) diff --git a/docs/index.Rmd b/docs/index.Rmd index 05c915d9..f4d5c78f 100644 --- a/docs/index.Rmd +++ b/docs/index.Rmd @@ -12,7 +12,7 @@ body{ padding-top: 10px; } pre, pre:not([class]){ - font-size: 20px; + font-size: 18px; color: #5a3a78; background-color: #f7f7f7; border: none; @@ -101,7 +101,7 @@ a, a:hover{ ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE, cache = TRUE) -set.seed(2017-06-19) +set.seed(2018-02-17) ```

m <- model(int, coef, sd)
-
draws <- mcmc(m, n_samples = 1000)
+
draws <- mcmc(m, n_samples = 1000, chains = 3)
 bayesplot::mcmc_trace(draws)
-

+


diff --git a/docs/index_files/figure-html/vis-1.png b/docs/index_files/figure-html/vis-1.png
index 4954319d..34022571 100644
Binary files a/docs/index_files/figure-html/vis-1.png and b/docs/index_files/figure-html/vis-1.png differ
diff --git a/docs/inference.html b/docs/inference.html
index 7beb12ad..3c783e32 100644
--- a/docs/inference.html
+++ b/docs/inference.html
@@ -354,7 +354,7 @@

stashed_samples()
 
 mcmc(model, method = c("hmc"), n_samples = 1000, thin = 1, warmup = 100,
-  verbose = TRUE, pb_update = 10, control = list(),
+  chains = 1, verbose = TRUE, pb_update = 10, control = list(),
   initial_values = NULL)
 
 opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06,
@@ -368,19 +368,23 @@ 

method

-the method used to sample or optimise values. Currently only one method is available for each procedure: hmc and adagrad
+method used to sample or optimise values. Currently only one method is available for each procedure: hmc and adagrad

n_samples

-the number of MCMC samples to draw (after any warm-up, but before thinning)
+number of MCMC samples to draw (after any warm-up, but before thinning)

thin

-the MCMC thinning rate; every thin samples is retained, the rest are discarded
+MCMC thinning rate; every thin samples is retained, the rest are discarded

warmup

-the number of samples to spend warming up the mcmc sampler. During this phase the sampler moves toward the highest density area and tunes sampler hyperparameters.
+number of samples to spend warming up the mcmc sampler. During this phase the sampler moves toward the highest density area and tunes sampler hyperparameters.
+

+chains
+

+number of MCMC chains to run

verbose

@@ -396,7 +400,7 @@

initial_values

-an optional named vector of initial values for the free parameters in the model. These will be used as the starting point for sampling/optimisation
+an optional vector (or list of vectors, for multiple chains) of initial values for the free parameters in the model. These will be used as the starting point for sampling/optimisation.

max_iterations

@@ -448,10 +452,10 @@

Occasionally, a proposed set of parameters can cause numerical instability (i.e. the log density or its gradient is NA, Inf or -Inf), normally because the log joint density is so low that it can’t be represented as a floating point number. When this happens, the progress bar will also display the proportion of samples so far that were ‘bad’ (numerically unstable) and therefore rejected. If you’re getting a lot of numerical instability, you might want to manually define starting values to move the sampler into a more reasonable part of the parameter space. Alternatively, you could redefine the model (via model) to have double precision, though this will slow down sampling.
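As a minimal sketch of those two remedies (assuming the single-parameter model m from the example below; the precision argument to model() is an assumption about your installed version of greta):

 draws <- mcmc(m, chains = 2, initial_values = list(0, 0.1))  # one set per chain
 m2 <- model(mu, sigma, precision = "double")                 # double-precision rebuild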

-Currently, the only implemented MCMC procedure is static Hamiltonian Monte Carlo (method = “hmc”). During the warmup iterations, the leapfrog stepsize hyperparameter epsilon is tuned to maximise the sampler efficiency. The control argument can be used to specify the initial value for epsilon, along with two other hyperparameters: Lmin and Lmax; positive integers (with Lmax > Lmin) giving the upper and lower limits to the number of leapfrog steps per iteration (from which the number is selected uniformly at random).
+Currently, the only implemented MCMC procedure is static Hamiltonian Monte Carlo (method = “hmc”). During the warmup iterations, the leapfrog stepsize hyperparameter epsilon is tuned to maximise the sampler efficiency, and the posterior marginal standard deviations are estimated as diag_sd. The control argument can be used to specify the initial value for epsilon, diag_sd, and two other hyperparameters: Lmin and Lmax; positive integers (with Lmax > Lmin) giving the upper and lower limits to the number of leapfrog steps per iteration (from which the number is selected uniformly at random).

-The default control options for HMC are: control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)
+The default control options for HMC are: control = list(Lmin = 10, Lmax = 20, epsilon = 0.005, diag_sd = 1)
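For example, a sketch that shrinks the initial stepsize while keeping the other defaults explicit:

 draws <- mcmc(m, control = list(Lmin = 10, Lmax = 20, epsilon = 0.001, diag_sd = 1))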

Currently, the only implemented optimisation algorithm is Adagrad (method = “adagrad”). The control argument can be used to specify the optimiser hyperparameters: learning_rate (default 0.8), initial_accumulator_value (default 0.1) and use_locking (default TRUE). These are passed directly to TensorFlow’s optimisers; see the TensorFlow docs for more information.
@@ -461,10 +465,10 @@
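For instance, a sketch of tuning the optimiser with the hyperparameter names listed above:

 o <- opt(m, method = "adagrad", max_iterations = 200,
          control = list(learning_rate = 0.5))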

 # define a simple model
-mu = variable()
-sigma = lognormal(1, 0.1)
-x = rnorm(10)
-distribution(x) = normal(mu, sigma)
+mu <- variable()
+sigma <- lognormal(1, 0.1)
+x <- rnorm(10)
+distribution(x) <- normal(mu, sigma)
 m <- model(mu, sigma)
 
 # carry out mcmc on the model
diff --git a/docs/model.html b/docs/model.html
index 8b963f24..95f8c731 100644
--- a/docs/model.html
+++ b/docs/model.html
@@ -407,12 +407,11 @@ 

 
-
 # define a simple model
-mu = variable()
-sigma = lognormal(1, 0.1)
-x = rnorm(10)
-distribution(x) = normal(mu, sigma)
+mu <- variable()
+sigma <- lognormal(1, 0.1)
+x <- rnorm(10)
+distribution(x) <- normal(mu, sigma)
 
 m <- model(mu, sigma)
 
diff --git a/docs/operators.html b/docs/operators.html
index cc4db3c3..bcae4b32 100644
--- a/docs/operators.html
+++ b/docs/operators.html
@@ -390,23 +390,23 @@ 

 
-x = as_data(-1:12)
+x <- as_data(-1:12)
 
 # arithmetic
-a = x + 1
-b = 2 * x + 3
-c = x %% 2
-d = x %/% 5
+a <- x + 1
+b <- 2 * x + 3
+c <- x %% 2
+d <- x %/% 5
 
 # logical
-e = (x > 1) | (x < 1)
-f = e & (x < 2)
-g = !f
+e <- (x > 1) | (x < 1)
+f <- e & (x < 2)
+g <- !f
 
 # relational
-h = x < 1
-i = (-x) >= x
-j = h == x
+h <- x < 1
+i <- (-x) >= x
+j <- h == x
diff --git a/docs/reference-index.html b/docs/reference-index.html
index 38ad1804..b5346466 100644
--- a/docs/reference-index.html
+++ b/docs/reference-index.html
@@ -402,7 +402,7 @@

extract, replace and combine greta arrays

transformation functions for greta arrays

-

iprobit ilogit icloglog icauchit log1pe

+

iprobit ilogit icloglog icauchit log1pe imultilogit



diff --git a/docs/technical_details.html b/docs/technical_details.html
index 016e91b7..d1c37320 100644
--- a/docs/technical_details.html
+++ b/docs/technical_details.html
@@ -430,7 +430,7 @@

Tensors

variables and free states

Hamiltonian Monte Carlo (HMC) requires all of the parameters to be transformed to a continuous scale for sampling. Variable nodes are therefore converted to tensors by first defining a ‘free’ (unconstrained) version of themselves as a tensor, and then applying a transformation function to convert them back to the scale of interest.

-
a = variable(lower = 0)
+
a <- variable(lower = 0)
 class(a$node)
[1] "variable_node" "node"          "R6"           
a$node$tf_from_free
@@ -445,7 +445,7 @@

variables and free states

 } else if (self$constraint == 'both') {
-  y <- tf_ilogit(x) * fl(upper - lower) + fl(lower)
+  y <- tf$nn$sigmoid(x) * fl(upper - lower) + fl(lower)
 } else if (self$constraint == 'low') {
@@ -464,12 +464,12 @@

variables and free states

  y
}
-<environment: 0x121afa638>
+<environment: 0x10b5aef78>

distributions

distribution nodes are node objects just like the others, but they are not directly associated with greta arrays. Instead, greta arrays may have a distribution node in their distribution slot to indicate that their values are assumed to follow that distribution. The distribution node will also be listed as a parent node, and likewise the ‘target node’ will be listed as a child of the distribution. Distribution nodes also have child nodes (data, variables or operations) representing their parameters.

-
b = normal(0, 1)
+
b <- normal(0, 1)
 class(b$node)
[1] "variable_node" "node"          "R6"           
class(b$node$distribution)
diff --git a/docs/transforms.html b/docs/transforms.html
index 12541514..64043a25 100644
--- a/docs/transforms.html
+++ b/docs/transforms.html
@@ -359,7 +359,9 @@

icauchit(x)

-log1pe(x)

+log1pe(x)
+
+imultilogit(x)

Arguments

@@ -371,26 +373,34 @@

Details

-greta does not allow you to state the transformation/link on the left hand side of an assignment, as is common in the BUGS and STAN modelling languages. That’s because the same syntax has a very different meaning in R, and can only be applied to objects that are already in existence. The inverse forms of the common link functions (prefixed with an ‘i’) are therefore more likely to useful in modelling, and these are what are provided here.
+greta does not allow you to state the transformation/link on the left hand side of an assignment, as is common in the BUGS and STAN modelling languages. That’s because the same syntax has a very different meaning in R, and can only be applied to objects that are already in existence. The inverse forms of the common link functions (prefixed with an ‘i’) can be used instead.
+

+

+The log1pe inverse link function is equivalent to log(1 + exp(x)), yielding a positive transformed parameter. Unlike the log transformation, this transformation is approximately linear for x > 1, i.e. when \(x > 1\), \(y \approx x\).

-The log1pe inverse link function is equivalent to log(1 + exp(x)), yielding a positive transformed parameter. Unlike the log transformation, this transformation is approximately linear for x > 1. i.e. when \(x &gt; 1\), \(y \approx x\)
+imultilogit expects an n-by-m greta array, and returns an n-by-(m+1) greta array of positive reals whose rows sum to one. This is equivalent to adding a final column of 0s and then running the softmax function widely used in machine learning.
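To make this concrete, here is a base-R sketch of the same computation (it mirrors the reference implementation in greta’s own test suite): append a column of zeros, exponentiate, and normalise each row.

 r_imultilogit <- function (x) {
   x <- cbind(x, 0)               # append the fixed final column of 0s
   x <- exp(x)                    # exponentiate each score
   sweep(x, 1, rowSums(x), "/")   # normalise rows to sum to one
 }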

Examples

 
- x = normal(1, 3, dim = 10)
+ x1 <- normal(1, 3, dim = 10)
 
  # transformation to the unit interval
- p1 <- iprobit(x)
- p2 <- ilogit(x)
- p3 <- icloglog(x)
- p4 <- icauchit(x)
+ p1 <- iprobit(x1)
+ p2 <- ilogit(x1)
+ p3 <- icloglog(x1)
+ p4 <- icauchit(x1)
 
  # and to positive reals
- y <- log1pe(x)
+ y <- log1pe(x1) + + # transform from 10x3 to 10x4, where rows are a complete set of + # probabilities + x2 <- normal(1, 3, dim = c(10, 3)) + z <- imultilogit(x2) diff --git a/logos/greta_hex.png b/logos/greta_hex.png new file mode 100644 index 00000000..716330cf Binary files /dev/null and b/logos/greta_hex.png differ diff --git a/logos/make_hex.R b/logos/make_hex.R new file mode 100644 index 00000000..364f7feb --- /dev/null +++ b/logos/make_hex.R @@ -0,0 +1,65 @@ +library(hexSticker) +library(magick) +library(sysfonts) +font_add_google("Muli") +attach(greta::.internals$utils$colours) + +# make a hex-shaped mask +hexd <- data.frame(x = 1 + c(rep(-sqrt(3) / 2, 2), 0, rep(sqrt(3) / 2, 2), 0), + y = 1 + c(0.5, -0.5, -1, -0.5, 0.5, 1)) + +x_lim <- range(hexd$x) +y_lim <- range(hexd$y) +x_dim <- abs(diff(x_lim)) +y_dim <- abs(diff(y_lim)) +ex <- 4 +pdf("logos/hex_mask.pdf", width = x_dim * ex, height = y_dim * ex) +par(pty = "s", + xpd = NA, + bg = "black", + mar = rep(0, 4), oma = rep(0, 4)) +plot.new() +plot.window(xlim = x_lim, ylim = y_lim) +polygon(hexd, col = "white") +dev.off() + +# load the hex mask +mask <- image_read("logos/hex_mask.pdf") +mask <- image_transparent(mask, "black") + +# crop and mask header image to the mask (?) +bg <- image_read("logos/greta-header.png") +bg <- image_crop(bg, "498x576+1600+600") +hex_bg <- image_composite(bg, mask, "CopyOpacity") +image_write(hex_bg, path = "logos/hex_bg.pdf") + +# # add the icon and save +# icon <- image_read("logos/icon_on_light_transparent.png") +# icon <- image_resize(icon, "15%") +# hex <- image_composite(hex_bg, icon, offset = "+114+223") + +par(pty = "s", + xpd = NA, + bg = "black", + mar = c(5, 4, 4, 2) + 0.1) + +greta_hex <- sticker("logos/hex_bg.pdf", + s_x = 1, + s_y = 1, + s_width = 1.05, + package = "greta", + p_y = 1.1, + p_family = "Muli", + p_size = 15, + h_fill = greta_col("main"), + h_color = greta_col("main"), + filename = "logos/greta_hex.png") +library(ggplot2) +ggsave(greta_hex, width = 43.9, height = 50.8, + filename = "logos/greta_hex.png", + bg = "transparent", + units = "mm", + dpi = 600) + +file.remove("logos/hex_bg.pdf") +file.remove("logos/hex_mask.pdf") diff --git a/man/calculate.Rd b/man/calculate.Rd new file mode 100644 index 00000000..44fba24b --- /dev/null +++ b/man/calculate.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate.R +\name{calculate} +\alias{calculate} +\title{calculate greta arrays given fixed values} +\usage{ +calculate(target, values) +} +\arguments{ +\item{target}{a greta array for which to calculate the value} + +\item{values}{a named list giving temporary values of the greta arrays with +which \code{target} is connected, or an \code{mcmc.list} object returned by +\code{\link{mcmc}}.} +} +\value{ +A numeric R array with the same dimensions as \code{target}, giving + the values it would take conditioned on the fixed values given by + \code{values}. +} +\description{ +Calculate the values that greta arrays would take, given + temporary values for the greta arrays on which they depend, and return them + as numeric R arrays. This can be used to check the behaviour of your model + or make predictions to new data after model fitting. +} +\details{ +The greta arrays named in \code{values} need not be variables, they + can also be other operations or even data. 
+ + At present, if \code{values} is a named list it must contain values for + \emph{all} of the variable greta arrays with which \code{target} is + connected, even values are given for intermediate operations, or the target + doesn't depend on the variable. That may be relaxed in a future release. +} +\examples{ +\dontrun{ + +# define a variable greta array, and another that is calculated from it +# then calculate what value y would take for different values of x +x <- normal(0, 1, dim = 3) +a <- lognormal(0, 1) +y <- sum(x ^ 2) + a +calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2)) + + +# define a model +alpha <- normal(0, 1) +beta <- normal(0, 1) +sigma <- lognormal(1, 0.1) +mu <- alpha + iris$Petal.Length * beta +distribution(iris$Petal.Width) <- normal(mu, sigma) +m <- model(alpha, beta, sigma) + +# calculate intermediate greta arrays, given some parameter values +calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5)) +calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5)) + + +# fit the model then calculate samples at a new greta array +draws <- mcmc(m, n_samples = 500) +petal_length_plot <- seq(min(iris$Petal.Length), + max(iris$Petal.Length), + length.out = 100) +mu_plot <- alpha + petal_length_plot * beta +mu_plot_draws <- calculate(mu_plot, draws) + +# plot the draws +mu_est <- colMeans(mu_plot_draws[[1]]) +plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]])) +apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8)) +lines(mu_est ~ petal_length_plot, lwd = 2) +} + + +} diff --git a/man/distribution.Rd b/man/distribution.Rd index c3dbd409..6553e745 100644 --- a/man/distribution.Rd +++ b/man/distribution.Rd @@ -33,12 +33,12 @@ The extract method returns the greta array if it has a distribution, # observed data and mean parameter to be estimated # (explicitly coerce data to a greta array so we can refer to it later) -y = as_data(rnorm(5, 0, 3)) +y <- as_data(rnorm(5, 0, 3)) -mu = uniform(-3, 3) +mu <- uniform(-3, 3) # define the distribution over y (the model likelihood) -distribution(y) = normal(mu, 1) +distribution(y) <- normal(mu, 1) # get the distribution over y distribution(y) diff --git a/man/distributions.Rd b/man/distributions.Rd index 12950c97..4b59b616 100644 --- a/man/distributions.Rd +++ b/man/distributions.Rd @@ -194,39 +194,39 @@ The discrete probability distributions (\code{bernoulli}, \dontrun{ # a uniform parameter constrained to be between 0 and 1 -phi = uniform(min = 0, max = 1) +phi <- uniform(min = 0, max = 1) # a length-three variable, with each element following a standard normal # distribution -alpha = normal(0, 1, dim = 3) +alpha <- normal(0, 1, dim = 3) # a length-three variable of lognormals -sigma = lognormal(0, 3, dim = 3) +sigma <- lognormal(0, 3, dim = 3) # a hierarchical uniform, constrained between alpha and alpha + sigma, -eta = alpha + uniform(0, 1, dim = 3) * sigma +eta <- alpha + uniform(0, 1, dim = 3) * sigma # a hierarchical distribution -mu = normal(0, 1) -sigma = lognormal(0, 1) -theta = normal(mu, sigma) +mu <- normal(0, 1) +sigma <- lognormal(0, 1) +theta <- normal(mu, sigma) # a vector of 3 variables drawn from the same hierarchical distribution -thetas = normal(mu, sigma, dim = 3) +thetas <- normal(mu, sigma, dim = 3) # a matrix of 12 variables drawn from the same hierarchical distribution -thetas = normal(mu, sigma, dim = c(3, 4)) +thetas <- normal(mu, sigma, dim = c(3, 4)) # a multivariate normal variable, with correlation between two elements Sig <- diag(4) Sig[3, 4] <- Sig[4, 3] <- 
0.6 -theta = multivariate_normal(rep(mu, 4), Sig) +theta <- multivariate_normal(rep(mu, 4), Sig) # 10 independent replicates of that -theta = multivariate_normal(rep(mu, 4), Sig, dim = 10) +theta <- multivariate_normal(rep(mu, 4), Sig, dim = 10) # a Wishart variable with the same covariance parameter -theta = wishart(df = 5, Sigma = Sig) +theta <- wishart(df = 5, Sigma = Sig) } } diff --git a/man/extract-replace-combine.Rd b/man/extract-replace-combine.Rd index 2a2cfad3..e9662613 100644 --- a/man/extract-replace-combine.Rd +++ b/man/extract-replace-combine.Rd @@ -59,7 +59,7 @@ dim(x) <- value \examples{ \dontrun{ - x = as_data(matrix(1:12, 3, 4)) + x <- as_data(matrix(1:12, 3, 4)) # extract/replace x[1:3, ] diff --git a/man/functions.Rd b/man/functions.Rd index f3b52cfc..a46af747 100644 --- a/man/functions.Rd +++ b/man/functions.Rd @@ -91,17 +91,17 @@ TensorFlow only enables rounding to integers, so \code{round()} will \examples{ \dontrun{ -x = as_data(matrix(1:9, nrow = 3, ncol = 3)) -a = log(exp(x)) -b = log1p(expm1(x)) -c = sign(x - 5) -d = abs(x - 5) +x <- as_data(matrix(1:9, nrow = 3, ncol = 3)) +a <- log(exp(x)) +b <- log1p(expm1(x)) +c <- sign(x - 5) +d <- abs(x - 5) -e = diag(x) +e <- diag(x) diag(x) <- e + 1 -z = t(a) +z <- t(a) -y = sweep(x, 1, e, '-') +y <- sweep(x, 1, e, '-') } } diff --git a/man/greta.Rd b/man/greta.Rd index ff618800..9b14d1d4 100644 --- a/man/greta.Rd +++ b/man/greta.Rd @@ -25,13 +25,13 @@ greta lets you write statistical models interactively in native # a simple Bayesian regression model for the iris data # priors -int = normal(0, 5) -coef = normal(0, 3) -sd = lognormal(0, 3) +int <- normal(0, 5) +coef <- normal(0, 3) +sd <- lognormal(0, 3) # likelihood mean <- int + coef * iris$Petal.Length -distribution(iris$Sepal.Length) = normal(mean, sd) +distribution(iris$Sepal.Length) <- normal(mean, sd) # build and sample m <- model(int, coef, sd) diff --git a/man/inference.Rd b/man/inference.Rd index 48437a17..4bbc8b46 100644 --- a/man/inference.Rd +++ b/man/inference.Rd @@ -10,7 +10,7 @@ stashed_samples() mcmc(model, method = c("hmc"), n_samples = 1000, thin = 1, warmup = 100, - verbose = TRUE, pb_update = 10, control = list(), + chains = 1, verbose = TRUE, pb_update = 10, control = list(), initial_values = NULL) opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06, @@ -19,19 +19,21 @@ opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06, \arguments{ \item{model}{greta_model object} -\item{method}{the method used to sample or optimise values. Currently only +\item{method}{method used to sample or optimise values. Currently only one method is available for each procedure: \code{hmc} and \code{adagrad}} -\item{n_samples}{the number of MCMC samples to draw (after any warm-up, but +\item{n_samples}{number of MCMC samples to draw (after any warm-up, but before thinning)} -\item{thin}{the MCMC thinning rate; every \code{thin} samples is retained, +\item{thin}{MCMC thinning rate; every \code{thin} samples is retained, the rest are discarded} -\item{warmup}{the number of samples to spend warming up the mcmc sampler. +\item{warmup}{number of samples to spend warming up the mcmc sampler. 
During this phase the sampler moves toward the highest density area and tunes sampler hyperparameters.} +\item{chains}{number of MCMC chains to run} + \item{verbose}{whether to print progress information to the console} \item{pb_update}{how regularly to update the progress bar (in iterations)} @@ -39,18 +41,18 @@ tunes sampler hyperparameters.} \item{control}{an optional named list of hyperparameters and options to control behaviour of the sampler or optimiser. See Details.} -\item{initial_values}{an optional named vector of initial values for the free -parameters in the model. These will be used as the starting point for -sampling/optimisation} +\item{initial_values}{an optional vector (or list of vectors, for multiple +chains) of initial values for the free parameters in the model. These will +be used as the starting point for sampling/optimisation.} \item{max_iterations}{the maximum number of iterations before giving up} \item{tolerance}{the numerical tolerance for the solution, the optimiser stops when the (absolute) difference in the joint density between successive iterations drops below this level} } \value{ -\code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object that can be analysed using - functions from the coda package. This will contain mcmc samples of the - greta arrays used to create \code{model}. +\code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object + that can be analysed using functions from the coda package. This will + contain mcmc samples of the greta arrays used to create \code{model}. \code{opt} - a list containing the following named elements: \itemize{ @@ -90,17 +92,19 @@ For \code{mcmc()} if \code{verbose = TRUE}, the progress bar shows (via \code{model}) to have double precision, though this will slow down sampling. - Currently, the only implemented MCMC procedure is static Hamiltonian - Monte Carlo (\code{method = "hmc"}). During the warmup iterations, the - leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the - sampler efficiency. The \code{control} argument can be used to specify the - initial value for epsilon, along with two other hyperparameters: \code{Lmin} - and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the - upper and lower limits to the number of leapfrog steps per iteration (from - which the number is selected uniformly at random). + Currently, the only implemented MCMC procedure is static Hamiltonian Monte + Carlo (\code{method = "hmc"}). During the warmup iterations, the leapfrog + stepsize hyperparameter \code{epsilon} is tuned to maximise the sampler + efficiency, and the posterior marginal standard deviations are estimated + \code{diag_sd}. The \code{control} argument can be used to specify the + initial value for \code{epsilon}, \code{diag_sd}, and two other + hyperparameters: \code{Lmin} and \code{Lmax}; positive integers (with + \code{Lmax > Lmin}) giving the upper and lower limits to the number of + leapfrog steps per iteration (from which the number is selected uniformly + at random). The default control options for HMC are: - \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)} + \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005, diag_sd = 1)} Currently, the only implemented optimisation algorithm is Adagrad (\code{method = "adagrad"}). 
The \code{control} argument can be used to @@ -114,10 +118,10 @@ Currently, the only implemented optimisation algorithm is Adagrad \examples{ \dontrun{ # define a simple model -mu = variable() -sigma = lognormal(1, 0.1) -x = rnorm(10) -distribution(x) = normal(mu, sigma) +mu <- variable() +sigma <- lognormal(1, 0.1) +x <- rnorm(10) +distribution(x) <- normal(mu, sigma) m <- model(mu, sigma) # carry out mcmc on the model diff --git a/man/model.Rd b/man/model.Rd index 145b82ef..471ccd10 100644 --- a/man/model.Rd +++ b/man/model.Rd @@ -64,18 +64,16 @@ The plot method produces a visual representation of the defined \if{latex}{\figure{plotlegend.pdf}{options: width=7cm}} } \examples{ - \dontrun{ # define a simple model -mu = variable() -sigma = lognormal(1, 0.1) -x = rnorm(10) -distribution(x) = normal(mu, sigma) +mu <- variable() +sigma <- lognormal(1, 0.1) +x <- rnorm(10) +distribution(x) <- normal(mu, sigma) m <- model(mu, sigma) plot(m) } - } diff --git a/man/operators.Rd b/man/operators.Rd index 3f6e3555..e9f730c0 100644 --- a/man/operators.Rd +++ b/man/operators.Rd @@ -45,22 +45,22 @@ greta's operators are used just like R's the standard arithmetic, \examples{ \dontrun{ -x = as_data(-1:12) +x <- as_data(-1:12) # arithmetic -a = x + 1 -b = 2 * x + 3 -c = x \%\% 2 -d = x \%/\% 5 +a <- x + 1 +b <- 2 * x + 3 +c <- x \%\% 2 +d <- x \%/\% 5 # logical -e = (x > 1) | (x < 1) -f = e & (x < 2) -g = !f +e <- (x > 1) | (x < 1) +f <- e & (x < 2) +g <- !f # relational -h = x < 1 -i = (-x) >= x -j = h == x +h <- x < 1 +i <- (-x) >= x +j <- h == x } } diff --git a/man/transforms.Rd b/man/transforms.Rd index 84c222e2..111ed2ea 100644 --- a/man/transforms.Rd +++ b/man/transforms.Rd @@ -8,6 +8,7 @@ \alias{icloglog} \alias{icauchit} \alias{log1pe} +\alias{imultilogit} \title{transformation functions for greta arrays} \usage{ iprobit(x) @@ -19,6 +20,8 @@ icloglog(x) icauchit(x) log1pe(x) + +imultilogit(x) } \arguments{ \item{x}{a real-valued (i.e. values ranging from -Inf to Inf) greta array to @@ -34,26 +37,36 @@ greta does not allow you to state the transformation/link on the modelling languages. That's because the same syntax has a very different meaning in R, and can only be applied to objects that are already in existence. The inverse forms of the common link functions (prefixed with an - 'i') are therefore more likely to useful in modelling, and these are what - are provided here. + 'i') can be used instead. + + The \code{log1pe} inverse link function is equivalent to \code{log(1 + + exp(x))}, yielding a positive transformed parameter. Unlike the log + transformation, this transformation is approximately linear for x > 1. i.e. + when \eqn{x > 1}, \eqn{y \approx x} - The \code{log1pe} inverse link function is equivalent to \code{log(1 + exp(x))}, - yielding a positive transformed parameter. Unlike the log transformation, - this transformation is approximately linear for x > 1. i.e. when \eqn{x > - 1}, \eqn{y \approx x} + \code{imultilogit} expects an n-by-m greta array, and returns an n-by-(m+1) + greta array of positive reals whose rows sum to one. This is equivalent + adding a final column of 0s and then running the softmax function widely + used in machine learning. 
} \examples{ \dontrun{ - x = normal(1, 3, dim = 10) + x1 <- normal(1, 3, dim = 10) # transformation to the unit interval - p1 <- iprobit(x) - p2 <- ilogit(x) - p3 <- icloglog(x) - p4 <- icauchit(x) + p1 <- iprobit(x1) + p2 <- ilogit(x1) + p3 <- icloglog(x1) + p4 <- icauchit(x1) # and to positive reals - y <- log1pe(x) + y <- log1pe(x1) + + # transform from 10x3 to 10x4, where rows are a complete set of + # probabilities + x2 <- normal(1, 3, dim = c(10, 3)) + z <- imultilogit(x2) + } } diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 91456f7b..b6c0ed7b 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,10 +1,15 @@ # test functions -library(tensorflow) +library (tensorflow) -# set the seed before running tests +# set the seed and flush the graph before running tests +tf <- tensorflow::tf +tf$reset_default_graph() set.seed(2017-05-01) +quietly <- function (expr) + invisible(capture.output(expr)) + expect_ok <- function (expr) expect_error(expr, NA) @@ -65,8 +70,6 @@ compare_distribution <- function (greta_fun, r_fun, parameters, x) { # define greta distribution, with fixed values - tf$reset_default_graph() - parameters_greta <- parameters # no dim for wishart if (!identical(names(parameters), c('df', 'Sigma'))) @@ -116,8 +119,6 @@ randu <- function (...) { # e.g. check_op(sum, randn(100, 3)) check_op <- function (op, a, b, greta_op = NULL) { - tf$reset_default_graph() - if (is.null(greta_op)) greta_op <- op @@ -168,8 +169,6 @@ with_greta <- function (call, swap = c('x'), swap_scope = 1) { # e.g. check_expr(a[1:3], swap = 'a') check_expr <- function (expr, swap = c('x')) { - tf$reset_default_graph() - call <- substitute(expr) r_out <- eval(expr) @@ -241,8 +240,6 @@ compare_truncated_distribution <- function (greta_fun, # array. 'r_fun' is an r function returning the log density for the same # truncated distribution, taking x as its only argument. 
- tf$reset_default_graph() - require (truncdist) x <- do.call(truncdist::rtrunc, @@ -332,8 +329,7 @@ mock_mcmc <- function (n_samples = 1010) { dinvgamma <- extraDistr::dinvgamma qinvgamma <- extraDistr::qinvgamma -pinvgamma <- function (q, alpha, beta) - ifelse(q < 0, 0, extraDistr::pinvgamma(q, alpha, beta)) +pinvgamma <- extraDistr::pinvgamma dlaplace <- extraDistr::dlaplace plaplace <- extraDistr::plaplace diff --git a/tests/testthat/test_calculate.R b/tests/testthat/test_calculate.R new file mode 100644 index 00000000..85001f5b --- /dev/null +++ b/tests/testthat/test_calculate.R @@ -0,0 +1,122 @@ +context('calculate') + +test_that('calculate works with correct lists', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # unknown variable + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + y_value <- calculate(y, list(a = 3)) + expect_equal(y_value, matrix(c(3, 6))) + + # unknown variable and new data + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + y_value <- calculate(y, list(a = 6, x = c(2, 1))) + expect_equal(y_value, matrix(c(12, 6))) + +}) + +test_that('calculate works with mcmc.list objects', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + samples <- 10 + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + m <- model(y) + draws <- mcmc(m, warmup = 0, n_samples = samples, verbose = FALSE) + + # with an existing greta array + y_values <- calculate(y, draws) + # correct class + expect_s3_class(y_values, "mcmc.list") + # correct dimensions + expect_equal(dim(y_values[[1]]), c(10, 2)) + # all valid values + expect_true(all(is.finite(as.vector(y_values[[1]])))) + + # with a new greta array, based on a different element in the model + new_values <- calculate(a ^ 2, draws) + # correct class + expect_s3_class(new_values, "mcmc.list") + # correct dimensions + expect_equal(dim(new_values[[1]]), c(10, 1)) + # all valid values + expect_true(all(is.finite(as.vector(new_values[[1]])))) + +}) + +test_that('calculate errors nicely if mcmc.list objects aren missing crucial info', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + samples <- 10 + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + m <- model(y) + draws <- mcmc(m, warmup = 0, n_samples = samples, verbose = FALSE) + + # scrub the model info + attr(draws, "model_info") <- NULL + + # it should error nicely + expect_error(calculate(y, draws), + "perhaps it wasn't created with greta") + +}) + +test_that('calculate errors nicely if not all required values are passed', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + + # it should error nicely + expect_error(calculate(y, list(x = c(2, 1))), + "values have not been provided for all variables") + +}) + +test_that('calculate errors nicely if required values have incorrect dimensions', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + + # it should error nicely + expect_error(calculate(y, list(a = c(1, 1))), + "different number of elements than the greta array") + +}) + +test_that('calculate errors nicely if not used on a greta array', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- as_data(c(1, 2)) + a <- normal(0, 1) + y <- a * x + z <- 1:5 + + + # it should error nicely + expect_error(calculate(z, list(a = c(1, 1))), + "target' is not a greta array") + +}) diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index f41e0a82..6e95f2e7 
100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -47,7 +47,7 @@ test_that('opt accepts initial values', { }) -test_that('rejected mcmc proposals', { +test_that('bad mcmc proposals are rejected', { skip_if_not(check_tf_version()) source('helpers.R') @@ -75,7 +75,13 @@ test_that('rejected mcmc proposals', { distribution(x) = normal(z, 1e-12) m <- model(z) expect_error(mcmc(m, n_samples = 1, warmup = 0), - 'Could not find reasonable starting values after 10 attempts') + 'Could not find reasonable starting values after 20 attempts') + + # proposals that are fine, but rejected anyway (long chain required) + z = normal(0, 1) + m <- model(z) + expect_ok(mcmc(m, n_samples = 5, warmup = 0, verbose = FALSE, + control = list(epsilon = 100, Lmin = 1, Lmax = 1))) }) @@ -88,7 +94,51 @@ test_that('mcmc works with verbosity and warmup', { z = normal(0, 1) distribution(x) = normal(z, 1) m <- model(z) - expect_ok( mcmc(m, n_samples = 50, warmup = 50, verbose = TRUE) ) + quietly(expect_ok( mcmc(m, n_samples = 50, warmup = 50, verbose = TRUE) )) + +}) + +test_that('mcmc works with multiple chains', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(10) + z = normal(0, 1) + distribution(x) = normal(z, 1) + m <- model(z) + + # multiple chains, automatic initial values + quietly(expect_ok( mcmc(m, warmup = 10, n_samples = 10, chains = 2) )) + + # multiple chains, user-specified initial values + quietly(expect_ok( mcmc(m, warmup = 10, n_samples = 10, chains = 2, initial_values = list(1, 2)) )) + +}) + +test_that('mcmc handles initial values nicely', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(10) + z = normal(0, 1) + distribution(x) = normal(z, 1) + m <- model(z) + + # too many sets of initial values + expect_error( mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE, + chains = 2, initial_values = list(1)), + "sets of initial values were provided, but there are") + + # initial values have the wrong length + expect_error( mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE, + chains = 2, initial_values = list(1:2, 2:3)), + "each set of initial values must be a vector of length") + + quietly(expect_message(mcmc(m, warmup = 10, n_samples = 10, + chains = 2, initial_values = 1), + "only one set of was initial values given, and was used for all chains")) }) @@ -140,10 +190,31 @@ test_that('stashed_samples works', { # mock up a stash stash <- greta:::greta_stash - assign('trace_stash', as.matrix(rnorm(17)), envir = stash) + trace_stash <- list(trace = as.matrix(rnorm(17)), + raw = as.matrix(rnorm(17))) + assign('trace_stash', trace_stash, envir = stash) # should convert to an mcmc.list ans <- stashed_samples() expect_s3_class(ans, 'mcmc.list') + # model_info attribute should have raw draws and the model + model_info <- attr(ans, "model_info") + expect_true(inherits(model_info, "environment")) + expect_s3_class(model_info$raw_draws, 'mcmc.list') + expect_true(inherits(model_info$model, "greta_model")) + +}) + + +test_that('model errors nicely', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # model should give a nice error if passed something other than a greta array + a <- 1 + b <- normal(0, a) + expect_error(model(a, b), + "^The following object") }) diff --git a/tests/testthat/test_operators.R b/tests/testthat/test_operators.R index 42e88e26..e7c9cdb3 100644 --- a/tests/testthat/test_operators.R +++ b/tests/testthat/test_operators.R @@ -87,13 +87,19 @@ test_that('random strings of operators work as 
expected', { }) -test_that('%*% errors informatively', { +test_that("%*% errors informatively", { skip_if_not(check_tf_version()) + source('helpers.R') + a <- ones(3, 4) b <- ones(1, 4) + c <- ones(2, 2, 2) expect_error(a %*% b, - 'incompatible dimensions: 3x4 vs 1x4') + "incompatible dimensions: 3x4 vs 1x4") + + expect_error(a %*% c, + "only two-dimensional greta arrays can be matrix-multiplied") }) diff --git a/tests/testthat/test_transformations.R b/tests/testthat/test_transforms.R similarity index 54% rename from tests/testthat/test_transformations.R rename to tests/testthat/test_transforms.R index 14026e88..2916d960 100644 --- a/tests/testthat/test_transformations.R +++ b/tests/testthat/test_transforms.R @@ -7,17 +7,38 @@ test_that('transformations work as expected', { a <- randn(25, 4) + r_icloglog <- function (x) 1 - exp(-1 * exp(x)) r_log1pe <- function (x) log1p(exp(x)) + r_imultilogit <- function (x) { + x <- cbind(x, 0) + x <- exp(x) + x <- sweep(x, 1, rowSums(x), "/") + x + } + check_op(pnorm, a, greta_op = iprobit) check_op(plogis, a, greta_op = ilogit) check_op(r_icloglog, a, greta_op = icloglog) check_op(pcauchy, a, greta_op = icauchit) check_op(r_log1pe, a, greta_op = log1pe) + check_op(r_imultilogit, a, greta_op = imultilogit) + +}) + +test_that("imultilogit errors informatively", { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- ones(3, 4, 3) + + expect_error(imultilogit(x), + "imultilogit expects a 2D greta array") }) diff --git a/vignettes/examples/air.Rmd b/vignettes/examples/air.Rmd index 40e80446..b0acc6a8 100644 --- a/vignettes/examples/air.Rmd +++ b/vignettes/examples/air.Rmd @@ -21,11 +21,11 @@ J <- 3 #### greta code ```{r air_greta} -theta = normal(0, 32, dim = 2) +theta <- normal(0, 32, dim = 2) mu <- alpha + beta * Z -X = normal(mu, sigma) +X <- normal(mu, sigma) p <- ilogit(theta[1] + theta[2] * X) -distribution(y) = binomial(n, p) +distribution(y) <- binomial(n, p) ``` #### BUGS/JAGS code diff --git a/vignettes/examples/beetles.Rmd b/vignettes/examples/beetles.Rmd index be375481..d4f30ec2 100644 --- a/vignettes/examples/beetles.Rmd +++ b/vignettes/examples/beetles.Rmd @@ -17,10 +17,10 @@ N <- 8 #### greta code ```{r beetles_greta} -alpha_star = normal(0, 32) -beta = normal(0, 32) +alpha_star <- normal(0, 32) +beta <- normal(0, 32) p <- ilogit(alpha_star + beta * (x - mean(x))) -distribution(r) = binomial(n, p) +distribution(r) <- binomial(n, p) alpha <- alpha_star - beta * mean(x) rhat <- p * n diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index 2851f9a8..f1ebc962 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -15,7 +15,7 @@ vignette: > %\usepackage[utf8]{inputenc} --- -```{r setup, include=FALSE} +```{r setup, include = FALSE} knitr::opts_chunk$set(echo = TRUE, eval = greta:::check_tf_version("message"), @@ -128,15 +128,15 @@ x <- as_data(iris$Petal.Length) y <- as_data(iris$Sepal.Length) # variables and priors -int = normal(0, 1) -coef = normal(0, 3) -sd = student(3, 0, 1, truncation = c(0, Inf)) +int <- normal(0, 1) +coef <- normal(0, 3) +sd <- student(3, 0, 1, truncation = c(0, Inf)) # operations mean <- int + coef * x # likelihood -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) # defining the model m <- model(int, coef, sd) @@ -148,8 +148,6 @@ plot(m) draws <- mcmc(m, n_samples = 1000) ``` -Throughout this example, and the rest of greta's documentation, `<-` is used when assigning *deterministic* values, and `=` when assigning variable or *stochastic* values. 
This makes the model code a little bit clearer to read, since `=` corresponds to the `~` symbol used to define stochastic relationships in BUGS and Stan. However, it doesn't actually make any difference to the model which assignment operator you use. -
### Data @@ -208,19 +206,19 @@ greta_array(0:1, dim = c(3, 3)) The second section of the script creates three greta arrays to represent the parameters in our model: ```{r variables} -int = normal(0, 1) -coef = normal(0, 3) -sd = student(3, 0, 1, truncation = c(0, Inf)) +int <- normal(0, 1) +coef <- normal(0, 3) +sd <- student(3, 0, 1, truncation = c(0, Inf)) ``` -Each of these is a variable greta array, and each is assumed *a priori* (before fitting to data) to follow a different probability distribution. In other wirds, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions. +Each of these is a variable greta array, and each is assumed *a priori* (before fitting to data) to follow a different probability distribution. In other words, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions. #### variables without probability distributions If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the `variable()` function. E.g. in a frequentist version of the model we could create `int` with: ```{r int_variable} -(int = variable()) +(int <- variable()) ``` `variable()` has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of `lower = -Inf, upper = Inf` meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar). @@ -228,7 +226,7 @@ If we were carrying out a frequentist analysis of this model, we could create va We can create a variable constrained between two values by specifying `lower` and `upper`. So we could have created the positive variable `sd` (the standard deviation of the likelihood) with: ```{r positive_variable} -(sd = variable(lower = 0)) +(sd <- variable(lower = 0)) ``` If we had instead wanted a 2x3 array of positive variables we could have created it like this: @@ -247,7 +245,7 @@ Both `int` and `coef` were given zero-mean normal distributions, which are a com Some of greta's probability distributions (those that are continous and univariate) can be specified as truncated distributions. By modifying the `truncation` argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do: ```{r truncated1} -(z = normal(0, 1, truncation = c(-1, 1))) +(z <- normal(0, 1, truncation = c(-1, 1))) ``` greta will account for this truncation when calculating the density of this distribution; rescaling it to be a valid probability distribution. We can only truncate to within the support of the distribution; e.g. greta will throw an error if we try to truncate a lognormal distribution (which must be positive) to have a lower bound of -1. 
@@ -294,7 +292,7 @@ dim(z[, 1]) dim(z[, 1, drop = FALSE]) ``` -greta arrays must always have two dimensions, so greta always acts as though `drop=FALSE`: +greta arrays must always have two dimensions, so greta always acts as though `drop = FALSE`: ```{r drop_greta} z_greta <- as_data(z) @@ -320,10 +318,10 @@ So far, we have created greta arrays representing the variables in our model (wi By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we're trying to work out the parameters of that distribution. In greta we do that with the `distribution()` assignment function: ```{r likelihood} -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) ``` -With the syntax `distribution() = `, we are stating that the data greta array on the left `` has the same distribution as the greta array on the right ``. In this case, we're temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays `mean` and `sd`), but then stating that values of that distribution have been observed (`y`). In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters. +With the syntax `distribution() <- `, we are stating that the data greta array on the left `` has the same distribution as the greta array on the right ``. In this case, we're temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays `mean` and `sd`), but then stating that values of that distribution have been observed (`y`). In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters.
@@ -334,11 +332,11 @@ Now all of the greta arrays making up the model have been created, we need to co ```{r hidden_model, echo = FALSE} x <- as_data(iris$Petal.Length) y <- as_data(iris$Sepal.Length) -int = normal(0, 1) -coef = normal(0, 3) -sd = student(3, 0, 1, truncation = c(0, Inf)) +int <- normal(0, 1) +coef <- normal(0, 3) +sd <- student(3, 0, 1, truncation = c(0, Inf)) mean <- int + coef * x -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) ``` ```{r define_model} m <- model(int, coef, sd) @@ -354,7 +352,7 @@ greta provides a plot function for greta models to help you visualise and check ```{r plot, eval = FALSE} plot(m) ``` -```{r plot_hidden, echo = FALSE, results='hide'} +```{r plot_hidden, echo = FALSE, results = "hide"} gr <- plot(m) fname <- "figures/full_graph.png" DiagrammeR::export_graph(gr, @@ -374,10 +372,10 @@ Here's a legend for the plot (it's in the [`?greta::model`](https://greta-dev.gi The fourth type of node (diamonds) represents probability distributions. These have greta arrays as parameters (linked via solid lines), and have other greta arrays as *values*(linked via dashed lines). Distributions calculate the probability density of the *values*, given the parameters and their distribution type. -For example, a plot of just the prior distribution over `coef` (defined as `coef = normal(0, 3)`) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution's value, the variable `coef`: +For example, a plot of just the prior distribution over `coef` (defined as `coef <- normal(0, 3)`) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution's value, the variable `coef`: -```{r plot_coef, echo = FALSE, results='hide'} -coef = normal(0, 3) +```{r plot_coef, echo = FALSE, results = "hide"} +coef <- normal(0, 3) m_coef <- model(coef) gr <- plot(m_coef) fname <- "figures/coef_graph.png" @@ -391,11 +389,11 @@ DiagrammeR::export_graph(gr, It's the same for the model likelihood, but this time the distribution's parameters are a variable (`sd`) and the result of an operation (`mean`), and the distribution's value is given by data (the observed sepal lengths `y`): -```{r plot_likelihood, echo = FALSE, results='hide'} -sd = variable() +```{r plot_likelihood, echo = FALSE, results = "hide"} +sd <- variable() y <- as_data(iris$Sepal.Length) mean <- ones(150) -distribution(y) = normal(mean, sd) +distribution(y) <- normal(mean, sd) m_likelihood <- model(sd) gr <- plot(m_likelihood) idx <- which(gr$nodes_df$label == 'mean\n') @@ -422,7 +420,7 @@ When defining the model, greta combines all of the distributions together to def Now we have a greta model that will give us the joint density for a candidate set of values, so we can use that to carry out inference on the model. 
We do that using an Markov chain Monte Carlo (MCMC) algorithm to sample values of the parameters we're interested in, using the `mcmc()` function: -```{r mcmc, message=FALSE, results='hide', progress = FALSE} +```{r mcmc, message = FALSE, results = "hide", progress = FALSE} draws <- mcmc(m, n_samples = 1000) ``` @@ -441,7 +439,7 @@ mcmc_trace(draws) mcmc_intervals(draws) ``` -```{r mcmcvis, echo = FALSE, message = FALSE, out.width=c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} +```{r mcmcvis, echo = FALSE, message = FALSE, out.width = c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} library (bayesplot) mcmc_trace(draws, facet_args = list(nrow = 3, ncol = 1)) mcmc_intervals(draws) diff --git a/vignettes/technical_details.Rmd b/vignettes/technical_details.Rmd index 778fb1f2..20ff451c 100644 --- a/vignettes/technical_details.Rmd +++ b/vignettes/technical_details.Rmd @@ -97,7 +97,7 @@ z$node$tf Hamiltonian Monte Carlo (HMC) requires all of the parameters to be transformed to a continuous scale for sampling. Variable nodes are therefore converted to tensors by first defining a 'free' (unconstrained) version of themselves as a tensor, and then applying a transformation function to convert them back to the scale of interest. ```{r free_state} -a = variable(lower = 0) +a <- variable(lower = 0) class(a$node) a$node$tf_from_free ``` @@ -107,7 +107,7 @@ a$node$tf_from_free distribution nodes are node objects just like the others, but they are not *directly* associated with greta arrays. Instead, greta arrays may have a distribution node in their `distribution` slot to indicate that their values are assumed to follow that distribution. The distribution node will also be listed as a parent node, and likewise the 'target node' will be listed as a child of the distribution. Distribution nodes also have child nodes (data, variables or operations) representing their parameters. ```{r distributions1} -b = normal(0, 1) +b <- normal(0, 1) class(b$node) class(b$node$distribution) # a is the target of its own distribution