greta-dev · goldingn · Feb 18, 2018 · Sep 9, 2017 · Sep 13, 2017 · Sep 13, 2017
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -40,6 +40,7 @@ Collate:
     'inference.R'
     'install_tensorflow.R'
     'internals.R'
+    'calculate.R'
 Imports:
     R6,
     tensorflow,

diff --git a/NAMESPACE b/NAMESPACE
@@ -98,6 +98,7 @@ export(bernoulli)
 export(beta)
 export(beta_binomial)
 export(binomial)
+export(calculate)
 export(categorical)
 export(cauchy)
 export(chi_squared)

diff --git a/R/calculate.R b/R/calculate.R
@@ -0,0 +1,219 @@
+# calculating values outside inference
+
+#' @name calculate
+#' @title calculate greta arrays given fixed values
+#' @description Calculate the values that greta arrays would take, given
+#'   temporary values for the greta arrays on which they depend, and return them
+#'   as numeric R arrays. This can be used to check the behaviour of your model
+#'   or make predictions to new data after model fitting.
+#'
+#' @param target a greta array for which to calculate the value
+#' @param values a named list giving temporary values of the greta arrays with
+#'   which \code{target} is connected, or an \code{mcmc.list} object returned by
+#'   \code{\link{mcmc}}.
+#'
+#' @return A numeric R array with the same dimensions as \code{target}, giving
+#'   the values it would take conditioned on the fixed values given by
+#'   \code{values}.
+#'
+#' @details The greta arrays named in \code{values} need not be variables, they
+#'   can also be other operations or even data.
+#'
+#'   At present, if \code{values} is a named list it must contain values for
+#'   \emph{all} of the variable greta arrays with which \code{target} is
+#'   connected, even values are given for intermediate operations, or the target
+#'   doesn't depend on the variable. That may be relaxed in a future release.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # define a variable greta array, and another that is calculated from it
+#' # then calculate what value y would take for different values of x
+#' x = normal(0, 1, dim = 3)
+#' a = lognormal(0, 1)
+#' y <- sum(x ^ 2) + a
+#' calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))
+#'
+#'
+#' # define a model
+#' alpha = normal(0, 1)
+#' beta = normal(0, 1)
+#' sigma = lognormal(1, 0.1)
+#' mu <- alpha + iris$Petal.Length * beta
+#' distribution(iris$Petal.Width) = normal(mu, sigma)
+#' m <- model(alpha, beta, sigma)
+#'
+#' # calculate intermediate greta arrays, given some parameter values
+#' calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5))
+#' calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5))
+#'
+#'
+#' # fit the model then calculate samples at a new greta array
+#' draws <- mcmc(m, n_samples = 500)
+#' petal_length_plot <- seq(min(iris$Petal.Length),
+#'                          max(iris$Petal.Length),
+#'                          length.out = 100)
+#' mu_plot <- alpha + petal_length_plot * beta
+#' mu_plot_draws <- calculate(mu_plot, draws)
+#'
+#' # plot the draws
+#' mu_est <- colMeans(mu_plot_draws[[1]])
+#' plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]]))
+#' apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8))
+#' lines(mu_est ~ petal_length_plot, lwd = 2)
+#' }
+#'
+#'
+calculate <- function (target, values) {
+
+  target_name <- deparse(substitute(target))
+
+  if (!inherits(target, "greta_array"))
+    stop ("'target' is not a greta array")
+
+  if (inherits(values, "mcmc.list"))
+    calculate_mcmc.list(target, target_name, values)
+  else
+    calculate_list(target, values)
+
+}
+
+calculate_mcmc.list <- function (target, target_name, values) {
+
+  model_info <- attr(values, "model_info")
+
+  if (is.null(model_info)) {
+    stop ("value is an mcmc.list object, but is not associated with any ",
+          "model information, perhaps it wasn't created with ",
+          "greta::mcmc() ?",
+          call. = FALSE)
+  }
+
+  # copy and refresh the dag
+  dag <- model_info$model$dag$clone()
+  dag$tf_environment <- new.env()
+  dag$tf_graph <- tf$Graph()
+
+  # extend the dag to include this node, as the target
+  dag$build_dag(list(target))
+  self <- dag  # mock for scoping
+  dag$define_tf(log_density = TRUE, gradients = TRUE)
+
+  dag$target_nodes <- list(target$node)
+  names(dag$target_nodes) <- target_name
+
+  example_values <- dag$trace_values()
+  n_trace <- length(example_values)
+
+  # raw draws are either an attribute, or this object
+  model_info <- attr(values, "model_info")
+  draws <- model_info$raw_draws
+
+  # trace the target for each draw in each chain
+  trace <- list()
+  for (i in seq_along(draws)) {
+
+    samples <- apply(draws[[i]],
+                     1,
+                     function (x) {
+                       dag$send_parameters(x)
+                       dag$trace_values()
+                     })
+
+    samples <- as.matrix(samples)
+
+    if (ncol(samples) != n_trace)
+      samples <- t(samples)
+
+    colnames(samples) <- names(example_values)
+
+    trace[[i]] <- coda::mcmc(samples)
+
+  }
+
+  trace <- coda::mcmc.list(trace)
+  attr(trace, "model_info") <- model_info
+  # return this
+  return (trace)
+
+}
+
+calculate_list <- function(target, values) {
+
+  # get the values and their names
+  names <- names(values)
+  stopifnot(length(names) == length(values))
+
+  # get the corresponding greta arrays
+  fixed_greta_arrays <- lapply(names,
+                               get,
+                               envir = parent.frame(n = 2))
+
+  # make sure that's what they are
+  are_greta_arrays <- vapply(fixed_greta_arrays,
+                             inherits,
+                             "greta_array",
+                             FUN.VALUE = FALSE)
+
+  stopifnot(all(are_greta_arrays))
+
+  # make sure the values have the correct dimensions
+  values <- mapply(assign_dim,
+                   values,
+                   fixed_greta_arrays,
+                   SIMPLIFY = FALSE)
+
+  all_greta_arrays <- c(fixed_greta_arrays, list(target))
+
+  # define the dag and TF graph
+  dag <- dag_class$new(all_greta_arrays)
+  dag$define_tf(log_density = FALSE, gradients = FALSE)
+
+  # build and send a dict for the fixed values
+  fixed_nodes <- lapply(fixed_greta_arrays,
+                        member,
+                        'node')
+
+  names(values) <- vapply(fixed_nodes,
+                          dag$tf_name,
+                          FUN.VALUE = "")
+
+  # check that all of the variables are set
+  # list of variable tf names
+  variable_nodes <- dag$node_list[dag$node_types == "variable"]
+  variable_names <- vapply(variable_nodes,
+                           dag$tf_name,
+                           FUN.VALUE = "")
+
+  if (!all(variable_names %in% names(values))) {
+    stop ("values have not been provided for all variables",
+          call. = FALSE)
+  }
+
+  # send it to tf
+  assign("eval_list", values, envir = dag$tf_environment)
+  ex <- expression(with_dict <- do.call(dict, eval_list))
+  eval(ex, envir = dag$tf_environment)
+
+  # evaluate the target there
+  ex <- sprintf("sess$run(%s, feed_dict = with_dict)",
+                dag$tf_name(target$node))
+  result <- eval(parse(text = ex),
+                 envir = dag$tf_environment)
+
+  result
+
+}
+
+# coerce value to have the correct dimensions
+assign_dim <- function (value, greta_array) {
+  array <- strip_unknown_class(greta_array$node$value())
+  if (length(array) != length(value)) {
+    stop ("a provided value has different number of elements",
+          " than the greta array", call. = FALSE)
+  }
+  array[] <- value
+  array
+}
diff --git a/R/dag_class.R b/R/dag_class.R
@@ -9,6 +9,7 @@ dag_class <- R6Class(
     node_types = NA,
     node_tf_names = NA,
     tf_environment = NA,
+    tf_graph = NA,
     target_nodes = NA,
     parameters_example = NA,
     tf_float = NA,
@@ -27,12 +28,9 @@ dag_class <- R6Class(
       # find the nodes we care about
       self$target_nodes <- lapply(target_greta_arrays, member, 'node')
 
-      # stash the node names, types, and tf names
-      self$node_types <- vapply(self$node_list, node_type, FUN.VALUE = '')
-      self$node_tf_names <- self$make_names()
-
-      # set up the tf environment
+      # set up the tf environment, with a graph
       self$tf_environment <- new.env()
+      self$tf_graph <- tf$Graph()
 
       # stash an example list to relist parameters
       self$parameters_example <- self$example_parameters(flat = FALSE)
@@ -44,6 +42,17 @@ dag_class <- R6Class(
 
     },
 
+    # execute an expression on this dag's tensorflow graph
+    on_graph = function (expr) {
+      with(self$tf_graph$as_default(), expr)
+    },
+
+    # execute an exporession in the tensorflow environment
+    tf_run = function (expr) {
+      self$tf_environment$expr <- substitute(expr)
+      self$on_graph(with(self$tf_environment, eval(expr)))
+    },
+
     # return a list of nodes connected to those in the target node list
     build_dag = function (greta_array_list) {
 
@@ -54,6 +63,10 @@ dag_class <- R6Class(
         node$register_family(self)
       }
 
+      # stash the node names, types, and tf names
+      self$node_types <- vapply(self$node_list, node_type, FUN.VALUE = '')
+      self$node_tf_names <- self$make_names()
+
     },
 
     make_names = function () {
@@ -85,7 +98,7 @@ dag_class <- R6Class(
     },
 
     # define tf graph in environment
-    define_tf = function () {
+    define_tf = function (log_density = TRUE, gradients = TRUE) {
 
       # temporarily pass float type info to options, so it can be accessed by
       # nodes on definition, without cluncky explicit passing
@@ -107,28 +120,33 @@ dag_class <- R6Class(
               call. = FALSE)
       }
 
-      # define all nodes, node densities and free states in the environment
-      lapply(self$node_list, function (x) x$define_tf(self))
+      # define all nodes, node densities and free states in the environment, and
+      # on the graph
+      self$on_graph(lapply(self$node_list,
+                      function (x) x$define_tf(self)))
+
 
       # define an overall log density and gradients, plus adjusted versions
-      self$define_joint_density()
-      self$define_gradients()
+      if (log_density)
+        self$on_graph(self$define_joint_density())
+
+      if (gradients)
+        self$on_graph(self$define_gradients())
 
       # use core and compilation options to create a config object
-      with(self$tf_environment,
-           {config <- tf$ConfigProto(intra_op_parallelism_threads = self$n_cores,
-                                     inter_op_parallelism_threads = self$n_cores)})
+      self$tf_environment$n_cores <- self$n_cores
+      self$tf_run(config <- tf$ConfigProto(inter_op_parallelism_threads = n_cores,
+                                           intra_op_parallelism_threads = n_cores))
 
       if (self$compile) {
-        with(self$tf_environment,
-             {py_set_attr(config$graph_options$optimizer_options,
-                          'global_jit_level',
-                          tf$OptimizerOptions$ON_1)})
+        self$tf_run(py_set_attr(config$graph_options$optimizer_options,
+                                 'global_jit_level',
+                                 tf$OptimizerOptions$ON_1))
       }
 
       # start a session and initialise all variables
-      with(self$tf_environment,
-           {sess <- tf$Session(config = config); sess$run(tf$global_variables_initializer())})
+      self$tf_run(sess <- tf$Session(config = config))
+      self$tf_run(sess$run(tf$global_variables_initializer()))
 
     },
 
@@ -237,24 +255,23 @@ dag_class <- R6Class(
 
       # create a feed dict in the TF environment
       self$tf_environment$parameters <- parameters
-      with(self$tf_environment,
-           parameter_dict <- do.call(dict, parameters))
+      self$tf_run(parameter_dict <- do.call(dict, parameters))
 
     },
 
     # get log density and gradient of joint density w.r.t. free states of all
     # variable nodes, with or without applying the jacobian adjustment
     log_density = function(adjusted = TRUE) {
 
-      cleanly(with(self$tf_environment,
-                   sess$run(joint_density_adj, feed_dict = parameter_dict)))
+      cleanly(self$tf_run(sess$run(joint_density_adj,
+                                   feed_dict = parameter_dict)))
 
     },
 
     gradients = function (adjusted = TRUE) {
 
-      cleanly(with(self$tf_environment,
-                   sess$run(gradients_adj, feed_dict = parameter_dict)))
+      cleanly(self$tf_run(sess$run(gradients_adj,
+                                   feed_dict = parameter_dict)))
 
     },
 

diff --git a/R/greta_model_class.R b/R/greta_model_class.R
@@ -41,7 +41,6 @@ NULL
 #' @return \code{model} - a \code{greta_model} object.
 #'
 #' @examples
-#'
 #' \dontrun{
 #'
 #' # define a simple model
@@ -54,7 +53,6 @@ NULL
 #'
 #' plot(m)
 #' }
-#'
 model <- function (...,
                    precision = c('single', 'double'),
                    n_cores = NULL,