Merge pull request #136 from greta-dev/dev

v0.2.4 release candidate
greta-dev · Feb 19, 2018 · 8c83792 · 8c83792
2 parents 4b457c2 + e9f676c
commit 8c83792
Show file tree

Hide file tree

Showing 58 changed files with 1,796 additions and 453 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: greta
 Type: Package
 Title: Simple and Scalable Statistical Modelling in R
-Version: 0.2.3
-Date: 2018-01-23
+Version: 0.2.4
+Date: 2018-02-20
 Authors@R: person("Nick", "Golding", role = c("aut", "cre"),
     email = "[email protected]")
-Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://goldingn.github.io/greta> for more information).
+Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://greta-dev.github.io/greta> for more information).
 License: Apache License 2.0
 URL: https://github.com/greta-dev/greta
 BugReports: https://github.com/greta-dev/greta/issues
@@ -40,6 +40,7 @@ Collate:
     'inference.R'
     'install_tensorflow.R'
     'internals.R'
+    'calculate.R'
 Imports:
     R6,
     tensorflow,

diff --git a/NAMESPACE b/NAMESPACE
@@ -98,6 +98,7 @@ export(bernoulli)
 export(beta)
 export(beta_binomial)
 export(binomial)
+export(calculate)
 export(categorical)
 export(cauchy)
 export(chi_squared)
@@ -116,6 +117,7 @@ export(hypergeometric)
 export(icauchit)
 export(icloglog)
 export(ilogit)
+export(imultilogit)
 export(install_tensorflow)
 export(inverse_gamma)
 export(iprobit)

diff --git a/NEWS b/NEWS
@@ -1,3 +1,33 @@
+### greta 0.2.4
+================
+
+Fixes:
+
+* improved error checking/messages in `model()`, `%*%`
+* switched docs and examples to always use `<-` for assignment
+* fixed the `n_cores` argument to `model()`
+
+New functionality:
+
+* added a `calculate()` function to compute the values of greta arrays conditional on provided values for others
+* added `imultilogit()` transform
+* added a `chains` argument to `model()`
+* improved HMC self-tuning, including a diagonal euclidean metric
+
+### greta 0.2.3
+================
+
+Fixes:
+
+* fixed breaking change in extraDistr API (caused test errors on CRAN builds)
+* added dontrun statements to pass CRAN checks on winbuilder
+* fixed breaking change in tensorflow API (1-based indexing)
+
+New functionality:
+
+* added `cumsum()` and `cumprod()` functions
+
+
 ### greta 0.2.2
 ================
 

diff --git a/R/calculate.R b/R/calculate.R
@@ -0,0 +1,219 @@
+# calculating values outside inference
+
+#' @name calculate
+#' @title calculate greta arrays given fixed values
+#' @description Calculate the values that greta arrays would take, given
+#'   temporary values for the greta arrays on which they depend, and return them
+#'   as numeric R arrays. This can be used to check the behaviour of your model
+#'   or make predictions to new data after model fitting.
+#'
+#' @param target a greta array for which to calculate the value
+#' @param values a named list giving temporary values of the greta arrays with
+#'   which \code{target} is connected, or an \code{mcmc.list} object returned by
+#'   \code{\link{mcmc}}.
+#'
+#' @return A numeric R array with the same dimensions as \code{target}, giving
+#'   the values it would take conditioned on the fixed values given by
+#'   \code{values}.
+#'
+#' @details The greta arrays named in \code{values} need not be variables, they
+#'   can also be other operations or even data.
+#'
+#'   At present, if \code{values} is a named list it must contain values for
+#'   \emph{all} of the variable greta arrays with which \code{target} is
+#'   connected, even values are given for intermediate operations, or the target
+#'   doesn't depend on the variable. That may be relaxed in a future release.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # define a variable greta array, and another that is calculated from it
+#' # then calculate what value y would take for different values of x
+#' x <- normal(0, 1, dim = 3)
+#' a <- lognormal(0, 1)
+#' y <- sum(x ^ 2) + a
+#' calculate(y, list(x = c(0.1, 0.2, 0.3), a = 2))
+#'
+#'
+#' # define a model
+#' alpha <- normal(0, 1)
+#' beta <- normal(0, 1)
+#' sigma <- lognormal(1, 0.1)
+#' mu <- alpha + iris$Petal.Length * beta
+#' distribution(iris$Petal.Width) <- normal(mu, sigma)
+#' m <- model(alpha, beta, sigma)
+#'
+#' # calculate intermediate greta arrays, given some parameter values
+#' calculate(mu[1:5], list(alpha = 1, beta = 2, sigma = 0.5))
+#' calculate(mu[1:5], list(alpha = -1, beta = 0.2, sigma = 0.5))
+#'
+#'
+#' # fit the model then calculate samples at a new greta array
+#' draws <- mcmc(m, n_samples = 500)
+#' petal_length_plot <- seq(min(iris$Petal.Length),
+#'                          max(iris$Petal.Length),
+#'                          length.out = 100)
+#' mu_plot <- alpha + petal_length_plot * beta
+#' mu_plot_draws <- calculate(mu_plot, draws)
+#'
+#' # plot the draws
+#' mu_est <- colMeans(mu_plot_draws[[1]])
+#' plot(mu_est ~ petal_length_plot, type = "n", ylim = range(mu_plot_draws[[1]]))
+#' apply(mu_plot_draws[[1]], 1, lines, x = petal_length_plot, col = grey(0.8))
+#' lines(mu_est ~ petal_length_plot, lwd = 2)
+#' }
+#'
+#'
+calculate <- function (target, values) {
+
+  target_name <- deparse(substitute(target))
+
+  if (!inherits(target, "greta_array"))
+    stop ("'target' is not a greta array")
+
+  if (inherits(values, "mcmc.list"))
+    calculate_mcmc.list(target, target_name, values)
+  else
+    calculate_list(target, values)
+
+}
+
+calculate_mcmc.list <- function (target, target_name, values) {
+
+  model_info <- attr(values, "model_info")
+
+  if (is.null(model_info)) {
+    stop ("value is an mcmc.list object, but is not associated with any ",
+          "model information, perhaps it wasn't created with ",
+          "greta::mcmc() ?",
+          call. = FALSE)
+  }
+
+  # copy and refresh the dag
+  dag <- model_info$model$dag$clone()
+  dag$tf_environment <- new.env()
+  dag$tf_graph <- tf$Graph()
+
+  # extend the dag to include this node, as the target
+  dag$build_dag(list(target))
+  self <- dag  # mock for scoping
+  dag$define_tf(log_density = TRUE, gradients = TRUE)
+
+  dag$target_nodes <- list(target$node)
+  names(dag$target_nodes) <- target_name
+
+  example_values <- dag$trace_values()
+  n_trace <- length(example_values)
+
+  # raw draws are either an attribute, or this object
+  model_info <- attr(values, "model_info")
+  draws <- model_info$raw_draws
+
+  # trace the target for each draw in each chain
+  trace <- list()
+  for (i in seq_along(draws)) {
+
+    samples <- apply(draws[[i]],
+                     1,
+                     function (x) {
+                       dag$send_parameters(x)
+                       dag$trace_values()
+                     })
+
+    samples <- as.matrix(samples)
+
+    if (ncol(samples) != n_trace)
+      samples <- t(samples)
+
+    colnames(samples) <- names(example_values)
+
+    trace[[i]] <- coda::mcmc(samples)
+
+  }
+
+  trace <- coda::mcmc.list(trace)
+  attr(trace, "model_info") <- model_info
+  # return this
+  return (trace)
+
+}
+
+calculate_list <- function(target, values) {
+
+  # get the values and their names
+  names <- names(values)
+  stopifnot(length(names) == length(values))
+
+  # get the corresponding greta arrays
+  fixed_greta_arrays <- lapply(names,
+                               get,
+                               envir = parent.frame(n = 2))
+
+  # make sure that's what they are
+  are_greta_arrays <- vapply(fixed_greta_arrays,
+                             inherits,
+                             "greta_array",
+                             FUN.VALUE = FALSE)
+
+  stopifnot(all(are_greta_arrays))
+
+  # make sure the values have the correct dimensions
+  values <- mapply(assign_dim,
+                   values,
+                   fixed_greta_arrays,
+                   SIMPLIFY = FALSE)
+
+  all_greta_arrays <- c(fixed_greta_arrays, list(target))
+
+  # define the dag and TF graph
+  dag <- dag_class$new(all_greta_arrays)
+  dag$define_tf(log_density = FALSE, gradients = FALSE)
+
+  # build and send a dict for the fixed values
+  fixed_nodes <- lapply(fixed_greta_arrays,
+                        member,
+                        'node')
+
+  names(values) <- vapply(fixed_nodes,
+                          dag$tf_name,
+                          FUN.VALUE = "")
+
+  # check that all of the variables are set
+  # list of variable tf names
+  variable_nodes <- dag$node_list[dag$node_types == "variable"]
+  variable_names <- vapply(variable_nodes,
+                           dag$tf_name,
+                           FUN.VALUE = "")
+
+  if (!all(variable_names %in% names(values))) {
+    stop ("values have not been provided for all variables",
+          call. = FALSE)
+  }
+
+  # send it to tf
+  assign("eval_list", values, envir = dag$tf_environment)
+  ex <- expression(with_dict <- do.call(dict, eval_list))
+  eval(ex, envir = dag$tf_environment)
+
+  # evaluate the target there
+  ex <- sprintf("sess$run(%s, feed_dict = with_dict)",
+                dag$tf_name(target$node))
+  result <- eval(parse(text = ex),
+                 envir = dag$tf_environment)
+
+  result
+
+}
+
+# coerce value to have the correct dimensions
+assign_dim <- function (value, greta_array) {
+  array <- strip_unknown_class(greta_array$node$value())
+  if (length(array) != length(value)) {
+    stop ("a provided value has different number of elements",
+          " than the greta array", call. = FALSE)
+  }
+  array[] <- value
+  array
+}