diff --git a/NEWS.md b/NEWS.md index 618aa17a3..0c65e727e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ * Fix: Corrected hash calculation for `PipeOpFilter`. * New PipeOps `PipeOpEncodePLQuantiles` and `PipeOpEncodePLTree` that implement piecewise linear encoding with two different binning methods. * Compatibility with new `R6` release. +* Fix: `PipeOpTargetMutate` and `PipeOpTargetTrafoScaleRange` no longer drop unseen factor levels of features or targets during train and predict. # mlr3pipelines 0.7.1 diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index 99c3e3471..f01f8d06d 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -15,37 +15,36 @@ #' #' @section Construction: #' ``` -#' PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) +#' PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list(), packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) #' ``` #' #' * `id` :: `character(1)`\cr #' Identifier of resulting object. See `$id` slot of [`PipeOp`]. #' * `param_set` :: [`ParamSet`][paradox::ParamSet]\cr -#' Parameter space description. This should be created by the subclass and given to -#' `super$initialize()`. +#' Parameter space description. This should be created by the subclass and given to `super$initialize()`. #' * `param_vals` :: named `list`\cr #' List of hyperparameter settings, overwriting the hyperparameter settings given in `param_set`. #' The subclass should have its own `param_vals` parameter and pass it on to `super$initialize()`. #' Default `list()`. #' * `task_type_in` :: `character(1)`\cr -#' The class of [`Task`][mlr3::Task] that should be accepted as input. This -#' should generally be a `character(1)` identifying a type of [`Task`][mlr3::Task], e.g. `"Task"`, `"TaskClassif"` or -#' `"TaskRegr"` (or another subclass introduced by other packages). Default is `"Task"`. 
+#' The class of [`Task`][mlr3::Task] that should be accepted as input. This should generally be a `character(1)` +#' identifying a type of [`Task`][mlr3::Task], e.g. `"Task"`, `"TaskClassif"` or `"TaskRegr"` (or another subclass +#' introduced by other packages). Default is `"Task"`. #' * `task_type_out` :: `character(1)`\cr -#' The class of [`Task`][mlr3::Task] that is produced as output. This -#' should generally be a `character(1)` identifying a type of [`Task`][mlr3::Task], e.g. `"Task"`, `"TaskClassif"` or -#' `"TaskRegr"` (or another subclass introduced by other packages). Default is the value of `task_type_in`. -#' * packages :: `character`\cr +#' The class of [`Task`][mlr3::Task] that is produced as output. This should generally be a `character(1)` +#' identifying a type of [`Task`][mlr3::Task], e.g. `"Task"`, `"TaskClassif"` or `"TaskRegr"` (or another subclass +#' introduced by other packages). Default is the value of `task_type_in`. +#' * `packages` :: `character`\cr #' Set of all required packages for the [`PipeOp`]'s methods. See `$packages` slot. Default is #' `character(0)`. -#' * tags :: `character` | `NULL`\cr +#' * `tags` :: `character` | `NULL`\cr #' Tags of the resulting `PipeOp`. This is added to the tag `"target transform"`. Default `NULL`. #' #' @section Input and Output Channels: -#' [`PipeOpTargetTrafo`] has one input channels named `"input"` taking a [`Task`][mlr3::Task] (or whatever class +#' `PipeOpTargetTrafo` has one input channels named `"input"` taking a [`Task`][mlr3::Task] (or whatever class #' was specified by the `task_type` during construction) both during training and prediction. #' -#' [`PipeOpTargetTrafo`] has two output channels named `"fun"` and `"output"`. During training, +#' `PipeOpTargetTrafo` has two output channels named `"fun"` and `"output"`. 
During training, #' `"fun"` returns `NULL` and during prediction, `"fun"` returns a function that can later be used #' to invert the transformation done during training according to the overloaded `.train_invert()` #' and `.invert()` functions. `"output"` returns the modified input [`Task`][mlr3::Task] (or `task_type`) @@ -56,11 +55,11 @@ #' `.get_state()` function. #' #' @section Internals: -#' [`PipeOpTargetTrafo`] is an abstract class inheriting from [`PipeOp`]. It implements the +#' `PipeOpTargetTrafo` is an abstract class inheriting from [`PipeOp`]. It implements the #' `private$.train()` and `private$.predict()` functions. These functions perform checks and go on #' to call `.get_state()`, `.transform()`, `.train_invert()`. `.invert()` is packaged and sent along #' the `"fun"` output to be applied to a [`Prediction`][mlr3::Prediction] by [`PipeOpTargetInvert`]. -#' A subclass of [`PipeOpTargetTrafo`] should implement these functions and be used in combination +#' A subclass of `PipeOpTargetTrafo` should implement these functions and be used in combination #' with [`PipeOpTargetInvert`]. #' #' @section Fields: @@ -70,7 +69,7 @@ #' Methods inherited from [`PipeOp`], as well as: #' * `.get_state(task)`\cr #' ([`Task`][mlr3::Task]) -> `list`\cr -#' Called by [`PipeOpTargetTrafo`]'s implementation of `private$.train()`. Takes a single +#' Called by `PipeOpTargetTrafo`'s implementation of `private$.train()`. Takes a single #' [`Task`][mlr3::Task] as input and returns a `list` to set the `$state`. #' `.get_state()` will be called a single time during *training* right before #' `.transform()` is called. The return value (i.e. the `$state`) should contain info needed in @@ -78,7 +77,7 @@ #' The base implementation returns `list()` and should be overloaded if setting the state is desired. 
#' * `.transform(task, phase)`\cr #' ([`Task`][mlr3::Task], `character(1)`) -> [`Task`][mlr3::Task]\cr -#' Called by [`PipeOpTargetTrafo`]'s implementation of `private$.train()` and +#' Called by `PipeOpTargetTrafo`'s implementation of `private$.train()` and #' `private$.predict()`. Takes a single [`Task`][mlr3::Task] as input and modifies it. #' This should typically consist of calculating a new target and modifying the #' [`Task`][mlr3::Task] by using the [`convert_task`][mlr3::convert_task] function. `.transform()` will be called during training and @@ -93,16 +92,15 @@ #' This function is abstract and should be overloaded by inheriting classes. #' * `.train_invert(task)`\cr #' ([`Task`][mlr3::Task]) -> `any`\cr -#' Called by [`PipeOpTargetTrafo`]'s implementation of `private$.predict()`. Takes a single +#' Called by `PipeOpTargetTrafo`'s implementation of `private$.predict()`. Takes a single #' [`Task`][mlr3::Task] as input and returns an arbitrary value that will be given as -#' `predict_phase_state` to `.invert()`. This should not modify the input [`Task`][mlr3::Task] .\cr +#' `predict_phase_state` to `.invert()`. This should not modify the input [`Task`][mlr3::Task].\cr #' The base implementation returns a list with a single element, the `$truth` column of the [`Task`][mlr3::Task], #' and should be overloaded if a more training-phase-dependent state is desired. #' * `.invert(prediction, predict_phase_state)`\cr #' ([`Prediction`][mlr3::Prediction], `any`) -> [`Prediction`][mlr3::Prediction]\cr -#' Takes a [`Prediction`][mlr3::Prediction] and a `predict_phase_state` -#' object as input and inverts the prediction. This function is sent as `"fun"` to -#' [`PipeOpTargetInvert`].\cr +#' Takes a [`Prediction`][mlr3::Prediction] and a `predict_phase_state` object as input and inverts the prediction. +#' This function is sent as `"fun"` to [`PipeOpTargetInvert`].\cr #' This function is abstract and should be overloaded by inheriting classes. 
Care should be #' taken that the `predict_type` of the [`Prediction`][mlr3::Prediction] being inverted is handled well. #' * `.invert_help(predict_phase_state)`\cr @@ -188,7 +186,7 @@ PipeOpTargetTrafo = R6Class("PipeOpTargetTrafo", #' #' During prediction phase the function supplied through `"fun"` is called with a `list` containing #' the `"prediction"` as a single element, and should return a `list` with a single element -#' (a [`Prediction`][mlr3::Prediction]) that is returned by [`PipeOpTargetInvert`]. +#' (a [`Prediction`][mlr3::Prediction]) that is returned by `PipeOpTargetInvert`. #' #' @section Construction: #' ``` @@ -201,18 +199,18 @@ PipeOpTargetTrafo = R6Class("PipeOpTargetTrafo", #' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. #' #' @section Input and Output Channels: -#' [`PipeOpTargetInvert`] has two input channels named `"fun"` and `"prediction"`. During +#' `PipeOpTargetInvert` has two input channels named `"fun"` and `"prediction"`. During #' training, both take `NULL` as input. During prediction, `"fun"` takes a function and #' `"prediction"` takes a [`Prediction`][mlr3::Prediction]. #' -#' [`PipeOpTargetInvert`] has one output channel named `"output"` and returns `NULL` during +#' `PipeOpTargetInvert` has one output channel named `"output"` and returns `NULL` during #' training and a [`Prediction`][mlr3::Prediction] during prediction. #' #' @section State: #' The `$state` is left empty (`list()`). #' #' @section Parameters: -#' [`PipeOpTargetInvert`] has no parameters. +#' `PipeOpTargetInvert` has no parameters. #' #' @section Internals: #' Should be used in combination with a subclass of [`PipeOpTargetTrafo`]. 
@@ -349,8 +347,8 @@ PipeOpTargetMutate = R6Class("PipeOpTargetMutate", initialize = function(id = "targetmutate", param_vals = list(), new_task_type = NULL) { private$.new_task_type = assert_choice(new_task_type, mlr_reflections$task_types$type, null.ok = TRUE) ps = ps( - trafo = p_uty(tags = c("train", "predict"), custom_check = crate(function(x) check_function(x, nargs = 1L))), - inverter = p_uty(tags = "predict", custom_check = crate(function(x) check_function(x, nargs = 1L))) + trafo = p_uty(tags = c("train", "predict"), custom_check = check_function), + inverter = p_uty(tags = "predict", custom_check = check_function) ) # We could add a condition here for new_task_type on trafo and inverter when mlr-org/paradox#278 has an answer. # HOWEVER conditions are broken in paradox, it is a terrible idea to use them in PipeOps, @@ -373,8 +371,11 @@ PipeOpTargetMutate = R6Class("PipeOpTargetMutate", .transform = function(task, phase) { new_target = self$param_set$values$trafo(task$data(cols = task$target_names)) + if (!is.data.frame(new_target) && !is.matrix(new_target)) { + stopf("Hyperparameter 'trafo' must be a function returning a 'data.frame', 'data.table', or 'matrix', not '%s'.", class(new_target)[[1L]]) + } task$cbind(new_target) - convert_task(task, target = colnames(new_target), new_type = private$.new_task_type, drop_original_target = TRUE) + convert_task(task, target = colnames(new_target), new_type = private$.new_task_type, drop_original_target = TRUE, drop_levels = FALSE) }, .invert = function(prediction, predict_phase_state) { @@ -478,7 +479,7 @@ PipeOpTargetTrafoScaleRange = R6Class("PipeOpTargetTrafoScaleRange", new_target = self$state$offset + x * self$state$scale setnames(new_target, paste0(colnames(new_target), ".scaled")) task$cbind(new_target) - convert_task(task, target = colnames(new_target), drop_original_target = TRUE) + convert_task(task, target = colnames(new_target), drop_original_target = TRUE, drop_levels = FALSE) }, .invert = 
function(prediction, predict_phase_state) { diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 4c42ffd3c..b02e60e5b 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -19,40 +19,39 @@ Users can overload up to four \verb{private$}-functions: \code{.get_state()} (op \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) +\if{html}{\out{
}}\preformatted{PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list(), packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) }\if{html}{\out{
}} \itemize{ \item \code{id} :: \code{character(1)}\cr Identifier of resulting object. See \verb{$id} slot of \code{\link{PipeOp}}. \item \code{param_set} :: \code{\link[paradox:ParamSet]{ParamSet}}\cr -Parameter space description. This should be created by the subclass and given to -\code{super$initialize()}. +Parameter space description. This should be created by the subclass and given to \code{super$initialize()}. \item \code{param_vals} :: named \code{list}\cr List of hyperparameter settings, overwriting the hyperparameter settings given in \code{param_set}. The subclass should have its own \code{param_vals} parameter and pass it on to \code{super$initialize()}. Default \code{list()}. \item \code{task_type_in} :: \code{character(1)}\cr -The class of \code{\link[mlr3:Task]{Task}} that should be accepted as input. This -should generally be a \code{character(1)} identifying a type of \code{\link[mlr3:Task]{Task}}, e.g. \code{"Task"}, \code{"TaskClassif"} or -\code{"TaskRegr"} (or another subclass introduced by other packages). Default is \code{"Task"}. +The class of \code{\link[mlr3:Task]{Task}} that should be accepted as input. This should generally be a \code{character(1)} +identifying a type of \code{\link[mlr3:Task]{Task}}, e.g. \code{"Task"}, \code{"TaskClassif"} or \code{"TaskRegr"} (or another subclass +introduced by other packages). Default is \code{"Task"}. \item \code{task_type_out} :: \code{character(1)}\cr -The class of \code{\link[mlr3:Task]{Task}} that is produced as output. This -should generally be a \code{character(1)} identifying a type of \code{\link[mlr3:Task]{Task}}, e.g. \code{"Task"}, \code{"TaskClassif"} or -\code{"TaskRegr"} (or another subclass introduced by other packages). Default is the value of \code{task_type_in}. -\item packages :: \code{character}\cr +The class of \code{\link[mlr3:Task]{Task}} that is produced as output. This should generally be a \code{character(1)} +identifying a type of \code{\link[mlr3:Task]{Task}}, e.g. 
\code{"Task"}, \code{"TaskClassif"} or \code{"TaskRegr"} (or another subclass +introduced by other packages). Default is the value of \code{task_type_in}. +\item \code{packages} :: \code{character}\cr Set of all required packages for the \code{\link{PipeOp}}'s methods. See \verb{$packages} slot. Default is \code{character(0)}. -\item tags :: \code{character} | \code{NULL}\cr +\item \code{tags} :: \code{character} | \code{NULL}\cr Tags of the resulting \code{PipeOp}. This is added to the tag \code{"target transform"}. Default \code{NULL}. } } \section{Input and Output Channels}{ -\code{\link{PipeOpTargetTrafo}} has one input channels named \code{"input"} taking a \code{\link[mlr3:Task]{Task}} (or whatever class +\code{PipeOpTargetTrafo} has one input channels named \code{"input"} taking a \code{\link[mlr3:Task]{Task}} (or whatever class was specified by the \code{task_type} during construction) both during training and prediction. -\code{\link{PipeOpTargetTrafo}} has two output channels named \code{"fun"} and \code{"output"}. During training, +\code{PipeOpTargetTrafo} has two output channels named \code{"fun"} and \code{"output"}. During training, \code{"fun"} returns \code{NULL} and during prediction, \code{"fun"} returns a function that can later be used to invert the transformation done during training according to the overloaded \code{.train_invert()} and \code{.invert()} functions. \code{"output"} returns the modified input \code{\link[mlr3:Task]{Task}} (or \code{task_type}) @@ -67,11 +66,11 @@ The \verb{$state} is a named \code{list} and should be returned explicitly by th \section{Internals}{ -\code{\link{PipeOpTargetTrafo}} is an abstract class inheriting from \code{\link{PipeOp}}. It implements the +\code{PipeOpTargetTrafo} is an abstract class inheriting from \code{\link{PipeOp}}. It implements the \code{private$.train()} and \code{private$.predict()} functions. 
These functions perform checks and go on to call \code{.get_state()}, \code{.transform()}, \code{.train_invert()}. \code{.invert()} is packaged and sent along the \code{"fun"} output to be applied to a \code{\link[mlr3:Prediction]{Prediction}} by \code{\link{PipeOpTargetInvert}}. -A subclass of \code{\link{PipeOpTargetTrafo}} should implement these functions and be used in combination +A subclass of \code{PipeOpTargetTrafo} should implement these functions and be used in combination with \code{\link{PipeOpTargetInvert}}. } @@ -86,7 +85,7 @@ Methods inherited from \code{\link{PipeOp}}, as well as: \itemize{ \item \code{.get_state(task)}\cr (\code{\link[mlr3:Task]{Task}}) -> \code{list}\cr -Called by \code{\link{PipeOpTargetTrafo}}'s implementation of \code{private$.train()}. Takes a single +Called by \code{PipeOpTargetTrafo}'s implementation of \code{private$.train()}. Takes a single \code{\link[mlr3:Task]{Task}} as input and returns a \code{list} to set the \verb{$state}. \code{.get_state()} will be called a single time during \emph{training} right before \code{.transform()} is called. The return value (i.e. the \verb{$state}) should contain info needed in @@ -94,7 +93,7 @@ Called by \code{\link{PipeOpTargetTrafo}}'s implementation of \code{private$.tra The base implementation returns \code{list()} and should be overloaded if setting the state is desired. \item \code{.transform(task, phase)}\cr (\code{\link[mlr3:Task]{Task}}, \code{character(1)}) -> \code{\link[mlr3:Task]{Task}}\cr -Called by \code{\link{PipeOpTargetTrafo}}'s implementation of \code{private$.train()} and +Called by \code{PipeOpTargetTrafo}'s implementation of \code{private$.train()} and \code{private$.predict()}. Takes a single \code{\link[mlr3:Task]{Task}} as input and modifies it. This should typically consist of calculating a new target and modifying the \code{\link[mlr3:Task]{Task}} by using the \code{\link[mlr3:convert_task]{convert_task}} function. 
\code{.transform()} will be called during training and @@ -109,16 +108,15 @@ The input should \emph{not} be cloned and if possible should be changed in-place This function is abstract and should be overloaded by inheriting classes. \item \code{.train_invert(task)}\cr (\code{\link[mlr3:Task]{Task}}) -> \code{any}\cr -Called by \code{\link{PipeOpTargetTrafo}}'s implementation of \code{private$.predict()}. Takes a single +Called by \code{PipeOpTargetTrafo}'s implementation of \code{private$.predict()}. Takes a single \code{\link[mlr3:Task]{Task}} as input and returns an arbitrary value that will be given as -\code{predict_phase_state} to \code{.invert()}. This should not modify the input \code{\link[mlr3:Task]{Task}} .\cr +\code{predict_phase_state} to \code{.invert()}. This should not modify the input \code{\link[mlr3:Task]{Task}}.\cr The base implementation returns a list with a single element, the \verb{$truth} column of the \code{\link[mlr3:Task]{Task}}, and should be overloaded if a more training-phase-dependent state is desired. \item \code{.invert(prediction, predict_phase_state)}\cr (\code{\link[mlr3:Prediction]{Prediction}}, \code{any}) -> \code{\link[mlr3:Prediction]{Prediction}}\cr -Takes a \code{\link[mlr3:Prediction]{Prediction}} and a \code{predict_phase_state} -object as input and inverts the prediction. This function is sent as \code{"fun"} to -\code{\link{PipeOpTargetInvert}}.\cr +Takes a \code{\link[mlr3:Prediction]{Prediction}} and a \code{predict_phase_state} object as input and inverts the prediction. +This function is sent as \code{"fun"} to \code{\link{PipeOpTargetInvert}}.\cr This function is abstract and should be overloaded by inheriting classes. Care should be taken that the \code{predict_type} of the \code{\link[mlr3:Prediction]{Prediction}} being inverted is handled well. 
\item \code{.invert_help(predict_phase_state)}\cr diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index 40a4b602d..cb0556603 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -13,7 +13,7 @@ function. Typically should be used in combination with a subclass of \code{\link During prediction phase the function supplied through \code{"fun"} is called with a \code{list} containing the \code{"prediction"} as a single element, and should return a \code{list} with a single element -(a \code{\link[mlr3:Prediction]{Prediction}}) that is returned by \code{\link{PipeOpTargetInvert}}. +(a \code{\link[mlr3:Prediction]{Prediction}}) that is returned by \code{PipeOpTargetInvert}. } \section{Construction}{ @@ -30,11 +30,11 @@ List of hyperparameter settings, overwriting the hyperparameter settings that wo \section{Input and Output Channels}{ -\code{\link{PipeOpTargetInvert}} has two input channels named \code{"fun"} and \code{"prediction"}. During +\code{PipeOpTargetInvert} has two input channels named \code{"fun"} and \code{"prediction"}. During training, both take \code{NULL} as input. During prediction, \code{"fun"} takes a function and \code{"prediction"} takes a \code{\link[mlr3:Prediction]{Prediction}}. -\code{\link{PipeOpTargetInvert}} has one output channel named \code{"output"} and returns \code{NULL} during +\code{PipeOpTargetInvert} has one output channel named \code{"output"} and returns \code{NULL} during training and a \code{\link[mlr3:Prediction]{Prediction}} during prediction. } @@ -45,7 +45,7 @@ The \verb{$state} is left empty (\code{list()}). \section{Parameters}{ -\code{\link{PipeOpTargetInvert}} has no parameters. +\code{PipeOpTargetInvert} has no parameters. 
} \section{Internals}{ diff --git a/tests/testthat/test_pipeop_targetmutate.R b/tests/testthat/test_pipeop_targetmutate.R index 1d06fd01d..9c4a1a119 100644 --- a/tests/testthat/test_pipeop_targetmutate.R +++ b/tests/testthat/test_pipeop_targetmutate.R @@ -40,33 +40,49 @@ test_that("PipeOpTargetMutate - basic properties", { test_that("PipeOpTargetMutate - log base 2 trafo", { skip_if_not_installed("rpart") - g = Graph$new() - g$add_pipeop(PipeOpTargetMutate$new("logtrafo", - param_vals = list( - trafo = function(x) log(x, base = 2), - inverter = function(x) list(response = 2 ^ x$response)) - ) - ) - g$add_pipeop(LearnerRegrRpart$new()) - g$add_pipeop(PipeOpTargetInvert$new()) - g$add_edge(src_id = "logtrafo", dst_id = "targetinvert", src_channel = 1L, dst_channel = 1L) - g$add_edge(src_id = "logtrafo", dst_id = "regr.rpart", src_channel = 2L, dst_channel = 1L) - g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L) - - task = mlr_tasks$get("boston_housing_classic") - train_out = g$train(task) - predict_out = g$predict(task) - - dat = task$data() - dat$medv = log(dat$medv, base = 2) - task_log = TaskRegr$new("boston_housing_classic_log", backend = dat, target = "medv") - - learner = LearnerRegrRpart$new() - learner$train(task_log) - - learner_predict_out = learner$predict(task_log) - expect_equal(2 ^ learner_predict_out$truth, predict_out[[1L]]$truth) - expect_equal(2 ^ learner_predict_out$response, predict_out[[1L]]$response) + g = Graph$new() + g$add_pipeop(PipeOpTargetMutate$new("logtrafo", + param_vals = list( + trafo = function(x) log(x, base = 2), + inverter = function(x) list(response = 2 ^ x$response)) + ) + ) + g$add_pipeop(LearnerRegrRpart$new()) + g$add_pipeop(PipeOpTargetInvert$new()) + g$add_edge(src_id = "logtrafo", dst_id = "targetinvert", src_channel = 1L, dst_channel = 1L) + g$add_edge(src_id = "logtrafo", dst_id = "regr.rpart", src_channel = 2L, dst_channel = 1L) + g$add_edge(src_id = "regr.rpart", dst_id = 
"targetinvert", src_channel = 1L, dst_channel = 2L) + + task = mlr_tasks$get("boston_housing_classic") + train_out = g$train(task) + predict_out = g$predict(task) + + dat = task$data() + dat$medv = log(dat$medv, base = 2) + task_log = TaskRegr$new("boston_housing_classic_log", backend = dat, target = "medv") + + learner = LearnerRegrRpart$new() + learner$train(task_log) + + learner_predict_out = learner$predict(task_log) + expect_equal(2 ^ learner_predict_out$truth, predict_out[[1L]]$truth) + expect_equal(2 ^ learner_predict_out$response, predict_out[[1L]]$response) +}) + +test_that("PipeOpTargetMutate - does not drop missing levels, #631", { + task = tsk("boston_housing")$filter(1:100) + op = po("targetmutate") + train_out = op$train(list(task))[["output"]] + predict_out = op$predict(list(task))[["output"]] + # train_out and predict_out should also know all levels + expect_equal(task$levels(), train_out$levels()) + expect_equal(task$levels(), predict_out$levels()) +}) + +test_that("PipeOpTargetMutate - error if trafo does not return dt/df/matrix", { + task = tsk("boston_housing") + op = po("targetmutate", trafo = function(x) 1) + expect_error(op$train(list(task)), "'data.frame', 'data.table', or 'matrix'") }) #'test_that("PipeOpTargetMutate - Regr -> Classif", { diff --git a/tests/testthat/test_pipeop_targettrafoscalerange.R b/tests/testthat/test_pipeop_targettrafoscalerange.R index d66f34d31..e291bd345 100644 --- a/tests/testthat/test_pipeop_targettrafoscalerange.R +++ b/tests/testthat/test_pipeop_targettrafoscalerange.R @@ -86,3 +86,13 @@ test_that("PipeOpTargetTrafoScaleRange - row use subsets", { expect_equivalent((predict_out1$truth - a) / b, predict_out2[[1L]]$truth) expect_equivalent((predict_out1$response - a) / b, predict_out2[[1L]]$response) }) + +test_that("PipeOpTargetTrafoScaleRange - does not drop missing levels, #631", { + task = tsk("boston_housing")$filter(1:100) + op = po("targettrafoscalerange") + train_out = op$train(list(task))[["output"]] + 
predict_out = op$predict(list(task))[["output"]] + # train_out and predict_out should also know all levels + expect_equal(task$levels(), train_out$levels()) + expect_equal(task$levels(), predict_out$levels()) +})