Add level and throw options to MLJTest.test #7

Merged 7 commits on May 18, 2022

2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "MLJTest"
uuid = "697918b4-fdc1-4f9e-8ff9-929724cee270"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.1.0"
version = "0.2.0"

[deps]
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
47 changes: 45 additions & 2 deletions README.md
@@ -19,8 +19,51 @@ This package provides a single method for testing a collection of
`models` (types or named tuples with keys `:name` and `:package_name`)
using the specified training `data`:

-MLJTest.test(models, data...; verbosity=1, mod=Main, loading_only=false) -> failures, summary

```julia
MLJTest.test(models, data...; mod=Main, level=2, throw=false, verbosity=1)
    -> failures, summary
```

For detailed documentation, run `using MLJTest; @doc MLJTest.test`.


# Examples

## Testing models in a new MLJ model interface implementation

The following tests the model interface implemented by some model type
`MyClassifier`, as might appear in tests for a package providing that
type:

```julia
import MLJTest
using Test
X, y = MLJTest.MLJ.make_blobs()
failures, summary = MLJTest.test([MyClassifier, ], X, y, verbosity=1, mod=@__MODULE__)
@test isempty(failures)
```

See the method document string for details.

## Testing models after filtering models in the registry

The following applies comprehensive integration tests to all
regressors provided by the package GLM.jl appearing in the MLJ Model
Registry. Since GLM.jl models are provided through the interface
package `MLJGLMInterface`, this must be in the current environment:

```julia
Pkg.add("MLJGLMInterface")
import MLJBase, MLJTest
using DataFrames # to view summary
X, y = MLJTest.MLJ.make_regression();
regressors = MLJTest.MLJ.models(matching(X, y)) do m
m.package_name == "GLM"
end
failures, summary = MLJTest.test(
regressors,
X,
y,
verbosity=1,
mod=@__MODULE__,
level=3)
summary |> DataFrame
```
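
The same call with `level=1` restricts testing to code loading only (a
sketch; it reuses `regressors`, `X` and `y` from above):

```julia
# Quick smoke test: only check that each model type can be loaded from
# the registry (the `:model_type` test), without fitting anything.
failures, summary = MLJTest.test(
    regressors,
    X,
    y,
    verbosity=1,
    mod=@__MODULE__,
    level=1)
```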
48 changes: 24 additions & 24 deletions src/attemptors.jl
@@ -1,20 +1,20 @@
str(model_metadata) = "$(model_metadata.name) from $(model_metadata.package_name)"

"""
attempt(f, message="")
attempt(f, message; throw=false)

Return `(f(), "✓") if `f()` executes without throwing an
exception. Otherwise, return `(ex, "×"), where `ex` is the exception
thrown.
caught. Only truly throw the exception if `throw=true`.

If `message` is not empty, then it is logged to `Info`, together with
the second return value ("✓" or "×").


"""
-function attempt(f, message="")
function attempt(f, message; throw=false)
    ret = try
        (f(), "✓")
    catch ex
        throw && Base.throw(ex)
        (ex, "×")
    end
    isempty(message) || @info message*last(ret)
@@ -39,10 +39,10 @@ end

root(load_path) = split(load_path, '.') |> first

-function model_type(proxy, mod; verbosity=1)
function model_type(proxy, mod; throw=false, verbosity=1)
    # check interface package really is in current environment:
    message = "`[:model_type]` Loading model type "
-    model_type, outcome = attempt(finalize(message, verbosity)) do
    model_type, outcome = attempt(finalize(message, verbosity); throw) do
        load_path = MLJTest.load_path(proxy) # MLJ.load_path(proxy) *****
        load_path_ex = load_path |> Meta.parse
        api_pkg_ex = root(load_path) |> Symbol
@@ -69,31 +69,31 @@ function model_type(proxy, mod; verbosity=1)
        if !isnothing(api_pkg) &&
            api_pkg != "unknown" &&
            contains(model_type.msg, "$api_pkg not found in")
-            throw(model_type)
            Base.throw(model_type)
        end
    end

    return model_type, outcome
end

-function model_instance(model_type; verbosity=1)
function model_instance(model_type; throw=false, verbosity=1)
    message = "`[:model_instance]` Instantiating default model "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        model_type()
    end
end

-function fitted_machine(model, data...; verbosity=1)
function fitted_machine(model, data...; throw=false, verbosity=1)
    message = "`[:fitted_machine]` Fitting machine "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        mach = machine(model, data...)
        fit!(mach, verbosity=-1)
    end
end

-function operations(fitted_machine, data...; verbosity=1)
function operations(fitted_machine, data...; throw=false, verbosity=1)
    message = "`[:operations]` Calling `predict`, `transform` and/or `inverse_transform` "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        operations = String[]
        methods = MLJ.implemented_methods(fitted_machine.model)
        if :predict in methods
@@ -112,30 +112,30 @@ end
    end
end

-function threshold_prediction(model, data...; verbosity=1)
function threshold_prediction(model, data...; throw=false, verbosity=1)
    message = "`[:threshold_predictor]` Calling fit!/predict for threshold predictor "*
        "test "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        tmodel = BinaryThresholdPredictor(model)
        mach = machine(tmodel, data...)
        fit!(mach, verbosity=0)
        predict(mach, first(data))
    end
end

-function evaluation(measure, model, data...; verbosity=1)
function evaluation(measure, model, data...; throw=false, verbosity=1)
    message = "`[:evaluation]` Evaluating performance "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        evaluate(model, data...;
                 measure=measure,
                 resampling=Holdout(),
                 verbosity=0)
    end
end

-function tuned_pipe_evaluation(measure, model, data...; verbosity=1)
function tuned_pipe_evaluation(measure, model, data...; throw=false, verbosity=1)
    message = "`[:tuned_pipe_evaluation]` Evaluating performance in a tuned pipeline "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        pipe = identity |> model
        tuned_pipe = TunedModel(models=[pipe,],
                                measure=measure)
@@ -145,8 +145,8 @@ end
    end
end

-function ensemble_prediction(model, data...; verbosity=1)
-    attempt(finalize("`[:ensemble_prediction]` Ensembling ", verbosity)) do
function ensemble_prediction(model, data...; throw=false, verbosity=1)
    attempt(finalize("`[:ensemble_prediction]` Ensembling ", verbosity); throw) do
        imodel = EnsembleModel(model=model,
                               n=2)
        mach = machine(imodel, data...)
@@ -155,9 +155,9 @@ end
    end
end

-function iteration_prediction(measure, model, data...; verbosity=1)
function iteration_prediction(measure, model, data...; throw=false, verbosity=1)
    message = "`[:iteration_prediction]` Iterating with controls "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        imodel = IteratedModel(model=model,
                               measure=measure,
                               controls=[Step(1),
82 changes: 51 additions & 31 deletions src/test.jl
@@ -9,10 +9,8 @@ function next!(p)
MLJ.ProgressMeter.updateProgress!(p)
end



"""
-    test(models, data...; verbosity=1, mod=Main, loading_only=false)
    test(models, data...; mod=Main, level=2, throw=false, verbosity=1)

Apply a battery of MLJ integration tests to a collection of models,
using `data` for training. Here `mod` should be the module from which
@@ -30,9 +28,18 @@ using `data` for training. Here `mod` should be the module from which
interface packages providing the models must be in the current
environment, but the packages need not be loaded.

-Specify `loading_only=true` to only test model loading, as detailed in
-the test called `model_type` below.
The extent of testing is controlled by `level`:

| `level`     | description                   | tests (full list below) |
|:------------|:------------------------------|:------------------------|
| 1           | test code loading             | `:model_type`           |
| 2 (default) | basic test of model interface | first four tests        |
| 3           | comprehensive                 | all applicable tests    |

By default, exceptions caught in tests are not thrown. If
`throw=true`, testing will terminate at the first exception
encountered, after throwing that exception (useful to obtain stack
traces).
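
For instance, to fail fast while debugging a single model (a sketch;
`MyClassifier`, `X` and `y` are placeholders, as in the examples below):

```julia
# `throw=true` rethrows the first exception encountered, so the full
# stack trace is displayed:
MLJTest.test([MyClassifier,], X, y; mod=@__MODULE__, throw=true)
```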

# Return value

@@ -43,22 +50,22 @@ Returns `(failures, summary)` where:
- `summary`: table summarizing the outcomes of each test, where
outcomes are indicated as below:

-`summary` table entry | interpretation
-----------------------|-----------------
-✓                     | test successful
-×                     | test unsuccessful
-n/a                   | skipped because not applicable
--                     | test skipped for some other reason

| entry | interpretation                     |
|:------|:-----------------------------------|
| ✓     | test successful                    |
| ×     | test unsuccessful                  |
| n/a   | skipped because not applicable     |
| -     | test skipped for some other reason |
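
To inspect failures in the summary programmatically (a sketch; the
column names mirror the test names listed under "List of tests" below):

```julia
using DataFrames
df = DataFrame(summary)
# e.g. models whose very first test (code loading) failed:
filter(row -> row.model_type == "×", df)
```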

# Examples

## Testing models in a new MLJ model interface implementation

-The following applies the integration tests to a model type
The following tests the model interface implemented by some model type
`MyClassifier`, as might appear in tests for a package providing that
type:

-```
```julia
import MLJTest
using Test
X, y = MLJTest.MLJ.make_blobs()
@@ -68,24 +75,30 @@ failures, summary = MLJTest.test([MyClassifier, ], X, y, verbosity=1, mod=@__MODULE__)

## Testing models after filtering models in the registry

-The following applies integration tests to all regressors provided by
-the package GLM.jl that are also in the MLJ Model Registry. Since
-GLM.jl models are provided through the interface package
-`MLJGLMInterface`, this must be in the current environment:
The following applies comprehensive integration tests to all
regressors provided by the package GLM.jl appearing in the MLJ Model
Registry. Since GLM.jl models are provided through the interface
package `MLJGLMInterface`, this must be in the current environment:

-```
```julia
Pkg.add("MLJGLMInterface")
import MLJBase, MLJTest
using DataFrames # to view summary
X, y = MLJTest.MLJ.make_regression();
regressors = MLJTest.MLJ.models(matching(X, y)) do m
m.package_name == "GLM"
end
-failures, summary = MLJTest.test(regressors, X, y, verbosity=1, mod=@__MODULE__)
failures, summary = MLJTest.test(
    regressors,
    X,
    y,
    verbosity=1,
    mod=@__MODULE__,
    level=3)
summary |> DataFrame
```

-# List of tests applied
# List of tests

Tests are applied in sequence. When a test fails, subsequent tests for
that model are skipped. The following are applied to all models:
@@ -121,7 +134,7 @@ These additional tests are applied to `Supervised` models:
but first wrap as an `IteratedModel`.

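A sketch of the wrapping used by that final test (the control list here
is illustrative; `NumberLimit` is an assumed stand-in for the elided
controls in the source):

```julia
imodel = IteratedModel(model=model,          # an iterative Supervised model
                       measure=measure,
                       controls=[Step(1), NumberLimit(2)])
mach = machine(imodel, data...)
fit!(mach, verbosity=0)
```
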
"""
-function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
function test(model_proxies, data...; mod=Main, level=2, throw=false, verbosity=1,)

nproxies = length(model_proxies)

@@ -196,27 +209,27 @@ function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
row = merge(row0, (; name, package))

# model_type:
-model_type, outcome = MLJTest.model_type(model_proxy, mod; verbosity)
model_type, outcome = MLJTest.model_type(model_proxy, mod; throw, verbosity)
row = update(row, i, :model_type, model_type, outcome)
outcome == "×" && continue

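# level 1 (code loading only) stops here: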
-load_only && continue
level > 1 || continue

# model_instance:
model_instance, outcome =
-MLJTest.model_instance(model_type; verbosity)
MLJTest.model_instance(model_type; throw, verbosity)
row = update(row, i, :model_instance, model_instance, outcome)
outcome == "×" && continue

# fitted_machine:
fitted_machine, outcome =
-MLJTest.fitted_machine(model_instance, data...; verbosity)
MLJTest.fitted_machine(model_instance, data...; throw, verbosity)
row = update(row, i, :fitted_machine, fitted_machine, outcome)
outcome == "×" && continue

# operations:
operations, outcome =
-MLJTest.operations(fitted_machine, data...; verbosity)
MLJTest.operations(fitted_machine, data...; throw, verbosity)
# special treatment to get list of operations in `summary`:
if operations == "×"
row = update(row, i, :operations, operations, outcome)
@@ -225,6 +238,7 @@ function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
row = update(row, i, :operations, operations, operations)
end

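# remaining tests require level > 2 (comprehensive):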
level > 2 || continue
model_instance isa Supervised || continue

# supervised tests:
Expand All @@ -236,7 +250,7 @@ function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
scitype(data[2]) <: AbstractVector{<:Finite{2}}

threshold_prediction, outcome =
-MLJTest.threshold_prediction(model_instance, data...; verbosity)
MLJTest.threshold_prediction(model_instance, data...; throw, verbosity)
row = update(row, i, :threshold_prediction, threshold_prediction, outcome)
outcome == "×" && continue
end
@@ -247,27 +261,33 @@

# evaluation:
evaluation, outcome =
-MLJTest.evaluation(measure, model_instance, data...; verbosity)
MLJTest.evaluation(measure, model_instance, data...; throw, verbosity)
row = update(row, i, :evaluation, evaluation, outcome)
outcome == "×" && continue

# tuned_pipe_evaluation:
tuned_pipe_evaluation, outcome =
-MLJTest.tuned_pipe_evaluation(measure, model_instance, data...; verbosity)
MLJTest.tuned_pipe_evaluation(
    measure,
    model_instance,
    data...;
    throw,
    verbosity
)
row = update(row, i, :tuned_pipe_evaluation, tuned_pipe_evaluation, outcome)
outcome == "×" && continue

# ensemble_prediction:
ensemble_prediction, outcome =
-MLJTest.ensemble_prediction(model_instance, data...; verbosity)
MLJTest.ensemble_prediction(model_instance, data...; throw, verbosity)
row = update(row, i, :ensemble_prediction, ensemble_prediction, outcome)
outcome == "×" && continue

isnothing(iteration_parameter(model_instance)) && continue

# iteration prediction:
iteration_prediction, outcome =
-MLJTest.iteration_prediction(measure, model_instance, data...; verbosity)
MLJTest.iteration_prediction(measure, model_instance, data...; throw, verbosity)
row = update(row, i, :iteration_prediction, iteration_prediction, outcome)
outcome == "×" && continue
end