Add level and throw options to MLJTest.test #7

Merged 7 commits on May 18, 2022

2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "MLJTest"
uuid = "697918b4-fdc1-4f9e-8ff9-929724cee270"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.1.0"
version = "0.2.0"

[deps]
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
47 changes: 45 additions & 2 deletions README.md
@@ -19,8 +19,51 @@ This package provides a single method for testing a collection of
`models` (types or named tuples with keys `:name` and `:package_name`)
using the specified training `data`:

-MLJTest.test(models, data...; verbosity=1, mod=Main, loading_only=false) -> failures, summary

```julia
MLJTest.test(models, data...; mod=Main, level=2, throw=false, verbosity=1)
    -> failures, summary
```

For detailed documentation, run `using MLJTest; @doc MLJTest.test`.


# Examples

## Testing models in a new MLJ model interface implementation

The following tests the model interface implemented by some model type
`MyClassifier`, as might appear in tests for a package providing that
type:

```julia
import MLJTest
using Test
X, y = MLJTest.MLJ.make_blobs()
failures, summary = MLJTest.test([MyClassifier, ], X, y, verbosity=1, mod=@__MODULE__)
@test isempty(failures)
```

See the method document string for details.

## Testing models after filtering models in the registry

The following applies comprehensive integration tests to all
regressors provided by the package GLM.jl appearing in the MLJ Model
Registry. Since GLM.jl models are provided through the interface
package `MLJGLMInterface`, this must be in the current environment:

```julia
Pkg.add("MLJGLMInterface")
import MLJBase, MLJTest
using DataFrames # to view summary
X, y = MLJTest.MLJ.make_regression();
regressors = MLJTest.MLJ.models(matching(X, y)) do m
m.package_name == "GLM"
end
failures, summary = MLJTest.test(
regressors,
X,
y,
verbosity=1,
mod=@__MODULE__,
level=3)
summary |> DataFrame
```
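
The same call with `level=1` restricts testing to code loading only (a
sketch; it reuses `regressors`, `X` and `y` from above):

```julia
# Quick smoke test: only check that each model type can be loaded from
# the registry (the `:model_type` test), without fitting anything.
failures, summary = MLJTest.test(
    regressors,
    X,
    y,
    verbosity=1,
    mod=@__MODULE__,
    level=1)
```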
48 changes: 24 additions & 24 deletions src/attemptors.jl
@@ -1,20 +1,20 @@
str(model_metadata) = "$(model_metadata.name) from $(model_metadata.package_name)"

"""
attempt(f, message="")
attempt(f, message; throw=false)

Return `(f(), "✓") if `f()` executes without throwing an
exception. Otherwise, return `(ex, "×"), where `ex` is the exception
thrown.
caught. Only truly throw the exception if `throw=true`.

If `message` is not empty, then it is logged to `Info`, together with
the second return value ("✓" or "×").


"""
-function attempt(f, message="")
function attempt(f, message; throw=false)
    ret = try
        (f(), "✓")
    catch ex
        throw && Base.throw(ex)
        (ex, "×")
    end
    isempty(message) || @info message*last(ret)
@@ -39,10 +39,10 @@ end

root(load_path) = split(load_path, '.') |> first

-function model_type(proxy, mod; verbosity=1)
function model_type(proxy, mod; throw=false, verbosity=1)
    # check interface package really is in current environment:
    message = "`[:model_type]` Loading model type "
-    model_type, outcome = attempt(finalize(message, verbosity)) do
    model_type, outcome = attempt(finalize(message, verbosity); throw) do
        load_path = MLJTest.load_path(proxy) # MLJ.load_path(proxy) *****
        load_path_ex = load_path |> Meta.parse
        api_pkg_ex = root(load_path) |> Symbol
@@ -69,31 +69,31 @@ function model_type(proxy, mod; verbosity=1)
        if !isnothing(api_pkg) &&
            api_pkg != "unknown" &&
            contains(model_type.msg, "$api_pkg not found in")
-            throw(model_type)
            Base.throw(model_type)
        end
    end

    return model_type, outcome
end

-function model_instance(model_type; verbosity=1)
function model_instance(model_type; throw=false, verbosity=1)
    message = "`[:model_instance]` Instantiating default model "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        model_type()
    end
end

-function fitted_machine(model, data...; verbosity=1)
function fitted_machine(model, data...; throw=false, verbosity=1)
    message = "`[:fitted_machine]` Fitting machine "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        mach = machine(model, data...)
        fit!(mach, verbosity=-1)
    end
end

-function operations(fitted_machine, data...; verbosity=1)
function operations(fitted_machine, data...; throw=false, verbosity=1)
    message = "`[:operations]` Calling `predict`, `transform` and/or `inverse_transform` "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        operations = String[]
        methods = MLJ.implemented_methods(fitted_machine.model)
        if :predict in methods
@@ -112,30 +112,30 @@ end
    end
end

-function threshold_prediction(model, data...; verbosity=1)
function threshold_prediction(model, data...; throw=false, verbosity=1)
    message = "`[:threshold_predictor]` Calling fit!/predict for threshold predictor "*
        "test "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        tmodel = BinaryThresholdPredictor(model)
        mach = machine(tmodel, data...)
        fit!(mach, verbosity=0)
        predict(mach, first(data))
    end
end

-function evaluation(measure, model, data...; verbosity=1)
function evaluation(measure, model, data...; throw=false, verbosity=1)
    message = "`[:evaluation]` Evaluating performance "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        evaluate(model, data...;
                 measure=measure,
                 resampling=Holdout(),
                 verbosity=0)
    end
end

-function tuned_pipe_evaluation(measure, model, data...; verbosity=1)
function tuned_pipe_evaluation(measure, model, data...; throw=false, verbosity=1)
    message = "`[:tuned_pipe_evaluation]` Evaluating performance in a tuned pipeline "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        pipe = identity |> model
        tuned_pipe = TunedModel(models=[pipe,],
                                measure=measure)
@@ -145,8 +145,8 @@ end
    end
end

-function ensemble_prediction(model, data...; verbosity=1)
-    attempt(finalize("`[:ensemble_prediction]` Ensembling ", verbosity)) do
function ensemble_prediction(model, data...; throw=false, verbosity=1)
    attempt(finalize("`[:ensemble_prediction]` Ensembling ", verbosity); throw) do
        imodel = EnsembleModel(model=model,
                               n=2)
        mach = machine(imodel, data...)
@@ -155,9 +155,9 @@ end
    end
end

-function iteration_prediction(measure, model, data...; verbosity=1)
function iteration_prediction(measure, model, data...; throw=false, verbosity=1)
    message = "`[:iteration_prediction]` Iterating with controls "
-    attempt(finalize(message, verbosity)) do
    attempt(finalize(message, verbosity); throw) do
        imodel = IteratedModel(model=model,
                               measure=measure,
                               controls=[Step(1),
82 changes: 51 additions & 31 deletions src/test.jl
@@ -9,10 +9,8 @@ function next!(p)
MLJ.ProgressMeter.updateProgress!(p)
end



"""
-    test(models, data...; verbosity=1, mod=Main, loading_only=false)
    test(models, data...; mod=Main, level=2, throw=false, verbosity=1)

Apply a battery of MLJ integration tests to a collection of models,
using `data` for training. Here `mod` should be the module from which
@@ -30,9 +28,18 @@ using `data` for training. Here `mod` should be the module from which
interface packages providing the models must be in the current
environment, but the packages need not be loaded.

-Specify `loading_only=true` to only test model loading, as detailed in
-the test called `model_type` below.
The extent of testing is controlled by `level`:

| `level`     | description                   | tests (full list below) |
|:------------|:------------------------------|:------------------------|
| 1           | test code loading             | `:model_type`           |
| 2 (default) | basic test of model interface | first four tests        |
| 3           | comprehensive                 | all applicable tests    |

By default, exceptions caught in tests are not thrown. If
`throw=true`, testing will terminate at the first exception
encountered, after throwing that exception (useful to obtain stack
traces).
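
For instance, to fail fast while debugging a single model (a sketch;
`MyClassifier`, `X` and `y` are placeholders, as in the examples below):

```julia
# `throw=true` rethrows the first exception encountered, so the full
# stack trace is displayed:
MLJTest.test([MyClassifier,], X, y; mod=@__MODULE__, throw=true)
```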

# Return value

@@ -43,22 +50,22 @@ Returns `(failures, summary)` where:
- `summary`: table summarizing the outcomes of each test, where
outcomes are indicated as below:

-`summary` table entry | interpretation
-----------------------|-----------------
-✓                     | test successful
-×                     | test unsuccessful
-n/a                   | skipped because not applicable
--                     | test skipped for some other reason

| entry | interpretation                     |
|:------|:-----------------------------------|
| ✓     | test successful                    |
| ×     | test unsuccessful                  |
| n/a   | skipped because not applicable     |
| -     | test skipped for some other reason |
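
To inspect failures in the summary programmatically (a sketch; the
column names mirror the test names listed under "List of tests" below):

```julia
using DataFrames
df = DataFrame(summary)
# e.g. models whose very first test (code loading) failed:
filter(row -> row.model_type == "×", df)
```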

# Examples

## Testing models in a new MLJ model interface implementation

-The following applies the integration tests to a model type
The following tests the model interface implemented by some model type
`MyClassifier`, as might appear in tests for a package providing that
type:

-```
```julia
import MLJTest
using Test
X, y = MLJTest.MLJ.make_blobs()
@@ -68,24 +75,30 @@ failures, summary = MLJTest.test([MyClassifier, ], X, y, verbosity=1, mod=@__MODULE__)

## Testing models after filtering models in the registry

-The following applies integration tests to all regressors provided by
-the package GLM.jl that are also in the MLJ Model Registry. Since
-GLM.jl models are provided through the interface package
-`MLJGLMInterface`, this must be in the current environment:
The following applies comprehensive integration tests to all
regressors provided by the package GLM.jl appearing in the MLJ Model
Registry. Since GLM.jl models are provided through the interface
package `MLJGLMInterface`, this must be in the current environment:

-```
```julia
Pkg.add("MLJGLMInterface")
import MLJBase, MLJTest
using DataFrames # to view summary
X, y = MLJTest.MLJ.make_regression();
regressors = MLJTest.MLJ.models(matching(X, y)) do m
m.package_name == "GLM"
end
-failures, summary = MLJTest.test(regressors, X, y, verbosity=1, mod=@__MODULE__)
failures, summary = MLJTest.test(
    regressors,
    X,
    y,
    verbosity=1,
    mod=@__MODULE__,
    level=3)
summary |> DataFrame
```

-# List of tests applied
# List of tests

Tests are applied in sequence. When a test fails, subsequent tests for
that model are skipped. The following are applied to all models:
@@ -121,7 +134,7 @@ These additional tests are applied to `Supervised` models:
but first wrap as an `IteratedModel`.

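A sketch of the wrapping used by that final test (the control list here
is illustrative; `NumberLimit` is an assumed stand-in for the elided
controls in the source):

```julia
imodel = IteratedModel(model=model,          # an iterative Supervised model
                       measure=measure,
                       controls=[Step(1), NumberLimit(2)])
mach = machine(imodel, data...)
fit!(mach, verbosity=0)
```
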
"""
-function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
function test(model_proxies, data...; mod=Main, level=2, throw=false, verbosity=1,)

nproxies = length(model_proxies)

@@ -196,27 +209,27 @@ function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
row = merge(row0, (; name, package))

# model_type:
-model_type, outcome = MLJTest.model_type(model_proxy, mod; verbosity)
model_type, outcome = MLJTest.model_type(model_proxy, mod; throw, verbosity)
row = update(row, i, :model_type, model_type, outcome)
outcome == "×" && continue

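# level 1 (code loading only) stops here: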
-load_only && continue
level > 1 || continue

# model_instance:
model_instance, outcome =
-MLJTest.model_instance(model_type; verbosity)
MLJTest.model_instance(model_type; throw, verbosity)
row = update(row, i, :model_instance, model_instance, outcome)
outcome == "×" && continue

# fitted_machine:
fitted_machine, outcome =
-MLJTest.fitted_machine(model_instance, data...; verbosity)
MLJTest.fitted_machine(model_instance, data...; throw, verbosity)
row = update(row, i, :fitted_machine, fitted_machine, outcome)
outcome == "×" && continue

# operations:
operations, outcome =
-MLJTest.operations(fitted_machine, data...; verbosity)
MLJTest.operations(fitted_machine, data...; throw, verbosity)
# special treatment to get list of operations in `summary`:
if operations == "×"
row = update(row, i, :operations, operations, outcome)
@@ -225,6 +238,7 @@ function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
row = update(row, i, :operations, operations, operations)
end

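# remaining tests require level > 2 (comprehensive):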
level > 2 || continue
model_instance isa Supervised || continue

# supervised tests:
Expand All @@ -236,7 +250,7 @@ function test(model_proxies, data...; verbosity=1, mod=Main, load_only=false)
scitype(data[2]) <: AbstractVector{<:Finite{2}}

threshold_prediction, outcome =
-MLJTest.threshold_prediction(model_instance, data...; verbosity)
MLJTest.threshold_prediction(model_instance, data...; throw, verbosity)
row = update(row, i, :threshold_prediction, threshold_prediction, outcome)
outcome == "×" && continue
end
@@ -247,27 +261,33 @@

# evaluation:
evaluation, outcome =
-MLJTest.evaluation(measure, model_instance, data...; verbosity)
MLJTest.evaluation(measure, model_instance, data...; throw, verbosity)
row = update(row, i, :evaluation, evaluation, outcome)
outcome == "×" && continue

# tuned_pipe_evaluation:
tuned_pipe_evaluation, outcome =
-MLJTest.tuned_pipe_evaluation(measure, model_instance, data...; verbosity)
MLJTest.tuned_pipe_evaluation(
    measure,
    model_instance,
    data...;
    throw,
    verbosity
)
row = update(row, i, :tuned_pipe_evaluation, tuned_pipe_evaluation, outcome)
outcome == "×" && continue

# ensemble_prediction:
ensemble_prediction, outcome =
-MLJTest.ensemble_prediction(model_instance, data...; verbosity)
MLJTest.ensemble_prediction(model_instance, data...; throw, verbosity)
row = update(row, i, :ensemble_prediction, ensemble_prediction, outcome)
outcome == "×" && continue

isnothing(iteration_parameter(model_instance)) && continue

# iteration prediction:
iteration_prediction, outcome =
-MLJTest.iteration_prediction(measure, model_instance, data...; verbosity)
MLJTest.iteration_prediction(measure, model_instance, data...; throw, verbosity)
row = update(row, i, :iteration_prediction, iteration_prediction, outcome)
outcome == "×" && continue
end