From 36555f53680a19268c130d82f25a3e0ffb5fd58e Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 19 Sep 2022 13:21:09 +1200 Subject: [PATCH 1/2] restrict ensemble testing to exclude Count targets, as unsupported --- src/test.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/test.jl b/src/test.jl index 68058f2..ed1ad0b 100644 --- a/src/test.jl +++ b/src/test.jl @@ -9,6 +9,8 @@ function next!(p) MLJ.ProgressMeter.updateProgress!(p) end +const ENSEMBLE_TARGET_ELSCITYPE = Union{Missing, Continuous, Finite} + """ test(models, data...; mod=Main, level=2, throw=false, verbosity=1) @@ -382,15 +384,16 @@ function test(model_proxies, data...; mod=Main, level=2, throw=false, verbosity= outcome == "×" && continue #[ensemble_prediction]: - ensemble_prediction, outcome = - MLJTestIntegration.ensemble_prediction( + if target_scitype(model_type) <: AbstractVector{<:ENSEMBLE_TARGET_ELSCITYPE} + ensemble_prediction, outcome = MLJTestIntegration.ensemble_prediction( model_instance, data...; throw, verbosity, ) - row = update(row, i, :ensemble_prediction, ensemble_prediction, outcome) - outcome == "×" && continue + row = update(row, i, :ensemble_prediction, ensemble_prediction, outcome) + outcome == "×" && continue + end # [iteration_prediction]: if !isnothing(iteration_parameter(model_instance)) From 7c27a12a783c49ab5e963e08d9d74ee24aea9dc3 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 19 Sep 2022 13:27:25 +1200 Subject: [PATCH 2/2] rename dataset generators and make public --- README.md | 21 ++++++++++--- src/special_cases.jl | 69 ++++++++++++++++++++++++++++++++----------- test/special_cases.jl | 2 +- 3 files changed, 69 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 083a292..8ed3ac5 100644 --- a/README.md +++ b/README.md @@ -41,14 +41,13 @@ Query the document strings for details, or see ## Testing models in a new MLJ model interface implementation -The following tests the model interface implemented by some model type -`MyClassifier`, as might appear in tests for a package providing that -type: +The following tests the model interface implemented by some model type `MyClassifier` for +multiclass classification, as might appear in tests for a package providing that type: ```julia import MLJTestIntegration using Test -X, y = MLJTestIntegration.MLJ.make_blobs() +X, y = MLJTestIntegration.make_multiclass() failures, summary = MLJTestIntegration.test([MyClassifier, ], X, y, verbosity=1, mod=@__MODULE__) @test isempty(failures) ``` @@ -78,3 +77,17 @@ failures, summary = summary |> DataFrame ``` + +# Datasets + +The following commands generate datasets of the form `(X, y)` suitable for integration +tests: + +- `MLJTestIntegration.make_binary` + +- `MLJTestIntegration.make_multiclass` + +- `MLJTestIntegration.make_regression` + +- `MLJTestIntegration.make_count` + diff --git a/src/special_cases.jl b/src/special_cases.jl index 4a44bf0..35e8d86 100644 --- a/src/special_cases.jl +++ b/src/special_cases.jl @@ -39,15 +39,60 @@ end _test(data; ignore=true, kwargs...) = _test([], data; ignore, kwargs...) -# # SINGLE TARGET CLASSIFICATION +# # BABY DATA SETS + +""" + make_binary() -function _make_binary() +Return data `(X, y)` for the crabs dataset, restricted to the two features `:FL`, +`:RW`. Target is `OrderedFactor{2}`. 
+ +""" +function make_binary() data = MLJ.load_crabs() y_, X = unpack(data, ==(:sp), col->col in [:FL, :RW]) y = coerce(y_, MLJ.OrderedFactor) return X, y end +""" + make_multiclass() + +Return data `(X, y)` for the unshuffled iris dataset. Target is `Multiclass{3}`. + +""" +make_multiclass() = MLJ.@load_iris + +""" + make_regression() + +Return data `(X, y)` for the Boston dataset, restricted to the two features `:LStat`, +`:Rm`. Target is `Continuous`. + +""" +function make_regression() + data = MLJ.load_boston() + y, X = unpack(data, ==(:MedV), col->col in [:LStat, :Rm]) + return X, y +end + +""" + make_count() + +Return data `(X, y)` for the Boston dataset, restricted to the two features `:LStat`, +`:Rm`, with the `Continuous` target converted to `Count` (integer). + +""" +function make_count() + X, y_ = make_regression() + y = map(η -> round(Int, η), y_) + return X, y +end + + +# # SINGLE TARGET CLASSIFICATION + + """ MLJTestIntegration.test_single_target_classifiers(; keyword_options...) @@ -62,17 +107,11 @@ $DOC_AS_ABOVE """ test_single_target_classifiers(args...; kwargs...) = - _test(args..., _make_binary(); kwargs...) + _test(args..., make_binary(); kwargs...) # # SINGLE TARGET REGRESSION -function _make_baby_boston() - data = MLJ.load_boston() - y, X = unpack(data, ==(:MedV), col->col in [:LStat, :Rm]) - return X, y -end - """ MLJTestIntegration.test_single_target_regressors(; keyword_options...) @@ -87,17 +126,11 @@ $DOC_AS_ABOVE """ test_single_target_regressors(args...; kwargs...) = - _test(args..., _make_baby_boston(); kwargs...) + _test(args..., make_regression(); kwargs...) # # SINGLE TARGET COUNT REGRESSORS -function _make_count() - X, y_ = _make_baby_boston() - y = map(η -> round(Int, η), y_) - return X, y -end - """ MLJTestIntegration.test_single_count_regressors(; keyword_options...) @@ -114,12 +147,12 @@ $DOC_AS_ABOVE """ test_single_target_count_regressors(args...; kwargs...) = - _test(args..., _make_count(); kwargs...) 
+ _test(args..., make_count(); kwargs...) # # CONTINUOUS TABLE TRANSFORMERS -_make_transformer() = (first(_make_baby_boston()),) +_make_transformer() = (first(make_regression()),) """ test_continuous_table_transformers(; keyword_options...) diff --git a/test/special_cases.jl b/test/special_cases.jl index 9f2b895..ed613bf 100644 --- a/test/special_cases.jl +++ b/test/special_cases.jl @@ -9,7 +9,7 @@ regressors = [ ] @testset "actual_proxies" begin - data = MTI._make_baby_boston() + data = MTI.make_regression() proxies = @test_logs MTI.actual_proxies(regressors, data, false, 1) @test proxies == regressors proxies2 = @test_logs MTI.actual_proxies(regressors, data, true, 1)