From 640c285ad48a34edfb56591c13b817a33e3dd5db Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 13 Dec 2021 11:34:39 +1300 Subject: [PATCH 1/6] address fact that UnivariateFinite has moved out of MLJBase --- Project.toml | 13 +- src/MLJEnsembles.jl | 16 ++ src/ensembles.jl | 281 +++++++++++++------------------ test/_models.jl | 16 ++ test/_models/Constant.jl | 206 ++++++++++++++++++++++ test/_models/NearestNeighbors.jl | 170 +++++++++++++++++++ test/ensembles.jl | 58 +++---- test/runtests.jl | 2 + 8 files changed, 565 insertions(+), 197 deletions(-) create mode 100644 test/_models.jl create mode 100644 test/_models/Constant.jl create mode 100644 test/_models/NearestNeighbors.jl diff --git a/Project.toml b/Project.toml index 2b2487a..715723d 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.2" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" +CategoricalDistributions = "af321ab8-2d2e-40a6-b165-3d674595d28e" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -12,25 +13,25 @@ MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" +ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] CategoricalArrays = "0.8, 0.9, 0.10" +CategoricalDistributions = "0.1" ComputationalResources = "0.3" Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" -MLJBase = "0.18" MLJModelInterface = "0.4.1, 1.1" ProgressMeter = "1.1" -ScientificTypes = "2" +ScientificTypesBase = "2" StatsBase = "0.32, 0.33" julia = "1.1" [extras] -MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" +Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["MLJModels", "NearestNeighborModels", "StableRNGs", "Test"] +test = ["Distances", "NearestNeighbors", "StableRNGs", "Test"] diff --git a/src/MLJEnsembles.jl b/src/MLJEnsembles.jl index fde68af..1d61f73 100644 --- a/src/MLJEnsembles.jl +++ b/src/MLJEnsembles.jl @@ -1,5 +1,21 @@ module MLJEnsembles +using MLJModelInterface +import MLJModelInterface: predict, fit +import MLJBase # still needed for aggregating measures in oob-estimates of error +using Random +using CategoricalArrays +using CategoricalDistributions +using ComputationalResources +using Distributed +import Distributions +using ProgressMeter +import StatsBase + +export EnsembleModel + +const MMI = MLJModelInterface + include("ensembles.jl") end # module diff --git a/src/ensembles.jl b/src/ensembles.jl index aae9de0..d955ce0 100644 --- a/src/ensembles.jl +++ b/src/ensembles.jl @@ -1,18 +1,4 @@ -using MLJModelInterface -import MLJModelInterface: predict, fit -using Random -using CategoricalArrays -using ComputationalResources -using MLJBase -using Distributed -import Distributions -using ScientificTypes: Continuous -using ProgressMeter -import StatsBase - -export EnsembleModel - -## ENSEMBLES OF FITRESULTS +# # ENSEMBLES OF FITRESULTS # Atom is atomic model type, eg, DecisionTree # R will be the tightest type of the atom fit-results. 
@@ -95,14 +81,10 @@ function predict(wens::WrappedEnsemble, atomic_weights, Xnew, # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent): # a matrix of probability distributions: - preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) - predictions = hcat(preds_gen...) - n_rows = size(predictions, 1) + predictions = [predict(atom, fitresult, Xnew) for fitresult in ensemble] # the weighted averages over the ensemble of the discrete pdf's: - predictions = [average([predictions[i, k] for k in 1:n_atoms], weights=atomic_weights) for i in 1:n_rows] - - return predictions + return atomic_weights .* predictions |> sum end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, @@ -119,22 +101,27 @@ function predict(wens::WrappedEnsemble, atomic_weights, Xnew, preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) predictions = hcat(preds_gen...) + # TODO: return normal distributions in special case of normal predictions # n_rows = size(predictions, 1) - # # the weighted average over the ensemble of the pdf means and pdf variances: - # μs = [sum([atomic_weights[k]*mean(predictions[i,k]) for k in 1:n_atoms]) for i in 1:n_rows] - # σ2s = [sum([atomic_weights[k]*var(predictions[i,k]) for k in 1:n_atoms]) for i in 1:n_rows] + # # the weighted average over the ensemble of the pdf + # # means and pdf variances: + # μs = [sum([atomic_weights[k]*mean(predictions[i,k]) + # for k in 1:n_atoms]) for i in 1:n_rows] + # σ2s = [sum([atomic_weights[k]*var(predictions[i,k]) + # for k in 1:n_atoms]) for i in 1:n_rows] # # a vector of normal probability distributions: # prediction = [Distributions.Normal(μs[i], sqrt(σ2s[i])) for i in 1:n_rows] - prediction = [Distributions.MixtureModel(predictions[i,:], atomic_weights) for i in 1:size(predictions, 1)] + prediction = [Distributions.MixtureModel(predictions[i,:], atomic_weights) + for i in 1:size(predictions, 1)] return prediction end -## CORE ENSEMBLE-BUILDING FUNCTIONS +# # CORE ENSEMBLE-BUILDING FUNCTIONS # for when out-of-bag performance estimates are requested: function get_ensemble_and_indices(atom::Supervised, verbosity, n, n_patterns, @@ -192,10 +179,10 @@ _reducer(p::Tuple, q::Tuple) = (vcat(p[1], q[1]), vcat(p[2], q[2])) -## ENSEMBLE MODEL FOR DETERMINISTIC MODELS +# # ENSEMBLE MODEL TYPES mutable struct DeterministicEnsembleModel{Atom<:Deterministic} <: Deterministic - atom::Atom + model::Atom atomic_weights::Vector{Float64} bagging_fraction::Float64 rng::Union{Int,AbstractRNG} @@ -204,63 +191,8 @@ mutable struct DeterministicEnsembleModel{Atom<:Deterministic} <: Deterministic out_of_bag_measure # TODO: type this end -function clean!(model::DeterministicEnsembleModel) - - target_scitype(model.atom) <: Union{AbstractVector{<:Finite}, AbstractVector{<:Continuous}} || - error("`atom` has unsupported target_scitype "* - "`$(target_scitype(model.atom))`. ") - - message = "" - - if model.bagging_fraction > 1 || model.bagging_fraction <= 0 - message = message*"`bagging_fraction` should be "* - "in the range (0,1]. Reset to 1. " - model.bagging_fraction = 1.0 - end - - if target_scitype(model.atom) <: AbstractVector{<:Finite} && !isempty(model.atomic_weights) - message = message*"atomic_weights will be ignored to form predictions. " - elseif !isempty(model.atomic_weights) - total = sum(model.atomic_weights) - if !(total ≈ 1.0) - message = message*"atomic_weights should sum to one and are being automatically normalized. 
" - model.atomic_weights = model.atomic_weights/total - end - end - - return message - -end - -# constructor to infer type automatically: -DeterministicEnsembleModel(atom::Atom, atomic_weights, - bagging_fraction, rng, n, acceleration, out_of_bag_measure) where Atom<:Deterministic = - DeterministicEnsembleModel{Atom}(atom, atomic_weights, - bagging_fraction, rng, n, acceleration, out_of_bag_measure) - -# lazy keyword constructors: -function DeterministicEnsembleModel(;atom=DeterministicConstantClassifier(), - atomic_weights=Float64[], - bagging_fraction=0.8, - rng=Random.GLOBAL_RNG, - n::Int=100, - acceleration=default_resource(), - out_of_bag_measure=[]) - - model = DeterministicEnsembleModel(atom, atomic_weights, bagging_fraction, rng, - n, acceleration, out_of_bag_measure) - - message = clean!(model) - isempty(message) || @warn message - - return model -end - - -## ENSEMBLE MODEL FOR PROBABILISTIC MODELS - mutable struct ProbabilisticEnsembleModel{Atom<:Probabilistic} <: Probabilistic - atom::Atom + model::Atom atomic_weights::Vector{Float64} bagging_fraction::Float64 rng::Union{Int, AbstractRNG} @@ -269,7 +201,18 @@ mutable struct ProbabilisticEnsembleModel{Atom<:Probabilistic} <: Probabilistic out_of_bag_measure end -function clean!(model::ProbabilisticEnsembleModel) +const EitherEnsembleModel{Atom} = + Union{DeterministicEnsembleModel{Atom}, ProbabilisticEnsembleModel{Atom}} + +function clean!(model::EitherEnsembleModel) + + if model isa DeterministicEnsembleModel + + ok_target = target_scitype(model.model) <: + Union{AbstractVector{<:Finite},AbstractVector{<:Continuous}} + ok_target || error("atomic model has unsupported target_scitype "* + "`$(target_scitype(model.model))`. ") + end message = "" @@ -279,10 +222,17 @@ function clean!(model::ProbabilisticEnsembleModel) model.bagging_fraction = 1.0 end - if !isempty(model.atomic_weights) + isempty(model.atomic_weights) && return message + + if model isa Deterministic && + target_scitype(model.model) <: AbstractVector{<:Finite} + message = message*"`atomic_weights` will be ignored to "* + "form predictions, as unsupported for `Finite` targets. " + else total = sum(model.atomic_weights) if !(total ≈ 1.0) - message = message*"atomic_weights should sum to one and are being automatically normalized. " + message = message*"atomic_weights should sum to one and are being "* + "replaced by normalized weights. " model.atomic_weights = model.atomic_weights/total end end @@ -291,37 +241,23 @@ function clean!(model::ProbabilisticEnsembleModel) end -# constructor to infer type automatically: -ProbabilisticEnsembleModel(atom::Atom, atomic_weights, bagging_fraction, rng, n, acceleration, out_of_bag_measure) where Atom<:Probabilistic = - ProbabilisticEnsembleModel{Atom}(atom, atomic_weights, bagging_fraction, rng, n, acceleration, out_of_bag_measure) - -# lazy keyword constructor: -function ProbabilisticEnsembleModel(;atom=ConstantProbabilisticClassifier(), - atomic_weights=Float64[], - bagging_fraction=0.8, - rng=Random.GLOBAL_RNG, - n::Int=100, - acceleration=default_resource(), - out_of_bag_measure=[]) - - model = ProbabilisticEnsembleModel(atom, atomic_weights, bagging_fraction, rng, n, acceleration, out_of_bag_measure) - - message = clean!(model) - isempty(message) || @warn message - return model -end +# # USER-FACING CONSTRUCTOR +const ERR_MODEL_UNSPECIFIED = ArgumentError( +"Expecting atomic model as argument. None specified. Use "* + "`EnsembleModel(model=...)`. 
") +const ERR_TOO_MANY_ARGUMENTS = ArgumentError( + "At most one non-keyword argument, a model, allowed. ") -## COMMON CONSTRUCTOR """ - EnsembleModel(atom=nothing, + EnsembleModel(model, atomic_weights=Float64[], bagging_fraction=0.8, n=100, rng=GLOBAL_RNG, - acceleration=default_resource(), + acceleration=CPU1(), out_of_bag_measure=[]) Create a model for training an ensemble of `n` learners, with optional @@ -372,25 +308,49 @@ measures specified in `out_of_bag_measure` that support sample weights. """ -function EnsembleModel(; args...) - d = Dict(args) - :atom in keys(d) || - error("No atomic model specified. Use EnsembleModel(atom=...)") - if d[:atom] isa Deterministic - return DeterministicEnsembleModel(; d...) - elseif d[:atom] isa Probabilistic - return ProbabilisticEnsembleModel(; d...) +function EnsembleModel(args...; + model=nothing, + atomic_weights=Float64[], + bagging_fraction=0.8, + rng=Random.GLOBAL_RNG, + n::Int=100, + acceleration=CPU1(), + out_of_bag_measure=[]) + + length(args) < 2 || throw(ERR_TOO_MANY_ARGUMENTS) + if length(args) === 1 + atom = first(args) + model === nothing || + @warn "Using `model=$atom`. Ignoring specification "* + "`model=$model`. " + else + model === nothing && throw(ERR_MODEL_UNSPECIFIED) + atom = model end - error("$(d[:atom]) does not appear to be a Supervised model.") -end + arguments = (atom, + atomic_weights, + float(bagging_fraction), + rng, + n, + acceleration, + out_of_bag_measure) + + if atom isa Deterministic + emodel = DeterministicEnsembleModel(arguments...) + elseif atom isa Probabilistic + emodel = ProbabilisticEnsembleModel(arguments...) + else + error("$atom does not appear to be a Supervised model.") + end -## THE COMMON FIT AND PREDICT METHODS + message = clean!(emodel) + isempty(message) || @warn message + return emodel +end -const EitherEnsembleModel{Atom} = - Union{DeterministicEnsembleModel{Atom}, ProbabilisticEnsembleModel{Atom}} -MLJBase.is_wrapper(::Type{<:EitherEnsembleModel}) = true +# # THE COMMON FIT AND PREDICT METHODS function _fit(res::CPU1, func, verbosity, stuff) atom, n, n_patterns, n_train, rng, progress_meter, args = stuff @@ -438,7 +398,7 @@ end end end -function fit(model::EitherEnsembleModel{Atom}, +function MMI.fit(model::EitherEnsembleModel{Atom}, verbosity::Int, args...) 
where Atom<:Supervised X = args[1] @@ -466,7 +426,7 @@ function fit(model::EitherEnsembleModel{Atom}, rng = model.rng end - atom = model.atom + atom = model.model n = model.n n_patterns = nrows(y) n_train = round(Int, floor(model.bagging_fraction*n_patterns)) @@ -487,7 +447,7 @@ function fit(model::EitherEnsembleModel{Atom}, end - fitresult = WrappedEnsemble(model.atom, ensemble) + fitresult = WrappedEnsemble(model.model, ensemble) if !isempty(out_of_bag_measure) @@ -511,10 +471,15 @@ function fit(model::EitherEnsembleModel{Atom}, end for k in eachindex(out_of_bag_measure) m = out_of_bag_measure[k] - if reports_each_observation(m) - s = aggregate(value(m, yhat, Xtest, ytest, wtest), m) + if MMI.reports_each_observation(m) + s = MLJBase.aggregate(MLJBase.value(m, + yhat, + Xtest, + ytest, + wtest), + m) else - s = value(m, yhat, Xtest, ytest, wtest) + s = MLJBase.value(m, yhat, Xtest, ytest, wtest) end metrics[i,k] = s end @@ -522,7 +487,7 @@ function fit(model::EitherEnsembleModel{Atom}, # aggregate metrics across the ensembles: aggregated_metrics = map(eachindex(out_of_bag_measure)) do k - aggregate(metrics[:,k], out_of_bag_measure[k]) + MLJBase.aggregate(metrics[:,k], out_of_bag_measure[k]) end names = Symbol.(string.(out_of_bag_measure)) @@ -540,12 +505,12 @@ end # if n is only parameter that changes, we just append to the existing # ensemble, or truncate it: -function update(model::EitherEnsembleModel, - verbosity::Int, fitresult, old_model, args...) +function MMI.update(model::EitherEnsembleModel, + verbosity::Int, fitresult, old_model, args...) n = model.n - if MLJBase.is_same_except(model.atom, old_model.atom, + if MLJBase.is_same_except(model.model, old_model.model, :n, :atomic_weights, :acceleration) if n > old_model.n verbosity < 1 || @@ -568,7 +533,7 @@ function update(model::EitherEnsembleModel, end -function predict(model::EitherEnsembleModel, fitresult, Xnew) +function MMI.predict(model::EitherEnsembleModel, fitresult, Xnew) n = model.n if isempty(model.atomic_weights) @@ -581,35 +546,27 @@ function predict(model::EitherEnsembleModel, fitresult, Xnew) predict(fitresult, atomic_weights, Xnew) end -## METADATA -# Note: input and target traits are inherited from atom +# # METADATA -MLJBase.supports_weights(::Type{<:EitherEnsembleModel{Atom}}) where Atom = - MLJBase.supports_weights(Atom) +# Note: input and target traits are inherited from atom -MLJBase.load_path(::Type{<:DeterministicEnsembleModel}) = - "MLJ.DeterministicEnsembleModel" -MLJBase.package_name(::Type{<:DeterministicEnsembleModel}) = "MLJ" -MLJBase.package_uuid(::Type{<:DeterministicEnsembleModel}) = "" -MLJBase.package_url(::Type{<:DeterministicEnsembleModel}) = - "https://github.com/alan-turing-institute/MLJ.jl" -MLJBase.is_pure_julia(::Type{<:DeterministicEnsembleModel{Atom}}) where Atom = - MLJBase.is_pure_julia(Atom) -MLJBase.input_scitype(::Type{<:DeterministicEnsembleModel{Atom}}) where Atom = - MLJBase.input_scitype(Atom) -MLJBase.target_scitype(::Type{<:DeterministicEnsembleModel{Atom}}) where Atom = - MLJBase.target_scitype(Atom) - -MLJBase.load_path(::Type{<:ProbabilisticEnsembleModel}) = +MMI.load_path(::Type{<:ProbabilisticEnsembleModel}) = "MLJ.ProbabilisticEnsembleModel" -MLJBase.package_name(::Type{<:ProbabilisticEnsembleModel}) = "MLJ" -MLJBase.package_uuid(::Type{<:ProbabilisticEnsembleModel}) = "" -MLJBase.package_url(::Type{<:ProbabilisticEnsembleModel}) = - "https://github.com/alan-turing-institute/MLJ.jl" -MLJBase.is_pure_julia(::Type{<:ProbabilisticEnsembleModel{Atom}}) where Atom = - 
MLJBase.is_pure_julia(Atom)
-MLJBase.input_scitype(::Type{<:ProbabilisticEnsembleModel{Atom}}) where Atom =
-    MLJBase.input_scitype(Atom)
-MLJBase.target_scitype(::Type{<:ProbabilisticEnsembleModel{Atom}}) where Atom =
-    MLJBase.target_scitype(Atom)
+MMI.load_path(::Type{<:DeterministicEnsembleModel}) =
+    "MLJ.DeterministicEnsembleModel"
+
+MMI.is_wrapper(::Type{<:EitherEnsembleModel}) = true
+MMI.supports_weights(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.supports_weights(Atom)
+MMI.package_name(::Type{<:EitherEnsembleModel}) = "MLJEnsembles"
+MMI.package_uuid(::Type{<:EitherEnsembleModel}) =
+    "50ed68f4-41fd-4504-931a-ed422449fee0"
+MMI.package_url(::Type{<:EitherEnsembleModel}) =
+    "https://github.com/JuliaAI/MLJEnsembles.jl"
+MMI.is_pure_julia(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.is_pure_julia(Atom)
+MMI.input_scitype(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.input_scitype(Atom)
+MMI.target_scitype(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.target_scitype(Atom)
diff --git a/test/_models.jl b/test/_models.jl
new file mode 100644
index 0000000..5943c26
--- /dev/null
+++ b/test/_models.jl
@@ -0,0 +1,16 @@
+# If adding models from MLJModels for testing purposes, then do the
+# following in the interface file (eg, DecisionTree.jl):
+
+# - change `import ..DecisionTree` to `import DecisionTree`
+# - remove wrapping as module
+
+module Models
+
+using MLJBase
+import MLJModelInterface: @mlj_model, metadata_model, metadata_pkg
+import MLJModelInterface
+
+include("_models/Constant.jl")
+include("_models/NearestNeighbors.jl")
+
+end
diff --git a/test/_models/Constant.jl b/test/_models/Constant.jl
new file mode 100644
index 0000000..d48c251
--- /dev/null
+++ b/test/_models/Constant.jl
@@ -0,0 +1,206 @@
+## THE CONSTANT REGRESSOR
+
+const MMI = MLJModelInterface
+export ConstantClassifier, ConstantRegressor,
+    DeterministicConstantRegressor,
+    DeterministicConstantClassifier,
+    ProbabilisticConstantClassifier
+
+import Distributions
+
+"""
+    ConstantRegressor(; distribution_type=Distributions.Normal)
+
+A regressor that, for any new input pattern, predicts the univariate
+probability distribution best fitting the training target data. Use
+`predict_mean` to predict the mean value instead.
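+
+A minimal usage sketch (assuming some table `X` and `Continuous`
+vector `y` are already in scope; only generic MLJBase workflow calls
+are used):
+
+    model = ConstantRegressor()       # fits a `Normal` by default
+    mach = fit!(machine(model, X, y)) # `X`, `y` assumed in scope
+    predict(mach, X)[1]               # a `Distributions.Normal` object
+    predict_mean(mach, X)             # vector of mean predictions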
+"""
+struct ConstantRegressor{D} <: MMI.Probabilistic end
+
+function ConstantRegressor(; distribution_type=Distributions.Normal)
+    model = ConstantRegressor{distribution_type}()
+    message = clean!(model)
+    isempty(message) || @warn message
+    return model
+end
+
+function MMI.clean!(model::ConstantRegressor{D}) where D
+    message = ""
+    D <: Distributions.Sampleable ||
+        error("$D is not a valid distribution_type.")
+    return message
+end
+
+MMI.reformat(::ConstantRegressor, X) = (MMI.matrix(X),)
+MMI.reformat(::ConstantRegressor, X, y) = (MMI.matrix(X), y)
+MMI.selectrows(::ConstantRegressor, I, A) = (view(A, I, :),)
+MMI.selectrows(::ConstantRegressor, I, A, y) = (view(A, I, :), y[I])
+
+function MMI.fit(::ConstantRegressor{D}, verbosity::Int, A, y) where D
+    fitresult = Distributions.fit(D, y)
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.fitted_params(::ConstantRegressor, fitresult) =
+    (target_distribution=fitresult,)
+
+MMI.predict(::ConstantRegressor, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+##
+## THE CONSTANT DETERMINISTIC REGRESSOR (FOR TESTING)
+##
+
+struct DeterministicConstantRegressor <: MMI.Deterministic end
+
+function MMI.fit(::DeterministicConstantRegressor, verbosity::Int, X, y)
+    fitresult = mean(y)
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.reformat(::DeterministicConstantRegressor, X) = (MMI.matrix(X),)
+MMI.reformat(::DeterministicConstantRegressor, X, y) = (MMI.matrix(X), y)
+MMI.selectrows(::DeterministicConstantRegressor, I, A) = (view(A, I, :),)
+MMI.selectrows(::DeterministicConstantRegressor, I, A, y) =
+    (view(A, I, :), y[I])
+
+MMI.predict(::DeterministicConstantRegressor, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+##
+## THE CONSTANT CLASSIFIER
+##
+
+"""
+    ConstantClassifier()
+
+A classifier that, for any new input pattern, `predict`s the
+`UnivariateFinite` probability distribution `d` best fitting the
+training target data. So, `pdf(d, level)` is the proportion of levels
+in the training data coinciding with `level`. Use `predict_mode` to
+obtain the training target mode instead.
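+
+A minimal usage sketch (assuming a table `X` and a categorical vector
+`y` are already in scope):
+
+    model = ConstantClassifier()
+    mach = fit!(machine(model, X, y)) # `X`, `y` assumed in scope
+    d = predict(mach, X)[1]           # a `UnivariateFinite` distribution
+    pdf(d, levels(y)[1])              # proportion of that level in `y`
+    predict_mode(mach, X)             # most frequent level, for every row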
+"""
+mutable struct ConstantClassifier <: MMI.Probabilistic
+    testing::Bool
+    bogus::Int
+end
+
+ConstantClassifier(; testing=false, bogus=0) =
+    ConstantClassifier(testing, bogus)
+
+function MMI.reformat(model::ConstantClassifier, X)
+    model.testing && @info "reformatting X"
+    return (MMI.matrix(X),)
+end
+
+function MMI.reformat(model::ConstantClassifier, X, y)
+    model.testing && @info "reformatting X, y"
+    return (MMI.matrix(X), y)
+end
+
+function MMI.reformat(model::ConstantClassifier, X, y, w)
+    model.testing && @info "reformatting X, y, w"
+    return (MMI.matrix(X), y, w)
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A)
+    model.testing && @info "resampling X"
+    return (view(A, I, :),)
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A, y)
+    model.testing && @info "resampling X, y"
+    return (view(A, I, :), y[I])
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A, y, ::Nothing)
+    model.testing && @info "resampling X, y, nothing"
+    return (view(A, I, :), y[I], nothing)
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A, y, w)
+    model.testing && @info "resampling X, y, w"
+    return (view(A, I, :), y[I], w[I])
+end
+
+# here `w` is an optional vector of per-observation weights:
+function MMI.fit(::ConstantClassifier, verbosity::Int, A, y, w=nothing)
+    fitresult = Distributions.fit(MLJBase.UnivariateFinite, y, w)
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.fitted_params(::ConstantClassifier, fitresult) =
+    (target_distribution=fitresult,)
+
+MMI.predict(::ConstantClassifier, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+##
+## DETERMINISTIC CONSTANT CLASSIFIER (FOR TESTING)
+##
+
+struct DeterministicConstantClassifier <: MMI.Deterministic end
+
+function MMI.fit(::DeterministicConstantClassifier, verbosity::Int, X, y)
+    # dump missing target values and make into a regular array:
+    fitresult = mode(skipmissing(y) |> collect) # a CategoricalValue
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.reformat(::DeterministicConstantClassifier, X) = (MMI.matrix(X),)
+MMI.reformat(::DeterministicConstantClassifier, X, y) = (MMI.matrix(X), y)
+MMI.selectrows(::DeterministicConstantClassifier, I, A) = (view(A, I, :),)
+MMI.selectrows(::DeterministicConstantClassifier, I, A, y) =
+    (view(A, I, :), y[I])
+
+MMI.predict(::DeterministicConstantClassifier, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+#
+# METADATA
+#
+
+metadata_pkg.((ConstantRegressor, ConstantClassifier,
+               DeterministicConstantRegressor, DeterministicConstantClassifier),
+              name="MLJModels",
+              uuid="d491faf4-2d78-11e9-2867-c94bc002c0b7",
+              url="https://github.com/alan-turing-institute/MLJModels.jl",
+              julia=true,
+              license="MIT",
+              is_wrapper=false)
+
+metadata_model(ConstantRegressor,
+               input=MMI.Table,
+               target=AbstractVector{MMI.Continuous},
+               weights=false,
+               descr="Constant regressor (Probabilistic).",
+               path="MLJModels.ConstantRegressor")
+
+metadata_model(DeterministicConstantRegressor,
+               input=MMI.Table,
+               target=AbstractVector{MMI.Continuous},
+               weights=false,
+               descr="Constant regressor (Deterministic).",
+               path="MLJModels.DeterministicConstantRegressor")
+
+metadata_model(ConstantClassifier,
+               input=MMI.Table,
+               target=AbstractVector{<:MMI.Finite},
+               weights=true,
+               descr="Constant classifier (Probabilistic).",
+               path="MLJModels.ConstantClassifier")
+
+metadata_model(DeterministicConstantClassifier,
+               input=MMI.Table,
+               target=AbstractVector{<:MMI.Finite},
+               weights=false,
+               descr="Constant classifier (Deterministic).",
+               path="MLJModels.DeterministicConstantClassifier")
diff --git a/test/_models/NearestNeighbors.jl b/test/_models/NearestNeighbors.jl
new file mode 100644
index 0000000..277a564
--- /dev/null
+++ b/test/_models/NearestNeighbors.jl
@@ -0,0 +1,170 @@
+export KNNRegressor, KNNClassifier
+
+using Distances
+
+import NearestNeighbors
+
+const NN = NearestNeighbors
+
+const KNNRegressorDescription =
+    """
+    K-Nearest Neighbors regressor: predicts the response associated with a new point
+    by taking an average of the responses of the K-nearest points.
+    """
+
+const KNNClassifierDescription =
+    """
+    K-Nearest Neighbors classifier: predicts the class associated with a new point
+    by taking a vote over the classes of the K-nearest points.
+    """
+
+const KNNFields =
+    """
+    ## Keywords
+
+    * `K=5` : number of neighbors
+    * `algorithm=:kdtree` : one of `(:kdtree, :brutetree, :balltree)`
+    * `metric=Euclidean()` : a `Metric` object for the distance between points
+    * `leafsize=10` : at what number of points to stop splitting the tree
+    * `reorder=true` : if `true`, points close in distance are placed close in memory
+    * `weights=:uniform` : one of `(:uniform, :distance)`; if `:uniform`, all neighbors
+                           are considered equally important; if `:distance`, closer
+                           neighbors are proportionally more important.
+
+    See also the [package documentation](https://github.com/KristofferC/NearestNeighbors.jl).
+    """
+
+"""
+    KNNRegressor(;kwargs...)
+
+$KNNRegressorDescription
+
+$KNNFields
+"""
+@mlj_model mutable struct KNNRegressor <: MLJBase.Deterministic
+    K::Int = 5::(_ > 0)
+    algorithm::Symbol = :kdtree::(_ in (:kdtree, :brutetree, :balltree))
+    metric::Metric = Euclidean()
+    leafsize::Int = 10::(_ ≥ 0)
+    reorder::Bool = true
+    weights::Symbol = :uniform::(_ in (:uniform, :distance))
+end
+
+"""
+    KNNClassifier(;kwargs...)
+
+$KNNClassifierDescription
+
+$KNNFields
+"""
+@mlj_model mutable struct KNNClassifier <: MLJBase.Probabilistic
+    K::Int = 5::(_ > 0)
+    algorithm::Symbol = :kdtree::(_ in (:kdtree, :brutetree, :balltree))
+    metric::Metric = Euclidean()
+    leafsize::Int = 10::(_ ≥ 0)
+    reorder::Bool = true
+    weights::Symbol = :uniform::(_ in (:uniform, :distance))
+end
+
+const KNN = Union{KNNRegressor, KNNClassifier}
+
+function MLJBase.fit(m::KNN, verbosity::Int, X, y, w=nothing)
+    Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
+    if m.algorithm == :kdtree
+        tree = NN.KDTree(Xmatrix; leafsize=m.leafsize, reorder=m.reorder)
+    elseif m.algorithm == :balltree
+        tree = NN.BallTree(Xmatrix; leafsize=m.leafsize, reorder=m.reorder)
+    elseif m.algorithm == :brutetree
+        tree = NN.BruteTree(Xmatrix; leafsize=m.leafsize, reorder=m.reorder)
+    end
+    report = NamedTuple()
+    return (tree, y, w), nothing, report
+end
+
+MLJBase.fitted_params(model::KNN, (tree, _)) = (tree=tree,)
+
+function MLJBase.predict(m::KNNClassifier, (tree, y, w), X)
+    Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
+    # for each entry, get the K closest training points + their distances
+    idxs, dists = NN.knn(tree, Xmatrix, m.K)
+
+    preds = Vector{MLJBase.UnivariateFinite}(undef, length(idxs))
+    classes = MLJBase.classes(y[1])
+    probas = zeros(length(classes))
+
+    w_ = ones(m.K)
+
+    # go over each test record, and for each go over the k nearest entries
+    for i in eachindex(idxs)
+        idxs_ = idxs[i]
+        dists_ = dists[i]
+        labels = y[idxs_]
+        if w !== nothing
+            w_ = w[idxs_]
+        end
+        probas .*= 0.0
+        if m.weights == :uniform
+            for (k, label) in enumerate(labels)
+                probas[classes .== label] .+= 1.0 / m.K * w_[k]
+            end
+        else
+            for (k, label) in enumerate(labels)
+                probas[classes .== label] .+= 1.0 / dists_[k] * w_[k]
+            end
+        end
+        # normalize so that the probabilities sum to 1
+        probas ./= sum(probas)
+        preds[i] = MLJBase.UnivariateFinite(classes, probas)
+    end
+    return preds
+end
+
+function MLJBase.predict(m::KNNRegressor, (tree, y, w), X)
+    Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
+    idxs, dists = NN.knn(tree, Xmatrix, m.K)
+    preds = zeros(length(idxs))
+
+    w_ = ones(m.K)
+
+    for i in eachindex(idxs)
+        idxs_ = idxs[i]
+        dists_ = dists[i]
+        values = y[idxs_]
+        if w !== nothing
+            w_ = w[idxs_]
+        end
+        if m.weights == :uniform
+            preds[i] = sum(values .* w_) / sum(w_)
+        else
+            preds[i] = sum(values .* w_ .* (1.0 .- dists_ ./ sum(dists_))) / (sum(w_) - 1)
+        end
+    end
+    return preds
+end
+
+# ====
+
+metadata_pkg.((KNNRegressor, KNNClassifier),
+              name="NearestNeighbors",
+              uuid="b8a86587-4115-5ab1-83bc-aa920d37bbce",
+              url="https://github.com/KristofferC/NearestNeighbors.jl",
+              julia=true,
+              license="MIT",
+              is_wrapper=false
+              )
+
+metadata_model(KNNRegressor,
+               input=MLJBase.Table(MLJBase.Continuous),
+               target=AbstractVector{MLJBase.Continuous},
+               weights=true,
+               descr=KNNRegressorDescription
+               )
+
+metadata_model(KNNClassifier,
+               input=MLJBase.Table(MLJBase.Continuous),
+               target=AbstractVector{<:MLJBase.Finite},
+               weights=true,
+               descr=KNNClassifierDescription
+               )
+
+
diff --git a/test/ensembles.jl b/test/ensembles.jl
index 4232167..4bb3d49 100644
--- a/test/ensembles.jl
+++ b/test/ensembles.jl
@@ -5,11 +5,10 @@ using Random
using StableRNGs
using MLJEnsembles
using MLJBase
-using MLJModels
+using ..Models
using CategoricalArrays
import Distributions

-KNNRegressor = @load KNNRegressor verbosity=0

## HELPER FUNCTIONS

@@ -20,10 +19,10 @@ KNNRegressor = @load KNNRegressor verbosity=0
pair_vcat(p, q) =
(vcat(p[1], q[1]), vcat(p[2], q[2]))


## WRAPPED ENSEMBLES OF FITRESULTS

# target is :deterministic :multiclass false:
-atom = MLJModels.DeterministicConstantClassifier()
+atom = DeterministicConstantClassifier()
L = ['a', 'b', 'j']
L2 = categorical(L)
ensemble = [L2[1], L2[3], L2[3], L2[2]]
atomic_weights = [0.1, 0.5, 0.2, 0.2]
wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
X = MLJEnsembles.table(rand(3,5))
categorical(vcat(['j','j','j'],L))[1:3]

# target is :deterministic :continuous false:
-atom = MLJModels.DeterministicConstantRegressor()
+atom = DeterministicConstantRegressor()
ensemble = Float64[4, 7, 4, 4]
atomic_weights = [0.1, 0.5, 0.2, 0.2]
wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
@@ -45,9 +44,9 @@ wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
atom = ConstantClassifier()
L = categorical(['a', 'b', 'j'])
d1 = UnivariateFinite(L, [0.1, 0.2, 0.7])
-fitresult1 = (L, pdf([d1, ], L))
+fitresult1 = d1
d2 = UnivariateFinite(L, [0.2, 0.3, 0.5])
-fitresult2 = (L, pdf([d2, ], L))
+fitresult2 = d2
ensemble = [fitresult2, fitresult1, fitresult2, fitresult2]
atomic_weights = [0.1, 0.5, 0.2, 0.2]
wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
@@ -71,11 +70,11 @@ d = predict(wens, atomic_weights, X)[1]
## ENSEMBLE MODEL

# target is :deterministic :multiclass false:
-atom=MLJModels.DeterministicConstantClassifier()
+atom=DeterministicConstantClassifier()
X = MLJEnsembles.table(ones(5,3))
y = categorical(collect("asdfa"))
train, test = partition(1:length(y), 0.8);
-ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom)
+ensemble_model = EnsembleModel(model=atom)
ensemble_model.n = 10
fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y)
predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test))
@@ -87,11 +86,11 @@ p = predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test))
@test MLJBase.target_scitype(ensemble_model) == MLJBase.target_scitype(atom)

# target is :deterministic :continuous false:
-atom = MLJModels.DeterministicConstantRegressor()
+atom = DeterministicConstantRegressor()
X = MLJEnsembles.table(ones(5,3))
y = Float64[1.0, 2.0, 1.0, 1.0, 1.0]
train, test = partition(1:length(y), 0.8);
-ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom)
+ensemble_model = EnsembleModel(model=atom)
ensemble_model.n = 10
fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y)
@test reduce(* , [x ≈ 1.0 || x ≈ 1.25 for x in fitresult.ensemble])
@@ -106,21 +105,21 @@ ensemble_model.atomic_weights = atomic_weights
predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test))

# target is :deterministic :continuous false:
-atom = MLJModels.DeterministicConstantRegressor()
+atom = DeterministicConstantRegressor()
rng = StableRNG(1234)
X = MLJEnsembles.table(randn(rng, 10, 3))
y = selectcols(X, 1)
std(y)
train, test = partition(1:length(y), 0.8);
-ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom, rng=rng)
-ensemble_model.out_of_bag_measure = [MLJEnsembles.rms,MLJEnsembles.rmsp]
+ensemble_model = EnsembleModel(model=atom, rng=rng)
+ensemble_model.out_of_bag_measure = [rms, rmsp]
ensemble_model.n = 10
fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y)

# TODO: the following test fails in distributed version (because of
# multiple rng's ?)
@test abs(report.oob_measurements[1] - std(y)) < 0.25 -ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom,rng=Random.MersenneTwister(1)) -ensemble_model.out_of_bag_measure = MLJEnsembles.rms +ensemble_model = EnsembleModel(model=atom,rng=Random.MersenneTwister(1)) +ensemble_model.out_of_bag_measure = rms ensemble_model.n = 2 fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) @@ -129,7 +128,7 @@ atom = ConstantClassifier() X = MLJEnsembles.table(ones(5,3)) y = categorical(collect("asdfa")) train, test = partition(1:length(y), 0.8); -ensemble_model = MLJEnsembles.ProbabilisticEnsembleModel(atom=atom) +ensemble_model = EnsembleModel(model=atom) ensemble_model.n = 10 fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) fitresult.ensemble @@ -159,7 +158,7 @@ atom = ConstantRegressor() X = MLJEnsembles.table(ones(5,3)) y = Float64[1.0, 2.0, 2.0, 1.0, 1.0] train, test = partition(1:length(y), 0.8); -ensemble_model = MLJEnsembles.ProbabilisticEnsembleModel(atom=atom) +ensemble_model = EnsembleModel(model=atom) ensemble_model.n = 10 fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) d1 = Distributions.fit(Distributions.Normal, [1,1,2,2]) @@ -183,18 +182,18 @@ predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test)) # @test MLJBase.output_is(ensemble_model) == MLJBase.output_is(atom) # test generic constructor: -@test EnsembleModel(atom=ConstantRegressor()) isa Probabilistic -@test EnsembleModel(atom=MLJModels.DeterministicConstantRegressor()) isa Deterministic +@test EnsembleModel(model=ConstantRegressor()) isa Probabilistic +@test EnsembleModel(model=DeterministicConstantRegressor()) isa Deterministic @testset "further test of sample weights" begin rng = StableRNG(123) N = 20 X = (x = rand(rng, 3N), ); y = categorical(rand(rng, "abbbc", 3N)); - atom = (@load KNNClassifier verbosity=0)() - ensemble_model = MLJEnsembles.ProbabilisticEnsembleModel(atom=atom, - bagging_fraction=1, - n = 5, rng=rng) + atom = KNNClassifier() + ensemble_model = EnsembleModel(model=atom, + bagging_fraction=1, + n = 5, rng=rng) fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) @test predict_mode(ensemble_model, fitresult, (x = [0, ],))[1] == 'b' w = map(y) do η @@ -219,34 +218,35 @@ predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test)) end - ## MACHINE TEST (INCLUDES TEST OF UPDATE) +## MACHINE TEST (INCLUDES TEST OF UPDATE) N =100 X = (x1=rand(N), x2=rand(N), x3=rand(N)) y = 2X.x1 - X.x2 + 0.05*rand(N) atom = KNNRegressor(K=7) -ensemble_model = EnsembleModel(atom=atom) +ensemble_model = EnsembleModel(model=atom) ensemble = machine(ensemble_model, X, y) train, test = partition(eachindex(y), 0.7) fit!(ensemble, rows=train, verbosity=0) @test length(ensemble.fitresult.ensemble) == ensemble_model.n ensemble_model.n = 15 @test_logs((:info, r"Training"), - fit!(ensemble)) + fit!(ensemble, verbosity=1)) @test length(ensemble.fitresult.ensemble) == 15 ensemble_model.n = 20 @test_logs((:info, r"Updating"), - # (:info, r"Building"), + (:info, r"Building"), fit!(ensemble)) @test length(ensemble.fitresult.ensemble) == 20 ensemble_model.n = 5 @test_logs((:info, r"Updating"), - # (:info, r"Truncating"), + (:info, r"Truncating"), fit!(ensemble)) @test length(ensemble.fitresult.ensemble) == 5 @test !isnan(predict(ensemble, MLJEnsembles.selectrows(X, test))[1]) end + true diff --git a/test/runtests.jl b/test/runtests.jl index 06685a3..a068d43 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1 +1,3 @@ 
+include("_models.jl")
include("ensembles.jl")
+

From cd46dd8f17aaf837c569ae107afe7ddbf4691475 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Mon, 13 Dec 2021 11:48:54 +1300
Subject: [PATCH 2/6] update doc-string

---
 src/ensembles.jl | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/src/ensembles.jl b/src/ensembles.jl
index d955ce0..4f43e0e 100644
--- a/src/ensembles.jl
+++ b/src/ensembles.jl
@@ -260,26 +260,25 @@ const ERR_TOO_MANY_ARGUMENTS = ArgumentError(
                  acceleration=CPU1(),
                  out_of_bag_measure=[])

-Create a model for training an ensemble of `n` learners, with optional
-bagging, each with associated model `atom`. Ensembling is useful if
-`fit!(machine(atom, data...))` does not create identical models on
-repeated calls (ie, is a stochastic model, such as a decision tree
-with randomized node selection criteria), or if `bagging_fraction` is
-set to a value less than 1.0, or both. The constructor fails if no
-`atom` is specified.
-
-Only atomic models supporting targets with scitype
-`AbstractVector{<:Finite}` (univariate classifiers) or
-`AbstractVector{<:Continuous}` (univariate regressors) are supported.
+Create a model for training an ensemble of `n` clones of `model`, with
+optional bagging. Ensembling is useful if `fit!(machine(model,
+data...))` does not create identical models on repeated calls (ie, is
+a stochastic model, such as a decision tree with randomized node
+selection criteria), or if `bagging_fraction` is set to a value less
+than 1.0, or both.
+
+Here the atomic `model` must support targets with scitype
+`AbstractVector{<:Finite}` (single-target classifiers) or
+`AbstractVector{<:Continuous}` (single-target regressors).

If `rng` is an integer, then `MersenneTwister(rng)` is the random
number generator used for bagging. Otherwise some `AbstractRNG` object
is expected.

-The atomic predictions are weighted according to the vector
+The atomic predictions are optionally weighted according to the vector
`atomic_weights` (to allow for external optimization) except in the
-case that `atom` is a `Deterministic` classifier. Uniform
-atomic weights are used if `weight` has zero length.
+case that `model` is a `Deterministic` classifier, in which case
+`atomic_weights` are ignored.

The ensemble model is `Deterministic` or `Probabilistic`, according to
the corresponding supertype of `atom`. In the case of deterministic
@@ -292,20 +291,19 @@ particular, for regressors, the ensemble prediction on each input
pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl
package, where `D` is the type of predicted distribution for `atom`.

-The `acceleration` keyword argument is used to specify the compute resource (a
-subtype of `ComputationalResources.AbstractResource`) that will be used to
-accelerate/parallelize ensemble fitting.
+Specify `acceleration=CPUProcesses()` for distributed computing, or
+`CPUThreads()` for multithreading.

If a single measure or non-empty vector of measures is specified by
`out_of_bag_measure`, then out-of-bag estimates of performance are
-written to the trainig report (call `report` on the trained
+written to the training report (call `report` on the trained
machine wrapping the ensemble model).

-*Important:* If sample weights `w` (as opposed to atomic weights) are
-specified when constructing a machine for the ensemble model, as in
-`mach = machine(ensemble_model, X, y, w)`, then `w` is used by any
-measures specified in `out_of_bag_measure` that support sample
-weights.
-
+*Important:* If sample weights `w` (not to be confused with atomic
+weights) are specified when constructing a machine for the ensemble
+model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is
+used by any measures specified in `out_of_bag_measure` that support
+sample weights.
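+
+For illustration only (assuming `DecisionTreeClassifier` has been
+loaded from some interface package, and that `X`, `y` is a supported
+input-target pair), a bagged ensemble of 100 trees might be built
+like this:
+
+    atom = DecisionTreeClassifier()  # hypothetical atomic model
+    forest = EnsembleModel(atom, n=100, bagging_fraction=0.8)
+    mach = fit!(machine(forest, X, y))
+    predict(mach, X)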
+*Important:* If sample weights `w` (not to be confused with atomic +weights) are specified when constructing a machine for the ensemble +model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is +used by any measures specified in `out_of_bag_measure` that support +sample weights. """ function EnsembleModel(args...; @@ -411,7 +409,7 @@ function MMI.fit(model::EitherEnsembleModel{Atom}, acceleration = model.acceleration if acceleration isa CPUProcesses && nworkers() == 1 - acceleration = default_resource() + acceleration = CPU1() end if model.out_of_bag_measure isa Vector From d9cd32e14d4f1a8a44f9bf1f83e724b1a1cc4ade Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 13 Dec 2021 11:54:05 +1300 Subject: [PATCH 3/6] add compat MLJBase="0.19" --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 715723d..c04b5de 100644 --- a/Project.toml +++ b/Project.toml @@ -21,6 +21,7 @@ CategoricalArrays = "0.8, 0.9, 0.10" CategoricalDistributions = "0.1" ComputationalResources = "0.3" Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" +MLJBase = "0.19" MLJModelInterface = "0.4.1, 1.1" ProgressMeter = "1.1" ScientificTypesBase = "2" From 50d7810ae02d87fd289dbd1927b8df92e5586d97 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 13 Dec 2021 11:59:00 +1300 Subject: [PATCH 4/6] bump requirement for CategoricalDistributions to 0.1.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c04b5de..87f6569 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] CategoricalArrays = "0.8, 0.9, 0.10" -CategoricalDistributions = "0.1" +CategoricalDistributions = "0.1.2" ComputationalResources = "0.3" Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" MLJBase = "0.19" From 0037e6be84b36162aea40088a9f1d802acd4f429 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 24 Dec 2021 09:04:47 +1300 Subject: [PATCH 5/6] bump version 0.2.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 87f6569..8ca8766 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJEnsembles" uuid = "50ed68f4-41fd-4504-931a-ed422449fee0" authors = ["Anthony D. Blaom "] -version = "0.1.2" +version = "0.2.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From da40e1136dcdc19fae61cd7ce533e9dc05d16f2f Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 28 Dec 2021 09:28:06 +1300 Subject: [PATCH 6/6] extend compat ScientificTypesBase = "2,3" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8ca8766..482739d 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" MLJBase = "0.19" MLJModelInterface = "0.4.1, 1.1" ProgressMeter = "1.1" -ScientificTypesBase = "2" +ScientificTypesBase = "2,3" StatsBase = "0.32, 0.33" julia = "1.1"