From 640c285ad48a34edfb56591c13b817a33e3dd5db Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 13 Dec 2021 11:34:39 +1300 Subject: [PATCH 1/6] address fact that UnivariateFinite has moved out of MLJBase --- Project.toml | 13 +- src/MLJEnsembles.jl | 16 ++ src/ensembles.jl | 281 +++++++++++++------------------ test/_models.jl | 16 ++ test/_models/Constant.jl | 206 ++++++++++++++++++++++ test/_models/NearestNeighbors.jl | 170 +++++++++++++++++++ test/ensembles.jl | 58 +++---- test/runtests.jl | 2 + 8 files changed, 565 insertions(+), 197 deletions(-) create mode 100644 test/_models.jl create mode 100644 test/_models/Constant.jl create mode 100644 test/_models/NearestNeighbors.jl diff --git a/Project.toml b/Project.toml index 2b2487a..715723d 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.2" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" +CategoricalDistributions = "af321ab8-2d2e-40a6-b165-3d674595d28e" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -12,25 +13,25 @@ MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" +ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] CategoricalArrays = "0.8, 0.9, 0.10" +CategoricalDistributions = "0.1" ComputationalResources = "0.3" Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" -MLJBase = "0.18" MLJModelInterface = "0.4.1, 1.1" ProgressMeter = "1.1" -ScientificTypes = "2" +ScientificTypesBase = "2" StatsBase = "0.32, 0.33" julia = "1.1" [extras] -MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" +Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["MLJModels", "NearestNeighborModels", "StableRNGs", "Test"] +test = ["Distances", "NearestNeighbors", "StableRNGs", "Test"] diff --git a/src/MLJEnsembles.jl b/src/MLJEnsembles.jl index fde68af..1d61f73 100644 --- a/src/MLJEnsembles.jl +++ b/src/MLJEnsembles.jl @@ -1,5 +1,21 @@ module MLJEnsembles +using MLJModelInterface +import MLJModelInterface: predict, fit +import MLJBase # still needed for aggregating measures in oob-estimates of error +using Random +using CategoricalArrays +using CategoricalDistributions +using ComputationalResources +using Distributed +import Distributions +using ProgressMeter +import StatsBase + +export EnsembleModel + +const MMI = MLJModelInterface + include("ensembles.jl") end # module diff --git a/src/ensembles.jl b/src/ensembles.jl index aae9de0..d955ce0 100644 --- a/src/ensembles.jl +++ b/src/ensembles.jl @@ -1,18 +1,4 @@ -using MLJModelInterface -import MLJModelInterface: predict, fit -using Random -using CategoricalArrays -using ComputationalResources -using MLJBase -using Distributed -import Distributions -using ScientificTypes: Continuous -using ProgressMeter -import StatsBase - -export EnsembleModel - -## ENSEMBLES OF FITRESULTS +# # ENSEMBLES OF FITRESULTS # Atom is atomic model type, eg, DecisionTree # R will be the tightest type of the atom fit-results. 
@@ -95,14 +81,10 @@ function predict(wens::WrappedEnsemble, atomic_weights, Xnew, # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent): # a matrix of probability distributions: - preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) - predictions = hcat(preds_gen...) - n_rows = size(predictions, 1) + predictions = [predict(atom, fitresult, Xnew) for fitresult in ensemble] # the weighted averages over the ensemble of the discrete pdf's: - predictions = [average([predictions[i, k] for k in 1:n_atoms], weights=atomic_weights) for i in 1:n_rows] - - return predictions + return atomic_weights .* predictions |> sum end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, @@ -119,22 +101,27 @@ function predict(wens::WrappedEnsemble, atomic_weights, Xnew, preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) predictions = hcat(preds_gen...) + # TODO: return normal distributions in special case of normal predictions # n_rows = size(predictions, 1) - # # the weighted average over the ensemble of the pdf means and pdf variances: - # μs = [sum([atomic_weights[k]*mean(predictions[i,k]) for k in 1:n_atoms]) for i in 1:n_rows] - # σ2s = [sum([atomic_weights[k]*var(predictions[i,k]) for k in 1:n_atoms]) for i in 1:n_rows] + # # the weighted average over the ensemble of the pdf + # # means and pdf variances: + # μs = [sum([atomic_weights[k]*mean(predictions[i,k]) + # for k in 1:n_atoms]) for i in 1:n_rows] + # σ2s = [sum([atomic_weights[k]*var(predictions[i,k]) + # for k in 1:n_atoms]) for i in 1:n_rows] # # a vector of normal probability distributions: # prediction = [Distributions.Normal(μs[i], sqrt(σ2s[i])) for i in 1:n_rows] - prediction = [Distributions.MixtureModel(predictions[i,:], atomic_weights) for i in 1:size(predictions, 1)] + prediction = [Distributions.MixtureModel(predictions[i,:], atomic_weights) + for i in 1:size(predictions, 1)] return prediction end -## CORE ENSEMBLE-BUILDING FUNCTIONS +# # CORE ENSEMBLE-BUILDING FUNCTIONS # for when out-of-bag performance estimates are requested: function get_ensemble_and_indices(atom::Supervised, verbosity, n, n_patterns, @@ -192,10 +179,10 @@ _reducer(p::Tuple, q::Tuple) = (vcat(p[1], q[1]), vcat(p[2], q[2])) -## ENSEMBLE MODEL FOR DETERMINISTIC MODELS +# # ENSEMBLE MODEL TYPES mutable struct DeterministicEnsembleModel{Atom<:Deterministic} <: Deterministic - atom::Atom + model::Atom atomic_weights::Vector{Float64} bagging_fraction::Float64 rng::Union{Int,AbstractRNG} @@ -204,63 +191,8 @@ mutable struct DeterministicEnsembleModel{Atom<:Deterministic} <: Deterministic out_of_bag_measure # TODO: type this end -function clean!(model::DeterministicEnsembleModel) - - target_scitype(model.atom) <: Union{AbstractVector{<:Finite}, AbstractVector{<:Continuous}} || - error("`atom` has unsupported target_scitype "* - "`$(target_scitype(model.atom))`. ") - - message = "" - - if model.bagging_fraction > 1 || model.bagging_fraction <= 0 - message = message*"`bagging_fraction` should be "* - "in the range (0,1]. Reset to 1. " - model.bagging_fraction = 1.0 - end - - if target_scitype(model.atom) <: AbstractVector{<:Finite} && !isempty(model.atomic_weights) - message = message*"atomic_weights will be ignored to form predictions. " - elseif !isempty(model.atomic_weights) - total = sum(model.atomic_weights) - if !(total ≈ 1.0) - message = message*"atomic_weights should sum to one and are being automatically normalized. 
" - model.atomic_weights = model.atomic_weights/total - end - end - - return message - -end - -# constructor to infer type automatically: -DeterministicEnsembleModel(atom::Atom, atomic_weights, - bagging_fraction, rng, n, acceleration, out_of_bag_measure) where Atom<:Deterministic = - DeterministicEnsembleModel{Atom}(atom, atomic_weights, - bagging_fraction, rng, n, acceleration, out_of_bag_measure) - -# lazy keyword constructors: -function DeterministicEnsembleModel(;atom=DeterministicConstantClassifier(), - atomic_weights=Float64[], - bagging_fraction=0.8, - rng=Random.GLOBAL_RNG, - n::Int=100, - acceleration=default_resource(), - out_of_bag_measure=[]) - - model = DeterministicEnsembleModel(atom, atomic_weights, bagging_fraction, rng, - n, acceleration, out_of_bag_measure) - - message = clean!(model) - isempty(message) || @warn message - - return model -end - - -## ENSEMBLE MODEL FOR PROBABILISTIC MODELS - mutable struct ProbabilisticEnsembleModel{Atom<:Probabilistic} <: Probabilistic - atom::Atom + model::Atom atomic_weights::Vector{Float64} bagging_fraction::Float64 rng::Union{Int, AbstractRNG} @@ -269,7 +201,18 @@ mutable struct ProbabilisticEnsembleModel{Atom<:Probabilistic} <: Probabilistic out_of_bag_measure end -function clean!(model::ProbabilisticEnsembleModel) +const EitherEnsembleModel{Atom} = + Union{DeterministicEnsembleModel{Atom}, ProbabilisticEnsembleModel{Atom}} + +function clean!(model::EitherEnsembleModel) + + if model isa DeterministicEnsembleModel + + ok_target = target_scitype(model.model) <: + Union{AbstractVector{<:Finite},AbstractVector{<:Continuous}} + ok_target || error("atomic model has unsupported target_scitype "* + "`$(target_scitype(model.model))`. ") + end message = "" @@ -279,10 +222,17 @@ function clean!(model::ProbabilisticEnsembleModel) model.bagging_fraction = 1.0 end - if !isempty(model.atomic_weights) + isempty(model.atomic_weights) && return message + + if model isa Deterministic && + target_scitype(model.model) <: AbstractVector{<:Finite} + message = message*"`atomic_weights` will be ignored to "* + "form predictions, as unsupported for `Finite` targets. " + else total = sum(model.atomic_weights) if !(total ≈ 1.0) - message = message*"atomic_weights should sum to one and are being automatically normalized. " + message = message*"atomic_weights should sum to one and are being "* + "replaced by normalized weights. " model.atomic_weights = model.atomic_weights/total end end @@ -291,37 +241,23 @@ function clean!(model::ProbabilisticEnsembleModel) end -# constructor to infer type automatically: -ProbabilisticEnsembleModel(atom::Atom, atomic_weights, bagging_fraction, rng, n, acceleration, out_of_bag_measure) where Atom<:Probabilistic = - ProbabilisticEnsembleModel{Atom}(atom, atomic_weights, bagging_fraction, rng, n, acceleration, out_of_bag_measure) - -# lazy keyword constructor: -function ProbabilisticEnsembleModel(;atom=ConstantProbabilisticClassifier(), - atomic_weights=Float64[], - bagging_fraction=0.8, - rng=Random.GLOBAL_RNG, - n::Int=100, - acceleration=default_resource(), - out_of_bag_measure=[]) - - model = ProbabilisticEnsembleModel(atom, atomic_weights, bagging_fraction, rng, n, acceleration, out_of_bag_measure) - - message = clean!(model) - isempty(message) || @warn message - return model -end +# # USER-FACING CONSTRUCTOR +const ERR_MODEL_UNSPECIFIED = ArgumentError( +"Expecting atomic model as argument. None specified. Use "* + "`EnsembleModel(model=...)`. 
") +const ERR_TOO_MANY_ARGUMENTS = ArgumentError( + "At most one non-keyword argument, a model, allowed. ") -## COMMON CONSTRUCTOR """ - EnsembleModel(atom=nothing, + EnsembleModel(model, atomic_weights=Float64[], bagging_fraction=0.8, n=100, rng=GLOBAL_RNG, - acceleration=default_resource(), + acceleration=CPU1(), out_of_bag_measure=[]) Create a model for training an ensemble of `n` learners, with optional @@ -372,25 +308,49 @@ measures specified in `out_of_bag_measure` that support sample weights. """ -function EnsembleModel(; args...) - d = Dict(args) - :atom in keys(d) || - error("No atomic model specified. Use EnsembleModel(atom=...)") - if d[:atom] isa Deterministic - return DeterministicEnsembleModel(; d...) - elseif d[:atom] isa Probabilistic - return ProbabilisticEnsembleModel(; d...) +function EnsembleModel(args...; + model=nothing, + atomic_weights=Float64[], + bagging_fraction=0.8, + rng=Random.GLOBAL_RNG, + n::Int=100, + acceleration=CPU1(), + out_of_bag_measure=[]) + + length(args) < 2 || throw(ERR_TOO_MANY_ARGUMENTS) + if length(args) === 1 + atom = first(args) + model === nothing || + @warn "Using `model=$atom`. Ignoring specification "* + "`model=$model`. " + else + model === nothing && throw(ERR_MODEL_UNSPECIFIED) + atom = model end - error("$(d[:atom]) does not appear to be a Supervised model.") -end + arguments = (atom, + atomic_weights, + float(bagging_fraction), + rng, + n, + acceleration, + out_of_bag_measure) + + if atom isa Deterministic + emodel = DeterministicEnsembleModel(arguments...) + elseif atom isa Probabilistic + emodel = ProbabilisticEnsembleModel(arguments...) + else + error("$atom does not appear to be a Supervised model.") + end -## THE COMMON FIT AND PREDICT METHODS + message = clean!(emodel) + isempty(message) || @warn message + return emodel +end -const EitherEnsembleModel{Atom} = - Union{DeterministicEnsembleModel{Atom}, ProbabilisticEnsembleModel{Atom}} -MLJBase.is_wrapper(::Type{<:EitherEnsembleModel}) = true +# # THE COMMON FIT AND PREDICT METHODS function _fit(res::CPU1, func, verbosity, stuff) atom, n, n_patterns, n_train, rng, progress_meter, args = stuff @@ -438,7 +398,7 @@ end end end -function fit(model::EitherEnsembleModel{Atom}, +function MMI.fit(model::EitherEnsembleModel{Atom}, verbosity::Int, args...) 
where Atom<:Supervised X = args[1] @@ -466,7 +426,7 @@ function fit(model::EitherEnsembleModel{Atom}, rng = model.rng end - atom = model.atom + atom = model.model n = model.n n_patterns = nrows(y) n_train = round(Int, floor(model.bagging_fraction*n_patterns)) @@ -487,7 +447,7 @@ function fit(model::EitherEnsembleModel{Atom}, end - fitresult = WrappedEnsemble(model.atom, ensemble) + fitresult = WrappedEnsemble(model.model, ensemble) if !isempty(out_of_bag_measure) @@ -511,10 +471,15 @@ function fit(model::EitherEnsembleModel{Atom}, end for k in eachindex(out_of_bag_measure) m = out_of_bag_measure[k] - if reports_each_observation(m) - s = aggregate(value(m, yhat, Xtest, ytest, wtest), m) + if MMI.reports_each_observation(m) + s = MLJBase.aggregate(MLJBase.value(m, + yhat, + Xtest, + ytest, + wtest), + m) else - s = value(m, yhat, Xtest, ytest, wtest) + s = MLJBase.value(m, yhat, Xtest, ytest, wtest) end metrics[i,k] = s end @@ -522,7 +487,7 @@ function fit(model::EitherEnsembleModel{Atom}, # aggregate metrics across the ensembles: aggregated_metrics = map(eachindex(out_of_bag_measure)) do k - aggregate(metrics[:,k], out_of_bag_measure[k]) + MLJBase.aggregate(metrics[:,k], out_of_bag_measure[k]) end names = Symbol.(string.(out_of_bag_measure)) @@ -540,12 +505,12 @@ end # if n is only parameter that changes, we just append to the existing # ensemble, or truncate it: -function update(model::EitherEnsembleModel, - verbosity::Int, fitresult, old_model, args...) +function MMI.update(model::EitherEnsembleModel, + verbosity::Int, fitresult, old_model, args...) n = model.n - if MLJBase.is_same_except(model.atom, old_model.atom, + if MLJBase.is_same_except(model.model, old_model.model, :n, :atomic_weights, :acceleration) if n > old_model.n verbosity < 1 || @@ -568,7 +533,7 @@ function update(model::EitherEnsembleModel, end -function predict(model::EitherEnsembleModel, fitresult, Xnew) +function MMI.predict(model::EitherEnsembleModel, fitresult, Xnew) n = model.n if isempty(model.atomic_weights) @@ -581,35 +546,27 @@ function predict(model::EitherEnsembleModel, fitresult, Xnew) predict(fitresult, atomic_weights, Xnew) end -## METADATA -# Note: input and target traits are inherited from atom +# # METADATA -MLJBase.supports_weights(::Type{<:EitherEnsembleModel{Atom}}) where Atom = - MLJBase.supports_weights(Atom) +# Note: input and target traits are inherited from atom -MLJBase.load_path(::Type{<:DeterministicEnsembleModel}) = - "MLJ.DeterministicEnsembleModel" -MLJBase.package_name(::Type{<:DeterministicEnsembleModel}) = "MLJ" -MLJBase.package_uuid(::Type{<:DeterministicEnsembleModel}) = "" -MLJBase.package_url(::Type{<:DeterministicEnsembleModel}) = - "https://github.com/alan-turing-institute/MLJ.jl" -MLJBase.is_pure_julia(::Type{<:DeterministicEnsembleModel{Atom}}) where Atom = - MLJBase.is_pure_julia(Atom) -MLJBase.input_scitype(::Type{<:DeterministicEnsembleModel{Atom}}) where Atom = - MLJBase.input_scitype(Atom) -MLJBase.target_scitype(::Type{<:DeterministicEnsembleModel{Atom}}) where Atom = - MLJBase.target_scitype(Atom) - -MLJBase.load_path(::Type{<:ProbabilisticEnsembleModel}) = +MMI.load_path(::Type{<:ProbabilisticEnsembleModel}) = "MLJ.ProbabilisticEnsembleModel" -MLJBase.package_name(::Type{<:ProbabilisticEnsembleModel}) = "MLJ" -MLJBase.package_uuid(::Type{<:ProbabilisticEnsembleModel}) = "" -MLJBase.package_url(::Type{<:ProbabilisticEnsembleModel}) = - "https://github.com/alan-turing-institute/MLJ.jl" -MLJBase.is_pure_julia(::Type{<:ProbabilisticEnsembleModel{Atom}}) where Atom = - 
MLJBase.is_pure_julia(Atom)
-MLJBase.input_scitype(::Type{<:ProbabilisticEnsembleModel{Atom}}) where Atom =
-    MLJBase.input_scitype(Atom)
-MLJBase.target_scitype(::Type{<:ProbabilisticEnsembleModel{Atom}}) where Atom =
-    MLJBase.target_scitype(Atom)
+MMI.load_path(::Type{<:DeterministicEnsembleModel}) =
+    "MLJ.DeterministicEnsembleModel"
+
+MMI.is_wrapper(::Type{<:EitherEnsembleModel}) = true
+MMI.supports_weights(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.supports_weights(Atom)
+MMI.package_name(::Type{<:EitherEnsembleModel}) = "MLJEnsembles"
+MMI.package_uuid(::Type{<:EitherEnsembleModel}) =
+    "50ed68f4-41fd-4504-931a-ed422449fee0"
+MMI.package_url(::Type{<:EitherEnsembleModel}) =
+    "https://github.com/JuliaAI/MLJEnsembles.jl"
+MMI.is_pure_julia(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.is_pure_julia(Atom)
+MMI.input_scitype(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.input_scitype(Atom)
+MMI.target_scitype(::Type{<:EitherEnsembleModel{Atom}}) where Atom =
+    MMI.target_scitype(Atom)
diff --git a/test/_models.jl b/test/_models.jl
new file mode 100644
index 0000000..5943c26
--- /dev/null
+++ b/test/_models.jl
@@ -0,0 +1,16 @@
+# If adding models from MLJModels for testing purposes, then do the
+# following in the interface file (eg, DecisionTree.jl):
+
+# - change `import ..DecisionTree` to `import DecisionTree`
+# - remove wrapping as module
+
+module Models
+
+using MLJBase
+import MLJModelInterface: @mlj_model, metadata_model, metadata_pkg
+import MLJModelInterface
+
+include("_models/Constant.jl")
+include("_models/NearestNeighbors.jl")
+
+end
diff --git a/test/_models/Constant.jl b/test/_models/Constant.jl
new file mode 100644
index 0000000..d48c251
--- /dev/null
+++ b/test/_models/Constant.jl
@@ -0,0 +1,206 @@
+## THE CONSTANT REGRESSOR
+
+const MMI = MLJModelInterface
+export ConstantClassifier, ConstantRegressor,
+    DeterministicConstantRegressor,
+    DeterministicConstantClassifier,
+    ProbabilisticConstantClassifier
+
+import Distributions
+
+"""
+    ConstantRegressor(; distribution_type=Distributions.Normal)
+
+A regressor that, for any new input pattern, predicts the univariate
+probability distribution best fitting the training target data. Use
+`predict_mean` to predict the mean value instead.
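+
+A minimal usage sketch (assuming some table `X` and `Continuous`
+vector `y` are already in scope; only generic MLJBase workflow calls
+are used):
+
+    model = ConstantRegressor()       # fits a `Normal` by default
+    mach = fit!(machine(model, X, y)) # `X`, `y` assumed in scope
+    predict(mach, X)[1]               # a `Distributions.Normal` object
+    predict_mean(mach, X)             # vector of mean predictions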
+"""
+struct ConstantRegressor{D} <: MMI.Probabilistic end
+
+function ConstantRegressor(; distribution_type=Distributions.Normal)
+    model = ConstantRegressor{distribution_type}()
+    message = clean!(model)
+    isempty(message) || @warn message
+    return model
+end
+
+function MMI.clean!(model::ConstantRegressor{D}) where D
+    message = ""
+    D <: Distributions.Sampleable ||
+        error("$D is not a valid distribution_type.")
+    return message
+end
+
+MMI.reformat(::ConstantRegressor, X) = (MMI.matrix(X),)
+MMI.reformat(::ConstantRegressor, X, y) = (MMI.matrix(X), y)
+MMI.selectrows(::ConstantRegressor, I, A) = (view(A, I, :),)
+MMI.selectrows(::ConstantRegressor, I, A, y) = (view(A, I, :), y[I])
+
+function MMI.fit(::ConstantRegressor{D}, verbosity::Int, A, y) where D
+    fitresult = Distributions.fit(D, y)
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.fitted_params(::ConstantRegressor, fitresult) =
+    (target_distribution=fitresult,)
+
+MMI.predict(::ConstantRegressor, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+##
+## THE CONSTANT DETERMINISTIC REGRESSOR (FOR TESTING)
+##
+
+struct DeterministicConstantRegressor <: MMI.Deterministic end
+
+function MMI.fit(::DeterministicConstantRegressor, verbosity::Int, X, y)
+    fitresult = mean(y)
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.reformat(::DeterministicConstantRegressor, X) = (MMI.matrix(X),)
+MMI.reformat(::DeterministicConstantRegressor, X, y) = (MMI.matrix(X), y)
+MMI.selectrows(::DeterministicConstantRegressor, I, A) = (view(A, I, :),)
+MMI.selectrows(::DeterministicConstantRegressor, I, A, y) =
+    (view(A, I, :), y[I])
+
+MMI.predict(::DeterministicConstantRegressor, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+##
+## THE CONSTANT CLASSIFIER
+##
+
+"""
+    ConstantClassifier()
+
+A classifier that, for any new input pattern, `predict`s the
+`UnivariateFinite` probability distribution `d` best fitting the
+training target data. So, `pdf(d, level)` is the proportion of levels
+in the training data coinciding with `level`. Use `predict_mode` to
+obtain the training target mode instead.
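+
+A minimal usage sketch (assuming a table `X` and a categorical vector
+`y` are already in scope):
+
+    model = ConstantClassifier()
+    mach = fit!(machine(model, X, y)) # `X`, `y` assumed in scope
+    d = predict(mach, X)[1]           # a `UnivariateFinite` distribution
+    pdf(d, levels(y)[1])              # proportion of that level in `y`
+    predict_mode(mach, X)             # most frequent level, for every row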
+"""
+mutable struct ConstantClassifier <: MMI.Probabilistic
+    testing::Bool
+    bogus::Int
+end
+
+ConstantClassifier(; testing=false, bogus=0) =
+    ConstantClassifier(testing, bogus)
+
+function MMI.reformat(model::ConstantClassifier, X)
+    model.testing && @info "reformatting X"
+    return (MMI.matrix(X),)
+end
+
+function MMI.reformat(model::ConstantClassifier, X, y)
+    model.testing && @info "reformatting X, y"
+    return (MMI.matrix(X), y)
+end
+
+function MMI.reformat(model::ConstantClassifier, X, y, w)
+    model.testing && @info "reformatting X, y, w"
+    return (MMI.matrix(X), y, w)
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A)
+    model.testing && @info "resampling X"
+    return (view(A, I, :),)
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A, y)
+    model.testing && @info "resampling X, y"
+    return (view(A, I, :), y[I])
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A, y, ::Nothing)
+    model.testing && @info "resampling X, y, nothing"
+    return (view(A, I, :), y[I], nothing)
+end
+
+function MMI.selectrows(model::ConstantClassifier, I, A, y, w)
+    model.testing && @info "resampling X, y, w"
+    return (view(A, I, :), y[I], w[I])
+end
+
+# here `w` is an optional vector of per-observation weights:
+function MMI.fit(::ConstantClassifier, verbosity::Int, A, y, w=nothing)
+    fitresult = Distributions.fit(MLJBase.UnivariateFinite, y, w)
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.fitted_params(::ConstantClassifier, fitresult) =
+    (target_distribution=fitresult,)
+
+MMI.predict(::ConstantClassifier, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+##
+## DETERMINISTIC CONSTANT CLASSIFIER (FOR TESTING)
+##
+
+struct DeterministicConstantClassifier <: MMI.Deterministic end
+
+function MMI.fit(::DeterministicConstantClassifier, verbosity::Int, X, y)
+    # dump missing target values and make into a regular array:
+    fitresult = mode(skipmissing(y) |> collect) # a CategoricalValue
+    cache = nothing
+    report = NamedTuple()
+    return fitresult, cache, report
+end
+
+MMI.reformat(::DeterministicConstantClassifier, X) = (MMI.matrix(X),)
+MMI.reformat(::DeterministicConstantClassifier, X, y) = (MMI.matrix(X), y)
+MMI.selectrows(::DeterministicConstantClassifier, I, A) = (view(A, I, :),)
+MMI.selectrows(::DeterministicConstantClassifier, I, A, y) =
+    (view(A, I, :), y[I])
+
+MMI.predict(::DeterministicConstantClassifier, fitresult, Xnew) =
+    fill(fitresult, nrows(Xnew))
+
+#
+# METADATA
+#
+
+metadata_pkg.((ConstantRegressor, ConstantClassifier,
+               DeterministicConstantRegressor, DeterministicConstantClassifier),
+              name="MLJModels",
+              uuid="d491faf4-2d78-11e9-2867-c94bc002c0b7",
+              url="https://github.com/alan-turing-institute/MLJModels.jl",
+              julia=true,
+              license="MIT",
+              is_wrapper=false)
+
+metadata_model(ConstantRegressor,
+               input=MMI.Table,
+               target=AbstractVector{MMI.Continuous},
+               weights=false,
+               descr="Constant regressor (Probabilistic).",
+               path="MLJModels.ConstantRegressor")
+
+metadata_model(DeterministicConstantRegressor,
+               input=MMI.Table,
+               target=AbstractVector{MMI.Continuous},
+               weights=false,
+               descr="Constant regressor (Deterministic).",
+               path="MLJModels.DeterministicConstantRegressor")
+
+metadata_model(ConstantClassifier,
+               input=MMI.Table,
+               target=AbstractVector{<:MMI.Finite},
+               weights=true,
+               descr="Constant classifier (Probabilistic).",
+               path="MLJModels.ConstantClassifier")
+
+metadata_model(DeterministicConstantClassifier,
+               input=MMI.Table,
+               target=AbstractVector{<:MMI.Finite},
+               weights=false,
+               descr="Constant classifier (Deterministic).",
+               path="MLJModels.DeterministicConstantClassifier")
diff --git a/test/_models/NearestNeighbors.jl b/test/_models/NearestNeighbors.jl
new file mode 100644
index 0000000..277a564
--- /dev/null
+++ b/test/_models/NearestNeighbors.jl
@@ -0,0 +1,170 @@
+export KNNRegressor, KNNClassifier
+
+using Distances
+
+import NearestNeighbors
+
+const NN = NearestNeighbors
+
+const KNNRegressorDescription =
+    """
+    K-Nearest Neighbors regressor: predicts the response associated with a new point
+    by taking an average of the responses of the K-nearest points.
+    """
+
+const KNNClassifierDescription =
+    """
+    K-Nearest Neighbors classifier: predicts the class associated with a new point
+    by taking a vote over the classes of the K-nearest points.
+    """
+
+const KNNFields =
+    """
+    ## Keywords
+
+    * `K=5` : number of neighbors
+    * `algorithm=:kdtree` : one of `(:kdtree, :brutetree, :balltree)`
+    * `metric=Euclidean()` : a `Metric` object for the distance between points
+    * `leafsize=10` : at what number of points to stop splitting the tree
+    * `reorder=true` : if `true`, points close in distance are placed close in memory
+    * `weights=:uniform` : one of `(:uniform, :distance)`; if `:uniform`, all neighbors
+                           are considered equally important; if `:distance`, closer
+                           neighbors are proportionally more important.
+
+    See also the [package documentation](https://github.com/KristofferC/NearestNeighbors.jl).
+    """
+
+"""
+    KNNRegressor(;kwargs...)
+
+$KNNRegressorDescription
+
+$KNNFields
+"""
+@mlj_model mutable struct KNNRegressor <: MLJBase.Deterministic
+    K::Int = 5::(_ > 0)
+    algorithm::Symbol = :kdtree::(_ in (:kdtree, :brutetree, :balltree))
+    metric::Metric = Euclidean()
+    leafsize::Int = 10::(_ ≥ 0)
+    reorder::Bool = true
+    weights::Symbol = :uniform::(_ in (:uniform, :distance))
+end
+
+"""
+    KNNClassifier(;kwargs...)
+
+$KNNClassifierDescription
+
+$KNNFields
+"""
+@mlj_model mutable struct KNNClassifier <: MLJBase.Probabilistic
+    K::Int = 5::(_ > 0)
+    algorithm::Symbol = :kdtree::(_ in (:kdtree, :brutetree, :balltree))
+    metric::Metric = Euclidean()
+    leafsize::Int = 10::(_ ≥ 0)
+    reorder::Bool = true
+    weights::Symbol = :uniform::(_ in (:uniform, :distance))
+end
+
+const KNN = Union{KNNRegressor, KNNClassifier}
+
+function MLJBase.fit(m::KNN, verbosity::Int, X, y, w=nothing)
+    Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
+    if m.algorithm == :kdtree
+        tree = NN.KDTree(Xmatrix; leafsize=m.leafsize, reorder=m.reorder)
+    elseif m.algorithm == :balltree
+        tree = NN.BallTree(Xmatrix; leafsize=m.leafsize, reorder=m.reorder)
+    elseif m.algorithm == :brutetree
+        tree = NN.BruteTree(Xmatrix; leafsize=m.leafsize, reorder=m.reorder)
+    end
+    report = NamedTuple()
+    return (tree, y, w), nothing, report
+end
+
+MLJBase.fitted_params(model::KNN, (tree, _)) = (tree=tree,)
+
+function MLJBase.predict(m::KNNClassifier, (tree, y, w), X)
+    Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
+    # for each entry, get the K closest training points + their distances
+    idxs, dists = NN.knn(tree, Xmatrix, m.K)
+
+    preds = Vector{MLJBase.UnivariateFinite}(undef, length(idxs))
+    classes = MLJBase.classes(y[1])
+    probas = zeros(length(classes))
+
+    w_ = ones(m.K)
+
+    # go over each test record, and for each go over the k nearest entries
+    for i in eachindex(idxs)
+        idxs_ = idxs[i]
+        dists_ = dists[i]
+        labels = y[idxs_]
+        if w !== nothing
+            w_ = w[idxs_]
+        end
+        probas .*= 0.0
+        if m.weights == :uniform
+            for (k, label) in enumerate(labels)
+                probas[classes .== label] .+= 1.0 / m.K * w_[k]
+            end
+        else
+            for (k, label) in enumerate(labels)
+                probas[classes .== label] .+= 1.0 / dists_[k] * w_[k]
+            end
+        end
+        # normalize so that the probabilities sum to 1
+        probas ./= sum(probas)
+        preds[i] = MLJBase.UnivariateFinite(classes, probas)
+    end
+    return preds
+end
+
+function MLJBase.predict(m::KNNRegressor, (tree, y, w), X)
+    Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
+    idxs, dists = NN.knn(tree, Xmatrix, m.K)
+    preds = zeros(length(idxs))
+
+    w_ = ones(m.K)
+
+    for i in eachindex(idxs)
+        idxs_ = idxs[i]
+        dists_ = dists[i]
+        values = y[idxs_]
+        if w !== nothing
+            w_ = w[idxs_]
+        end
+        if m.weights == :uniform
+            preds[i] = sum(values .* w_) / sum(w_)
+        else
+            preds[i] = sum(values .* w_ .* (1.0 .- dists_ ./ sum(dists_))) / (sum(w_) - 1)
+        end
+    end
+    return preds
+end
+
+# ====
+
+metadata_pkg.((KNNRegressor, KNNClassifier),
+              name="NearestNeighbors",
+              uuid="b8a86587-4115-5ab1-83bc-aa920d37bbce",
+              url="https://github.com/KristofferC/NearestNeighbors.jl",
+              julia=true,
+              license="MIT",
+              is_wrapper=false
+              )
+
+metadata_model(KNNRegressor,
+               input=MLJBase.Table(MLJBase.Continuous),
+               target=AbstractVector{MLJBase.Continuous},
+               weights=true,
+               descr=KNNRegressorDescription
+               )
+
+metadata_model(KNNClassifier,
+               input=MLJBase.Table(MLJBase.Continuous),
+               target=AbstractVector{<:MLJBase.Finite},
+               weights=true,
+               descr=KNNClassifierDescription
+               )
+
+
diff --git a/test/ensembles.jl b/test/ensembles.jl
index 4232167..4bb3d49 100644
--- a/test/ensembles.jl
+++ b/test/ensembles.jl
@@ -5,11 +5,10 @@ using Random
using StableRNGs
using MLJEnsembles
using MLJBase
-using MLJModels
+using ..Models
using CategoricalArrays
import Distributions

-KNNRegressor = @load KNNRegressor verbosity=0

## HELPER FUNCTIONS

@@ -20,10 +19,10 @@ KNNRegressor = @load KNNRegressor verbosity=0
pair_vcat(p, q) =
(vcat(p[1], q[1]), vcat(p[2], q[2]))


## WRAPPED ENSEMBLES OF FITRESULTS

# target is :deterministic :multiclass false:
-atom = MLJModels.DeterministicConstantClassifier()
+atom = DeterministicConstantClassifier()
L = ['a', 'b', 'j']
L2 = categorical(L)
ensemble = [L2[1], L2[3], L2[3], L2[2]]
atomic_weights = [0.1, 0.5, 0.2, 0.2]
wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
X = MLJEnsembles.table(rand(3,5))
categorical(vcat(['j','j','j'],L))[1:3]

# target is :deterministic :continuous false:
-atom = MLJModels.DeterministicConstantRegressor()
+atom = DeterministicConstantRegressor()
ensemble = Float64[4, 7, 4, 4]
atomic_weights = [0.1, 0.5, 0.2, 0.2]
wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
@@ -45,9 +44,9 @@ wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
atom = ConstantClassifier()
L = categorical(['a', 'b', 'j'])
d1 = UnivariateFinite(L, [0.1, 0.2, 0.7])
-fitresult1 = (L, pdf([d1, ], L))
+fitresult1 = d1
d2 = UnivariateFinite(L, [0.2, 0.3, 0.5])
-fitresult2 = (L, pdf([d2, ], L))
+fitresult2 = d2
ensemble = [fitresult2, fitresult1, fitresult2, fitresult2]
atomic_weights = [0.1, 0.5, 0.2, 0.2]
wens = MLJEnsembles.WrappedEnsemble(atom, ensemble)
@@ -71,11 +70,11 @@ d = predict(wens, atomic_weights, X)[1]
## ENSEMBLE MODEL

# target is :deterministic :multiclass false:
-atom=MLJModels.DeterministicConstantClassifier()
+atom=DeterministicConstantClassifier()
X = MLJEnsembles.table(ones(5,3))
y = categorical(collect("asdfa"))
train, test = partition(1:length(y), 0.8);
-ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom)
+ensemble_model = EnsembleModel(model=atom)
ensemble_model.n = 10
fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y)
predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test))
@@ -87,11 +86,11 @@ p = predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test))
@test MLJBase.target_scitype(ensemble_model) == MLJBase.target_scitype(atom)

# target is :deterministic :continuous false:
-atom = MLJModels.DeterministicConstantRegressor()
+atom = DeterministicConstantRegressor()
X = MLJEnsembles.table(ones(5,3))
y = Float64[1.0, 2.0, 1.0, 1.0, 1.0]
train, test = partition(1:length(y), 0.8);
-ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom)
+ensemble_model = EnsembleModel(model=atom)
ensemble_model.n = 10
fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y)
@test reduce(* , [x ≈ 1.0 || x ≈ 1.25 for x in fitresult.ensemble])
@@ -106,21 +105,21 @@ ensemble_model.atomic_weights = atomic_weights
predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test))

# target is :deterministic :continuous false:
-atom = MLJModels.DeterministicConstantRegressor()
+atom = DeterministicConstantRegressor()
rng = StableRNG(1234)
X = MLJEnsembles.table(randn(rng, 10, 3))
y = selectcols(X, 1)
std(y)
train, test = partition(1:length(y), 0.8);
-ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom, rng=rng)
-ensemble_model.out_of_bag_measure = [MLJEnsembles.rms,MLJEnsembles.rmsp]
+ensemble_model = EnsembleModel(model=atom, rng=rng)
+ensemble_model.out_of_bag_measure = [rms, rmsp]
ensemble_model.n = 10
fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y)

# TODO: the following test fails in distributed version (because of
# multiple rng's ?)
@test abs(report.oob_measurements[1] - std(y)) < 0.25 -ensemble_model = MLJEnsembles.DeterministicEnsembleModel(atom=atom,rng=Random.MersenneTwister(1)) -ensemble_model.out_of_bag_measure = MLJEnsembles.rms +ensemble_model = EnsembleModel(model=atom,rng=Random.MersenneTwister(1)) +ensemble_model.out_of_bag_measure = rms ensemble_model.n = 2 fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) @@ -129,7 +128,7 @@ atom = ConstantClassifier() X = MLJEnsembles.table(ones(5,3)) y = categorical(collect("asdfa")) train, test = partition(1:length(y), 0.8); -ensemble_model = MLJEnsembles.ProbabilisticEnsembleModel(atom=atom) +ensemble_model = EnsembleModel(model=atom) ensemble_model.n = 10 fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) fitresult.ensemble @@ -159,7 +158,7 @@ atom = ConstantRegressor() X = MLJEnsembles.table(ones(5,3)) y = Float64[1.0, 2.0, 2.0, 1.0, 1.0] train, test = partition(1:length(y), 0.8); -ensemble_model = MLJEnsembles.ProbabilisticEnsembleModel(atom=atom) +ensemble_model = EnsembleModel(model=atom) ensemble_model.n = 10 fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) d1 = Distributions.fit(Distributions.Normal, [1,1,2,2]) @@ -183,18 +182,18 @@ predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test)) # @test MLJBase.output_is(ensemble_model) == MLJBase.output_is(atom) # test generic constructor: -@test EnsembleModel(atom=ConstantRegressor()) isa Probabilistic -@test EnsembleModel(atom=MLJModels.DeterministicConstantRegressor()) isa Deterministic +@test EnsembleModel(model=ConstantRegressor()) isa Probabilistic +@test EnsembleModel(model=DeterministicConstantRegressor()) isa Deterministic @testset "further test of sample weights" begin rng = StableRNG(123) N = 20 X = (x = rand(rng, 3N), ); y = categorical(rand(rng, "abbbc", 3N)); - atom = (@load KNNClassifier verbosity=0)() - ensemble_model = MLJEnsembles.ProbabilisticEnsembleModel(atom=atom, - bagging_fraction=1, - n = 5, rng=rng) + atom = KNNClassifier() + ensemble_model = EnsembleModel(model=atom, + bagging_fraction=1, + n = 5, rng=rng) fitresult, cache, report = MLJEnsembles.fit(ensemble_model, 0, X, y) @test predict_mode(ensemble_model, fitresult, (x = [0, ],))[1] == 'b' w = map(y) do η @@ -219,34 +218,35 @@ predict(ensemble_model, fitresult, MLJEnsembles.selectrows(X, test)) end - ## MACHINE TEST (INCLUDES TEST OF UPDATE) +## MACHINE TEST (INCLUDES TEST OF UPDATE) N =100 X = (x1=rand(N), x2=rand(N), x3=rand(N)) y = 2X.x1 - X.x2 + 0.05*rand(N) atom = KNNRegressor(K=7) -ensemble_model = EnsembleModel(atom=atom) +ensemble_model = EnsembleModel(model=atom) ensemble = machine(ensemble_model, X, y) train, test = partition(eachindex(y), 0.7) fit!(ensemble, rows=train, verbosity=0) @test length(ensemble.fitresult.ensemble) == ensemble_model.n ensemble_model.n = 15 @test_logs((:info, r"Training"), - fit!(ensemble)) + fit!(ensemble, verbosity=1)) @test length(ensemble.fitresult.ensemble) == 15 ensemble_model.n = 20 @test_logs((:info, r"Updating"), - # (:info, r"Building"), + (:info, r"Building"), fit!(ensemble)) @test length(ensemble.fitresult.ensemble) == 20 ensemble_model.n = 5 @test_logs((:info, r"Updating"), - # (:info, r"Truncating"), + (:info, r"Truncating"), fit!(ensemble)) @test length(ensemble.fitresult.ensemble) == 5 @test !isnan(predict(ensemble, MLJEnsembles.selectrows(X, test))[1]) end + true diff --git a/test/runtests.jl b/test/runtests.jl index 06685a3..a068d43 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1 +1,3 @@ 
+include("_models.jl")
include("ensembles.jl")
+

From cd46dd8f17aaf837c569ae107afe7ddbf4691475 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Mon, 13 Dec 2021 11:48:54 +1300
Subject: [PATCH 2/6] update doc-string

---
 src/ensembles.jl | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/src/ensembles.jl b/src/ensembles.jl
index d955ce0..4f43e0e 100644
--- a/src/ensembles.jl
+++ b/src/ensembles.jl
@@ -260,26 +260,25 @@ const ERR_TOO_MANY_ARGUMENTS = ArgumentError(
                  acceleration=CPU1(),
                  out_of_bag_measure=[])

-Create a model for training an ensemble of `n` learners, with optional
-bagging, each with associated model `atom`. Ensembling is useful if
-`fit!(machine(atom, data...))` does not create identical models on
-repeated calls (ie, is a stochastic model, such as a decision tree
-with randomized node selection criteria), or if `bagging_fraction` is
-set to a value less than 1.0, or both. The constructor fails if no
-`atom` is specified.
-
-Only atomic models supporting targets with scitype
-`AbstractVector{<:Finite}` (univariate classifiers) or
-`AbstractVector{<:Continuous}` (univariate regressors) are supported.
+Create a model for training an ensemble of `n` clones of `model`, with
+optional bagging. Ensembling is useful if `fit!(machine(model,
+data...))` does not create identical models on repeated calls (ie, is
+a stochastic model, such as a decision tree with randomized node
+selection criteria), or if `bagging_fraction` is set to a value less
+than 1.0, or both.
+
+Here the atomic `model` must support targets with scitype
+`AbstractVector{<:Finite}` (single-target classifiers) or
+`AbstractVector{<:Continuous}` (single-target regressors).

If `rng` is an integer, then `MersenneTwister(rng)` is the random
number generator used for bagging. Otherwise some `AbstractRNG` object
is expected.

-The atomic predictions are weighted according to the vector
+The atomic predictions are optionally weighted according to the vector
`atomic_weights` (to allow for external optimization) except in the
-case that `atom` is a `Deterministic` classifier. Uniform
-atomic weights are used if `weight` has zero length.
+case that `model` is a `Deterministic` classifier, in which case
+`atomic_weights` are ignored.

The ensemble model is `Deterministic` or `Probabilistic`, according to
the corresponding supertype of `atom`. In the case of deterministic
@@ -292,20 +291,19 @@ particular, for regressors, the ensemble prediction on each input
pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl
package, where `D` is the type of predicted distribution for `atom`.

-The `acceleration` keyword argument is used to specify the compute resource (a
-subtype of `ComputationalResources.AbstractResource`) that will be used to
-accelerate/parallelize ensemble fitting.
+Specify `acceleration=CPUProcesses()` for distributed computing, or
+`CPUThreads()` for multithreading.

If a single measure or non-empty vector of measures is specified by
`out_of_bag_measure`, then out-of-bag estimates of performance are
-written to the trainig report (call `report` on the trained
+written to the training report (call `report` on the trained
machine wrapping the ensemble model).

-*Important:* If sample weights `w` (as opposed to atomic weights) are
-specified when constructing a machine for the ensemble model, as in
-`mach = machine(ensemble_model, X, y, w)`, then `w` is used by any
-measures specified in `out_of_bag_measure` that support sample
-weights.
-
+*Important:* If sample weights `w` (not to be confused with atomic
+weights) are specified when constructing a machine for the ensemble
+model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is
+used by any measures specified in `out_of_bag_measure` that support
+sample weights.
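+
+For illustration only (assuming `DecisionTreeClassifier` has been
+loaded from some interface package, and that `X`, `y` is a supported
+input-target pair), a bagged ensemble of 100 trees might be built
+like this:
+
+    atom = DecisionTreeClassifier()  # hypothetical atomic model
+    forest = EnsembleModel(atom, n=100, bagging_fraction=0.8)
+    mach = fit!(machine(forest, X, y))
+    predict(mach, X)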
+*Important:* If sample weights `w` (not to be confused with atomic +weights) are specified when constructing a machine for the ensemble +model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is +used by any measures specified in `out_of_bag_measure` that support +sample weights. """ function EnsembleModel(args...; @@ -411,7 +409,7 @@ function MMI.fit(model::EitherEnsembleModel{Atom}, acceleration = model.acceleration if acceleration isa CPUProcesses && nworkers() == 1 - acceleration = default_resource() + acceleration = CPU1() end if model.out_of_bag_measure isa Vector From d9cd32e14d4f1a8a44f9bf1f83e724b1a1cc4ade Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 13 Dec 2021 11:54:05 +1300 Subject: [PATCH 3/6] add compat MLJBase="0.19" --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 715723d..c04b5de 100644 --- a/Project.toml +++ b/Project.toml @@ -21,6 +21,7 @@ CategoricalArrays = "0.8, 0.9, 0.10" CategoricalDistributions = "0.1" ComputationalResources = "0.3" Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" +MLJBase = "0.19" MLJModelInterface = "0.4.1, 1.1" ProgressMeter = "1.1" ScientificTypesBase = "2" From 50d7810ae02d87fd289dbd1927b8df92e5586d97 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 13 Dec 2021 11:59:00 +1300 Subject: [PATCH 4/6] bump requirement for CategoricalDistributions to 0.1.2 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c04b5de..87f6569 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] CategoricalArrays = "0.8, 0.9, 0.10" -CategoricalDistributions = "0.1" +CategoricalDistributions = "0.1.2" ComputationalResources = "0.3" Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" MLJBase = "0.19" From 0037e6be84b36162aea40088a9f1d802acd4f429 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 24 Dec 2021 09:04:47 +1300 Subject: [PATCH 5/6] bump version 0.2.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 87f6569..8ca8766 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJEnsembles" uuid = "50ed68f4-41fd-4504-931a-ed422449fee0" authors = ["Anthony D. Blaom "] -version = "0.1.2" +version = "0.2.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From da40e1136dcdc19fae61cd7ce533e9dc05d16f2f Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 28 Dec 2021 09:28:06 +1300 Subject: [PATCH 6/6] extend compat ScientificTypesBase = "2,3" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8ca8766..482739d 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" MLJBase = "0.19" MLJModelInterface = "0.4.1, 1.1" ProgressMeter = "1.1" -ScientificTypesBase = "2" +ScientificTypesBase = "2,3" StatsBase = "0.32, 0.33" julia = "1.1"