From 0ca0ee260a805c3f364df8128f0acaa154a5307e Mon Sep 17 00:00:00 2001 From: Cornelius-G Date: Fri, 31 Jul 2020 13:30:30 +0200 Subject: [PATCH 1/2] add BinningAlgorithm --- src/algotypes/algotypes.jl | 1 + src/algotypes/binning_algorithm.jl | 68 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 src/algotypes/binning_algorithm.jl diff --git a/src/algotypes/algotypes.jl b/src/algotypes/algotypes.jl index bccb96ab2..f23e07e04 100644 --- a/src/algotypes/algotypes.jl +++ b/src/algotypes/algotypes.jl @@ -6,3 +6,4 @@ include("autocor_len.jl") include("mode_estimator.jl") include("median_estimator.jl") include("integration_algorithm.jl") +include("binning_algorithm.jl") diff --git a/src/algotypes/binning_algorithm.jl b/src/algotypes/binning_algorithm.jl new file mode 100644 index 000000000..27605869d --- /dev/null +++ b/src/algotypes/binning_algorithm.jl @@ -0,0 +1,68 @@ +abstract type AbstractBinningAlgorithm end + +struct FixedBinning <: AbstractBinningAlgorithm end +struct Sturges <: AbstractBinningAlgorithm end +struct Scott <: AbstractBinningAlgorithm end +struct Rice <: AbstractBinningAlgorithm end +struct FreedmanDiaconis <: AbstractBinningAlgorithm end +struct Wand <: AbstractBinningAlgorithm end +struct SqrtBinning <: AbstractBinningAlgorithm end + + +function function auto_binning(samples::DensitySampleVector; bins::AbstractBinningAlgorithm = FreedmanDiaconis()) + shape = varshape(samples) + flat_samples = flatview(unshaped.(samples.v)) + n_params = size(flat_samples)[1] + nt_samples = ntuple(i -> flat_samples[i,:], n_params) + + binningmode = binning_mode(bins) + number_of_bins = _auto_binning_nbins(nt_samples, param; mode=binningmode) + +end + +binning_mode(ba::SqrtBinning) = :sqrt +binning_mode(ba::Sturges) = :sturges +binning_mode(ba::Scott) = :scott +binning_mode(ba::Rice) = :rice +binning_mode(ba::FreedmanDiaconis) = :fd +binning_mode(ba::Wand) = :wand + +# also for prior + + + +# From Plots.jl, original authors Oliver Schulz and Michael K. Borregaard +function _auto_binning_nbins(vs::NTuple{N,AbstractVector}, dim::Integer; mode::Symbol = :auto) where N + max_bins = 10_000 + _cl(x) = min(ceil(Int, max(x, one(x))), max_bins) + _iqr(v) = (q = quantile(v, 0.75) - quantile(v, 0.25); q > 0 ? q : oftype(q, 1)) + _span(v) = maximum(v) - minimum(v) + + n_samples = length(LinearIndices(first(vs))) + + # The nd estimator is the key to most automatic binning methods, and is modified for twodimensional histograms to include correlation + nd = n_samples^(1/(2+N)) + nd = N == 2 ? min(n_samples^(1/(2+N)), nd / (1-cor(first(vs), last(vs))^2)^(3//8)) : nd # the >2-dimensional case does not have a nice solution to correlations + + v = vs[dim] + + if mode == :auto + mode = :fd + end + + if mode == :sqrt # Square-root choice + _cl(sqrt(n_samples)) + elseif mode == :sturges # Sturges' formula + _cl(log2(n_samples) + 1) + elseif mode == :rice # Rice Rule + _cl(2 * nd) + elseif mode == :scott # Scott's normal reference rule + _cl(_span(v) / (3.5 * std(v) / nd)) + elseif mode == :fd # Freedman–Diaconis rule + _cl(_span(v) / (2 * _iqr(v) / nd)) + elseif mode == :wand + _cl(wand_edges(v)) # this makes this function not type stable, but the type instability does not propagate + else + error("Unknown auto-binning mode $mode") + end +end From 995747980c6e4f4521811459bd9a889e63791251 Mon Sep 17 00:00:00 2001 From: Cornelius-G Date: Mon, 3 Aug 2020 19:55:20 +0200 Subject: [PATCH 2/2] adapt bat_marginalize to new Binning types --- src/algotypes/binning_algorithm.jl | 68 +++++++++++++++++++++-------- src/optimization/mode_estimators.jl | 11 ++--- src/plotting/MarginalDist.jl | 49 ++++++++++----------- 3 files changed, 76 insertions(+), 52 deletions(-) diff --git a/src/algotypes/binning_algorithm.jl b/src/algotypes/binning_algorithm.jl index 27605869d..121f0289e 100644 --- a/src/algotypes/binning_algorithm.jl +++ b/src/algotypes/binning_algorithm.jl @@ -1,15 +1,50 @@ -abstract type AbstractBinningAlgorithm end +abstract type AbstractBinning end -struct FixedBinning <: AbstractBinningAlgorithm end -struct Sturges <: AbstractBinningAlgorithm end -struct Scott <: AbstractBinningAlgorithm end -struct Rice <: AbstractBinningAlgorithm end -struct FreedmanDiaconis <: AbstractBinningAlgorithm end -struct Wand <: AbstractBinningAlgorithm end -struct SqrtBinning <: AbstractBinningAlgorithm end +struct SturgesBinning <: AbstractBinning end +struct ScottBinning <: AbstractBinning end +struct RiceBinning <: AbstractBinning end +struct FDBinning <: AbstractBinning end +struct WandBinning <: AbstractBinning end +struct SqrtBinning <: AbstractBinning end -function function auto_binning(samples::DensitySampleVector; bins::AbstractBinningAlgorithm = FreedmanDiaconis()) +function _bin_edges(data::AbstractVector, bins::Integer; closed::Symbol = :left) + return StatsBase.histrange((data, ), StatsBase._nbins_tuple((data, ), (bins,)), closed)[1] +end + +function _bin_edges(data::AbstractVector, bins::Union{AbstractRange, Tuple{AbstractRange}}; closed::Symbol = :left) + return bins +end + +function _bin_edges(data::AbstractVector, bins::AbstractBinning; closed::Symbol = :left) + nbins = auto_nbins(data, bins=bins) + return StatsBase.histrange((data, ), StatsBase._nbins_tuple((data, ), (nbins,)), closed)[1] +end + + +# for samples: +function _bin_edges(samples::DensitySampleVector, param::Integer, bins::Integer; closed::Symbol = :left) + data = flatview(unshaped.(samples.v))[param, :] + return _bin_edges(data, bins, closed=closed) +end + +function _bin_edges(samples::DensitySampleVector, param::Integer, bins::AbstractBinning; closed::Symbol = :left) + nbins = auto_nbins(samples, param, bins=bins) + data = flatview(unshaped.(samples.v))[param, :] + return StatsBase.histrange((data, ), StatsBase._nbins_tuple((data, ), (nbins,)), closed)[1] +end + +function _bin_edges(samples::Any, param::Any, bins::Union{AbstractRange, Tuple{AbstractRange}}; closed::Symbol = :left) + return bins +end + + +function auto_nbins(data::AbstractVector; bins::AbstractBinning = FDBinning()) + binningmode = binning_mode(bins) + number_of_bins = _auto_binning_nbins((data,), 1; mode=binningmode) +end + +function auto_nbins(samples::DensitySampleVector, param::Integer; bins::AbstractBinning = FDBinning()) shape = varshape(samples) flat_samples = flatview(unshaped.(samples.v)) n_params = size(flat_samples)[1] @@ -17,17 +52,16 @@ function function auto_binning(samples::DensitySampleVector; bins::AbstractBinni binningmode = binning_mode(bins) number_of_bins = _auto_binning_nbins(nt_samples, param; mode=binningmode) - end -binning_mode(ba::SqrtBinning) = :sqrt -binning_mode(ba::Sturges) = :sturges -binning_mode(ba::Scott) = :scott -binning_mode(ba::Rice) = :rice -binning_mode(ba::FreedmanDiaconis) = :fd -binning_mode(ba::Wand) = :wand -# also for prior + +binning_mode(ba::SqrtBinning) = :sqrt +binning_mode(ba::SturgesBinning) = :sturges +binning_mode(ba::ScottBinning) = :scott +binning_mode(ba::RiceBinning) = :rice +binning_mode(ba::FDBinning) = :fd +binning_mode(ba::WandBinning) = :wand diff --git a/src/optimization/mode_estimators.jl b/src/optimization/mode_estimators.jl index 9e41a0d9d..3d733053a 100644 --- a/src/optimization/mode_estimators.jl +++ b/src/optimization/mode_estimators.jl @@ -112,22 +112,17 @@ end -function bat_marginalmode_impl(samples::DensitySampleVector; nbins::Union{Integer, Symbol} = 200) +function bat_marginalmode_impl(samples::DensitySampleVector; bins = FDBinning()) shape = varshape(samples) flat_samples = flatview(unshaped.(samples.v)) n_params = size(flat_samples)[1] - nt_samples = ntuple(i -> flat_samples[i,:], n_params) marginalmode_params = Vector{Float64}() + bins_tuple = isa(bins, Tuple) ? bins : Tuple([bins for i in 1:n_params]) for param in Base.OneTo(n_params) - if typeof(nbins) == Symbol - number_of_bins = _auto_binning_nbins(nt_samples, param, mode=nbins) - else - number_of_bins = nbins - end - marginalmode_param = find_localmodes(bat_marginalize(samples, param, nbins=number_of_bins).result) + marginalmode_param = find_localmodes(bat_marginalize(samples, param, bins=bins_tuple[param]).result) if length(marginalmode_param[1]) > 1 @warn "More than one bin with the same weight is found. Returned the first one" diff --git a/src/plotting/MarginalDist.jl b/src/plotting/MarginalDist.jl index 2adc2998f..6830307df 100644 --- a/src/plotting/MarginalDist.jl +++ b/src/plotting/MarginalDist.jl @@ -4,23 +4,24 @@ struct MarginalDist{N,D<:Distribution,VS<:AbstractValueShape} origvalshape::VS end -function _get_edges(data::Tuple, nbins::Tuple{Vararg{<:Integer}}, closed::Symbol) - return StatsBase.histrange(data, StatsBase._nbins_tuple(data, nbins), closed) -end - -function _get_edges(data::Any, nbins::Integer, closed::Symbol) - return StatsBase.histrange((data, ), StatsBase._nbins_tuple((data, ), (nbins,)), closed)[1] -end - -function _get_edges(data::Any, nbins::Union{AbstractRange, Tuple{AbstractRange}}, closed::Symbol) - return nbins -end +# TODO: Remove +# function _get_edges(data::Tuple, nbins::Tuple{Vararg{<:Integer}}, closed::Symbol) +# return StatsBase.histrange(data, StatsBase._nbins_tuple(data, nbins), closed) +# end +# +# function _get_edges(data::Any, nbins::Integer, closed::Symbol) +# return StatsBase.histrange((data, ), StatsBase._nbins_tuple((data, ), (nbins,)), closed)[1] +# end +# +# function _get_edges(data::Any, nbins::Union{AbstractRange, Tuple{AbstractRange}}, closed::Symbol) +# return nbins +# end function bat_marginalize( maybe_shaped_samples::DensitySampleVector, key::Union{Integer, Symbol, Expr}; - bins = 200, + bins::Union{Integer, AbstractRange, AbstractBinning} = FDBinning(), closed::Symbol = :left, filter::Bool = false, normalize = true @@ -34,7 +35,7 @@ function bat_marginalize( idx = asindex(maybe_shaped_samples, key) s = flatview(samples.v)[idx, :] - edges = _get_edges(s, bins, closed) + edges = _bin_edges(samples, idx, bins; closed=closed) hist = fit(Histogram, s, @@ -54,7 +55,7 @@ end function bat_marginalize( maybe_shaped_samples::DensitySampleVector, key::Union{NTuple{n,Integer}, NTuple{n,Union{Symbol, Expr}}} where n; - bins = 200, + bins = FDBinning(), closed::Symbol = :left, filter::Bool = false, normalize = true @@ -68,11 +69,8 @@ function bat_marginalize( idxs = asindex.(Ref(maybe_shaped_samples), key) s = Tuple(BAT.flatview(samples.v)[i, :] for i in idxs) - edges = if isa(bins, Integer) - _get_edges(s, (bins,), closed) - else - Tuple(_get_edges(s[i], bins[i], closed) for i in 1:length(bins)) - end + bins_tuple = isa(bins, Tuple) ? bins : (bins, bins) + edges = Tuple(_bin_edges(samples, idxs[i], bins_tuple[i], closed=closed) for i in 1:length(bins_tuple)) hist = fit(Histogram, s, @@ -93,7 +91,7 @@ end function bat_marginalize( prior::NamedTupleDist, key::Union{Integer, Symbol}; - bins = 200, + bins = FDBinning(), edges = nothing, closed::Symbol = :left, nsamples::Integer = 10^6, @@ -102,7 +100,7 @@ function bat_marginalize( idx = asindex(prior, key) r = rand(prior, nsamples) - edges = _get_edges(r[idx, :], bins, closed) + edges = _bin_edges(r[idx, :], bins, closed=closed) hist = fit(Histogram, r[idx, :], edges, closed = closed) @@ -118,7 +116,7 @@ end function bat_marginalize( prior::NamedTupleDist, key::Union{NTuple{2, Symbol}, NTuple{2, Integer}}; - bins = 200, + bins = FDBinning(), closed::Symbol = :left, nsamples::Integer = 10^6, normalize=true @@ -128,11 +126,8 @@ function bat_marginalize( r = rand(prior, nsamples) s = Tuple(r[i, :] for i in idxs) - edges = if isa(bins, Integer) - _get_edges(s, (bins,), closed) - else - Tuple(_get_edges(s[i], bins[i], closed) for i in 1:length(bins)) - end + bins_tuple = isa(bins, Tuple) ? bins : (bins, bins) + edges = Tuple(_bin_edges(s[i], bins_tuple[i], closed=closed) for i in 1:length(bins_tuple)) hist = fit(Histogram, s,