diff --git a/perf/statistics.jl b/perf/statistics.jl deleted file mode 100644 index 66c776d..0000000 --- a/perf/statistics.jl +++ /dev/null @@ -1,119 +0,0 @@ -using NullableArrays -using DataArrays -using StatsBase - -srand(1) -N = 5_000_000 - -function profile_stats_methods() - A = rand(N) - B = rand(Bool, N) - X = NullableArray(A) - Y = NullableArray(A, B) - D = DataArray(A) - E = DataArray(A, B) - - profile_mean(A, X, D, Y, E) - profile_var(A, X, D, Y, E) - nothing -end - -function profile_mean(A, X, D, Y, E) - W = WeightVec(rand(N)) - - mean(A) - println("Method: mean(A) (0 missing entries)") - print(" for Array{Float64}: ") - @time(mean(A)) - mean(X) - print(" for NullableArray{Float64}: ") - @time(mean(X)) - mean(D) - print(" for DataArray{Float64}: ") - @time(mean(D)) - println() - - mean(Y, skipnull=false) - println("Method: mean(A) (~half missing entries, skip=false)") - print(" for NullableArray{Float64}: ") - @time(mean(Y, skipnull=false)) - mean(E, skipna=false) - print(" for DataArray{Float64}: ") - @time(mean(E, skipna=false)) - println() - - mean(Y, skipnull=true) - println("Method: mean(A) (~half missing entries, skip=true)") - print(" for NullableArray{Float64}: ") - @time(mean(Y, skipnull=true)) - mean(E, skipna=true) - print(" for DataArray{Float64}: ") - @time(mean(E, skipna=true)) - println() - - mean(A, W) - println("Method: mean(A, w::WeightVec{W, V}) (0 missing entries, V<:Array)") - print(" for Array{Float64}: ") - @time(mean(A, W)) - mean(X, W) - print(" for NullableArray{Float64}: ") - @time(mean(X, W)) - mean(D, W) - print(" for DataArray{Float64}: ") - @time(mean(D, W)) - println() - - println("Method: mean(A, W::WeightVec) (~half missing entries, skip=false)") - mean(Y, W, skipnull=false) - print(" for NullableArray{Float64}: ") - @time(mean(Y, W, skipnull=false)) - mean(E, W, skipna=false) - print(" for DataArray{Float64}: ") - @time(mean(E, W, skipna=false)) - println() - - println("Method: mean(A, W::WeightVec) (~half missing entries, skip=true)") - mean(Y, W, skipnull=true) - print(" for NullableArray{Float64}: ") - @time(mean(Y, W, skipnull=true)) - mean(E, W, skipna=true) - print(" for DataArray{Float64}: ") - @time(mean(E, W, skipna=true)) - println() -end - -function profile_var(A, X, D, Y, E) - mu = mean(A) - mu2 = mean(X, skipnull=true) - - varm(A, mu) - println("Method: varm(A, mu) (0 missing entries)") - print(" for Array{Float64}: ") - @time(varm(A, mu)) - println(" ", varm(A, mu)) - varm(X, mu) - print(" for NullableArray{Float64}: ") - @time(varm(X, mu)) - varm(D, mu) - print(" for DataArray{Float64}: ") - @time(varm(D, mu)) - println() - - varm(Y, mu; skipnull=false) - println("Method: varm(A, mu) (~half missing entries, skip=false)") - print(" for NullableArray{Float64}: ") - @time(varm(Y, mu; skipnull=false)) - varm(E, mu; skipna=false) - print(" for DataArray{Float64}: ") - @time(varm(E, mu; skipna=false)) - println() - - varm(Y, mu; skipnull=true) - println("Method: varm(A, mu) (~half missing entries, skip=true)") - print(" for NullableArray{Float64}: ") - @time(varm(Y, mu; skipnull=true)) - varm(E, mu; skipna=true) - print(" for DataArray{Float64}: ") - @time(varm(E, mu; skipna=true)) - println() -end diff --git a/src/NullableArrays.jl b/src/NullableArrays.jl index 043de62..0907852 100644 --- a/src/NullableArrays.jl +++ b/src/NullableArrays.jl @@ -28,6 +28,5 @@ module NullableArrays include("operators.jl") include("broadcast.jl") include("reduce.jl") - include("statistics.jl") include("show.jl") end diff --git a/src/statistics.jl b/src/statistics.jl deleted file mode 100644 index 3dbf0f1..0000000 --- a/src/statistics.jl +++ /dev/null @@ -1,100 +0,0 @@ -using StatsBase - -Base.mean(X::NullableArray; skipnull::Bool = false) = - sum(X; skipnull = skipnull) / - Nullable(length(X.isnull) - (skipnull * countnz(X.isnull))) - -function Base.mean{T, W, V}(X::NullableArray{T}, w::WeightVec{W, V}; - skipnull::Bool=false) - if skipnull - _X = NullableArray(X.values .* w.values, X.isnull) - _w = NullableArray(w.values, X.isnull) - return sum(_X; skipnull=true) / sum(_w; skipnull=true) - else - anynull(X) ? Nullable{T}() : Nullable(mean(X.values, w)) - end -end - -function Base.mean{T, W, V<:NullableArray}(X::NullableArray{T}, - w::WeightVec{W, V}; - skipnull::Bool=false) - if skipnull - _X = X .* w.values - _w = NullableArray(w.values, _X.isnull) - return sum(_X; skipnull=true) / sum(_w; skipnull=true) - else - anynull(X) || anynull(w) ? Nullable{T}() : - Nullable(mean(X.values, w.values.values)) - end -end - - -function Base.varm{T}(X::NullableArray{T}, m::Number; corrected::Bool=true, - skipnull::Bool=false) - if skipnull - n = length(X) - - nnull = countnz(X.isnull) - nnull == n && return Nullable(convert(Base.momenttype(T), NaN)) - nnull == n-1 && return Nullable( - convert(Base.momenttype(T), - abs2(X.values[Base.findnextnot(X.isnull, 1)] - m)/(1 - Int(corrected)) - ) - ) - /(nnull == 0 ? Nullable(Base.centralize_sumabs2(X.values, m, 1, n)) : - mapreduce_impl_skipnull(Base.CentralizedAbs2Fun(m), - Base.AddFun(), X), - Nullable(n - nnull - Int(corrected)) - ) - else - any(X.isnull) && return Nullable{T}() - Nullable(Base.varm(X.values, m; corrected=corrected)) - end -end - -function Base.varm{T, U<:Number}(X::NullableArray{T}, m::Nullable{U}; - corrected::Bool=true, skipnull::Bool=false) - m.isnull && throw(NullException()) - return varm(X, m.value; corrected=corrected, skipnull=skipnull) -end - -function Base.varzm{T}(X::NullableArray{T}; corrected::Bool=true, - skipnull::Bool=false) - n = length(X) - nnull = skipnull ? countnz(X.isnull) : 0 - (n == 0 || n == nnull) && return Nullable(convert(Base.momenttype(T), NaN)) - return sumabs2(X; skipnull=skipnull) / - Nullable((n - nnull - Int(corrected))) -end - -function Base.var(X::NullableArray; corrected::Bool=true, mean=nothing, - skipnull::Bool=false) - - (anynull(X) & !skipnull) && return Nullable{eltype(X)}() - - if mean == 0 || isequal(mean, Nullable(0)) - return Base.varzm(X; corrected=corrected, skipnull=skipnull) - elseif mean == nothing - return varm(X, Base.mean(X; skipnull=skipnull); corrected=corrected, - skipnull=skipnull) - elseif isa(mean, Union{Number, Nullable}) - return varm(X, mean; corrected=corrected, skipnull=skipnull) - else - error() - end -end - -function Base.stdm(X::NullableArray, m::Number; - corrected::Bool=true, skipnull::Bool=false) - return sqrt(varm(X, m; corrected=corrected, skipnull=skipnull)) -end - -function Base.stdm{T<:Number}(X::NullableArray, m::Nullable{T}; - corrected::Bool=true, skipnull::Bool=false) - return sqrt(varm(X, m; corrected=corrected, skipnull=skipnull)) -end - -function Base.std(X::NullableArray; corrected::Bool=true, - mean=nothing, skipnull::Bool=false) - return sqrt(var(X; corrected=corrected, mean=mean, skipnull=skipnull)) -end diff --git a/test/runtests.jl b/test/runtests.jl index e630824..2dc1304 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,7 +14,6 @@ my_tests = [ "nullablevector.jl", "nullablematrix.jl", "reduce.jl", - "statistics.jl", ] println("Running tests:") diff --git a/test/statistics.jl b/test/statistics.jl deleted file mode 100644 index c4b8dc6..0000000 --- a/test/statistics.jl +++ /dev/null @@ -1,173 +0,0 @@ -module TestStatistics - using NullableArrays - using StatsBase - using Base.Test - - srand(1) - - for N in (10, 100) - A = rand(N) - M = rand(Bool, N) - mu_A = mean(A) - nmu_A = Nullable(mu_A) - i = rand(1:N) - M[i] = true - j = rand(1:N) - while j == i - j = rand(1:N) - end - M[j] = false - X = NullableArray(A) - Y = NullableArray(A, M) - J = find(x -> !x, M) - B = A[J] - mu_B = mean(B) - nmu_B = Nullable(mu_B) - - C = rand(N) - V = WeightVec(C) - R = rand(Bool, N) - R[rand(1:N)] = true - R[j] = false - # W = WeightVec(NullableArray(C, R)) - - K = find(x -> !x, R) - L = find(i -> (!M[i] & !R[i]), [1:N...]) - - # For testing Base.varzm - D1 = rand(round(Int, N / 2)) - D2 = -1 .* D1 - D = [D1; D2] - while mean(D) != 0 - D1 = rand(round(Int, N / 2)) - D2 = -1 .* D1 - D = [D1; D2] - end - Q = NullableArray(D) - S = rand(Bool, round(Int, N / 2)) - U = NullableArray(D, [S; S]) - E = [D[find(x->!x, S)]; D[find(x->!x, S)]] - - @test_throws NullException varm(Y, Nullable{Float64}()) - - # Test mean - for skip in (true, false) - v = mean(X; skipnull=skip) - @test_approx_eq v.value mean(A) - @test !v.isnull - - v = mean(Y; skipnull=skip) - if skip == false - @test isequal(v, Nullable{Float64}()) - else - @test_approx_eq v.value mean(B) - @test !v.isnull - end - - v = mean(X, V; skipnull=skip) - @test_approx_eq v.value mean(A, V) - @test !v.isnull - - v = mean(Y, V; skipnull=skip) - if skip == false - @test isequal(v, Nullable{Float64}()) - else - @test_approx_eq v.value mean(B, WeightVec(C[J])) - @test !v.isnull - end - - # Following tests need to wait until WeightVec constructor is - # implemented for NullableArray argument - # - # v = mean(X, W, skipnull=skip) - # if skip == false - # @test isequal(v, Nullable{Float64}()) - # else - # @test_approx_eq v.value mean(A[K], WeightVec(C[K])) - # @test !v.isnull - # end - # v = mean(Y, W, skipnull=skip) - # if skip == false - # @test isequal(v, Nullable{Float64}()) - # else - # @test_approx_eq v.value mean(A[L], WeightVec(C[L])) - # @test !v.isnull - # end - end - - for corr in (true, false), skip in (true, false) - # Test Base.varzm - v = Base.varzm(Q, corrected=corr, skipnull=skip) - @test_approx_eq v.value Base.varzm(D, corrected=corr) - @test !v.isnull - - v = Base.varzm(U, corrected=corr, skipnull=skip) - if skip == false - @test isequal(v, Nullable{Float64}()) - else - @test_approx_eq v.value Base.varzm(E, corrected=corr) - @test !v.isnull - end - - # Test varm, stdm - for method in (varm, stdm) - for mu in (mu_A, nmu_A) - v = method(X, mu, corrected=corr, skipnull=skip) - @test_approx_eq v.value method(A, mu_A, corrected=corr) - @test !v.isnull - end - - for mu in (mu_B, nmu_B) - v = method(Y, mu, corrected=corr, skipnull=skip) - if skip == false - @test isequal(v, Nullable{Float64}()) - else - @test_approx_eq v.value method(B, mu_B, corrected=corr) - @test !v.isnull - end - end - end - - # Test var, std - for method in (var, std) - for mu in (nothing, mu_A, nmu_A, :a) - if mu == :a - @test_throws ErrorException method(X, mean=mu, - corrected=corr, - skipnull=skip) - else - v = method(X, mean=mu, corrected=corr, skipnull=skip) - @test_approx_eq v.value method(A, mean=mu_A, corrected=corr) - @test !v.isnull - end - end - - for mu in (0, Nullable(0)) - v = method(X, mean=mu, corrected=corr, skipnull=skip) - @test_approx_eq v.value method(A, mean=0, corrected=corr) - @test !v.isnull - end - - for mu in (nothing, mu_B, nmu_B) - v = method(Y, mean=mu, corrected=corr, skipnull=skip) - if skip == false - @test isequal(v, Nullable{Float64}()) - else - @test_approx_eq v.value method(B, mean=mu_B, corrected=corr) - @test !v.isnull - end - end - - for mu in (0, Nullable(0)) - v = method(Y, mean=mu, corrected=corr, skipnull=skip) - if skip == false - @test isequal(v, Nullable{Float64}()) - else - @test_approx_eq v.value method(B, mean=0, corrected=corr) - @test !v.isnull - end - end - end - end - end -end