diff --git a/Project.toml b/Project.toml index 44c87ce..fdaddfe 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PosDefManifoldML" uuid = "a07f4532-e2c9-11e9-2ea2-6d98fe4a1f21" authors = ["Marco-Congedo "] -version = "0.3.8" +version = "0.4.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/docs/Project.toml b/docs/Project.toml index c1c3763..9183014 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,5 @@ authors = ["Marco Congedo, Saloni Jain, Anton Andreev"] -version = "0.3.8" +version = "0.4.0" [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/docs/src/tools.md b/docs/src/tools.md index 5d5c7bd..2a13da5 100644 --- a/docs/src/tools.md +++ b/docs/src/tools.md @@ -11,6 +11,8 @@ and Euclidean machine learning classifiers. | [`tsMap`](@ref) | project data on a tangent space to apply Euclidean ML models therein | | [`tsWeights`](@ref)| generator of weights for tagent space mapping | | [`gen2ClassData`](@ref)| generate 2-class positive definite matrix data for testing Riemannian ML models | +| [`confusionMat`](@ref)| Confusion matrix given a vector of true labels and a vector of predicted labels | +| [`predictAcc`](@ref)| prediction accuracy given a vector of true labels and a vector of predicted labels | | [`predictErr`](@ref)| prediction error given a vector of true labels and a vector of predicted labels | | [`rescale!`](@ref)| Rescale the rows of a real matrix to be in range [a, b] | @@ -21,6 +23,8 @@ and Euclidean machine learning classifiers. tsMap tsWeights gen2ClassData +confusionMat +predictAcc predictErr rescale! 
``` diff --git a/src/PosDefManifoldML.jl b/src/PosDefManifoldML.jl index 94dc58c..b65b18b 100644 --- a/src/PosDefManifoldML.jl +++ b/src/PosDefManifoldML.jl @@ -1,5 +1,5 @@ # Unit "simulations.jl" of the PosDefManifoldML Package for Julia language -# v 0.3.8 - last update April 3 2020 +# v 0.4.0 - last update April 5 2020 # # MIT License # Copyright (c) 2019-2020, @@ -102,6 +102,8 @@ export tsMap, tsWeights, gen2ClassData, + confusionMat, + predictAcc, predictErr, rescale! diff --git a/src/cv.jl b/src/cv.jl index ccf958c..2185046 100644 --- a/src/cv.jl +++ b/src/cv.jl @@ -239,6 +239,7 @@ function cvAcc(model :: MLmodel, # for the TSmodels it is the mean of such means. # This is a quick approximation since the initialization is not critical, # but it hastens the computation time since itera. alg. require less iters. + #= if model.metric in (Fisher, logdet0) M0=means(logEuclidean, 𝐐; ⏩=⏩) if model isa TSmodel M0=mean(logEuclidean, M0; ⏩=⏩) end @@ -247,32 +248,33 @@ function cvAcc(model :: MLmodel, if model isa Tsmodel M0=generalizedMean(M0, 0.5; ⏩=⏩) end else M0=nothing; end + =# # perform cv function fold(f::Int) @static if VERSION >= v"1.3" print(defaultFont, rand(dice), " ") end # print a random dice in the REPL # get testing data for current fold - for i=1:z @inbounds 𝐐Te[f][i] = [𝐐[i][j] for j ∈ indTe[i][f]] end + for i=1:z 𝐐Te[f][i] = [𝐐[i][j] for j ∈ indTe[i][f]] end # get training labels for current fold - for i=1:z, j ∈ indTr[i][f] @inbounds push!(zTr[f], Int64(i)) end + for i=1:z, j ∈ indTr[i][f] push!(zTr[f], Int64(i)) end # get training data for current fold - for i=1:z, j ∈ indTr[i][f] @inbounds push!(𝐐Tr[f], 𝐐[i][j]) end + for i=1:z, j ∈ indTr[i][f] push!(𝐐Tr[f], 𝐐[i][j]) end # fit machine learning model ℳ[f]=fit(model, 𝐐Tr[f], zTr[f]; - meanInit=M0, + #meanInit=M0, verbose=false, fitArgs✔...) 
# predict labels for current fold - @inbounds for i=1:z pl[f][i]=predict(ℳ[f], 𝐐Te[f][i], :l; verbose=false) end + for i=1:z pl[f][i]=predict(ℳ[f], 𝐐Te[f][i], :l; verbose=false) end # compute confusion matrix for current fold - @inbounds for i=1:z, s=1:length(pl[f][i]) CM[f][i, pl[f][i][s]]+=1. end + for i=1:z, s=1:length(pl[f][i]) CM[f][i, pl[f][i][s]]+=1. end # compute balanced accuracy or accuracy for current CV sumCM=sum(CM[f]) diff --git a/src/tools.jl b/src/tools.jl index 587923b..655c135 100644 --- a/src/tools.jl +++ b/src/tools.jl @@ -356,41 +356,134 @@ end """ ``` -function predictErr(yTrue::IntVector, yPred::IntVector; - digits::Int=3)) +function confusionMat(yTrue::IntVector, yPred::IntVector) +``` + +Return the *confusion matrix* given integer vectors of true labels `yTrue` +and predicted labels `yPred`. + +The length of `yTrue` and `yPred` must be equal. Furthermore, +the `yTrue` vector must comprise all natural numbers +in between 1 and *z*, where *z* is the number of classes. + +The confusion matrix will have size *z*x*z*. It is computed +starting from a matrix filled everywhere with zeros and +adding, for each label, 1 at entry [i, j] of the matrix, where +i is the true label and j the predicted label, and finally +dividing the matrix by the sum of all its elements. +Therefore, the entries of the confusion matrix sum up to 1.0. + +**See** [`predict`](@ref), [`predictAcc`](@ref), [`predictErr`](@ref). + +**Examples** + +``` +using PosDefManifoldML +julia> confusionMat([1, 1, 1, 2, 2], [1, 1, 1, 1, 2]) +# return: [0.6 0.0; 0.2 0.2] +``` +""" +function confusionMat(yTrue::IntVector, yPred::IntVector) + + n1=length(yTrue) + n2=length(yPred) + if n1≠n2 + @error 📌*", function ConfusionMat: the length of the two argument vectors must be equal." 
n1 n2 + return + end + + cTrue=sort(unique(yTrue)) + z = length(cTrue) + if cTrue≠[i for i∈1:z] + @error 📌*", function ConfusionMat: the `yTrue` vector must contains all natural numbers from 1 to the number of classes. It contains instead: " cTrue + return + end + + CM = zeros(Float64, z, z) + for i=1:n1 CM[yTrue[i], yPred[i]]+=1. end + return CM/=sum(CM) +end + +""" +``` +function predictAcc(yTrue::IntVector, yPred::IntVector; + scoring:: Symbol = :b, + digits::Int=3) ``` -Return the percent prediction error given a vector of true labels and a vector -of predicted labels. +Return the prediction accuracy as a proportion, that is, ∈[0, 1], +given a vector of true labels `yTrue` and a vector of +predicted labels `yPred`. -The order of arguments does not matter. +If `scoring`=:b (default) the **balanced accuracy** is computed. +Any other value will make the function return the regular **accuracy**. +Balanced accuracy is to be preferred for unbalanced classes. +For balanced classes the balanced accuracy reduces to the +regular accuracy, therefore there is no point in using regular accuracy +if not to avoid a few unnecessary computations when the classes are balanced. The error is rounded to the number of optional keyword argument `digits`, 3 by default. -**See** [`predict`](@ref) +**Maths** + +The regular *accuracy* is given by the sum of the diagonal elements +of the confusion matrix. + +For the *balanced accuracy*, the diagonal elements +of the confusion matrix are divided by the respective row sums +and their mean is taken. 
+ +**See** [`predict`](@ref), [`predictErr`](@ref), [`confusionMat`](@ref) **Examples** ``` using PosDefManifoldML -predictErr([1, 1, 2, 2], [1, 1, 1, 2]) -# return: 25.0 +julia> predictAcc([1, 1, 1, 2, 2], [1, 1, 1, 1, 2]; scoring=:a) +# regular accuracy, return: 0.8 +julia> predictAcc([1, 1, 1, 2, 2], [1, 1, 1, 1, 2]) +# balanced accuracy, return: 0.75 ``` """ -function predictErr(yTrue::IntVector, yPred::IntVector; - digits::Int=3) +function predictAcc(yTrue::IntVector, yPred::IntVector; + scoring:: Symbol = :b, + digits::Int=3) + n1=length(yTrue) n2=length(yPred) if n1≠n2 @error 📌*", function predictErr: the length of the two argument vectors must be equal." n1 n2 return - else - round(sum(y1≠y2 for (y1, y2) ∈ zip(yTrue, yPred))/n1*100; digits=digits) end + + if scoring≠:b # regular accuracy + return round(sum(y1==y2 for (y1, y2) ∈ zip(yTrue, yPred))/n1; digits=digits) + else # balanced accuracy + CM=confusionMat(yTrue, yPred) + z=size(CM, 1) + return round(sum(CM[i, i]/sum(CM[i, :]) for i=1:z) / z; digits=digits) + end + end +""" +``` +function predictErr(yTrue::IntVector, yPred::IntVector; + scoring:: Symbol = :b, + digits::Int=3) +``` + +Return the complement of the prediction accuracy, that is, 1.0 minus +the result of [`predictAcc`](@ref). +**See** [`predictAcc`](@ref). +""" +predictErr(yTrue::IntVector, yPred::IntVector; + scoring::Symbol = :b, + digits::Int=3) = + round(1.0 - predictAcc(yTrue, yPred; scoring=scoring, digits=8); + digits=digits) """ ```