Merge pull request #72 from Marco-Congedo/dev

implemented functions `confusionMat` and `predictAcc`. Changed the ou…
Marco-Congedo · Apr 5, 2020 · b5f3c7b · b5f3c7b
2 parents 7656883 + e8f75a8
commit b5f3c7b
Show file tree

Hide file tree

Showing 6 changed files with 122 additions and 21 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "PosDefManifoldML"
 uuid = "a07f4532-e2c9-11e9-2ea2-6d98fe4a1f21"
 authors = ["Marco-Congedo <[email protected]>"]
-version = "0.3.8"
+version = "0.4.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,5 +1,5 @@
 authors = ["Marco Congedo, Saloni Jain, Anton Andreev"]
-version = "0.3.8"
+version = "0.4.0"
 
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"

diff --git a/docs/src/tools.md b/docs/src/tools.md
@@ -11,6 +11,8 @@ and Euclidean machine learning classifiers.
 | [`tsMap`](@ref)        | project data on a tangent space to apply Euclidean ML models therein |
 | [`tsWeights`](@ref)| generator of weights for tagent space mapping |
 | [`gen2ClassData`](@ref)| generate 2-class positive definite matrix data for testing Riemannian ML models |
+| [`confusionMat`](@ref)| Confusion matrix given a vector of true labels and a vector of predicted labels |
+| [`predictAcc`](@ref)| prediction accuracy given a vector of true labels and a vector of predicted labels |
 | [`predictErr`](@ref)| prediction error given a vector of true labels and a vector of predicted labels |
 | [`rescale!`](@ref)| Rescale the rows of a real matrix to be in range [a, b] |
 
@@ -21,6 +23,8 @@ and Euclidean machine learning classifiers.
 tsMap
 tsWeights
 gen2ClassData
+confusionMat
+predictAcc
 predictErr
 rescale!
 ```
diff --git a/src/PosDefManifoldML.jl b/src/PosDefManifoldML.jl
@@ -1,5 +1,5 @@
 #   Unit "simulations.jl" of the PosDefManifoldML Package for Julia language
-#   v 0.3.8 - last update April 3 2020
+#   v 0.4.0 - last update April 5 2020
 #
 #   MIT License
 #   Copyright (c) 2019-2020,
@@ -102,6 +102,8 @@ export
     tsMap,
     tsWeights,
     gen2ClassData,
+	confusionMat,
+	predictAcc,
     predictErr,
 	rescale!
 

diff --git a/src/cv.jl b/src/cv.jl
@@ -239,6 +239,7 @@ function cvAcc(model    :: MLmodel,
     # for the TSmodels it is the mean of such means.
     # This is a quick approximation since the initialization is not critical,
     # but it hastens the computation time since itera. alg. require less iters.
+    #=
     if      model.metric in (Fisher, logdet0)
                 M0=means(logEuclidean, 𝐐; ⏩=⏩)
                 if model isa TSmodel M0=mean(logEuclidean, M0; ⏩=⏩) end
@@ -247,32 +248,33 @@ function cvAcc(model    :: MLmodel,
                 if model isa Tsmodel M0=generalizedMean(M0, 0.5; ⏩=⏩) end
     else    M0=nothing;
     end
+    =#
 
     # perform cv
     function fold(f::Int)
         @static if VERSION >= v"1.3" print(defaultFont, rand(dice), " ") end # print a random dice in the REPL
 
         # get testing data for current fold
-        for i=1:z @inbounds 𝐐Te[f][i] = [𝐐[i][j] for j ∈ indTe[i][f]] end
+        for i=1:z 𝐐Te[f][i] = [𝐐[i][j] for j ∈ indTe[i][f]] end
 
         # get training labels for current fold
-        for i=1:z, j ∈ indTr[i][f] @inbounds push!(zTr[f], Int64(i)) end
+        for i=1:z, j ∈ indTr[i][f] push!(zTr[f], Int64(i)) end
 
         # get training data for current fold
-        for i=1:z, j ∈ indTr[i][f] @inbounds push!(𝐐Tr[f], 𝐐[i][j]) end
+        for i=1:z, j ∈ indTr[i][f] push!(𝐐Tr[f], 𝐐[i][j]) end
 
         # fit machine learning model
         ℳ[f]=fit(model, 𝐐Tr[f], zTr[f];
-                  meanInit=M0,
+                  #meanInit=M0,
                   verbose=false,
                   fitArgs✔...)
 
 
         # predict labels for current fold
-        @inbounds for i=1:z pl[f][i]=predict(ℳ[f], 𝐐Te[f][i], :l; verbose=false) end
+        for i=1:z pl[f][i]=predict(ℳ[f], 𝐐Te[f][i], :l; verbose=false) end
 
         # compute confusion matrix for current fold
-        @inbounds for i=1:z, s=1:length(pl[f][i]) CM[f][i, pl[f][i][s]]+=1. end
+        for i=1:z, s=1:length(pl[f][i]) CM[f][i, pl[f][i][s]]+=1. end
 
         # compute balanced accuracy or accuracy for current CV
         sumCM=sum(CM[f])

diff --git a/src/tools.jl b/src/tools.jl
@@ -356,41 +356,134 @@ end
 
 """
 ```
-function predictErr(yTrue::IntVector, yPred::IntVector;
-	          		digits::Int=3))
+function confusionMat(yTrue::IntVector, yPred::IntVector)
+```
+
+Return the *confusion matrix* given integer vectors of true labels `yTrue`
+and predicted labels `yPred`.
+
+The length of `yTrue` and `yPred` must be equal. Furthermore,
+the `yTrue` vector must comprise all natural numbers
+in between 1 and *z*, where *z* is the number of classes.
+
+The confusion matrix will have size *z*x*z*. It is computed
+starting from a matrix filled everywhere with zeros and
+adding, for each label, 1 at entry [i, j] of the matrix, where
+i is the true label and j the predicted label, and finally
+dividing the matrix by the sum of all its elements.
+Therefore, the entries of the confusion matrix sum up to 1.0.
+
+**See** [`predict`](@ref), [`predictAcc`](@ref), [`predictErr`](@ref).
+
+**Examples**
+
+```
+using PosDefManifoldML
+julia> confusionMat([1, 1, 1, 2, 2], [1, 1, 1, 1, 2])
+# return: [0.6 0.0; 0.2 0.2]
+```
+"""
+function confusionMat(yTrue::IntVector, yPred::IntVector)
+
+	n1=length(yTrue)
+	n2=length(yPred)
+	if n1≠n2
+		@error 📌*", function ConfusionMat: the length of the two argument vectors must be equal." n1 n2
+		return
+	end
+
+	cTrue=sort(unique(yTrue))
+	z = length(cTrue)
+	if cTrue≠[i for i∈1:z]
+		@error 📌*", function ConfusionMat: the `yTrue` vector must contains all natural numbers from 1 to the number of classes. It contains instead: " cTrue
+		return
+	end
+
+	CM = zeros(Float64, z, z)
+	for i=1:n1 CM[yTrue[i], yPred[i]]+=1. end
+	return CM/=sum(CM)
+end
+
+"""
+```
+function predictAcc(yTrue::IntVector, yPred::IntVector;
+					scoring:: Symbol = :b,
+					digits::Int=3)
 ```
 
-Return the percent prediction error given a vector of true labels and a vector
-of predicted labels.
+Return the prediction accuracy as a proportion, that is, ∈[0, 1],
+given a vector of true labels `yTrue` and a vector of
+predicted labels `yPred`.
 
-The order of arguments does not matter.
+If `scoring`=:b (default) the **balanced accuracy** is computed.
+Any other value will make the function return the regular **accuracy**.
+Balanced accuracy is to be preferred for unbalanced classes.
+For balanced classes the balanced accuracy reduces to the
+regular accuracy, therefore there is no point in using regular accuracy
+other than to avoid a few unnecessary computations when the classes are balanced.
 
 The error is rounded to the number of optional keyword argument
 `digits`, 3 by default.
 
-**See** [`predict`](@ref)
+**Maths**
+
+The regular *accuracy* is given by the sum of the diagonal elements
+of the confusion matrix.
+
+For the *balanced accuracy*, the diagonal elements
+of the confusion matrix are divided by the respective row sums
+and their mean is taken.
+
+**See** [`predict`](@ref), [`predictErr`](@ref), [`confusionMat`](@ref)
 
 **Examples**
 
 ```
 using PosDefManifoldML
-predictErr([1, 1, 2, 2], [1, 1, 1, 2])
-# return: 25.0
+julia> predictAcc([1, 1, 1, 2, 2], [1, 1, 1, 1, 2]; scoring=:a)
+# regular accuracy, return: 0.8
+julia> predictAcc([1, 1, 1, 2, 2], [1, 1, 1, 1, 2])
+# balanced accuracy, return: 0.75
 ```
 """
-function predictErr(yTrue::IntVector, yPred::IntVector;
-	          digits::Int=3)
+function predictAcc(yTrue::IntVector, yPred::IntVector;
+					scoring:: Symbol = :b,
+	          		digits::Int=3)
+
 	n1=length(yTrue)
 	n2=length(yPred)
 	if n1≠n2
 		@error 📌*", function predictErr: the length of the two argument vectors must be equal." n1 n2
 		return
-	else
-		round(sum(y1≠y2 for (y1, y2) ∈ zip(yTrue, yPred))/n1*100; digits=digits)
 	end
+
+	if scoring≠:b # regular accuracy
+		return round(sum(y1==y2 for (y1, y2) ∈ zip(yTrue, yPred))/n1; digits=digits)
+	else # balanced accuracy
+		CM=confusionMat(yTrue, yPred)
+		z=size(CM, 1)
+		return round(sum(CM[i, i]/sum(CM[i, :]) for i=1:z) / z; digits=digits)
+	end
+
 end
 
+"""
+```
+function predictErr(yTrue::IntVector, yPred::IntVector;
+					scoring:: Symbol = :b,
+					digits::Int=3)
+```
+
+Return the complement of the prediction accuracy, that is, 1.0 minus
+the result of [`predictAcc`](@ref).
 
+**See** [`predictAcc`](@ref).
+"""
+predictErr(yTrue::IntVector, yPred::IntVector;
+			scoring::Symbol = :b,
+	        digits::Int=3) =
+	round(1.0 - predictAcc(yTrue, yPred; scoring=scoring, digits=8);
+		  digits=digits)
 
 """
 ```