Skip to content

Commit

Permalink
Experiment with different score normalisations
Browse files Browse the repository at this point in the history
Issue #252 - z-score normalisation works best even though the scores are not normally distributed.
  • Loading branch information
towsey committed Oct 5, 2019
1 parent 7f1c504 commit d471feb
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/AnalysisPrograms/Sandpit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ public static void ContentDescriptionApplyTemplates()
var path = Path.Combine(@"C:\Ecoacoustics\Output\Test\Test24HourRecording", "Testing__2Maps.png");
var ldfcSpectrogram = Image.FromFile(path);
var image = ContentVisualization.DrawLdfcSpectrogramWithContentScoreTracks(ldfcSpectrogram, contentPlots);
var path2 = Path.Combine(@"C:\Ecoacoustics\ContentDescription", "Testing_2Maps.CONTENTnew05.png");
var path2 = Path.Combine(@"C:\Ecoacoustics\ContentDescription", "Testing_2Maps.CONTENTnew06.png");
image.Save(path2);
Console.WriteLine("# Finished scanning recording with content description templates");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ public static List<Plot> ContentDescriptionOfMultipleRecordingFiles(FileInfo lis

var plotDict = DataProcessing.ConvertResultsToPlots(completeListOfResults, 1440, 0);
var contentPlots = DataProcessing.ConvertPlotDictionaryToPlotList(plotDict);
//contentPlots = DataProcessing.SubtractMeanPlusSd(contentPlots);
contentPlots = DataProcessing.SubtractMeanPlusSd(contentPlots);

//the following did not work as well.
contentPlots = DataProcessing.SubtractModeAndSd(contentPlots);
//contentPlots = DataProcessing.SubtractModeAndSd(contentPlots);
//contentPlots = DataProcessing.PercentileThresholding(contentPlots, 80);
return contentPlots;
}

Expand Down
41 changes: 40 additions & 1 deletion src/AudioAnalysisTools/ContentDescriptionTools/DataProcessing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ public static Dictionary<string, double[]> ApplyBandPass(Dictionary<string, doub
public static double[] ScanSpectrumWithTemplate(Dictionary<string, double[]> templateDict, Dictionary<string, double[]> oneMinuteIndices)
{
// convert the template dictionary to an array of averaged values
var dictionaryOfIndexAverages = DataProcessing.AverageIndicesInDictionary(templateDict);
var dictionaryOfIndexAverages = AverageIndicesInDictionary(templateDict);
var templateVector = ConvertDictionaryToVector(dictionaryOfIndexAverages);

// the score spectrum to be returned
Expand Down Expand Up @@ -455,6 +455,45 @@ public static List<Plot> SubtractModeAndSd(List<Plot> plots)
return opPlots;
}

/// <summary>
/// Rescales the scores of each plot relative to a per-plot percentile threshold.
/// For each plot, the threshold is the score value at the requested percentile; every score is then
/// mapped to (score - threshold) / (1 - threshold) and clamped to the range 0.0 to 4.0.
/// The score arrays are modified IN PLACE and the same Plot instances are returned in a new list.
/// </summary>
/// <param name="plots">The plots whose score arrays are to be rescaled.</param>
/// <param name="percentile">The percentile passed to Statistics.GetPercentileValue to obtain each plot's threshold.</param>
/// <returns>A new list holding the same (now modified) plots, in the same order.</returns>
public static List<Plot> PercentileThresholding(List<Plot> plots, int percentile)
{
    var opPlots = new List<Plot>();

    // rescale the scores of each plot relative to that plot's own percentile value
    foreach (Plot plot in plots)
    {
        var scores = plot.data;
        var threshold = Statistics.GetPercentileValue(scores, percentile);

        // The divisor is the distance from the threshold up to 1.0.
        // NOTE(review): this presumes raw scores have an upper bound of 1.0 - confirm against the caller.
        var range = 1 - threshold;

        for (int i = 0; i < scores.Length; i++)
        {
            // Normalize score relative to threshold
            var normalised = (scores[i] - threshold) / range;

            // When threshold == 1.0 a score equal to the threshold yields 0/0 = NaN.
            // NaN fails both comparisons below, so without this guard it would leak into the plot.
            // A score sitting exactly on the threshold is zero distance above it.
            if (double.IsNaN(normalised))
            {
                normalised = 0.0;
            }

            // scores at or below the threshold are floored to zero
            if (normalised < 0.0)
            {
                normalised = 0.0;
            }

            // upper clamp, same ceiling value as in the original implementation
            if (normalised > 4.0)
            {
                normalised = 4.0;
            }

            scores[i] = normalised;
        }

        // Display threshold for the rescaled plot.
        // NOTE(review): 0.5 was carried over from the z-score normalisation methods; confirm it suits percentile-scaled scores.
        plot.threshold = 0.5;
        opPlots.Add(plot);
    }

    return opPlots;
}

public static List<Plot> ConvertPlotDictionaryToPlotList(Dictionary<string, Plot> dict)
{
var list = new List<Plot>();
Expand Down
11 changes: 10 additions & 1 deletion src/TowseyLibrary/Statistics.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// <copyright file="Statistics.cs" company="QutEcoacoustics">
// <copyright file="Statistics.cs" company="QutEcoacoustics">
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
// </copyright>

Expand All @@ -21,6 +21,15 @@ public static double GetMedian(double[] v)
return median;
}

/// <summary>
/// Returns the value located at the given percentile position within the sorted copy of the array
/// produced by DataTools.SortArray.
/// The position is Round(length * percentile / 100), clamped to a valid index so that a percentile of 100
/// (or a high percentile on a short array) returns the last sorted element instead of throwing.
/// </summary>
/// <param name="v">The values to rank. Must not be null or empty.</param>
/// <param name="percentile">The percentile as an integer, nominally 0 to 100. Values outside that range are clamped to the ends of the sorted array.</param>
/// <returns>The element of the sorted array at the percentile position.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="v"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="v"/> is empty.</exception>
public static double GetPercentileValue(double[] v, int percentile)
{
    if (v == null)
    {
        throw new ArgumentNullException(nameof(v));
    }

    if (v.Length == 0)
    {
        throw new ArgumentException("Cannot get a percentile value from an empty array.", nameof(v));
    }

    // NOTE(review): this assumes Item2 holds the values sorted in ASCENDING order and that SortArray leaves v intact.
    // SortArray is defined elsewhere - confirm both; if it sorts descending this returns the (100 - percentile) value.
    Tuple<int[], double[]> tuple = DataTools.SortArray(v);
    double[] sortedValues = tuple.Item2;

    var fraction = percentile / 100.0;
    var percentileBin = (int)Math.Round(v.Length * fraction);

    // Round(length * fraction) reaches length when percentile is 100, which is one past the last index.
    // Clamp to the valid index range rather than throw IndexOutOfRangeException.
    percentileBin = Math.Max(0, Math.Min(percentileBin, sortedValues.Length - 1));

    return sortedValues[percentileBin];
}

/// <summary>
/// Analyses an array of events or hits, represented by a binary of matrix.
/// Assumes a Poisson distribution
Expand Down

0 comments on commit d471feb

Please sign in to comment.