Skip to content

Commit

Permalink
Changed way mode of distribution calculated
Browse files Browse the repository at this point in the history
One reason for suboptimal rendering of LDFC spectrograms is that in some cases the mode of the distribution of index values is also the minimum, especially in the case of Entropy, where the mode is often zero. When this occurs, the mode is now prevented from being in the bottom two histogram bins.
Most of the other changes are resharper inspired or designed to simplify code.
  • Loading branch information
towsey committed Mar 21, 2019
1 parent 57de07e commit 252c58f
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 144 deletions.
71 changes: 31 additions & 40 deletions src/AudioAnalysisTools/Indices/IndexDistributions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ public static Dictionary<string, SpectralStats> WriteSpectralIndexDistributionSt
{
if (spectrogramMatrices.ContainsKey(key))
{
var matrix = spectrogramMatrices[key];
SpectralStats stats = GetModeAndOneTailedStandardDeviation(matrix, width, UpperPercentileDefault);
double[] array = DataTools.Matrix2Array(spectrogramMatrices[key]);
SpectralStats stats = GetModeAndOneTailedStandardDeviation(array, width, UpperPercentileDefault);
indexDistributionStatistics.Add(key, stats); // add index statistics
double value = stats.GetValueOfNthPercentile(UpperPercentileDefault);

Expand Down Expand Up @@ -143,29 +143,30 @@ public static Dictionary<string, SpectralStats> WriteSpectralIndexDistributionSt
return indexDistributionStatistics;
}

/// <summary>
/// Draws a histogram image of the distribution of the values in the passed matrix.
/// The statistics are obtained from GetModeAndOneTailedStandardDeviation and the image is produced by GraphsAndCharts.DrawHistogram,
/// which is also given the min, max, mode, sd, upper percentile value and count as labelled statistics.
/// </summary>
/// <param name="matrix">matrix of values whose distribution is to be drawn.</param>
/// <param name="width">width passed to the histogram drawing method; also used as the bin count when calculating the statistics.</param>
/// <param name="height">height passed to the histogram drawing method.</param>
/// <param name="label">label passed to the histogram drawing method.</param>
/// <returns>the image returned by GraphsAndCharts.DrawHistogram.</returns>
public static Image DrawImageOfDistribution(double[,] matrix, int width, int height, string label)
{
// NOTE: width is passed in the bin-count position, i.e. the histogram gets one bin per unit of width.
SpectralStats stats = GetModeAndOneTailedStandardDeviation(matrix, width, UpperPercentileDefault);

// the index value that corresponds to the default upper percentile.
double value = stats.GetValueOfNthPercentile(UpperPercentileDefault);

var image =
GraphsAndCharts.DrawHistogram(
label,
stats.Distribution,
stats.UpperPercentileBin,
new Dictionary<string, double>()
{
{ "min", stats.Minimum },
{ "max", stats.Maximum },
{ "mode", stats.Mode },
{ "sd", stats.StandardDeviation },
{ UpperPercentileLabel, value },
{ "count", stats.Count },
},
width,
height);
return image;
}
//public static Image DrawImageOfDistribution(double[,] matrix, int width, int height, string label)
//{
// double[] array = DataTools.Matrix2Array(matrix);
// SpectralStats stats = GetModeAndOneTailedStandardDeviation(array, width, UpperPercentileDefault);
// double value = stats.GetValueOfNthPercentile(UpperPercentileDefault);

// var image =
// GraphsAndCharts.DrawHistogram(
// label,
// stats.Distribution,
// stats.UpperPercentileBin,
// new Dictionary<string, double>()
// {
// { "min", stats.Minimum },
// { "max", stats.Maximum },
// { "mode", stats.Mode },
// { "sd", stats.StandardDeviation },
// { UpperPercentileLabel, value },
// { "count", stats.Count },
// },
// width,
// height);
// return image;
//}

public static Dictionary<string, SpectralStats> WriteSummaryIndexDistributionStatistics(Dictionary<string, double[]> summaryIndices, DirectoryInfo outputDirectory, string fileStem)
{
Expand Down Expand Up @@ -217,37 +218,27 @@ public static Dictionary<string, SpectralStats> WriteSummaryIndexDistributionSta

public static SpectralStats GetModeAndOneTailedStandardDeviation(double[,] matrix)
{
int binCount = 100;
int upperPercentile = 0;
double[] values = DataTools.Matrix2Array(matrix);
const bool displayHistogram = false;
DataTools.GetModeAndOneTailedStandardDeviation(values, displayHistogram, out var min, out var max, out var modalBin, out var mode, out var sd);
int[] histogram = Histogram.Histo(matrix, binCount);
DataTools.GetModeAndOneTailedStandardDeviation(values, out var histogram, out var min, out var max, out var modalBin, out var mode, out var sd);

// writeBarGraph(histogram); // debug purposes
return new SpectralStats()
{
Minimum = min,
Maximum = max,
Mode = mode,
ModalBin = modalBin,
StandardDeviation = sd,
UpperPercentile = upperPercentile,
UpperPercentile = 0,
Distribution = histogram,
};
}

/// <summary>
/// Matrix overload: converts the matrix to an array using DataTools.Matrix2Array
/// and then delegates to the overload that accepts an array of doubles.
/// </summary>
/// <param name="matrix">matrix of values.</param>
/// <param name="binCount">bin count, passed through unchanged to the array overload.</param>
/// <param name="upperPercentile">upper percentile, passed through unchanged to the array overload.</param>
/// <returns>the SpectralStats returned by the array overload.</returns>
public static SpectralStats GetModeAndOneTailedStandardDeviation(double[,] matrix, int binCount, int upperPercentile)
{
double[] values = DataTools.Matrix2Array(matrix);
return GetModeAndOneTailedStandardDeviation(values, binCount, upperPercentile);
}

public static SpectralStats GetModeAndOneTailedStandardDeviation(double[] values, int binCount, int upperPercentile)
{
const bool displayHistogram = false;
DataTools.GetModeAndOneTailedStandardDeviation(values, displayHistogram, out var min, out var max, out var modalBin, out var mode, out var sd);
int[] histogram = Histogram.Histo(values, binCount);
DataTools.GetModeAndOneTailedStandardDeviation(values, out var histogram, out var min, out var max, out var modalBin, out var mode, out var sd);

// writeBarGraph(histogram); // debug purposes
return new SpectralStats()
{
Minimum = min,
Expand Down
4 changes: 2 additions & 2 deletions src/AudioAnalysisTools/SpectralTrack.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// <copyright file="SpectralTrack.cs" company="QutEcoacoustics">
// <copyright file="SpectralTrack.cs" company="QutEcoacoustics">
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
// </copyright>

Expand Down Expand Up @@ -472,7 +472,7 @@ public static void WriteHistogramOftrackLengths(List<SpectralTrack> tracks)
lengths[i] = tracks[i].Length;
}

Histogram.writeConciseHistogram(lengths);
Histogram.WriteConciseHistogram(lengths);
int[] histo = Histogram.Histo_FixedWidth(lengths, 1, 0, 20);
DataTools.writeBarGraph(histo);
}
Expand Down
82 changes: 40 additions & 42 deletions src/TowseyLibrary/DataTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4523,53 +4523,54 @@ public static int[] Data2Bins(int[] data, int min, int binWidth, int numBins)
return binCounts;
}

// =============================================================================
// =============================================================================

/// <summary>
/// Returns the min, max, mode and one-sided standard deviation of an array of double values.
/// This method accommodates the possibility that the distribution of values is a truncated Gaussian or a skewed Gaussian.
/// Once the modal position has been determined, it is assumed that the Sd is to be determined from the long-tailed side.
/// i.e. the modal position is assumed to be the average of the underlying distribution.
/// This method is used to calculate the mean and SD of acoustic indices whose distributions are very skewed, e.g. temporal entropy and cover.
/// </summary>
/// <param name="values">an array of values.</param>
/// <param name="min">min value.</param>
/// <param name="max">max value in the array.</param>
/// <param name="modalBin">bin having modal value.</param>
/// <param name="mode">the value of the mode.</param>
/// <param name="SD">standard deviation of the distribution.</param>
public static void GetModeAndOneTailedStandardDeviation(double[] values, bool writeHistogram, out double min, out double max, out int modalBin, out double mode, out double SD)
{
int binCount = 300;
double binWidth;
int[] histo = Histogram.Histo(values, binCount, out binWidth, out min, out max);
if (writeHistogram)
{
writeBarGraph(histo);
}
/// <summary>
/// This method is used to calculate the mean and SD of acoustic indices whose distributions are very skewed, e.g. temporal entropy and cover.
/// It returns the min, max, mode and one-sided standard deviation of an array of doubles.
/// NOTE: The mode is prevented from being in lowest two bins because we typically do not want the mode to be near the minimum value of the distribution.
/// This method accommodates the possibility that the distribution of index values is a truncated Gaussian or a skewed Gaussian.
/// Once the modal position has been determined, it is assumed that the Sd is to be determined from the long-tailed side.
/// i.e. the modal position is assumed to be the average of the underlying distribution.
/// </summary>
/// <param name="values">an array of values.</param>
/// <param name="histogram">histogram derived from passed array.</param>
/// <param name="min">min value.</param>
/// <param name="max">max value in the array.</param>
/// <param name="modalBin">bin having modal value.</param>
/// <param name="mode">the value of the mode.</param>
/// <param name="SD">standard deviation of the distribution.</param>
public static void GetModeAndOneTailedStandardDeviation(double[] values, out int[] histogram, out double min, out double max, out int modalBin, out double mode, out double SD)
{
histogram = Histogram.Histo(values, binCount: 300, binWidth: out double binWidth, min: out min, max: out max);

// This next step is a hack for spectral acoustic indices.
// Set lowest two histogram bins to zero.
// We do not want a modal value in these bins when calculating bounds for LDFC spectrograms.
histogram[0] = 0;
histogram[1] = 0;

// Calculate the SD on longest tail. Assume that the tail is Gaussian.
int indexOfMode, indexOfOneSD;
GetModeAndOneTailedStandardDeviation(histo, out indexOfMode, out indexOfOneSD);
mode = min + (indexOfMode * binWidth);
modalBin = indexOfMode;
int delta = Math.Abs(indexOfOneSD - indexOfMode);
if (delta < 1)
GetModeAndOneTailedStandardDeviation(histogram, out int indexOfMode, out int indexOfOneSd);
mode = min + (indexOfMode * binWidth);
modalBin = indexOfMode;
int delta = Math.Abs(indexOfOneSd - indexOfMode);
if (delta < 1)
{
delta = 1;
}

SD = delta * binWidth;
SD = delta * binWidth;

// the below av and sd are just a check on the one-tailed calculation.
// double avDist, sdDist;
// NormalDist.AverageAndSD(values, out avDist, out sdDist);
// double[] avAndsd = new double[2];
// avAndsd[0] = avDist;
// avAndsd[1] = sdDist;
// Console.Write("Standard av & sd for data.");
// Console.WriteLine(NormalDist.formatAvAndSD(avAndsd, 3));
}
// the below av and sd are just a check on the one-tailed calculation.
// double avDist, sdDist;
// NormalDist.AverageAndSD(values, out avDist, out sdDist);
// double[] avAndsd = new double[2];
// avAndsd[0] = avDist;
// avAndsd[1] = sdDist;
// Console.Write("Standard av & sd for data.");
// Console.WriteLine(NormalDist.formatAvAndSD(avAndsd, 3));
}

/// <summary>
/// Assuming the passed histogram represents a distribution of values (derived from acoustic indices). which a signal is added to Gaussian noise,
Expand All @@ -4578,9 +4579,6 @@ public static void GetModeAndOneTailedStandardDeviation(double[] values, bool wr
/// i.e. the modal position is assumed to be the average of the underlying distribution.
/// This method is used to calculate the mean and SD of acoustic indices whose distributions are very skewed, e.g. temporal entropy and cover.
/// </summary>
/// <param name="histo"></param>
/// <param name="indexOfMode"></param>
/// <param name="indexOfOneSD"></param>
public static void GetModeAndOneTailedStandardDeviation(int[] histo, out int indexOfMode, out int indexOfOneSD)
{
// the below smoothing was added on 15th April 2015. It may or may not be helpful.
Expand Down
Loading

0 comments on commit 252c58f

Please sign in to comment.