From 08cb0fe6583df74afea96a0e168c25eb2f5dcc33 Mon Sep 17 00:00:00 2001 From: Michael Towsey Date: Sun, 9 Feb 2020 09:38:00 +1000 Subject: [PATCH] Work on the harmonic recognizer Issue #281 woring on the harmonic recognizer - in particular implement a new method to detect harmonics using AuotoCorrelation followed by DCT. this still needs more work as it is not yet detecting Curlew harmonics - my teset case. --- .../Recognizers/Base/HarmonicParameters.cs | 86 +++++-------------- .../Recognizers/GenericRecognizer.cs | 1 + src/AudioAnalysisTools/CrossCorrelation.cs | 52 +++++++++++ src/AudioAnalysisTools/Oscillations2012.cs | 65 ++++++-------- src/TowseyLibrary/AutoAndCrossCorrelation.cs | 21 +++-- 5 files changed, 115 insertions(+), 110 deletions(-) diff --git a/src/AnalysisPrograms/Recognizers/Base/HarmonicParameters.cs b/src/AnalysisPrograms/Recognizers/Base/HarmonicParameters.cs index 1482321c2..ee813347a 100644 --- a/src/AnalysisPrograms/Recognizers/Base/HarmonicParameters.cs +++ b/src/AnalysisPrograms/Recognizers/Base/HarmonicParameters.cs @@ -41,6 +41,11 @@ public class HarmonicParameters : CommonParameters //MinFormantGap: 800 //MaxFormantGap: 2200 + /// + /// Gets or sets the dctThreshold. + /// + public double? DctThreshold { get; set; } + /// /// Gets or sets the bottom bound of the rectangle. Units are Hertz. /// @@ -61,14 +66,13 @@ public static (List, double[]) GetComponentsWithHarmonics( int maxHz, int nyquist, double decibelThreshold, + double dctThreshold, double minDuration, double maxDuration, int minFormantGap, int maxFormantGap, TimeSpan segmentStartOffset) { - // parameters to be passed - double harmonicIntensityThreshold = 0.15; // Event threshold - Determines FP / FN trade-off for events. //double eventThreshold = 0.2; @@ -80,90 +84,46 @@ public static (List, double[]) GetComponentsWithHarmonics( double freqBinWidth = nyquist / (double)binCount; int minBin = (int)Math.Round(minHz / freqBinWidth); int maxBin = (int)Math.Round(maxHz / freqBinWidth); - - // set up score arrays - var harmonicScores = new double[frameCount]; - var formantGaps = new double[frameCount]; - - // now look for harmonics in search band using the Xcorrelation-FFT technique. int bandWidthBins = maxBin - minBin + 1; - //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2. - //assuming sr=22050 and window= 512, then freq bin width = 43.0664 and 64 fft bins span 2756 Hz above the min Hz level. - //assuming sr=22050 and window= 512, then freq bin width = 43.0664 and 128 fft bins span 5513 Hz above the min Hz level. - //assuming sr=22050 and window=1024, then freq bin width = 21.5332 and 64 fft bins span 1378 Hz above the min Hz level. - //assuming sr=22050 and window=1024, then freq bin width = 21.5332 and 128 fft bins span 2756 Hz above the min Hz level. - //assuming sr=22050 and window=1024, then freq bin width = 21.5332 and 256 fft bins span 5513 Hz above the min Hz level. - - int numberOfFftBins = 2048; - while (numberOfFftBins > bandWidthBins && (minBin + numberOfFftBins) > binCount) - { - numberOfFftBins /= 2; - } - - //numberOfFftBins = Math.Min(256, numberOfFftBins); - maxBin = minBin + numberOfFftBins - 1; - int maxFftHertz = (int)Math.Ceiling(maxBin * sonogram.FBinWidth); - + // extract the sub-band double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, frameCount - 1, maxBin); - int minCallSpan = (int)Math.Round(minDuration * sonogram.FramesPerSecond); - //ii: DETECT HARMONICS - var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, minCallSpan); + // now look for harmonics in search band using the Xcorrelation-DCT method. + var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold); + // set up score arrays double[] dBArray = results.Item1; - double[] intensity = results.Item2; //an array of periodicity scores - double[] periodicity = results.Item3; + double[] harmonicIntensityScores = results.Item2; //an array of formant intesnity + int[] maxIndexArray = results.Item3; - //intensity = DataTools.filterMovingAverage(intensity, 3); - int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise - double[] scoreArray = new double[intensity.Length]; for (int r = 0; r < frameCount; r++) { - if (intensity[r] < harmonicIntensityThreshold) + if (harmonicIntensityScores[r] < dctThreshold) { continue; } //ignore locations with incorrect formant gap - double herzPeriod = periodicity[r] * freqBinWidth; - if (herzPeriod < minFormantGap || herzPeriod > maxFormantGap) - { - continue; - } - - //find freq having max power and use info to adjust score. - //expect humans to have max < 1000 Hz - double[] spectrum = MatrixTools.GetRow(sonogram.Data, r); - for (int j = 0; j < noiseBound; j++) - { - spectrum[j] = 0.0; - } - - int maxIndex = DataTools.GetMaxIndex(spectrum); - int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth); - double discount = 1.0; - if (freqWithMaxPower < 1200) - { - discount = 0.0; - } - - if (intensity[r] > harmonicIntensityThreshold) + int maxId = maxIndexArray[r]; + double freqBinGap = 2 * bandWidthBins / (double)maxId; + double formantGap = freqBinGap * freqBinWidth; + if (formantGap < minFormantGap || formantGap > maxFormantGap) { - scoreArray[r] = intensity[r] * discount; + harmonicIntensityScores[r] = 0.0; } } - // smooth the decibel array to allow for brief gaps. - harmonicScores = DataTools.filterMovingAverageOdd(harmonicScores, 5); + // smooth the harmonicIntensityScores array to allow for brief gaps. + harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 5); //extract the events based on length and threshhold. // Note: This method does NOT do prior smoothing of the score array. var acousticEvents = AcousticEvent.ConvertScoreArray2Events( - harmonicScores, + harmonicIntensityScores, minHz, - maxFftHertz, + maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, decibelThreshold, @@ -171,7 +131,7 @@ public static (List, double[]) GetComponentsWithHarmonics( maxDuration, segmentStartOffset); - return (acousticEvents, harmonicScores); + return (acousticEvents, harmonicIntensityScores); } } } diff --git a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs index dc693887a..4ffb8e5a0 100644 --- a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs +++ b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs @@ -206,6 +206,7 @@ public override RecognizerResults Recognize( hp.MaxHertz.Value, sonogram.NyquistFrequency, hp.DecibelThreshold.Value, + hp.DctThreshold.Value, hp.MinDuration.Value, hp.MaxDuration.Value, hp.MinFormantGap.Value, diff --git a/src/AudioAnalysisTools/CrossCorrelation.cs b/src/AudioAnalysisTools/CrossCorrelation.cs index 4513a2de4..6fd64caad 100644 --- a/src/AudioAnalysisTools/CrossCorrelation.cs +++ b/src/AudioAnalysisTools/CrossCorrelation.cs @@ -29,6 +29,7 @@ public class CrossCorrelation //public const string key_COUNT = "count"; + /* public static Tuple DetectBarsUsingXcorrelation(double[,] m, int rowStep, int rowWidth, int colStep, int colWidth, double intensityThreshold, int zeroBinCount) { @@ -103,6 +104,7 @@ public static Tuple DetectBarsUsingXc return Tuple.Create(intensityMatrix, periodicityMatrix, hitsMatrix, array2return); } + */ /// /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram. @@ -204,5 +206,55 @@ public static Tuple DetectHarmonicsInSonogramMatri return Tuple.Create(dBArray, intensity, periodicity); } + + /// + /// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram. + /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram. + /// Developed for GenericRecognizer of harmonics. + /// + /// data matrix. + /// Minimum sound level. + /// two arrays. + public static Tuple DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold) + { + int rowCount = m.GetLength(0); + int colCount = m.GetLength(1); + double[] dBArray = new double[rowCount]; + var intensity = new double[rowCount]; //an array of formant intensity + var maxIndexArray = new int[rowCount]; //an array of max value index values + var binCount = m.GetLength(1); + double[,] cosines = MFCCStuff.Cosines(binCount, binCount); //set up the cosine coefficients + + // for all time frames + for (int t = 0; t < rowCount; t++) + { + var frame = MatrixTools.GetRow(m, t); + double maxValue = frame.Max(); + dBArray[t] = maxValue; + if (maxValue < dBThreshold) + { + continue; + } + + double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame); + + // xr has twice length of frame and is symmetrical. + // Require only first half. Also need to normalise the values for overlap count. + double[] normXr = new double[colCount]; + for (int i = 0; i < colCount; i++) + { + normXr[i] = xr[i] / (colCount - i); + } + + // now do DCT across the auto cross xr + int lowerDctBound = 2; + var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound); + int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients); + intensity[t] = dctCoefficients[indexOfMaxValue]; + maxIndexArray[t] = indexOfMaxValue; + } // frames = rows of matrix + + return Tuple.Create(dBArray, intensity, maxIndexArray); + } } } diff --git a/src/AudioAnalysisTools/Oscillations2012.cs b/src/AudioAnalysisTools/Oscillations2012.cs index e4e6173ff..a459fdcff 100644 --- a/src/AudioAnalysisTools/Oscillations2012.cs +++ b/src/AudioAnalysisTools/Oscillations2012.cs @@ -158,48 +158,10 @@ public static void Execute( dctArray[i] = matrix[r + i, c]; } - //dctArray = DataTools.Vector2Zscores(dctArray); - - dctArray = DataTools.SubtractMean(dctArray); - double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines); - - // convert to absolute values because not interested in negative values due to phase. - for (int i = 0; i < dctLength; i++) - { - dctCoeff[i] = Math.Abs(dctCoeff[i]); - } - - // remove low freq oscillations from consideration - int thresholdIndex = minIndex / 4; - for (int i = 0; i < thresholdIndex; i++) - { - dctCoeff[i] = 0.0; - } - - dctCoeff = DataTools.normalise2UnitLength(dctCoeff); - - //dct = DataTools.NormaliseMatrixValues(dct); //another option to NormaliseMatrixValues + int lowerDctBound = minIndex / 4; + var dctCoeff = DoDct(dctArray, cosines, lowerDctBound); int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff); - //double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi - - // #### Tried this option for scoring oscillation hits but did not work well. - // #### Requires very fine tuning of thresholds - //dctCoeff = DataTools.Normalise2Probabilities(dctCoeff); - //// sum area under curve where looking for oscillations - //double sum = 0.0; - //for (int i = minIndex; i <= maxIndex; i++) - // sum += dctCoeff[i]; - //if (sum > dctThreshold) - //{ - // for (int i = 0; i < dctLength; i++) hits[r + i, c] = midOscilFreq; - //} - - // DEBUGGING - // DataTools.MinMax(dctCoeff, out min, out max); - //DataTools.writeBarGraph(dctArray); - //DataTools.writeBarGraph(dctCoeff); - //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold) { @@ -218,6 +180,29 @@ public static void Execute( return hits; } + public static double[] DoDct(double[] vector, double[,] cosines, int lowerDctBound) + { + //var dctArray = DataTools.Vector2Zscores(dctArray); + var dctArray = DataTools.SubtractMean(vector); + int dctLength = dctArray.Length; + double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines); + + // convert to absolute values because not interested in negative values due to phase. + for (int i = 0; i < dctLength; i++) + { + dctCoeff[i] = Math.Abs(dctCoeff[i]); + } + + // remove lower coefficients from consideration because they dominate + for (int i = 0; i < lowerDctBound; i++) + { + dctCoeff[i] = 0.0; + } + + dctCoeff = DataTools.normalise2UnitLength(dctCoeff); + return dctCoeff; + } + /// /// Removes single lines of hits from Oscillation matrix. /// diff --git a/src/TowseyLibrary/AutoAndCrossCorrelation.cs b/src/TowseyLibrary/AutoAndCrossCorrelation.cs index 6acfa8a42..a6fe2472e 100644 --- a/src/TowseyLibrary/AutoAndCrossCorrelation.cs +++ b/src/TowseyLibrary/AutoAndCrossCorrelation.cs @@ -1,4 +1,4 @@ -// +// // All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). // @@ -130,12 +130,19 @@ public static void corrr1d( *************************************************************************/ - /// - /// returns the fft spectrum of a cross-correlation function - /// - /// - /// - /// + public static double[] AutoCrossCorr(double[] v) + { + int n = v.Length; + alglib.corrr1d(v, n, v, n, out double[] xr); + return xr; + } + + /// + /// returns the fft spectrum of a cross-correlation function. + /// + /// + /// + /// public static double[] CrossCorr(double[] v1, double[] v2) { int n = v1.Length; // assume both vectors of same length