Skip to content

Commit

Permalink
Work on the harmonic recognizer
Browse files Browse the repository at this point in the history
Issue #281  woring on the harmonic recognizer - in particular implement a new method to detect harmonics using AuotoCorrelation followed by DCT. this still needs more work as it is not yet detecting Curlew harmonics - my teset case.
  • Loading branch information
towsey committed Feb 8, 2020
1 parent 4571c2d commit 08cb0fe
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 110 deletions.
86 changes: 23 additions & 63 deletions src/AnalysisPrograms/Recognizers/Base/HarmonicParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ public class HarmonicParameters : CommonParameters
//MinFormantGap: 800
//MaxFormantGap: 2200

/// <summary>
/// Gets or sets the dctThreshold.
/// </summary>
public double? DctThreshold { get; set; }

/// <summary>
/// Gets or sets the bottom bound of the rectangle. Units are Hertz.
/// </summary>
Expand All @@ -61,14 +66,13 @@ public static (List<AcousticEvent>, double[]) GetComponentsWithHarmonics(
int maxHz,
int nyquist,
double decibelThreshold,
double dctThreshold,
double minDuration,
double maxDuration,
int minFormantGap,
int maxFormantGap,
TimeSpan segmentStartOffset)
{
// parameters to be passed
double harmonicIntensityThreshold = 0.15;

// Event threshold - Determines FP / FN trade-off for events.
//double eventThreshold = 0.2;
Expand All @@ -80,98 +84,54 @@ public static (List<AcousticEvent>, double[]) GetComponentsWithHarmonics(
double freqBinWidth = nyquist / (double)binCount;
int minBin = (int)Math.Round(minHz / freqBinWidth);
int maxBin = (int)Math.Round(maxHz / freqBinWidth);

// set up score arrays
var harmonicScores = new double[frameCount];
var formantGaps = new double[frameCount];

// now look for harmonics in search band using the Xcorrelation-FFT technique.
int bandWidthBins = maxBin - minBin + 1;

//the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
//assuming sr=22050 and window= 512, then freq bin width = 43.0664 and 64 fft bins span 2756 Hz above the min Hz level.
//assuming sr=22050 and window= 512, then freq bin width = 43.0664 and 128 fft bins span 5513 Hz above the min Hz level.
//assuming sr=22050 and window=1024, then freq bin width = 21.5332 and 64 fft bins span 1378 Hz above the min Hz level.
//assuming sr=22050 and window=1024, then freq bin width = 21.5332 and 128 fft bins span 2756 Hz above the min Hz level.
//assuming sr=22050 and window=1024, then freq bin width = 21.5332 and 256 fft bins span 5513 Hz above the min Hz level.

int numberOfFftBins = 2048;
while (numberOfFftBins > bandWidthBins && (minBin + numberOfFftBins) > binCount)
{
numberOfFftBins /= 2;
}

//numberOfFftBins = Math.Min(256, numberOfFftBins);
maxBin = minBin + numberOfFftBins - 1;
int maxFftHertz = (int)Math.Ceiling(maxBin * sonogram.FBinWidth);

// extract the sub-band
double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, frameCount - 1, maxBin);

int minCallSpan = (int)Math.Round(minDuration * sonogram.FramesPerSecond);

//ii: DETECT HARMONICS
var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, minCallSpan);
// now look for harmonics in search band using the Xcorrelation-DCT method.
var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold);

// set up score arrays
double[] dBArray = results.Item1;
double[] intensity = results.Item2; //an array of periodicity scores
double[] periodicity = results.Item3;
double[] harmonicIntensityScores = results.Item2; //an array of formant intesnity
int[] maxIndexArray = results.Item3;

//intensity = DataTools.filterMovingAverage(intensity, 3);
int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
double[] scoreArray = new double[intensity.Length];
for (int r = 0; r < frameCount; r++)
{
if (intensity[r] < harmonicIntensityThreshold)
if (harmonicIntensityScores[r] < dctThreshold)
{
continue;
}

//ignore locations with incorrect formant gap
double herzPeriod = periodicity[r] * freqBinWidth;
if (herzPeriod < minFormantGap || herzPeriod > maxFormantGap)
{
continue;
}

//find freq having max power and use info to adjust score.
//expect humans to have max < 1000 Hz
double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
for (int j = 0; j < noiseBound; j++)
{
spectrum[j] = 0.0;
}

int maxIndex = DataTools.GetMaxIndex(spectrum);
int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
double discount = 1.0;
if (freqWithMaxPower < 1200)
{
discount = 0.0;
}

if (intensity[r] > harmonicIntensityThreshold)
int maxId = maxIndexArray[r];
double freqBinGap = 2 * bandWidthBins / (double)maxId;
double formantGap = freqBinGap * freqBinWidth;
if (formantGap < minFormantGap || formantGap > maxFormantGap)
{
scoreArray[r] = intensity[r] * discount;
harmonicIntensityScores[r] = 0.0;
}
}

// smooth the decibel array to allow for brief gaps.
harmonicScores = DataTools.filterMovingAverageOdd(harmonicScores, 5);
// smooth the harmonicIntensityScores array to allow for brief gaps.
harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 5);

//extract the events based on length and threshhold.
// Note: This method does NOT do prior smoothing of the score array.
var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
harmonicScores,
harmonicIntensityScores,
minHz,
maxFftHertz,
maxHz,
sonogram.FramesPerSecond,
sonogram.FBinWidth,
decibelThreshold,
minDuration,
maxDuration,
segmentStartOffset);

return (acousticEvents, harmonicScores);
return (acousticEvents, harmonicIntensityScores);
}
}
}
1 change: 1 addition & 0 deletions src/AnalysisPrograms/Recognizers/GenericRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ public override RecognizerResults Recognize(
hp.MaxHertz.Value,
sonogram.NyquistFrequency,
hp.DecibelThreshold.Value,
hp.DctThreshold.Value,
hp.MinDuration.Value,
hp.MaxDuration.Value,
hp.MinFormantGap.Value,
Expand Down
52 changes: 52 additions & 0 deletions src/AudioAnalysisTools/CrossCorrelation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public class CrossCorrelation

//public const string key_COUNT = "count";

/*
public static Tuple<double[,], double[,], double[,], double[]> DetectBarsUsingXcorrelation(double[,] m, int rowStep, int rowWidth, int colStep, int colWidth,
double intensityThreshold, int zeroBinCount)
{
Expand Down Expand Up @@ -103,6 +104,7 @@ public static Tuple<double[,], double[,], double[,], double[]> DetectBarsUsingXc
return Tuple.Create(intensityMatrix, periodicityMatrix, hitsMatrix, array2return);
}
*/

/// <summary>
/// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
Expand Down Expand Up @@ -204,5 +206,55 @@ public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatri

return Tuple.Create(dBArray, intensity, periodicity);
}

/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram.
/// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
/// Developed for GenericRecognizer of harmonics.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>two arrays.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
int rowCount = m.GetLength(0);
int colCount = m.GetLength(1);
double[] dBArray = new double[rowCount];
var intensity = new double[rowCount]; //an array of formant intensity
var maxIndexArray = new int[rowCount]; //an array of max value index values
var binCount = m.GetLength(1);
double[,] cosines = MFCCStuff.Cosines(binCount, binCount); //set up the cosine coefficients

// for all time frames
for (int t = 0; t < rowCount; t++)
{
var frame = MatrixTools.GetRow(m, t);
double maxValue = frame.Max();
dBArray[t] = maxValue;
if (maxValue < dBThreshold)
{
continue;
}

double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

// xr has twice length of frame and is symmetrical.
// Require only first half. Also need to normalise the values for overlap count.
double[] normXr = new double[colCount];
for (int i = 0; i < colCount; i++)
{
normXr[i] = xr[i] / (colCount - i);
}

// now do DCT across the auto cross xr
int lowerDctBound = 2;
var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
intensity[t] = dctCoefficients[indexOfMaxValue];
maxIndexArray[t] = indexOfMaxValue;
} // frames = rows of matrix

return Tuple.Create(dBArray, intensity, maxIndexArray);
}
}
}
65 changes: 25 additions & 40 deletions src/AudioAnalysisTools/Oscillations2012.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,48 +158,10 @@ public static void Execute(
dctArray[i] = matrix[r + i, c];
}

//dctArray = DataTools.Vector2Zscores(dctArray);

dctArray = DataTools.SubtractMean(dctArray);
double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines);

// convert to absolute values because not interested in negative values due to phase.
for (int i = 0; i < dctLength; i++)
{
dctCoeff[i] = Math.Abs(dctCoeff[i]);
}

// remove low freq oscillations from consideration
int thresholdIndex = minIndex / 4;
for (int i = 0; i < thresholdIndex; i++)
{
dctCoeff[i] = 0.0;
}

dctCoeff = DataTools.normalise2UnitLength(dctCoeff);

//dct = DataTools.NormaliseMatrixValues(dct); //another option to NormaliseMatrixValues
int lowerDctBound = minIndex / 4;
var dctCoeff = DoDct(dctArray, cosines, lowerDctBound);
int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

//double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi

// #### Tried this option for scoring oscillation hits but did not work well.
// #### Requires very fine tuning of thresholds
//dctCoeff = DataTools.Normalise2Probabilities(dctCoeff);
//// sum area under curve where looking for oscillations
//double sum = 0.0;
//for (int i = minIndex; i <= maxIndex; i++)
// sum += dctCoeff[i];
//if (sum > dctThreshold)
//{
// for (int i = 0; i < dctLength; i++) hits[r + i, c] = midOscilFreq;
//}

// DEBUGGING
// DataTools.MinMax(dctCoeff, out min, out max);
//DataTools.writeBarGraph(dctArray);
//DataTools.writeBarGraph(dctCoeff);

//mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold)
{
Expand All @@ -218,6 +180,29 @@ public static void Execute(
return hits;
}

public static double[] DoDct(double[] vector, double[,] cosines, int lowerDctBound)
{
//var dctArray = DataTools.Vector2Zscores(dctArray);
var dctArray = DataTools.SubtractMean(vector);
int dctLength = dctArray.Length;
double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines);

// convert to absolute values because not interested in negative values due to phase.
for (int i = 0; i < dctLength; i++)
{
dctCoeff[i] = Math.Abs(dctCoeff[i]);
}

// remove lower coefficients from consideration because they dominate
for (int i = 0; i < lowerDctBound; i++)
{
dctCoeff[i] = 0.0;
}

dctCoeff = DataTools.normalise2UnitLength(dctCoeff);
return dctCoeff;
}

/// <summary>
/// Removes single lines of hits from Oscillation matrix.
/// </summary>
Expand Down
21 changes: 14 additions & 7 deletions src/TowseyLibrary/AutoAndCrossCorrelation.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// <copyright file="AutoAndCrossCorrelation.cs" company="QutEcoacoustics">
// <copyright file="AutoAndCrossCorrelation.cs" company="QutEcoacoustics">
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
// </copyright>

Expand Down Expand Up @@ -130,12 +130,19 @@ public static void corrr1d(
*************************************************************************/

/// <summary>
/// returns the fft spectrum of a cross-correlation function
/// </summary>
/// <param name="v1"></param>
/// <param name="v2"></param>
/// <returns></returns>
public static double[] AutoCrossCorr(double[] v)
{
int n = v.Length;
alglib.corrr1d(v, n, v, n, out double[] xr);
return xr;
}

/// <summary>
/// returns the fft spectrum of a cross-correlation function.
/// </summary>
/// <param name="v1"></param>
/// <param name="v2"></param>
/// <returns></returns>
public static double[] CrossCorr(double[] v1, double[] v2)
{
int n = v1.Length; // assume both vectors of same length
Expand Down

0 comments on commit 08cb0fe

Please sign in to comment.