Skip to content

Commit

Permalink
Work on Octave Spectrograms
Browse files Browse the repository at this point in the history
Issue #332 Get Octave spectrograms going.
  • Loading branch information
towsey committed Jul 31, 2020
1 parent 57cdec6 commit a7ca52f
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 219 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -431,23 +431,23 @@ public static Image<Rgb24> GetLcnSpectrogram(
double neighbourhoodSeconds,
double lcnContrastLevel)
{
BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);
int neighbourhoodFrames = (int)(sonogram.FramesPerSecond * neighbourhoodSeconds);
LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", sonogram.FramesPerSecond);
BaseSonogram spectrogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);
int neighbourhoodFrames = (int)(spectrogram.FramesPerSecond * neighbourhoodSeconds);
LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", spectrogram.FramesPerSecond);
LoggedConsole.WriteLine("LCN: Neighbourhood of {0} seconds = {1} frames", neighbourhoodSeconds, neighbourhoodFrames);

// subtract the lowest 20% of frames. This is first step in LCN noise removal. Sets the baseline.
const int lowPercentile = 20;
sonogram.Data =
NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(sonogram.Data, lowPercentile);
sonogram.Data =
NoiseRemoval_Briggs.NoiseReduction_byLCNDivision(sonogram.Data, neighbourhoodFrames, lcnContrastLevel);
spectrogram.Data =
NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(spectrogram.Data, lowPercentile);
spectrogram.Data =
NoiseRemoval_Briggs.NoiseReduction_byLCNDivision(spectrogram.Data, neighbourhoodFrames, lcnContrastLevel);

//Matrix normalisation
//MatrixTools.PercentileCutoffs(sonogram.Data, 10.0, 90, out double minCut, out double maxCut);
//NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(sonogram.Data, lowPercentile);
// Finally background noise removal. This step is optional.
double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(spectrogram.Data);
spectrogram.Data = SNR.TruncateBgNoiseFromSpectrogram(spectrogram.Data, spectralDecibelBgn);

var image = sonogram.GetImageFullyAnnotated(
var image = spectrogram.GetImageFullyAnnotated(
"AMPLITUDE SPECTROGRAM with freq bin Local Contrast Normalization - " + sourceRecordingName,
ImageTags[AmplitudeSpectrogramLocalContrastNormalization]);
return image;
Expand Down
10 changes: 6 additions & 4 deletions src/AudioAnalysisTools/DSP/FrequencyScale.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ public enum FreqScaleType
{
Linear = 0,
Mel = 1,
Linear62Octaves7Tones31Nyquist11025 = 2,
Linear125Octaves6Tones30Nyquist11025 = 3,
Octaves24Nyquist32000 = 4,
Linear125Octaves7Tones28Nyquist32000 = 5,
LinearOctaveStandard = 2,
Linear62Octaves7Tones31Nyquist11025 = 3,
Linear125Octaves6Tones30Nyquist11025 = 4,
OctaveDataReduction = 5,
Octaves24Nyquist32000 = 6,
Linear125Octaves7Tones28Nyquist32000 = 7,

// alias Octave to predefined choice
Octave = Linear125Octaves7Tones28Nyquist32000,
Expand Down
2 changes: 1 addition & 1 deletion src/AudioAnalysisTools/DSP/NoiseProfile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ public static NoiseProfile CalculatePercentileNoiseProfile(double[,] matrix, int
/// (1) MODAL METHOD
/// Calculates the modal background noise for each frequency bin.
/// Return the smoothed modal profile.
/// By default set the number of SDs = 0.
/// Set the default zero value for number of SDs.
/// </summary>
/// <param name="spectrogram">Assumes the passed spectrogram is oriented as: rows=frames, cols=freq bins.</param>
public static double[] CalculateBackgroundNoise(double[,] spectrogram)
Expand Down
121 changes: 68 additions & 53 deletions src/AudioAnalysisTools/DSP/OctaveFreqScale.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ namespace AudioAnalysisTools.DSP
{
using System;
using System.Collections.Generic;
using AudioAnalysisTools.StandardSpectrograms;
using AudioAnalysisTools.WavTools;
using MathNet.Numerics;
using TowseyLibrary;

Expand All @@ -31,6 +29,21 @@ public static void GetOctaveScale(FrequencyScale scale)

switch (fst)
{
case FreqScaleType.LinearOctaveStandard:
// The number of octaveDivisions/tones (T) is set equal to number of linear bins.
// The remainder of the spectrum will be reduced over four T-tone octaves
scale.LinearBound = 500;
sr = 22050;
scale.Nyquist = sr / 2;
frameSize = 512;
var binWidth = sr / (double)frameSize;
var linearBinCount = (int)Math.Floor(scale.LinearBound / binWidth);
octaveDivisions = linearBinCount;
scale.OctaveCount = 1; //#################### CHECK THIS - APPEARS NOT TO BE USED
//finalBinCount = linearBinCount + 79;
finalBinCount = linearBinCount + 51;
break;

case FreqScaleType.Linear62Octaves7Tones31Nyquist11025:
// constants required for split linear-octave scale when sr = 22050
sr = 22050;
Expand All @@ -51,6 +64,18 @@ public static void GetOctaveScale(FrequencyScale scale)
scale.Nyquist = 11025;
break;

case FreqScaleType.OctaveDataReduction:
// This data conversion is for data reduction purposes.
// The remainder of the spectrum will be reduced over four 6-tone octaves
sr = 22050;
frameSize = 512;
finalBinCount = 45;
scale.OctaveCount = 4;
octaveDivisions = 6; // tone steps within one octave.
scale.LinearBound = 1000;
scale.Nyquist = 11025;
break;

case FreqScaleType.Octaves24Nyquist32000:
//// constants required for full octave scale when sr = 64000
sr = 64000;
Expand Down Expand Up @@ -83,50 +108,15 @@ public static void GetOctaveScale(FrequencyScale scale)
scale.GridLineLocations = GetGridLineLocations(fst, scale.BinBounds);
}

/// <summary>
/// This method takes an audio recording and returns an octave scale spectrogram.
/// An amplitude sonogram is built from the recording, converted to a decibel octave-scale spectrogram
/// and then noise reduced.
/// At the present time it only works for recordings with 64000 sample rate and returns a 256 bin sonogram.
/// TODO: generalise this method for other recordings and octave scales.
/// </summary>
/// <param name="recording">The source recording; its WavReader and BaseName are used.</param>
/// <param name="fst">The frequency scale type used to construct the <see cref="FrequencyScale"/>.</param>
/// <returns>The sonogram whose Data has been replaced by the noise-reduced octave-scale matrix.</returns>
public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording recording, FreqScaleType fst)
{
var freqScale = new FrequencyScale(fst);
double windowOverlap = 0.75;

// No noise reduction is requested in the config; it is applied explicitly after the octave conversion below.
var sonoConfig = new SonogramConfig
{
WindowSize = freqScale.WindowSize,
WindowOverlap = windowOverlap,
SourceFName = recording.BaseName,
NoiseReductionType = NoiseReductionType.None,
NoiseReductionParameter = 0.0,
};

// Generate amplitude sonogram and then convert to octave scale
var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

// THIS IS THE CRITICAL LINE.
// TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method
sonogram.Data = ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

// DO NOISE REDUCTION
var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
sonogram.Data = dataMatrix;

// Overwrite the window size with twice the final bin count and derive the window step from the overlap.
int windowSize = freqScale.FinalBinCount * 2;
sonogram.Configuration.WindowSize = windowSize;
sonogram.Configuration.WindowStep = (int)Math.Round(windowSize * (1 - windowOverlap));
return sonogram;
}

/// <summary>
/// Converts an amplitude spectrogram to a decibel spectrogram on an octave frequency scale.
/// The values are squared, the result is passed to ConvertLinearSpectrogramToOctaveFreqScale,
/// and the output of that is passed to MatrixTools.Power2DeciBels.
/// The min and max values reported by the decibel conversion are not used.
/// </summary>
/// <param name="inputSpgram">The amplitude spectrogram matrix.</param>
/// <param name="freqScale">The octave frequency scale passed to the scale conversion.</param>
/// <returns>The matrix returned by MatrixTools.Power2DeciBels.</returns>
public static double[,] ConvertAmplitudeSpectrogramToDecibelOctaveScale(double[,] inputSpgram, FrequencyScale freqScale)
{
//var dataMatrix = MatrixTools.Submatrix(inputSpgram, 0, 1, inputSpgram.GetLength(0) - 1, inputSpgram.GetLength(1) - 1);
//square the values to produce power spectrogram
var dataMatrix = MatrixTools.SquareValues(inputSpgram);

//convert spectrogram to octave scale
dataMatrix = ConvertLinearSpectrogramToOctaveFreqScale(dataMatrix, freqScale);
dataMatrix = MatrixTools.Power2DeciBels(dataMatrix, out var min, out var max);
return dataMatrix;
var newMatrix = ConvertLinearSpectrogramToOctaveFreqScale(dataMatrix, freqScale);
newMatrix = MatrixTools.Power2DeciBels(newMatrix, out var min, out var max);
return newMatrix;
}

/// <summary>
Expand All @@ -149,9 +139,6 @@ public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording

// get the octave bin bounds for this octave scale type
var octaveBinBounds = freqScale.BinBounds;

//var octaveBinBounds = GetOctaveScale(freqScale.ScaleType);

int newBinCount = octaveBinBounds.GetLength(0);

// set up the new octave spectrogram
Expand Down Expand Up @@ -195,7 +182,6 @@ public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording

/// <summary>
/// Converts an amplitude spectrogram to a power spectrogram having an octave frequency scale.
/// This method has been copied from a method of same name in the class MFCCStuff.cs and adapted to produce an octave freq scale.
/// It transforms the amplitude spectrogram in the following steps:
/// (1) It removes the DC row or bin 0 iff there is odd number of spectrogram bins. ASSUMPTION: Bin count should be power of 2 from FFT.
/// (1) It converts spectral amplitudes to power, normalising for window power and sample rate.
Expand Down Expand Up @@ -292,6 +278,17 @@ public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording
gridLineLocations[5, 1] = 4000; // 4000
gridLineLocations[6, 1] = 8000; // 8000
break;

case FreqScaleType.OctaveDataReduction:
//This Octave Scale does not require grid lines. It is for data reduction purposes only
gridLineLocations = new int[6, 2];
break;

case FreqScaleType.LinearOctaveStandard:
gridLineLocations = new int[8, 2];
LoggedConsole.WriteErrorLine("This Octave Scale does not currently have grid data provided.");
break;

case FreqScaleType.Octaves24Nyquist32000:
gridLineLocations = new int[8, 2];
LoggedConsole.WriteErrorLine("This Octave Scale does not currently have grid data provided.");
Expand Down Expand Up @@ -414,6 +411,12 @@ public static double[] OctaveSpectrum(int[,] octaveBinBounds, double[] linearSpe
/// <summary>
/// Returns the index bounds for a full octave scale - from lowest freq set by user to top freq.
/// </summary>
/// <param name="sr">Sample rate of the source recording.</param>
/// <param name="frameSize">Frame size of the source recording.</param>
/// <param name="finalBinCount">Final Bin Count.</param>
/// <param name="lowerFreqBound">Lower bound of the octave part of the final scale.</param>
/// <param name="upperFreqBound">Upper bound of the octave scale, most likely the Nyquist.</param>
/// <param name="octaveDivisions">Number of tones/divisions per octave.</param>
public static int[,] LinearToFullOctaveScale(int sr, int frameSize, int finalBinCount, int lowerFreqBound, int upperFreqBound, int octaveDivisions)
{
var bandBounds = GetFractionalOctaveBands(lowerFreqBound, upperFreqBound, octaveDivisions);
Expand Down Expand Up @@ -441,46 +444,58 @@ public static double[] OctaveSpectrum(int[,] octaveBinBounds, double[] linearSpe

/// <summary>
/// Returns the frequencies (in Hertz) of the fractional-octave tones for every octave whose
/// lower bound lies between minFreq and maxFreq.
/// Octave lower bounds come from a fixed table that doubles from 15.625 Hz up to 64000 Hz.
/// Each selected octave is subdivided by the two-argument overload of this method.
/// NOTE: only the octave lower bound is compared with maxFreq; individual tones within the last
/// selected octave are not checked against maxFreq.
/// </summary>
/// <param name="minFreq">Octave lower bounds and tones below this frequency are skipped.</param>
/// <param name="maxFreq">Iteration stops at the first octave lower bound above this frequency.</param>
/// <param name="octaveDivisions">Number of tones/divisions per octave.</param>
/// <returns>The tone frequencies in ascending table order.</returns>
public static double[] GetFractionalOctaveBands(double minFreq, double maxFreq, int octaveDivisions)
{
double[] freqBandCentres = { 15.625, 31.25, 62.5, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000 };
double[] octaveLowerBounds = { 15.625, 31.25, 62.5, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000 };

var list = new List<double>();

for (int i = 0; i < freqBandCentres.Length; i++)
for (int i = 0; i < octaveLowerBounds.Length; i++)
{
if (freqBandCentres[i] < minFreq)
// ignore this octave floor if below that required.
if (octaveLowerBounds[i] < minFreq)
{
continue;
}

if (freqBandCentres[i] > maxFreq)
// stop when octave floor is above that required.
if (octaveLowerBounds[i] > maxFreq)
{
break;
}

double[] fractionalOctaveBands = GetFractionalOctaveBands(freqBandCentres[i], octaveDivisions);
// get the frequency tones in the given octave.
double[] tonesInOctave = GetFractionalOctaveBands(octaveLowerBounds[i], octaveDivisions);

for (int j = 0; j < octaveDivisions; j++)
{
double floor = fractionalOctaveBands[j]; // sqrt2;
if (floor < minFreq)
double toneFloor = tonesInOctave[j];
if (toneFloor < minFreq)
{
continue;
}

list.Add(floor);
list.Add(toneFloor);
}
}

return list.ToArray();
}

/// <summary>
/// Returns an array of tones in one octave.
/// The units are frequency in Hertz.
/// NOTE: The octave is divided geometrically.
/// </summary>
/// <param name="lowerBound">The lower frequency bound of the octave.</param>
/// <param name="subbandCount">The number of tones or frequency bins in the octave.</param>
/// <returns>The frequency of each tone in the octave.</returns>
public static double[] GetFractionalOctaveBands(double lowerBound, int subbandCount)
{
double[] fractionalOctaveBands = new double[subbandCount];
fractionalOctaveBands[0] = lowerBound;
double exponent = 1 / (double)subbandCount;
double factor = Math.Pow(2, exponent);

// calculate the frequency increment factor between each tone and the next.
double factor = Math.Pow(2, exponent);
for (int i = 1; i < subbandCount; i++)
{
fractionalOctaveBands[i] = fractionalOctaveBands[i - 1] * factor;
Expand Down
3 changes: 0 additions & 3 deletions src/AudioAnalysisTools/DSP/SNR.cs
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,6 @@ public static BackgroundNoise CalculateModalBackgroundNoiseInSignal(double[] arr
}

int[] histo = Histogram.Histo(array, binCount, out var binWidth, out var min, out var max);
////Log.WriteLine("BindWidth = "+ binWidth);

int smoothingwindow = 3;
if (binCount > 250)
Expand All @@ -834,8 +833,6 @@ public static BackgroundNoise CalculateModalBackgroundNoiseInSignal(double[] arr
}

double[] smoothHisto = DataTools.filterMovingAverage(histo, smoothingwindow);
////DataTools.writeBarGraph(histo);

GetModeAndOneStandardDeviation(smoothHisto, out var indexOfMode, out var indexOfOneStdDev);

// modal noise level gets symbol Q in Lamel et al.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// <copyright file="SpectrogramCepstral.cs" company="QutEcoacoustics">
// <copyright file="SpectrogramMelScale.cs" company="QutEcoacoustics">
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
// </copyright>

Expand All @@ -7,7 +7,6 @@ namespace AudioAnalysisTools.StandardSpectrograms
using System;
using Acoustics.Tools.Wav;
using AudioAnalysisTools.DSP;
using AudioAnalysisTools.WavTools;
using TowseyLibrary;

public class SpectrogramMelScale : BaseSonogram
Expand Down Expand Up @@ -37,7 +36,7 @@ public SpectrogramMelScale(AmplitudeSonogram sg)
this.SnrData = sg.SnrData;
this.Data = sg.Data;

//converts amplitude matrix to cepstral sonogram
//converts amplitude matrix to Mel-frequency scale spectrogram
this.Make(this.Data);
}

Expand All @@ -54,9 +53,8 @@ public SpectrogramMelScale(AmplitudeSonogram sg, int minHz, int maxHz)
this.SigState = sg.SigState;
this.SnrData = sg.SnrData;

this.Data = SpectrogramTools.ExtractFreqSubband(sg.Data, minHz, maxHz, this.Configuration.DoMelScale, sg.Configuration.FreqBinCount, sg.FBinWidth);

//converts amplitude matrix to mel-frequency scale spectrogram
this.Data = SpectrogramTools.ExtractFreqSubband(sg.Data, minHz, maxHz, this.Configuration.DoMelScale, sg.Configuration.FreqBinCount, sg.FBinWidth);
this.Make(this.Data);
}

Expand Down
Loading

0 comments on commit a7ca52f

Please sign in to comment.