From b62e20e4a5582c5f1d63c0d223a45a681fa9c8d1 Mon Sep 17 00:00:00 2001 From: towsey Date: Thu, 6 Aug 2020 21:16:21 +1000 Subject: [PATCH] Work on Mel scale spectrograms Issue #332 --- src/AudioAnalysisTools/DSP/MFCCStuff.cs | 21 ++++++ .../SpectrogramMelScale.cs | 65 +++++++++---------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/AudioAnalysisTools/DSP/MFCCStuff.cs b/src/AudioAnalysisTools/DSP/MFCCStuff.cs index 4aab320c7..85aef0fd8 100644 --- a/src/AudioAnalysisTools/DSP/MFCCStuff.cs +++ b/src/AudioAnalysisTools/DSP/MFCCStuff.cs @@ -212,6 +212,7 @@ public static double Mel(double f) /// /// Converts a Mel value to Herz. + /// NOTE: By default this Mel scale is linear to 1000 Hz. /// /// the Herz value. public static double InverseMel(double mel) @@ -322,6 +323,26 @@ public static double InverseHerzTranform(double m, double c, double div) return outData; } + /// + /// Returns an [N, 2] matrix with the Mel bin ID in column 1 and the lower Hertz bound of that Mel bin in column 2. + /// + public static int[,] GetMelBinBounds(int nyquist, int melBinCount) + { + double maxMel = (int)MFCCStuff.Mel(nyquist); + double melPerBin = maxMel / melBinCount; + + var binBounds = new int[melBinCount, 2]; + + for (int i = 0; i < melBinCount; i++) + { + binBounds[i, 0] = i; + double mel = i * melPerBin; + binBounds[i, 1] = (int)MFCCStuff.InverseMel(mel); + } + + return binBounds; + } + /// /// Does MelFilterBank for passed sonogram matrix. /// IMPORTANT !!!!! Assumes that min freq of passed sonogram matrix = 0 Hz and maxFreq = Nyquist. 
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs index 7b18d96ec..505e5165e 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs @@ -64,9 +64,16 @@ public SpectrogramMelScale(AmplitudeSonogram sg, int minHz, int maxHz) /// Matrix of amplitude values. public override void Make(double[,] amplitudeM) { - var tuple = MakeMelScaleSpectrogram(this.Configuration, amplitudeM, this.SampleRate); - this.Data = tuple.Item1; - this.ModalNoiseProfile = tuple.Item2; //store the full bandwidth modal noise profile + var m = MakeMelScaleSpectrogram(this.Configuration, amplitudeM, this.SampleRate); + + //(iii) NOISE REDUCTION + var nrt = this.Configuration.NoiseReductionType; + var nrp = this.Configuration.NoiseReductionParameter; + var tuple1 = SNR.NoiseReduce(m, nrt, nrp); + + //store the full bandwidth modal noise profile + this.ModalNoiseProfile = tuple1.Item2; + this.Data = DataTools.normalise(tuple1.Item1); } //################################################################################################################################## @@ -74,16 +81,18 @@ public override void Make(double[,] amplitudeM) /// /// NOTE!!!! The decibel array has been normalised in 0 - 1. /// - protected static Tuple MakeMelScaleSpectrogram(SonogramConfig config, double[,] matrix, int sampleRate) + public static double[,] MakeMelScaleSpectrogram(SonogramConfig config, double[,] matrix, int sampleRate) { double[,] m = matrix; int nyquist = sampleRate / 2; double epsilon = config.epsilon; //(i) APPLY FILTER BANK - int bandCount = config.mfccConfig.FilterbankCount; - int fftBinCount = config.FreqBinCount; //number of Hz bands = 2^N +1. Subtract DC bin + //number of Hz bands = 2^N +1. 
Subtract DC bin + int fftBinCount = config.FreqBinCount; + // Mel band count is set to 64 by default in BaseSonogramConfig class at line 158. + int bandCount = config.mfccConfig.FilterbankCount; Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1)); //error check that filterBankCount < Number of FFT bins @@ -101,46 +110,29 @@ protected static Tuple MakeMelScaleSpectrogram(SonogramConf //(ii) CONVERT AMPLITUDES TO DECIBELS m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon); //from spectrogram - - //(iii) NOISE REDUCTION - var tuple1 = SNR.NoiseReduce(m, config.NoiseReductionType, config.NoiseReductionParameter); - m = tuple1.Item1; - - //(iv) Normalize Matrix Values - m = DataTools.normalise(m); - - var tuple2 = Tuple.Create(m, tuple1.Item2); - - // return matrix and full bandwidth modal noise profile - return tuple2; + return m; } /// - /// TODO: This frequency scale is yet to be completed - it calculates nothing! - /// Currently, MEL scale is implemented directly in MakeMelScaleSpectrogram() method. - /// Calculates the parameters for Mel frequency scale. - /// Works only for "standard" recordings, i.e. sr = 22050 and frame = 512. - /// The default MelScale has 64 frequency bins and Linear500-octave has 66 frequency bands. + /// WARNING: This method assigns DEFAULT parameters for MEL FREQUENCY SCALE. + /// It works only for "standard" recordings, i.e. sr = 22050 and frame = 512. + /// The default MelScale has 64 frequency bins. + /// The Linear500-octave scale is very similar and has 66 frequency bands. + /// Currently, the MEL scale is implemented directly in the MakeMelScaleSpectrogram() method. 
/// public static FrequencyScale GetStandardMelScale(FrequencyScale scale) { - LoggedConsole.WriteErrorLine("WARNING: Assigning DEFAULT parameters for MEL FREQUENCY SCALE."); scale.ScaleType = FreqScaleType.Mel; int sr = 22050; - scale.Nyquist = sr / 2; int frameSize = 512; + + scale.Nyquist = sr / 2; + scale.FinalBinCount = 64; scale.WindowSize = frameSize; scale.LinearBound = 1000; - var binWidth = sr / (double)frameSize; - - // init tone steps within one octave. Note: piano = 12 steps per octave. - scale.ToneCount = 0; - scale.BinBounds = null; - scale.FinalBinCount = 0; - - //this.GridLineLocations = SpectrogramMelScale.GetMelGridLineLocations(this.HertzGridInterval, this.Nyquist, this.FinalBinCount); + scale.BinBounds = MFCCStuff.GetMelBinBounds(scale.Nyquist, scale.FinalBinCount); scale.HertzGridInterval = 1000; - scale.GridLineLocations = null; + scale.GridLineLocations = SpectrogramMelScale.GetMelGridLineLocations(scale.HertzGridInterval, scale.Nyquist, scale.FinalBinCount); return scale; } @@ -154,7 +146,10 @@ public static FrequencyScale GetStandardMelScale(FrequencyScale scale) { double maxMel = (int)MFCCStuff.Mel(nyquistFreq); double melPerBin = maxMel / melBinCount; - int gridCount = nyquistFreq / gridIntervalInHertz; + + // There is no point drawing gridlines above 4 kHz because they are too close together. + int maxGridValue = 4000; + int gridCount = maxGridValue / gridIntervalInHertz; var gridLines = new int[gridCount, 2];