Skip to content

Commit

Permalink
Update BaseSonogram.cs
Browse files Browse the repository at this point in the history
Issue #332 Add another constructor which accepts FreqScale as argument
  • Loading branch information
towsey committed Aug 11, 2020
1 parent 9f2904f commit a3778e3
Showing 1 changed file with 88 additions and 225 deletions.
313 changes: 88 additions & 225 deletions src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,45 +18,23 @@ namespace AudioAnalysisTools.StandardSpectrograms
using SixLabors.ImageSharp.Processing;
using TowseyLibrary;

/*
/// <summary>
/// Sonogram type.
/// Base Sonogram.
/// </summary>
public enum SonogramType
public abstract partial class BaseSonogram
{
/// <summary>
/// Amplitude Sonogram.
/// </summary>
Amplitude,
/// <summary>
/// Spectral Sonogram.
/// </summary>
Spectral,
/// <summary>
/// Cepstral Sonogram.
/// </summary>
Cepstral,
/// <summary>
/// Acoustic Vectors Sonogram.
/// Gets or sets the config information.
/// The Configuration object should contain all the parameters required to construct an amplitude spectrogram given a recording.
/// </summary>
AcousticVectors,
public SonogramConfig Configuration { get; set; }

/// <summary>
/// Sobel Edge Sonogram.
/// Gets or sets the frequency scale information.
/// The FreqScale object should contain all the parameters required to convert the linear frequency scale of the amplitude spectrogram
/// into any reduced or non-linear frequency scale required.
/// </summary>
SobelEdge,
}
*/

/// <summary>
/// Base Sonogram.
/// </summary>
public abstract partial class BaseSonogram
{
public SonogramConfig Configuration { get; set; }
public FrequencyScale FreqScale { get; set; }

public double MaxAmplitude { get; set; }

Expand Down Expand Up @@ -124,21 +102,64 @@ public BaseSonogram(SonogramConfig config)

/// <summary>
/// Initializes a new instance of the <see cref="BaseSonogram"/> class.
/// BASE CONSTRUCTOR
/// This constructor contains all steps required to prepare the amplitude spectrogram.
/// The third boolean parameter is simply a place-filler to ensure a different Constructor signature
/// from the principal Constructor which follows.
/// BASE CONSTRUCTOR.
/// </summary>
/// <param name="config">config file to use.</param>
/// <param name="wav">wav.</param>
/// <param name="dummy">filler boolean. Calculate in method.</param>
public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy)
public BaseSonogram(SonogramConfig config, WavReader wav)
: this(config)
{
// As of 28 March 2017 drop capability to get sub-band of spectrogram because was not being used.
// can be recovered later if desired.
//bool doExtractSubband = this.SubBandMinHz > 0 || this.SubBandMaxHz < this.NyquistFrequency;
this.InitialiseSpectrogram(wav);

// this Make() call makes the desired spectrogram.
this.Make(this.Data);
}

/// <summary>
/// Initializes a new instance of the <see cref="BaseSonogram"/> class.
/// BASE CONSTRUCTOR that also records the frequency scale to be used with the spectrogram.
/// It prepares the amplitude spectrogram from the recording and then calls
/// <see cref="Make(double[,])"/> on the resulting data.
/// </summary>
/// <param name="config">config file to use.</param>
/// <param name="freqScale">frequency scale information; its WindowSize must equal the WindowSize in <paramref name="config"/>.</param>
/// <param name="wav">wav.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="freqScale"/> or <paramref name="wav"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the window sizes of <paramref name="config"/> and <paramref name="freqScale"/> differ.</exception>
public BaseSonogram(SonogramConfig config, FrequencyScale freqScale, WavReader wav)
    : this(config)
{
    // Fail with a named argument rather than a bare NullReferenceException further down.
    if (freqScale == null)
    {
        throw new ArgumentNullException(nameof(freqScale));
    }

    if (wav == null)
    {
        throw new ArgumentNullException(nameof(wav));
    }

    // check that the frameWidths are consistent.
    if (config.WindowSize != freqScale.WindowSize)
    {
        // ArgumentException derives from Exception, so existing catch (Exception) handlers still apply.
        throw new ArgumentException("BaseSonogram: CONSTRUCTOR ERROR: Inconsistency in Frequency Scale conversion data.");
    }

    this.FreqScale = freqScale;
    this.InitialiseSpectrogram(wav);

    // this Make() call makes the desired spectrogram.
    this.Make(this.Data);
}

/// <summary>
/// Initializes a new instance of the <see cref="BaseSonogram"/> class.
/// Use this BASE CONSTRUCTOR when the amplitude spectrogram is already available as a matrix.
/// The normalised signal energy array is allocated but not filled. This has to be done from outside.
/// </summary>
/// <param name="config">the spectrogram config.</param>
/// <param name="amplitudeSpectrogramData">data of an amplitude Spectrogram; the length of its first dimension is used as the frame count.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> or <paramref name="amplitudeSpectrogramData"/> is null.</exception>
public BaseSonogram(SonogramConfig config, double[,] amplitudeSpectrogramData)
{
    // Both arguments are dereferenced below; report which one is missing.
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    if (amplitudeSpectrogramData == null)
    {
        throw new ArgumentNullException(nameof(amplitudeSpectrogramData));
    }

    this.Configuration = config;
    this.FrameCount = amplitudeSpectrogramData.GetLength(0);
    this.SampleRate = this.Configuration.SampleRate;

    //init normalised signal energy array but do nothing with it. This has to be done from outside
    this.DecibelsNormalised = new double[this.FrameCount];
    this.Data = amplitudeSpectrogramData;
}

/// <summary>
/// Implemented by derived classes to make the desired spectrogram.
/// The constructors that take a WavReader call this with the Data matrix once the amplitude spectrogram has been prepared.
/// </summary>
/// <param name="amplitudeM">the amplitude spectrogram matrix.</param>
public abstract void Make(double[,] amplitudeM);

/// <summary>
/// This method creates the amplitude spectrogram.
/// </summary>
private void InitialiseSpectrogram(WavReader wav)
{
this.Duration = wav.Time;
double minDuration = 0.2;
if (this.Duration.TotalSeconds < minDuration)
Expand All @@ -153,11 +174,14 @@ public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy)
this.Configuration.SampleRate = wav.SampleRate; //also set the Nyquist
this.MaxAmplitude = wav.CalculateMaximumAmplitude();

//init normalised signal energy array but do nothing with it. This has to be done from outside
this.DecibelsNormalised = new double[this.FrameCount];

var recording = new AudioRecording(wav);
var fftData = DSP_Frames.ExtractEnvelopeAndFfts(
recording,
config.WindowSize,
config.WindowOverlap,
this.Configuration.WindowSize,
this.Configuration.WindowOverlap,
this.Configuration.WindowFunction);

// now recover required data
Expand All @@ -166,101 +190,37 @@ public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy)
this.Configuration.WindowPower = fftData.WindowPower;
this.FrameCount = fftData.FrameCount;
this.DecibelsPerFrame = fftData.FrameDecibels;

//init normalised signal energy array but do nothing with it. This has to be done from outside
this.DecibelsNormalised = new double[this.FrameCount];
this.Data = fftData.AmplitudeSpectrogram;

// ENERGY PER FRAME and NORMALISED dB PER FRAME AND SNR
// currently DoSnr = true by default
if (config.DoSnr)
if (this.Configuration.DoSnr)
{
// If the FractionOfHighEnergyFrames PRIOR to noise removal exceeds SNR.FractionalBoundForMode,
// then Lamel's noise removal algorithm may not work well.
if (fftData.FractionOfHighEnergyFrames > SNR.FractionalBoundForMode)
{
Log.WriteIfVerbose("\nWARNING ##############");
Log.WriteIfVerbose(
"\t############### BaseSonogram(): This is a high energy recording. Percent of high energy frames = {0:f0} > {1:f0}%",
fftData.FractionOfHighEnergyFrames * 100,
SNR.FractionalBoundForMode * 100);
Log.WriteIfVerbose("\t############### Noise reduction algorithm may not work well in this instance!\n");
}

//AUDIO SEGMENTATION/END POINT DETECTION - based on Lamel et al
// Setting segmentation/endpoint detection parameters is broken as of September 2014.
// The next line is a hack replacement
EndpointDetectionConfiguration.SetDefaultSegmentationConfig();
this.SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsPerFrame, this.FrameStep);
this.CalculateSnrData(fftData.FractionOfHighEnergyFrames);
}

/* AS OF 30 MARCH 2017, NO LONGER IMPLEMENT SUB-BAND THINGS, because not being used for years.
// EXTRACT REQUIRED FREQUENCY BAND
if (doExtractSubband)
{
this.Data = SpectrogramTools.ExtractFreqSubband(
this.Data,
this.subBandMinHz,
this.subBandMaxHz,
this.Configuration.DoMelScale,
this.Configuration.FreqBinCount,
this.FBinWidth);
this.CalculateSubbandSNR(this.Data);
}
*/
}

/// <summary>
/// Initializes a new instance of the <see cref="BaseSonogram"/> class.
/// This BASE CONSTRUCTOR is the one most used - it automatically makes the Amplitude spectrum and
/// then, using a call to Make(), it converts the Amplitude matrix to a Spectrogram whose values are decibels.
/// Calculates SNR, ENERGY PER FRAME and NORMALISED dB PER FRAME.
/// </summary>
/// <param name="config">All parameters required to make spectrogram.</param>
/// <param name="wav">The recording whose spectrogram is to be made.</param>
public BaseSonogram(SonogramConfig config, WavReader wav)
: this(config, wav, false)
private void CalculateSnrData(double highEnergyFraction)
{
this.Make(this.Data);
}

/// <summary>
/// Initializes a new instance of the <see cref="BaseSonogram"/> class.
/// Use this BASE CONSTRUCTOR when the amplitude spectrogram is already available as a matrix.
/// The normalised signal energy array is allocated but not filled. This has to be done from outside.
/// </summary>
/// <param name="config">the spectrogram config.</param>
/// <param name="amplitudeSpectrogramData">data of an amplitude Spectrogram; the length of its first dimension is used as the frame count.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> or <paramref name="amplitudeSpectrogramData"/> is null.</exception>
public BaseSonogram(SonogramConfig config, double[,] amplitudeSpectrogramData)
{
    // Both arguments are dereferenced below; report which one is missing.
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    if (amplitudeSpectrogramData == null)
    {
        throw new ArgumentNullException(nameof(amplitudeSpectrogramData));
    }

    this.Configuration = config;
    this.FrameCount = amplitudeSpectrogramData.GetLength(0);
    this.SampleRate = this.Configuration.SampleRate;

    //init normalised signal energy array but do nothing with it. This has to be done from outside
    this.DecibelsNormalised = new double[this.FrameCount];
    this.Data = amplitudeSpectrogramData;
}

/// <summary>
/// Implemented by derived classes to make the desired spectrogram.
/// The constructors that take a WavReader call this with the Data matrix once the amplitude spectrogram has been prepared.
/// </summary>
/// <param name="amplitudeM">the amplitude spectrogram matrix.</param>
public abstract void Make(double[,] amplitudeM);

/* AS OF 30 MARCH 2017, NO LONGER IMPLEMENT SUB-BAND THINGS, because not being used for years.
public void CalculateSubbandSNR(double[,] subband)
{
this.SnrSubband = new SNR(subband); //subband is the amplitude values
//RECALCULATE DecibelsNormalised and dB REFERENCE LEVEL - need for MFCCs
this.DecibelsInSubband = SnrSubband.Decibels;
this.DecibelReference = SnrSubband.MaxReferenceDecibelsWrtNoise;
this.DecibelsNormalised = SnrSubband.NormaliseDecibelArray_ZeroOne(this.DecibelReference);
//RECALCULATE ENDPOINTS OF VOCALISATIONS
SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsInSubband, this.FrameStep);
}
*/
// If the FractionOfHighEnergyFrames PRIOR to noise removal exceeds SNR.FractionalBoundForMode,
// then Lamel's noise removal algorithm may not work well.
if (highEnergyFraction > SNR.FractionalBoundForMode)
{
Log.WriteIfVerbose("\nWARNING ##############");
Log.WriteIfVerbose(
"\t############### BaseSonogram(): This is a high energy recording. Percent of high energy frames = {0:f0} > {1:f0}%",
highEnergyFraction * 100,
SNR.FractionalBoundForMode * 100);
Log.WriteIfVerbose("\t############### Noise reduction algorithm may not work well in this instance!\n");
}

public void SetTimeScale(TimeSpan duration)
{
this.Duration = duration;
//AUDIO SEGMENTATION/END POINT DETECTION - based on Lamel et al
// Setting segmentation/endpoint detection parameters is broken as of September 2014.
// The next line sets default parameters.
EndpointDetectionConfiguration.SetDefaultSegmentationConfig();
this.SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsPerFrame, this.FrameStep);
}

/// <summary>
Expand Down Expand Up @@ -321,8 +281,6 @@ public Image<Rgb24> GetImage()

public Image<Rgb24> GetImage(bool doHighlightSubband, bool add1KHzLines, bool doMelScale)
{
// doHighlightSubband function still working but have removed min/max bounds from user control.
// doHighlightSubband = true;
int subBandMinHz = 1000;
int subBandMaxHz = 9000;

Expand Down Expand Up @@ -351,103 +309,6 @@ public Image<Rgb24> GetImage(bool doHighlightSubband, bool add1KHzLines, bool do
return image;
}

/// <summary>
/// Returns an image of the spectrogram reduced along the time axis so that it is
/// approximately <paramref name="width"/> pixels wide.
/// The reduction factor is the integer quotient of the frame count and the requested width;
/// when that quotient is 1 or less the unreduced image from GetImage() is returned.
/// </summary>
/// <param name="width">requested image width in pixels. A value of zero causes an integer divide-by-zero.</param>
/// <param name="drawGridLines">passed through to GetImage_ReducedSonogram.</param>
/// <returns>the reduced (or, when no reduction is needed, the full) spectrogram image.</returns>
public Image<Rgb24> GetImage_ReducedSonogramWithWidth(int width, bool drawGridLines)
{
    // Number of spectra (frames) in the sonogram.
    int spectrumCount = this.Data.GetLength(0);

    // Integer division: how many frames collapse into one output column.
    int reduction = spectrumCount / width;

    return reduction > 1
        ? this.GetImage_ReducedSonogram(reduction, drawGridLines)
        : this.GetImage();
}

/// <summary>
/// Draws a grey-scale image of the spectrogram reduced along the time axis.
/// The frames are divided into consecutive windows; for each window the frame having the
/// largest value in DecibelsPerFrame is selected and its spectrum is drawn as one image column.
/// Requires DecibelsPerFrame to hold one value per frame of Data.
/// </summary>
/// <param name="factor">time reduction factor; must be at least 1 and no greater than the number of frames.</param>
/// <param name="drawGridLines">when true, grid pixels are drawn at the rows marked by the linear Y-axis scale (1000 Hz interval).</param>
/// <returns>an image whose width is frameCount / factor and whose height is the number of frequency bins.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="factor"/> is less than 1 or greater than the frame count.</exception>
public Image<Rgb24> GetImage_ReducedSonogram(int factor, bool drawGridLines)
{
    var data = this.Data; //sonogram intensity values
    int frameCount = data.GetLength(0); // Number of spectra in sonogram
    int imageHeight = data.GetLength(1); // image ht = sonogram ht. Later include grid and score scales

    // A factor outside [1, frameCount] would give a divide-by-zero or a zero-width image.
    if (factor < 1 || factor > frameCount)
    {
        throw new ArgumentOutOfRangeException(nameof(factor), factor, "Reduction factor must be between 1 and the number of frames.");
    }

    int imageWidth = frameCount / factor;
    int subSample = frameCount / imageWidth;

    //set up min, max, range for normalising of dB values
    DataTools.MinMax(data, out double min, out double max);
    double range = max - min;

    var grayScale = ImageTools.GrayScale();

    //set up the 1000 Hz scale
    int herzInterval = 1000;
    int[] vScale = FrequencyScale.CreateLinearYaxis(herzInterval, this.NyquistFrequency, imageHeight); //calculate location of 1000Hz grid lines
    var gridCol = Color.Black;
    var bmp = new Image<Rgb24>(imageWidth, imageHeight);
    for (int w = 0; w < imageWidth; w++)
    {
        // First and last (inclusive) frame index of the window that maps to column w.
        int start = w * subSample;
        int end = ((w + 1) * subSample) - 1;

        // Start from the window's own first frame so that a one-frame window selects
        // that frame rather than defaulting to frame 0.
        double maxE = -double.MaxValue;
        int maxId = start;

        // 'end' is inclusive, so the comparison must be <= or the last frame of every window is ignored.
        for (int x = start; x <= end; x++)
        {
            if (maxE < this.DecibelsPerFrame[x])
            {
                maxE = this.DecibelsPerFrame[x];
                maxId = x;
            }
        }

        // have found the frame with max energy. Now draw its spectrum
        // over all freq bins
        for (int y = 0; y < imageHeight; y++)
        {
            // Normalise the value with min and range, invert, and bound it to the 0-255 image intensity range.
            double value = (data[maxId, y] - min) / range;
            int c = 255 - (int)Math.Floor(255.0 * value);
            c = Math.Max(0, Math.Min(255, c));

            bmp[w, imageHeight - y - 1] = grayScale[c];
        } //end over all freq bins

        if (!drawGridLines)
        {
            continue;
        }

        //over all Y-axis pixels
        for (int p = 0; p < vScale.Length; p++)
        {
            if (vScale[p] == 0)
            {
                continue;
            }

            // Skip any marked row that would fall outside the image (e.g. p == 0 gives row == imageHeight).
            int row = imageHeight - p;
            if (row < 0 || row >= imageHeight)
            {
                continue;
            }

            bmp[w, row] = gridCol;
        }
    }

    return bmp;
}

private static bool IsInBand(int y, int? minFreq, int? maxFreq)
{
if (minFreq == null && maxFreq == null)
Expand Down Expand Up @@ -792,6 +653,7 @@ public static Image<Rgb24> DrawTitleBarOfGrayScaleSpectrogram(string title, int
return bmp;
}

/*
// mark of time scale according to scale.
public static Image<Rgb24> DrawTimeTrack(TimeSpan offsetMinute, TimeSpan xAxisPixelDuration, TimeSpan xAxisTicInterval, TimeSpan labelInterval, int trackWidth, int trackHeight, string title)
{
Expand Down Expand Up @@ -835,5 +697,6 @@ public static Image<Rgb24> DrawTimeTrack(TimeSpan offsetMinute, TimeSpan xAxisPi
return bmp;
}
*/
}
}

0 comments on commit a3778e3

Please sign in to comment.