Skip to content

Commit

Permalink
More work on Oscillation recognizer
Browse files Browse the repository at this point in the history
Issue #238 More work on Oscillation recognizer to pick up bat wing beats.
  • Loading branch information
towsey committed Oct 28, 2019
1 parent a0e643b commit b31598b
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ Profiles:
Wingbeats:
MinHz: 200
MaxHz: 2000
DecibelThreshold: 6.0
# duration of DCT in seconds
DctDuration: 0.8
DctDuration: 0.5
# minimum acceptable value of a DCT coefficient
DctThreshold: 0.5
# ignore oscillation rates below the min & above the max threshold
Expand All @@ -46,7 +47,7 @@ Profiles:
MinDuration: 1.0
MaxDuration: 10.0
# Event threshold - use this to determine FP / FN trade-off for events.
EventThreshold: 0.60
EventThreshold: 0.5
#Agonist:
# This notation means the Groote profile has all of the settings that the Standard profile has,
# however, the MinHz and MaxHz properties have been overridden.
Expand Down
2 changes: 1 addition & 1 deletion src/AnalysisPrograms/Recognizers/LitoriaCaerulea.cs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ public override RecognizerResults Recognize(AudioRecording recording, Config con
double dctThreshold = recognizerConfig.DctThreshold;
double minOscRate = 1 / recognizerConfig.MaxPeriod;
double maxOscRate = 1 / recognizerConfig.MinPeriod;
Oscillations2019.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold, out double[] dctScores, out double[] oscFreq);
Oscillations2019.DetectOscillations(croakScoreArray, framesPerSecond, decibelThreshold, dctDuration, minOscRate, maxOscRate, dctThreshold, out double[] dctScores, out double[] oscFreq);

// ######################################################################
// ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
Expand Down
68 changes: 27 additions & 41 deletions src/AnalysisPrograms/Recognizers/PteropusSpecies.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ namespace AnalysisPrograms.Recognizers
using System.IO;
using System.Linq;
using System.Reflection;
using Acoustics.Shared;
using Acoustics.Shared.ConfigFile;
using AnalysisPrograms.Recognizers.Base;
using AudioAnalysisTools;
Expand All @@ -55,14 +54,17 @@ namespace AnalysisPrograms.Recognizers
/// </summary>
internal class PteropusSpecies : RecognizerBase
{
private static readonly ILog PteropusLog = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

// The default window size for Pteropus sp. It needs to be fixed in order to detect wing beat oscillations accurately.
private static readonly int DefaultWindow = 512;

public override string Author => "Towsey";

public override string SpeciesName => "PteropusSpecies";

public override string Description => "[ALPHA] Detects acoustic events for species of Flying Fox, Pteropus species";

private static readonly ILog log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

/*
/// <summary>
/// Summarize your results. This method is invoked exactly once per original file.
Expand All @@ -87,9 +89,9 @@ public override void SummariseResults(
/// <param name="genericConfig">config file that contains parameters used by all profiles.</param>
/// <param name="segmentStartOffset">when recording starts.</param>
/// <param name="getSpectralIndexes">not sure what this is.</param>
/// <param name="outputDirectory">where the recogniser results can be found.</param>
/// <param name="outputDirectory">where the recognizer results can be found.</param>
/// <param name="imageWidth"> assuming ????.</param>
/// <returns>recogniser results.</returns>
/// <returns>recognizer results.</returns>
public override RecognizerResults Recognize(AudioRecording audioRecording, Config genericConfig, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
if (ConfigFile.HasProfiles(genericConfig))
Expand All @@ -102,34 +104,34 @@ public override RecognizerResults Recognize(AudioRecording audioRecording, Confi
message = message + (s + ", ");
}

log.Debug(message);
PteropusLog.Debug(message);
}
else
{
log.Warn("No configuration profiles found. Two profiles expected for the Flying Fox recogniser.");
PteropusLog.Warn("No configuration profiles found. Two profiles expected for the Flying Fox recogniser.");
}

var territorialResults = new RecognizerResults();

if (ConfigFile.TryGetProfile(genericConfig, "Territorial", out var profile1))
if (ConfigFile.TryGetProfile(genericConfig, "Territorial", out var _))
{
territorialResults = TerritorialCall(audioRecording, genericConfig, "Territorial", segmentStartOffset);
log.Debug("Territory event count = " + territorialResults.Events.Count);
PteropusLog.Debug("Territory event count = " + territorialResults.Events.Count);
}
else
{
log.Warn("Could not access Territorial configuration parameters");
PteropusLog.Warn("Could not access Territorial configuration parameters");
}

var wingbeatResults = new RecognizerResults();
if (ConfigFile.TryGetProfile(genericConfig, "Wingbeats", out var profile2))
if (ConfigFile.TryGetProfile(genericConfig, "Wingbeats", out var _))
{
wingbeatResults = WingBeats(audioRecording, genericConfig, "Wingbeats", segmentStartOffset);
log.Debug("Wingbeat event count = " + wingbeatResults.Events.Count);
PteropusLog.Debug("Wingbeat event count = " + wingbeatResults.Events.Count);
}
else
{
log.Warn("Could not access Wingbeats configuration parameters");
PteropusLog.Warn("Could not access Wingbeats configuration parameters");
}

// combine the results i.e. add wing-beat events to the list of territorial call events.
Expand Down Expand Up @@ -171,24 +173,8 @@ private static RecognizerResults TerritorialCall(AudioRecording audioRecording,
var maxTimeSpan = TimeSpan.FromSeconds(maxDurationSeconds);

//######################
//2. Don't use samples in this recogniser.
//var samples = audioRecording.WavReader.Samples;
//Instead, convert each segment to a spectrogram.
//2. Convert each segment to a spectrogram.
var sonogram = GetSonogram(configuration, audioRecording);
/*
// make a spectrogram
var sonoConfig = new SonogramConfig
{
WindowSize = 512,
NoiseReductionType = NoiseReductionType.Standard,
NoiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.0,
};
sonoConfig.WindowOverlap = 0.0;
// now construct the standard decibel spectrogram WITH noise removal
// get frame parameters for the analysis
var sonogram = (BaseSonogram)new SpectrogramStandard(sonoConfig, audioRecording.WavReader);
*/
var decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, minHz, maxHz, sonogram.NyquistFrequency);

// prepare plots
Expand Down Expand Up @@ -242,7 +228,7 @@ private static RecognizerResults TerritorialCall(AudioRecording audioRecording,
private static List<AcousticEvent> FilterEventsForSpectralProfile(List<AcousticEvent> events, BaseSonogram sonogram)
{
double[,] spectrogramData = sonogram.Data;
int colCount = spectrogramData.GetLength(1);
//int colCount = spectrogramData.GetLength(1);

// The following freq bins are used to demarcate freq bands for spectral tests below.
// The hertz values are hard coded but could be included in the config.yml file.
Expand All @@ -254,7 +240,7 @@ private static List<AcousticEvent> FilterEventsForSpectralProfile(List<AcousticE
foreach (AcousticEvent ae in events)
{
int startFrame = ae.Oblong.RowTop;
int endFrame = ae.Oblong.RowBottom;
//int endFrame = ae.Oblong.RowBottom;

// get all the frames of the acoustic event
//var subMatrix = DataTools.Submatrix(spectrogramData, startFrame, 0, endFrame, colCount - 1);
Expand All @@ -265,10 +251,9 @@ private static List<AcousticEvent> FilterEventsForSpectralProfile(List<AcousticE
var normalisedSpectrum = DataTools.normalise(spectrum);
normalisedSpectrum = DataTools.filterMovingAverageOdd(normalisedSpectrum, 11);
var maxId = DataTools.GetMaxIndex(normalisedSpectrum);
var hzMax = (int)Math.Ceiling(maxId * sonogram.FBinWidth);
//var hzMax = (int)Math.Ceiling(maxId * sonogram.FBinWidth);

// Do TESTS to determine if event has spectrum matching a Flying fox.

// Test 1: Spectral maximum should be below 4 kHz.
bool passTest1 = maxId < fourKiloHzBin;

Expand Down Expand Up @@ -334,15 +319,16 @@ private static RecognizerResults WingBeats(AudioRecording audioRecording, Config
int maxHz = profile.GetIntOrNull(AnalysisKeys.MaxHz) ?? 3000;
double minDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MinDuration) ?? 1.0;
double maxDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MaxDuration) ?? 10.0;
double decibelThreshold = profile.GetDoubleOrNull("DecibelThreshold") ?? 6.0;
double dctDuration = profile.GetDoubleOrNull("DctDuration") ?? 1.0;
double dctThreshold = profile.GetDoubleOrNull("DctThreshold") ?? 0.5;
double minOscilFreq = profile.GetDoubleOrNull("MinOscilFreq") ?? 4.0;
double maxOscilFreq = profile.GetDoubleOrNull("MaxOscilFreq") ?? 6.0;
double minOscFreq = profile.GetDoubleOrNull("MinOscilFreq") ?? 4.0;
double maxOscFreq = profile.GetDoubleOrNull("MaxOscilFreq") ?? 6.0;
double eventThreshold = profile.GetDoubleOrNull("EventThreshold") ?? 0.3;

//######################

//2. Don't use samples in this recogniser.
//2. Don't use samples in this recognizer.
//var samples = audioRecording.WavReader.Samples;
//Instead, convert each segment to a spectrogram.
var sonogram = GetSonogram(configuration, audioRecording);
Expand All @@ -355,9 +341,10 @@ private static RecognizerResults WingBeats(AudioRecording audioRecording, Config
(SpectrogramStandard)sonogram,
minHz,
maxHz,
decibelThreshold,
dctDuration,
(int)Math.Floor(minOscilFreq),
(int)Math.Floor(maxOscilFreq),
(int)Math.Floor(minOscFreq),
(int)Math.Floor(maxOscFreq),
dctThreshold,
eventThreshold,
minDurationSeconds,
Expand Down Expand Up @@ -396,7 +383,6 @@ private static RecognizerResults WingBeats(AudioRecording audioRecording, Config
*/

// prepare plots
double decibelThreshold = 12.0;
double intensityNormalisationMax = 3 * decibelThreshold;
var normThreshold = decibelThreshold / intensityNormalisationMax;
var normalisedIntensityArray = DataTools.NormaliseInZeroOne(decibelArray, 0, intensityNormalisationMax);
Expand Down Expand Up @@ -459,7 +445,7 @@ internal static BaseSonogram GetSonogram(Config configuration, AudioRecording au
{
var sonoConfig = new SonogramConfig
{
WindowSize = 512,
WindowSize = DefaultWindow,
NoiseReductionType = NoiseReductionType.Standard,
NoiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.0,
WindowOverlap = 0.0,
Expand Down
30 changes: 23 additions & 7 deletions src/AudioAnalysisTools/Oscillations2019.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
namespace AudioAnalysisTools
{
using System;
using System.CodeDom;
using System.Collections.Generic;
using AudioAnalysisTools.DSP;
using AudioAnalysisTools.StandardSpectrograms;
Expand All @@ -23,6 +22,7 @@ public static void Execute(
SpectrogramStandard sonogram,
int minHz,
int maxHz,
double decibelThreshold,
double dctDuration,
int minOscFreq,
int maxOscFreq,
Expand All @@ -41,9 +41,26 @@ public static void Execute(
// extract array of decibel values, frame averaged over required frequency band
var decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, minHz, maxHz, sonogram.NyquistFrequency);

// If the first value is negative dB, this means noise removal was not done and it should be done here.
// NOTE: the noise removal code below is currently disabled (commented out).
//if (decibelArray[0] < 0.0)
//{
// NoiseRemovalModal.CalculateNoiseUsingLamelsAlgorithm(decibelArray, out double _, out double _, out double noiseMode, out double _);
// decibelArray = SNR.SubtractAndTruncate2Zero(decibelArray, noiseMode);
//}

//DETECT OSCILLATIONS
var framesPerSecond = sonogram.FramesPerSecond;
DetectOscillations(decibelArray, framesPerSecond, dctDuration, minOscFreq, maxOscFreq, dctThreshold, out dctScores, out var oscFreq);
DetectOscillations(
decibelArray,
framesPerSecond,
decibelThreshold,
dctDuration,
minOscFreq,
maxOscFreq,
dctThreshold,
out dctScores,
out var oscFreq);

// smooth the scores - window=11 has been the DEFAULT. Now letting user set this.
dctScores = DataTools.filterMovingAverage(dctScores, smoothingWindow);
Expand All @@ -68,6 +85,7 @@ public static void Execute(
/// </summary>
/// <param name="ipArray">an array of decibel values.</param>
/// <param name="framesPerSecond">the frame rate.</param>
/// <param name="decibelThreshold">Ignore frames below this threshold.</param>
/// <param name="dctDuration">Duration in seconds of the required DCT.</param>
/// <param name="minOscFreq">minimum oscillation frequency.</param>
/// <param name="maxOscFreq">maximum oscillation frequency.</param>
Expand All @@ -77,6 +95,7 @@ public static void Execute(
public static void DetectOscillations(
double[] ipArray,
double framesPerSecond,
double decibelThreshold,
double dctDuration,
double minOscFreq,
double maxOscFreq,
Expand All @@ -87,9 +106,6 @@ public static void DetectOscillations(
int dctLength = (int)Math.Round(framesPerSecond * dctDuration);
int minIndex = (int)(minOscFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
int maxIndex = (int)(maxOscFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
double dbThreshold = 6;
//double midOscFreq = minOscFreq + ((maxOscFreq - minOscFreq) / 2);

if (maxIndex > dctLength)
{
LoggedConsole.WriteWarnLine("MaxIndex > DCT length. Therefore set maxIndex = DCT length.");
Expand All @@ -98,7 +114,7 @@ public static void DetectOscillations(

int length = ipArray.Length;
dctScores = new double[length];
oscFreq = new double[length]; //TODO TODO
oscFreq = new double[length];

//set up the cosine coefficients
double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);
Expand All @@ -116,7 +132,7 @@ public static void DetectOscillations(
}

// only stop if current location is a peak
if (ipArray[r] < dbThreshold)
if (ipArray[r] < decibelThreshold)
{
continue;
}
Expand Down

0 comments on commit b31598b

Please sign in to comment.