Skip to content

Commit

Permalink
Work on accuracy of Boobook Owl recognizer
Browse files Browse the repository at this point in the history
Issue #319 Reduce the number of FPs with Boobook recognizer on the noise data set. Also simplify the name of some post-processing methods.
  • Loading branch information
towsey committed May 25, 2020
1 parent 2f4fe03 commit 2b37c86
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,39 @@ Profiles:
#CombineOverlappingEvents: false

# 2: Combine each pair of Boobook syllables as one event
# Can also use this to "mop up" events in neighbourhood - these can be removed later.
CombinePossibleSyllableSequence: true
SyllableStartDifference: 0.6
SyllableHertzGap: 350

# 3: Combine events that are likely to be repetitions of the same syllable.
CombineProximalSimilarEvents: false
# B: Select or remove event types.
# 3: Select wanted event types.
#SelectEventsOfType: !ChirpEvent

# B: Remaining post-processing steps are to filter out over/undersize events
# 4: Filter the events for duration in seconds
# 4: Remove unwanted event types.
#RemoveEventsOfType: !ChirpEvent

# C: Remaining post-processing steps are to filter out over and/or undersized events
# 5: Filter the events for duration in seconds
RemoveEventsHavingWrongDuration: true

# 5: Filter the events for bandwidth in Hertz
# 6: Filter the events for bandwidth in Hertz
RemoveEventsHavingWrongBandwidth: true

# 7: Filter the events for excess activity in upper buffer zone
RemoveEventsHavingExcessActivityInUpperBufferZone: true

# 8: Filter the events for excess activity in lower buffer zone
RemoveEventsHavingExcessActivityInLowerBufferZone: true

# D: Options to save results files
# 6: Available options for saving data files (case-sensitive): [False/Never | True/Always | WhenEventsDetected]
# 9: Available options for saving data files (case-sensitive): [False/Never | True/Always | WhenEventsDetected]
SaveIntermediateWavFiles: Never
SaveIntermediateCsvFiles: false
# Available options (case-sensitive): [False/Never | True/Always | WhenEventsDetected]
# "True" is useful when debugging but "WhenEventsDetected" is required for operational use.

# 7: Available options for saving
# 10: Available options for saving sonogram images
#SaveSonogramImages: True
SaveSonogramImages: WhenEventsDetected
# DisplayCsvImage is obsolete - ensure it remains set to: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public override RecognizerResults Recognize(
var spectralEvents = events.Cast<SpectralEvent>().ToList();
var startDiff = genericConfig.SyllableStartDifference;
var hertzDiff = genericConfig.SyllableHertzGap;
newEvents = CompositeEvent.CombineSimilarProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
newEvents = CompositeEvent.CombineProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
}
else
{
Expand Down
32 changes: 28 additions & 4 deletions src/AnalysisPrograms/Recognizers/Birds/NinoxBoobook.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,6 @@ public override RecognizerResults Recognize(

// ################### DO POST-PROCESSING of EVENTS ###################

var configuration = (GenericRecognizerConfig)genericConfig;
var chirpConfig = (ForwardTrackParameters)configuration.Profiles["BoobookSyllable"];

// 1: Pull out the chirp events for possible combining.
var (chirpEvents, others) = combinedResults.NewEvents.FilterForEventType<ChirpEvent, EventCommon>();

Expand All @@ -122,13 +119,17 @@ public override RecognizerResults Recognize(
if (genericConfig.CombinePossibleSyllableSequence)
{
// Convert events to spectral events for combining of possible sequences.
// Can also use this parameter to combine events that are in the upper or lower neighbourhood.
// Such combinations will increase bandwidth of the event and this property can be used later to weed out unlikely events.
var spectralEvents = combinedEvents.Cast<SpectralEvent>().ToList();
var startDiff = genericConfig.SyllableStartDifference;
var hertzDiff = genericConfig.SyllableHertzGap;
combinedEvents = CompositeEvent.CombineSimilarProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
combinedEvents = CompositeEvent.CombineProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
}

// 5: Filter the events for duration in seconds
var configuration = (GenericRecognizerConfig)genericConfig;
var chirpConfig = (ForwardTrackParameters)configuration.Profiles["BoobookSyllable"];
var minimumEventDuration = chirpConfig.MinDuration;
var maximumEventDuration = chirpConfig.MaxDuration;
if (genericConfig.CombinePossibleSyllableSequence)
Expand All @@ -145,6 +146,29 @@ public override RecognizerResults Recognize(
double sigmaThreshold = 3.0;
combinedResults.NewEvents = SpectralEvent.FilterOnBandwidth(combinedResults.NewEvents, average, sd, sigmaThreshold);

// 7 and 8: Finally, filter events on the amount of acoustic activity in their upper and lower neighbourhoods - their buffer zone.
// The idea is that an unambiguous event should have some acoustic space above and below.
// Here it is assumed that the average acoustic activity in the upper and lower buffer zones should not exceed the user specified decibel threshold.
// The size of the neighbourhood is determined by the following two parameters.
// ################# These parameters should be specified by user in config.yml file.
var upperHertzBuffer = 300;
var lowerHertzBuffer = 100;
var sonogramData = combinedResults.Sonogram.Data;

var converter = new UnitConverters(
segmentStartOffset: segmentStartOffset.TotalSeconds,
sampleRate: combinedResults.Sonogram.SampleRate,
frameSize: combinedResults.Sonogram.Configuration.WindowSize,
frameOverlap: combinedResults.Sonogram.Configuration.WindowOverlap);

// 7: Filter the events for excess acoustic activity in the upper buffer zone.
var spectralEvents2 = combinedResults.NewEvents.Cast<SpectralEvent>().ToList();
combinedResults.NewEvents = SpectralEvent.FilterEventsOnUpperNeighbourhood(spectralEvents2, sonogramData, upperHertzBuffer, converter, chirpConfig.DecibelThreshold.Value);

// 8: Filter the events for excess acoustic activity in the lower buffer zone.
spectralEvents2 = combinedResults.NewEvents.Cast<SpectralEvent>().ToList();
combinedResults.NewEvents = SpectralEvent.FilterEventsOnLowerNeighbourhood(spectralEvents2, sonogramData, lowerHertzBuffer, converter, chirpConfig.DecibelThreshold.Value);

//UNCOMMENT following line if you want special debug spectrogram, i.e. with special plots.
// NOTE: Standard spectrograms are produced by setting SaveSonogramImages: "True" or "WhenEventsDetected" in UserName.SpeciesName.yml config file.
//GenericRecognizer.SaveDebugSpectrogram(territorialResults, genericConfig, outputDirectory, audioRecording.BaseName);
Expand Down
2 changes: 1 addition & 1 deletion src/AnalysisPrograms/Recognizers/PhascolarctosCinereus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public override RecognizerResults Recognize(
spectralEvents = newEvents.Cast<SpectralEvent>().ToList();
var startDiff = genericConfig.SyllableStartDifference;
var hertzDiff = genericConfig.SyllableHertzGap;
newEvents = CompositeEvent.CombineSimilarProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
newEvents = CompositeEvent.CombineProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
}

combinedResults.NewEvents = newEvents;
Expand Down
53 changes: 43 additions & 10 deletions src/AudioAnalysisTools/Events/SpectralEvent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ public static List<EventCommon> FilterOnBandwidth(List<EventCommon> events, doub
{
var minHertzWidth = average - (sd * sigmaThreshold);
var maxHertzWidth = average + (sd * sigmaThreshold);
return FilterOnBandwidth(events, minHertzWidth, maxHertzWidth);
}

public static List<EventCommon> FilterOnBandwidth(List<EventCommon> events, double minHertzWidth, double maxHertzWidth)
{
var outputEvents = new List<EventCommon>();
foreach (var ev in events)
{
Expand Down Expand Up @@ -142,13 +147,14 @@ public static List<EventCommon> FilterOnDuration(List<EventCommon> events, doubl
/// </summary>
/// <param name="ev">The event.</param>
/// <param name="sonogramData">The spectrogram data as matrix with origin top/left.</param>
/// <param name="bufferBins">THe badnwidth of the buffer zone in bins.</param>
/// <param name="bufferHertz">The bandwidth of the buffer zone in Hertz.</param>
/// <param name="converter">A converter to convert seconds/Hertz to frames/bins.</param>
/// <returns>Average of the spectrogram amplitude in buffer band above the event.</returns>
public static double GetAverageAmplitudeInUpperNeighbourhood(SpectralEvent ev, double[,] sonogramData, int bufferBins, UnitConverters converter)
public static double GetAverageAmplitudeInUpperNeighbourhood(SpectralEvent ev, double[,] sonogramData, double bufferHertz, UnitConverters converter)
{
// Allow a gap of two bins above the event.
int gap = 3;
int gap = 2;
var bufferBins = (int)Math.Round(bufferHertz / converter.HertzPerFreqBin);
var bottomBufferBin = converter.GetFreqBinFromHertz(ev.HighFrequencyHertz) + gap;
var topBufferBin = bottomBufferBin + bufferBins;
var frameStart = converter.FrameFromStartTime(ev.EventStartSeconds);
Expand All @@ -165,14 +171,15 @@ public static double GetAverageAmplitudeInUpperNeighbourhood(SpectralEvent ev, d
/// </summary>
/// <param name="ev">The event.</param>
/// <param name="sonogramData">The spectrogram data as matrix with origin top/left.</param>
/// <param name="bufferBins">The bandwidth of the buffer zone in bins.</param>
/// <param name="bufferHertz">The bandwidth of the buffer zone in Hertz.</param>
/// <param name="converter">A converter to convert seconds/Hertz to frames/bins.</param>
/// <returns>Average of the spectrogram amplitude in buffer band below the event.</returns>
public static double GetAverageAmplitudeInLowerNeighbourhood(SpectralEvent ev, double[,] sonogramData, int bufferBins, UnitConverters converter)
public static double GetAverageAmplitudeInLowerNeighbourhood(SpectralEvent ev, double[,] sonogramData, double bufferHertz, UnitConverters converter)
{
int gap = 1;
var bufferBins = (int)Math.Round(bufferHertz / converter.HertzPerFreqBin);
var topBufferBin = converter.GetFreqBinFromHertz(ev.LowFrequencyHertz) - gap;
var bottomBufferBin = topBufferBin - bufferBins;
var bottomBufferBin = topBufferBin - bufferBins + 1;
bottomBufferBin = Math.Max(0, bottomBufferBin);
var frameStart = converter.FrameFromStartTime(ev.EventStartSeconds);
var frameEnd = converter.FrameFromStartTime(ev.EventEndSeconds);
Expand All @@ -191,18 +198,44 @@ public static double GetAverageAmplitudeInLowerNeighbourhood(SpectralEvent ev, d
/// <param name="bufferHertz">The band width of the required buffer. 300-500Hz is often appropriate.</param>
/// <param name="converter">Converts sec/Hz to frame/bin.</param>
/// <returns>A list of filtered events.</returns>
public static List<EventCommon> FilterEventsOnUpperNeighbourhood(List<SpectralEvent> events, double[,] sonogramData, int bufferHertz, UnitConverters converter, double decibelThreshold)
public static List<EventCommon> FilterEventsOnUpperNeighbourhood(List<SpectralEvent> events, double[,] sonogramData, double bufferHertz, UnitConverters converter, double decibelThreshold)
{
var bufferBins = (int)Math.Round(bufferHertz / converter.HertzPerFreqBin);
var filteredEvents = new List<EventCommon>();
foreach (var ev in events)
{
var avUpperNhAmplitude = SpectralEvent.GetAverageAmplitudeInUpperNeighbourhood((SpectralEvent)ev, sonogramData, bufferBins, converter);
var avUpperNhAmplitude = SpectralEvent.GetAverageAmplitudeInUpperNeighbourhood((SpectralEvent)ev, sonogramData, bufferHertz, converter);
Console.WriteLine($"################################### Buffer Average decibels = {avUpperNhAmplitude}");

if (avUpperNhAmplitude < decibelThreshold)
{
// There is little acoustic activity in the buffer zone above the chirp. It is likely to be a chirp.
// There is little acoustic activity in the designated frequency band above the event. It is likely to be a discrete event.
filteredEvents.Add(ev);
}
}

return filteredEvents;
}

/// <summary>
/// Removes events from a list of events that contain excessive noise in the lower neighbourhood.
/// Excess noise can indicate that this is not a legitimate event.
/// </summary>
/// <param name="events">A list of spectral events.</param>
/// <param name="sonogramData">A matrix of the spectrogram in which event occurs.</param>
/// <param name="bufferHertz">The band width of the required buffer. 300-500Hz is often appropriate.</param>
/// <param name="converter">Converts sec/Hz to frame/bin.</param>
/// <returns>A list of filtered events.</returns>
public static List<EventCommon> FilterEventsOnLowerNeighbourhood(List<SpectralEvent> events, double[,] sonogramData, double bufferHertz, UnitConverters converter, double decibelThreshold)
{
var filteredEvents = new List<EventCommon>();
foreach (var ev in events)
{
var avLowerNhAmplitude = SpectralEvent.GetAverageAmplitudeInLowerNeighbourhood((SpectralEvent)ev, sonogramData, bufferHertz, converter);
Console.WriteLine($"################################### Buffer Average decibels = {avLowerNhAmplitude}");

if (avLowerNhAmplitude < decibelThreshold)
{
// There is little acoustic activity in the designated frequency band below the event. It is likely to be a discrete event.
filteredEvents.Add(ev);
}
}
Expand Down
25 changes: 17 additions & 8 deletions src/AudioAnalysisTools/Events/Types/CompositeEvent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,10 @@ public static List<EventCommon> CombineOverlappingEvents(List<EventCommon> event

/// <summary>
/// Combines events that have similar bottom and top frequency bounds and whose start times are within the passed time range.
/// NOTE: Proximal means (1) that the event starts are close to one another and (2) the events occupy a SIMILAR frequency band.
/// NOTE: This method is used to combine events that are likely to be a syllable sequence within the same call.
/// </summary>
public static List<EventCommon> CombineSimilarProximalEvents(List<SpectralEvent> events, TimeSpan startDifference, int hertzDifference)
public static List<EventCommon> CombineProximalEvents(List<SpectralEvent> events, TimeSpan startDifference, int hertzDifference)
{
if (events.Count < 2)
{
Expand All @@ -210,8 +212,9 @@ public static List<EventCommon> CombineSimilarProximalEvents(List<SpectralEvent>
for (int j = i - 1; j >= 0; j--)
{
bool eventStartsAreProximal = Math.Abs(events[i].EventStartSeconds - events[j].EventStartSeconds) < startDifference.TotalSeconds;
bool eventAreInSimilarFreqBand = Math.Abs(events[i].LowFrequencyHertz - events[j].LowFrequencyHertz) < hertzDifference && Math.Abs(events[i].HighFrequencyHertz - events[j].HighFrequencyHertz) < hertzDifference;
if (eventStartsAreProximal && eventAreInSimilarFreqBand)
bool eventMinimaAreSimilar = Math.Abs(events[i].LowFrequencyHertz - events[j].LowFrequencyHertz) < hertzDifference;
bool eventMaximaAreSimilar = Math.Abs(events[i].HighFrequencyHertz - events[j].HighFrequencyHertz) < hertzDifference;
if (eventStartsAreProximal && eventMinimaAreSimilar && eventMaximaAreSimilar)
{
var compositeEvent = CombineTwoEvents(events[i], events[j]);
events[j] = compositeEvent;
Expand All @@ -225,10 +228,15 @@ public static List<EventCommon> CombineSimilarProximalEvents(List<SpectralEvent>
}

/// <summary>
/// Combines events that are possible stacked harmonics, that is, they are coincident (have similar start and end times)
/// AND stacked (their maxima are within the passed frequency gap).
/// Combines events that are possible stacked harmonics or formants.
/// Two conditions apply:
/// (1) the events are coincident (have similar start and end times)
/// (2) the events are stacked (their minima and maxima are within the passed frequency gap).
/// NOTE: The difference between this method and CombineProximalEvents() is that stacked events should have both similar start and similar end times.
/// Having similar start and end times means the events are superimposed in the spectrogram.
/// How closely stacked is determined by the hertzDifference argument. Typically, the formant spacing is not large, ~100-200Hz.
/// </summary>
public static List<EventCommon> CombinePotentialStackedTracks(List<SpectralEvent> events, TimeSpan timeDifference, int hertzDifference)
public static List<EventCommon> CombineStackedEvents(List<SpectralEvent> events, TimeSpan timeDifference, int hertzDifference)
{
if (events.Count < 2)
{
Expand All @@ -242,8 +250,9 @@ public static List<EventCommon> CombinePotentialStackedTracks(List<SpectralEvent
bool eventsStartTogether = Math.Abs(events[i].EventStartSeconds - events[j].EventStartSeconds) < timeDifference.TotalSeconds;
bool eventsEndTogether = Math.Abs(events[i].EventEndSeconds - events[j].EventEndSeconds) < timeDifference.TotalSeconds;
bool eventsAreCoincident = eventsStartTogether && eventsEndTogether;
bool eventsAreStacked = Math.Abs(events[i].HighFrequencyHertz - events[j].LowFrequencyHertz) < hertzDifference || Math.Abs(events[j].HighFrequencyHertz - events[i].LowFrequencyHertz) < hertzDifference;
if (eventsAreCoincident && eventsAreStacked)
bool eventMinimaAreSimilar = Math.Abs(events[i].LowFrequencyHertz - events[j].LowFrequencyHertz) < hertzDifference;
bool eventMaximaAreSimilar = Math.Abs(events[i].HighFrequencyHertz - events[j].HighFrequencyHertz) < hertzDifference;
if (eventsAreCoincident && eventMinimaAreSimilar && eventMaximaAreSimilar)
{
var compositeEvent = CombineTwoEvents(events[i], events[j]);
events[j] = compositeEvent;
Expand Down
Loading

0 comments on commit 2b37c86

Please sign in to comment.