Skip to content

Commit

Permalink
Split the GenericRecognizer class
Browse files Browse the repository at this point in the history
Issue #390 Split the GenericRecognizer class into processing and post-processing classes. This was so I could more neatly deal with a change to the use of multiple decibel thresholds.
  • Loading branch information
towsey authored and atruskie committed Nov 1, 2020
1 parent 0d143de commit 40b4825
Show file tree
Hide file tree
Showing 2 changed files with 325 additions and 245 deletions.
300 changes: 55 additions & 245 deletions src/AnalysisPrograms/Recognizers/GenericRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace AnalysisPrograms.Recognizers
using log4net;
using SixLabors.ImageSharp;
using TowseyLibrary;
using static AudioAnalysisTools.Events.Types.EventPostProcessing;
using Path = System.IO.Path;

/// <summary>
Expand All @@ -33,7 +34,7 @@ public class GenericRecognizer : RecognizerBase
{
private static readonly ILog Log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

private readonly bool combineOverlappedEvents = false;
//private readonly bool combineOverlappedEvents = false;

/// <inheritdoc />
public override string Author => "Ecosounds";
Expand Down Expand Up @@ -119,18 +120,61 @@ public override RecognizerResults Recognize(
DirectoryInfo outputDirectory,
int? imageWidth)
{
// ############################### PRE-PROCESSING: PREPARATION FOR DETECTION OF GENERIC EVENTS ###############################
var configuration = (GenericRecognizerConfig)genericConfig;

if (configuration.Profiles?.Count < 1)
{
throw new ConfigFileException(
"The generic recognizer needs at least one profile set. 0 were found.");
throw new ConfigFileException("The generic recognizer needs at least one profile set. Zero were found.");
}

int count = configuration.Profiles.Count;
var message = $"Found {count} analysis profile(s): " + configuration.Profiles.Keys.Join(", ");
Log.Info(message);

var decibelThresholds = configuration.DecibelThresholds;
message = $"Number of decibel thresholds = {decibelThresholds.Length}: " + decibelThresholds.Join(", ");
Log.Info(message);

// init object to store the combined results from all decibel thresholds.
var combinedResults = new RecognizerResults()
{
Events = new List<AcousticEvent>(),
NewEvents = new List<EventCommon>(),
Hits = null,
ScoreTrack = null,
Plots = new List<Plot>(),
Sonogram = null,
};

foreach (var threshold in decibelThresholds)
{
// ############################### PROCESSING: DETECTION OF GENERIC EVENTS ###############################
var profileResults = RunProfiles(audioRecording, configuration, threshold, segmentStartOffset);

// ############################### POST-PROCESSING OF GENERIC EVENTS ###############################
var postprocessingConfig = configuration.PostProcessing;
profileResults.NewEvents = EventPostProcessing.PostProcessingOfSpectralEvents(profileResults.NewEvents, threshold.Value, postprocessingConfig, profileResults.Sonogram, segmentStartOffset);
Log.Debug($"Event count from all profiles at {threshold} dB threshold = {profileResults.NewEvents.Count}");

// combine the results i.e. add the events list of call events.
combinedResults.NewEvents.AddRange(profileResults.NewEvents);
combinedResults.Plots.AddRange(profileResults.Plots);

// effectively keeps only the *last* sonogram produced
combinedResults.Sonogram = profileResults.Sonogram;
}

combinedResults.NewEvents = CompositeEvent.RemoveEnclosedEvents(combinedResults.NewEvents);
return combinedResults;
}

public static RecognizerResults RunProfiles(
AudioRecording audioRecording,
GenericRecognizerConfig configuration,
double? decibelThreshold,
TimeSpan segmentStartOffset)
{
var allResults = new RecognizerResults()
{
Events = new List<AcousticEvent>(),
Expand Down Expand Up @@ -181,6 +225,7 @@ public override RecognizerResults Recognize(
(spectralEvents, decibelPlots) = OnebinTrackAlgorithm.GetOnebinTracks(
spectrogram,
wp,
decibelThreshold,
segmentStartOffset,
profileName);

Expand Down Expand Up @@ -289,98 +334,6 @@ public override RecognizerResults Recognize(
Log.Debug($"Profile {profileName}: event count = {spectralEvents.Count}");
}

// ############################### POST-PROCESSING OF GENERIC EVENTS ###############################
// The following generic post-processing steps are determined by config settings.
// Step 1: Combine overlapping events - events derived from all profiles.
// Step 2: Combine possible syllable sequences and filter on excess syllable count.
// Step 3: Remove events whose bandwidth is too small or large.
// Step 4: Remove events that have excessive noise in their side-bands.

Log.Debug($"Total event count BEFORE post-processing = {allResults.NewEvents.Count}");
var postprocessingConfig = configuration.PostProcessing;

// 1: Combine overlapping events.
// This will be necessary where many small events have been found - possibly because the dB threshold is set low.
if (postprocessingConfig.CombineOverlappingEvents)
{
allResults.NewEvents = CompositeEvent.CombineOverlappingEvents(allResults.NewEvents.Cast<EventCommon>().ToList());
Log.Debug($"Event count after combining overlapped events = {allResults.NewEvents.Count}");
}

// 2: Combine proximal events, that is, events that may be a sequence of syllables in the same strophe.
// Can also use this parameter to combine events that are in the upper or lower neighbourhood.
// Such combinations will increase bandwidth of the event and this property can be used later to weed out unlikely events.
var sequenceConfig = postprocessingConfig.SyllableSequence;

if (sequenceConfig.NotNull() && sequenceConfig.CombinePossibleSyllableSequence)
{
// Must first convert events to spectral events.
var spectralEvents1 = allResults.NewEvents.Cast<SpectralEvent>().ToList();
var startDiff = sequenceConfig.SyllableStartDifference;
var hertzDiff = sequenceConfig.SyllableHertzGap;
allResults.NewEvents = CompositeEvent.CombineProximalEvents(spectralEvents1, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
Log.Debug($"Event count after combining proximal events = {allResults.NewEvents.Count}");

// Now filter on properties of the sequences which are treated as Composite events.
if (sequenceConfig.FilterSyllableSequence)
{
// filter on number of syllables and their periodicity.
var maxComponentCount = sequenceConfig.SyllableMaxCount;
var period = sequenceConfig.ExpectedPeriod;
var periodSd = sequenceConfig.PeriodStandardDeviation;
allResults.NewEvents = EventFilters.FilterEventsOnSyllableCountAndPeriodicity(allResults.NewEvents, maxComponentCount, period, periodSd);
Log.Debug($"Event count after filtering on periodicity = {allResults.NewEvents.Count}");
}
}

// 3: Filter the events for time duration (seconds)
if (postprocessingConfig.Duration != null)
{
var expectedEventDuration = postprocessingConfig.Duration.ExpectedDuration;
var sdEventDuration = postprocessingConfig.Duration.DurationStandardDeviation;
allResults.NewEvents = EventFilters.FilterOnDuration(allResults.NewEvents, expectedEventDuration, sdEventDuration, sigmaThreshold: 3.0);
Log.Debug($"Event count after filtering on duration = {allResults.NewEvents.Count}");
}

// 4: Filter the events for bandwidth in Hertz
if (postprocessingConfig.Bandwidth != null)
{
var expectedEventBandwidth = postprocessingConfig.Bandwidth.ExpectedBandwidth;
var sdBandwidth = postprocessingConfig.Bandwidth.BandwidthStandardDeviation;
allResults.NewEvents = EventFilters.FilterOnBandwidth(allResults.NewEvents, expectedEventBandwidth, sdBandwidth, sigmaThreshold: 3.0);
Log.Debug($"Event count after filtering on bandwidth = {allResults.NewEvents.Count}");
}

// 5: Filter events on the amount of acoustic activity in their upper and lower sidebands - their buffer zone.
// The idea is that an unambiguous event should have some acoustic space above and below.
// The filter requires that the average acoustic activity in each frame and bin of the upper and lower buffer zones should not exceed the user specified decibel threshold.
var sidebandActivity = postprocessingConfig.SidebandActivity;
if (sidebandActivity != null)
{
var spectralEvents2 = allResults.NewEvents.Cast<SpectralEvent>().ToList();
allResults.NewEvents = EventFilters.FilterEventsOnSidebandActivity(
spectralEvents2,
allResults.Sonogram,
sidebandActivity.LowerHertzBuffer,
sidebandActivity.UpperHertzBuffer,
sidebandActivity.DecibelBuffer,
segmentStartOffset);
Log.Debug($"Event count after filtering on acoustic activity in sidebands = {allResults.NewEvents.Count}");
}

// Write out the events to log.
Log.Debug($"Final event count = {allResults.NewEvents.Count}.");
if (allResults.NewEvents.Count > 0)
{
int counter = 0;
foreach (var ev in allResults.NewEvents)
{
counter++;
var spEvent = (SpectralEvent)ev;
Log.Debug($" Event[{counter}]: Start={spEvent.EventStartSeconds:f1}; Duration={spEvent.EventDurationSeconds:f2}; Bandwidth={spEvent.BandWidthHertz} Hz");
}
}

return allResults;
}

Expand Down Expand Up @@ -432,6 +385,13 @@ private static SonogramConfig ParametersToSonogramConfig(CommonParameters common
/// <inheritdoc cref="GenericRecognizerConfig"/>
public class GenericRecognizerConfig : RecognizerConfig, INamedProfiles<object>
{
/// <summary>
/// Gets or sets an array of decibel thresholds.
/// Each threshold determines the minimum "loudness" of an event that can be detected.
/// Units are decibels.
/// </summary>
public double?[] DecibelThresholds { get; set; }

/// <inheritdoc />
public Dictionary<string, object> Profiles { get; set; }

Expand All @@ -441,155 +401,5 @@ public class GenericRecognizerConfig : RecognizerConfig, INamedProfiles<object>
/// </summary>
public PostProcessingConfig PostProcessing { get; set; }
}

/// <summary>
/// Groups the settings that control the post-processing of detected events:
/// combining overlapping events, combining and filtering syllable sequences,
/// and filtering events on sideband activity, duration and bandwidth.
/// </summary>
public class PostProcessingConfig
{
/// <summary>
/// Gets or sets a value indicating whether or not to combine overlapping events.
/// </summary>
public bool CombineOverlappingEvents { get; set; }

/// <summary>
/// Gets or sets the parameters required to combine and filter syllable sequences.
/// </summary>
public SyllableSequenceConfig SyllableSequence { get; set; }

/// <summary>
/// Gets or sets the parameters required to filter events on the acoustic activity in their sidebands.
/// </summary>
public SidebandConfig SidebandActivity { get; set; }

/// <summary>
/// Gets or sets the parameters required to filter events on their duration.
/// </summary>
public DurationConfig Duration { get; set; }

/// <summary>
/// Gets or sets the parameters required to filter events on their bandwidth.
/// </summary>
public BandwidthConfig Bandwidth { get; set; }
}

/// <summary>
/// The two properties in this config class determine the filtering of events based on their duration.
/// NOTE(review): units are presumably seconds - confirm against the duration filter that consumes these values.
/// </summary>
public class DurationConfig
{
/// <summary>
/// Gets or sets a value indicating the expected duration of an event.
/// </summary>
public double ExpectedDuration { get; set; }

/// <summary>
/// Gets or sets a value indicating the standard deviation of the expected duration.
/// </summary>
public double DurationStandardDeviation { get; set; }
}

/// <summary>
/// The two properties in this config class determine the filtering of events based on their bandwidth.
/// NOTE(review): units are presumably Hertz - confirm against the bandwidth filter that consumes these values.
/// </summary>
public class BandwidthConfig
{
/// <summary>
/// Gets or sets a value indicating the expected bandwidth of an event.
/// </summary>
public int ExpectedBandwidth { get; set; }

/// <summary>
/// Gets or sets a value indicating the standard deviation of the expected bandwidth.
/// </summary>
public int BandwidthStandardDeviation { get; set; }
}

/// <summary>
/// The properties in this config class are required to filter events based on the amount of acoustic activity in their sidebands.
/// </summary>
public class SidebandConfig
{
/// <summary>
/// Gets or sets a value used to filter events based on the acoustic content of their upper buffer zone.
/// Going by the property name, this is the size of the upper buffer zone in Hertz.
/// If value = 0, the upper sideband is ignored.
/// </summary>
public int UpperHertzBuffer { get; set; }

/// <summary>
/// Gets or sets a value used to filter events based on the acoustic content of their lower buffer zone.
/// Going by the property name, this is the size of the lower buffer zone in Hertz.
/// If value = 0, the lower sideband is ignored.
/// </summary>
public int LowerHertzBuffer { get; set; }

/// <summary>
/// Gets or sets a value indicating the decibel gap/difference between acoustic activity in the event and in the upper and lower buffer zones.
/// BufferAcousticActivity must be LessThan (EventAcousticActivity - DecibelBuffer)
/// This value is used only if LowerHertzBuffer > 0 OR UpperHertzBuffer > 0.
/// </summary>
public double DecibelBuffer { get; set; }
}

/// <summary>
/// Settings used to join a run of similar syllables into one event (a sequence or strophe)
/// and, optionally, to discard sequences whose properties are not as expected.
/// Three properties govern the combining step; the remaining four govern the filtering step.
/// </summary>
public class SyllableSequenceConfig
{
    // ---------------- Combining syllables into a sequence or strophe ----------------

    /// <summary>
    /// Gets or sets a value indicating whether events that form a sequence within the same strophe are to be combined.
    /// </summary>
    public bool CombinePossibleSyllableSequence { get; set; }

    /// <summary>
    /// Gets or sets the largest permitted gap (seconds) between the start times of events belonging to the same strophe.
    /// The start-to-start gap between successive syllables is the "period" of the sequence.
    /// Only used when CombinePossibleSyllableSequence = true.
    /// </summary>
    public double SyllableStartDifference { get; set; }

    /// <summary>
    /// Gets or sets the largest permitted difference (Hertz) between the frequency bands of two events,
    /// i.e. the events should occupy a similar frequency band.
    /// NOTE: a SIMILAR frequency band means that both the difference between the two top Hertz values
    /// and the difference between the two low Hertz values are less than this value.
    /// Only used when CombinePossibleSyllableSequence = true.
    /// </summary>
    public double SyllableHertzGap { get; set; }

    // ---------------- Filtering/removal of sequences with unexpected properties ----------------

    /// <summary>
    /// Gets or sets a value indicating whether sequences having incorrect properties are to be removed.
    /// </summary>
    public bool FilterSyllableSequence { get; set; }

    /// <summary>
    /// Gets or sets the largest permitted number of syllables in a sequence.
    /// Only used when FilterSyllableSequence = true.
    /// </summary>
    public int SyllableMaxCount { get; set; }

    /// <summary>
    /// Gets or sets the expected periodicity in seconds.
    /// Only used when FilterSyllableSequence = true.
    /// Important: this property interacts with SyllableStartDifference, because
    /// SyllableStartDifference - ExpectedPeriod = 3 x SD of the period.
    /// </summary>
    public double ExpectedPeriod { get; set; }

    /// <summary>
    /// Gets the standard deviation of the expected period in seconds.
    /// Only used when FilterSyllableSequence = true.
    /// This value is not set directly; it is derived from two other properties:
    /// SD of the period = (SyllableStartDifference - ExpectedPeriod) / 3.
    /// The intent is that the maximum allowable syllable period equals the expected period plus three standard deviations.
    /// </summary>
    public double PeriodStandardDeviation => (this.SyllableStartDifference - this.ExpectedPeriod) / 3.0;
}
}
}
Loading

0 comments on commit 40b4825

Please sign in to comment.