Finished edits to generic recognizer config

Also attempted to make hi-res indices file optional by default.
QutEcoacoustics · Feb 11, 2021 · 79168e8 · 79168e8
1 parent 85cfe79
commit 79168e8
Show file tree

Hide file tree

Showing 13 changed files with 462 additions and 225 deletions.
diff --git a/docs/guides/Ecosounds.NinoxBoobook.yml b/docs/guides/Ecosounds.NinoxBoobook.yml
@@ -51,36 +51,29 @@ PostProcessing:
         ExpectedPeriod: 0.4
 
     # 3: Remove events whose duration lies outside 3 SDs of an expected value.
-    #Duration:
+    Duration:
         ExpectedDuration: 0.14
         DurationStandardDeviation: 0.01
 
     # 4: Remove events whose bandwidth lies outside 3 SDs of an expected value.
- #   Bandwidth:
+    Bandwidth:
         ExpectedBandwidth: 280
         BandwidthStandardDeviation: 40
 
-    # 5: Filter the events for excess activity in their sidebands, i.e. upper and lower buffer zones
+    # 5: Filter the events for excess activity in their sidebands,
+    # i.e. upper and lower buffer zones
     SidebandActivity:
         LowerHertzBuffer: 150
         UpperHertzBuffer: 400
         MaxAverageSidebandDecibels: 3.0
 
 # Options to save results files
-# 1: Available options for saving spectrograms (case-sensitive): [False/Never | True/Always | WhenEventsDetected]
-# "True" is useful when debugging but "WhenEventsDetected" is required for operational use.
-#SaveSonogramImages: True
+# 1: Available options for saving spectrograms (case-sensitive): [Never | Always | WhenEventsDetected]
+# "Always" can be useful when debugging but "WhenEventsDetected" is a good default.
+#SaveSonogramImages: Always
 SaveSonogramImages: WhenEventsDetected
 
-# 2: Available options for saving data files (case-sensitive): [False/Never | True/Always | WhenEventsDetected]
+# 2: Available options for saving data files (case-sensitive):  [Never | Always | WhenEventsDetected]
 SaveIntermediateWavFiles: Never
-SaveIntermediateCsvFiles: false
-
-# 3: DisplayCsvImage is obsolete - ensure it remains set to: false
-DisplayCsvImage: false
-## End section for AnalyzeLongRecording
-
-# Other config files to reference
-HighResolutionIndicesConfig: "../Towsey.Acoustic.HiResIndicesForRecognisers.yml"
 
 ...
diff --git a/docs/guides/generic_recognizers.md b/docs/guides/generic_recognizers.md
diff --git a/docs/pdf/toc.yml b/docs/pdf/toc.yml
@@ -5,4 +5,6 @@
 - name: Theory
   href: ../theory/toc.yml
 - name: Guides
-  href: ../guides/toc.yml
+  href: ../guides/toc.yml
+- name: Technical
+  href: ../technical.yml
diff --git a/docs/technical/apidoc/AnalyzerConfig.md b/docs/technical/apidoc/AnalyzerConfig.md
@@ -28,4 +28,22 @@ to 22050 samples per second. This has the effect of limiting the maximum frequen
 call. If the target call is in a low frequency band (e.g. < 2kHz), then lower the resample rate to somewhat more than
 twice the maximum frequency of interest. This will reduce processing time and produce better focused spectrograms.
 If you down-sample, you will lose high frequency content. If you up-sample, there will be undefined "noise" in
-spectrograms above the original Nyquist.
+spectrograms above the original Nyquist.
+
+## Saving results
+
+Each of the parameters controls whether extra diagnostic files are saved while doing an analysis.
+
+> [!IMPORTANT]
+> If you are doing a lot of analysis **you'll want to disable** this extra diagnostic output. It will produce files
+> that are in total larger than the input audio data—you'll fill your hard drive quickly.
+
+- `SaveSonogramImages` will save a spectrogram for analysis segments (typically one-minute)
+- `SaveIntermediateWavFiles` will save the converted WAVE file used to analyze each segment
+
+Both parameters accept three values:
+
+- `Never`: disables the output.
+- `WhenEventsDetected`: only outputs the spectrogram/WAVE file when an event is found in the current segment.
+  This choice is the most useful for debugging a new recognizer.
+- `Always`: always save the diagnostic files. Don't use this option if you're going to analyze a lot of files.
diff --git a/docs/technical/apidoc/CommonParameters.md b/docs/technical/apidoc/CommonParameters.md
@@ -33,3 +33,10 @@ DecibelThresholds:
     - 9.0
     - 12.0
 ```
+
+<figure>
+
+![Common Parameters](~/images/generic_recognizer/Fig2EventParameters.png)
+
+<figcaption>Common parameters for all acoustic events, using an oscillation event as example.</figcaption>
+</figure>
diff --git a/docs/technical/apidoc/GenericRecognizerConfig.md b/docs/technical/apidoc/GenericRecognizerConfig.md
@@ -0,0 +1,13 @@
+---
+uid: AnalysisPrograms.Recognizers.GenericRecognizer.GenericRecognizerConfig
+---
+
+Please refer to the <xref:guides-generic-recognizers> guide.
+
+The basic format of a generic recognizer configuration is a YAML config file
+with two sections: `Profiles` and `PostProcessing`:
+
+```yaml
+Profiles:
+PostProcessing:
+```
diff --git a/docs/technical/apidoc/HarmonicParameters.md b/docs/technical/apidoc/HarmonicParameters.md
@@ -0,0 +1,55 @@
+---
+uid: AnalysisPrograms.Recognizers.Base.HarmonicParameters
+---
+
+## Harmonic Event detection
+
+The algorithm to find harmonic events uses a `discrete cosine transform` or *DCT*. Setting the correct DCT for the target syllable requires additional parameters.
+
+The algorithm to find harmonic events can be visualized as similar to the
+[oscillations algorithm](xref:AnalysisPrograms.Recognizers.Base.OscillationParameters),
+but rotated by 90 degrees. It uses a DCT oriented in a vertical direction and
+requires similar additional parameters.
+
+```yml
+Profiles:
+    Speech: !HarmonicParameters
+        FrameSize: 512
+        FrameStep: 512
+        # The search band
+        MinHertz: 500          
+        MaxHertz: 5000
+        # Min & max duration for a set of harmonics.
+        MinDuration: 0.2
+        MaxDuration: 1.0        
+        DecibelThreshold: 2.0
+        #  Min & max Hertz gap between harmonics
+        MinFormantGap: 400        
+        MaxFormantGap: 1200
+        DctThreshold: 0.15         
+        # Event threshold - use this to determine FP/FN trade-off.
+        EventThreshold: 0.5
+```
+
+> [!NOTE]
+> The first parameters are common to all events—see
+> <xref:AnalysisPrograms.Recognizers.Base.CommonParameters>.
+> These parameters determine the search band, the allowable event duration and
+> the decibel threshold.
+>
+> The remaining parameters are unique to the harmonic algorithm and
+> determine the search for harmonics.
+
+There are only two parameters that are specific to `Harmonics`,
+`MinFormantGap` and `MaxFormantGap`. These specify the minimum and maximum
+allowed gap (measured in Hertz) between adjacent formants/harmonics. Note that
+for these purposes the terms `harmonic` and `formant` are equivalent.
+By default, the DCT is calculated over all bins in the search band.
+
+The output from a DCT operation is an array of coefficients (taking values in
+`[0, 1]`). The index into the array is the gap between formants and the value
+at that index is the formant amplitude. The index with largest amplitude
+indicates the likely formant gap, but `DctThreshold` sets the minimum
+acceptable amplitude value. Lowering `DctThreshold` increases the likelihood
+that random noise will be accepted as a true set of formants; increasing
+`DctThreshold` increases the likelihood that a target set of formants is rejected.
diff --git a/docs/technical/apidoc/OscillationParameters.md b/docs/technical/apidoc/OscillationParameters.md
@@ -0,0 +1,78 @@
+---
+uid: AnalysisPrograms.Recognizers.Base.OscillationParameters
+---
+
+## Oscillation Event detection
+
+The algorithm to find oscillation events uses a `discrete cosine transform` or *DCT*. Setting the correct DCT for the target syllable requires additional parameters. Here is the `Profiles` declaration in the config file for the _flying fox_. It contains two profiles, the first for a vocalization and the second to detect the rhythmic sound of wing beats as a flying fox takes off or comes in to land.
+
+```yml
+Profiles:
+    Territorial: !BlobParameters
+        ComponentName: TerritorialScreech
+        MinHertz: 800          
+        MaxHertz: 8000
+        MinDuration: 0.15
+        MaxDuration: 0.8
+        DecibelThresholds:
+            - 9.0
+    Wingbeats: !OscillationParameters
+        ComponentName: Wingbeats
+        # The search band
+        MinHertz: 200          
+        MaxHertz: 2000
+        # Min & max duration for sequence of wingbeats.
+        MinDuration: 1.0
+        MaxDuration: 10.0        
+        DecibelThresholds:
+            - 6.0
+        # Wingbeat bounds - oscillations per second       
+        MinOscillationFrequency: 4        
+        MaxOscillationFrequency: 6    
+        # DCT duration in seconds 
+        DctDuration: 0.5
+        # minimum acceptable value of a DCT coefficient
+        DctThreshold: 0.5
+
+        # Event threshold - use this to determine FP/FN trade-off.
+        EventThreshold: 0.5
+```
+
+> [!NOTE]
+> The first parameters are common to all events—see
+> <xref:AnalysisPrograms.Recognizers.Base.CommonParameters>.
+> These parameters determine the search band, the allowable event duration and
+> the decibel threshold.
+>
+> The remaining parameters are unique to the oscillation algorithm and
+> determine the search for oscillations.
+
+`MinOscillationFrequency` and `MaxOscillationFrequency` specify the oscillation bounds in beats or
+oscillations per second. These values were established by measuring a sample of
+flying fox wingbeats.
+
+The next two parameters, the DCT duration in seconds and
+the DCT threshold can be tricky to establish but are critical for success.
+The DCT is computationally expensive but for accuracy it needs to span at least
+two or three oscillations. In this case a duration of `0.5` seconds is just enough
+to span at least two oscillations. The output from a DCT operation is an array
+of coefficients (taking values in `[0, 1]`). The index into the array is the
+oscillation rate and the value at that index is the amplitude. The index with
+largest amplitude indicates the likely oscillation rate, but `DctThreshold` sets
+the minimum acceptable amplitude value. Lowering `DctThreshold` increases the
+likelihood that random noise will be accepted as a true oscillation;
+increasing `DctThreshold` increases the likelihood that a target oscillation is
+rejected.
+
+The optimum values for `DctDuration` and `DctThreshold` interact. It requires
+some experimentation to find the best values for your target syllable.
+Experiment with `DctDuration` first while keeping the `DctThreshold` value low.
+Once you have a reliable value for `DctDuration`, gradually increase the value
+for `DctThreshold` until you're no longer detecting target events.
+
+<figure>
+
+![DCT parameters](~/images/generic_recognizer/DCTparameters.jpg)
+
+<figcaption>Figure. Parameters required for using a DCT to detect an oscillation event.</figcaption>
+</figure>
diff --git a/docs/technical/apidoc/PostProcessingConfig.md b/docs/technical/apidoc/PostProcessingConfig.md
@@ -0,0 +1,60 @@
+---
+uid: AudioAnalysisTools.Events.Types.EventPostProcessing.PostProcessingConfig
+---
+
+An example configuration:
+
+[!code-yaml[post_processing](../../guides/Ecosounds.NinoxBoobook.yml#L34-L69 "Post Processing")]
+
+## Combine events
+
+The `CombineOverlappingEvents` parameter is typically set to `true`, but it depends on the target call. You may wish to
+set this true for two reasons:
+
+- the target call is composed of two or more overlapping syllables that you want to join as one event.
+- whistle events often require this step to unite whistle fragment detections into one event.
+
+## Combine syllables
+
+Add a <xref:AudioAnalysisTools.Events.Types.EventPostProcessing.SyllableSequenceConfig> object to combine syllables.
+
+## Filtering
+
+Use the parameter `Duration` to filter out events that are too long or short.
+This filter removes events whose duration lies outside three standard deviations (SDs) of an expected value.
+
+- `ExpectedDuration` defines the _expected_ or _average_ duration (in seconds) for the target events
+- `DurationStandardDeviation` defines _one_ SD of the assumed distribution. Assuming the duration is normally distributed, three SDs set hard upper and lower duration bounds that include 99.7% of instances. The filtering algorithm calculates these hard bounds and removes acoustic events that fall outside the bounds.
+
+Use the parameter `Bandwidth` to filter out events whose bandwidth is too small or large.
+This filter removes events whose bandwidth lies outside three standard deviations (SDs) of an expected value.
+
+- `ExpectedBandwidth` defines the _expected_ or _average_ bandwidth (in Hertz) for the target events
+- `BandwidthStandardDeviation` defines one SD of the assumed distribution. Assuming the bandwidth is normally
+  distributed, three SDs set hard upper and lower bandwidth bounds that include 99.7% of instances. The filtering
+  algorithm calculates these hard bounds and removes acoustic events that fall outside the bounds.
+
+## Filtering on side band activity
+
+The intuition of this filter is that an unambiguous event should have an "acoustic-free zone" above and below it.
+This filter removes an event that has "excessive" acoustic activity spilling into its sidebands (i.e. upper and lower
+"buffer" zones). These events are likely to be _broadband_ events unrelated to the target event. Since this is a common
+occurrence, this filter is useful.
+
+Use the parameter `SidebandActivity` to enable side band filtering.
+
+`LowerHertzBuffer` and `UpperHertzBuffer` set the width of the sidebands required below and above the target event.
+(These can also be understood as buffer zones, hence the names assigned to the parameters.)
+
+There are two tests for determining if the sideband activity is excessive:
+
+1. The average decibel value in each sideband should be below the threshold value given by `MaxAverageSidebandDecibels`.
+  The average is taken over all spectrogram cells included in a sideband.
+2. There should be no more than one sideband frequency bin and one sideband timeframe whose average acoustic activity
+  lies within 3 dB of the average acoustic activity in the event. (The averages are over all relevant spectrogram cells.)
+  This covers the possibility that there is an acoustic event concentrated in a few frequency bins or timeframes within
+  a sideband. The 3 dB threshold is a small arbitrary value which seems to work well. It cannot be changed by the user.
+
+> [!TIP]
+> If you do not wish to apply these sideband filters, set `LowerHertzBuffer` and `UpperHertzBuffer` equal to zero.
+> Both sideband tests are applied where the buffer zones are non-zero.
diff --git a/docs/technical/apidoc/SyllableSequenceConfig.md b/docs/technical/apidoc/SyllableSequenceConfig.md
@@ -0,0 +1,19 @@
+---
+uid: AudioAnalysisTools.Events.Types.EventPostProcessing.SyllableSequenceConfig
+---
+
+
+Set `CombinePossibleSyllableSequence` true where you want to combine possible syllable sequences. A typical example is
+a sequence of chirps in a honeyeater call.
+
+`SyllableStartDifference` and `SyllableHertzGap` set the allowed tolerances when combining events into sequences
+
+- `SyllableStartDifference` sets the maximum allowed time difference (in seconds) between the starts of two events
+- `SyllableHertzGap` sets the maximum allowed frequency difference (in Hertz) between the minimum frequencies of two events.
+
+Once you have combined possible sequences, you may wish to remove sequences that do not satisfy the parameters for your
+target call. Set `FilterSyllableSequence` true if you want to filter (remove) sequences that do not fall within the
+constraints defined by `SyllableMaxCount` and `ExpectedPeriod`.
+
+- `SyllableMaxCount` sets an upper limit of the number of events that are combined to form a sequence
+- `ExpectedPeriod` sets a limit on the average period (in seconds) of the combined events.
diff --git a/docs/technical/configs/toc.yml b/docs/technical/configs/toc.yml
@@ -1,4 +1,17 @@
+- name: GenericRecognizerConfig
+  href: xref:AnalysisPrograms.Recognizers.GenericRecognizer.GenericRecognizerConfig
+
 - name: CommonParameters
   href: xref:AnalysisPrograms.Recognizers.Base.CommonParameters
+
+- name: OscillationParameters
+  href: xref:AnalysisPrograms.Recognizers.Base.OscillationParameters
+
+- name: HarmonicParameters
+  href: xref:AnalysisPrograms.Recognizers.Base.HarmonicParameters
+
+- name: HarmonicParameters
+  href: xref:AnalysisPrograms.Recognizers.Base.HarmonicParameters
+
 - name: AnalyzerConfig
   href: xref:AnalysisBase.AnalyzerConfig
diff --git a/src/AnalysisPrograms/Recognizers/Base/RecognizerBase.cs b/src/AnalysisPrograms/Recognizers/Base/RecognizerBase.cs
@@ -37,7 +37,7 @@ public RecognizerConfig()
             {
                 this.Loaded += config =>
                     {
-                        var file = ConfigFile.Resolve(this.HighResolutionIndicesConfig);
+                        var file = ConfigFile.Resolve(this.HighResolutionIndicesConfig ?? "Towsey.Acoustic.HiResIndicesForRecognisers.yml");
                         var indicesConfig = ConfigFile.Deserialize<AcousticIndices.AcousticIndicesConfig>(file);
                         this.HighResolutionIndices = indicesConfig;
                     };

diff --git a/src/AudioAnalysisTools/DctParameters.cs b/src/AudioAnalysisTools/DctParameters.cs
@@ -13,28 +13,38 @@ public abstract class DctParameters : CommonParameters
         /// <summary>
         /// Gets or sets the time duration (in seconds) of a Discrete Cosine Transform.
         /// </summary>
+        /// <value>The duration of the window in seconds.</value>
         public double DctDuration { get; set; } = 1.0;
 
         /// <summary>
         /// Gets or sets the minimum acceptable value of a DCT coefficient.
         /// </summary>
+        /// <remarks>
+        /// Lowering `DctThreshold` increases the likelihood that random noise
+        /// will be accepted as a true oscillation; increasing `DctThreshold`
+        /// increases the likelihood that a target oscillation is rejected.
+        /// </remarks>
+        /// <value>A value representing a minimum amplitude threshold in the range `[0, 1]`.</value>
         public double DctThreshold { get; set; } = 0.5;
 
         /// <summary>
         /// Gets or sets the minimum OSCILLATIONS PER SECOND
-        /// Ignore oscillation rates below the min &amp; above the max threshold.
+        /// Ignore oscillation rates below the min &amp; above the max threshold.
         /// </summary>
+        /// <value>The value in oscillations per second.</value>
         public int MinOscillationFrequency { get; set; }
 
         /// <summary>
         /// Gets or sets the maximum OSCILLATIONS PER SECOND
         /// Ignore oscillation rates below the min &amp; above the max threshold.
         /// </summary>
+        /// <value>The value in oscillations per second.</value>
         public int MaxOscillationFrequency { get; set; }
 
         /// <summary>
         /// Gets or sets the Event threshold - use this to determine FP / FN trade-off for events.
         /// </summary>
+        /// <value>A number between <c>0.0</c> and <c>1.0</c>.</value>
         public double EventThreshold { get; set; } = 0.3;
     }
 }