Skip to content

Commit

Permalink
added ExtractClusteringFeatures and GenerateSpectrograms to MahnooshS…
Browse files Browse the repository at this point in the history
…andpit
  • Loading branch information
mkholghi committed Sep 4, 2018
1 parent cf0c3b9 commit c951ea4
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 55 deletions.
6 changes: 3 additions & 3 deletions src/AnalysisConfigFiles/FeatureLearningConfig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ numFreqBand: 1
PatchHeight: 1

# the number of frames whose feature vectors will be concatenated in order to preserve temporal information.
FrameWindowLength : 8
FrameWindowLength : 1

# the step size to make a window of frames
StepSize : 1

# The number of patches to be selected from each recording of the patch sampling set
NumRandomPatches: 16
NumRandomPatches: 1000

# the number of clusters to be generated from the selected patch set
NumClusters: 64
NumClusters: 256


# Applying noise reduction and whitening if these options are set to 'true'
Expand Down
167 changes: 119 additions & 48 deletions src/AnalysisPrograms/MahnooshSandpit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace AnalysisPrograms
using Accord.MachineLearning;
using Accord.Math;
using Accord.Statistics;
using Acoustics.Shared;
using Acoustics.Shared.ConfigFile;
using Acoustics.Shared.Csv;
using AudioAnalysisTools.DSP;
Expand All @@ -31,14 +32,14 @@ public class MahnooshSandpit

public void Execute(Arguments arguments)
{
LoggedConsole.WriteLine("feature extraction process");
LoggedConsole.WriteLine("feature learning process");

var inputDir = @"M:\Postdoc\Liz\"; //"@"D:\Mahnoosh\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //
var inputDir = @"D:\Mahnoosh\Liz\"; //@"M:\Postdoc\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //
var resultDir = Path.Combine(inputDir, "FeatureLearning");
var inputPath = Path.Combine(inputDir, "PatchSamplingSegments");
var inputPath = Path.Combine(inputDir, "TrainSet"); //PatchSamplingSegments //PatchSampling
var trainSetPath = Path.Combine(inputDir, "TrainSet");
var testSetPath = Path.Combine(inputDir, "TestSet");
var configPath = @"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
var configPath = @"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
Expand Down Expand Up @@ -137,6 +138,7 @@ public void Execute(Arguments arguments)

// extracting features
FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir);
LoggedConsole.WriteLine("Done...");
/*
// check whether there is any file in the folder/subfolders
if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
Expand Down Expand Up @@ -470,26 +472,26 @@ public void Execute(Arguments arguments)
var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
var normVector = inputVector;
//if (inputVector.Euclidean() == 0)
//{
//LoggedConsole.WriteLine(j.ToString());
//}
// to avoid vectors with NaN values, only normalize those whose norm is not equal to zero.
if (inputVector.Euclidean() != 0)
{
normVector = ART_2A.NormaliseVector(inputVector);
}
//if (normVector.HasNaN())
//{
//var vec = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
//LoggedConsole.WriteLine(j.ToString());
//}
featureTransVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
}
Expand Down Expand Up @@ -695,8 +697,7 @@ public void Execute(Arguments arguments)
//}
//}
//}
// Reconstructing the target spectrogram based on clusters' centroids
//List<double[,]> convertedSpec = new List<double[,]>();
//int columnPerFreqBand = sonogram2.Data.GetLength(1) / numFreqBand;
Expand All @@ -711,7 +712,6 @@ public void Execute(Arguments arguments)
// DO DRAW SPECTROGRAM
//var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations);
//reconstructedSpecImage.Save(outputReSpecImagePath, ImageFormat.Png);
*
}
}
Expand Down Expand Up @@ -848,6 +848,7 @@ public void Execute(Arguments arguments)
//*****
*/
}

/*
public class FeatureLearningConfig : Config
{
Expand Down Expand Up @@ -933,63 +934,133 @@ public class Arguments : SubCommandBase
{
public override Task<int> Execute(CommandLineApplication app)
{
var instance = new MahnooshSandpit();
instance.Execute(this);
//TestSpectrograms();
//var instance = new MahnooshSandpit();
//instance.Execute(this);
//GenerateSpectrograms();
ExtractClusteringFeatures();

return this.Ok();
}
}

public static void TestSpectrograms()
public static void ExtractClusteringFeatures()
{
var recordingPath = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\SM304264_0+1_20160421_094539_37-38min.wav"; // "SM304264_0+1_20160421_004539_47-48min.wav"
var resultDir = @"C:\Users\kholghim\Mahnoosh\Liz\SpectrogramTestResults\";
var outputAmpSpecImagePath = Path.Combine(resultDir, "AmplitudeSpectrogram.bmp");
var outputDecibelSpecImagePath = Path.Combine(resultDir, "DecibelSpectrogram.bmp");
var outputEnergySpecImagePath = Path.Combine(resultDir, "EnergySpectrogram.bmp");
var outputLogEnergySpecImagePath = Path.Combine(resultDir, "LogEnergySpectrogram.bmp");
var outputLinScaImagePath = Path.Combine(resultDir, "LinearScaleSpectrogram.bmp");
var outputMelScaImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.bmp");
var outputNormalizedImagePath = Path.Combine(resultDir, "NormalizedSpectrogram.bmp");
var outputNoiseReducedImagePath = Path.Combine(resultDir, "NoiseReducedSpectrogram.bmp");
var outputLogPsdImagePath = Path.Combine(resultDir, "Psd.bmp");

int nyquist = new AudioRecording(recordingPath).Nyquist; // 11025;
int frameSize = 1024;
int finalBinCount = 512; //256; //128; // 100; // 40; // 200; //
int hertzInterval = 1000;
LoggedConsole.WriteLine("feature extraction process");
var inputDir = @"D:\Mahnoosh\Liz\"; //@"M:\Postdoc\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //
var resultDir = Path.Combine(inputDir, "FeatureLearning");
var trainSetPath = Path.Combine(inputDir, "TrainSet");
var testSetPath = Path.Combine(inputDir, "TestSet");
var configPath = @"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; // @"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
var centroidsPath = Path.Combine(resultDir, "ClusterCentroids0.csv");

//FreqScaleType scaleType = FreqScaleType.Linear;
var scaleType = FreqScaleType.Linear;
var configFile = configPath.ToFileInfo();

//var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
//var fst = freqScale.ScaleType;
if (configFile == null)
{
throw new FileNotFoundException("No config file argument provided");
}
else if (!configFile.Exists)
{
throw new ArgumentException($"Config file {configFile.FullName} not found");
}

var configuration = ConfigFile.Deserialize<FeatureLearningSettings>(configFile);

List<double[][]> centroids = new List<double[][]>();
centroids.Add(Csv.ReadMatrixFromCsv<double>(centroidsPath.ToFileInfo(), TwoDimensionalArray.None).ToJagged());
FeatureExtraction.UnsupervisedFeatureExtraction(configuration, centroids, testSetPath, resultDir);
LoggedConsole.WriteLine("Done...");
}

public static void GenerateSpectrograms()
{
var recordingDir = @"M:\Postdoc\Liz\SupervisedPatchSamplingSet\Recordings\"; //@"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\SM304264_0+1_20160421_094539_37-38min.wav"; // "SM304264_0+1_20160421_004539_47-48min.wav"
var resultDir = @"M:\Postdoc\Liz\SupervisedPatchSamplingSet\";

// check whether there is any file in the folder/subfolders
if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0)
{
throw new ArgumentException("The folder of recordings is empty...");
}

int frameSize = 1024;
int finalBinCount = 256;
int hertzInterval = 1000;
FreqScaleType scaleType = FreqScaleType.Mel;
var settings = new SpectrogramSettings()
{
WindowSize = frameSize,

// the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
// The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
// The "WindowOverlap" is calculated to answer this question
// the duration of each group of 24 single frames is equal to 1 second
// note that the "WindowOverlap" value should be recalculated if frame size is changed
// this has not yet been considered in the Config file!
WindowOverlap = 0.1028,
DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
MelBinCount = 256, //(scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
NoiseReductionType = NoiseReductionType.None,
NoiseReductionParameter = 0.0,
};
//int minFreqBin = 24; // i.e., 500 Hz
//int maxFreqBin = 82; // i.e., 3500 Hz
//int numFreqBand = 1;

var recording = new AudioRecording(recordingPath);
//double[,] inputMatrix;

settings.SourceFileName = recording.BaseName;
//var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

var sonogram = new EnergySpectrogram(settings, recording.WavReader);
sonogram.Data = MatrixTools.Matrix2LogValues(sonogram.Data);
var attributes = new SpectrogramAttributes()
foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav"))
{
NyquistFrequency = sonogram.Attributes.NyquistFrequency,
Duration = sonogram.Attributes.Duration,
};
FileInfo fileInfo = filePath.ToFileInfo();

// process the wav file if it is not empty
if (fileInfo.Length != 0)
{
var recording = new AudioRecording(filePath);
settings.SourceFileName = recording.BaseName;

var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader);

var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);

// DO RMS NORMALIZATION
//sonogram.Data = SNR.RmsNormalization(sonogram.Data);

// DO NOISE REDUCTION
decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);

// draw the spectrogram
var attributes = new SpectrogramAttributes()
{
NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency,
Duration = decibelSpectrogram.Attributes.Duration,
};

Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes);
string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp");
image.Save(pathToSpectrogramFiles, ImageFormat.Bmp);

// write the matrix to a csv file
string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv");
Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), decibelSpectrogram.Data);

/*
// check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
if (minFreqBin != 1 || maxFreqBin != finalBinCount)
{
inputMatrix =
PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
}
else
{
inputMatrix = decibelSpectrogram.Data;
}
Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes);
image.Save(outputLogEnergySpecImagePath, ImageFormat.Bmp);
// creating matrices from different freq bands of the source spectrogram
List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
*/
}
}
}
}
}
5 changes: 3 additions & 2 deletions src/AudioAnalysisTools/DSP/FeatureExtraction.cs
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,11 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
}

// To preserve the temporal information, we can concatenate the similarity vectors of a group of frames with
// the length indicated as FrameWindowLength
// To preserve the temporal information, we can concatenate the similarity vectors of a group of frames using
// FrameWindowLength

// patchId refers to the patch id that has been processed so far according to the step size.
// if we want no overlap between different frame windows, then stepSize = frameWindowLength
int patchId = 0;

// patchCounter refers to the number of patches that have been processed so far according to FrameWindowLength.
Expand Down
6 changes: 4 additions & 2 deletions src/AudioAnalysisTools/DSP/FeatureLearning.cs
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,10 @@ public static class FeatureLearning
}

/// <summary>
/// Apply feature learning process on a set of 1-minute recordings that contains the bird call of interest
/// in order to build one cluster (supervisedly built cluster!)
/// This method is called supervised feature learning because the frames to form a cluster
/// have been manually selected from 1-min recordings.
/// The input to this method is a group of frames that contain the bird of interest based on the
/// configuration set, i.e., the freq band...
/// </summary>
public static List<KmeansClustering.Output> supervisedFeatureLearning(FeatureLearningSettings config,
string inputPath)
Expand Down

0 comments on commit c951ea4

Please sign in to comment.