Skip to content

Commit

Permalink
added ExtractClusteringFeatures and GenerateSpectrograms to MahnooshS…
Browse files Browse the repository at this point in the history
…andpit
  • Loading branch information
mkholghi committed Sep 4, 2018
1 parent cf0c3b9 commit c951ea4
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 55 deletions.
6 changes: 3 additions & 3 deletions src/AnalysisConfigFiles/FeatureLearningConfig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ numFreqBand: 1
PatchHeight: 1

# the number of frames whose feature vectors will be concatenated in order to preserve temporal information.
FrameWindowLength : 8
FrameWindowLength : 1

# the step size to make a window of frames
StepSize : 1

# The number of patches to be selected from each recording of the patch sampling set
NumRandomPatches: 16
NumRandomPatches: 1000

# the number of clusters to be generated from the selected patch set
NumClusters: 64
NumClusters: 256


# Applying noise reduction and whitening if these options are set to 'true'
Expand Down
167 changes: 119 additions & 48 deletions src/AnalysisPrograms/MahnooshSandpit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace AnalysisPrograms
using Accord.MachineLearning;
using Accord.Math;
using Accord.Statistics;
using Acoustics.Shared;
using Acoustics.Shared.ConfigFile;
using Acoustics.Shared.Csv;
using AudioAnalysisTools.DSP;
Expand All @@ -31,14 +32,14 @@ public class MahnooshSandpit

public void Execute(Arguments arguments)
{
LoggedConsole.WriteLine("feature extraction process");
LoggedConsole.WriteLine("feature learning process");

var inputDir = @"M:\Postdoc\Liz\"; //"@"D:\Mahnoosh\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //
var inputDir = @"D:\Mahnoosh\Liz\"; //@"M:\Postdoc\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //
var resultDir = Path.Combine(inputDir, "FeatureLearning");
var inputPath = Path.Combine(inputDir, "PatchSamplingSegments");
var inputPath = Path.Combine(inputDir, "TrainSet"); //PatchSamplingSegments //PatchSampling
var trainSetPath = Path.Combine(inputDir, "TrainSet");
var testSetPath = Path.Combine(inputDir, "TestSet");
var configPath = @"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
var configPath = @"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
Expand Down Expand Up @@ -137,6 +138,7 @@ public void Execute(Arguments arguments)

// extracting features
FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir);
LoggedConsole.WriteLine("Done...");
/*
// check whether there is any file in the folder/subfolders
if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
Expand Down Expand Up @@ -470,26 +472,26 @@ public void Execute(Arguments arguments)
var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
var normVector = inputVector;
//if (inputVector.Euclidean() == 0)
//{
//LoggedConsole.WriteLine(j.ToString());
//}
// to avoid vectors with NaN values, only normalize those whose norm is not equal to zero.
if (inputVector.Euclidean() != 0)
{
normVector = ART_2A.NormaliseVector(inputVector);
}
//if (normVector.HasNaN())
//{
//var vec = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
//LoggedConsole.WriteLine(j.ToString());
//}
featureTransVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
}
Expand Down Expand Up @@ -695,8 +697,7 @@ public void Execute(Arguments arguments)
//}
//}
//}
// Reconstructing the target spectrogram based on clusters' centroids
//List<double[,]> convertedSpec = new List<double[,]>();
//int columnPerFreqBand = sonogram2.Data.GetLength(1) / numFreqBand;
Expand All @@ -711,7 +712,6 @@ public void Execute(Arguments arguments)
// DO DRAW SPECTROGRAM
//var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations);
//reconstructedSpecImage.Save(outputReSpecImagePath, ImageFormat.Png);
*
}
}
Expand Down Expand Up @@ -848,6 +848,7 @@ public void Execute(Arguments arguments)
//*****
*/
}

/*
public class FeatureLearningConfig : Config
{
Expand Down Expand Up @@ -933,63 +934,133 @@ public class Arguments : SubCommandBase
{
public override Task<int> Execute(CommandLineApplication app)
{
var instance = new MahnooshSandpit();
instance.Execute(this);
//TestSpectrograms();
//var instance = new MahnooshSandpit();
//instance.Execute(this);
//GenerateSpectrograms();
ExtractClusteringFeatures();

return this.Ok();
}
}

public static void TestSpectrograms()
public static void ExtractClusteringFeatures()
{
var recordingPath = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\SM304264_0+1_20160421_094539_37-38min.wav"; // "SM304264_0+1_20160421_004539_47-48min.wav"
var resultDir = @"C:\Users\kholghim\Mahnoosh\Liz\SpectrogramTestResults\";
var outputAmpSpecImagePath = Path.Combine(resultDir, "AmplitudeSpectrogram.bmp");
var outputDecibelSpecImagePath = Path.Combine(resultDir, "DecibelSpectrogram.bmp");
var outputEnergySpecImagePath = Path.Combine(resultDir, "EnergySpectrogram.bmp");
var outputLogEnergySpecImagePath = Path.Combine(resultDir, "LogEnergySpectrogram.bmp");
var outputLinScaImagePath = Path.Combine(resultDir, "LinearScaleSpectrogram.bmp");
var outputMelScaImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.bmp");
var outputNormalizedImagePath = Path.Combine(resultDir, "NormalizedSpectrogram.bmp");
var outputNoiseReducedImagePath = Path.Combine(resultDir, "NoiseReducedSpectrogram.bmp");
var outputLogPsdImagePath = Path.Combine(resultDir, "Psd.bmp");

int nyquist = new AudioRecording(recordingPath).Nyquist; // 11025;
int frameSize = 1024;
int finalBinCount = 512; //256; //128; // 100; // 40; // 200; //
int hertzInterval = 1000;
LoggedConsole.WriteLine("feature extraction process");
var inputDir = @"D:\Mahnoosh\Liz\"; //@"M:\Postdoc\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //
var resultDir = Path.Combine(inputDir, "FeatureLearning");
var trainSetPath = Path.Combine(inputDir, "TrainSet");
var testSetPath = Path.Combine(inputDir, "TestSet");
var configPath = @"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; // @"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
var centroidsPath = Path.Combine(resultDir, "ClusterCentroids0.csv");

//FreqScaleType scaleType = FreqScaleType.Linear;
var scaleType = FreqScaleType.Linear;
var configFile = configPath.ToFileInfo();

//var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
//var fst = freqScale.ScaleType;
if (configFile == null)
{
throw new FileNotFoundException("No config file argument provided");
}
else if (!configFile.Exists)
{
throw new ArgumentException($"Config file {configFile.FullName} not found");
}

var configuration = ConfigFile.Deserialize<FeatureLearningSettings>(configFile);

List<double[][]> centroids = new List<double[][]>();
centroids.Add(Csv.ReadMatrixFromCsv<double>(centroidsPath.ToFileInfo(), TwoDimensionalArray.None).ToJagged());
FeatureExtraction.UnsupervisedFeatureExtraction(configuration, centroids, testSetPath, resultDir);
LoggedConsole.WriteLine("Done...");
}

public static void GenerateSpectrograms()
{
var recordingDir = @"M:\Postdoc\Liz\SupervisedPatchSamplingSet\Recordings\"; //@"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\SM304264_0+1_20160421_094539_37-38min.wav"; // "SM304264_0+1_20160421_004539_47-48min.wav"
var resultDir = @"M:\Postdoc\Liz\SupervisedPatchSamplingSet\";

// check whether there is any file in the folder/subfolders
if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0)
{
throw new ArgumentException("The folder of recordings is empty...");
}

int frameSize = 1024;
int finalBinCount = 256;
int hertzInterval = 1000;
FreqScaleType scaleType = FreqScaleType.Mel;
var settings = new SpectrogramSettings()
{
WindowSize = frameSize,

// the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
// The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
// The "WindowOverlap" is calculated to answer this question
// the duration of each group of 24 single frames is equal to 1 second
// note that the "WindowOverlap" value should be recalculated if frame size is changed
// this has not yet been considered in the Config file!
WindowOverlap = 0.1028,
DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
MelBinCount = 256, //(scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
NoiseReductionType = NoiseReductionType.None,
NoiseReductionParameter = 0.0,
};
//int minFreqBin = 24; // i.e., 500 Hz
//int maxFreqBin = 82; // i.e., 3500 Hz
//int numFreqBand = 1;

var recording = new AudioRecording(recordingPath);
//double[,] inputMatrix;

settings.SourceFileName = recording.BaseName;
//var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

var sonogram = new EnergySpectrogram(settings, recording.WavReader);
sonogram.Data = MatrixTools.Matrix2LogValues(sonogram.Data);
var attributes = new SpectrogramAttributes()
foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav"))
{
NyquistFrequency = sonogram.Attributes.NyquistFrequency,
Duration = sonogram.Attributes.Duration,
};
FileInfo fileInfo = filePath.ToFileInfo();

// process the wav file if it is not empty
if (fileInfo.Length != 0)
{
var recording = new AudioRecording(filePath);
settings.SourceFileName = recording.BaseName;

var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader);

var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);

// DO RMS NORMALIZATION
//sonogram.Data = SNR.RmsNormalization(sonogram.Data);

// DO NOISE REDUCTION
decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);

// draw the spectrogram
var attributes = new SpectrogramAttributes()
{
NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency,
Duration = decibelSpectrogram.Attributes.Duration,
};

Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes);
string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp");
image.Save(pathToSpectrogramFiles, ImageFormat.Bmp);

// write the matrix to a csv file
string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv");
Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), decibelSpectrogram.Data);

/*
// check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
if (minFreqBin != 1 || maxFreqBin != finalBinCount)
{
inputMatrix =
PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
}
else
{
inputMatrix = decibelSpectrogram.Data;
}
Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes);
image.Save(outputLogEnergySpecImagePath, ImageFormat.Bmp);
// creating matrices from different freq bands of the source spectrogram
List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
*/
}
}
}
}
}
5 changes: 3 additions & 2 deletions src/AudioAnalysisTools/DSP/FeatureExtraction.cs
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,11 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
}

// To preserve the temporal information, we can concatenate the similarity vectors of a group of frames with
// the length indicated as FrameWindowLength
// To preserve the temporal information, we can concatenate the similarity vectors of a group of frames using
// FrameWindowLength

// patchId refers to the patch id that has been processed so far according to the step size.
// if we want no overlap between different frame windows, then stepSize = frameWindowLength
int patchId = 0;

// patchCounter refers to the number of patches that have been processed so far according to FrameWindowLength.
Expand Down
6 changes: 4 additions & 2 deletions src/AudioAnalysisTools/DSP/FeatureLearning.cs
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,10 @@ public static class FeatureLearning
}

/// <summary>
/// Apply feature learning process on a set of 1-minute recordings that contains the bird call of interest
/// in order to build one cluster (supervisedly built cluster!)
/// This method is called supervised feature learning because the frames to form a cluster
/// have been manually selected from 1-min recordings.
/// The input to this method is a group of frames that contain the bird of interest based on the
/// configuration set, i.e., the freq band...
/// </summary>
public static List<KmeansClustering.Output> supervisedFeatureLearning(FeatureLearningSettings config,
string inputPath)
Expand Down

0 comments on commit c951ea4

Please sign in to comment.