Skip to content

Commit

Permalink
Add support for PCA whitening
Browse files Browse the repository at this point in the history
Add more experiments with different patch size on different frequency bands
  • Loading branch information
mkholghi committed Mar 22, 2018
1 parent 0197ca0 commit ec25c3b
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 7 deletions.
16 changes: 13 additions & 3 deletions src/AudioAnalysisTools/DSP/PatchSampling.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ public static double[][] GetPatches(double[,] spectrogram, int patchWidth, int p
{
double[,] m = new double[patchHeight, patchWidth];

for (int col = 0; col < vector.Length; col += patchWidth)
for (int col = 0; col < vector.Length; col += patchHeight)
{
for (int row = 0; row < patchWidth; row++)
for (int row = 0; row < patchHeight; row++)
{
m[row, col / patchHeight] = vector[col + row];
}
Expand Down Expand Up @@ -206,7 +206,17 @@ public static List<double[,]> GetFreqBandMatrices(double[,] matrix)
colSize = colSize + submat[i].GetLength(1);
}

double[,] matrix = new double[submat[1].GetLength(0), colSize];
//storing the number of rows of each submatrix in an array
int[] noRows = new int[submat.Length];
for (int i = 0; i < submat.Length; i++)
{
noRows[i] = submat[i].GetLength(0);
}

//find the max number of rows from noRows array
int maxRows = noRows.Max();

double[,] matrix = new double[maxRows, colSize];

//there might be a better way to do this
AddToArray(matrix, submat[0]);
Expand Down
94 changes: 90 additions & 4 deletions tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void Cleanup()
public void TestPcaWhitening()
{
var outputDir = this.outputDirectory;
var resultDir = PathHelper.ResolveAssetPath("PcaWhitening");
var resultDir = PathHelper.ResolveAssetPath("C:\\Users\\kholghim\\Mahnoosh\\PcaWhitening");
var outputLinScaImagePath = Path.Combine(resultDir, "LinearFreqScaleSpectrogram.png");
var outputAmpSpecImagePath = Path.Combine(resultDir, "AmplitudeSpectrogram.png");
var outputNormAmpImagePath = Path.Combine(resultDir, "NormAmplitudeSpectrogram.png");
Expand All @@ -56,7 +56,7 @@ public void TestPcaWhitening()
var outputWhitenedSpectrogramPath = Path.Combine(resultDir, "WhitenedSpectrogram.png");
var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
var projectionMatrixPath = Path.Combine(resultDir, "ProjectionMatrix");
var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"); // resultDir, "20160705_064611_22069.wav"
var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"); // "PcaWhitening", "20160705_064611_22069.wav"
var recording = new AudioRecording(recordingPath);

// GENERATE AMPLITUDE SPECTROGRAM
Expand Down Expand Up @@ -205,7 +205,7 @@ public void TestPcaWhitening()
//+++++++++++++++++++++++++++++++++++++++++++++++++Exp2
*/

//+++++++++++++++++++++++++++++++++++++++++++++++++Exp3: different freq bands, different source-target, same patch size
//+++++++++++++++++++++++++++++++++++++++++++++++++Exp3: different freq bands, different source-target, same patch size

//First: creating 3 matrices from 3 different freq bands of the source spectrogram
List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(sonogram.Data);
Expand All @@ -217,6 +217,50 @@ public void TestPcaWhitening()
List<double[,]> eigenVectors = new List<double[,]>();
List<int> noOfComponents = new List<int>();

//+++++++++++++++++++++++++++++++++++++++++++++++++Exp4: different freq bands, different source-target, different patch size
int freqBandIndex = 0;
while (freqBandIndex < allSubmatrices.Count)
{
if (freqBandIndex == 0) //lower band: patch size is 32-by-8
{
int lowPatchWidth = 8;
int lowPatchHeight = 32;
var randomPatches = PatchSampling.GetPatches(matrices[freqBandIndex], lowPatchWidth, lowPatchHeight, noOfRandomPatches, "random").ToMatrix();
var actual = PcaWhitening.Whitening(randomPatches);
projectionMatrices.Add(actual.Item1);
eigenVectors.Add(actual.Item3);
noOfComponents.Add(actual.Item4);
freqBandIndex++;
}
else
{
if (freqBandIndex == 1) //mid band: patch size is 16-by-16
{
var randomPatches = PatchSampling.GetPatches(matrices[freqBandIndex], patchWidth, patchHeight, noOfRandomPatches, "random").ToMatrix();
var actual = PcaWhitening.Whitening(randomPatches);
projectionMatrices.Add(actual.Item1);
eigenVectors.Add(actual.Item3);
noOfComponents.Add(actual.Item4);
freqBandIndex++;
}
else
{
if (freqBandIndex == 2) //upper band: patch size is 8-by-32
{
int upPatchWidth = 32;
int upPatchHeight = 8;
var randomPatches = PatchSampling.GetPatches(matrices[freqBandIndex], upPatchWidth, upPatchHeight, noOfRandomPatches, "random").ToMatrix();
var actual = PcaWhitening.Whitening(randomPatches);
projectionMatrices.Add(actual.Item1);
eigenVectors.Add(actual.Item3);
noOfComponents.Add(actual.Item4);
freqBandIndex++;
}
}
}
}
//+++++++++++++++++++++++++++++++++++++++++++++++++Exp4: different freq bands, different source-target, different patch size
/*
for (int i = 0; i < allSubmatrices.Count; i++)
{
var randomPatches = PatchSampling.GetPatches(matrices[i], patchWidth, patchHeight, noOfRandomPatches, "random").ToMatrix();
Expand All @@ -225,10 +269,11 @@ public void TestPcaWhitening()
eigenVectors.Add(actual.Item3);
noOfComponents.Add(actual.Item4);
}
*/

//Third: divide the target spectrogram into 3 submatrices with different freq bands.
//divide each submatrix into sequential patches
var recording2Path = PathHelper.ResolveAsset(resultDir, "20160705_064611_22069.wav"); // "Recordings", "BAC2_20071008-085040.wav"
var recording2Path = PathHelper.ResolveAsset("PcaWhitening", "20160705_064611_22069.wav"); // "Recordings", "BAC2_20071008-085040.wav"
var recording2 = new AudioRecording(recording2Path);
var fst2 = FreqScaleType.Linear;
var freqScale2 = new FrequencyScale(fst2);
Expand Down Expand Up @@ -257,12 +302,53 @@ public void TestPcaWhitening()

//Fourth: Reconstruct the source matrix with projection matrices
List<double[,]> clearedSubmat = new List<double[,]>();

//+++++++++++++++++++++++++++++++++++++++++++++++++Exp4: different freq bands, different source-target, different patch size
freqBandIndex = 0;
while (freqBandIndex < allSubmatrices2.Count)
{
if (freqBandIndex == 0) //lower band: patch size is 32-by-8
{
int lowPatchWidth = 8;
int lowPatchHeight = 32;
var sequentialPatches = PatchSampling.GetPatches(allSubmatrices2.ToArray()[freqBandIndex], lowPatchWidth, lowPatchHeight, (rows2 / lowPatchHeight) * (allSubmatrices2.ToArray()[freqBandIndex].GetLength(1) / lowPatchWidth), "sequential");
double[,] reconstructedSpec2 = PcaWhitening.ReconstructSpectrogram(projectionMatrices.ToArray()[freqBandIndex], sequentialPatches.ToMatrix(), eigenVectors.ToArray()[freqBandIndex], noOfComponents.ToArray()[freqBandIndex]);
clearedSubmat.Add(PatchSampling.ConvertPatches(reconstructedSpec2, lowPatchWidth, lowPatchHeight, allSubmatrices2.ToArray()[freqBandIndex].GetLength(1)));
freqBandIndex++;
}
else
{
if (freqBandIndex == 1) //mid band: patch size is 16-by-16
{
var sequentialPatches = PatchSampling.GetPatches(allSubmatrices2.ToArray()[freqBandIndex], patchWidth, patchHeight, (rows2 / patchHeight) * (allSubmatrices2.ToArray()[freqBandIndex].GetLength(1) / patchWidth), "sequential");
double[,] reconstructedSpec2 = PcaWhitening.ReconstructSpectrogram(projectionMatrices.ToArray()[freqBandIndex], sequentialPatches.ToMatrix(), eigenVectors.ToArray()[freqBandIndex], noOfComponents.ToArray()[freqBandIndex]);
clearedSubmat.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, allSubmatrices2.ToArray()[freqBandIndex].GetLength(1)));
freqBandIndex++;
}
else
{
if (freqBandIndex == 2) //upper band: patch size is 8-by-32
{
int upPatchWidth = 32;
int upPatchHeight = 8;
var sequentialPatches = PatchSampling.GetPatches(allSubmatrices2.ToArray()[freqBandIndex], upPatchWidth, upPatchHeight, (rows2 / upPatchHeight) * (allSubmatrices2.ToArray()[freqBandIndex].GetLength(1) / upPatchWidth), "sequential");
double[,] reconstructedSpec2 = PcaWhitening.ReconstructSpectrogram(projectionMatrices.ToArray()[freqBandIndex], sequentialPatches.ToMatrix(), eigenVectors.ToArray()[freqBandIndex], noOfComponents.ToArray()[freqBandIndex]);
clearedSubmat.Add(PatchSampling.ConvertPatches(reconstructedSpec2, upPatchWidth, upPatchHeight, allSubmatrices2.ToArray()[freqBandIndex].GetLength(1)));
freqBandIndex++;
}
}
}
}
//+++++++++++++++++++++++++++++++++++++++++++++++++Exp4: different freq bands, different source-target, different patch size

/*
for (int i = 0; i < allSubmatrices2.Count; i++)
{
var sequentialPatches = PatchSampling.GetPatches(allSubmatrices2.ToArray()[i], patchWidth, patchHeight, (rows2 / patchHeight) * (allSubmatrices2.ToArray()[i].GetLength(1) / patchWidth), "sequential");
double[,] reconstructedSpec2 = PcaWhitening.ReconstructSpectrogram(projectionMatrices.ToArray()[i], sequentialPatches.ToMatrix(), eigenVectors.ToArray()[i], noOfComponents.ToArray()[i]);
clearedSubmat.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, allSubmatrices2.ToArray()[i].GetLength(1)));
}
*/

sonogram2.Data = PatchSampling.ConcatFreqBandMatrices(clearedSubmat);
var respecImage2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + fst2.ToString(), freqScale2.GridLineLocations);
Expand Down

0 comments on commit ec25c3b

Please sign in to comment.