Verify the reconstruction using statistical indicators

Dreamtonics · Nov 19, 2017 · cbdcdcd · cbdcdcd
1 parent afd6eb3
commit cbdcdcd
Show file tree

Hide file tree

Showing 4 changed files with 191 additions and 9 deletions.
diff --git a/makefile b/makefile
@@ -32,7 +32,7 @@ test: $(OUT_DIR)/test-structs \
 	$(OUT_DIR)/test-layer0-anasynth
 
 $(OUT_DIR)/test-%: test/test-%.c $(TARGET_A) \
-	  $(LIBPYIN_A) $(LIBGVPS_A) $(CIGLET_O)
+	  $(LIBPYIN_A) $(LIBGVPS_A) $(CIGLET_O) test/verify-utils.h
 	$(CC) $(CFLAGS_DBG) -o $(OUT_DIR)/test-$* test/test-$*.c \
 	  $(TARGET_A) $(LIBPYIN_A) $(LIBGVPS_A) $(CIGLET_O) -lm
 

diff --git a/test/test-dsputils.c b/test/test-dsputils.c
@@ -1,7 +1,7 @@
 #include "../llsm.h"
 #include "../dsputils.h"
 #include "../external/ciglet/ciglet.h"
-#include <assert.h>
+#include "verify-utils.h"
 
 #define assert_equal(a, b) \
   assert(approx_equal(a, b))
@@ -165,6 +165,8 @@ static void test_glottal_model() {
 }
 
 int main() {
+  srand(1);
+  test_empirical_kld();
   test_spectral_envelope();
   test_harmonic_analysis(LLSM_AOPTION_HMPP);
   test_glottal_model();

diff --git a/test/test-layer0-anasynth.c b/test/test-layer0-anasynth.c
@@ -1,6 +1,6 @@
 #include "../llsm.h"
-#include "../external/ciglet/ciglet.h"
 #include "../external/libpyin/pyin.h"
+#include "verify-utils.h"
 
 int main() {
   int fs = 0;
@@ -14,20 +14,25 @@ int main() {
   param.fmin = 50.0;
   param.fmax = 500.0;
   param.trange = 24;
-  param.bias = 10;
+  param.bias = 2;
   param.nf = ceil(fs * 0.025);
   FP_TYPE* f0 = pyin_analyze(param, x, nx, fs, & nfrm);
 
   llsm_aoptions* opt_a = llsm_create_aoptions();
   opt_a -> thop = (FP_TYPE)nhop / fs;
+  llsm_soptions* opt_s = llsm_create_soptions(fs);
   llsm_chunk* chunk = llsm_analyze(opt_a, x, nx, fs, f0, nfrm, NULL);
 
-  // llsm_chunk_tolayer1(chunk, 2048);
-  // llsm_chunk_tolayer0(chunk);
-
-  llsm_soptions* opt_s = llsm_create_soptions(fs);
   llsm_output* out = llsm_synthesize(opt_s, chunk);
-  wavwrite(out -> y, out -> ny, opt_s -> fs, 24, "out.wav");
+  wavwrite(out -> y_noise, out -> ny, opt_s -> fs, 24,
+    "test/test-layer0-noise.wav");
+  wavwrite(out -> y_sin  , out -> ny, opt_s -> fs, 24,
+    "test/test-layer0-sin.wav");
+  wavwrite(out -> y      , out -> ny, opt_s -> fs, 24,
+    "test/test-layer0.wav");
+
+  verify_data_distribution(x, nx, out -> y, out -> ny);
+  verify_spectral_distribution(x, nx, out -> y, out -> ny);
 
   llsm_delete_output(out);
   llsm_delete_chunk(chunk);

diff --git a/test/verify-utils.h b/test/verify-utils.h
@@ -0,0 +1,175 @@
+#include "../external/ciglet/ciglet.h"
+#include <assert.h>
+
+// F. Perez-Cruz, "Kullback-Leibler divergence estimation of continuous
+//   distributions," 2008 IEEE International Symposium on Information
+//   Theory, 2008.
+// Estimates the KL divergence between X and its approximation Y.
+static inline FP_TYPE empirical_kld(FP_TYPE* x, int nx, FP_TYPE* y, int ny) {
+  FP_TYPE* xsorted = sort(x, nx, NULL);
+  FP_TYPE* ysorted = sort(y, ny, NULL);
+  ysorted[0] = xsorted[0];
+  ysorted[ny - 1] = xsorted[nx - 1];
+  FP_TYPE D = 0;
+
+  int xi = 1; int yi = 0;
+  for(int i = 0; i < nx; i ++) {
+    while(yi < ny - 1 && ysorted[yi] < xsorted[i]) yi ++;
+    yi = max(yi, 1);
+    xi = max(i, 1);
+    FP_TYPE d_xi = xsorted[xi] - xsorted[xi - 1];
+    FP_TYPE d_yi = ysorted[yi] - ysorted[yi - 1];
+    d_xi = max(d_xi, 1e-10);
+    d_yi = max(d_yi, 1e-10);
+    D += log(ny * d_yi / nx / d_xi);
+  }
+  free(xsorted); free(ysorted);
+
+  return D / nx - 1.0;
+}
+
+// This is a test for the helper function empirical_kld.
+static inline void test_empirical_kld() {
+  int nx = 100000;
+  int ny = 50000;
+  FP_TYPE* x_samples = calloc(nx, sizeof(FP_TYPE));
+  FP_TYPE* y_samples = calloc(ny, sizeof(FP_TYPE));
+  for(int i = 0; i < nx; i ++)
+    x_samples[i] = randn(1.0, 1.0);
+  for(int i = 0; i < ny; i ++)
+    y_samples[i] = randn(1.0, 1.0);
+
+  FP_TYPE kld1 = empirical_kld(x_samples, nx, y_samples, ny);
+
+  for(int i = 0; i < ny; i ++)
+    y_samples[i] = randn(0.0, 1.0);
+
+  FP_TYPE kld2 = empirical_kld(x_samples, nx, y_samples, ny);
+  assert(kld2 > kld1);
+
+  for(int i = 0; i < ny; i ++)
+    y_samples[i] = randn(1.0, 3.0);
+
+  FP_TYPE kld3 = empirical_kld(x_samples, nx, y_samples, ny);
+  assert(kld3 > kld1);
+
+  for(int i = 0; i < ny; i ++)
+    y_samples[i] = randu();
+
+  FP_TYPE kld4 = empirical_kld(x_samples, nx, y_samples, ny);
+  assert(kld4 > kld1);
+
+  printf("KL Divergences: %.3f(~0) %.3f(~%.3f) %.3f(~%.3f) %.3f\n",
+    kld1,
+    kld2, log(1.0 / 1.0) + (1.0 + 1.0) / 2.0 - 0.5,
+    kld3, log(sqrt(3.0) / 1.0) + (1.0 + 0.0) / 6.0 - 0.5,
+    kld4);
+
+  free(x_samples); free(y_samples);
+}
+
+static inline void dither(FP_TYPE* dst, FP_TYPE* src, int nx) {
+  for(int i = 0; i < nx; i ++)
+    dst[i] = src[i] + randn(0, 1.0) * 1e-4;
+}
+
+static inline void verify_data_distribution(FP_TYPE* x, int nx,
+  FP_TYPE* x_approx, int ny) {
+  FP_TYPE* x_dithered = calloc(nx, sizeof(FP_TYPE));
+  FP_TYPE* y_dithered = calloc(ny, sizeof(FP_TYPE));
+  // This is to prevent numerical overflow when taking the log of a very small
+  //   number in empirical_kld.
+  dither(x_dithered, x, nx);
+  dither(y_dithered, x_approx, ny);
+
+  FP_TYPE kld = empirical_kld(x_dithered, nx, y_dithered, ny);
+  printf("KL Divergence between the original waveform distribution and its "
+    "reconstruction: %f\n", kld);
+  assert(kld < 0.05);
+
+  for(int i = 1; i < nx; i ++)
+    x_dithered[i] = x[i] - x[i - 1] + randn(0, 1.0) * 1e-4;
+  for(int i = 1; i < ny; i ++)
+    y_dithered[i] = x_approx[i] - x_approx[i - 1] + randn(0, 1.0) * 1e-4;
+  kld = empirical_kld(x_dithered, nx, y_dithered, ny);
+  printf("KL Divergence between the original waveform distribution and its "
+    "reconstruction (1st derivative): %f\n", kld);
+  assert(kld < 0.05);
+
+  for(int i = 1; i < nx - 1; i ++)
+    x_dithered[i] = x[i + 1] - 2.0 * x[i] + x[i - 1] + randn(0, 1.0) * 1e-4;
+  for(int i = 1; i < ny - 1; i ++)
+    y_dithered[i] = x_approx[i + 1] - 2.0 * x_approx[i] + x_approx[i - 1] +
+      randn(0, 1.0) * 1e-4;
+  kld = empirical_kld(x_dithered, nx, y_dithered, ny);
+  printf("KL Divergence between the original waveform distribution and its "
+    "reconstruction (2nd derivative): %f\n", kld);
+  assert(kld < 0.05);
+
+  free(x_dithered); free(y_dithered);
+}
+
+static inline FP_TYPE spectral_correlation(FP_TYPE** X, FP_TYPE** Y,
+  int m, int n) {
+  FP_TYPE* X_flat = flatten(X, m, n, sizeof(FP_TYPE));
+  FP_TYPE* Y_flat = flatten(Y, m, n, sizeof(FP_TYPE));
+  FP_TYPE cc = corr(X_flat, Y_flat, m * n);
+  free(X_flat); free(Y_flat);
+  return cc;
+}
+
+static inline void verify_spectral_distribution(FP_TYPE* x, int nx,
+  FP_TYPE* x_approx, int ny) {
+  int nhop = 512;
+  int x_nfrm = nx / nhop;
+  int y_nfrm = ny / nhop;
+  int min_nfrm = min(x_nfrm, y_nfrm);
+  int hop_fc = 4; int zp_fc = 1;
+  int nfft = nhop * hop_fc;
+  FP_TYPE** X = malloc2d(x_nfrm, nfft / 2 + 1, sizeof(FP_TYPE));
+  FP_TYPE** Y = malloc2d(y_nfrm, nfft / 2 + 1, sizeof(FP_TYPE));
+  stft(x       , nx, nhop, x_nfrm, hop_fc, zp_fc, NULL, NULL, X, NULL);
+  stft(x_approx, ny, nhop, y_nfrm, hop_fc, zp_fc, NULL, NULL, Y, NULL);
+
+  FP_TYPE* X_flat = flatten(X, x_nfrm, nfft / 2 + 1, sizeof(FP_TYPE));
+  FP_TYPE* Y_flat = flatten(Y, y_nfrm, nfft / 2 + 1, sizeof(FP_TYPE));
+  for(int i = 0; i < x_nfrm * (nfft / 2 + 1); i ++)
+    X_flat[i] = sqrt(X_flat[i]);
+  for(int i = 0; i < y_nfrm * (nfft / 2 + 1); i ++)
+    Y_flat[i] = sqrt(Y_flat[i]);
+
+  FP_TYPE cc = spectral_correlation(X, Y, min_nfrm, nfft / 2 + 1);
+  printf("Correlation coefficient between the original spectrum "
+    "and its reconstruction: %f\n", cc);
+  assert(cc > 0.95);
+
+  dither(X_flat, X_flat, x_nfrm * (nfft / 2 + 1));
+  dither(Y_flat, Y_flat, y_nfrm * (nfft / 2 + 1));
+  FP_TYPE kld = empirical_kld(X_flat, x_nfrm * (nfft / 2 + 1), Y_flat,
+    y_nfrm * (nfft / 2 + 1));
+  printf("KL Divergence between the original spectral distribution and its "
+    "reconstruction: %f\n", kld);
+  free(X_flat); free(Y_flat);
+  assert(kld < 0.05);
+
+  // 1st derivative
+  for(int j = 0; j < nfft / 2 + 1; j ++)
+    for(int i = 0; i < x_nfrm - 1; i ++)
+      X[i][j] = X[i + 1][j] - X[i][j];
+  for(int j = 0; j < nfft / 2 + 1; j ++)
+    for(int i = 0; i < y_nfrm - 1; i ++)
+      Y[i][j] = Y[i + 1][j] - Y[i][j];
+
+  X_flat = flatten(X, x_nfrm - 1, nfft / 2 + 1, sizeof(FP_TYPE));
+  Y_flat = flatten(Y, y_nfrm - 1, nfft / 2 + 1, sizeof(FP_TYPE));
+  dither(X_flat, X_flat, (x_nfrm - 1) * (nfft / 2 + 1));
+  dither(Y_flat, Y_flat, (y_nfrm - 1) * (nfft / 2 + 1));
+  kld = empirical_kld(X_flat, (x_nfrm - 1) * (nfft / 2 + 1), Y_flat,
+    (y_nfrm - 1) * (nfft / 2 + 1));
+  printf("KL Divergence between the original spectral distribution and its "
+    "reconstruction (1st derivative): %f\n", kld);
+  assert(kld < 0.05);
+
+  free(X_flat); free(Y_flat);
+  free2d(X, x_nfrm); free2d(Y, y_nfrm);
+}