Skip to content

Commit

Permalink
Merge pull request #6 from david-ryan-snyder/chain
Browse files Browse the repository at this point in the history
xvector: Objf and Deriv
  • Loading branch information
danpovey committed Feb 14, 2016
2 parents 598e9b1 + 7596d01 commit 1e1d1ef
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 104 deletions.
4 changes: 2 additions & 2 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2105,11 +2105,11 @@ static void _compute_xvector_objf(const Real* scores, MatrixDim scores_dim,
int32_cuda scores_index = i + j * scores_dim.stride;
Real K = 1.0 / (scores_dim.rows - 2.0);
Real L = scores[scores_index];
if (i < scores_dim.cols && j < scores_dim.rows && i < j) {
if (i < scores_dim.cols && j < scores_dim.rows) {
if (i + 1 == j && i % 2 == 0) {
obfj_terms[scores_index] = log(1.0 + exp(-L));
obfj_derivs[scores_index] = 1.0 / (1.0 + exp(L));
} else if (i != j) {
} else if (i < j) {
obfj_terms[scores_index] = K * log(1.0 + exp(L));
obfj_derivs[scores_index] = -K / (1.0 + exp(-L));
}
Expand Down
34 changes: 18 additions & 16 deletions src/cudamatrix/cu-math.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,11 @@ void Copy(const CuMatrixBase<Real> &src, const CuArray<int32> &copy_from_indices
}
}

template<typename Real>
void ComputeXvectorObjfFromScores(const CuMatrixBase<Real> &scores,
CuMatrixBase<Real> *objf_terms,
CuMatrixBase<Real> *objf_derivs) {
void ComputeXvectorObjfFromScores(const CuMatrixBase<BaseFloat> &scores,
CuMatrixBase<BaseFloat> *objf_terms,
CuMatrixBase<BaseFloat> *objf_derivs) {
KALDI_ASSERT(SameDim(*objf_terms, *objf_derivs)
&& SameDim(*objf_terms, scores));
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
Expand All @@ -226,8 +227,19 @@ void ComputeXvectorObjfFromScores(const CuMatrixBase<Real> &scores,
} else
#endif
{
// TODO: Add the CPU version.
KALDI_LOG << "NOT USING CUDA";
int32 num_rows = scores.NumRows();
BaseFloat K = 1.0 / (num_rows - 2.0);
for (int32 i = 0; i < num_rows; i++) {
for (int32 j = i + 1; j < num_rows; j++) {
if (i + 1 == j && i % 2 == 0) {
(*objf_terms)(i, j) = log(1.0 + exp(-scores(i, j)));
(*objf_derivs)(i, j) = 1.0 / (1.0 + exp(scores(i, j)));
} else {
(*objf_terms)(i, j) = K * log(1.0 + exp(scores(i, j)));
(*objf_derivs)(i, j) = -K / (1.0 + exp(-scores(i, j)));
}
}
}
}
}

Expand Down Expand Up @@ -259,16 +271,6 @@ void Randomize(const CuMatrixBase<double> &src,
const CuArray<int32> &copy_from_idx,
CuMatrixBase<double> *tgt);

template
void ComputeXvectorObjfFromScores(const CuMatrixBase<float> &scores,
CuMatrixBase<float> *objf_terms,
CuMatrixBase<float> *objf_derivs);
template
void ComputeXvectorObjfFromScores(const CuMatrixBase<double> &scores,
CuMatrixBase<double> *objf_terms,
CuMatrixBase<double> *objf_derivs);



} //namespace cu

Expand Down
26 changes: 24 additions & 2 deletions src/cudamatrix/cu-math.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,31 @@ void Group2norm(const CuMatrixBase<Real> &src,
int32 group_stride);

/*
TODO: Documentation.
This function is used in computing the objective function and derivatives
in xvector training.
@param [in] scores 'scores' is a symmetric matrix of scores which are to
be interpreted as log-odds (according to the model) of pairs coming from the
same class, so scores(i, j) is the model's log p(same/different) for
elements i and j of the original minibatch of input. We assume that the data
in 'scores' has been arranged in such a way that pairs of indexes of the form
(2k, 2k+1), e.g., (0, 1), (2, 3), (4, 5), etc, are from the same class, but
indexes of any other form, such as (0, 2), (1, 2), etc, are from different
classes.
@param [out] objf_terms 'objf_terms' is a matrix of the same dimension as
'scores' whose elements we will sum to get the objective function for this
minibatch. This function computes the appropriate contributions to the
objective function, as follows.
if i == j:
objf_terms(i, j) = 0 # the same exact element is not scored
elsif i/2 == j/2 (integer division), i.e. {i, j} = {2k, 2k+1}:
objf_terms(i, j) = log(p(same))
= -log(1 + exp(-scores(i, j)))
else:
objf_terms(i, j) = 1 / (scores.NumRows() - 2) * log(p(different))
= -1 / (scores.NumRows() - 2) * log(1 + exp(scores(i, j)))
@param [out] objf_derivs Element (i,j) of this matrix is the derivative
of objf_terms(i,j) with respect to scores(i, j).
*/
template <typename BaseFloat>
void ComputeXvectorObjfFromScores(const CuMatrixBase<BaseFloat> &scores,
CuMatrixBase<BaseFloat> *objf_terms,
CuMatrixBase<BaseFloat> *objf_derivs);
Expand Down
133 changes: 71 additions & 62 deletions src/ivector/xvector-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ void TestGetDeriv(const CuVector<BaseFloat> &v,
const CuVector<BaseFloat> &w, const CuSpMatrix<BaseFloat> &S,
BaseFloat b, bool is_same, BaseFloat similarity_score,
CuVector<BaseFloat> *deriv_v, CuVector<BaseFloat> *deriv_w,
CuVector<BaseFloat> *deriv_S_and_b);
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b);

void TestComputeXvectorObjfAndDeriv(
const CuMatrixBase<BaseFloat> &xvector_pairs,
const CuSpMatrix<BaseFloat> &S,
BaseFloat b, CuMatrixBase<BaseFloat> *deriv_xvector,
CuVector<BaseFloat> *deriv_S_and_b, BaseFloat *tot_objf,
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b, BaseFloat *tot_objf,
BaseFloat *tot_weight);

bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
int32 xvector_dim = RandInt(4, 30),
int32 xvector_dim = RandInt(4, 50),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
Expand All @@ -49,14 +49,15 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
S.Scale(1.0e-01);
BaseFloat b = RandInt(-100, 100) / 10.0,
tot_weight,
tot_objf;
tot_objf,
deriv_b;
int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
CuMatrix<BaseFloat> xvector_pairs(num_rows, xvector_dim, kSetZero),
deriv_xvector(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b(S_dim + 1, kSetZero);
CuVector<BaseFloat> deriv_S(S_dim, kSetZero);
xvector_pairs.SetRandn();
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector,
&deriv_S_and_b, &tot_objf, &tot_weight);
&deriv_S, &deriv_b, &tot_objf, &tot_weight);
CuVector<BaseFloat> deriv_xvector_vec(xvector_dim);

// Sum over the derivatives for xvector input.
Expand All @@ -74,14 +75,12 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
xvector_pairs_p(j, i) += perturb_delta;
xvector_pairs_n(j, i) += -perturb_delta;
}
CuMatrix<BaseFloat> deriv_xvector_tmp(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b_tmp(S_dim + 1, kSetZero);
BaseFloat tot_objf_p;
BaseFloat tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_n, &tot_weight);
BaseFloat tot_objf_p,
tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, NULL,
NULL, NULL, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, NULL,
NULL, NULL, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n)
* 1.0 / (2.0 * perturb_delta);
l2_xvector += pow(deriv_xvector_vec(i) - delta, 2);
Expand All @@ -92,43 +91,42 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
for (int32 i = 0; i < S_dim; i++) {
CuSpMatrix<BaseFloat> S_p(S);
CuSpMatrix<BaseFloat> S_n(S);
S_p.Data()[i] += perturb_delta;
S_n.Data()[i] -= perturb_delta;
CuMatrix<BaseFloat> deriv_xvector_tmp(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b_tmp(S_dim + 1, kSetZero);
BaseFloat tot_objf_p;
BaseFloat tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_n, &tot_weight);
CuSubVector<BaseFloat> S_p_vec(S_p.Data(), S_dim);
CuSubVector<BaseFloat> S_n_vec(S_n.Data(), S_dim);
S_p_vec(i) += perturb_delta;
S_n_vec(i) += -perturb_delta;
BaseFloat tot_objf_p,
tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, NULL,
NULL, NULL, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, NULL,
NULL, NULL, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n)
* 1.0 / (2.0 * perturb_delta);
l2_S += pow(deriv_S_and_b(i) - delta, 2);
l2_S += pow(deriv_S(i) - delta, 2);
}

// Compare the b derivative calculated above with a numerical
// approximation.
BaseFloat b_p = b + perturb_delta;
BaseFloat b_n = b - perturb_delta;
CuMatrix<BaseFloat> deriv_xvector_tmp(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b_tmp(S_dim + 1, kSetZero);
BaseFloat tot_objf_p;
BaseFloat tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n) * 1.0 / (2.0 * perturb_delta);
l2_b = pow(deriv_S_and_b(S_dim) - delta, 2);
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, NULL,
NULL, NULL, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, NULL,
NULL, NULL, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n)
* 1.0 / (2.0 * perturb_delta);
l2_b = pow(deriv_b - delta, 2);
KALDI_ASSERT(l2_xvector < 1.0e-03);
KALDI_ASSERT(l2_S < 1.0e-03);
KALDI_ASSERT(l2_b < 1.0e-03);
return true;
}

bool TestXvectorComputeObjf() {
int32 xvector_dim = RandInt(4, 30),
int32 xvector_dim = RandInt(4, 40),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
Expand All @@ -139,19 +137,21 @@ bool TestXvectorComputeObjf() {
tot_weight,
tot_weight_test,
tot_objf,
tot_objf_test;
tot_objf_test,
deriv_b,
deriv_b_test;
int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
CuMatrix<BaseFloat> xvector_pairs(num_rows, xvector_dim, kSetZero),
deriv_xvector(num_rows, xvector_dim, kSetZero),
deriv_xvector_test(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b(S_dim + 1, kSetZero),
deriv_S_and_b_test(S_dim + 1, kSetZero);
CuVector<BaseFloat> deriv_S(S_dim, kSetZero),
deriv_S_test(S_dim, kSetZero);
xvector_pairs.SetRandn();

ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector,
&deriv_S_and_b, &tot_objf, &tot_weight);
&deriv_S, &deriv_b, &tot_objf, &tot_weight);
TestComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector_test,
&deriv_S_and_b_test, &tot_objf_test, &tot_weight_test);
&deriv_S_test, &deriv_b_test, &tot_objf_test, &tot_weight_test);

CuVector<BaseFloat> deriv_xvector_vec(xvector_dim);
deriv_xvector_vec.AddRowSumMat(1.0, deriv_xvector, 0.0);
Expand All @@ -160,43 +160,50 @@ bool TestXvectorComputeObjf() {

// Verify that the objfs are the same.
KALDI_ASSERT(ApproxEqual(tot_objf, tot_objf_test, 0.001));

// Also verify that the gradients are the same.
for (int32 i = 0; i < deriv_xvector_vec.Dim(); i++)
KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i), deriv_xvector_vec_test(i), 0.001));
for (int32 i = 0; i < deriv_S_and_b.Dim(); i++)
KALDI_ASSERT(ApproxEqual(deriv_S_and_b(i), deriv_S_and_b_test(i), 0.001));
KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i),
deriv_xvector_vec_test(i), 0.001));

// Verify that the S derivatives are the same.
for (int32 i = 0; i < deriv_S.Dim(); i++)
KALDI_ASSERT(ApproxEqual(deriv_S(i), deriv_S_test(i), 0.001));

// Verify that the b derivatives are the same.
KALDI_ASSERT(ApproxEqual(deriv_b, deriv_b_test, 0.001));
return true;
}

void TestComputeXvectorObjfAndDeriv(
const CuMatrixBase<BaseFloat> &xvector_pairs,
const CuSpMatrix<BaseFloat> &S,
BaseFloat b, CuMatrixBase<BaseFloat> *deriv_xvector,
CuVector<BaseFloat> *deriv_S_and_b, BaseFloat *tot_objf,
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b, BaseFloat *tot_objf,
BaseFloat *tot_weight) {

int32 N = xvector_pairs.NumRows();
BaseFloat same_objf = 0,
diff_objf = 0;
BaseFloat K = 1.0 / (N - 2.0);
int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
CuMatrix<BaseFloat> tmp_deriv(N, xvector_pairs.NumCols()
+ S_dim + 1, kSetZero);
(*deriv_b) = 0;
// Handle portion of the objf corresponding to pairs of xvectors
// from the same classes.
for (int32 i = 0; i < N/2; i++) {
const CuVector<BaseFloat> &v(xvector_pairs.Row(2 * i)),
&w(xvector_pairs.Row(2 * i + 1));
CuVector<BaseFloat> deriv_v,
deriv_w,
deriv_S_and_b_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b);
deriv_S_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b),
deriv_b_part = 0;
same_objf += Log(1 + Exp(-similarity_score));
TestGetDeriv(v, w, S, b, true, similarity_score, &deriv_v,
&deriv_w, &deriv_S_and_b_part);
&deriv_w, &deriv_S_part, &deriv_b_part);
deriv_xvector->Row(2 * i).AddVec(1.0, deriv_v);
deriv_xvector->Row(2 * i + 1).AddVec(1.0, deriv_w);
deriv_S_and_b->AddVec(1.0, deriv_S_and_b_part);
deriv_S->AddVec(1.0, deriv_S_part);
(*deriv_b) += deriv_b_part;
}

// Handle portion of the objf corresponding to pairs of xvectors
Expand All @@ -207,14 +214,16 @@ void TestComputeXvectorObjfAndDeriv(
&w(xvector_pairs.Row(j));
CuVector<BaseFloat> deriv_v,
deriv_w,
deriv_S_and_b_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b);
deriv_S_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b),
deriv_b_part = 0;
diff_objf += Log(1 + Exp(similarity_score));
TestGetDeriv(v, w, S, b, false, similarity_score, &deriv_v,
&deriv_w, &deriv_S_and_b_part);
&deriv_w, &deriv_S_part, &deriv_b_part);
deriv_xvector->Row(i).AddVec(K, deriv_v);
deriv_xvector->Row(j).AddVec(K, deriv_w);
deriv_S_and_b->AddVec(K, deriv_S_and_b_part);
deriv_S->AddVec(K, deriv_S_part);
(*deriv_b) += K * deriv_b_part;
}
}
// Scale the same and different portions of the objective function
Expand All @@ -228,12 +237,12 @@ void TestGetDeriv(const CuVector<BaseFloat> &v,
const CuVector<BaseFloat> &w, const CuSpMatrix<BaseFloat> &S,
BaseFloat b, bool is_same, BaseFloat similarity_score,
CuVector<BaseFloat> *deriv_v, CuVector<BaseFloat> *deriv_w,
CuVector<BaseFloat> *deriv_S_and_b) {
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b) {
int32 d = is_same ? 1 : -1,
S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
deriv_v->Resize(v.Dim(), kSetZero);
deriv_w->Resize(v.Dim(), kSetZero);
deriv_S_and_b->Resize(S_dim + 1, kSetZero);
deriv_S->Resize(S_dim, kSetZero);

// This scalar is common to the different derivatives.
BaseFloat deriv_coef = d * Exp(-1 * d * similarity_score)
Expand All @@ -254,11 +263,10 @@ void TestGetDeriv(const CuVector<BaseFloat> &v,
for (int32 i = 0; i < S.NumCols(); i++)
deriv_S_mat(i, i) = 0.5 * deriv_S_mat(i, i);
CuSubVector<BaseFloat> deriv_S_vec(deriv_S_mat.Data(), S_dim);
CuSubVector<BaseFloat> sub_deriv_S_and_b(*deriv_S_and_b, 0, S_dim);
sub_deriv_S_and_b.AddVec(deriv_coef, deriv_S_vec);
deriv_S->AddVec(deriv_coef, deriv_S_vec);

// Handle derivative with respect to b.
(*deriv_S_and_b)(S_dim) = -deriv_coef;
(*deriv_b) = -deriv_coef;
}

BaseFloat TestSimilarityScore(const CuVector<BaseFloat> &v,
Expand Down Expand Up @@ -286,14 +294,15 @@ void UnitTestXvectorExtractor() {

int main() {
using namespace kaldi;
for (int32 i = 0; i < 2; i++)
for (int32 i = 0; i < 2; i++) {
#if HAVE_CUDA == 1
if (i == 0)
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no");
else
CuDevice::Instantiate().SelectGpuId("yes"); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("yes");
#endif
UnitTestXvectorExtractor();
}
std::cout << "Xvector tests succeeded.\n";
return 0;
}
Loading

0 comments on commit 1e1d1ef

Please sign in to comment.