diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 00d4f8bebf8..41288b411c0 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -2105,11 +2105,11 @@ static void _compute_xvector_objf(const Real* scores, MatrixDim scores_dim, int32_cuda scores_index = i + j * scores_dim.stride; Real K = 1.0 / (scores_dim.rows - 2.0); Real L = scores[scores_index]; - if (i < scores_dim.cols && j < scores_dim.rows && i < j) { + if (i < scores_dim.cols && j < scores_dim.rows) { if (i + 1 == j && i % 2 == 0) { obfj_terms[scores_index] = log(1.0 + exp(-L)); obfj_derivs[scores_index] = 1.0 / (1.0 + exp(L)); - } else if (i != j) { + } else if (i < j) { obfj_terms[scores_index] = K * log(1.0 + exp(L)); obfj_derivs[scores_index] = -K / (1.0 + exp(-L)); } diff --git a/src/cudamatrix/cu-math.cc b/src/cudamatrix/cu-math.cc index d5e9cfb6ef8..005bb3146c0 100644 --- a/src/cudamatrix/cu-math.cc +++ b/src/cudamatrix/cu-math.cc @@ -206,10 +206,11 @@ void Copy(const CuMatrixBase &src, const CuArray ©_from_indices } } -template -void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, - CuMatrixBase *objf_terms, - CuMatrixBase *objf_derivs) { +void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, + CuMatrixBase *objf_terms, + CuMatrixBase *objf_derivs) { + KALDI_ASSERT(SameDim(*objf_terms, *objf_derivs) + && SameDim(*objf_terms, scores)); #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; @@ -226,8 +227,19 @@ void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, } else #endif { - // TODO: Add the CPU version. 
- KALDI_LOG << "NOT USING CUDA"; + int32 num_rows = scores.NumRows(); + BaseFloat K = 1.0 / (num_rows - 2.0); + for (int32 i = 0; i < num_rows; i++) { + for (int32 j = i + 1; j < num_rows; j++) { + if (i + 1 == j && i % 2 == 0) { + (*objf_terms)(i, j) = log(1.0 + exp(-scores(i, j))); + (*objf_derivs)(i, j) = 1.0 / (1.0 + exp(scores(i, j))); + } else { + (*objf_terms)(i, j) = K * log(1.0 + exp(scores(i, j))); + (*objf_derivs)(i, j) = -K / (1.0 + exp(-scores(i, j))); + } + } + } } } @@ -259,16 +271,6 @@ void Randomize(const CuMatrixBase &src, const CuArray &copy_from_idx, CuMatrixBase *tgt); -template -void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, - CuMatrixBase *objf_terms, - CuMatrixBase *objf_derivs); -template -void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, - CuMatrixBase *objf_terms, - CuMatrixBase *objf_derivs); - - } //namespace cu diff --git a/src/cudamatrix/cu-math.h b/src/cudamatrix/cu-math.h index a30cec5d9df..4aaa4ceb29a 100644 --- a/src/cudamatrix/cu-math.h +++ b/src/cudamatrix/cu-math.h @@ -80,9 +80,31 @@ void Group2norm(const CuMatrixBase &src, int32 group_stride); /* -TODO: Documentation. + This function is used in computing the objective function and derivatives + in xvector training. + @param [in] scores 'scores' is a symmetric matrix of scores which are to + be interpreted as log-odds (according to the model) of pairs coming from the + same class, so scores(i, j) is the model's log(p(same) / p(different)) for + elements i and j of the original minibatch of input. We assume that the data + in 'scores' has been arranged in such a way that pairs of indexes of the form + (2k, 2k+1), e.g., (0, 1), (2, 3), (4, 5), etc, are from the same class, but + indexes of any other form, such as (0, 2), (1, 2), etc, are from different + classes. + @param [out] objf_terms 'objf_terms' is a matrix of the same dimension as + 'scores' whose elements we will sum to get the objective function for this + minibatch. 
This function computes the appropriate contributions to the + objective function, as follows. + if i == j: + objf_terms(i, j) = 0 # the same exact element is not scored + elsif i + 1 == j and i % 2 == 0: # (i, j) is a same-class pair (2k, 2k+1) + objf_terms(i, j) = log(p(same)) + = -log(1 + exp(-scores(i, j))) + else: + objf_terms(i, j) = 1 / (scores.NumRows() - 2) * log(p(different)) + = -1/(scores.NumRows() - 2) * log(1+exp(scores(i,j))) + @param [out] objf_derivs Element (i,j) of this matrix is the derivative + of objf_terms(i,j) with respect to scores(i, j). */ -template void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, CuMatrixBase *objf_terms, CuMatrixBase *objf_derivs); diff --git a/src/ivector/xvector-test.cc b/src/ivector/xvector-test.cc index 229863e820a..ae3b6d7e57b 100644 --- a/src/ivector/xvector-test.cc +++ b/src/ivector/xvector-test.cc @@ -30,17 +30,17 @@ void TestGetDeriv(const CuVector &v, const CuVector &w, const CuSpMatrix &S, BaseFloat b, bool is_same, BaseFloat similarity_score, CuVector *deriv_v, CuVector *deriv_w, - CuVector *deriv_S_and_b); + CuVector *deriv_S, BaseFloat *deriv_b); void TestComputeXvectorObjfAndDeriv( const CuMatrixBase &xvector_pairs, const CuSpMatrix &S, BaseFloat b, CuMatrixBase *deriv_xvector, - CuVector *deriv_S_and_b, BaseFloat *tot_objf, + CuVector *deriv_S, BaseFloat *deriv_b, BaseFloat *tot_objf, BaseFloat *tot_weight); bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) { - int32 xvector_dim = RandInt(4, 30), + int32 xvector_dim = RandInt(4, 50), num_rows = 2 * RandInt(2, 10); // The number of rows must be even // and greater than 2. 
CuSpMatrix S(xvector_dim); @@ -49,14 +49,15 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) { S.Scale(1.0e-01); BaseFloat b = RandInt(-100, 100) / 10.0, tot_weight, - tot_objf; + tot_objf, + deriv_b; int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2; CuMatrix xvector_pairs(num_rows, xvector_dim, kSetZero), deriv_xvector(num_rows, xvector_dim, kSetZero); - CuVector deriv_S_and_b(S_dim + 1, kSetZero); + CuVector deriv_S(S_dim, kSetZero); xvector_pairs.SetRandn(); ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector, - &deriv_S_and_b, &tot_objf, &tot_weight); + &deriv_S, &deriv_b, &tot_objf, &tot_weight); CuVector deriv_xvector_vec(xvector_dim); // Sum over the derivatives for xvector input. @@ -74,14 +75,12 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) { xvector_pairs_p(j, i) += perturb_delta; xvector_pairs_n(j, i) += -perturb_delta; } - CuMatrix deriv_xvector_tmp(num_rows, xvector_dim, kSetZero); - CuVector deriv_S_and_b_tmp(S_dim + 1, kSetZero); - BaseFloat tot_objf_p; - BaseFloat tot_objf_n; - ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, &deriv_xvector_tmp, - &deriv_S_and_b_tmp, &tot_objf_p, &tot_weight); - ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, &deriv_xvector_tmp, - &deriv_S_and_b_tmp, &tot_objf_n, &tot_weight); + BaseFloat tot_objf_p, + tot_objf_n; + ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, NULL, + NULL, NULL, &tot_objf_p, &tot_weight); + ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, NULL, + NULL, NULL, &tot_objf_n, &tot_weight); BaseFloat delta = (tot_objf_p - tot_objf_n) * 1.0 / (2.0 * perturb_delta); l2_xvector += pow(deriv_xvector_vec(i) - delta, 2); @@ -92,35 +91,34 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) { for (int32 i = 0; i < S_dim; i++) { CuSpMatrix S_p(S); CuSpMatrix S_n(S); - S_p.Data()[i] += perturb_delta; - S_n.Data()[i] -= perturb_delta; - CuMatrix deriv_xvector_tmp(num_rows, xvector_dim, kSetZero); - CuVector deriv_S_and_b_tmp(S_dim + 1, kSetZero); - 
BaseFloat tot_objf_p; - BaseFloat tot_objf_n; - ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, &deriv_xvector_tmp, - &deriv_S_and_b_tmp, &tot_objf_p, &tot_weight); - ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, &deriv_xvector_tmp, - &deriv_S_and_b_tmp, &tot_objf_n, &tot_weight); + CuSubVector S_p_vec(S_p.Data(), S_dim); + CuSubVector S_n_vec(S_n.Data(), S_dim); + S_p_vec(i) += perturb_delta; + S_n_vec(i) += -perturb_delta; + BaseFloat tot_objf_p, + tot_objf_n; + ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, NULL, + NULL, NULL, &tot_objf_p, &tot_weight); + ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, NULL, + NULL, NULL, &tot_objf_n, &tot_weight); BaseFloat delta = (tot_objf_p - tot_objf_n) * 1.0 / (2.0 * perturb_delta); - l2_S += pow(deriv_S_and_b(i) - delta, 2); + l2_S += pow(deriv_S(i) - delta, 2); } // Compare the b derivative calculated above with a numerical // approximation. BaseFloat b_p = b + perturb_delta; BaseFloat b_n = b - perturb_delta; - CuMatrix deriv_xvector_tmp(num_rows, xvector_dim, kSetZero); - CuVector deriv_S_and_b_tmp(S_dim + 1, kSetZero); BaseFloat tot_objf_p; BaseFloat tot_objf_n; - ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, &deriv_xvector_tmp, - &deriv_S_and_b_tmp, &tot_objf_p, &tot_weight); - ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, &deriv_xvector_tmp, - &deriv_S_and_b_tmp, &tot_objf_n, &tot_weight); - BaseFloat delta = (tot_objf_p - tot_objf_n) * 1.0 / (2.0 * perturb_delta); - l2_b = pow(deriv_S_and_b(S_dim) - delta, 2); + ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, NULL, + NULL, NULL, &tot_objf_p, &tot_weight); + ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, NULL, + NULL, NULL, &tot_objf_n, &tot_weight); + BaseFloat delta = (tot_objf_p - tot_objf_n) + * 1.0 / (2.0 * perturb_delta); + l2_b = pow(deriv_b - delta, 2); KALDI_ASSERT(l2_xvector < 1.0e-03); KALDI_ASSERT(l2_S < 1.0e-03); KALDI_ASSERT(l2_b < 1.0e-03); @@ -128,7 +126,7 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) { } 
bool TestXvectorComputeObjf() { - int32 xvector_dim = RandInt(4, 30), + int32 xvector_dim = RandInt(4, 40), num_rows = 2 * RandInt(2, 10); // The number of rows must be even // and greater than 2. CuSpMatrix S(xvector_dim); @@ -139,19 +137,21 @@ bool TestXvectorComputeObjf() { tot_weight, tot_weight_test, tot_objf, - tot_objf_test; + tot_objf_test, + deriv_b, + deriv_b_test; int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2; CuMatrix xvector_pairs(num_rows, xvector_dim, kSetZero), deriv_xvector(num_rows, xvector_dim, kSetZero), deriv_xvector_test(num_rows, xvector_dim, kSetZero); - CuVector deriv_S_and_b(S_dim + 1, kSetZero), - deriv_S_and_b_test(S_dim + 1, kSetZero); + CuVector deriv_S(S_dim, kSetZero), + deriv_S_test(S_dim, kSetZero); xvector_pairs.SetRandn(); ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector, - &deriv_S_and_b, &tot_objf, &tot_weight); + &deriv_S, &deriv_b, &tot_objf, &tot_weight); TestComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector_test, - &deriv_S_and_b_test, &tot_objf_test, &tot_weight_test); + &deriv_S_test, &deriv_b_test, &tot_objf_test, &tot_weight_test); CuVector deriv_xvector_vec(xvector_dim); deriv_xvector_vec.AddRowSumMat(1.0, deriv_xvector, 0.0); @@ -160,11 +160,18 @@ bool TestXvectorComputeObjf() { // Verify that the objfs are the same. KALDI_ASSERT(ApproxEqual(tot_objf, tot_objf_test, 0.001)); + // Also verify that the gradients are the same. for (int32 i = 0; i < deriv_xvector_vec.Dim(); i++) - KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i), deriv_xvector_vec_test(i), 0.001)); - for (int32 i = 0; i < deriv_S_and_b.Dim(); i++) - KALDI_ASSERT(ApproxEqual(deriv_S_and_b(i), deriv_S_and_b_test(i), 0.001)); + KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i), + deriv_xvector_vec_test(i), 0.001)); + + // Verify that the S derivatives are the same. + for (int32 i = 0; i < deriv_S.Dim(); i++) + KALDI_ASSERT(ApproxEqual(deriv_S(i), deriv_S_test(i), 0.001)); + + // Verify that the b derivatives are the same. 
+ KALDI_ASSERT(ApproxEqual(deriv_b, deriv_b_test, 0.001)); return true; } @@ -172,16 +179,14 @@ void TestComputeXvectorObjfAndDeriv( const CuMatrixBase &xvector_pairs, const CuSpMatrix &S, BaseFloat b, CuMatrixBase *deriv_xvector, - CuVector *deriv_S_and_b, BaseFloat *tot_objf, + CuVector *deriv_S, BaseFloat *deriv_b, BaseFloat *tot_objf, BaseFloat *tot_weight) { int32 N = xvector_pairs.NumRows(); BaseFloat same_objf = 0, diff_objf = 0; BaseFloat K = 1.0 / (N - 2.0); - int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2; - CuMatrix tmp_deriv(N, xvector_pairs.NumCols() - + S_dim + 1, kSetZero); + (*deriv_b) = 0; // Handle portion of the objf corresponding to pairs of xvectors // from the same classes. for (int32 i = 0; i < N/2; i++) { @@ -189,14 +194,16 @@ void TestComputeXvectorObjfAndDeriv( &w(xvector_pairs.Row(2 * i + 1)); CuVector deriv_v, deriv_w, - deriv_S_and_b_part; - BaseFloat similarity_score = TestSimilarityScore(v, w, S, b); + deriv_S_part; + BaseFloat similarity_score = TestSimilarityScore(v, w, S, b), + deriv_b_part = 0; same_objf += Log(1 + Exp(-similarity_score)); TestGetDeriv(v, w, S, b, true, similarity_score, &deriv_v, - &deriv_w, &deriv_S_and_b_part); + &deriv_w, &deriv_S_part, &deriv_b_part); deriv_xvector->Row(2 * i).AddVec(1.0, deriv_v); deriv_xvector->Row(2 * i + 1).AddVec(1.0, deriv_w); - deriv_S_and_b->AddVec(1.0, deriv_S_and_b_part); + deriv_S->AddVec(1.0, deriv_S_part); + (*deriv_b) += deriv_b_part; } // Handle portion of the objf corresponding to pairs of xvectors @@ -207,14 +214,16 @@ void TestComputeXvectorObjfAndDeriv( &w(xvector_pairs.Row(j)); CuVector deriv_v, deriv_w, - deriv_S_and_b_part; - BaseFloat similarity_score = TestSimilarityScore(v, w, S, b); + deriv_S_part; + BaseFloat similarity_score = TestSimilarityScore(v, w, S, b), + deriv_b_part = 0; diff_objf += Log(1 + Exp(similarity_score)); TestGetDeriv(v, w, S, b, false, similarity_score, &deriv_v, - &deriv_w, &deriv_S_and_b_part); + &deriv_w, &deriv_S_part, &deriv_b_part); 
deriv_xvector->Row(i).AddVec(K, deriv_v); deriv_xvector->Row(j).AddVec(K, deriv_w); - deriv_S_and_b->AddVec(K, deriv_S_and_b_part); + deriv_S->AddVec(K, deriv_S_part); + (*deriv_b) += K * deriv_b_part; } } // Scale the same and different portions of the objective function @@ -228,12 +237,12 @@ void TestGetDeriv(const CuVector &v, const CuVector &w, const CuSpMatrix &S, BaseFloat b, bool is_same, BaseFloat similarity_score, CuVector *deriv_v, CuVector *deriv_w, - CuVector *deriv_S_and_b) { + CuVector *deriv_S, BaseFloat *deriv_b) { int32 d = is_same ? 1 : -1, S_dim = S.NumCols() * (S.NumCols() + 1) / 2; deriv_v->Resize(v.Dim(), kSetZero); deriv_w->Resize(v.Dim(), kSetZero); - deriv_S_and_b->Resize(S_dim + 1, kSetZero); + deriv_S->Resize(S_dim, kSetZero); // This scalar is common to the different derivatives. BaseFloat deriv_coef = d * Exp(-1 * d * similarity_score) @@ -254,11 +263,10 @@ void TestGetDeriv(const CuVector &v, for (int32 i = 0; i < S.NumCols(); i++) deriv_S_mat(i, i) = 0.5 * deriv_S_mat(i, i); CuSubVector deriv_S_vec(deriv_S_mat.Data(), S_dim); - CuSubVector sub_deriv_S_and_b(*deriv_S_and_b, 0, S_dim); - sub_deriv_S_and_b.AddVec(deriv_coef, deriv_S_vec); + deriv_S->AddVec(deriv_coef, deriv_S_vec); // Handle derivative with respect to b. 
- (*deriv_S_and_b)(S_dim) = -deriv_coef; + (*deriv_b) = -deriv_coef; } BaseFloat TestSimilarityScore(const CuVector &v, @@ -286,14 +294,15 @@ void UnitTestXvectorExtractor() { int main() { using namespace kaldi; - for (int32 i = 0; i < 2; i++) + for (int32 i = 0; i < 2; i++) { #if HAVE_CUDA == 1 if (i == 0) - CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU + CuDevice::Instantiate().SelectGpuId("no"); else - CuDevice::Instantiate().SelectGpuId("yes"); // -1 means no GPU + CuDevice::Instantiate().SelectGpuId("yes"); #endif UnitTestXvectorExtractor(); + } std::cout << "Xvector tests succeeded.\n"; return 0; } diff --git a/src/ivector/xvector.cc b/src/ivector/xvector.cc index a6e8533b611..c06942d1cb6 100644 --- a/src/ivector/xvector.cc +++ b/src/ivector/xvector.cc @@ -25,52 +25,86 @@ void ComputeXvectorObjfAndDeriv( const CuMatrixBase &xvector_pairs, const CuSpMatrix &S, BaseFloat b, CuMatrixBase *deriv_xvector, - CuVector *deriv_S_and_b, BaseFloat *tot_objf, + CuVector *deriv_S, BaseFloat *deriv_b, + BaseFloat *tot_objf, BaseFloat *tot_weight) { int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2, N = xvector_pairs.NumRows(), xvector_dim = xvector_pairs.NumCols(); - BaseFloat K = 1.0 / (N - 2.0); (*tot_objf) = 0; if (deriv_xvector == NULL) - KALDI_ASSERT(deriv_S_and_b == NULL); + KALDI_ASSERT(deriv_S == NULL && deriv_b == NULL); else { KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim); KALDI_ASSERT(deriv_xvector->NumRows() == N); - KALDI_ASSERT(deriv_S_and_b->Dim() == S_dim + 1); + KALDI_ASSERT(deriv_S->Dim() == S_dim); } - CuMatrix S_tmp(S); - CuMatrix P(N, xvector_dim), + CuMatrix S_tmp(S), + P(N, xvector_dim), Q(N, N), R(N, N), - T(N, N), + scores(N, N), // The raw scores. objf_terms(N, N), - objf_deriv_terms(N, N); - + objf_deriv_terms(N, N); // Derivative of the + // objf w.r.t. the scores. 
CuVector r(N); + P.AddMatMat(1.0, xvector_pairs, kNoTrans, S_tmp, kNoTrans, 0.0); r.AddDiagMatMat(1.0, xvector_pairs, kNoTrans, P, kTrans, 0.0); R.AddVecToRows(1.0, r); Q.SymAddMat2(1.0, xvector_pairs, kNoTrans, 0.0); Q.CopyLowerToUpper(); - T.AddMat(1.0, Q, kNoTrans); - T.AddMat(-1.0, R, kTrans); - T.AddMat(-1.0, R, kNoTrans); - T.Add(b); + scores.AddMat(1.0, Q, kNoTrans); + scores.AddMat(-1.0, R, kTrans); + scores.AddMat(-1.0, R, kNoTrans); + scores.Add(b); - cu::ComputeXvectorObjfFromScores(T, &objf_terms, &objf_deriv_terms); + cu::ComputeXvectorObjfFromScores(scores, &objf_terms, &objf_deriv_terms); CuVector objf_terms_vec(N); objf_terms_vec.AddRowSumMat(1.0, objf_terms); (*tot_objf) = objf_terms_vec.Sum(); if (deriv_xvector != NULL) { - /* TODO: Call cu-math function that handles the derivatives of S - and the xvectors. - */ - (*deriv_S_and_b)(S_dim) = -objf_deriv_terms.Sum(); + // Some vector and matrix quantities for computing the + // derivatives. + CuMatrix objf_deriv_terms_trans(objf_deriv_terms, kTrans), + S_deriv_part(N, xvector_dim), + S_deriv(xvector_dim, xvector_dim); + CuVector cvec_rows(N), + cvec_cols(N); + cvec_rows.AddRowSumMat(1.0, objf_deriv_terms, 1.0); + cvec_cols.AddRowSumMat(1.0, objf_deriv_terms_trans, 1.0); + CuVector cvec(cvec_rows); + cvec.AddVec(1.0, cvec_cols, 1.0); + + // Compute derivative of the objf with respect to the xvectors. + CuMatrix SX(N, xvector_dim); + SX.AddMatMat(1.0, xvector_pairs, kNoTrans, S_tmp, kNoTrans, 0.0); + deriv_xvector->AddDiagVecMat(-1.0, cvec_rows, xvector_pairs, + kNoTrans, 0.0); + deriv_xvector->AddMatMat(-1.0, objf_deriv_terms, kTrans, + xvector_pairs, kNoTrans, 1.0); + deriv_xvector->AddDiagVecMat(2.0, cvec_cols, SX, + kNoTrans, 1.0); + deriv_xvector->AddMatMat(2.0, objf_deriv_terms, kNoTrans, + SX, kNoTrans, 1.0); + + // Compute derivative of the objf with respect to the symmetric matrix + // S. 
+ S_deriv_part.AddDiagVecMat(2.0, cvec, xvector_pairs, + kNoTrans, 0.0); + S_deriv.AddMatMat(1.0, xvector_pairs, kTrans, S_deriv_part, + kNoTrans, 1.0); + CuSpMatrix S_deriv_tmp(S_deriv); + S_deriv_tmp.ScaleDiag(0.5); + deriv_S->CopyFromVec(CuSubVector(S_deriv_tmp.Data(), + S_dim)); + + // Compute derivative of objf with respect to the scalar offset b. + (*deriv_b) = -objf_deriv_terms.Sum(); } (*tot_weight) = N; } diff --git a/src/ivector/xvector.h b/src/ivector/xvector.h index ddb05c632d7..53d0864575a 100644 --- a/src/ivector/xvector.h +++ b/src/ivector/xvector.h @@ -55,8 +55,10 @@ namespace kaldi { (2, 4), etc, are from different classes. @param [out] deriv_xvector If non-NULL, the derivative of the objective function with respect to the xvectors is written here. - @param [out] deriv_S_and_b If non-NULL, the derivative of the objective - function with respect to the parameters S and b are written here. + @param [out] deriv_S If non-NULL, the derivative of the objective + function with respect to the parameter S is written here. + @param [out] deriv_b If the other derivatives are non-NULL, the derivative of + the objective function with respect to the parameter b is written here. @param [out] tot_objf The total objective function described above @param [out] tot_weight The total normalizing factor for the objective function, equal to dvector_pairs.NumRows(). @@ -65,10 +67,10 @@ namespace kaldi { const CuSpMatrix &S, BaseFloat b, CuMatrixBase *deriv_xvector, - CuVector *deriv_S_and_b, + CuVector *deriv_S, + BaseFloat *deriv_b, BaseFloat *tot_objf, BaseFloat *tot_weight); - } // namespace kaldi #endif