diff --git a/pkg/kv/txn_coord_sender.go b/pkg/kv/txn_coord_sender.go index 53a5068d0bb3..788c1b83cc96 100644 --- a/pkg/kv/txn_coord_sender.go +++ b/pkg/kv/txn_coord_sender.go @@ -631,6 +631,23 @@ func (tc *TxnCoordSender) AugmentMeta(ctx context.Context, meta roachpb.TxnCoord if tc.mu.txn.ID != meta.Txn.ID { return } + + // If the TxnCoordMeta is telling us the transaction has been aborted, it's + // better if we don't ingest it. Ingesting it would possibly put us in an + // inconsistent state, with an ABORTED proto but with the heartbeat loop still + // running. When this TxnCoordMeta was received from DistSQL, it presumably + // followed a TxnAbortedError that was also received. If that error was also + // passed to us, then we've already aborted the txn and ingesting the meta + // would be OK. However, as it stands, if the TxnAbortedError followed a + // non-retriable error, than we don't get the aborted error (in fact, we don't + // get either of the errors; the client is responsible for rolling back). + // TODO(andrei): A better design would be to abort the txn as soon as any + // error is received from DistSQL, which would eliminate qualms about what + // error comes first. + if meta.Txn.Status != roachpb.PENDING { + return + } + tc.augmentMetaLocked(ctx, meta) } diff --git a/pkg/kv/txn_coord_sender_test.go b/pkg/kv/txn_coord_sender_test.go index f679b0bda3a4..752a0698886f 100644 --- a/pkg/kv/txn_coord_sender_test.go +++ b/pkg/kv/txn_coord_sender_test.go @@ -2384,3 +2384,23 @@ func TestLeafTxnClientRejectError(t *testing.T) { t.Fatalf("expected UnhandledRetryableError(TransactionAbortedError), got: (%T) %v", err, err) } } + +// Check that ingesting an Aborted txn record is a no-op. The TxnCoordSender is +// supposed to reject such updates because they risk putting it into an +// inconsistent state. See comments in TxnCoordSender.AugmentMeta(). +func TestAugmentTxnCoordMetaAbortedTxn(t *testing.T) { + defer leaktest.AfterTest(t)() + s := createTestDBWithContextAndKnobs(t, client.DefaultDBContext(), nil /* knobs */) + defer s.Stop() + ctx := context.Background() + + txn := client.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */, client.RootTxn) + txnProto := txn.Serialize() + txnProto.Status = roachpb.ABORTED + txn.AugmentTxnCoordMeta(ctx, roachpb.TxnCoordMeta{Txn: *txnProto}) + // Check that the transaction was not updated. + txnProto = txn.Serialize() + if txnProto.Status != roachpb.PENDING { + t.Fatalf("expected PENDING txn, got: %s", txnProto.Status) + } +}