From dd0932091148a4d77201e3cd78d33a195f07984a Mon Sep 17 00:00:00 2001
From: Ben Eddy
Date: Mon, 1 Jul 2024 09:26:04 -0700
Subject: [PATCH] Avoid recreating gocql session on transient connection errors (#6207)

## What changed?
Modifies gocql error handling introduced in https://github.com/temporalio/temporal/pull/4132 to avoid recreating the gocql session on transient connection errors.

## Why?
Recreating the session adds latency and isn't necessary when connections are closed or reset.

## How did you test it?
Ran a prod-like workload and periodically restarted cassandra nodes. With the patch applied, we observed a reduction in p50 and p99 latency across persistence operations.

## Potential risks
No

## Is hotfix candidate?
No
---
 .../persistence/nosql/nosqlplugin/cassandra/gocql/session.go | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/common/persistence/nosql/nosqlplugin/cassandra/gocql/session.go b/common/persistence/nosql/nosqlplugin/cassandra/gocql/session.go
index d228e923337..6e5723b7e96 100644
--- a/common/persistence/nosql/nosqlplugin/cassandra/gocql/session.go
+++ b/common/persistence/nosql/nosqlplugin/cassandra/gocql/session.go
@@ -28,7 +28,6 @@ import (
 	"context"
 	"sync"
 	"sync/atomic"
-	"syscall"
 	"time"
 
 	"github.com/gocql/gocql"
@@ -203,9 +202,7 @@ func (s *session) handleError(
 ) {
 	switch err {
 	case gocql.ErrNoConnections,
-		gocql.ErrSessionClosed,
-		gocql.ErrConnectionClosed,
-		syscall.ECONNRESET:
+		gocql.ErrSessionClosed:
 		s.refresh()
 	default:
 		// noop