@@ -341,6 +341,8 @@ func (n *clusterNode) Close() error {
341
341
return n .Client .Close ()
342
342
}
343
343
344
+ const maximumNodeLatency = 1 * time .Minute // latency (converted to microseconds) assigned in updateLatency when none of the pings succeed
345
+
344
346
func (n * clusterNode ) updateLatency () {
345
347
const numProbe = 10
346
348
var dur uint64
@@ -361,7 +363,7 @@ func (n *clusterNode) updateLatency() {
361
363
if successes == 0 {
362
364
// If none of the pings worked, set latency to some arbitrarily high value so this node gets
363
365
// least priority.
364
- latency = float64 ((1 * time . Minute ) / time .Microsecond )
366
+ latency = float64 ((maximumNodeLatency ) / time .Microsecond )
365
367
} else {
366
368
latency = float64 (dur ) / float64 (successes )
367
369
}
@@ -735,20 +737,40 @@ func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
735
737
return c .nodes .Random ()
736
738
}
737
739
738
- var node * clusterNode
740
+ var allNodesFailing = true
741
+ var (
742
+ closestNonFailingNode * clusterNode
743
+ closestNode * clusterNode
744
+ minLatency time.Duration
745
+ )
746
+
747
+ // initialize minLatency to the maximum possible duration rather than the zero value
748
+ minLatency = time .Duration (math .MaxInt64 )
749
+
739
750
for _ , n := range nodes {
740
- if n .Failing () {
741
- continue
742
- }
743
- if node == nil || n .Latency () < node .Latency () {
744
- node = n
751
+ if closestNode == nil || n .Latency () < minLatency {
752
+ closestNode = n
753
+ minLatency = n .Latency ()
754
+ if ! n .Failing () {
755
+ closestNonFailingNode = n
756
+ allNodesFailing = false
757
+ }
745
758
}
746
759
}
747
- if node != nil {
748
- return node , nil
760
+
761
+ // pick the healthy node with the lowest latency
762
+ if ! allNodesFailing && closestNonFailingNode != nil {
763
+ return closestNonFailingNode , nil
764
+ }
765
+
766
+ // if all nodes are failing, we will pick the temporarily failing node with lowest latency
767
+ if minLatency < maximumNodeLatency && closestNode != nil {
768
+ internal .Logger .Printf (context .TODO (), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency" )
769
+ return closestNode , nil
749
770
}
750
771
751
- // If all nodes are failing - return random node
772
+ // If all nodes have the maximum latency (all pings are failing), return a random node across the cluster
773
+ internal .Logger .Printf (context .TODO (), "redis: pings to all nodes are failing, picking a random node across the cluster" )
752
774
return c .nodes .Random ()
753
775
}
754
776
0 commit comments