Skip to content

Commit

Permalink
Do not transition servers into the healthy state until at least one h…
Browse files Browse the repository at this point in the history
…ealthy probe is detected.

By simply initializing the failure count to the failure threshold, the first health probe will either keep the destination unhealthy, or it will swap to healthy just like before.

This will prevent new destinations with slower server startup times from erroneously tranisitioning into healthy temporarily until the unhealthy threshold is eventually reached.
  • Loading branch information
Robbie Knuth committed Feb 22, 2024
1 parent 73a5b85 commit 4c921c8
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 19 deletions.
5 changes: 4 additions & 1 deletion src/ReverseProxy/Health/ConsecutiveFailuresHealthPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ public void ProbingCompleted(ClusterState cluster, IReadOnlyList<DestinationProb
}

var threshold = GetFailureThreshold(cluster);
// the next check will either keep us in the unhealthy territory or reset the
// counter to 0 which evaluates to healthy.
var initialCount = (int)Math.Ceiling(threshold);

var newHealthStates = new NewActiveDestinationHealth[probingResults.Count];
for (var i = 0; i < probingResults.Count; i++)
{
var destination = probingResults[i].Destination;

var count = _failureCounters.GetOrCreateValue(destination);
var count = _failureCounters.GetValue(destination, state => new AtomicCounter(initialCount));
var newHealth = EvaluateHealthState(threshold, probingResults[i].Response, count);
newHealthStates[i] = new NewActiveDestinationHealth(destination, newHealth);
}
Expand Down
11 changes: 11 additions & 0 deletions src/ReverseProxy/Utilities/AtomicCounter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ internal sealed class AtomicCounter
{
private int _value;

/// <summary>
/// Creates an AtomicCounter with an initial value of 0.
/// </summary>
public AtomicCounter() : this(0) { }

/// <summary>
/// Creates an Atomic counter with an initial value of <paramref name="initialValue"/>
/// </summary>
/// <param name="initialValue">The initial value of the counter.</param>
public AtomicCounter(int initialValue) => _value = initialValue;

/// <summary>
/// Gets the current value of the counter.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,43 +21,62 @@ public void ProbingCompleted_FailureThresholdExceeded_MarkDestinationUnhealthy()
var options = Options.Create(new ConsecutiveFailuresHealthPolicyOptions { DefaultThreshold = 2 });
var policy = new ConsecutiveFailuresHealthPolicy(options, new DestinationHealthUpdaterStub());
var cluster0 = GetClusterInfo("cluster0", destinationCount: 2);
var cluster1 = GetClusterInfo("cluster0", destinationCount: 2, failureThreshold: 3);
var cluster1 = GetClusterInfo("cluster1", destinationCount: 2, failureThreshold: 3);

var probingResults0 = new[] {
var cluster0ProbingResults0 = new[] {
new DestinationProbingResult(cluster0.Destinations.Values.First(), new HttpResponseMessage(HttpStatusCode.InternalServerError), null),
new DestinationProbingResult(cluster0.Destinations.Values.Skip(1).First(), new HttpResponseMessage(HttpStatusCode.OK), null)
};
var probingResults1 = new[] {

var cluster0ProbingResults1 = new[] {
new DestinationProbingResult(cluster0.Destinations.Values.First(), new HttpResponseMessage(HttpStatusCode.OK), null),
new DestinationProbingResult(cluster0.Destinations.Values.Skip(1).First(), new HttpResponseMessage(HttpStatusCode.InternalServerError), null)
};

var cluster1ProbingResults0 = new[] {
new DestinationProbingResult(cluster1.Destinations.Values.First(), new HttpResponseMessage(HttpStatusCode.OK), null),
new DestinationProbingResult(cluster1.Destinations.Values.Skip(1).First(), null, new InvalidOperationException())
};

var cluster1ProbingResults1 = new[] {
new DestinationProbingResult(cluster1.Destinations.Values.First(), null, new InvalidOperationException()),
new DestinationProbingResult(cluster1.Destinations.Values.Skip(1).First(), new HttpResponseMessage(HttpStatusCode.OK), null)
};

Assert.Equal(HealthCheckConstants.ActivePolicy.ConsecutiveFailures, policy.Name);

// Initial state
Assert.All(cluster0.Destinations.Values, d => Assert.Equal(DestinationHealth.Unknown, d.Health.Active));
Assert.All(cluster1.Destinations.Values, d => Assert.Equal(DestinationHealth.Unknown, d.Health.Active));

// First probing attempt
policy.ProbingCompleted(cluster0, probingResults0);
Assert.All(cluster0.Destinations.Values, d => Assert.Equal(DestinationHealth.Healthy, d.Health.Active));
policy.ProbingCompleted(cluster1, probingResults1);
Assert.All(cluster1.Destinations.Values, d => Assert.Equal(DestinationHealth.Healthy, d.Health.Active));

// Second probing attempt
policy.ProbingCompleted(cluster0, probingResults0);
policy.ProbingCompleted(cluster0, cluster0ProbingResults0);
Assert.Equal(DestinationHealth.Unhealthy, cluster0.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Healthy, cluster0.Destinations.Values.Skip(1).First().Health.Active);
policy.ProbingCompleted(cluster1, probingResults1);
policy.ProbingCompleted(cluster1, cluster1ProbingResults0);
Assert.Equal(DestinationHealth.Healthy, cluster1.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Unhealthy, cluster1.Destinations.Values.Skip(1).First().Health.Active);

// Second probing attempt
policy.ProbingCompleted(cluster0, cluster0ProbingResults1);
Assert.All(cluster0.Destinations.Values, d => Assert.Equal(DestinationHealth.Healthy, d.Health.Active));
policy.ProbingCompleted(cluster1, cluster1ProbingResults1);
Assert.All(cluster1.Destinations.Values, d => Assert.Equal(DestinationHealth.Healthy, d.Health.Active));

// Third probing attempt
policy.ProbingCompleted(cluster0, probingResults0);
Assert.Equal(DestinationHealth.Unhealthy, cluster0.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Healthy, cluster0.Destinations.Values.Skip(1).First().Health.Active);
policy.ProbingCompleted(cluster1, probingResults1);
Assert.Equal(DestinationHealth.Healthy, cluster1.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Unhealthy, cluster1.Destinations.Values.Skip(1).First().Health.Active);
policy.ProbingCompleted(cluster0, cluster0ProbingResults1);
Assert.Equal(DestinationHealth.Healthy, cluster0.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Unhealthy, cluster0.Destinations.Values.Skip(1).First().Health.Active);
policy.ProbingCompleted(cluster1, cluster1ProbingResults1);
Assert.All(cluster1.Destinations.Values, d => Assert.Equal(DestinationHealth.Healthy, d.Health.Active));

// fourth probing attempt
policy.ProbingCompleted(cluster0, cluster0ProbingResults1);
Assert.Equal(DestinationHealth.Healthy, cluster0.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Unhealthy, cluster0.Destinations.Values.Skip(1).First().Health.Active);
policy.ProbingCompleted(cluster1, cluster1ProbingResults1);
Assert.Equal(DestinationHealth.Unhealthy, cluster1.Destinations.Values.First().Health.Active);
Assert.Equal(DestinationHealth.Healthy, cluster1.Destinations.Values.Skip(1).First().Health.Active);

Assert.All(cluster0.Destinations.Values, d => Assert.Equal(DestinationHealth.Unknown, d.Health.Passive));
Assert.All(cluster1.Destinations.Values, d => Assert.Equal(DestinationHealth.Unknown, d.Health.Passive));
Expand Down
10 changes: 9 additions & 1 deletion test/ReverseProxy.Tests/Utilities/AtomicCounterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,15 @@ public class AtomicCounterTests
[Fact]
public void Constructor_Works()
{
new AtomicCounter();
var counter = new AtomicCounter();
Assert.Equal(0, counter.Value);
}

[Fact]
public void Constructor_InitialValue_Works()
{
var counter = new AtomicCounter(10);
Assert.Equal(10, counter.Value);
}

[Fact]
Expand Down

0 comments on commit 4c921c8

Please sign in to comment.