-
Notifications
You must be signed in to change notification settings - Fork 177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve cluster creation success in Korea #1196
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,16 @@ | ||
package checker | ||
|
||
import "context" | ||
|
||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the Apache License 2.0. | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
) | ||
|
||
type Checker interface { | ||
Check(context.Context) error | ||
Name() string | ||
} | ||
|
||
var errRequeue = errors.New("requeue") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,24 +14,13 @@ import ( | |
"github.com/sirupsen/logrus" | ||
corev1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/client-go/util/retry" | ||
|
||
"github.com/Azure/ARO-RP/pkg/operator" | ||
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1" | ||
aroclient "github.com/Azure/ARO-RP/pkg/operator/clientset/versioned/typed/aro.openshift.io/v1alpha1" | ||
"github.com/Azure/ARO-RP/pkg/operator/controllers" | ||
) | ||
|
||
// if a check fails it is retried using the following parameters | ||
var checkBackoff = wait.Backoff{ | ||
Steps: 5, | ||
Duration: 5 * time.Second, | ||
Factor: 1.5, | ||
Jitter: 0.5, | ||
Cap: 1 * time.Minute, | ||
} | ||
|
||
// InternetChecker reconciles a Cluster object | ||
type InternetChecker struct { | ||
arocli aroclient.AroV1alpha1Interface | ||
|
@@ -63,6 +52,24 @@ func (r *InternetChecker) Name() string { | |
|
||
// Reconcile will keep checking that the cluster can connect to essential services. | ||
func (r *InternetChecker) Check(ctx context.Context) error { | ||
cli := &http.Client{ | ||
Transport: &http.Transport{ | ||
// We set DisableKeepAlives for two reasons: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good explanation |
||
// | ||
// 1. If we're talking HTTP/2 and the remote end blackholes traffic, | ||
// Go has a bug whereby it doesn't reset the connection after a | ||
// timeout (https://github.com/golang/go/issues/36026). If this | ||
// happens, we never have a chance to get healthy. We have | ||
// specifically seen this with gcs.prod.monitoring.core.windows.net | ||
// in Korea Central, which currently has a bad server which when we | ||
// hit it causes our cluster creations to fail. | ||
// | ||
// 2. We *want* to evaluate our capability to successfully create | ||
// *new* connections to internet endpoints anyway. | ||
DisableKeepAlives: true, | ||
}, | ||
} | ||
|
||
instance, err := r.arocli.Clusters().Get(ctx, arov1alpha1.SingletonClusterName, metav1.GetOptions{}) | ||
if err != nil { | ||
return err | ||
|
@@ -73,7 +80,7 @@ func (r *InternetChecker) Check(ctx context.Context) error { | |
for _, url := range instance.Spec.InternetChecker.URLs { | ||
checkCount++ | ||
go func(urlToCheck string) { | ||
ch <- r.checkWithRetry(&http.Client{}, urlToCheck, checkBackoff) | ||
ch <- r.checkWithRetry(cli, urlToCheck, time.Minute) | ||
}(url) | ||
} | ||
|
||
|
@@ -107,27 +114,53 @@ func (r *InternetChecker) Check(ctx context.Context) error { | |
|
||
} | ||
|
||
return controllers.SetCondition(ctx, r.arocli, condition, r.role) | ||
err = controllers.SetCondition(ctx, r.arocli, condition, r.role) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if checkFailed { | ||
return errRequeue | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// check the URL, retrying a failed query a few times according to the given backoff | ||
func (r *InternetChecker) checkWithRetry(client simpleHTTPClient, url string, backoff wait.Backoff) error { | ||
return retry.OnError(backoff, func(_ error) bool { return true }, func() error { | ||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) | ||
defer cancel() | ||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil) | ||
if err != nil { | ||
return fmt.Errorf("%s: %s", url, err) | ||
} | ||
// check the URL, retrying a failed query a few times | ||
func (r *InternetChecker) checkWithRetry(client simpleHTTPClient, url string, timeout time.Duration) error { | ||
var err error | ||
|
||
resp, err := client.Do(req) | ||
if err != nil { | ||
return fmt.Errorf("%s: %s", url, err) | ||
for i := 0; i < 6; i++ { | ||
err = r.checkOnce(client, url, timeout/6) | ||
if err == nil { | ||
return nil | ||
} | ||
defer resp.Body.Close() | ||
} | ||
|
||
return err | ||
} | ||
|
||
// checkOnce checks a given url. The check both times out after a given timeout | ||
// *and* will wait for the timeout if it fails, so that we don't hit endpoints | ||
// too much. | ||
func (r *InternetChecker) checkOnce(client simpleHTTPClient, url string, timeout time.Duration) error { | ||
ctx, cancel := context.WithTimeout(context.Background(), timeout) | ||
defer cancel() | ||
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil) | ||
if err != nil { | ||
<-ctx.Done() | ||
return err | ||
} | ||
|
||
resp, err := client.Do(req) | ||
if err != nil { | ||
<-ctx.Done() | ||
return fmt.Errorf("%s: %s", url, err) | ||
} | ||
|
||
return nil | ||
}) | ||
resp.Body.Close() | ||
return nil | ||
} | ||
|
||
func (r *InternetChecker) conditionType() (ctype status.ConditionType) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this necessary? Can't we use
err
instead ofthisErr
? The only real error-generating statement this this function isc.Check(ctx)
so one variable should be enoughThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good questions, why this is not enough?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's subtle. Consider the following case: checker 0 returns an error, and checker 1 returns nil. In that case by your code we would return foo, nil when we exit the function, but in fact we want to return foo, err.