diff --git a/.github/e2e-tests-olm/action.yaml b/.github/e2e-tests-olm/action.yaml index fd125449..73c57541 100644 --- a/.github/e2e-tests-olm/action.yaml +++ b/.github/e2e-tests-olm/action.yaml @@ -4,7 +4,7 @@ inputs: kind-version: description: "kind version" required: false - default: 'v0.14.0' + default: 'v0.20.0' kind-image: description: "kind version" required: false diff --git a/pkg/controllers/monitoring/thanos-querier/components.go b/pkg/controllers/monitoring/thanos-querier/components.go index c28c22bf..67b11fb9 100644 --- a/pkg/controllers/monitoring/thanos-querier/components.go +++ b/pkg/controllers/monitoring/thanos-querier/components.go @@ -26,8 +26,6 @@ func thanosComponentReconcilers(thanos *msoapi.ThanosQuerier, sidecarUrls []stri func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecarUrls []string, thanosCfg ThanosConfiguration) *appsv1.Deployment { args := []string{ "query", - "--grpc-address=127.0.0.1:10901", - "--http-address=127.0.0.1:9090", "--log.format=logfmt", "--query.replica-label=prometheus_replica", "--query.auto-downsampling", @@ -71,7 +69,7 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar Image: thanosCfg.Image, Ports: []corev1.ContainerPort{ { - ContainerPort: 9090, + ContainerPort: 10902, Name: "metrics", }, }, @@ -116,7 +114,7 @@ func newService(name string, namespace string) *corev1.Service { Spec: corev1.ServiceSpec{ Ports: []corev1.ServicePort{ { - Port: 9090, + Port: 10902, Name: "http", }, }, diff --git a/test/e2e/framework/assertions.go b/test/e2e/framework/assertions.go index 9072696a..e6352ee9 100644 --- a/test/e2e/framework/assertions.go +++ b/test/e2e/framework/assertions.go @@ -48,7 +48,8 @@ func WithPollInterval(d time.Duration) OptionFn { } } -// AssertResourceNeverExists asserts that a statefulset is never created for the duration of DefaultTestTimeout +// AssertResourceNeverExists asserts that a resource is never created for the +// duration of the timeout func (f *Framework) AssertResourceNeverExists(name, namespace string, resource client.Object, fns ...OptionFn) func(t *testing.T) { option := AssertOption{ PollInterval: 5 * time.Second, @@ -60,7 +61,7 @@ func (f *Framework) AssertResourceNeverExists(name, namespace string, resource c return func(t *testing.T) { //nolint - if err := wait.Poll(option.PollInterval, option.WaitTimeout, func() (bool, error) { + if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { key := types.NamespacedName{ Name: name, Namespace: namespace, @@ -76,6 +77,35 @@ func (f *Framework) AssertResourceNeverExists(name, namespace string, resource c } } +// AssertResourceAbsent asserts that a resource is not present or, if present, is deleted +// within the timeout +func (f *Framework) AssertResourceAbsent(name, namespace string, resource client.Object, fns ...OptionFn) func(t *testing.T) { + option := AssertOption{ + PollInterval: 5 * time.Second, + WaitTimeout: DefaultTestTimeout, + } + for _, fn := range fns { + fn(&option) + } + + return func(t *testing.T) { + //nolint + if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { + key := types.NamespacedName{ + Name: name, + Namespace: namespace, + } + if err := f.K8sClient.Get(context.Background(), key, resource); errors.IsNotFound(err) { + return true, nil + } + + return false, fmt.Errorf("resource %s/%s should not be present", namespace, name) + }); wait.Interrupted(err) { + t.Fatal(err) + } + } +} + // AssertResourceEventuallyExists asserts that a resource is created duration a time period of customForeverTestTimeout func (f *Framework) AssertResourceEventuallyExists(name, namespace string, resource client.Object, fns ...OptionFn) func(t *testing.T) { option := AssertOption{ @@ -113,8 +143,7 @@ func (f *Framework) AssertStatefulsetReady(name, namespace string, fns ...Option } return func(t *testing.T) { key := types.NamespacedName{Name: name, Namespace: namespace} - //nolint - if err := wait.Poll(5*time.Second, option.WaitTimeout, func() (bool, error) { + if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { pod := &appsv1.StatefulSet{} err := f.K8sClient.Get(context.Background(), key, pod) return err == nil && pod.Status.ReadyReplicas == *pod.Spec.Replicas, nil @@ -125,8 +154,11 @@ func (f *Framework) AssertStatefulsetReady(name, namespace string, fns ...Option } func (f *Framework) GetResourceWithRetry(t *testing.T, name, namespace string, obj client.Object) { - //nolint - err := wait.Poll(5*time.Second, DefaultTestTimeout, func() (bool, error) { + option := AssertOption{ + PollInterval: 5 * time.Second, + WaitTimeout: DefaultTestTimeout, + } + err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { key := types.NamespacedName{Name: name, Namespace: namespace} if err := f.K8sClient.Get(context.Background(), key, obj); errors.IsNotFound(err) { @@ -238,7 +270,7 @@ func (f *Framework) GetStackWhenAvailable(t *testing.T, name, namespace string) } var lastErr error - err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, DefaultTestTimeout+10*time.Second, true, func(ctx context.Context) (bool, error) { + err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, DefaultTestTimeout*2, true, func(ctx context.Context) (bool, error) { lastErr = nil err := f.K8sClient.Get(context.Background(), key, &ms) if err != nil { diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index c8a5ec8e..eb02007e 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -9,7 +9,8 @@ import ( "strings" "testing" - policyv1beta1 "k8s.io/api/policy/v1beta1" + "github.com/pkg/errors" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -37,7 +38,7 @@ type Framework struct { func (f *Framework) StartPortForward(podName string, ns string, port string, stopChan chan struct{}) error { roundTripper, upgrader, err := spdy.RoundTripperFor(f.Config) if err != nil { - return err + return errors.Wrap(err, "error creating RoundTripper") } path := fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", ns, podName) @@ -49,7 +50,7 @@ func (f *Framework) StartPortForward(podName string, ns string, port string, sto out, errOut := new(bytes.Buffer), new(bytes.Buffer) forwarder, err := portforward.New(dialer, []string{port}, stopChan, readyChan, out, errOut) if err != nil { - return err + return errors.Wrap(err, "failed to create portforward") } go func() { @@ -131,9 +132,9 @@ func (f *Framework) Evict(pod *corev1.Pod, gracePeriodSeconds int64) error { GracePeriodSeconds: &gracePeriodSeconds, } - eviction := &policyv1beta1.Eviction{ + eviction := &policyv1.Eviction{ TypeMeta: metav1.TypeMeta{ - APIVersion: policyv1beta1.SchemeGroupVersion.String(), + APIVersion: policyv1.SchemeGroupVersion.String(), Kind: "Eviction", }, ObjectMeta: metav1.ObjectMeta{ @@ -147,7 +148,7 @@ func (f *Framework) Evict(pod *corev1.Pod, gracePeriodSeconds int64) error { if err != nil { return err } - return c.PolicyV1beta1().Evictions(pod.Namespace).Evict(context.Background(), eviction) + return c.PolicyV1().Evictions(pod.Namespace).Evict(context.Background(), eviction) } func (f *Framework) CleanUp(t *testing.T, cleanupFunc func()) { diff --git a/test/e2e/monitoring_stack_controller_test.go b/test/e2e/monitoring_stack_controller_test.go index 0418143a..6857666e 100644 --- a/test/e2e/monitoring_stack_controller_test.go +++ b/test/e2e/monitoring_stack_controller_test.go @@ -98,6 +98,7 @@ func TestMonitoringStackController(t *testing.T) { if err != nil { t.Fatal(err) } + assertPDBExpectedPodsAreHealthy(t, stackName+"-alertmanager", e2eTestNamespace) assertAlertmanagersAreOnDifferentNodes(t, pods) assertAlertmanagersAreResilientToDisruption(t, pods) }, @@ -396,7 +397,7 @@ func singlePrometheusReplicaHasNoPDB(t *testing.T) { assert.NilError(t, err, "failed to update monitoring stack") // ensure there is no pdb - f.AssertResourceNeverExists(pdbName, ms.Namespace, &pdb)(t) + f.AssertResourceAbsent(pdbName, ms.Namespace, &pdb)(t) } func assertPrometheusScrapesItself(t *testing.T) { @@ -508,6 +509,21 @@ func assertAlertmanagersAreResilientToDisruption(t *testing.T, pods []corev1.Pod } } +func assertPDBExpectedPodsAreHealthy(t *testing.T, name, namespace string) { + if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, framework.DefaultTestTimeout, true, func(ctx context.Context) (bool, error) { + pdb := &policyv1.PodDisruptionBudget{} + key := types.NamespacedName{Name: name, Namespace: namespace} + err := f.K8sClient.Get(context.Background(), key, pdb) + if err != nil { + return false, nil + } + return pdb.Status.CurrentHealthy == pdb.Status.ExpectedPods, nil + + }); err != nil { + t.Fatal(err) + } +} + func assertAlertmanagerReceivesAlerts(t *testing.T) { ms := newMonitoringStack(t, "alerting") if err := f.K8sClient.Create(context.Background(), ms); err != nil { @@ -838,11 +854,12 @@ func namespaceSelectorTest(t *testing.T) { err := deployDemoApp(t, ns, nsLabels, resourceLabels) assert.NilError(t, err, "%s: deploying demo app failed", ns) } + f.AssertStatefulsetReady("prometheus-"+stackName, e2eTestNamespace, framework.WithTimeout(5*time.Minute))(t) stopChan := make(chan struct{}) defer close(stopChan) //nolint - if pollErr := wait.Poll(15*time.Second, framework.DefaultTestTimeout, func() (bool, error) { + if pollErr := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, framework.DefaultTestTimeout*2, true, func(ctx context.Context) (bool, error) { err := f.StartServicePortForward(ms.Name+"-prometheus", e2eTestNamespace, "9090", stopChan) return err == nil, nil }); pollErr != nil { diff --git a/test/e2e/thanos_querier_controller_test.go b/test/e2e/thanos_querier_controller_test.go index 4b25bce8..569dae09 100644 --- a/test/e2e/thanos_querier_controller_test.go +++ b/test/e2e/thanos_querier_controller_test.go @@ -76,16 +76,17 @@ func singleStackWithSidecar(t *testing.T) { stopChan := make(chan struct{}) defer close(stopChan) if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { - err = f.StartServicePortForward(name, e2eTestNamespace, "9090", stopChan) + err = f.StartServicePortForward(name, e2eTestNamespace, "10902", stopChan) return err == nil, nil }); wait.Interrupted(err) { - t.Fatal(err) + t.Fatal("timeout waiting for port-forward") } - promClient := framework.NewPrometheusClient("http://localhost:9090") + promClient := framework.NewPrometheusClient("http://localhost:10902") expectedResults := map[string]int{ "prometheus_build_info": 2, // must return from both prometheus pods } + var lastErr error if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { correct := 0 for query, value := range expectedResults { @@ -99,8 +100,8 @@ func singleStackWithSidecar(t *testing.T) { } if len(result.Data.Result) > value { - resultErr := fmt.Errorf("invalid result for query %s, got %d, want %d", query, len(result.Data.Result), value) - return true, resultErr + lastErr = fmt.Errorf("invalid result for query %s, got %d, want %d", query, len(result.Data.Result), value) + return true, lastErr } if len(result.Data.Result) != value { @@ -112,7 +113,7 @@ func singleStackWithSidecar(t *testing.T) { return correct == len(expectedResults), nil }); wait.Interrupted(err) { - t.Fatal(err) + t.Fatal(fmt.Errorf("querying thanos did not yield expected results: %w", lastErr)) } } diff --git a/test/run-e2e.sh b/test/run-e2e.sh index dd5ee113..42d8b38d 100755 --- a/test/run-e2e.sh +++ b/test/run-e2e.sh @@ -22,6 +22,7 @@ declare SHOW_USAGE=false declare LOGS_DIR="tmp/e2e" declare OPERATORS_NS="operators" declare TEST_TIMEOUT="15m" +declare RUN_REGEX="" cleanup() { info "Cleaning up ..." @@ -154,7 +155,7 @@ run_e2e() { watch_obo_errors "$obo_error_log" & local ret=0 - go test -v -failfast -timeout $TEST_TIMEOUT ./test/e2e/... --retain=true | tee "$LOGS_DIR/e2e.log" || ret=1 + go test -v -failfast -timeout $TEST_TIMEOUT ./test/e2e/... -run "$RUN_REGEX" -count 1 | tee "$LOGS_DIR/e2e.log" || ret=1 # terminte both log_events { jobs -p | xargs -I {} -- pkill -TERM -P {}; } || true @@ -203,6 +204,11 @@ parse_args() { OPERATORS_NS=$1 shift ;; + --run) + shift + RUN_REGEX=$1 + shift + ;; *) return 1 ;; # show usage on everything else esac done @@ -227,7 +233,9 @@ print_usage() { --no-builds skip building operator images, useful when operator image is already built and pushed --ns NAMESPACE namespace to deploy operators (default: $OPERATORS_NS) - For running against openshift use --ns openshift-operators + for running against openshift use --ns openshift-operators + --run REGEX regex to limit which tests are run. See go help testflag -run entry + for details EOF_HELP @@ -337,6 +345,7 @@ print_config() { Skip Deploy: $NO_DEPLOY Operator namespace: $OPERATORS_NS Logs directory: $LOGS_DIR + Run regex: $RUN_REGEX EOF line 50