From b39e01b16be0703c6260bcbbd0537a5a321196fc Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 3 Dec 2020 17:06:16 -0800
Subject: [PATCH] Fix containerd socket mount on retry

Recent UX refactoring broke part of the logic related to runtime retries
during default builder creation. The result was that the builder "worked"
but didn't actually have a containerd socket mount and was running with
the wrong worker instead.

This also refines the basic integration test to run a Pod with the image
that was just built.
---
 integration/common/basesuites.go    |   6 +-
 integration/common/kubeclient.go    | 102 ++++++++++++++++++++++++++++
 integration/suites/rootless_test.go |   8 ++-
 pkg/driver/kubernetes/driver.go     |   1 +
 4 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/integration/common/basesuites.go b/integration/common/basesuites.go
index 00e6b478..717c3049 100644
--- a/integration/common/basesuites.go
+++ b/integration/common/basesuites.go
@@ -69,9 +69,10 @@ func (s *BaseSuite) TestSimpleBuild() {
 			"--builder", s.Name,
 		)
 	}
+	imageName := "dummy.acme.com/" + s.Name + "replaceme:latest"
 	args = append(
 		args,
-		"--tag", s.Name+"replaceme:latest",
+		"--tag", imageName,
 		dir,
 	)
 	err = RunBuild(args)
@@ -81,6 +82,9 @@
 	} else {
 		require.NoError(s.T(), err, "build failed")
 	}
+
+	err = RunSimpleBuildImageAsPod(context.Background(), s.Name+"-testbuiltimage", imageName, s.Namespace, s.ClientSet)
+	require.NoError(s.T(), err, "failed to start pod with image")
 }
 
 func isRootlessCreate(flags []string) bool {
diff --git a/integration/common/kubeclient.go b/integration/common/kubeclient.go
index 3a252359..d8cfc318 100644
--- a/integration/common/kubeclient.go
+++ b/integration/common/kubeclient.go
@@ -3,8 +3,17 @@
 package common
 
 import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/sirupsen/logrus"
 	"k8s.io/cli-runtime/pkg/genericclioptions"
 	"k8s.io/client-go/kubernetes"
+
+	corev1 "k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 // GetKubeClientset retrieves the clientset and namespace
@@ -22,3 +31,96 @@ func GetKubeClientset() (*kubernetes.Clientset, string, error) {
 	clientset, err := kubernetes.NewForConfig(restClientConfig)
 	return clientset, ns, err
 }
+
+func RunSimpleBuildImageAsPod(ctx context.Context, name, imageName, namespace string, clientset *kubernetes.Clientset) error {
+	podClient := clientset.CoreV1().Pods(namespace)
+	eventClient := clientset.CoreV1().Events(namespace)
+	logrus.Infof("starting pod %s for image: %s", name, imageName)
+	// Start the pod
+	pod, err := podClient.Create(ctx,
+		&v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: name,
+			},
+
+			Spec: corev1.PodSpec{
+				Containers: []corev1.Container{
+					{
+						Name:            name,
+						Image:           imageName,
+						Command:         []string{"sleep", "60"},
+						ImagePullPolicy: v1.PullNever,
+					},
+				},
+			},
+		},
+		metav1.CreateOptions{},
+	)
+	if err != nil {
+		return err
+	}
+
+	defer func() {
+		err := podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
+		logrus.Warnf("failed to clean up pod %s: %s", pod.Name, err)
+	}()
+
+	logrus.Infof("waiting for pod to start...")
+	// Wait for it to get started, and make sure it isn't complaining about image not being found
+	// TODO - multi-node test clusters will need some refinement here if we wind up not scaling the builder up in some scenarios
+	var refUID *string
+	var refKind *string
+	reportedEvents := map[string]interface{}{}
+
+	// TODO - DRY this out with pkg/driver/kubernetes/driver.go:wait(...)
+	for try := 0; try < 100; try++ {
+
+		stringRefUID := string(pod.GetUID())
+		if len(stringRefUID) > 0 {
+			refUID = &stringRefUID
+		}
+		stringRefKind := pod.Kind
+		if len(stringRefKind) > 0 {
+			refKind = &stringRefKind
+		}
+		selector := eventClient.GetFieldSelector(&pod.Name, &pod.Namespace, refKind, refUID)
+		options := metav1.ListOptions{FieldSelector: selector.String()}
+		events, err2 := eventClient.List(ctx, options)
+		if err2 != nil {
+			return err2
+		}
+
+		for _, event := range events.Items {
+			if event.InvolvedObject.UID != pod.ObjectMeta.UID {
+				continue
+			}
+			msg := fmt.Sprintf("%s:%s:%s:%s\n",
+				event.Type,
+				pod.Name,
+				event.Reason,
+				event.Message,
+			)
+			if _, alreadyProcessed := reportedEvents[msg]; alreadyProcessed {
+				continue
+			}
+			reportedEvents[msg] = struct{}{}
+			logrus.Info(msg)
+
+			if event.Reason == "ErrImageNeverPull" {
+				// Fail fast, it will never converge
+				return fmt.Errorf(msg)
+			}
+		}
+
+		<-time.After(time.Duration(100+try*20) * time.Millisecond)
+		pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		logrus.Infof("Pod Phase: %s", pod.Status.Phase)
+		if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodSucceeded {
+			return nil
+		}
+	}
+	return fmt.Errorf("pod never started")
+}
diff --git a/integration/suites/rootless_test.go b/integration/suites/rootless_test.go
index ccbf3c36..b4c9412c 100644
--- a/integration/suites/rootless_test.go
+++ b/integration/suites/rootless_test.go
@@ -22,6 +22,8 @@ func TestRootlessSuite(t *testing.T) {
 	})
 }
 
-// func (s *rootlessSuite) TestSimpleBuild() {
-// 	s.T().Skip("Rootless doesn't support loading to the runtime")
-// }
+func (s *rootlessSuite) TestSimpleBuild() {
+	// This test in the Base Suite attempts to run a pod, so we need to skip it
+	// Other tests will exercise the builder without running a pod
+	s.T().Skip("Rootless doesn't support loading to the runtime")
+}
diff --git a/pkg/driver/kubernetes/driver.go b/pkg/driver/kubernetes/driver.go
index 794df07b..e76f8d58 100644
--- a/pkg/driver/kubernetes/driver.go
+++ b/pkg/driver/kubernetes/driver.go
@@ -272,6 +272,7 @@ func (d *Driver) wait(ctx context.Context, sub progress.SubLogger) error {
 		sub.Log(1, []byte(fmt.Sprintf("WARN: initial attempt to deploy configured for the %s runtime failed, retrying with %s\n", attemptedRuntime, runtime)))
 		d.InitConfig.DriverOpts["runtime"] = runtime
+		d.InitConfig.DriverOpts["worker"] = "auto"
 		err = d.initDriverFromConfig() // This will toggle userSpecifiedRuntime to true to prevent cycles
 		if err != nil {
 			return err
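
A minimal usage sketch (not part of the patch above) of how the new
RunSimpleBuildImageAsPod helper can be driven from a standalone test. The
module import path and the pod/image names below are illustrative assumptions,
not something this change adds:

	package common_test

	import (
		"context"
		"testing"

		// assumed module path for the integration helpers in this repository
		"github.com/vmware-tanzu/buildkit-cli-for-kubectl/integration/common"
	)

	func TestBuiltImageRunsAsPod(t *testing.T) {
		// Reuse the existing helper to resolve a clientset and namespace
		// from the ambient kubeconfig.
		clientset, namespace, err := common.GetKubeClientset()
		if err != nil {
			t.Fatalf("failed to get kube clientset: %s", err)
		}

		// Run a previously built (and runtime-loaded) image as a short-lived
		// pod; the helper polls pod events and fails fast on ErrImageNeverPull
		// because the pod is created with ImagePullPolicy: Never.
		err = common.RunSimpleBuildImageAsPod(
			context.Background(),
			"example-testbuiltimage",                  // hypothetical pod name
			"dummy.acme.com/example-replaceme:latest", // hypothetical image tag
			namespace,
			clientset,
		)
		if err != nil {
			t.Fatalf("pod with built image failed to start: %s", err)
		}
	}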