diff --git a/Makefile b/Makefile index db60d80..5c6c647 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION ?= v1.0.1 +VERSION ?= v1.1.0 IMAGE_BUILDER ?= docker IMAGE_BUILD_CMD ?= buildx diff --git a/README.md b/README.md index 47f23da..892adfd 100644 --- a/README.md +++ b/README.md @@ -174,3 +174,6 @@ See [test/sanity](https://github.com/warm-metal/csi-driver-image/tree/master/tes ### E2E test See [test/e2e](https://github.com/warm-metal/csi-driver-image/tree/master/test/e2e). + +## Note on logging image size +Image sizes are logged after they finish pulling. We've noticed that for smaller images, usually under 1KiB, containerd may report an incorrect image size. An issue has been raised in the containerd github repository: https://github.com/containerd/containerd/issues/9641. diff --git a/charts/warm-metal-csi-driver/Chart.yaml b/charts/warm-metal-csi-driver/Chart.yaml index 46b2106..c941841 100644 --- a/charts/warm-metal-csi-driver/Chart.yaml +++ b/charts/warm-metal-csi-driver/Chart.yaml @@ -15,9 +15,9 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0.1 +version: 1.1.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. -appVersion: v1.0.1 +appVersion: v1.1.0 diff --git a/cmd/plugin/node_server.go b/cmd/plugin/node_server.go index 55047ed..a134a81 100644 --- a/cmd/plugin/node_server.go +++ b/cmd/plugin/node_server.go @@ -7,6 +7,7 @@ import ( "github.com/container-storage-interface/spec/lib/go/csi" "github.com/containerd/containerd/reference/docker" + "github.com/google/uuid" "github.com/warm-metal/csi-driver-image/pkg/backend" "github.com/warm-metal/csi-driver-image/pkg/metrics" "github.com/warm-metal/csi-driver-image/pkg/mountexecutor" @@ -59,7 +60,8 @@ type NodeServer struct { } func (n NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (resp *csi.NodePublishVolumeResponse, err error) { - klog.Infof("mount request: %s", req.String()) + valuesLogger := klog.LoggerWithValues(klog.NewKlogr(), "pod-name", req.VolumeContext["pod-name"], "namespace", req.VolumeContext["namespace"], "uid", req.VolumeContext["uid"], "request-id", uuid.NewString()) + valuesLogger.Info("Incoming NodePublishVolume request", "request string", req.String()) if len(req.VolumeId) == 0 { err = status.Error(codes.InvalidArgument, "VolumeId is missing") return @@ -134,6 +136,7 @@ func (n NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishV PullAlways: pullAlways, Image: image, PullSecrets: req.Secrets, + Logger: valuesLogger, } if e := n.pullExecutor.StartPulling(po); e != nil { @@ -157,6 +160,7 @@ func (n NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishV TargetPath: req.TargetPath, VolumeCapability: req.VolumeCapability, ReadOnly: req.Readonly, + Logger: valuesLogger, } if e := n.mountExecutor.StartMounting(o); e != nil { diff --git a/cmd/plugin/node_server_test.go b/cmd/plugin/node_server_test.go index e3ee73d..c27ef4c 100644 --- a/cmd/plugin/node_server_test.go +++ b/cmd/plugin/node_server_test.go @@ -57,6 +57,10 @@ func TestNodePublishVolumeAsync(t *testing.T) { VolumeContext: map[string]string{ // so that the test would always attempt to pull an image ctxKeyPullAlways: "true", + // to see improved logs + "pod-name": "test-pod", + "namespace": "test-namespace", + "uid": "test-uid", }, VolumeCapability: &csi.VolumeCapability{ AccessMode: &csi.VolumeCapability_AccessMode{ @@ -179,6 +183,10 @@ func TestNodePublishVolumeSync(t *testing.T) { VolumeContext: map[string]string{ // so that the test would always attempt to pull an image ctxKeyPullAlways: "true", + // to see improved logs + "pod-name": "test-pod", + "namespace": "test-namespace", + "uid": "test-uid", }, VolumeCapability: &csi.VolumeCapability{ AccessMode: &csi.VolumeCapability_AccessMode{ @@ -361,6 +369,10 @@ func TestMetrics(t *testing.T) { VolumeContext: map[string]string{ // so that the test would always attempt to pull an image ctxKeyPullAlways: "true", + // to see improved logs + "pod-name": "test-pod", + "namespace": "test-namespace", + "uid": "test-uid", }, VolumeCapability: &csi.VolumeCapability{ AccessMode: &csi.VolumeCapability_AccessMode{ @@ -381,6 +393,10 @@ func TestMetrics(t *testing.T) { VolumeContext: map[string]string{ // so that the test would always attempt to pull an image ctxKeyPullAlways: "true", + // to see improved logs + "pod-name": "test-pod", + "namespace": "test-namespace", + "uid": "test-uid", }, VolumeCapability: &csi.VolumeCapability{ AccessMode: &csi.VolumeCapability_AccessMode{ diff --git a/pkg/mountexecutor/mountexecutor.go b/pkg/mountexecutor/mountexecutor.go index 6462187..62548af 100644 --- a/pkg/mountexecutor/mountexecutor.go +++ b/pkg/mountexecutor/mountexecutor.go @@ -37,6 +37,7 @@ type MountOptions struct { TargetPath string VolumeCapability *csi.VolumeCapability ReadOnly bool + Logger klog.Logger } // MountExecutor executes mount @@ -59,10 +60,12 @@ func NewMountExecutor(o *MountExecutorOptions) *MountExecutor { // StartMounting starts the mounting func (m *MountExecutor) StartMounting(o *MountOptions) error { + o.Logger.Info("Mounting image", "image", o.NamedRef.Name()) if pullstatus.Get(o.NamedRef) != pullstatus.Pulled || mountstatus.Get(o.TargetPath) == mountstatus.StillMounting { - klog.Infof("image '%s' hasn't been pulled yet (status: %s) or volume is still mounting (status: %s)", - o.NamedRef.Name(), - pullstatus.Get(o.NamedRef), mountstatus.Get(o.TargetPath)) + o.Logger.Info("Could not mount image because image hasn't finshed pulling or volume is still mounting", + "image", o.NamedRef.Name(), + "pull-status", pullstatus.Get(o.NamedRef), + "mount-status", mountstatus.Get(o.TargetPath)) return nil } @@ -74,11 +77,14 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error { mountstatus.Update(o.TargetPath, mountstatus.StillMounting) startTime := time.Now() if err := m.mounter.Mount(o.Context, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil { + o.Logger.Error(err, "mount error") metrics.OperationErrorsCount.WithLabelValues("StartMounting").Inc() mountstatus.Update(o.TargetPath, mountstatus.Errored) return err } - metrics.ImageMountTime.WithLabelValues(metrics.Sync).Observe(time.Since(startTime).Seconds()) + elapsed := time.Since(startTime) + metrics.ImageMountTime.WithLabelValues(metrics.Sync).Observe(elapsed.Seconds()) + o.Logger.Info("Finished mounting", "image", o.NamedRef.Name(), "mount-duration", elapsed) mountstatus.Update(o.TargetPath, mountstatus.Mounted) return nil } @@ -92,13 +98,15 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error { mountstatus.Update(o.TargetPath, mountstatus.StillMounting) startTime := time.Now() if err := m.mounter.Mount(ctx, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil { - klog.Errorf("mount err: %v", err.Error()) + o.Logger.Error(err, "mount error") metrics.OperationErrorsCount.WithLabelValues("StartMounting").Inc() mountstatus.Update(o.TargetPath, mountstatus.Errored) m.asyncErrs[o.NamedRef] = fmt.Errorf("err: %v: %v", err, m.asyncErrs[o.NamedRef]) return } - metrics.ImageMountTime.WithLabelValues(metrics.Async).Observe(time.Since(startTime).Seconds()) + elapsed := time.Since(startTime) + metrics.ImageMountTime.WithLabelValues(metrics.Async).Observe(elapsed.Seconds()) + o.Logger.Info("Finished mounting", "image", o.NamedRef.Name(), "mount-duration", elapsed) mountstatus.Update(o.TargetPath, mountstatus.Mounted) }() diff --git a/pkg/pullexecutor/pullexecutor.go b/pkg/pullexecutor/pullexecutor.go index 945e32d..301e216 100644 --- a/pkg/pullexecutor/pullexecutor.go +++ b/pkg/pullexecutor/pullexecutor.go @@ -40,6 +40,7 @@ type PullOptions struct { PullAlways bool PullSecrets map[string]string Image string + Logger klog.Logger } // PullExecutor executes the pulls @@ -76,15 +77,19 @@ func (m *PullExecutor) StartPulling(o *PullOptions) error { puller := remoteimage.NewPuller(m.imageSvcClient, o.NamedRef, keyring) shouldPull := o.PullAlways || !m.mounter.ImageExists(o.Context, o.NamedRef) if shouldPull { - klog.Infof("pull image %q ", o.Image) + o.Logger.Info("Pulling image", "image", o.Image) pullstatus.Update(o.NamedRef, pullstatus.StillPulling) startTime := time.Now() if err = puller.Pull(o.Context); err != nil { pullstatus.Update(o.NamedRef, pullstatus.Errored) metrics.OperationErrorsCount.WithLabelValues("StartPulling").Inc() + o.Logger.Error(err, "Unable to pull image", "image", o.NamedRef) return errors.Errorf("unable to pull image %q: %s", o.NamedRef, err) } - metrics.ImagePullTime.WithLabelValues(metrics.Sync).Observe(time.Since(startTime).Seconds()) + elapsed := time.Since(startTime) + metrics.ImagePullTime.WithLabelValues(metrics.Sync).Observe(elapsed.Seconds()) + size := puller.ImageSize(o.Context) + o.Logger.Info("Finished pulling image", "image", o.Image, "pull-duration", elapsed, "image-size", fmt.Sprintf("%.2f MiB", float64(size)/(1024.0*1024.0))) } pullstatus.Update(o.NamedRef, pullstatus.Pulled) return nil @@ -110,17 +115,22 @@ func (m *PullExecutor) StartPulling(o *PullOptions) error { puller := remoteimage.NewPuller(m.imageSvcClient, o.NamedRef, keyring) shouldPull := o.PullAlways || !m.mounter.ImageExists(o.Context, o.NamedRef) if shouldPull { - klog.Infof("pull image %q ", o.Image) + o.Logger.Info("Pulling image asynchronously", "image", o.Image) pullstatus.Update(o.NamedRef, pullstatus.StillPulling) startTime := time.Now() if err = puller.Pull(c); err != nil { pullstatus.Update(o.NamedRef, pullstatus.Errored) metrics.OperationErrorsCount.WithLabelValues("StartPulling").Inc() + o.Logger.Error(err, "Unable to pull image", "image", o.Image) m.asyncErrs[o.NamedRef] = fmt.Errorf("unable to pull image %q: %s", o.Image, err) return } - metrics.ImagePullTime.WithLabelValues(metrics.Async).Observe(time.Since(startTime).Seconds()) + elapsed := time.Since(startTime) + metrics.ImagePullTime.WithLabelValues(metrics.Async).Observe(elapsed.Seconds()) + size := puller.ImageSize(o.Context) + o.Logger.Info("Finished pulling image", "image", o.Image, "pull-duration", elapsed, "image-size", fmt.Sprintf("%.2f MiB", float64(size)/(1024.0*1024.0))) + } pullstatus.Update(o.NamedRef, pullstatus.Pulled) } diff --git a/pkg/remoteimage/pull.go b/pkg/remoteimage/pull.go index 721515c..7ca29d5 100644 --- a/pkg/remoteimage/pull.go +++ b/pkg/remoteimage/pull.go @@ -11,6 +11,7 @@ import ( type Puller interface { Pull(context.Context) error + ImageSize(context.Context) int } func NewPuller(imageSvc cri.ImageServiceClient, image docker.Named, @@ -28,6 +29,16 @@ type puller struct { keyring credentialprovider.DockerKeyring } +// Returns the compressed size of the image that was pulled in bytes +// see https://github.com/containerd/containerd/issues/9261 +func (p puller) ImageSize(ctx context.Context) int { + imageSpec := &cri.ImageSpec{Image: p.image.String()} + imageStatusResponse, _ := p.imageSvc.ImageStatus(ctx, &cri.ImageStatusRequest{ + Image: imageSpec, + }) + return int(imageStatusResponse.Image.Size_) +} + func (p puller) Pull(ctx context.Context) (err error) { repo := p.image.Name() imageSpec := &cri.ImageSpec{Image: p.image.String()}