diff --git a/internal/guest/runtime/hcsv2/nvidia_utils.go b/internal/guest/runtime/hcsv2/nvidia_utils.go index 0146a576f7..7a86fef06c 100644 --- a/internal/guest/runtime/hcsv2/nvidia_utils.go +++ b/internal/guest/runtime/hcsv2/nvidia_utils.go @@ -8,6 +8,7 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "strings" oci "github.com/opencontainers/runtime-spec/specs-go" @@ -19,20 +20,20 @@ import ( "github.com/Microsoft/hcsshim/pkg/annotations" ) -const nvidiaDebugFilePath = "/nvidia-container.log" - +const nvidiaDebugFilePath = "nvidia-container.log" const nvidiaToolBinary = "nvidia-container-cli" // described here: https://github.com/opencontainers/runtime-spec/blob/39c287c415bf86fb5b7506528d471db5405f8ca8/config.md#posix-platform-hooks // addNvidiaDeviceHook builds the arguments for nvidia-container-cli and creates the prestart hook -func addNvidiaDeviceHook(ctx context.Context, spec *oci.Spec) error { +func addNvidiaDeviceHook(ctx context.Context, spec *oci.Spec, ociBundlePath string) error { genericHookBinary := "generichook" genericHookPath, err := exec.LookPath(genericHookBinary) if err != nil { return errors.Wrapf(err, "failed to find %s for container device support", genericHookBinary) } - debugOption := fmt.Sprintf("--debug=%s", nvidiaDebugFilePath) + toolDebugPath := filepath.Join(ociBundlePath, nvidiaDebugFilePath) + debugOption := fmt.Sprintf("--debug=%s", toolDebugPath) args := []string{ genericHookPath, nvidiaToolBinary, @@ -63,8 +64,10 @@ func addNvidiaDeviceHook(ctx context.Context, spec *oci.Spec) error { // add template for pid argument to be injected later by the generic hook binary args = append(args, "--no-cgroups", "--pid={{pid}}", spec.Root.Path) - hookLogDebugFileEnvOpt := fmt.Sprintf("%s=%s", generichook.LogDebugFileEnvKey, nvidiaDebugFilePath) + // setup environment variables for the hook to run in + hookLogDebugFileEnvOpt := fmt.Sprintf("%s=%s", generichook.LogDebugFileEnvKey, toolDebugPath) hookEnv := append(updateEnvWithNvidiaVariables(), hookLogDebugFileEnvOpt) + nvidiaHook := hooks.NewOCIHook(genericHookPath, args, hookEnv) return hooks.AddOCIHook(spec, hooks.CreateRuntime, nvidiaHook) } diff --git a/internal/guest/runtime/hcsv2/uvm.go b/internal/guest/runtime/hcsv2/uvm.go index 8d723421d1..617d2bdf7d 100644 --- a/internal/guest/runtime/hcsv2/uvm.go +++ b/internal/guest/runtime/hcsv2/uvm.go @@ -363,7 +363,7 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM if !ok || sid == "" { return nil, errors.Errorf("unsupported 'io.kubernetes.cri.sandbox-id': '%s'", sid) } - if err := setupWorkloadContainerSpec(ctx, sid, id, settings.OCISpecification); err != nil { + if err := setupWorkloadContainerSpec(ctx, sid, id, settings.OCISpecification, settings.OCIBundlePath); err != nil { return nil, err } diff --git a/internal/guest/runtime/hcsv2/workload_container.go b/internal/guest/runtime/hcsv2/workload_container.go index fdea39e323..28349de5df 100644 --- a/internal/guest/runtime/hcsv2/workload_container.go +++ b/internal/guest/runtime/hcsv2/workload_container.go @@ -93,7 +93,7 @@ func specHasGPUDevice(spec *oci.Spec) bool { return false } -func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.Spec) (err error) { +func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.Spec, ociBundlePath string) (err error) { ctx, span := oc.StartSpan(ctx, "hcsv2::setupWorkloadContainerSpec") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -150,7 +150,7 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci. if spec.Windows != nil { // we only support Nvidia gpus right now if specHasGPUDevice(spec) { - if err := addNvidiaDeviceHook(ctx, spec); err != nil { + if err := addNvidiaDeviceHook(ctx, spec, ociBundlePath); err != nil { return err } }