-
-
Notifications
You must be signed in to change notification settings - Fork 15.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
libnvidia-container: 1.9.0 -> 1.16.2 #347867
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,22 @@ | ||
From 8799541f99785d2bd881561386676fb0985e939e Mon Sep 17 00:00:00 2001 | ||
From: Moritz Sanft <[email protected]> | ||
Date: Thu, 10 Oct 2024 14:32:42 +0200 | ||
Subject: [PATCH] fix library resolving | ||
|
||
Signed-off-by: Moritz Sanft <[email protected]> | ||
--- | ||
src/ldcache.c | 46 +++++++++++++++++----------------------------- | ||
src/ldcache.h | 2 +- | ||
src/nvc_info.c | 10 +++------- | ||
src/nvc_ldcache.c | 2 +- | ||
4 files changed, 22 insertions(+), 38 deletions(-) | ||
|
||
diff --git a/src/ldcache.c b/src/ldcache.c | ||
index 38bab05..e1abc89 100644 | ||
index 38bab055..8cd30a0f 100644 | ||
--- a/src/ldcache.c | ||
+++ b/src/ldcache.c | ||
@@ -108,40 +108,27 @@ ldcache_close(struct ldcache *ctx) | ||
@@ -108,40 +108,28 @@ ldcache_close(struct ldcache *ctx) | ||
|
||
int | ||
ldcache_resolve(struct ldcache *ctx, uint32_t arch, const char *root, const char * const libs[], | ||
- char *paths[], size_t size, ldcache_select_fn select, void *select_ctx) | ||
|
@@ -14,26 +27,17 @@ index 38bab05..e1abc89 100644 | |
- int override; | ||
+ char dir[PATH_MAX]; | ||
+ char lib[PATH_MAX]; | ||
|
||
- h = (struct header_libc6 *)ctx->ptr; | ||
memset(paths, 0, size * sizeof(*paths)); | ||
|
||
- for (uint32_t i = 0; i < h->nlibs; ++i) { | ||
- int32_t flags = h->libs[i].flags; | ||
- char *key = (char *)ctx->ptr + h->libs[i].key; | ||
- char *value = (char *)ctx->ptr + h->libs[i].value; | ||
- | ||
- if (!(flags & LD_ELF) || (flags & LD_ARCH_MASK) != arch) | ||
+ for (size_t j = 0; j < size; ++j) { | ||
+ snprintf(dir, 100, "/run/opengl-driver%s/lib", | ||
+ arch == LD_I386_LIB32 ? "-32" : ""); | ||
+ if (!strncmp(libs[j], "libvdpau_nvidia.so", 100)) | ||
+ strcat(dir, "/vdpau"); | ||
+ snprintf(lib, 100, "%s/%s.%s", dir, libs[j], version); | ||
+ if (path_resolve_full(ctx->err, path, "/", lib) < 0) | ||
+ return (-1); | ||
+ if (!file_exists(ctx->err, path)) | ||
continue; | ||
- continue; | ||
- | ||
- for (size_t j = 0; j < size; ++j) { | ||
- if (!str_has_prefix(key, libs[j])) | ||
|
@@ -52,14 +56,25 @@ index 38bab05..e1abc89 100644 | |
- } | ||
- break; | ||
- } | ||
+ for (size_t j = 0; j < size; ++j) { | ||
+ snprintf(dir, 100, "@driverLink@/lib"); | ||
+ | ||
+ if (!strncmp(libs[j], "libvdpau_nvidia.so", 100)) | ||
+ strcat(dir, "/vdpau"); | ||
+ snprintf(lib, 100, "%s/%s.%s", dir, libs[j], version); | ||
+ if (path_resolve_full(ctx->err, path, "/", lib) < 0) | ||
+ return (-1); | ||
+ if (!file_exists(ctx->err, path)) | ||
+ continue; | ||
+ | ||
+ paths[j] = xstrdup(ctx->err, path); | ||
+ if (paths[j] == NULL) | ||
+ return (-1); | ||
} | ||
return (0); | ||
} | ||
diff --git a/src/ldcache.h b/src/ldcache.h | ||
index 33d78dd..2b087db 100644 | ||
index 33d78dd7..2b087dbc 100644 | ||
--- a/src/ldcache.h | ||
+++ b/src/ldcache.h | ||
@@ -50,6 +50,6 @@ void ldcache_init(struct ldcache *, struct error *, const char *); | ||
|
@@ -68,19 +83,19 @@ index 33d78dd..2b087db 100644 | |
int ldcache_resolve(struct ldcache *, uint32_t, const char *, const char * const [], | ||
- char *[], size_t, ldcache_select_fn, void *); | ||
+ char *[], size_t, const char*); | ||
|
||
#endif /* HEADER_LDCACHE_H */ | ||
diff --git a/src/nvc_info.c b/src/nvc_info.c | ||
index 30e3cfd..6d12a50 100644 | ||
index b7b8adfa..d42f2beb 100644 | ||
--- a/src/nvc_info.c | ||
+++ b/src/nvc_info.c | ||
@@ -167,15 +167,13 @@ find_library_paths(struct error *err, struct nvc_driver_info *info, const char * | ||
@@ -217,15 +217,13 @@ find_library_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_ | ||
if (path_resolve_full(err, path, root, ldcache) < 0) | ||
return (-1); | ||
ldcache_init(&ld, err, path); | ||
- if (ldcache_open(&ld) < 0) | ||
- return (-1); | ||
|
||
info->nlibs = size; | ||
info->libs = array_new(err, size); | ||
if (info->libs == NULL) | ||
|
@@ -89,42 +104,44 @@ index 30e3cfd..6d12a50 100644 | |
- info->libs, info->nlibs, select_libraries_fn, info) < 0) | ||
+ info->libs, info->nlibs, info->nvrm_version) < 0) | ||
goto fail; | ||
|
||
info->nlibs32 = size; | ||
@@ -183,13 +181,11 @@ find_library_paths(struct error *err, struct nvc_driver_info *info, const char * | ||
@@ -233,13 +231,11 @@ find_library_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_ | ||
if (info->libs32 == NULL) | ||
goto fail; | ||
if (ldcache_resolve(&ld, LIB32_ARCH, root, libs, | ||
- info->libs32, info->nlibs32, select_libraries_fn, info) < 0) | ||
+ info->libs32, info->nlibs32, info->nvrm_version) < 0) | ||
goto fail; | ||
rv = 0; | ||
|
||
fail: | ||
- if (ldcache_close(&ld) < 0) | ||
- return (-1); | ||
return (rv); | ||
} | ||
@@ -203,7 +199,7 @@ find_binary_paths(struct error *err, struct nvc_driver_info *info, const char *r | ||
|
||
@@ -253,7 +249,7 @@ find_binary_paths(struct error *err, struct dxcore_context* dxcore, struct nvc_d | ||
char path[PATH_MAX]; | ||
int rv = -1; | ||
|
||
- if ((env = secure_getenv("PATH")) == NULL) { | ||
+ if ((env = "/run/nvidia-docker/bin:/run/nvidia-docker/extras/bin") == NULL) { | ||
error_setx(err, "environment variable PATH not found"); | ||
return (-1); | ||
} | ||
diff --git a/src/nvc_ldcache.c b/src/nvc_ldcache.c | ||
index 6ff380f..cbe6a69 100644 | ||
index db3b2f69..ae5def43 100644 | ||
--- a/src/nvc_ldcache.c | ||
+++ b/src/nvc_ldcache.c | ||
@@ -340,7 +340,7 @@ nvc_ldcache_update(struct nvc_context *ctx, const struct nvc_container *cnt) | ||
@@ -367,7 +367,7 @@ nvc_ldcache_update(struct nvc_context *ctx, const struct nvc_container *cnt) | ||
if (validate_args(ctx, cnt != NULL) < 0) | ||
return (-1); | ||
- argv = (char * []){cnt->cfg.ldconfig, cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL}; | ||
|
||
- argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL}; | ||
+ argv = (char * []){cnt->cfg.ldconfig, "-f", "/tmp/ld.so.conf.nvidia-host", "-C", "/tmp/ld.so.cache.nvidia-host", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fwiw this is not guaranteed to be writable either /tmp/ld.so.conf.nvidia-host |
||
if (*argv[0] == '@') { | ||
/* | ||
* We treat this path specially to be relative to the host filesystem. | ||
-- | ||
2.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,27 @@ | ||
{ stdenv | ||
, lib | ||
, addDriverRunpath | ||
, fetchFromGitHub | ||
, pkg-config | ||
, elfutils | ||
, libcap | ||
, libseccomp | ||
, rpcsvc-proto | ||
, libtirpc | ||
, makeWrapper | ||
, substituteAll | ||
, removeReferencesTo | ||
, go | ||
{ | ||
stdenv, | ||
lib, | ||
addDriverRunpath, | ||
fetchFromGitHub, | ||
pkg-config, | ||
elfutils, | ||
libcap, | ||
libseccomp, | ||
rpcsvc-proto, | ||
libtirpc, | ||
makeWrapper, | ||
substituteAll, | ||
removeReferencesTo, | ||
replaceVars, | ||
go, | ||
}: | ||
let | ||
modprobeVersion = "495.44"; | ||
modprobeVersion = "550.54.14"; | ||
nvidia-modprobe = fetchFromGitHub { | ||
owner = "NVIDIA"; | ||
repo = "nvidia-modprobe"; | ||
rev = modprobeVersion; | ||
sha256 = "sha256-Y3ZOfge/EcmhqI19yWO7UfPqkvY1CHHvFC5l9vYyGuU="; | ||
sha256 = "sha256-iBRMkvOXacs/llTtvc/ZC5i/q9gc8lMuUHxMbu8A+Kg="; | ||
}; | ||
modprobePatch = substituteAll { | ||
src = ./modprobe.patch; | ||
|
@@ -28,21 +30,25 @@ let | |
in | ||
stdenv.mkDerivation rec { | ||
pname = "libnvidia-container"; | ||
version = "1.9.0"; | ||
version = "1.16.2"; | ||
|
||
src = fetchFromGitHub { | ||
owner = "NVIDIA"; | ||
repo = pname; | ||
repo = "libnvidia-container"; | ||
rev = "v${version}"; | ||
sha256 = "sha256-7OTawWwjeKU8wIa8I/+aSvAJli4kEua94nJSNyCajpE="; | ||
sha256 = "sha256-hX+2B+0kHiAC2lyo6kwe7DctPLJWgRdbhlc316OO3r8="; | ||
}; | ||
|
||
patches = [ | ||
# locations of nvidia-driver libraries are not resolved via ldconfig which | ||
# doesn't get used on NixOS. Additional support binaries like nvidia-smi | ||
# Locations of nvidia driver libraries are not resolved via ldconfig which | ||
# doesn't get used on NixOS. | ||
# TODO: The latter doesn't really apply anymore. | ||
# Additional support binaries like nvidia-smi | ||
# are not resolved via the environment PATH but via the derivation output | ||
# path. | ||
./libnvc-ldconfig-and-path-fixes.patch | ||
(replaceVars ./fix-library-resolving.patch { | ||
inherit (addDriverRunpath) driverLink; | ||
}) | ||
|
||
# fix bogus struct declaration | ||
./inline-c-struct.patch | ||
|
@@ -54,6 +60,11 @@ stdenv.mkDerivation rec { | |
-e 's/^COMPILER :=.*/COMPILER = $(CC)/' \ | ||
mk/common.mk | ||
|
||
sed -i \ | ||
-e 's/^GIT_TAG ?=.*/GIT_TAG = ${version}/' \ | ||
-e 's/^GIT_COMMIT ?=.*/GIT_COMMIT = ${src.rev}/' \ | ||
Comment on lines
+64
to
+65
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this the same as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (can improve in a follow-up) |
||
versions.mk | ||
|
||
mkdir -p deps/src/nvidia-modprobe-${modprobeVersion} | ||
cp -r ${nvidia-modprobe}/* deps/src/nvidia-modprobe-${modprobeVersion} | ||
chmod -R u+w deps/src | ||
|
@@ -84,12 +95,26 @@ stdenv.mkDerivation rec { | |
HOME="$(mktemp -d)" | ||
''; | ||
|
||
env.NIX_CFLAGS_COMPILE = toString [ "-I${libtirpc.dev}/include/tirpc" ]; | ||
NIX_LDFLAGS = [ "-L${libtirpc.dev}/lib" "-ltirpc" ]; | ||
env.NIX_CFLAGS_COMPILE = toString [ "-I${lib.getInclude libtirpc}/include/tirpc" ]; | ||
NIX_LDFLAGS = [ | ||
"-L${lib.getLib libtirpc}/lib" | ||
"-ltirpc" | ||
]; | ||
|
||
nativeBuildInputs = [ pkg-config go rpcsvc-proto makeWrapper removeReferencesTo ]; | ||
nativeBuildInputs = [ | ||
pkg-config | ||
go | ||
rpcsvc-proto | ||
makeWrapper | ||
removeReferencesTo | ||
]; | ||
|
||
buildInputs = [ elfutils libcap libseccomp libtirpc ]; | ||
buildInputs = [ | ||
elfutils | ||
libcap | ||
libseccomp | ||
libtirpc | ||
]; | ||
|
||
makeFlags = [ | ||
"WITH_LIBELF=yes" | ||
|
@@ -103,10 +128,14 @@ stdenv.mkDerivation rec { | |
postInstall = | ||
let | ||
inherit (addDriverRunpath) driverLink; | ||
libraryPath = lib.makeLibraryPath [ "$out" driverLink "${driverLink}-32" ]; | ||
libraryPath = lib.makeLibraryPath [ | ||
"$out" | ||
driverLink | ||
"${driverLink}-32" | ||
msanft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
]; | ||
in | ||
'' | ||
remove-references-to -t "${go}" $out/lib/libnvidia-container-go.so.1.9.0 | ||
remove-references-to -t "${go}" $out/lib/libnvidia-container-go.so.${version} | ||
wrapProgram $out/bin/nvidia-container-cli --prefix LD_LIBRARY_PATH : ${libraryPath} | ||
''; | ||
disallowedReferences = [ go ]; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I honestly fail to remember what the difference was, but I'll just dump this here for reference: https://github.com/NixOS/nixpkgs/pull/279235/files#diff-2b4dc4504c07052fdeb991c058ab1cd1b3fc215f2475fddab960ebea2db772e7R94-R98. With the original patch, does libnvidia-container still run ldconfig in non-nixos environments?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it would. But I think it's correct that it doesn't. On non NixOS-systems, we'd probably don't want the impurities caused by this neither, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In case of drivers we actually kind of do, because they usually aren't at the nixos' predictable location. The dynamic loading behaviour we want pretty much for all apps is:
@driverLink@/lib
)/etc/ld.so.{cache,conf}
but ensure some kind of isolation (cf. e.g. the libcapsule discussions elsewhere on github and on matrix). We never implemented this last bit in any consistent manner because complications, but in principle it's something to considerThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I think I need to give this another go to actually understand it more thoroughly. The current patch should have the exact same behaviour as the old one, so it would at least not cause any further regressions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I gave this a more thorough review and think that
ldconfig
is ran on all systems (NixOS and non-NixOS), but the system cache (i.e./etc/ld.so.conf
and/etc/ld.so.cache
) are not used. Instead, the special/tmp/ld.so.conf.nvidia-host
and/tmp/ld.so.cache.nvidia-host
directories are used for the resolving of nvidia libraries. This should (under the assumption of nothing else using this cache) not cause any impurities on either system. This essentially ensures the precedence of steps 0 through 2 in your list, but not 3. I would also advocate to not implement step 3 here, as I really wouldn't like to maintain such a complex patch (for a library which is unnecessarily complex already).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh this definitely wouldn't be implement here, it's a much larger scale effort.
I think for libcuda.so on non-NixOS we do want an impurity, but let's merge this PR regardless and address failures as they come