From 881c67f5f18dda0a1f64c51daacb7ab66185da71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Tue, 23 Jan 2024 22:53:47 +0000 Subject: [PATCH 01/31] Add option to give nvcc extra arguments --- nvcc4jupyter/parsers.py | 1 + nvcc4jupyter/plugin.py | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/nvcc4jupyter/parsers.py b/nvcc4jupyter/parsers.py index e94afce..76a0392 100644 --- a/nvcc4jupyter/parsers.py +++ b/nvcc4jupyter/parsers.py @@ -19,6 +19,7 @@ def get_parser_cuda() -> argparse.ArgumentParser: parser.add_argument("-t", "--timeit", action="store_true") parser.add_argument("-p", "--profile", action="store_true") parser.add_argument("-a", "--profiler-args", type=str, default="") + parser.add_argument("-c", "--compiler-args", type=str, default="") return parser diff --git a/nvcc4jupyter/plugin.py b/nvcc4jupyter/plugin.py index 269a2cc..413f836 100644 --- a/nvcc4jupyter/plugin.py +++ b/nvcc4jupyter/plugin.py @@ -87,7 +87,10 @@ def _delete_group(self, group_name: str) -> None: shutil.rmtree(group_dirpath) def _compile( - self, group_name: str, executable_fname: str = DEFAULT_EXEC_FNAME + self, + group_name: str, + executable_fname: str = DEFAULT_EXEC_FNAME, + compiler_args: str = "", ) -> str: """ Compiles all source files in a given group together with all source @@ -97,6 +100,7 @@ def _compile( group_name: The name of the source file group to be compiled. executable_fname: The output executable file name. Defaults to "cuda_exec.out". + compiler_args: The optional "nvcc" compiler arguments. 
Raises: RuntimeError: If the group does not exist or if does not have any @@ -121,18 +125,12 @@ def _compile( executable_fpath = os.path.join(group_dirpath, executable_fname) - args = [ - "nvcc", - "-I" + shared_dirpath + "," + group_dirpath, - ] + args = ["nvcc"] + args.extend(compiler_args.split()) + args.append("-I" + shared_dirpath + "," + group_dirpath) args.extend(source_files) - args.extend( - [ - "-o", - executable_fpath, - "-Wno-deprecated-gpu-targets", - ] - ) + args.extend(["-o", executable_fpath, "-Wno-deprecated-gpu-targets"]) + subprocess.check_output(args, stderr=subprocess.STDOUT) return executable_fpath @@ -188,7 +186,10 @@ def _compile_and_run( self, group_name: str, args: argparse.Namespace ) -> str: try: - exec_fpath = self._compile(group_name) + exec_fpath = self._compile( + group_name=group_name, + compiler_args=args.compiler_args, + ) output = self._run( exec_fpath=exec_fpath, timeit=args.timeit, From 50bc8ff4a650b3b4779518ff63a5eebb918d151b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Tue, 23 Jan 2024 22:55:53 +0000 Subject: [PATCH 02/31] Add test for nvcc options that changes c++ dialect from c++17 to c++14 --- tests/fixtures/compiler/cpp_17.cu | 47 +++++++++++++++++++++++++++++++ tests/fixtures/fixtures.py | 7 ++++- tests/test_plugin.py | 26 +++++++++++++++-- 3 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/compiler/cpp_17.cu diff --git a/tests/fixtures/compiler/cpp_17.cu b/tests/fixtures/compiler/cpp_17.cu new file mode 100644 index 0000000..6aedd6f --- /dev/null +++ b/tests/fixtures/compiler/cpp_17.cu @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +#include + +struct S { + int n; + std::string s; + float d; + bool operator<(const S& rhs) const + { + // compares n to rhs.n, + // then s to rhs.s, + // then d to rhs.d + return std::tie(n, s, d) < std::tie(rhs.n, rhs.s, rhs.d); + } +}; + +int main() +{ + 
std::set mySet; + + // pre C++17: + { + S value{42, "Test", 3.14}; + std::set::iterator iter; + bool inserted; + + // unpacks the return val of insert into iter and inserted + std::tie(iter, inserted) = mySet.insert(value); + + if (inserted) + std::cout << "Value was inserted\n"; + } + + // with C++17: + { + S value{100, "abc", 100.0}; + const auto [iter, inserted] = mySet.insert(value); + + if (inserted) + std::cout << "Value(" << iter->n << ", " << iter->s << ", ...) was inserted" << "\n"; + } +} diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py index 93b88fb..a1d4e17 100644 --- a/tests/fixtures/fixtures.py +++ b/tests/fixtures/fixtures.py @@ -27,10 +27,15 @@ def fixtures_path(tests_path): return os.path.join(tests_path, "fixtures") +@pytest.fixture(scope="session") +def compiler_cpp_17_fpath(fixtures_path: str): + return os.path.join(fixtures_path, "compiler", "cpp_17.cu") + + @pytest.fixture(scope="session") def sample_magic_cu_line(): # fmt: off - return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"' # noqa: E501 + return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum" --compiler-args "--optimize 3"' # noqa: E501 # fmt: on diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 05d340e..2d6dbcb 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -3,6 +3,7 @@ import os import re import shutil +import subprocess from typing import List import pytest @@ -88,6 +89,21 @@ def test_compile( plugin._compile(gname) +def test_compile_args( + plugin: NVCCPlugin, + compiler_cpp_17_fpath: str, +): + gname = "test_compile_args" + copy_source_to_group(compiler_cpp_17_fpath, gname, plugin.workdir) + + exec_fpath = plugin._compile(gname, compiler_args="--std c++17") + assert os.path.exists(exec_fpath) + + # should fail due to the source file having c++ 17 features + with pytest.raises(subprocess.CalledProcessError): + exec_fpath = plugin._compile(gname, 
compiler_args="--std c++14") + + def test_run( plugin: NVCCPlugin, sample_cuda_fpath: str, @@ -143,7 +159,10 @@ def test_compile_and_run_multiple_files( for fpath in multiple_source_fpaths: copy_source_to_group(fpath, gname, plugin.workdir) output = plugin._compile_and_run( - gname, argparse.Namespace(timeit=False, profile=True, profiler_args="") + group_name=gname, + args=argparse.Namespace( + timeit=False, profile=True, profiler_args="", compiler_args="" + ), ) check_profiler_output(output) @@ -165,7 +184,10 @@ def test_compile_and_run_multiple_files_shared( else: copy_source_to_group(fpath, "shared", plugin.workdir) output = plugin._compile_and_run( - gname, argparse.Namespace(timeit=False, profile=True, profiler_args="") + group_name=gname, + args=argparse.Namespace( + timeit=False, profile=True, profiler_args="", compiler_args="" + ), ) check_profiler_output(output) From 595e450eb9777be5cadece782ef91deddd506905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Tue, 23 Jan 2024 22:57:59 +0000 Subject: [PATCH 03/31] Add make and the english language pack to devcontainer to be able to build the documentation --- .devcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9088efc..092a2d1 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -4,7 +4,7 @@ ARG VENV_PATH=/opt/dev-venv ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate RUN apt update -RUN apt install -y python3.10-venv nvidia-cuda-toolkit gcc vim git +RUN apt install -y python3.10-venv nvidia-cuda-toolkit gcc vim git make language-pack-en # the mkdir command bypasses a profiler error, which allows us to run it with # host code only to at least check that the profiler parameters are correctly From 405c16efb3932587801b7ef2e17fba5607b1abaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= 
<57830279+cosminc98@users.noreply.github.com> Date: Tue, 23 Jan 2024 22:58:56 +0000 Subject: [PATCH 04/31] Update documentation config to automatically import the current version of the package --- docs/source/conf.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 665059c..2e5d3b3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -6,11 +6,18 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +import os +import sys + +sys.path.append(os.path.join("..", "..")) +from nvcc4jupyter.__init__ import __version__ # noqa: E402 + project = "nvcc4jupyter" copyright = "2024, Andrei Nechaev & Cosmin Stefan Ciocan" author = "Andrei Nechaev & Cosmin Stefan Ciocan" -release = "1.0.1" -version = "1.0.1" +release = __version__ +version = __version__ + # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration From 65eca38a67ecb74e91b848336642dbdf13a503bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Tue, 23 Jan 2024 23:01:38 +0000 Subject: [PATCH 05/31] Document new --compiler-args argument --- docs/source/magics.rst | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/source/magics.rst b/docs/source/magics.rst index 2073f35..58823a5 100644 --- a/docs/source/magics.rst +++ b/docs/source/magics.rst @@ -21,6 +21,7 @@ Usage - ``%%cuda``: Compile and run this cell. - ``%%cuda -p``: Also runs the Nsight Compute profiler. - ``%%cuda -p -a ""``: Also runs the Nsight Compute profiler. + - ``%%cude -c "`_ +-c, --compiler-args + String. Optional compiler arguments that can be space separated + by wrapping them in double quotes. They will be passed to "nvcc". 
+ See all options here: + `NVCC Options `_ + + .. note:: If both "\-\-profile" and "\-\-timeit" are used then no profiling is done. @@ -47,10 +55,11 @@ Examples -------- :: - # compile, run, and profile the code in the cell with the Nsight - # compute profiler while collecting only metrics from the - # "MemoryWorkloadAnalysis" section. - %%cuda --profile --profiler-args "--section MemoryWorkloadAnalysis" + # compile, run, and profile the code in the cell with the Nsight compute + # profiler while collecting only metrics from the "MemoryWorkloadAnalysis" + # section; also provides the "--optimize 3" option to "nvcc" during + # compilation to optimize host code + %%cuda -p -a "--section MemoryWorkloadAnalysis" -c "--optimize 3" ------ From 6236fe2b1eae72629ede617118568af644c37d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Tue, 23 Jan 2024 23:44:36 +0000 Subject: [PATCH 06/31] Improve tests coverage by testing for bad arguments and the error output during a failed compilation --- tests/test_plugin.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 2d6dbcb..674392d 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -103,6 +103,17 @@ def test_compile_args( with pytest.raises(subprocess.CalledProcessError): exec_fpath = plugin._compile(gname, compiler_args="--std c++14") + output = plugin._compile_and_run( + group_name=gname, + args=argparse.Namespace( + timeit=False, + profile=True, + profiler_args="", + compiler_args="--std c++14", + ), + ) + assert "errors detected in the compilation of" in output + def test_run( plugin: NVCCPlugin, @@ -213,6 +224,16 @@ def test_magic_cuda( check_profiler_output(capsys.readouterr().out) +def test_magic_cuda_bad_args( + capsys, + plugin: NVCCPlugin, + sample_cuda_code: str, +): + plugin.cuda("--this-is-an-unrecognized-argument", sample_cuda_code) + output = 
capsys.readouterr().out + assert output.startswith("usage: ") + + def test_magic_cuda_group_save(plugin: NVCCPlugin, sample_cuda_code: str): gname = "test_save_source" sname = "sample.cu" From 639624be79e202ed9e9761a939b7ae1f060c0974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Wed, 24 Jan 2024 00:17:35 +0000 Subject: [PATCH 07/31] Add IPython to docs requirements to allow the __version__ import for readthedocs env --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 53fc1f3..4a750cb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,3 @@ sphinx==7.1.2 sphinx-rtd-theme==1.3.0rc1 +IPython>=8.19.0 From 36fc282eed92b54c443cb1e03db70ef98c786d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:11:23 +0000 Subject: [PATCH 08/31] Change devcontainer base image to have the latest CUDA toolkit --- .devcontainer/Dockerfile | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 092a2d1..349e764 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,10 +1,19 @@ -FROM ubuntu +FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 ARG VENV_PATH=/opt/dev-venv ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate +ENV DEBIAN_FRONTEND="noninteractive" RUN apt update -RUN apt install -y python3.10-venv nvidia-cuda-toolkit gcc vim git make language-pack-en +RUN apt install -y \ + gcc \ + git \ + language-pack-en \ + libopencv-dev \ + make \ + pkg-config \ + python3.10-venv \ + vim # the mkdir command bypasses a profiler error, which allows us to run it with # host code only to at least check that the profiler parameters are correctly From b49062e9e2235ae4e36364aacd7677bab2efca87 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:25:58 +0000 Subject: [PATCH 09/31] Mock the nsight compute tool with a bash script --- .devcontainer/Dockerfile | 5 ----- tests/fixtures/fixtures.py | 5 +++++ tests/fixtures/scripts/ncu | 7 +++++++ tests/test_plugin.py | 5 +++++ 4 files changed, 17 insertions(+), 5 deletions(-) create mode 100755 tests/fixtures/scripts/ncu diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 349e764..0f46540 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -15,11 +15,6 @@ RUN apt install -y \ python3.10-venv \ vim -# the mkdir command bypasses a profiler error, which allows us to run it with -# host code only to at least check that the profiler parameters are correctly -# provided; without this line, some tests will fail -RUN mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections - # we create the virtualenv here so that the devcontainer.json setting # python.defaultInterpreterPath can be used to find it; if we do it in the # post_create.sh script, the virtualenv will not be loaded and features like diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py index a1d4e17..6e50541 100644 --- a/tests/fixtures/fixtures.py +++ b/tests/fixtures/fixtures.py @@ -27,6 +27,11 @@ def fixtures_path(tests_path): return os.path.join(tests_path, "fixtures") +@pytest.fixture(scope="session") +def scripts_path(fixtures_path: str): + return os.path.join(fixtures_path, "scripts") + + @pytest.fixture(scope="session") def compiler_cpp_17_fpath(fixtures_path: str): return os.path.join(fixtures_path, "compiler", "cpp_17.cu") diff --git a/tests/fixtures/scripts/ncu b/tests/fixtures/scripts/ncu new file mode 100755 index 0000000..4d059e5 --- /dev/null +++ b/tests/fixtures/scripts/ncu @@ -0,0 +1,7 @@ +#!/bin/bash + +# this is a mock of nsight compute cli tool that just executes the program +# given as the last argument +"${@: -1}" + +echo 
"==WARNING== No kernels were profiled" diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 674392d..7448adc 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -37,6 +37,11 @@ def copy_source_to_group( return destination_fpath +@pytest.fixture(autouse=True, scope="session") +def before_all(scripts_path: str): + os.environ["PATH"] = scripts_path + os.pathsep + os.environ["PATH"] + + @pytest.fixture(autouse=True, scope="function") def before_each(plugin: NVCCPlugin): shutil.rmtree(plugin.workdir, ignore_errors=True) # before test From c1fbc0660454330b1d7663bb1f57e43b5de2eb67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:30:32 +0000 Subject: [PATCH 10/31] Add test to compile with opencv --- tests/fixtures/compiler/opencv.cu | 8 ++++++++ tests/fixtures/fixtures.py | 5 +++++ tests/test_plugin.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/fixtures/compiler/opencv.cu diff --git a/tests/fixtures/compiler/opencv.cu b/tests/fixtures/compiler/opencv.cu new file mode 100644 index 0000000..75380ee --- /dev/null +++ b/tests/fixtures/compiler/opencv.cu @@ -0,0 +1,8 @@ +#include +#include + +int main(int argc, char** argv) +{ + std::cout << cv::getBuildInformation() << std::endl; + return 0; +} diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py index 6e50541..ca8248d 100644 --- a/tests/fixtures/fixtures.py +++ b/tests/fixtures/fixtures.py @@ -37,6 +37,11 @@ def compiler_cpp_17_fpath(fixtures_path: str): return os.path.join(fixtures_path, "compiler", "cpp_17.cu") +@pytest.fixture(scope="session") +def compiler_opencv_fpath(fixtures_path: str): + return os.path.join(fixtures_path, "compiler", "opencv.cu") + + @pytest.fixture(scope="session") def sample_magic_cu_line(): # fmt: off diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 7448adc..e290f91 100644 --- a/tests/test_plugin.py 
+++ b/tests/test_plugin.py @@ -120,6 +120,36 @@ def test_compile_args( assert "errors detected in the compilation of" in output +def test_compile_opencv( + plugin: NVCCPlugin, + compiler_opencv_fpath: str, +): + gname = "test_compile_opencv" + copy_source_to_group(compiler_opencv_fpath, gname, plugin.workdir) + + # check that "pkg-config" exists + assert subprocess.check_call(["which", "pkg-config"]) == 0 + + opencv_compile_options = ( + subprocess.check_output( + args=["pkg-config", "--cflags", "--libs", "opencv4"] + ) + .decode() + .strip() + ) + + output = plugin._compile_and_run( + group_name=gname, + args=argparse.Namespace( + timeit=False, + profile=True, + profiler_args="", + compiler_args=opencv_compile_options, + ), + ) + assert "General configuration for OpenCV" in output + + def test_run( plugin: NVCCPlugin, sample_cuda_fpath: str, From bc9162097130006897938d8733d93b7bf80fe9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:22:29 +0000 Subject: [PATCH 11/31] Add new page to documentation that contains a new notebook that explains compiling with external libraries --- docs/source/index.rst | 1 + docs/source/magics.rst | 8 ++++++++ docs/source/notebooks.rst | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 docs/source/notebooks.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 1f07bdd..3ed1746 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,4 +10,5 @@ which provides CUDA capable GPUs with the CUDA toolkit already installed. :caption: Contents: usage + notebooks magics diff --git a/docs/source/magics.rst b/docs/source/magics.rst index 58823a5..28a3bf1 100644 --- a/docs/source/magics.rst +++ b/docs/source/magics.rst @@ -27,19 +27,27 @@ Usage Options ------- +.. _timeit: + -t, --timeit Boolean. If set, returns the output of the "timeit" built-in ipython magic instead of stdout. +.. 
_profile: + -p, --profile Boolean. If set, runs the NVIDIA Nsight Compute profiler whose output is appended to standard output. +.. _profiler_args: + -a, --profiler-args String. Optional profiler arguments that can be space separated by wrapping them in double quotes. See all options here: `Nsight Compute CLI `_ +.. _compiler_args: + -c, --compiler-args String. Optional compiler arguments that can be space separated by wrapping them in double quotes. They will be passed to "nvcc". diff --git a/docs/source/notebooks.rst b/docs/source/notebooks.rst new file mode 100644 index 0000000..a662ef4 --- /dev/null +++ b/docs/source/notebooks.rst @@ -0,0 +1,34 @@ +********* +Notebooks +********* + +This page provides a list of useful Jupyter notebooks written with the +**nvcc4jupyter** library. + +.. note:: + These notebooks are written for Google's Colab, but you may run them in + other environments by installing all expected dependencies. If running in + Colab, make sure to set the runtime type to a GPU instance (at the time of + writing this, T4 is the GPU offered for free by Colab). + +------ + +.. _compiling_with_external_libraries: + +Compiling with external libraries +================================= + +[`NOTEBOOK `_] + +If you need to compile CUDA C++ code that uses external libraries in the host +code (e.g. OpenCV for reading and writing images to disk) then this section is +for you. + +To achieve this, use the :ref:`compiler-args ` option of the +:ref:`cuda ` magic command to pass the correct compiler options +of the OpenCV library to **nvcc** for it to link the OpenCV code with the +code in your Jupyter cell. Those compiler options can be provided by the +`pkg-config `_ tool. + +In the notebook we show how to use OpenCV to load an image, blur it with a CUDA +kernel, and then save it back to disk using OpenCV again. 
From e9f131a67856d15f12f76621be885858fd70e016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 00:41:39 +0000 Subject: [PATCH 12/31] Add autodocstring vscode extension to devcontainer --- .devcontainer/devcontainer.json | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c6e997c..ad02373 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -16,10 +16,12 @@ "ms-python.isort", "ms-python.flake8", "ms-python.black-formatter", - "ryanluker.vscode-coverage-gutters" + "ryanluker.vscode-coverage-gutters", + "njpwerner.autodocstring" ], "settings": { - "python.defaultInterpreterPath": "/opt/dev-venv/bin/python" + "python.defaultInterpreterPath": "/opt/dev-venv/bin/python", + "autoDocstring.docstringFormat": "google-notypes" } } } From b3c015ae741aa47c952c8b12c519551196c3b44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 01:40:47 +0000 Subject: [PATCH 13/31] Add function that modifies the default profiler/compiler arguments to allow reusing them in multiple magic command calls --- docs/source/usage.rst | 44 ++++++++++++++++++++++++++++++++++ nvcc4jupyter/__init__.py | 1 + nvcc4jupyter/parsers.py | 51 ++++++++++++++++++++++++++++++++++++++-- nvcc4jupyter/plugin.py | 4 ++-- tests/test_plugin.py | 49 +++++++++++++++++++++++++++++++------- 5 files changed, 137 insertions(+), 12 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 38ff35c..50fe879 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -255,3 +255,47 @@ Running the cell above will compile and execute the vector addition code in the SM Active Cycles cycle 383.65 Compute (SM) Throughput % 1.19 ----------------------- ------------- ------------ + +Compiler arguments 
+------------------ + +In the same way profiler arguments can be passed to the profiling tool, +compiling arguments can be passed to **nvcc**: + +.. code-block:: c++ + + %cuda_group_run --group "vector_add" --compiler-args "--optimize 3" + +Running the cell above will compile and execute the vector addition code in the +"vector_add" group. During compilation, **nvcc** receives the "\-\-optimize" +option which specifies the optimization level for host code. + +Set default arguments +--------------------- + +In the case where you execute multiple magic commands with the same compiler or +profiler arguments you can avoid writing them every time by setting the default +arguments: + +.. code-block:: python + + from nvcc4jupyter import set_defaults + set_defaults(compiler_args="--optimize 3", profiler_args="--section SpeedOfLight") + +The same effect can be achieved by running "set_defaults" once for each config +due to the fact that the default value is not changed if a value is not +given to the "set_defaults" function. + +.. code-block:: python + + from nvcc4jupyter import set_defaults + set_defaults(compiler_args="--optimize 3") + set_defaults(profiler_args="--section SpeedOfLight") + + +Now we can run the following cell without specifying the compiler and profiler +arguments once again. + +..
code-block:: c++ + + %cuda_group_run --group "vector_add" --profile diff --git a/nvcc4jupyter/__init__.py b/nvcc4jupyter/__init__.py index 97b8902..41d6337 100644 --- a/nvcc4jupyter/__init__.py +++ b/nvcc4jupyter/__init__.py @@ -2,6 +2,7 @@ nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook """ +from .parsers import set_defaults # noqa: F401 from .plugin import NVCCPlugin, load_ipython_extension # noqa: F401 __version__ = "1.0.3" diff --git a/nvcc4jupyter/parsers.py b/nvcc4jupyter/parsers.py index 76a0392..a35e49f 100644 --- a/nvcc4jupyter/parsers.py +++ b/nvcc4jupyter/parsers.py @@ -3,6 +3,39 @@ """ import argparse +from typing import Callable, Optional + +_default_profiler_args: str = "" +_default_compiler_args: str = "" + + +def set_defaults( + compiler_args: Optional[str] = None, profiler_args: Optional[str] = None +) -> None: + """ + Set the default values for various arguments of the magic commands. These + values will be used if the user does not explicitly provide those arguments + to override this behaviour on a cell by cell basis. + + Args: + compiler_args: If not None, this value becomes the new default compiler + config. Defaults to "". + profiler_args: If not None, this value becomes the new default profiler + config. Defaults to "". 
+ """ + + # pylint: disable=global-statement + global _default_compiler_args + global _default_profiler_args + if compiler_args is not None: + _default_compiler_args = compiler_args + if profiler_args is not None: + _default_profiler_args = profiler_args + + +def str_to_lambda(arg: str) -> Callable[[], str]: + """Convert argparse string to lambda""" + return lambda: arg def get_parser_cuda() -> argparse.ArgumentParser: @@ -18,8 +51,22 @@ def get_parser_cuda() -> argparse.ArgumentParser: ) parser.add_argument("-t", "--timeit", action="store_true") parser.add_argument("-p", "--profile", action="store_true") - parser.add_argument("-a", "--profiler-args", type=str, default="") - parser.add_argument("-c", "--compiler-args", type=str, default="") + + # --profiler-args and --compiler-args values are lambda functions to allow + # changing the default value at runtime + parser.add_argument( + "-a", + "--profiler-args", + type=str_to_lambda, + default=lambda: _default_profiler_args, + ) + parser.add_argument( + "-c", + "--compiler-args", + type=str_to_lambda, + default=lambda: _default_compiler_args, + ) + return parser diff --git a/nvcc4jupyter/plugin.py b/nvcc4jupyter/plugin.py index 413f836..1da4f63 100644 --- a/nvcc4jupyter/plugin.py +++ b/nvcc4jupyter/plugin.py @@ -188,13 +188,13 @@ def _compile_and_run( try: exec_fpath = self._compile( group_name=group_name, - compiler_args=args.compiler_args, + compiler_args=args.compiler_args(), ) output = self._run( exec_fpath=exec_fpath, timeit=args.timeit, profile=args.profile, - profiler_args=args.profiler_args, + profiler_args=args.profiler_args(), ) except subprocess.CalledProcessError as e: output = e.output.decode("utf8") diff --git a/tests/test_plugin.py b/tests/test_plugin.py index e290f91..4c6120b 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -8,6 +8,7 @@ import pytest +from nvcc4jupyter.parsers import get_parser_cuda, set_defaults from nvcc4jupyter.plugin import NVCCPlugin @@ -44,9 +45,12 @@ def 
before_all(scripts_path: str): @pytest.fixture(autouse=True, scope="function") def before_each(plugin: NVCCPlugin): - shutil.rmtree(plugin.workdir, ignore_errors=True) # before test + # BEFORE TESTS + set_defaults(compiler_args="", profiler_args="") + shutil.rmtree(plugin.workdir, ignore_errors=True) yield - pass # after test + # AFTER TESTS + pass def test_save_source(plugin: NVCCPlugin, sample_cuda_code: str) -> None: @@ -113,8 +117,8 @@ def test_compile_args( args=argparse.Namespace( timeit=False, profile=True, - profiler_args="", - compiler_args="--std c++14", + profiler_args=lambda: "", + compiler_args=lambda: "--std c++14", ), ) assert "errors detected in the compilation of" in output @@ -143,8 +147,8 @@ def test_compile_opencv( args=argparse.Namespace( timeit=False, profile=True, - profiler_args="", - compiler_args=opencv_compile_options, + profiler_args=lambda: "", + compiler_args=lambda: opencv_compile_options, ), ) assert "General configuration for OpenCV" in output @@ -207,7 +211,10 @@ def test_compile_and_run_multiple_files( output = plugin._compile_and_run( group_name=gname, args=argparse.Namespace( - timeit=False, profile=True, profiler_args="", compiler_args="" + timeit=False, + profile=True, + profiler_args=lambda: "", + compiler_args=lambda: "", ), ) check_profiler_output(output) @@ -232,7 +239,10 @@ def test_compile_and_run_multiple_files_shared( output = plugin._compile_and_run( group_name=gname, args=argparse.Namespace( - timeit=False, profile=True, profiler_args="", compiler_args="" + timeit=False, + profile=True, + profiler_args=lambda: "", + compiler_args=lambda: "", ), ) check_profiler_output(output) @@ -249,6 +259,29 @@ def test_read_args(plugin: NVCCPlugin): assert math.isclose(args.b, 0.75) +def test_set_defaults(): + parser = get_parser_cuda() + args = parser.parse_args([]) + assert args.profiler_args() == "" + assert args.compiler_args() == "" + set_defaults(profiler_args="123") + args = parser.parse_args([]) + assert 
args.profiler_args() == "123" + assert args.compiler_args() == "" + set_defaults(compiler_args="456") + args = parser.parse_args([]) + assert args.profiler_args() == "123" + assert args.compiler_args() == "456" + set_defaults(profiler_args="") + args = parser.parse_args([]) + assert args.profiler_args() == "" + assert args.compiler_args() == "456" + set_defaults(profiler_args="123") + args = parser.parse_args(["--profiler-args", "789"]) + assert args.profiler_args() == "789" + assert args.compiler_args() == "456" + + def test_magic_cuda( capsys, plugin: NVCCPlugin, From 33801a3491fa27d2486b6a989db7962759b7544b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 01:42:17 +0000 Subject: [PATCH 14/31] Update pylint exceptions --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 71966ef..2bc6d1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -286,6 +286,6 @@ deprecated-modules="optparse,tkinter.tix" [tool.pylint.'EXCEPTIONS'] overgeneral-exceptions= [ - "BaseException", - "Exception" + "builtins.BaseException", + "builtins.Exception" ] From a3f4f319621bebe4a9b234ec36a2a9bb545d22c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 01:57:09 +0000 Subject: [PATCH 15/31] Update contributing instructions --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ff6d5f9..cfdbee2 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ Here are just a few of the things that nvcc4jupyter does well: - [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world) - [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling) + - [Compile your code with external libraries 
(e.g. OpenCV)](https://nvcc4jupyter.readthedocs.io/en/latest/notebooks.html#compiling-with-external-libraries) - [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups) ## Install @@ -88,13 +89,14 @@ The official documentation is hosted on [readthedocs](https://nvcc4jupyter.readt ## Contributing -Install the package with the development dependencies: -```bash -pip install .[dev] -``` +The recommended setup for development is using the devcontainer in GitHub +Codespaces or locally in VSCode. -As a developer, make sure you install the pre-commit hook before commiting any changes: +If not using the devcontainer you need to install the package with the +development dependencies and install the pre-commit hook before commiting any +changes: ```bash +pip install .[dev] pre-commit install ``` From 9663c74598150d7a4248a1a9854a5c48ee7a1068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 02:04:34 +0000 Subject: [PATCH 16/31] Change version from 1.0.3 to 1.1.0 due to adding features in a backward-compatible manner --- nvcc4jupyter/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvcc4jupyter/__init__.py b/nvcc4jupyter/__init__.py index 41d6337..356eb20 100644 --- a/nvcc4jupyter/__init__.py +++ b/nvcc4jupyter/__init__.py @@ -5,4 +5,4 @@ from .parsers import set_defaults # noqa: F401 from .plugin import NVCCPlugin, load_ipython_extension # noqa: F401 -__version__ = "1.0.3" +__version__ = "1.1.0" From aaaa2605e1c7747859a365f07b6f50e70a71fe92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 13:26:33 +0000 Subject: [PATCH 17/31] Install latest CUDA toolkit on the test runner to pass the OpenCV compilation test --- 
.github/workflows/test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6fd78e1..501e7e5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,14 +27,14 @@ jobs: with: python-version: ${{ matrix.python-version }} - # the mkdir command bypasses a profiler error, which allows us to run it - # with host code only to at least check that the profiler parameters are - # correctly provided - - name: Install CUDA tools + - name: Install CUDA toolkit run: | sudo apt update - sudo apt install nvidia-cuda-toolkit - sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections + sudo apt install -y wget + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get -y install cuda-toolkit-12-3 - name: Install Python dependencies run: | From 28637d5c643c04db3c3d0932cdce93ce728041c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 13:35:16 +0000 Subject: [PATCH 18/31] Install opencv in test runner and update code coverage install --- .github/workflows/test.yml | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 501e7e5..3bceaa8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,6 +36,10 @@ jobs: sudo apt-get update sudo apt-get -y install cuda-toolkit-12-3 + - name: Install OpenCV + run: | + sudo apt install -y libopencv-dev pkg-config + - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -65,11 +69,18 @@ jobs: with: python-version: "3.10" - - name: Install CUDA tools + - name: Install CUDA toolkit run: | sudo apt update - sudo apt install nvidia-cuda-toolkit - sudo mkdir -p 
/usr/lib/x86_64-linux-gnu/nsight-compute/sections + sudo apt install -y wget + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get -y install cuda-toolkit-12-3 + + - name: Install OpenCV + run: | + sudo apt install -y libopencv-dev pkg-config - name: Install Python dependencies run: | From 863cdcfa17ba7150c7a0aff23c0687d7766cde6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 13:42:22 +0000 Subject: [PATCH 19/31] Add CUDA bin to PATH in test and coverage runners --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3bceaa8..3f6ef1b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,7 @@ jobs: sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-toolkit-12-3 + export PATH="$PATH:/usr/local/cuda/bin" - name: Install OpenCV run: | @@ -77,6 +78,7 @@ jobs: sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-toolkit-12-3 + export PATH="$PATH:/usr/local/cuda/bin" - name: Install OpenCV run: | From 2614c92b20909880e6da1fac578803c0b6af28b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 13:49:00 +0000 Subject: [PATCH 20/31] Add cuda bin to path variable in .bashrc --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3f6ef1b..bec2d28 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,7 +35,7 @@ jobs: sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-toolkit-12-3 - export 
PATH="$PATH:/usr/local/cuda/bin" + echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc - name: Install OpenCV run: | @@ -78,7 +78,7 @@ jobs: sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-toolkit-12-3 - export PATH="$PATH:/usr/local/cuda/bin" + echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc - name: Install OpenCV run: | From 27b045b7828f92a8c39cf555489a41af19d015ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 14:35:06 +0000 Subject: [PATCH 21/31] Update way to set environment variable PATH in github action --- .github/workflows/test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bec2d28..9eeb8cb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,9 +33,9 @@ jobs: sudo apt install -y wget wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb - sudo apt-get update - sudo apt-get -y install cuda-toolkit-12-3 - echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc + sudo apt update + sudo apt -y install cuda-toolkit-12-3 + echo "PATH=$PATH:/usr/local/cuda/bin" >> $GITHUB_ENV - name: Install OpenCV run: | @@ -76,9 +76,9 @@ jobs: sudo apt install -y wget wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb - sudo apt-get update - sudo apt-get -y install cuda-toolkit-12-3 - echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc + sudo apt update + sudo apt -y install cuda-toolkit-12-3 + echo "PATH=$PATH:/usr/local/cuda/bin" >> $GITHUB_ENV - name: Install OpenCV run: | From ee9aa3dba3de5ec577213537544ade6cffdff4d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= 
<57830279+cosminc98@users.noreply.github.com> Date: Sat, 27 Jan 2024 14:45:16 +0000 Subject: [PATCH 22/31] Change devcontainer base image back to ubuntu:22.04 to match the environment from the test runner --- .devcontainer/Dockerfile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 0f46540..f5e11b2 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,17 +1,27 @@ -FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 +FROM ubuntu:22.04 ARG VENV_PATH=/opt/dev-venv ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate ENV DEBIAN_FRONTEND="noninteractive" +# install the latest CUDA toolkit (https://developer.nvidia.com/cuda-downloads) RUN apt update +RUN apt install -y wget +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb +RUN dpkg -i cuda-keyring_1.1-1_all.deb +RUN apt update +RUN apt -y install cuda-toolkit-12-3 +RUN echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc + +# install OpenCV to test compilation with external libraries +RUN apt install -y libopencv-dev pkg-config + +# make & language-pack-en are for documentation RUN apt install -y \ gcc \ git \ language-pack-en \ - libopencv-dev \ make \ - pkg-config \ python3.10-venv \ vim From 8d39ce01c36e5a75141bacb19a593c054dbd4a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:46:45 +0000 Subject: [PATCH 23/31] Add option to choose between NSYS and NCU profilers --- nvcc4jupyter/__init__.py | 2 +- nvcc4jupyter/parsers.py | 42 ++++++++++++++++++++++++++++++++++------ nvcc4jupyter/plugin.py | 11 +++++++---- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/nvcc4jupyter/__init__.py b/nvcc4jupyter/__init__.py index 356eb20..87f5779 100644 --- a/nvcc4jupyter/__init__.py +++ b/nvcc4jupyter/__init__.py @@ -2,7 +2,7 @@ nvcc4jupyter: CUDA C++ plugin 
for Jupyter Notebook """ -from .parsers import set_defaults # noqa: F401 +from .parsers import Profiler, set_defaults # noqa: F401 from .plugin import NVCCPlugin, load_ipython_extension # noqa: F401 __version__ = "1.1.0" diff --git a/nvcc4jupyter/parsers.py b/nvcc4jupyter/parsers.py index a35e49f..fb626c2 100644 --- a/nvcc4jupyter/parsers.py +++ b/nvcc4jupyter/parsers.py @@ -3,14 +3,28 @@ """ import argparse -from typing import Callable, Optional +from enum import Enum +from typing import Callable, Optional, Type, TypeVar + +class Profiler(Enum): + """Choice between Nsight Compute and Nsight Systems profilers.""" + + NCU = "ncu" + NSYS = "nsys" + + +_default_profiler: Profiler = Profiler.NCU _default_profiler_args: str = "" _default_compiler_args: str = "" +T = TypeVar("T") + def set_defaults( - compiler_args: Optional[str] = None, profiler_args: Optional[str] = None + profiler: Optional[Profiler] = None, + compiler_args: Optional[str] = None, + profiler_args: Optional[str] = None, ) -> None: """ Set the default values for various arguments of the magic commands. These @@ -18,17 +32,22 @@ def set_defaults( to override this behaviour on a cell by cell basis. Args: + profiler: If not None, this value becomes the new default profiler. + Defaults to None. compiler_args: If not None, this value becomes the new default compiler - config. Defaults to "". + config. Defaults to None. profiler_args: If not None, this value becomes the new default profiler - config. Defaults to "". + config. Defaults to None. 
""" # pylint: disable=global-statement + global _default_profiler + if profiler is not None: + _default_profiler = profiler global _default_compiler_args - global _default_profiler_args if compiler_args is not None: _default_compiler_args = compiler_args + global _default_profiler_args if profiler_args is not None: _default_profiler_args = profiler_args @@ -38,6 +57,11 @@ def str_to_lambda(arg: str) -> Callable[[], str]: return lambda: arg +def class_to_lambda(arg: str, cls: Type[T]) -> Callable[[], T]: + """Convert string value to class and then to lambda""" + return lambda: cls(arg) + + def get_parser_cuda() -> argparse.ArgumentParser: """ %%cuda magic command parser. @@ -52,8 +76,14 @@ def get_parser_cuda() -> argparse.ArgumentParser: parser.add_argument("-t", "--timeit", action="store_true") parser.add_argument("-p", "--profile", action="store_true") - # --profiler-args and --compiler-args values are lambda functions to allow + # the type of the following arguments is a lambda lambda function to allow # changing the default value at runtime + parser.add_argument( + "-l", + "--profiler", + type=lambda arg: class_to_lambda(arg, cls=Profiler), + default=lambda: _default_profiler, + ) parser.add_argument( "-a", "--profiler-args", diff --git a/nvcc4jupyter/plugin.py b/nvcc4jupyter/plugin.py index 1da4f63..56f32b5 100644 --- a/nvcc4jupyter/plugin.py +++ b/nvcc4jupyter/plugin.py @@ -135,11 +135,12 @@ def _compile( return executable_fpath - def _run( + def _run( # pylint: disable=too-many-arguments self, exec_fpath: str, timeit: bool = False, profile: bool = False, + profiler: parsers.Profiler = parsers.Profiler.NCU, profiler_args: str = "", ) -> str: """ @@ -150,8 +151,9 @@ def _run( timeit: If True, returns the result of the "timeit" magic instead of the standard output of the CUDA process. Defaults to False. profile: If True, the executable is profiled with NVIDIA Nsight - Compute profiling tool and its output is added to stdout. - Defaults to False. 
+ Compute or NVIDIA Nsight Systems and the profiling output is + added to stdout. Defaults to False. + profiler: The profiling tool to use. profiler_args: The profiler arguments used to customize the information gathered by it and its overall behaviour. Defaults to an empty string. @@ -173,7 +175,7 @@ def _run( else: run_args = [] if profile: - run_args.extend(["ncu"] + profiler_args.split()) + run_args.extend([profiler.value] + profiler_args.split()) run_args.append(exec_fpath) output = subprocess.check_output( run_args, stderr=subprocess.STDOUT @@ -194,6 +196,7 @@ def _compile_and_run( exec_fpath=exec_fpath, timeit=args.timeit, profile=args.profile, + profiler=args.profiler(), profiler_args=args.profiler_args(), ) except subprocess.CalledProcessError as e: From 2c108442f6747fdc2461406ab0f50c2ceb69068f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:55:17 +0000 Subject: [PATCH 24/31] Add tests for choosing the profiler --- tests/fixtures/fixtures.py | 13 +++++ tests/fixtures/scripts/ncu | 4 +- tests/fixtures/scripts/nsys | 7 +++ tests/test_plugin.py | 100 +++++++++++++++--------------------- 4 files changed, 64 insertions(+), 60 deletions(-) create mode 100755 tests/fixtures/scripts/nsys diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py index ca8248d..ef672a7 100644 --- a/tests/fixtures/fixtures.py +++ b/tests/fixtures/fixtures.py @@ -1,9 +1,11 @@ +import argparse import glob import os import pytest from IPython.core.interactiveshell import InteractiveShell +from nvcc4jupyter.parsers import Profiler from nvcc4jupyter.plugin import NVCCPlugin @@ -70,3 +72,14 @@ def multiple_source_fpaths(fixtures_path: str): pattern_h = os.path.join(fixtures_path, "multiple_files", "*.h") pattern_cu = os.path.join(fixtures_path, "multiple_files", "*.cu") return list(glob.glob(pattern_h)) + list(glob.glob(pattern_cu)) + + +@pytest.fixture(scope="session") +def 
default_args(): + return argparse.Namespace( + timeit=False, + profile=True, + profiler=lambda: Profiler.NCU, + profiler_args=lambda: "", + compiler_args=lambda: "", + ) diff --git a/tests/fixtures/scripts/ncu b/tests/fixtures/scripts/ncu index 4d059e5..1ad31cb 100755 --- a/tests/fixtures/scripts/ncu +++ b/tests/fixtures/scripts/ncu @@ -1,7 +1,7 @@ #!/bin/bash +echo "[NCU]" + # this is a mock of nsight compute cli tool that just executes the program # given as the last argument "${@: -1}" - -echo "==WARNING== No kernels were profiled" diff --git a/tests/fixtures/scripts/nsys b/tests/fixtures/scripts/nsys new file mode 100755 index 0000000..50365c8 --- /dev/null +++ b/tests/fixtures/scripts/nsys @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "[NSYS]" + +# this is a mock of nsight systems cli tool that just executes the program +# given as the last argument +"${@: -1}" diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 4c6120b..3875bd1 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -1,29 +1,26 @@ -import argparse import math import os import re import shutil import subprocess +from argparse import ArgumentParser, Namespace +from copy import deepcopy from typing import List import pytest -from nvcc4jupyter.parsers import get_parser_cuda, set_defaults +from nvcc4jupyter.parsers import Profiler, get_parser_cuda, set_defaults from nvcc4jupyter.plugin import NVCCPlugin -def check_profiler_output(output: str): - # the profiler output will be a line of "Hello World!" along with some - # warning lines which start with "==WARNING==" +def check_profiler_output(output: str, profiler: str = "[NCU]"): + # the output from the profiler will first be a line containing only + # "[NCU]" or "[NSYS]" depending on what profiler was used and another + # line containing the string "Hello World!" lines = output.strip().split("\n") - warn_count = 0 - for line in lines: - if not line.startswith("==WARNING=="): - assert line == "Hello World!" 
- else: - warn_count += 1 - assert warn_count >= 1 - assert warn_count == len(lines) - 1 + assert len(lines) >= 2 + assert lines[0] == profiler + assert lines[1] == "Hello World!" def copy_source_to_group( @@ -46,7 +43,7 @@ def before_all(scripts_path: str): @pytest.fixture(autouse=True, scope="function") def before_each(plugin: NVCCPlugin): # BEFORE TESTS - set_defaults(compiler_args="", profiler_args="") + set_defaults(profiler=Profiler.NCU, compiler_args="", profiler_args="") shutil.rmtree(plugin.workdir, ignore_errors=True) yield # AFTER TESTS @@ -101,6 +98,7 @@ def test_compile( def test_compile_args( plugin: NVCCPlugin, compiler_cpp_17_fpath: str, + default_args: Namespace, ): gname = "test_compile_args" copy_source_to_group(compiler_cpp_17_fpath, gname, plugin.workdir) @@ -112,21 +110,16 @@ def test_compile_args( with pytest.raises(subprocess.CalledProcessError): exec_fpath = plugin._compile(gname, compiler_args="--std c++14") - output = plugin._compile_and_run( - group_name=gname, - args=argparse.Namespace( - timeit=False, - profile=True, - profiler_args=lambda: "", - compiler_args=lambda: "--std c++14", - ), - ) + args = deepcopy(default_args) + args.compiler_args = lambda: "--std c++14" + output = plugin._compile_and_run(group_name=gname, args=args) assert "errors detected in the compilation of" in output def test_compile_opencv( plugin: NVCCPlugin, compiler_opencv_fpath: str, + default_args: Namespace, ): gname = "test_compile_opencv" copy_source_to_group(compiler_opencv_fpath, gname, plugin.workdir) @@ -134,23 +127,14 @@ def test_compile_opencv( # check that "pkg-config" exists assert subprocess.check_call(["which", "pkg-config"]) == 0 + pkg_config_args = ["pkg-config", "--cflags", "--libs", "opencv4"] opencv_compile_options = ( - subprocess.check_output( - args=["pkg-config", "--cflags", "--libs", "opencv4"] - ) - .decode() - .strip() + subprocess.check_output(args=pkg_config_args).decode().strip() ) - output = plugin._compile_and_run( - 
group_name=gname, - args=argparse.Namespace( - timeit=False, - profile=True, - profiler_args=lambda: "", - compiler_args=lambda: opencv_compile_options, - ), - ) + args = deepcopy(default_args) + args.compiler_args = lambda: opencv_compile_options + output = plugin._compile_and_run(group_name=gname, args=args) assert "General configuration for OpenCV" in output @@ -199,7 +183,9 @@ def test_run_profile(plugin: NVCCPlugin, sample_cuda_fpath: str): def test_compile_and_run_multiple_files( - plugin: NVCCPlugin, multiple_source_fpaths: List[str] + plugin: NVCCPlugin, + multiple_source_fpaths: List[str], + default_args: Namespace, ): """ Compiles and executes 3 cuda source files from @@ -208,20 +194,14 @@ def test_compile_and_run_multiple_files( gname = "test_compile_and_run_multiple_files" for fpath in multiple_source_fpaths: copy_source_to_group(fpath, gname, plugin.workdir) - output = plugin._compile_and_run( - group_name=gname, - args=argparse.Namespace( - timeit=False, - profile=True, - profiler_args=lambda: "", - compiler_args=lambda: "", - ), - ) + output = plugin._compile_and_run(group_name=gname, args=default_args) check_profiler_output(output) def test_compile_and_run_multiple_files_shared( - plugin: NVCCPlugin, multiple_source_fpaths: List[str] + plugin: NVCCPlugin, + multiple_source_fpaths: List[str], + default_args: Namespace, ): """ Compiles and executes 3 cuda source files from @@ -236,20 +216,12 @@ def test_compile_and_run_multiple_files_shared( copy_source_to_group(fpath, gname, plugin.workdir) else: copy_source_to_group(fpath, "shared", plugin.workdir) - output = plugin._compile_and_run( - group_name=gname, - args=argparse.Namespace( - timeit=False, - profile=True, - profiler_args=lambda: "", - compiler_args=lambda: "", - ), - ) + output = plugin._compile_and_run(group_name=gname, args=default_args) check_profiler_output(output) def test_read_args(plugin: NVCCPlugin): - parser = argparse.ArgumentParser() + parser = ArgumentParser() 
parser.add_argument("-a", type=str, required=True) parser.add_argument("-b", type=float, required=True) args = plugin._read_args( @@ -292,6 +264,18 @@ def test_magic_cuda( check_profiler_output(capsys.readouterr().out) +def test_magic_cuda_set_default_profiler( + capsys, + plugin: NVCCPlugin, + sample_cuda_code: str, + sample_magic_cu_line: str, +): + # set the default profiler to Nsight Systems + set_defaults(profiler=Profiler.NSYS) + plugin.cuda(sample_magic_cu_line, sample_cuda_code) + check_profiler_output(capsys.readouterr().out, profiler="[NSYS]") + + def test_magic_cuda_bad_args( capsys, plugin: NVCCPlugin, From 5a880c93bdd5304ab6b2fe80621d46dd3cdbfe04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 2 Feb 2024 13:26:40 +0000 Subject: [PATCH 25/31] Add isort config to help it find local modules so they are not considered 3rd party libraries --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2bc6d1d..6244158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ exclude_lines = [ [tool.isort] profile = "black" +src_paths = ["nvcc4jupyter"] # tells isort where to find local modules to not consider them 3rd party libraries [tool.bandit] exclude_dirs = ["build","dist","tests","scripts"] From 26fab4d31e0e1198cfb0ca3227a86a8d28061286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 2 Feb 2024 13:31:18 +0000 Subject: [PATCH 26/31] Replace experimental-string-processing black formatter config with enable-unstable-feature as it was removed in version 24.1.0 --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6244158..e70fe45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,8 @@ skips = ["B101", "B311", "B404", "B603"] [tool.black] line-length = 79 fast = 
true -experimental-string-processing = true +preview = true +enable-unstable-feature = ["string_processing"] [tool.coverage.run] branch = true From ba775f7ce1eab529f6e33099deea1725005d05a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:40:29 +0000 Subject: [PATCH 27/31] Search for profiling tools executable paths when they are required --- nvcc4jupyter/path_utils.py | 61 ++++++++++++++++++++++++++++++ nvcc4jupyter/plugin.py | 59 +++++++++++++++++++++++++---- tests/fixtures/scripts/searchforme | 3 ++ tests/test_path_utils.py | 16 ++++++++ 4 files changed, 131 insertions(+), 8 deletions(-) create mode 100644 nvcc4jupyter/path_utils.py create mode 100755 tests/fixtures/scripts/searchforme create mode 100644 tests/test_path_utils.py diff --git a/nvcc4jupyter/path_utils.py b/nvcc4jupyter/path_utils.py new file mode 100644 index 0000000..b6cb27a --- /dev/null +++ b/nvcc4jupyter/path_utils.py @@ -0,0 +1,61 @@ +""" +Helper functions relating to file paths. +""" + +import os +from glob import glob +from typing import List, Optional + +CUDA_SEARCH_PATHS: List[str] = [ + "/opt/nvidia/nsight-compute", + "/usr/local/cuda", + "/opt", + "/usr", +] + + +def is_executable(fpath: str) -> bool: + """Check if file exists and is executable""" + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + +def which(name: str) -> Optional[str]: + """Find an executable by name by searching the PATH directories""" + for path_dir in os.environ.get("PATH", "").split(os.pathsep): + exec_path = os.path.join(path_dir, name) + if is_executable(exec_path): + return exec_path + return None + + +def find_executable( + name: str, search_paths: Optional[List[str]] = None +) -> Optional[str]: + """ + Find an executable, either by searching in the directories of the PATH + environment variable or, if that did not work, by searching recursively + in directories a list given as parameter. 
+ + Args: + name: The name of the executable to be found. + search_paths: If None, only executables that are available from PATH + will be found. Otherwise, will recursively search these + directories. Defaults to None. + + Returns: + The path to the executable if it is found, and None otherwise. + """ + if search_paths is None: + search_paths = [] + + which_path = which(name) + if which_path is not None: + return which_path + + for search_path in search_paths: + search_path = os.path.abspath(search_path) + search_path = os.path.join(search_path, f"**/{name}") + for exec_path in glob(search_path, recursive=True): + return exec_path + + return None diff --git a/nvcc4jupyter/plugin.py b/nvcc4jupyter/plugin.py index 56f32b5..612d321 100644 --- a/nvcc4jupyter/plugin.py +++ b/nvcc4jupyter/plugin.py @@ -9,13 +9,20 @@ import subprocess import tempfile import uuid -from typing import List, Optional +from typing import Dict, List, Optional # pylint: disable=import-error from IPython.core.interactiveshell import InteractiveShell from IPython.core.magic import Magics, cell_magic, line_magic, magics_class -from . 
import parsers +from .parsers import ( + Profiler, + get_parser_cuda, + get_parser_cuda_group_delete, + get_parser_cuda_group_run, + get_parser_cuda_group_save, +) +from .path_utils import CUDA_SEARCH_PATHS, find_executable DEFAULT_EXEC_FNAME = "cuda_exec.out" SHARED_GROUP_NAME = "shared" @@ -37,14 +44,19 @@ def __init__(self, shell: InteractiveShell): super().__init__(shell) self.shell: InteractiveShell # type hint not provided by parent class - self.parser_cuda = parsers.get_parser_cuda() - self.parser_cuda_group_save = parsers.get_parser_cuda_group_save() - self.parser_cuda_group_delete = parsers.get_parser_cuda_group_delete() - self.parser_cuda_group_run = parsers.get_parser_cuda_group_run() + self.parser_cuda = get_parser_cuda() + self.parser_cuda_group_save = get_parser_cuda_group_save() + self.parser_cuda_group_delete = get_parser_cuda_group_delete() + self.parser_cuda_group_run = get_parser_cuda_group_run() self.workdir = tempfile.mkdtemp() print(f'Source files will be saved in "{self.workdir}".') + self.profiler_paths: Dict[Profiler, Optional[str]] = { + Profiler.NCU: None, + Profiler.NSYS: None, + } + def _save_source( self, source_name: str, source_code: str, group_name: str ) -> None: @@ -135,12 +147,42 @@ def _compile( return executable_fpath + def _get_profiler_path(self, profiler: Profiler) -> str: + """ + Get the path of the executable of a given profiling tool. Searches + the directories of the PATH environment variable and some extra + directories where CUDA is usually installed. + + Args: + profiler: The profiler whose executable should be found. + + Raises: + RuntimeError: If the profiler executable could not be found. + + Returns: + The file path of the executable. 
+ """ + profiler_path = self.profiler_paths[profiler] + if profiler_path is not None: + return profiler_path + + profiler_path = find_executable(profiler.value, CUDA_SEARCH_PATHS) + if profiler_path is None: + raise RuntimeError( + f'Could not find the "{profiler.value}" profiling tool.' + " Consider searching for where it is installed and adding its" + " directory to the PATH environment variable." + ) + + self.profiler_paths[profiler] = profiler_path + return profiler_path + def _run( # pylint: disable=too-many-arguments self, exec_fpath: str, timeit: bool = False, profile: bool = False, - profiler: parsers.Profiler = parsers.Profiler.NCU, + profiler: Profiler = Profiler.NCU, profiler_args: str = "", ) -> str: """ @@ -175,7 +217,8 @@ def _run( # pylint: disable=too-many-arguments else: run_args = [] if profile: - run_args.extend([profiler.value] + profiler_args.split()) + profiler_path = self._get_profiler_path(profiler) + run_args.extend([profiler_path] + profiler_args.split()) run_args.append(exec_fpath) output = subprocess.check_output( run_args, stderr=subprocess.STDOUT diff --git a/tests/fixtures/scripts/searchforme b/tests/fixtures/scripts/searchforme new file mode 100755 index 0000000..d698fec --- /dev/null +++ b/tests/fixtures/scripts/searchforme @@ -0,0 +1,3 @@ +#!/bin/bash + +echo "This is just used to test the path_utils.find_executable function" diff --git a/tests/test_path_utils.py b/tests/test_path_utils.py new file mode 100644 index 0000000..4969d8f --- /dev/null +++ b/tests/test_path_utils.py @@ -0,0 +1,16 @@ +import os + +from nvcc4jupyter.path_utils import find_executable + + +def test_which(): + assert find_executable("ls") == "/usr/bin/ls" + + +def test_find_executable(fixtures_path: str): + exec_path = find_executable("searchforme", [fixtures_path]) + assert exec_path is not None + + exec_dir, exec_fname = os.path.split(exec_path) + assert exec_fname == "searchforme" + assert os.path.basename(exec_dir) == "scripts" From 
bac447ef678ae98e3ea3fa7eadd6f79aacb0c4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 2 Feb 2024 23:05:50 +0000 Subject: [PATCH 28/31] Install dev dependencies in editable mode --- .devcontainer/post_create.sh | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/post_create.sh b/.devcontainer/post_create.sh index 15fd069..20d60a0 100644 --- a/.devcontainer/post_create.sh +++ b/.devcontainer/post_create.sh @@ -1,7 +1,7 @@ #!/bin/bash # install developer dependencies -pip install .[dev] +pip install -e .[dev] # make sure the developer uses pre-commit hooks pre-commit install diff --git a/README.md b/README.md index cfdbee2..4a23e32 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ If not using the devcontainer you need to install the package with the development dependencies and install the pre-commit hook before commiting any changes: ```bash -pip install .[dev] +pip install -e .[dev] pre-commit install ``` From 0908891a47a77802e2d7c50cbce0bdc079afab12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Fri, 2 Feb 2024 23:08:02 +0000 Subject: [PATCH 29/31] Add documentation for using Nsight Systems instead of the default Nsight Compute profiling tool --- docs/source/magics.rst | 17 +++++++++++++---- docs/source/usage.rst | 28 +++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/docs/source/magics.rst b/docs/source/magics.rst index 28a3bf1..1720167 100644 --- a/docs/source/magics.rst +++ b/docs/source/magics.rst @@ -36,15 +36,24 @@ Options .. _profile: -p, --profile - Boolean. If set, runs the NVIDIA Nsight Compute profiler whose - output is appended to standard output. + Boolean. 
If set, runs the NVIDIA Nsight Compute (or NVIDIA Nsight Systems + if changed via the \-\-profiler option) profiler whose output is appended to + standard output. + +.. _profiler: + +-l, --profiler + String. Can either be "ncu" (the default) to use NVIDIA Nsight Compute + profiling tool, or "nsys" to use NVIDIA Nsight Systems profiling tool. .. _profiler_args: -a, --profiler-args String. Optional profiler arguments that can be space separated - by wrapping them in double quotes. See all options here: - `Nsight Compute CLI `_ + by wrapping them in double quotes. Will be passed to the profiler selected + by the \-\-profiler option.. See profiler options here: + `Nsight Compute `_ + or `Nsight Systems `_. .. _compiler_args: diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 50fe879..efaddaf 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -225,10 +225,11 @@ Profiling --------- Another important feature of nvcc4jupyter is its integration with the NVIDIA -Nsight Compute profiler, which you need to make sure is installed and its -executable can be found in a directory in your PATH environment variable. +Nsight Compute / NVIDIA Nsight Systems profilers, which you need to make sure +are installed and the executables can be found in a directory in your PATH +environment variable. -In order to use it and provide the profiler with custom arguments, simply run: +To profile using Nsight Compute with custom arguments: .. code-block:: c++ @@ -256,6 +257,27 @@ Running the cell above will compile and execute the vector addition code in the Compute (SM) Throughput % 1.19 ----------------------- ------------- ------------ +To profile using Nsight Systems with custom arguments: + +.. code-block:: c++ + + %cuda_group_run --group "vector_add" --profiler nsys --profile --profiler-args "profile --stats=true" + +Running the cell above will compile and execute the vector addition code in the +"vector_add" group and profile it with Nsight Systems. 
The output will contain +multiple tables, one of which will look similar to this: + +.. code-block:: + + [5/8] Executing 'cuda_api_sum' stats report + + Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name + -------- --------------- --------- ------------- ------------- ----------- ----------- ----------- ---------------------- + 77.3 200,844,276 1 200,844,276.0 200,844,276.0 200,844,276 200,844,276 0.0 cudaMalloc + 22.6 58,594,762 2 29,297,381.0 29,297,381.0 29,153,999 29,440,763 202,772.8 cudaMemcpy + 0.1 305,450 1 305,450.0 305,450.0 305,450 305,450 0.0 cudaLaunchKernel + 0.0 1,970 1 1,970.0 1,970.0 1,970 1,970 0.0 cuModuleGetLoadingMode + Compiler arguments ------------------ From c3b8524be6dbf186d11083aacf1a1097262f2c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosmin=20=C8=98tefan=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:49:08 +0100 Subject: [PATCH 30/31] Fix cuda typo --- docs/source/magics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/magics.rst b/docs/source/magics.rst index 1720167..a0394de 100644 --- a/docs/source/magics.rst +++ b/docs/source/magics.rst @@ -21,7 +21,7 @@ Usage - ``%%cuda``: Compile and run this cell. - ``%%cuda -p``: Also runs the Nsight Compute profiler. - ``%%cuda -p -a ""``: Also runs the Nsight Compute profiler. - - ``%%cude -c " Date: Fri, 16 Feb 2024 22:50:58 +0000 Subject: [PATCH 31/31] Mention Nsight Systems in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4a23e32..bc38b90 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ to own a GPU yourself. 
Here are just a few of the things that nvcc4jupyter does well: - [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world) - - [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling) + - [Profile your code with NVIDIA Nsight Compute or Nsight Systems](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling) - [Compile your code with external libraries (e.g. OpenCV)](https://nvcc4jupyter.readthedocs.io/en/latest/notebooks.html#compiling-with-external-libraries) - [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups)