From eac79eff44737e9fd07bac385057f317b15e0256 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Wed, 7 Dec 2022 03:47:28 -0800 Subject: [PATCH 01/11] create cuda stream on each device --- nvbench/cuda_stream.cuh | 21 ++++++++++++++++----- nvbench/state.cuh | 3 ++- nvbench/state.cxx | 2 ++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh index 2c7536c..6329608 100644 --- a/nvbench/cuda_stream.cuh +++ b/nvbench/cuda_stream.cuh @@ -18,11 +18,14 @@ #pragma once -#include - #include +#include +#include +#include + #include +#include namespace nvbench { @@ -42,10 +45,18 @@ struct cuda_stream * Constructs a cuda_stream that owns a new stream, created with * `cudaStreamCreate`. */ - cuda_stream() - : m_stream{[]() { + cuda_stream(std::optional device) + : m_stream{[device]() { cudaStream_t s; - NVBENCH_CUDA_CALL(cudaStreamCreate(&s)); + if (device.has_value()) + { + nvbench::detail::device_scope scope_guard{device.value().get_id()}; + NVBENCH_CUDA_CALL(cudaStreamCreate(&s)); + } + else + { + NVBENCH_CUDA_CALL(cudaStreamCreate(&s)); + } return s; }(), stream_deleter{true}} diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 53c7413..6a3afc9 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -261,7 +261,6 @@ private: std::optional device, std::size_t type_config_index); - nvbench::cuda_stream m_cuda_stream; std::reference_wrapper m_benchmark; nvbench::named_values m_axis_values; std::optional m_device; @@ -277,6 +276,8 @@ private: nvbench::float64_t m_skip_time; nvbench::float64_t m_timeout; + nvbench::cuda_stream m_cuda_stream; + // Deadlock protection. See blocking_kernel's class doc for details. nvbench::float64_t m_blocking_kernel_timeout{30.0}; diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 3cf105c..f6f8993 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -41,6 +41,7 @@ state::state(const benchmark_base &bench) , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} + , m_cuda_stream{std::nullopt} {} state::state(const benchmark_base &bench, @@ -58,6 +59,7 @@ state::state(const benchmark_base &bench, , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} + , m_cuda_stream{m_device} {} nvbench::int64_t state::get_int64(const std::string &axis_name) const From 8e85886224b7ebbb73a9e41a879eb7130026693d Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Wed, 7 Dec 2022 04:15:02 -0800 Subject: [PATCH 02/11] fixes include order --- nvbench/cuda_stream.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh index 6329608..9bae1c4 100644 --- a/nvbench/cuda_stream.cuh +++ b/nvbench/cuda_stream.cuh @@ -18,12 +18,12 @@ #pragma once -#include - #include #include #include +#include + #include #include From 1301b52e48a587d92040a6cc8a222f5f3b4def1a Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 8 Dec 2022 01:47:41 -0800 Subject: [PATCH 03/11] adds device documentation on stream ctor --- nvbench/cuda_stream.cuh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh index 9bae1c4..025b66d 100644 --- a/nvbench/cuda_stream.cuh +++ b/nvbench/cuda_stream.cuh @@ -42,10 +42,12 @@ namespace nvbench struct cuda_stream { /** - * Constructs a cuda_stream that owns a new stream, created with - * `cudaStreamCreate`. + * Constructs a cuda_stream that owns a new stream, created with `cudaStreamCreate`. + * + * @param device The device that this stream should be associated with. If no device is provided, + * the stream will be associated with the device that is active at the call time. */ - cuda_stream(std::optional device) + explicit cuda_stream(std::optional device) : m_stream{[device]() { cudaStream_t s; if (device.has_value()) From 8b191fe22b398d01480ed58aa25849f498ffc75e Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Wed, 18 Jan 2023 07:17:44 -0800 Subject: [PATCH 04/11] adds back default ctor for cuda_stream --- nvbench/cuda_stream.cuh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh index 025b66d..1cb1460 100644 --- a/nvbench/cuda_stream.cuh +++ b/nvbench/cuda_stream.cuh @@ -64,6 +64,14 @@ struct cuda_stream stream_deleter{true}} {} + /** + * @brief Constructs a new cuda_stream tha is asociated with the device that is active at the call + * time. + */ + cuda_stream() + : cuda_stream(std::nullopt) + {} + /** * Constructs a `cuda_stream` from an explicit cudaStream_t. * From ff4e811207789206320a109f361445701440daf3 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 19 Jan 2023 02:23:57 -0800 Subject: [PATCH 05/11] adds tests for cuda_stream --- testing/CMakeLists.txt | 1 + testing/cuda_stream.cu | 72 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 testing/cuda_stream.cu diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 4928ebc..01cacaa 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -3,6 +3,7 @@ set(test_srcs benchmark.cu create.cu cuda_timer.cu + cuda_stream.cu cpu_timer.cu enum_type_list.cu float64_axis.cu diff --git a/testing/cuda_stream.cu b/testing/cuda_stream.cu new file mode 100644 index 0000000..a853c50 --- /dev/null +++ b/testing/cuda_stream.cu @@ -0,0 +1,72 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "test_asserts.cuh" + +#include + +namespace +{ +/** + * @brief Queries and returns the device id that the given \p cuda_stream is associated with + * + * @param cuda_stream The stream to get the device id for + * @return The device id that \p cuda_stream is associated with + */ +int get_device_of_stream(cudaStream_t cuda_stream) +{ + CUcontext ctx; + cuStreamGetCtx(CUstream{cuda_stream}, &ctx); + cuCtxPushCurrent(ctx); + CUdevice device_id{}; + cuCtxGetDevice(&device_id); + cuCtxPopCurrent(&ctx); + return static_cast(device_id); +} +} // namespace + +void test_basic() +{ + // Get devices + auto devices = nvbench::device_manager::get().get_devices(); + + // Iterate over devices + for (auto const &device_info : devices) + { + // Create stream on the device before it becomes the active device + nvbench::cuda_stream device_stream(device_info); + + // Verify cuda stream is associated with the correct cuda device + ASSERT(get_device_of_stream(device_stream.get_stream()) == device_info.get_id()); + + // Set the device as active device + device_info.set_active(); + + // Create the stream (implicitly) on the device that is currently active + nvbench::cuda_stream current_device_stream{}; + + // Verify the cuda stream was in fact associated with the currently active device + ASSERT(get_device_of_stream(current_device_stream.get_stream()) == device_info.get_id()); + } +} + +int main() { test_basic(); } From 7c82037acb7942751c70ba40bb99655c8c1b2980 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 19 Jan 2023 02:35:04 -0800 Subject: [PATCH 06/11] adds check for status returned from cuda driver api --- testing/cuda_stream.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/testing/cuda_stream.cu b/testing/cuda_stream.cu index a853c50..08a5f35 100644 --- a/testing/cuda_stream.cu +++ b/testing/cuda_stream.cu @@ -35,11 +35,11 @@ namespace int get_device_of_stream(cudaStream_t cuda_stream) { CUcontext ctx; - cuStreamGetCtx(CUstream{cuda_stream}, &ctx); - cuCtxPushCurrent(ctx); + NVBENCH_DRIVER_API_CALL(cuStreamGetCtx(CUstream{cuda_stream}, &ctx)); + NVBENCH_DRIVER_API_CALL(cuCtxPushCurrent(ctx)); CUdevice device_id{}; - cuCtxGetDevice(&device_id); - cuCtxPopCurrent(&ctx); + NVBENCH_DRIVER_API_CALL(cuCtxGetDevice(&device_id)); + NVBENCH_DRIVER_API_CALL(cuCtxPopCurrent(&ctx)); return static_cast(device_id); } } // namespace From 14079ae14538bad2f8e6cf7c66213a58baadeca9 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 26 Jan 2023 01:06:24 -0800 Subject: [PATCH 07/11] guard cuda driver API calls by cupti macro --- testing/cuda_stream.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/testing/cuda_stream.cu b/testing/cuda_stream.cu index 08a5f35..a721a6e 100644 --- a/testing/cuda_stream.cu +++ b/testing/cuda_stream.cu @@ -16,6 +16,7 @@ * limitations under the License. */ +#include #include #include #include @@ -26,6 +27,7 @@ namespace { +#ifdef NVBENCH_HAS_CUPTI /** * @brief Queries and returns the device id that the given \p cuda_stream is associated with * @@ -42,10 +44,12 @@ int get_device_of_stream(cudaStream_t cuda_stream) NVBENCH_DRIVER_API_CALL(cuCtxPopCurrent(&ctx)); return static_cast(device_id); } +#endif } // namespace void test_basic() { +#ifdef NVBENCH_HAS_CUPTI // Get devices auto devices = nvbench::device_manager::get().get_devices(); @@ -67,6 +71,7 @@ void test_basic() // Verify the cuda stream was in fact associated with the currently active device ASSERT(get_device_of_stream(current_device_stream.get_stream()) == device_info.get_id()); } +#endif } int main() { test_basic(); } From b6a29eca7373711c36ddf762a8763938711cae9c Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Tue, 31 Jan 2023 08:58:00 -0800 Subject: [PATCH 08/11] limit states test to available devices --- testing/state_generator.cu | 58 ++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/testing/state_generator.cu b/testing/state_generator.cu index f75be02..c0a0549 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include "test_asserts.cuh" @@ -719,20 +720,44 @@ void test_create_with_masked_types() void test_devices() { - const auto device_0 = nvbench::device_info{0, {}}; - const auto device_1 = nvbench::device_info{1, {}}; - const auto device_2 = nvbench::device_info{2, {}}; + // Get devices + auto devices = nvbench::device_manager::get().get_devices(); + + // Generate reference table + std::string ref = R"expected( +| State | Device | S | I |)expected"; + const std::vector device_refs = { + R"expected( +| 0 | 0 | foo | 2 | +| 1 | 0 | bar | 2 | +| 2 | 0 | foo | 4 | +| 3 | 0 | bar | 4 |)expected", + R"expected(| 4 | 1 | foo | 2 | +| 5 | 1 | bar | 2 | +| 6 | 1 | foo | 4 | +| 7 | 1 | bar | 4 |)expected", + R"expected(| 8 | 2 | foo | 2 | +| 9 | 2 | bar | 2 | +| 10 | 2 | foo | 4 | +| 11 | 2 | bar | 4 |)expected"}; + + const auto num_devices_to_test = std::min(devices.size(), device_refs.size()); + std::vector devices_to_test{}; + for (std::size_t device_id = 0; device_id < num_devices_to_test; device_id++) + { + ref += device_refs[device_id] + "\n"; + devices_to_test.push_back(devices[device_id]); + } dummy_bench bench; - bench.set_devices({device_0, device_1, device_2}); + bench.set_devices(devices_to_test); bench.add_string_axis("S", {"foo", "bar"}); bench.add_int64_axis("I", {2, 4}); - const std::vector states = - nvbench::detail::state_generator::create(bench); + const std::vector states = nvbench::detail::state_generator::create(bench); - // 3 devices * 4 axis configs = 12 total states - ASSERT(states.size() == 12); + // N devices * 4 axis configs = 4N total states + ASSERT(states.size() == 4 * devices_to_test.size()); fmt::memory_buffer buffer; const std::string table_format = "| {:^5} | {:^6} | {:^5} | {:^3} |\n"; @@ -751,23 +776,6 @@ void test_devices() state.get_int64("I")); } - const std::string ref = - R"expected( -| State | Device | S | I | -| 0 | 0 | foo | 2 | -| 1 | 0 | bar | 2 | -| 2 | 0 | foo | 4 | -| 3 | 0 | bar | 4 | -| 4 | 1 | foo | 2 | -| 5 | 1 | bar | 2 | -| 6 | 1 | foo | 4 | -| 7 | 1 | bar | 4 | -| 8 | 2 | foo | 2 | -| 9 | 2 | bar | 2 | -| 10 | 2 | foo | 4 | -| 11 | 2 | bar | 4 | -)expected"; - const std::string test = fmt::to_string(buffer); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } From 7281bbd79a1617cf4520c2475268ec7fb5be1f00 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Thu, 2 Feb 2023 04:32:16 -0800 Subject: [PATCH 09/11] revert state_generator test changes --- testing/state_generator.cu | 58 ++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/testing/state_generator.cu b/testing/state_generator.cu index c0a0549..f75be02 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -22,7 +22,6 @@ #include #include #include -#include #include "test_asserts.cuh" @@ -720,44 +719,20 @@ void test_create_with_masked_types() void test_devices() { - // Get devices - auto devices = nvbench::device_manager::get().get_devices(); - - // Generate reference table - std::string ref = R"expected( -| State | Device | S | I |)expected"; - const std::vector device_refs = { - R"expected( -| 0 | 0 | foo | 2 | -| 1 | 0 | bar | 2 | -| 2 | 0 | foo | 4 | -| 3 | 0 | bar | 4 |)expected", - R"expected(| 4 | 1 | foo | 2 | -| 5 | 1 | bar | 2 | -| 6 | 1 | foo | 4 | -| 7 | 1 | bar | 4 |)expected", - R"expected(| 8 | 2 | foo | 2 | -| 9 | 2 | bar | 2 | -| 10 | 2 | foo | 4 | -| 11 | 2 | bar | 4 |)expected"}; - - const auto num_devices_to_test = std::min(devices.size(), device_refs.size()); - std::vector devices_to_test{}; - for (std::size_t device_id = 0; device_id < num_devices_to_test; device_id++) - { - ref += device_refs[device_id] + "\n"; - devices_to_test.push_back(devices[device_id]); - } + const auto device_0 = nvbench::device_info{0, {}}; + const auto device_1 = nvbench::device_info{1, {}}; + const auto device_2 = nvbench::device_info{2, {}}; dummy_bench bench; - bench.set_devices(devices_to_test); + bench.set_devices({device_0, device_1, device_2}); bench.add_string_axis("S", {"foo", "bar"}); bench.add_int64_axis("I", {2, 4}); - const std::vector states = nvbench::detail::state_generator::create(bench); + const std::vector states = + nvbench::detail::state_generator::create(bench); - // N devices * 4 axis configs = 4N total states - ASSERT(states.size() == 4 * devices_to_test.size()); + // 3 devices * 4 axis configs = 12 total states + ASSERT(states.size() == 12); fmt::memory_buffer buffer; const std::string table_format = "| {:^5} | {:^6} | {:^5} | {:^3} |\n"; @@ -776,6 +751,23 @@ void test_devices() state.get_int64("I")); } + const std::string ref = + R"expected( +| State | Device | S | I | +| 0 | 0 | foo | 2 | +| 1 | 0 | bar | 2 | +| 2 | 0 | foo | 4 | +| 3 | 0 | bar | 4 | +| 4 | 1 | foo | 2 | +| 5 | 1 | bar | 2 | +| 6 | 1 | foo | 4 | +| 7 | 1 | bar | 4 | +| 8 | 2 | foo | 2 | +| 9 | 2 | bar | 2 | +| 10 | 2 | foo | 4 | +| 11 | 2 | bar | 4 | +)expected"; + const std::string test = fmt::to_string(buffer); ASSERT_MSG(test == ref, "Expected:\n\"{}\"\n\nActual:\n\"{}\"", ref, test); } From 85645cb16914d27e57c042b5e67bd8776ce873f5 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 3 Feb 2023 14:37:55 -0800 Subject: [PATCH 10/11] lazily initializes cuda stream during measurements --- nvbench/detail/measure_cold.cu | 8 +++++++- nvbench/detail/measure_cupti.cu | 8 +++++++- nvbench/detail/measure_hot.cu | 8 +++++++- nvbench/state.cuh | 7 +++++-- nvbench/state.cxx | 2 +- testing/state.cu | 3 +++ 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 69ceb7e..2d65fe5 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -39,7 +39,13 @@ namespace nvbench::detail measure_cold_base::measure_cold_base(state &exec_state) : m_state{exec_state} - , m_launch{m_state.get_cuda_stream()} + , m_launch{nvbench::launch([this]() -> decltype(auto) { + if (!m_state.get_cuda_stream().has_value()) + { + m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()}); + } + return m_state.get_cuda_stream().value(); + }())} , m_run_once{exec_state.get_run_once()} , m_no_block{exec_state.get_disable_blocking_kernel()} , m_min_samples{exec_state.get_min_samples()} diff --git a/nvbench/detail/measure_cupti.cu b/nvbench/detail/measure_cupti.cu index e583cd5..52bcd4e 100644 --- a/nvbench/detail/measure_cupti.cu +++ b/nvbench/detail/measure_cupti.cu @@ -165,7 +165,13 @@ measure_cupti_base::measure_cupti_base(state &exec_state) // (formatter doesn't handle `try :` very well...) try : m_state{exec_state} - , m_launch{m_state.get_cuda_stream()} + , m_launch{[this]()->decltype(auto) { + if (!m_state.get_cuda_stream().has_value()) + { + m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()}); + } + return m_state.get_cuda_stream().value(); + }()} , m_cupti{*m_state.get_device(), add_metrics(m_state)} {} // clang-format on diff --git a/nvbench/detail/measure_hot.cu b/nvbench/detail/measure_hot.cu index 9497122..2f231f1 100644 --- a/nvbench/detail/measure_hot.cu +++ b/nvbench/detail/measure_hot.cu @@ -37,7 +37,13 @@ namespace nvbench::detail measure_hot_base::measure_hot_base(state &exec_state) : m_state{exec_state} - , m_launch{m_state.get_cuda_stream()} + , m_launch{nvbench::launch([this]() -> decltype(auto) { + if (!m_state.get_cuda_stream().has_value()) + { + m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()}); + } + return m_state.get_cuda_stream().value(); + }())} , m_min_samples{exec_state.get_min_samples()} , m_min_time{exec_state.get_min_time()} , m_skip_time{exec_state.get_skip_time()} diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 6a3afc9..5d72477 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -63,7 +63,10 @@ struct state state &operator=(const state &) = delete; state &operator=(state &&) = default; - [[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const { return m_cuda_stream; } + [[nodiscard]] const std::optional &get_cuda_stream() const + { + return m_cuda_stream; + } void set_cuda_stream(nvbench::cuda_stream &&stream) { m_cuda_stream = std::move(stream); } /// The CUDA device associated with with this benchmark state. May be @@ -276,7 +279,7 @@ private: nvbench::float64_t m_skip_time; nvbench::float64_t m_timeout; - nvbench::cuda_stream m_cuda_stream; + std::optional m_cuda_stream; // Deadlock protection. See blocking_kernel's class doc for details. nvbench::float64_t m_blocking_kernel_timeout{30.0}; diff --git a/nvbench/state.cxx b/nvbench/state.cxx index f6f8993..2f4e284 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -59,7 +59,7 @@ state::state(const benchmark_base &bench, , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} - , m_cuda_stream{m_device} + , m_cuda_stream{std::nullopt} {} nvbench::int64_t state::get_int64(const std::string &axis_name) const diff --git a/testing/state.cu b/testing/state.cu index a2300d1..313aab9 100644 --- a/testing/state.cu +++ b/testing/state.cu @@ -57,6 +57,9 @@ void test_streams() state_tester state{bench}; + // Confirm that the stream hasn't been initialized yet + ASSERT(!state.get_cuda_stream().has_value()); + // Test non-owning stream cudaStream_t default_stream = 0; state.set_cuda_stream(nvbench::cuda_stream{default_stream, false}); From 78fa3c65089be398968cd302f80f63315003d934 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 3 Feb 2023 23:12:03 -0800 Subject: [PATCH 11/11] fixes format --- nvbench/detail/measure_cupti.cu | 2 +- testing/state.cu | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/nvbench/detail/measure_cupti.cu b/nvbench/detail/measure_cupti.cu index 52bcd4e..de6abf1 100644 --- a/nvbench/detail/measure_cupti.cu +++ b/nvbench/detail/measure_cupti.cu @@ -165,7 +165,7 @@ measure_cupti_base::measure_cupti_base(state &exec_state) // (formatter doesn't handle `try :` very well...) try : m_state{exec_state} - , m_launch{[this]()->decltype(auto) { + , m_launch{[this]() -> decltype(auto) { if (!m_state.get_cuda_stream().has_value()) { m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()}); diff --git a/testing/state.cu b/testing/state.cu index 313aab9..0064556 100644 --- a/testing/state.cu +++ b/testing/state.cu @@ -43,8 +43,7 @@ struct state_tester : public nvbench::state void set_param(std::string name, T &&value) { this->state::m_axis_values.set_value(std::move(name), - nvbench::named_values::value_type{ - std::forward(value)}); + nvbench::named_values::value_type{std::forward(value)}); } }; } // namespace nvbench::detail