diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 27245ad59..9b50e314a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2019-2024 Advanced Micro Devices, Inc. +# Copyright 2019-2025 Advanced Micro Devices, Inc. # ######################################################################## include: @@ -23,6 +23,10 @@ stages: - test # Tests if unit tests are passing (CTest) - benchmark # Runs the non-internal benchmarks (Google Benchmark) +workflow: + rules: + - if: $CI_MERGE_REQUEST_LABELS !~ /CI Skip/ + variables: # Helper variables PACKAGE_DIR: $BUILD_DIR/package @@ -74,7 +78,6 @@ copyright-date: -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D BUILD_TEST=OFF - -D BUILD_HIPSTDPAR_TEST=OFF -D BUILD_EXAMPLE=OFF -D ROCM_DEP_ROCMCORE=OFF -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c @@ -116,7 +119,6 @@ copyright-date: -D CMAKE_BUILD_TYPE=$BUILD_TYPE -D BUILD_$BUILD_TARGET=ON -D GPU_TARGETS=$GPU_TARGETS - -D AMDGPU_TEST_TARGETS=$GPU_TARGETS -D RNG_SEED_COUNT=$rng_seed_count -D PRNG_SEEDS=$prng_seeds -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c @@ -137,7 +139,7 @@ copyright-date: - $CI_PROJECT_DIR/build/.ninja_log exclude: - $CI_PROJECT_DIR/build/**/*.o - expire_in: 2 weeks + expire_in: 1 day build:cmake-latest: stage: build @@ -183,7 +185,7 @@ build:package: paths: - $PACKAGE_DIR/rocthrust*.deb - $PACKAGE_DIR/rocthrust*.zip - expire_in: 2 weeks + expire_in: 1 day build:windows: stage: build @@ -231,7 +233,7 @@ build:windows: paths: - $CI_PROJECT_DIR/build/ - $ROCPRIM_DIR/build/install - expire_in: 2 weeks + expire_in: 1 day test:package: stage: test @@ -268,13 +270,18 @@ test:doc: extends: - .build:docs - .rules:test + artifacts: + paths: + - $DOCS_DIR/_build/html/ + expire_in: 2 weeks -test: +.test:rocm: stage: test + tags: + - rocm + - $GPU extends: - .cmake-minimum - - .rules:test - - .gpus:rocm needs: - job: build:cmake-minimum parallel: @@ -294,10 +301,76 @@ 
test: - HSA_ENABLE_SDMA=0 ctest --output-on-failure --repeat-until-fail 2 - --tests-regex $GPU_TARGET --resource-spec-file ./resources.json --parallel $PARALLEL_JOBS +test:rocm-any-gpu: + variables: + GPU: "" + PARALLEL_JOBS: 1 + extends: + - .test:rocm + rules: + - if: $CI_MERGE_REQUEST_TITLE =~ /Draft:/ && $CI_MERGE_REQUEST_LABELS !~ /Arch::/ + +test:rocm-label-arch: + extends: + - .gpus:rocm + - .test:rocm + - .rules:arch-labels + +test:rocm-all-gpus: + variables: + SHOULD_BE_UNDRAFTED: "true" + extends: + - .gpus:rocm + - .test:rocm + - .rules:test + +test-bitwise-repro-generate: + stage: test + extends: + - .cmake-minimum + - .rules:manual + - .gpus:rocm + needs: + - job: build:cmake-minimum + parallel: + matrix: + - BUILD_TYPE: Release + BUILD_TARGET: TEST + BUILD_VERSION: 14 + cache: + key: database + paths: + - $CI_PROJECT_DIR/bitwise.db + policy: push + script: + - cd $CI_PROJECT_DIR/build/test/ + - ROCTHRUST_BWR_PATH=$CI_PROJECT_DIR/bitwise.db ROCTHRUST_BWR_GENERATE=1 ./reproducibility.hip + +test-bitwise-repro: + stage: test + extends: + - .cmake-minimum + - .rules:test + - .gpus:rocm + needs: + - job: build:cmake-minimum + parallel: + matrix: + - BUILD_TYPE: Release + BUILD_TARGET: TEST + BUILD_VERSION: 14 + cache: + key: database + paths: + - $CI_PROJECT_DIR/bitwise.db + policy: pull + script: + - cd $CI_PROJECT_DIR/build/test/ + - ROCTHRUST_BWR_PATH=$CI_PROJECT_DIR/bitwise.db ./reproducibility.hip + .rocm-windows:test: extends: - .gpus:rocm-windows @@ -358,7 +431,7 @@ build:cuda-and-omp: tags: - build variables: - CCCL_GIT_BRANCH: v2.5.0 + CCCL_GIT_BRANCH: v2.6.0 CCCL_DIR: ${CI_PROJECT_DIR}/cccl needs: [] script: @@ -402,16 +475,17 @@ build:cuda-and-omp: - $CCCL_DIR/thrust/cmake/ThrustRunTest.cmake - $CCCL_DIR/thrust/cmake/ThrustRunExample.cmake - $CI_PROJECT_DIR/build/.ninja_log - expire_in: 1 week + expire_in: 1 day -test:cuda-and-omp: +.test:cuda-and-omp: stage: test + tags: + - nvcc + - $GPU needs: - build:cuda-and-omp extends: - .nvcc - - 
.gpus:nvcc - - .rules:test before_script: # This is only needed because of the legacy before_script in .gpus:nvcc would otherwise overwrite before_script - !reference [.nvcc, before_script] @@ -420,6 +494,23 @@ test:cuda-and-omp: # These tests are executed on the build stage because they require sources - ctest --output-on-failure --exclude-regex "thrust.example.cmake.add_subdir|thrust.test.cmake.check_source_files" +test:cuda-and-omp-any-gpu: + variables: + GPU: "" + PARALLEL_JOBS: 1 + extends: + - .test:cuda-and-omp + rules: + - if: $CI_MERGE_REQUEST_TITLE =~ /Draft:/ + +test:cuda-and-omp-all-gpus: + variables: + SHOULD_BE_UNDRAFTED: "true" + extends: + - .gpus:nvcc + - .test:cuda-and-omp + - .rules:test + .benchmark-base: stage: benchmark extends: @@ -430,7 +521,12 @@ test:cuda-and-omp: benchmark: needs: - - build:cmake-minimum + - job: build:cmake-minimum + parallel: + matrix: + - BUILD_TYPE: Release + BUILD_TARGET: BENCHMARKS + BUILD_VERSION: 14 extends: - .cmake-minimum - .gpus:rocm @@ -453,4 +549,4 @@ benchmark: artifacts: paths: - ${BENCHMARK_RESULT_DIR} - expire_in: 1 week + expire_in: 1 day diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7bdfeaf..cc43581ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ Documentation for rocThrust available at ### Added -* gfx950 support +* Added gfx950 support. +* Merged changes from upstream CCCL/thrust 2.6.0 ## rocThrust 3.3.0 for ROCm 6.4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 56933c451..3a47990f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2019-2024 Advanced Micro Devices, Inc. +# Copyright 2019-2025 Advanced Micro Devices, Inc. 
# ######################################################################## cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR) @@ -22,6 +22,12 @@ endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +if (CMAKE_CXX_STANDARD EQUAL 14) + message(WARNING "C++14 will be deprecated in the next major release") +elseif(NOT CMAKE_CXX_STANDARD EQUAL 17) + message(FATAL_ERROR "Only C++14 and C++17 are supported") +endif() + # Set HIP flags set(CMAKE_HIP_STANDARD 17) set(CMAKE_HIP_STANDARD_REQUIRED ON) @@ -39,6 +45,7 @@ option(BUILD_HIPSTDPAR_TEST_WITH_TBB "Build hipstdpar tests with TBB" OFF) option(BUILD_EXAMPLES "Build examples" OFF) option(BUILD_BENCHMARKS "Build benchmarks" OFF) option(DOWNLOAD_ROCPRIM "Download rocPRIM and do not search for rocPRIM package" OFF) +cmake_dependent_option(DOWNLOAD_ROCRAND "Download rocRAND and do not search for rocRAND package" OFF BUILD_BENCHMARKS OFF) option(DOWNLOAD_ROCRAND "Download rocRAND and do not search for rocRAND package" OFF) option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) cmake_dependent_option(ENABLE_UPSTREAM_TESTS "Enable upstream (thrust) tests" ON BUILD_TEST OFF) @@ -130,16 +137,9 @@ if (NOT THRUST_HOST_SYSTEM IN_LIST THRUST_HOST_SYSTEM_OPTIONS) ) endif () -if(DISABLE_WERROR) - add_compile_options(-Wall -Wextra) -else() - add_compile_options(-Wall -Wextra -Werror) -endif() - -if (CMAKE_CXX_STANDARD EQUAL 14) - message(WARNING "C++14 will be deprecated in the next major release") -elseif(NOT CMAKE_CXX_STANDARD EQUAL 17) - message(FATAL_ERROR "Only C++14 and C++17 are supported") +set(COMPILE_OPTIONS -Wall -Wextra) +if(NOT DISABLE_WERROR) + list(APPEND COMPILE_OPTIONS -Werror) endif() if (WIN32) @@ -149,13 +149,13 @@ endif() # Address Sanitizer if(BUILD_ADDRESS_SANITIZER) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") + list(APPEND COMPILE_OPTIONS -fsanitize=address 
-shared-libasan) add_link_options(-fuse-ld=lld) endif() # Setup VERSION rocm_setup_version(VERSION "3.3.0") +math(EXPR rocthrust_VERSION_NUMBER "${rocthrust_VERSION_MAJOR} * 100000 + ${rocthrust_VERSION_MINOR} * 100 + ${rocthrust_VERSION_PATCH}") # Print configuration summary include(cmake/Summary.cmake) @@ -191,7 +191,6 @@ endif() # Benchmarks if(BUILD_BENCHMARKS) add_subdirectory(benchmarks) - add_subdirectory(internal/benchmark) endif() #Create header wrapper for backward compatibility diff --git a/README.md b/README.md index 83325749f..88336dcbd 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # rocThrust > [!NOTE] -> The published documentation is available at [rocThrust](https://rocm.docs.amd.com/projects/rocThrust/en/latest/) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the `docs` folder of this repository. As with all ROCm projects, the documentation is open source. For more information on contributing to the documentation, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). +> The published rocThrust documentation is available [here](https://rocm.docs.amd.com/projects/rocThrust/en/latest/) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the `docs` folder of this repository. As with all ROCm projects, the documentation is open source. For more information on contributing to the documentation, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). Thrust is a parallel algorithm library. 
It has been ported to [HIP](https://github.com/ROCm/HIP) and [ROCm](https://www.github.com/ROCm/ROCm), which use @@ -35,28 +35,6 @@ For ROCm hardware requirements, refer to: * [Linux support](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) * [Windows support](https://rocm.docs.amd.com/projects/install-on-windows/en/latest/reference/system-requirements.html) -## Documentation - -Documentation for rocThrust available at -[https://rocm.docs.amd.com/projects/rocThrust/en/latest/](https://rocm.docs.amd.com/projects/rocThrust/en/latest/). - -You can build our documentation locally using the following commands: - -```shell -# Go to rocThrust docs directory -cd rocThrust; cd docs - -# Install Python dependencies -python3 -m pip install -r sphinx/requirements.txt - -# Build the documentation -python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html - -# For e.g. serve the HTML docs locally -cd _build/html -python3 -m http.server -``` - ## Build and install ```sh @@ -309,6 +287,58 @@ ctest --output-on-failure * Notice that oneTBB (oneAPI TBB) may fail to compile when libstdc++-9 or -10 is used, due to them using legacy TBB interfaces that are incompatible with the oneTBB ones (see the [release notes](https://www.intel.com/content/www/us/en/developer/articles/release-notes/intel-oneapi-threading-building-blocks-release-notes.html)). * CMake (3.10.2 or later) +## Building the documentation locally + +### Requirements + +#### Doxygen + +The build system uses Doxygen [version 1.9.4](https://github.com/doxygen/doxygen/releases/tag/Release_1_9_4). You can try using a newer version, but that might cause issues. 
+ +After you have downloaded Doxygen version 1.9.4: + +```shell +# Add doxygen to your PATH +echo 'export PATH=/path/to/doxygen-1.9.4/bin:$PATH' >> ~/.bashrc + +# Apply the updated .bashrc +source ~/.bashrc + +# Confirm that you are using version 1.9.4 +doxygen --version +``` + +#### Python + +The build system uses Python version 3.10. You can try using a newer version, but that might cause issues. + +You can install Python 3.10 alongside your other Python versions using [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation): + +```shell +# Install Python 3.10 +pyenv install 3.10 + +# Create a Python 3.10 virtual environment +pyenv virtualenv 3.10 venv_rocthrust + +# Activate the virtual environment +pyenv activate venv_rocthrust +``` + +### Building + +After cloning this repository, and `cd`ing into it: + +```shell +# Install Python dependencies +python3 -m pip install -r docs/sphinx/requirements.txt + +# Build the documentation +python3 -m sphinx -T -E -b html -d docs/_build/doctrees -D language=en docs docs/_build/html +``` + +You can then open `docs/_build/html/index.html` in your browser to view the documentation. + ## Support You can report bugs and feature requests through the GitHub diff --git a/benchmarks/bench/equal/basic.cu b/benchmarks/bench/equal/basic.cu new file mode 100644 index 000000000..3100d5b87 --- /dev/null +++ b/benchmarks/bench/equal/basic.cu @@ -0,0 +1,165 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Benchmark utils +#include "../../bench_utils/bench_utils.hpp" + +// rocThrust +#include +#include +#include + +// Google Benchmark +#include + +// STL +#include +#include +#include + +struct basic +{ + template + float64_t run(thrust::device_vector& a, thrust::device_vector& b, Policy policy) + { + bench_utils::gpu_timer d_timer; + + d_timer.start(0); + thrust::equal(policy, a.begin(), a.end(), b.begin()); + d_timer.stop(0); + + return d_timer.get_duration(); + } +}; + +template +void run_benchmark(benchmark::State& state, + const std::size_t elements, + const std::string , /*seed_type*/ + const float64_t common_prefix_ratio) +{ + // Benchmark object + Benchmark benchmark {}; + + // GPU times + std::vector gpu_times; + + thrust::device_vector a(elements, T{1}); + thrust::device_vector b(elements, T{1}); + + const auto same_elements = std::min(static_cast(elements * common_prefix_ratio), elements); + + bench_utils::caching_allocator_t alloc; + thrust::detail::device_t policy {}; + + thrust::fill(policy(alloc), b.begin() + same_elements, b.end(), T{2}); + + for(auto _ : state) + { + float64_t duration = benchmark.template run(a, b, policy(alloc)); + state.SetIterationTime(duration); + gpu_times.push_back(duration); + } + + // BytesProcessed counts the bytes read from both input ranges, so the reported BytesProcessed/s + // approximates the global memory bandwidth achieved. 
+ // using `same_elements` instead of `elements` corresponds to the + // actual elements read in an early exit + state.SetBytesProcessed(state.iterations() * 2 * + std::max(same_elements, std::size_t(1)) * sizeof(T)); + state.SetItemsProcessed(state.iterations() * + std::max(same_elements, std::size_t(1))); + + const double gpu_cv = bench_utils::StatisticsCV(gpu_times); + state.counters["gpu_noise"] = gpu_cv; +} + +#define CREATE_BENCHMARK(T, Elements, CommonPrefixRatio) \ + benchmark::RegisterBenchmark( \ + bench_utils::bench_naming::format_name("{algo:equal,subalgo:" + name + ",input_type:" #T \ + + ",elements:" #Elements + \ + ", common_prefix_ratio:" #CommonPrefixRatio) \ + .c_str(), \ + run_benchmark, \ + Elements, \ + seed_type, \ + CommonPrefixRatio) + +#define BENCHMARK_ELEMENTS(type, elements) \ + CREATE_BENCHMARK(type, elements, 1.0), \ + CREATE_BENCHMARK(type, elements, 0.5), \ + CREATE_BENCHMARK(type, elements, 0.0) + +#define BENCHMARK_TYPE(type) \ + BENCHMARK_ELEMENTS(type, 1 << 16), BENCHMARK_ELEMENTS(type, 1 << 20), \ + BENCHMARK_ELEMENTS(type, 1 << 24), BENCHMARK_ELEMENTS(type, 1 << 28) + +template +void add_benchmarks(const std::string& name, + std::vector& benchmarks, + const std::string seed_type) +{ + std::vector bs = {BENCHMARK_TYPE(int8_t), + BENCHMARK_TYPE(int16_t), + BENCHMARK_TYPE(int32_t), + BENCHMARK_TYPE(int64_t)}; + + benchmarks.insert(benchmarks.end(), bs.begin(), bs.end()); +} + +int main(int argc, char* argv[]) +{ + cli::Parser parser(argc, argv); + parser.set_optional( + "name_format", "name_format", "human", "either: json,human,txt"); + parser.set_optional("seed", "seed", "random", bench_utils::get_seed_message()); + parser.run_and_exit_if_error(); + + // Parse argv + benchmark::Initialize(&argc, argv); + bench_utils::bench_naming::set_format( + parser.get("name_format")); /* either: json,human,txt */ + const std::string seed_type = parser.get("seed"); + + // Benchmark info + bench_utils::add_common_benchmark_info(); + 
benchmark::AddCustomContext("seed", seed_type); + + // Add benchmark + std::vector benchmarks; + add_benchmarks("basic", benchmarks, seed_type); + + // Use manual timing + for(auto& b : benchmarks) + { + b->UseManualTime(); + b->Unit(benchmark::kMicrosecond); + b->MinTime(0.4); // in seconds + } + + // Run benchmarks + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); + + // Finish + benchmark::Shutdown(); + return 0; +} diff --git a/benchmarks/bench_utils/bench_utils.hpp b/benchmarks/bench_utils/bench_utils.hpp index 94b11258a..73e586a55 100644 --- a/benchmarks/bench_utils/bench_utils.hpp +++ b/benchmarks/bench_utils/bench_utils.hpp @@ -1,6 +1,6 @@ // MIT License // -// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/benchmarks/bench_utils/custom_reporter.hpp b/benchmarks/bench_utils/custom_reporter.hpp index ea5ec56db..d8e9fbf29 100644 --- a/benchmarks/bench_utils/custom_reporter.hpp +++ b/benchmarks/bench_utils/custom_reporter.hpp @@ -1,6 +1,6 @@ // MIT License // -// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -833,7 +833,7 @@ benchmark::BenchmarkReporter* ChooseCustomReporter() { // benchmark::BenchmarkReporter is polymorphic as it has a virtual // function which allows us to use dynamic_cast to detect the derived type. 
- typedef benchmark::BenchmarkReporter* PtrType; + using PtrType = benchmark::BenchmarkReporter*; PtrType default_display_reporter = benchmark::CreateDefaultDisplayReporter(); if (IsType(default_display_reporter)) diff --git a/benchmarks/bench_utils/generation_utils.hpp b/benchmarks/bench_utils/generation_utils.hpp index 3549b4e8e..fc35c4c2e 100644 --- a/benchmarks/bench_utils/generation_utils.hpp +++ b/benchmarks/bench_utils/generation_utils.hpp @@ -1,6 +1,6 @@ // MIT License // -// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -62,6 +62,16 @@ #include #include +#define ROCRAND_CHECK(condition) \ + { \ + rocrand_status _status = condition; \ + if(_status != ROCRAND_STATUS_SUCCESS) \ + { \ + std::cout << "rocRAND error: " << _status << " line: " << __LINE__ << std::endl; \ + exit(_status); \ + } \ + } + namespace bench_utils { /// \brief Provides a sequence of seeds. 
@@ -295,14 +305,14 @@ namespace detail , seed_type(m_seed_type) , entropy_reduction(m_entropy_reduction) { - rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_DEFAULT); + ROCRAND_CHECK(rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_DEFAULT)); const managed_seed managed_seed {seed_type}; seed = seed_t {managed_seed.get_0()}; } ~device_generator_base_t() { - rocrand_destroy_generator(gen); + ROCRAND_CHECK(rocrand_destroy_generator(gen)); } template @@ -368,8 +378,8 @@ namespace detail distribution.resize(num_items); double* d_distribution = thrust::raw_pointer_cast(distribution.data()); - rocrand_set_seed(gen, seed.get()); - rocrand_generate_uniform_double(gen, d_distribution, num_items); + ROCRAND_CHECK(rocrand_set_seed(gen, seed.get())); + ROCRAND_CHECK(rocrand_generate_uniform_double(gen, d_distribution, num_items)); hipError_t error = hipDeviceSynchronize(); if(error != hipSuccess) diff --git a/cmake/Benchmarks.cmake b/cmake/Benchmarks.cmake index e74f9bb43..9ec2f5b46 100644 --- a/cmake/Benchmarks.cmake +++ b/cmake/Benchmarks.cmake @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2024 Advanced Micro Devices, Inc. +# Copyright 2024-2025 Advanced Micro Devices, Inc. 
# ######################################################################## # ########################### @@ -8,32 +8,6 @@ # Common functionality for configuring rocThrust's benchmarks -function(find_rocrand) - # rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND) - if(NOT DOWNLOAD_ROCRAND) - find_package(rocrand QUIET) - endif() - if(NOT rocrand_FOUND) - message(STATUS "Downloading and building rocrand.") - set(ROCRAND_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/rocrand CACHE PATH "") - - download_project( - PROJ rocrand - GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rocRAND.git - GIT_TAG develop - INSTALL_DIR ${ROCRAND_ROOT} - CMAKE_ARGS -DBUILD_TEST=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_PREFIX_PATH=/opt/rocm - LOG_DOWNLOAD TRUE - LOG_CONFIGURE TRUE - LOG_BUILD TRUE - LOG_INSTALL TRUE - BUILD_PROJECT TRUE - UPDATE_DISCONNECTED TRUE # Never update automatically from the remote repository - ) - find_package(rocrand REQUIRED CONFIG PATHS ${ROCRAND_ROOT}) - endif() -endfunction() - # Registers a .cu as C++ rocThrust benchmark function(add_thrust_benchmark BENCHMARK_NAME BENCHMARK_SOURCE NOT_INTERNAL) set(BENCHMARK_TARGET "benchmark_thrust_${BENCHMARK_NAME}") @@ -50,6 +24,7 @@ function(add_thrust_benchmark BENCHMARK_NAME BENCHMARK_SOURCE NOT_INTERNAL) endif() add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) + target_compile_options(${BENCHMARK_TARGET} PRIVATE ${COMPILE_OPTIONS}) target_link_libraries(${BENCHMARK_TARGET} PRIVATE rocthrust @@ -58,7 +33,6 @@ function(add_thrust_benchmark BENCHMARK_NAME BENCHMARK_SOURCE NOT_INTERNAL) # Internal benchmark does not use Google Benchmark nor rocRAND. # This can be omited when that benchmark is removed. 
if(NOT_INTERNAL) - find_rocrand() target_link_libraries(${BENCHMARK_TARGET} PRIVATE roc::rocrand diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index b0bd252f2..afad93485 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2019-2024 Advanced Micro Devices, Inc. +# Copyright 2019-2025 Advanced Micro Devices, Inc. # ######################################################################## # ########################### @@ -11,6 +11,7 @@ # For downloading, building, and installing required dependencies include(cmake/DownloadProject.cmake) +include(FetchContent) # rocPRIM (https://github.com/ROCmSoftwarePlatform/rocPRIM) if(NOT DOWNLOAD_ROCPRIM) @@ -22,6 +23,7 @@ if(NOT rocprim_FOUND) PROJ rocprim GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rocPRIM.git GIT_TAG develop + GIT_SHALLOW TRUE INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/deps/rocprim CMAKE_ARGS -DBUILD_TEST=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_PREFIX_PATH=/opt/rocm LOG_DOWNLOAD TRUE @@ -52,6 +54,7 @@ if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) PROJ googletest GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG release-1.11.0 + GIT_SHALLOW TRUE INSTALL_DIR ${GTEST_ROOT} CMAKE_ARGS -DBUILD_GTEST=ON -DINSTALL_GTEST=ON -Dgtest_force_shared_crt=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX= LOG_DOWNLOAD TRUE @@ -90,8 +93,6 @@ if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) # for cache serialization. We also want to use a static SQLite, # and distro static libraries aren't typically built # position-independent. 
- include( FetchContent ) - if(DEFINED ENV{SQLITE_3_43_2_SRC_URL}) set(SQLITE_3_43_2_SRC_URL_INIT $ENV{SQLITE_3_43_2_SRC_URL}) else() @@ -161,6 +162,7 @@ if(BUILD_BENCHMARKS) PROJ googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG v${BENCHMARK_VERSION} + GIT_SHALLOW TRUE INSTALL_DIR ${GOOGLEBENCHMARK_ROOT} CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_SHARED_LIBS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE} LOG_DOWNLOAD TRUE @@ -172,4 +174,39 @@ if(BUILD_BENCHMARKS) ) find_package(benchmark REQUIRED CONFIG PATHS ${GOOGLEBENCHMARK_ROOT} NO_DEFAULT_PATH) endif() + + # rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND) + if(NOT DOWNLOAD_ROCRAND) + find_package(rocrand QUIET) + endif() + if(NOT rocrand_FOUND) + message(STATUS "Downloading and building rocrand.") + set(ROCRAND_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/rocrand CACHE PATH "") + + set(EXTRA_CMAKE_ARGS "-DGPU_TARGETS=${GPU_TARGETS}") + # CMAKE_ARGS of download_project (or ExternalProject_Add) can't contain ; so another separator + # is needed and LIST_SEPARATOR is passed to download_project() + string(REPLACE ";" "|" EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS}") + # Pass launcher so sccache can be used to speed up building rocRAND + if(CMAKE_CXX_COMPILER_LAUNCHER) + set(EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}") + endif() + download_project( + PROJ rocrand + GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rocRAND.git + GIT_TAG develop + GIT_SHALLOW TRUE + INSTALL_DIR ${ROCRAND_ROOT} + LIST_SEPARATOR | + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DCMAKE_PREFIX_PATH=/opt/rocm ${EXTRA_CMAKE_ARGS} + LOG_DOWNLOAD TRUE + LOG_CONFIGURE TRUE + LOG_BUILD TRUE + LOG_INSTALL TRUE + LOG_OUTPUT_ON_FAILURE TRUE + BUILD_PROJECT TRUE + UPDATE_DISCONNECTED TRUE + ) + find_package(rocrand REQUIRED CONFIG PATHS ${ROCRAND_ROOT}) + endif() endif() diff --git 
a/cmake/DownloadProject.CMakeLists.cmake.in b/cmake/DownloadProject.CMakeLists.cmake.in index 8a90d2ab1..f8d071d07 100644 --- a/cmake/DownloadProject.CMakeLists.cmake.in +++ b/cmake/DownloadProject.CMakeLists.cmake.in @@ -1,7 +1,7 @@ # Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. -cmake_minimum_required(VERSION 2.8.2) +cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR) project(${DL_ARGS_PROJ}-download NONE) diff --git a/cmake/GenerateResourceSpec.cmake b/cmake/GenerateResourceSpec.cmake index 8930c646a..ebb6139ed 100755 --- a/cmake/GenerateResourceSpec.cmake +++ b/cmake/GenerateResourceSpec.cmake @@ -20,7 +20,7 @@ if(ROCMINFO_EXIT_CODE) message(FATAL_ERROR ${ROCMINFO_STDERR}) endif() -string(REGEX MATCHALL [[--(gfx[0-9]+)]] +string(REGEX MATCHALL [[--(gfx[0-9a-f]+)]] ROCMINFO_MATCHES ${ROCMINFO_STDOUT} ) @@ -37,7 +37,7 @@ string(REGEX MATCHALL [[--(gfx[0-9]+)]] # std::string ip; # int id; # }; -# +# # std::vector GFXIP_AND_ID{ {"gfx900",0},{"gfx803",1},{"gfx900",2} }; # std::sort(GFXIP_AND_ID.begin(), GFXIP_AND_ID.end(), # [](const device& lhs, const device& rhs) @@ -85,14 +85,14 @@ list(SORT GFXIP_AND_ID) set(JSON_PAYLOAD) set(IT1 0) list(GET GFXIP_AND_ID ${IT1} I1) -string(REGEX REPLACE ":[0-9]+" "" IP1 ${I1}) +string(REGEX REPLACE ":[0-9a-f]+" "" IP1 ${I1}) list(LENGTH GFXIP_AND_ID COUNT) while(IT1 LESS COUNT) string(APPEND JSON_PAYLOAD "\n \"${IP1}\": [") set(IT2 ${IT1}) list(GET GFXIP_AND_ID ${IT2} I2) - string(REGEX REPLACE [[:[0-9]+$]] "" IP2 ${I2}) - string(REGEX REPLACE [[^gfx[0-9]+:]] "" ID2 ${I2}) + string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2}) + string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2}) while(${IP2} STREQUAL ${IP1} AND IT2 LESS COUNT) string(APPEND JSON_PAYLOAD "\n {\n" @@ -102,13 +102,14 @@ while(IT1 LESS COUNT) math(EXPR IT2 "${IT2} + 1") if(IT2 LESS COUNT) list(GET GFXIP_AND_ID ${IT2} I2) - string(REGEX REPLACE [[:[0-9]+$]] "" IP2 ${I2}) - 
string(REGEX REPLACE [[^gfx[0-9]+:]] "" ID2 ${I2}) + string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2}) + string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2}) endif() endwhile() string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD}) string(APPEND JSON_PAYLOAD "\n ],") set(IT1 ${IT2}) + set(IP1 ${IP2}) endwhile() string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD}) @@ -131,4 +132,4 @@ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/resources.json ${JSON_HEAD} ${JSON_PAYLOAD} ${JSON_TAIL} -) \ No newline at end of file +) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 6c061238e..4e79a2b88 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,6 +21,49 @@ # SOFTWARE. 
function (print_configuration_summary) + find_package(Git) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} show --format=%H --no-patch + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + execute_process( + COMMAND ${GIT_EXECUTABLE} show --format=%s --no-patch + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE COMMIT_SUBJECT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endif() + + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} --version + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERBOSE_DETAILS + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + find_program(UNAME_EXECUTABLE uname) + if(UNAME_EXECUTABLE) + execute_process( + COMMAND ${UNAME_EXECUTABLE} -a + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE LINUX_KERNEL_DETAILS + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endif() + + string(REPLACE "\n" ";" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") + list(TRANSFORM CMAKE_CXX_COMPILER_VERBOSE_DETAILS PREPEND "-- ") + string(REPLACE ";" "\n" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") + + # Joins CMAKE_CXX_FLAGS and COMPILE_OPTIONS + string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP) + string(REPLACE " " ";" CMAKE_CXX_FLAGS_AND_OPTIONS_LIST "${CMAKE_CXX_FLAGS_STRIP}") + list(APPEND CMAKE_CXX_FLAGS_AND_OPTIONS_LIST "${COMPILE_OPTIONS}") + list(JOIN CMAKE_CXX_FLAGS_AND_OPTIONS_LIST " " CMAKE_CXX_FLAGS_AND_OPTIONS) + message(STATUS "") message(STATUS "******** Summary ********") message(STATUS "General:") @@ -34,8 +77,7 @@ if(USE_HIPCXX) else() message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") - string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP) - message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}") + message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_AND_OPTIONS}") endif() message(STATUS " 
Build type : ${CMAKE_BUILD_TYPE}") message(STATUS " Install prefix : ${CMAKE_INSTALL_PREFIX}") @@ -49,11 +91,23 @@ endif() message(STATUS "") message(STATUS " DISABLE_WERROR : ${DISABLE_WERROR}") message(STATUS " DOWNLOAD_ROCPRIM : ${DOWNLOAD_ROCPRIM}") - message(STATUS " DOWNLOAD_ROCRAND : ${DOWNLOAD_ROCRAND}") message(STATUS " BUILD_TEST : ${BUILD_TEST}") message(STATUS " BUILD_HIPSTDPAR_TEST : ${BUILD_HIPSTDPAR_TEST}") message(STATUS " BUILD_HIPSTDPAR_TEST_WITH_TBB : ${BUILD_HIPSTDPAR_TEST_WITH_TBB}") message(STATUS " BUILD_EXAMPLES : ${BUILD_EXAMPLES}") message(STATUS " BUILD_BENCHMARKS : ${BUILD_BENCHMARKS}") +if(BUILD_BENCHMARKS) + message(STATUS " DOWNLOAD_ROCRAND : ${DOWNLOAD_ROCRAND}") +endif() message(STATUS " BUILD_ADDRESS_SANITIZER : ${BUILD_ADDRESS_SANITIZER}") + message(STATUS "") + message(STATUS "Detailed:") + message(STATUS " C++ compiler details : \n${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") +if(GIT_FOUND) + message(STATUS " Commit : ${COMMIT_HASH}") + message(STATUS " ${COMMIT_SUBJECT}") +endif() +if(UNAME_EXECUTABLE) + message(STATUS " Unix name : ${LINUX_KERNEL_DETAILS}") +endif() endfunction() diff --git a/docs/conf.py b/docs/conf.py index 63a2f813b..1a79794f2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ # for PDF output on Read the Docs project = "rocThrust Documentation" author = "Advanced Micro Devices, Inc." -copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved." +copyright = "Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved." version = version_number release = version_number @@ -34,4 +34,4 @@ "path": "doxygen/xml", } -cpp_id_attributes = ["__device__", "__host__"] +cpp_id_attributes = ["__device__", "__host__", "THRUST_HOST_DEVICE", "THRUST_HOST", "THRUST_DEVICE"] diff --git a/docs/cpp_api.rst b/docs/cpp_api.rst index c63d5f910..4d81199be 100644 --- a/docs/cpp_api.rst +++ b/docs/cpp_api.rst @@ -172,7 +172,6 @@ System ====== .. 
doxygengroup:: system - :inner: Utility ======= diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index c654f6995..c321c0129 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.8.17 +# Doxyfile 1.9.4 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -12,6 +12,15 @@ # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options @@ -60,16 +69,28 @@ PROJECT_LOGO = OUTPUT_DIRECTORY = . -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. 
Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,26 +102,18 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -227,6 +240,14 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -250,16 +271,16 @@ TAB_SIZE = 4 # the documentation. 
An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -304,18 +325,21 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. 
In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is -# Fortran), use: inc=Fortran f=C. +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS. +# +# Note: see also the list of default file extension mappings. EXTENSION_MAPPING = @@ -449,6 +473,19 @@ TYPEDEF_HIDES_STRUCT = YES LOOKUP_CACHE_SIZE = 0 +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will base this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1.
+ +NUM_PROC_THREADS = 1 + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -512,6 +549,13 @@ EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation @@ -549,11 +593,18 @@ HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# (including Cygwin) ands Mac users are advised to set this option to NO. +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. 
On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. # The default value is: system dependent. CASE_SENSE_NAMES = NO @@ -572,6 +623,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -729,7 +786,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -775,24 +833,35 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. 
If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. # The default value is: NO. WARN_AS_ERROR = YES @@ -803,13 +872,27 @@ WARN_AS_ERROR = YES # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. 
+ +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -835,8 +918,8 @@ INPUT = ../../thrust \ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: https://www.gnu.org/software/libiconv/) for the list of -# possible encodings. +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 @@ -849,12 +932,14 @@ INPUT_ENCODING = UTF-8 # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. 
+# # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen -# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, # *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = @@ -895,7 +980,7 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test +# ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* @@ -1074,6 +1159,46 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: +# http://clang.llvm.org/) for more accurate parsing at the cost of reduced +# performance. This can be particularly helpful with template rich C++ code for +# which doxygen's built-in parser lacks the necessary type information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. 
+# The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_ADD_INC_PATHS = YES + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +# If clang assisted parsing is enabled you can provide the clang parser with the +# path to the directory containing a file called compile_commands.json. This +# file is the compilation database (see: +# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the +# options used when the source files were built. This is equivalent to +# specifying the -p option to a clang tool, such as clang-check. These options +# will then be passed to the parser. Any options specified with CLANG_OPTIONS +# will be added as well. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake. + +CLANG_DATABASE_PATH = + #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- @@ -1184,7 +1309,7 @@ HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. 
@@ -1194,7 +1319,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1255,10 +1380,11 @@ HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: https://developer.apple.com/xcode/), introduced with OSX -# 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. @@ -1275,6 +1401,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. 
This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1300,8 +1433,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline (the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1331,7 +1468,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). +# (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. @@ -1376,7 +1513,8 @@ QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace -# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1384,8 +1522,8 @@ QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- -# folders). +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1393,16 +1531,16 @@ QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = @@ -1414,9 +1552,9 @@ QHP_CUST_FILTER_ATTRS = QHP_SECT_FILTER_ATTRS = -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. 
QHG_LOCATION = @@ -1459,16 +1597,28 @@ DISABLE_INDEX = NO # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. 
# @@ -1493,6 +1643,24 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML @@ -1530,11 +1698,29 @@ FORMULA_MACROFILE = USE_MATHJAX = YES +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. +# the MathJax output. 
For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for MathJax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for MathJax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1547,22 +1733,29 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering.
For example +# for MathJax version 2 (see https://docs.mathjax.org/en/v2.7-latest/tex.html +# #tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1609,7 +1802,8 @@ SERVER_BASED_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: https://xapian.org/). +# Xapian (see: +# https://xapian.org/). # # See the section "External Indexing and Searching" for details. # The default value is: NO. @@ -1622,8 +1816,9 @@ EXTERNAL_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: https://xapian.org/). See the section "External Indexing and -# Searching" for details. +# Xapian (see: +# https://xapian.org/). See the section "External Indexing and Searching" for +# details. # This tag requires that the tag SEARCHENGINE is set to YES. SEARCHENGINE_URL = @@ -1732,29 +1927,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. 
See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. 
See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1787,9 +1984,11 @@ LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES, to get a -# higher quality PDF documentation. +# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as +# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX +# files. Set this option to YES, to get a higher quality PDF documentation. +# +# See also section LATEX_CMD_NAME for selecting the engine. # The default value is: YES. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1797,8 +1996,7 @@ USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# if errors occur, instead of asking the user for help. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1811,16 +2009,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. 
-# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -1901,16 +2089,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2007,15 +2185,6 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- @@ -2102,7 +2271,8 @@ SEARCH_INCLUDES = NO # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by the -# preprocessor. +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. # This tag requires that the tag SEARCH_INCLUDES is set to YES. 
INCLUDE_PATH = @@ -2123,18 +2293,17 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -# Set THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP (5) to use original tuple and pair definitions -# instead of CUDA backends's types from ::cuda::std:: - PREDEFINED = THRUST_DOXYGEN \ THRUST_DEVICE_SYSTEM=5 \ THRUST_CPP_DIALECT=2017 \ "THRUST_INLINE_CONSTANT=inline constexpr" \ - "THRUST_NODISCARD=[[nodiscard]]" \ - "THRUST_MR_DEFAULT_ALIGNMENT=alignof(std::max_align_t)" \ + THRUST_NODISCARD=[[nodiscard]] \ + THRUST_MR_DEFAULT_ALIGNMENT=alignof(std::max_align_t) \ __cpp_lib_remove_cvref=201711 \ THRUST_PREVENT_MACRO_SUBSTITUTION= \ THRUST_EXEC_CHECK_DISABLE= \ + THRUST_ALIAS_ATTRIBUTE(x)= \ + THRUST_DEPRECATED_BECAUSE(x)= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2205,15 +2374,6 @@ EXTERNAL_PAGES = YES # Configuration options related to the dot tool #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = NO - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. 
@@ -2270,13 +2430,16 @@ DOT_FONTSIZE = 10 DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a +# graph for each documented class showing the direct and indirect inheritance +# relations. In case HAVE_DOT is set as well dot will be used to draw the graph, +# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set +# to TEXT the direct and indirect inheritance relations will be shown as texts / +# links. +# Possible values are: NO, YES, TEXT and GRAPH. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. -CLASS_GRAPH = YES +CLASS_GRAPH = TEXT # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a # graph for each documented class showing the direct and indirect implementation @@ -2288,7 +2451,8 @@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. See also the chapter Grouping +# in the manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2311,10 +2475,32 @@ UML_LOOK = NO # but if the number exceeds 15, the total amount of fields shown is limited to # 10. # Minimum value: 0, maximum value: 100, default value: 10. -# This tag requires that the tag HAVE_DOT is set to YES. +# This tag requires that the tag UML_LOOK is set to YES. UML_LIMIT_NUM_FIELDS = 10 +# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and +# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS +# tag is set to YES, doxygen will add type and arguments for attributes and +# methods in the UML graphs. 
If the DOT_UML_DETAILS tag is set to NONE, doxygen +# will not generate fields with class member information in the UML graphs. The +# class diagrams will look similar to the default class diagrams but using UML +# notation for the relationships. +# Possible values are: NO, YES and NONE. +# The default value is: NO. +# This tag requires that the tag UML_LOOK is set to YES. + +DOT_UML_DETAILS = NO + +# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters +# to display on a single line. If the actual line length exceeds this threshold +# significantly it will be wrapped across multiple lines. Some heuristics are applied +# to avoid ugly line breaks. +# Minimum value: 0, maximum value: 1000, default value: 17. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_WRAP_THRESHOLD = 17 + # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and # collaboration graphs will show the relations between templates and their # instances. @@ -2381,6 +2567,13 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: @@ -2434,10 +2627,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram.
+# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2499,14 +2692,18 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. GENERATE_LEGEND = YES -# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot +# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. +# +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. DOT_CLEANUP = YES diff --git a/docs/hip-execution-policies.rst b/docs/hip-execution-policies.rst index c8b03b1a1..a464ea13e 100644 --- a/docs/hip-execution-policies.rst +++ b/docs/hip-execution-policies.rst @@ -1,3 +1,5 @@ +:orphan: + .. meta:: :description: rocThrust documentation and API reference :keywords: rocThrust, ROCm, API, reference, execution policy @@ -17,7 +19,7 @@ rocThrust's HIP backend provides the following: * ``hip_rocprim::par_nosync`` - This policy tells Thrust that algorithms may avoid synchronization barriers when it is possible to do so. As a result, algorithms may be launched asynchronously with respect to the host. This can allow you to perform other host-side work while the algorithms - are running on the device. 
If you use this policy, you must synchronize before accessing results + are running on the device. If you use this policy, you must synchronize before accessing results on the host side. The example below illustrates the behaviour of these two policies. diff --git a/docs/hipgraph-support.rst b/docs/hipgraph-support.rst index f24d0c1ae..3e755a17e 100644 --- a/docs/hipgraph-support.rst +++ b/docs/hipgraph-support.rst @@ -1,3 +1,5 @@ +:orphan: + .. meta:: :description: rocThrust documentation and API reference :keywords: rocThrust, ROCm, API, reference, hipGraph diff --git a/docs/license.rst b/docs/license.rst index d8bb31c62..bca5e4c5f 100644 --- a/docs/license.rst +++ b/docs/license.rst @@ -9,3 +9,4 @@ License ****************************************** .. include:: ../LICENSE + :literal: diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b36063ce4..7f598be8f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2020-2023 Advanced Micro Devices, Inc. +# Copyright 2020-2025 Advanced Micro Devices, Inc. 
# ######################################################################## if (WIN32) @@ -22,6 +22,7 @@ function(add_thrust_example EXAMPLE) endif() add_executable(${EXAMPLE_TARGET} ${EXAMPLE_SOURCE}) + target_compile_options(${EXAMPLE_TARGET} PRIVATE ${COMPILE_OPTIONS}) target_link_libraries(${EXAMPLE_TARGET} PRIVATE rocthrust diff --git a/examples/bounding_box.cu b/examples/bounding_box.cu index 13228052e..903f517e2 100644 --- a/examples/bounding_box.cu +++ b/examples/bounding_box.cu @@ -52,7 +52,7 @@ struct bbox }; // reduce a pair of bounding boxes (a,b) to a bounding box containing a and b -struct bbox_reduction : public thrust::binary_function +struct bbox_reduction { __host__ __device__ bbox operator()(bbox a, bbox b) diff --git a/examples/bucket_sort2d.cu b/examples/bucket_sort2d.cu index d5c0ef57d..9cfdd6c3b 100644 --- a/examples/bucket_sort2d.cu +++ b/examples/bucket_sort2d.cu @@ -12,7 +12,7 @@ #include "include/host_device.h" // define a 2d float vector -typedef thrust::tuple vec2; +using vec2 = thrust::tuple; // return a random vec2 in [0,1)^2 vec2 make_random_vec2(void) @@ -26,7 +26,7 @@ vec2 make_random_vec2(void) // hash a point in the unit square to the index of // the grid bucket that contains it -struct point_to_bucket_index : public thrust::unary_function +struct point_to_bucket_index { unsigned int width; // buckets in the x dimension (grid spacing = 1/width) unsigned int height; // buckets in the y dimension (grid spacing = 1/height) diff --git a/examples/counting_iterator.cu b/examples/counting_iterator.cu index e090e9e5e..c7476b0de 100644 --- a/examples/counting_iterator.cu +++ b/examples/counting_iterator.cu @@ -28,7 +28,7 @@ int main(void) thrust::counting_iterator last = first + 8; // compute indices of nonzero elements - typedef thrust::device_vector::iterator IndexIterator; + using IndexIterator = thrust::device_vector::iterator; IndexIterator indices_end = thrust::copy_if(first, last, stencil.begin(), diff --git 
a/examples/cpp_integration/CMakeLists.txt b/examples/cpp_integration/CMakeLists.txt index 9df4f1ade..ae413ad90 100644 --- a/examples/cpp_integration/CMakeLists.txt +++ b/examples/cpp_integration/CMakeLists.txt @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2020-2023 Advanced Micro Devices, Inc. +# Copyright 2020-2025 Advanced Micro Devices, Inc. # ######################################################################## @@ -13,6 +13,7 @@ set_source_files_properties(device.cu ) add_executable(${EXAMPLE_TARGET} ${EXAMPLE_SOURCE}) +target_compile_options(${EXAMPLE_TARGET} PRIVATE ${COMPILE_OPTIONS}) target_link_libraries(${EXAMPLE_TARGET} PRIVATE rocthrust diff --git a/examples/cuda/custom_temporary_allocation.cu b/examples/cuda/custom_temporary_allocation.cu index 7bba0fa9e..3f41217e3 100644 --- a/examples/cuda/custom_temporary_allocation.cu +++ b/examples/cuda/custom_temporary_allocation.cu @@ -48,7 +48,7 @@ private: // A simple allocator for caching cudaMalloc allocations. 
struct cached_allocator { - typedef char value_type; + using value_type = char; cached_allocator() {} @@ -122,8 +122,8 @@ struct cached_allocator } private: - typedef std::multimap free_blocks_type; - typedef std::map allocated_blocks_type; + using free_blocks_type = std::multimap; + using allocated_blocks_type = std::map; free_blocks_type free_blocks; allocated_blocks_type allocated_blocks; diff --git a/examples/cuda/global_device_vector.cu b/examples/cuda/global_device_vector.cu index ef4cb9af3..dc7a72bc3 100644 --- a/examples/cuda/global_device_vector.cu +++ b/examples/cuda/global_device_vector.cu @@ -15,11 +15,10 @@ extern "C" cudaError_t cudaFreeIgnoreShutdown(void* ptr) { return err; } -typedef thrust::system::cuda::detail::cuda_memory_resource< - cudaMalloc, - cudaFreeIgnoreShutdown, - thrust::cuda::pointer -> device_ignore_shutdown_memory_resource; + using device_ignore_shutdown_memory_resource = thrust::system::cuda::detail::cuda_memory_resource< + cudaMalloc, + cudaFreeIgnoreShutdown, + thrust::cuda::pointer >; template using device_ignore_shutdown_allocator = diff --git a/examples/cuda/range_view.cu b/examples/cuda/range_view.cu index e26051668..c9b53b319 100644 --- a/examples/cuda/range_view.cu +++ b/examples/cuda/range_view.cu @@ -18,11 +18,11 @@ template class range_view { public: - typedef Iterator iterator; - typedef typename thrust::iterator_traits::value_type value_type; - typedef typename thrust::iterator_traits::pointer pointer; - typedef typename thrust::iterator_traits::difference_type difference_type; - typedef typename thrust::iterator_traits::reference reference; + using iterator = Iterator; + using value_type = typename thrust::iterator_traits::value_type; + using pointer = typename thrust::iterator_traits::pointer; + using difference_type = typename thrust::iterator_traits::difference_type; + using reference = typename thrust::iterator_traits::reference; private: const iterator first; @@ -154,8 +154,8 @@ make_range_view(Vector& v) // This 
saxpy functor stores view of X, Y, Z array, and accesses them in // vector-like way -template -struct saxpy_functor : public thrust::unary_function +template +struct saxpy_functor { const float a; View1 x; @@ -189,7 +189,7 @@ void saxpy(float A, View1 X, View2 Y, View3 Z) saxpy_functor(A,X,Y,Z)); } -struct f1 : public thrust::unary_function +struct f1 { __host__ __device__ float operator()(float x) const diff --git a/examples/dot_products_with_zip.cu b/examples/dot_products_with_zip.cu index 2628eaa10..0b3c75815 100644 --- a/examples/dot_products_with_zip.cu +++ b/examples/dot_products_with_zip.cu @@ -16,11 +16,10 @@ // We'll use a 3-tuple to store our 3d vector type -typedef thrust::tuple Float3; - +using Float3 = thrust::tuple; // This functor implements the dot product between 3d vectors -struct DotProduct : public thrust::binary_function +struct DotProduct { __host__ __device__ float operator()(const Float3& a, const Float3& b) const @@ -83,9 +82,9 @@ int main(void) // METHOD #1 // Defining a zip_iterator type can be a little cumbersome ... 
- typedef thrust::device_vector::iterator FloatIterator; - typedef thrust::tuple FloatIteratorTuple; - typedef thrust::zip_iterator Float3Iterator; + using FloatIterator = thrust::device_vector::iterator; + using FloatIteratorTuple = thrust::tuple; + using Float3Iterator = thrust::zip_iterator; // Now we'll create some zip_iterators for A and B Float3Iterator A_first = thrust::make_zip_iterator(thrust::make_tuple(A0.begin(), A1.begin(), A2.begin())); diff --git a/examples/expand.cu b/examples/expand.cu index f61edec8f..9fade2997 100644 --- a/examples/expand.cu +++ b/examples/expand.cu @@ -25,8 +25,8 @@ OutputIterator expand(InputIterator1 first1, InputIterator2 first2, OutputIterator output) { - typedef typename thrust::iterator_difference::type difference_type; - + using difference_type = typename thrust::iterator_difference::type; + difference_type input_size = thrust::distance(first1, last1); difference_type output_size = thrust::reduce(first1, last1); @@ -64,7 +64,7 @@ OutputIterator expand(InputIterator1 first1, template void print(const std::string& s, const Vector& v) { - typedef typename Vector::value_type T; + using T = typename Vector::value_type; std::cout << s; thrust::copy(v.begin(), v.end(), std::ostream_iterator(std::cout, " ")); diff --git a/examples/histogram.cu b/examples/histogram.cu index fbcfd0aab..045557430 100644 --- a/examples/histogram.cu +++ b/examples/histogram.cu @@ -50,7 +50,7 @@ template void print_vector(const std::string& name, const Vector& v) { - typedef typename Vector::value_type T; + using T = typename Vector::value_type; std::cout << " " << std::setw(20) << name << " "; thrust::copy(v.begin(), v.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; @@ -62,8 +62,8 @@ template data(input); @@ -108,8 +108,8 @@ void sparse_histogram(const Vector1& input, Vector2& histogram_values, Vector3& histogram_counts) { - typedef typename Vector1::value_type ValueType; // input value type - typedef typename 
Vector3::value_type IndexType; // histogram index type + using ValueType = typename Vector1::value_type; // input value type + using IndexType = typename Vector3::value_type; // histogram index type // copy input data (could be skipped if input is allowed to be modified) thrust::device_vector data(input); diff --git a/examples/lambda.cu b/examples/lambda.cu index febe44b54..0dac14b6c 100644 --- a/examples/lambda.cu +++ b/examples/lambda.cu @@ -26,7 +26,6 @@ using namespace thrust::placeholders; // implementing SAXPY with a functor is cumbersome and verbose struct saxpy_functor - : public thrust::binary_function { float a; diff --git a/examples/max_abs_diff.cu b/examples/max_abs_diff.cu index 0e379fb56..2e811a104 100644 --- a/examples/max_abs_diff.cu +++ b/examples/max_abs_diff.cu @@ -11,7 +11,7 @@ // between the elements of two vectors template -struct abs_diff : public thrust::binary_function +struct abs_diff { __host__ __device__ T operator()(const T& a, const T& b) diff --git a/examples/minmax.cu b/examples/minmax.cu index 04b1cbf00..9ebdf2380 100644 --- a/examples/minmax.cu +++ b/examples/minmax.cu @@ -24,7 +24,6 @@ struct minmax_pair // are initialized to x. 
template struct minmax_unary_op - : public thrust::unary_function< T, minmax_pair > { __host__ __device__ minmax_pair operator()(const T& x) const @@ -42,7 +41,6 @@ struct minmax_unary_op // the minimums and maximums of the input pairs template struct minmax_binary_op - : public thrust::binary_function< minmax_pair, minmax_pair, minmax_pair > { __host__ __device__ minmax_pair operator()(const minmax_pair& x, const minmax_pair& y) const diff --git a/examples/monte_carlo.cu b/examples/monte_carlo.cu index ae750e616..8e436318c 100644 --- a/examples/monte_carlo.cu +++ b/examples/monte_carlo.cu @@ -23,7 +23,7 @@ unsigned int hash(unsigned int a) return a; } -struct estimate_pi : public thrust::unary_function +struct estimate_pi { __host__ __device__ float operator()(unsigned int thread_id) diff --git a/examples/monte_carlo_disjoint_sequences.cu b/examples/monte_carlo_disjoint_sequences.cu index 3ed2daf2f..08aa89a31 100644 --- a/examples/monte_carlo_disjoint_sequences.cu +++ b/examples/monte_carlo_disjoint_sequences.cu @@ -23,7 +23,7 @@ // past n states of the RNG. This function is accelerated and executes // in O(lg n) time. 
-struct estimate_pi : public thrust::unary_function +struct estimate_pi { __host__ __device__ float operator()(unsigned int thread_id) diff --git a/examples/mr_basic.cu b/examples/mr_basic.cu index 733799425..20712bc83 100644 --- a/examples/mr_basic.cu +++ b/examples/mr_basic.cu @@ -30,7 +30,7 @@ int main() { // no virtual calls will be issued - typedef thrust::mr::allocator Alloc; + using Alloc = thrust::mr::allocator; Alloc alloc(&memres); do_stuff_with_vector >(alloc); @@ -39,7 +39,7 @@ int main() { // virtual calls will be issued - wrapping in a polymorphic wrapper thrust::mr::polymorphic_adaptor_resource adaptor(&memres); - typedef thrust::mr::polymorphic_allocator Alloc; + using Alloc = thrust::mr::polymorphic_allocator; Alloc alloc(&adaptor); do_stuff_with_vector >(alloc); @@ -47,34 +47,32 @@ int main() { // use the global device_ptr-flavored device memory resource - typedef thrust::device_ptr_memory_resource Resource; + using Resource = thrust::device_ptr_memory_resource; thrust::mr::polymorphic_adaptor_resource > adaptor( thrust::mr::get_global_resource() ); - typedef thrust::mr::polymorphic_allocator > Alloc; + using Alloc = thrust::mr::polymorphic_allocator >; Alloc alloc(&adaptor); do_stuff_with_vector >(alloc); } - typedef thrust::mr::unsynchronized_pool_resource< - thrust::mr::new_delete_resource - > Pool; + using Pool = thrust::mr::unsynchronized_pool_resource< + thrust::mr::new_delete_resource >; Pool pool(&memres); { - typedef thrust::mr::allocator Alloc; + using Alloc = thrust::mr::allocator; Alloc alloc(&pool); do_stuff_with_vector >(alloc); } - typedef thrust::mr::disjoint_unsynchronized_pool_resource< - thrust::mr::new_delete_resource, - thrust::mr::new_delete_resource - > DisjointPool; + using DisjointPool = thrust::mr::disjoint_unsynchronized_pool_resource< + thrust::mr::new_delete_resource, + thrust::mr::new_delete_resource >; DisjointPool disjoint_pool(&memres, &memres); { - typedef thrust::mr::allocator Alloc; + using Alloc = 
thrust::mr::allocator; Alloc alloc(&disjoint_pool); do_stuff_with_vector >(alloc); diff --git a/examples/padded_grid_reduction.cu b/examples/padded_grid_reduction.cu index cab4072eb..3de95a567 100644 --- a/examples/padded_grid_reduction.cu +++ b/examples/padded_grid_reduction.cu @@ -22,12 +22,10 @@ // where the bool is true for valid grid values and false for // values in the padded region of the grid template -struct transform_tuple : - public thrust::unary_function< thrust::tuple, - thrust::tuple > +struct transform_tuple { - typedef typename thrust::tuple InputTuple; - typedef typename thrust::tuple OutputTuple; + using InputTuple = typename thrust::tuple; + using OutputTuple = typename thrust::tuple; IndexType n, N; @@ -45,12 +43,9 @@ struct transform_tuple : // reduce two tuples (bool,value,value) into a single tuple such that output // contains the smallest and largest *valid* values. template -struct reduce_tuple : - public thrust::binary_function< thrust::tuple, - thrust::tuple, - thrust::tuple > +struct reduce_tuple { - typedef typename thrust::tuple Tuple; + using Tuple = typename thrust::tuple; __host__ __device__ Tuple operator()(const Tuple& t0, const Tuple& t1) const @@ -99,7 +94,7 @@ int main(void) std::cout << "\n"; // compute min & max over valid region of the 2d grid - typedef thrust::tuple result_type; + using result_type = thrust::tuple; result_type init(true, FLT_MAX, -FLT_MAX); // initial value transform_tuple unary_op(n, N); // transformation operator diff --git a/examples/raw_reference_cast.cu b/examples/raw_reference_cast.cu index 335476d5c..a08efd8c6 100644 --- a/examples/raw_reference_cast.cu +++ b/examples/raw_reference_cast.cu @@ -73,7 +73,7 @@ struct copy_iterators template void print(const std::string& name, const Vector& v) { - typedef typename Vector::value_type T; + using T = typename Vector::value_type; std::cout << name << ": "; thrust::copy(v.begin(), v.end(), std::ostream_iterator(std::cout, " ")); @@ -82,9 +82,9 @@ void 
print(const std::string& name, const Vector& v) int main(void) { - typedef thrust::device_vector Vector; - typedef Vector::iterator Iterator; - typedef thrust::device_system_tag System; + using Vector = thrust::device_vector; + using Iterator = Vector::iterator; + using System = thrust::device_system_tag; // allocate device memory Vector A(5); diff --git a/examples/repeated_range.cu b/examples/repeated_range.cu index 5b877c1b4..1942b71bf 100644 --- a/examples/repeated_range.cu +++ b/examples/repeated_range.cu @@ -20,30 +20,29 @@ template class repeated_range { - public: +public: + using difference_type = typename thrust::iterator_difference::type; - typedef typename thrust::iterator_difference::type difference_type; - - struct repeat_functor : public thrust::unary_function + struct repeat_functor { difference_type repeats; repeat_functor(difference_type repeats) - : repeats(repeats) {} + : repeats(repeats) + {} - __host__ __device__ - difference_type operator()(const difference_type& i) const - { + __host__ __device__ difference_type operator()(const difference_type& i) const + { return i / repeats; } }; - typedef typename thrust::counting_iterator CountingIterator; - typedef typename thrust::transform_iterator TransformIterator; - typedef typename thrust::permutation_iterator PermutationIterator; + using CountingIterator = typename thrust::counting_iterator; + using TransformIterator = typename thrust::transform_iterator; + using PermutationIterator = typename thrust::permutation_iterator; // type of the repeated_range iterator - typedef PermutationIterator iterator; + using iterator = PermutationIterator; // construct repeated_range for the range [first,last) repeated_range(Iterator first, Iterator last, difference_type repeats) @@ -77,7 +76,7 @@ int main(void) std::cout << "range "; thrust::copy(data.begin(), data.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; - typedef thrust::device_vector::iterator Iterator; + using Iterator = 
thrust::device_vector::iterator; // create repeated_range with elements repeated twice repeated_range twice(data.begin(), data.end(), 2); diff --git a/examples/saxpy.cu b/examples/saxpy.cu index 7eaedfc8b..ef7561c0c 100644 --- a/examples/saxpy.cu +++ b/examples/saxpy.cu @@ -18,7 +18,7 @@ // implements the operation with a single transformation // and represents "best practice". -struct saxpy_functor : public thrust::binary_function +struct saxpy_functor { const float a; diff --git a/examples/scan_by_key.cu b/examples/scan_by_key.cu index 67955bda0..787a71fa2 100644 --- a/examples/scan_by_key.cu +++ b/examples/scan_by_key.cu @@ -7,10 +7,9 @@ #include "include/host_device.h" // BinaryPredicate for the head flag segment representation -// equivalent to thrust::not2(thrust::project2nd())); +// equivalent to thrust::not_fn(thrust::project2nd())); template struct head_flag_predicate - : public thrust::binary_function { __host__ __device__ bool operator()(HeadFlagType, HeadFlagType right) const diff --git a/examples/scan_matrix_by_rows.cu b/examples/scan_matrix_by_rows.cu index 4fd152ea6..5c91b6428 100644 --- a/examples/scan_matrix_by_rows.cu +++ b/examples/scan_matrix_by_rows.cu @@ -28,7 +28,8 @@ void scan_matrix_by_rows0(thrust::device_vector& u, int n, int m) { // So first, we define an unary function object which takes the index of an // element and returns the row that it belongs to. -struct which_row : thrust::unary_function { +struct which_row +{ int row_length; __host__ __device__ diff --git a/examples/simple_moving_average.cu b/examples/simple_moving_average.cu index 199a5e786..f20d6dce6 100644 --- a/examples/simple_moving_average.cu +++ b/examples/simple_moving_average.cu @@ -24,7 +24,7 @@ // compute the difference of two positions in the cumumulative sum and // divide by the SMA window size w. 
template -struct minus_and_divide : public thrust::binary_function +struct minus_and_divide { T w; @@ -40,7 +40,7 @@ struct minus_and_divide : public thrust::binary_function template void simple_moving_average(const InputVector& data, size_t w, OutputVector& output) { - typedef typename InputVector::value_type T; + using T = typename InputVector::value_type; if (data.size() < w) return; diff --git a/examples/sparse_vector.cu b/examples/sparse_vector.cu index c7528cff2..79a2ff4fb 100644 --- a/examples/sparse_vector.cu +++ b/examples/sparse_vector.cu @@ -32,8 +32,8 @@ void sum_sparse_vectors(const IndexVector1& A_index, IndexVector3& C_index, ValueVector3& C_value) { - typedef typename IndexVector3::value_type IndexType; - typedef typename ValueVector3::value_type ValueType; + using IndexType = typename IndexVector3::value_type; + using ValueType = typename ValueVector3::value_type; // sanity test assert(A_index.size() == A_value.size()); diff --git a/examples/stream_compaction.cu b/examples/stream_compaction.cu index 26c696d32..c613f7450 100644 --- a/examples/stream_compaction.cu +++ b/examples/stream_compaction.cu @@ -12,7 +12,7 @@ // this functor returns true if the argument is odd, and false otherwise template -struct is_odd : public thrust::unary_function +struct is_odd { __host__ __device__ bool operator()(T x) @@ -25,7 +25,7 @@ struct is_odd : public thrust::unary_function template void print_range(const std::string& name, Iterator first, Iterator last) { - typedef typename std::iterator_traits::value_type T; + using T = typename std::iterator_traits::value_type; std::cout << name << ": "; thrust::copy(first, last, std::ostream_iterator(std::cout, " ")); @@ -38,8 +38,8 @@ int main(void) size_t N = 10; // define some types - typedef thrust::device_vector Vector; - typedef Vector::iterator Iterator; + using Vector = thrust::device_vector; + using Iterator = Vector::iterator; // allocate storage for array Vector values(N); diff --git a/examples/strided_range.cu 
b/examples/strided_range.cu index 6e3c068d9..882b1c6de 100644 --- a/examples/strided_range.cu +++ b/examples/strided_range.cu @@ -22,9 +22,9 @@ class strided_range { public: - typedef typename thrust::iterator_difference::type difference_type; + using difference_type = typename thrust::iterator_difference::type; - struct stride_functor : public thrust::unary_function + struct stride_functor { difference_type stride; @@ -38,12 +38,12 @@ class strided_range } }; - typedef typename thrust::counting_iterator CountingIterator; - typedef typename thrust::transform_iterator TransformIterator; - typedef typename thrust::permutation_iterator PermutationIterator; + using CountingIterator = typename thrust::counting_iterator; + using TransformIterator = typename thrust::transform_iterator; + using PermutationIterator = typename thrust::permutation_iterator; // type of the strided_range iterator - typedef PermutationIterator iterator; + using iterator = PermutationIterator; // construct strided_range for the range [first,last) strided_range(Iterator first, Iterator last, difference_type stride) @@ -81,7 +81,7 @@ int main(void) std::cout << "data: "; thrust::copy(data.begin(), data.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; - typedef thrust::device_vector::iterator Iterator; + using Iterator = thrust::device_vector::iterator; // create strided_range with indices [0,2,4,6] strided_range evens(data.begin(), data.end(), 2); diff --git a/examples/sum_rows.cu b/examples/sum_rows.cu index bb3a175c5..faadb13ec 100644 --- a/examples/sum_rows.cu +++ b/examples/sum_rows.cu @@ -10,7 +10,7 @@ // convert a linear index to a row index template -struct linear_index_to_row_index : public thrust::unary_function +struct linear_index_to_row_index { T C; // number of columns diff --git a/examples/summary_statistics.cu b/examples/summary_statistics.cu index 5cf82a7c2..2908db35d 100644 --- a/examples/summary_statistics.cu +++ b/examples/summary_statistics.cu @@ -70,10 
+70,7 @@ struct summary_stats_unary_op // approximation to the summary_stats for // all values that have been agregated so far template -struct summary_stats_binary_op - : public thrust::binary_function&, - const summary_stats_data&, - summary_stats_data > +struct summary_stats_binary_op { __host__ __device__ summary_stats_data operator()(const summary_stats_data& x, const summary_stats_data & y) const @@ -116,7 +113,7 @@ struct summary_stats_binary_op template void print_range(const std::string& name, Iterator first, Iterator last) { - typedef typename std::iterator_traits::value_type T; + using T = typename std::iterator_traits::value_type; std::cout << name << ": "; thrust::copy(first, last, std::ostream_iterator(std::cout, " ")); @@ -126,7 +123,7 @@ void print_range(const std::string& name, Iterator first, Iterator last) int main(void) { - typedef float T; + using T = float; // initialize host array T h_x[] = {4, 7, 13, 16}; diff --git a/examples/summed_area_table.cu b/examples/summed_area_table.cu index 6a865cae1..86a96f218 100644 --- a/examples/summed_area_table.cu +++ b/examples/summed_area_table.cu @@ -13,10 +13,8 @@ // This example computes a summed area table using segmented scan // http://en.wikipedia.org/wiki/Summed_area_table - - -// convert a linear index to a linear index in the transpose -struct transpose_index : public thrust::unary_function +// convert a linear index to a linear index in the transpose +struct transpose_index { size_t m, n; @@ -34,7 +32,7 @@ struct transpose_index : public thrust::unary_function }; // convert a linear index to a row index -struct row_index : public thrust::unary_function +struct row_index { size_t n; diff --git a/examples/tiled_range.cu b/examples/tiled_range.cu index 90f6d3f62..0fe207722 100644 --- a/examples/tiled_range.cu +++ b/examples/tiled_range.cu @@ -21,29 +21,28 @@ template class tiled_range { public: + using difference_type = typename thrust::iterator_difference::type; - typedef typename 
thrust::iterator_difference::type difference_type; - - struct tile_functor : public thrust::unary_function + struct tile_functor { difference_type tile_size; tile_functor(difference_type tile_size) - : tile_size(tile_size) {} + : tile_size(tile_size) + {} - __host__ __device__ - difference_type operator()(const difference_type& i) const - { + __host__ __device__ difference_type operator()(const difference_type& i) const + { return i % tile_size; } }; - typedef typename thrust::counting_iterator CountingIterator; - typedef typename thrust::transform_iterator TransformIterator; - typedef typename thrust::permutation_iterator PermutationIterator; + using CountingIterator = typename thrust::counting_iterator; + using TransformIterator = typename thrust::transform_iterator; + using PermutationIterator = typename thrust::permutation_iterator; // type of the tiled_range iterator - typedef PermutationIterator iterator; + using iterator = PermutationIterator; // construct repeated_range for the range [first,last) tiled_range(Iterator first, Iterator last, difference_type tiles) @@ -77,7 +76,7 @@ int main(void) std::cout << "range "; thrust::copy(data.begin(), data.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; - typedef thrust::device_vector::iterator Iterator; + using Iterator = thrust::device_vector::iterator; // create tiled_range with two tiles tiled_range two(data.begin(), data.end(), 2); diff --git a/examples/transform_iterator.cu b/examples/transform_iterator.cu index 5b23cbda2..d1fa1e0bc 100644 --- a/examples/transform_iterator.cu +++ b/examples/transform_iterator.cu @@ -12,7 +12,7 @@ // this functor clamps a value to the range [lo, hi] template -struct clamp : public thrust::unary_function +struct clamp { T lo, hi; @@ -32,7 +32,7 @@ struct clamp : public thrust::unary_function }; template -struct simple_negate : public thrust::unary_function +struct simple_negate { __host__ __device__ T operator()(T x) @@ -44,7 +44,7 @@ struct simple_negate 
: public thrust::unary_function template void print_range(const std::string& name, Iterator first, Iterator last) { - typedef typename std::iterator_traits::value_type T; + using T = typename std::iterator_traits::value_type; std::cout << name << ": "; thrust::copy(first, last, std::ostream_iterator(std::cout, " ")); @@ -59,8 +59,8 @@ int main(void) int hi = 5; // define some types - typedef thrust::device_vector Vector; - typedef Vector::iterator VectorIterator; + using Vector = thrust::device_vector; + using VectorIterator = Vector::iterator; // initialize values Vector values(8); @@ -77,7 +77,7 @@ int main(void) print_range("values ", values.begin(), values.end()); // define some more types - typedef thrust::transform_iterator, VectorIterator> ClampedVectorIterator; + using ClampedVectorIterator = thrust::transform_iterator, VectorIterator>; // create a transform_iterator that applies clamp() to the values array ClampedVectorIterator cv_begin = thrust::make_transform_iterator(values.begin(), clamp(lo, hi)); @@ -96,8 +96,8 @@ int main(void) //// // combine transform_iterator with other fancy iterators like counting_iterator - typedef thrust::counting_iterator CountingIterator; - typedef thrust::transform_iterator, CountingIterator> ClampedCountingIterator; + using CountingIterator = thrust::counting_iterator; + using ClampedCountingIterator = thrust::transform_iterator, CountingIterator>; CountingIterator count_begin(0); CountingIterator count_end(10); @@ -113,7 +113,7 @@ int main(void) //// // combine transform_iterator with another transform_iterator - typedef thrust::transform_iterator, ClampedCountingIterator> NegatedClampedCountingIterator; + using NegatedClampedCountingIterator = thrust::transform_iterator, ClampedCountingIterator>; NegatedClampedCountingIterator ncs_begin = thrust::make_transform_iterator(cs_begin, thrust::negate()); NegatedClampedCountingIterator ncs_end = thrust::make_transform_iterator(cs_end, thrust::negate()); @@ -123,7 +123,7 @@ int 
main(void) //// // when a functor does not define result_type, a third template argument must be provided - typedef thrust::transform_iterator, VectorIterator, int> NegatedVectorIterator; + using NegatedVectorIterator = thrust::transform_iterator, VectorIterator, int>; NegatedVectorIterator nv_begin(values.begin(), simple_negate()); NegatedVectorIterator nv_end(values.end(), simple_negate()); diff --git a/examples/uninitialized_vector.cu b/examples/uninitialized_vector.cu index 5962de2d2..a831041aa 100644 --- a/examples/uninitialized_vector.cu +++ b/examples/uninitialized_vector.cu @@ -40,7 +40,7 @@ uninitialized_allocator & operator=(const uninitialized_allocator &) = default; template struct rebind { - typedef uninitialized_allocator other; + using other = uninitialized_allocator; }; // note that construct is annotated as @@ -54,7 +54,7 @@ uninitialized_allocator & operator=(const uninitialized_allocator &) = default; // to make a device_vector which does not initialize its elements, // use uninitialized_allocator as the 2nd template parameter -typedef thrust::device_vector > uninitialized_vector; +using uninitialized_vector = thrust::device_vector >; int main() { diff --git a/examples/weld_vertices.cu b/examples/weld_vertices.cu index d133473f8..8c2f1f65d 100644 --- a/examples/weld_vertices.cu +++ b/examples/weld_vertices.cu @@ -34,7 +34,7 @@ */ // define a 2d float vector -typedef thrust::tuple vec2; +using vec2 = thrust::tuple; int main(void) { diff --git a/examples/word_count.cu b/examples/word_count.cu index 4da29d74b..7aa158671 100644 --- a/examples/word_count.cu +++ b/examples/word_count.cu @@ -23,7 +23,6 @@ bool is_alpha(const char c) // determines whether the right character begins a new word struct is_word_start - : public thrust::binary_function { __host__ __device__ bool operator()(const char& left, const char& right) const diff --git a/internal/benchmark/CMakeLists.txt b/internal/benchmark/CMakeLists.txt deleted file mode 100644 index 
2ba15d098..000000000 --- a/internal/benchmark/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -# ######################################################################## -# Copyright 2020-2024 Advanced Micro Devices, Inc. -# ######################################################################## - -include(Benchmarks) - -# **************************************************************************** -# Benchmarks -# **************************************************************************** -message (STATUS "Configuring internal benchmarks") - -add_thrust_benchmark("internal_bench" "bench.cu" OFF) diff --git a/internal/benchmark/README.txt b/internal/benchmark/README.txt deleted file mode 100644 index 73b0cc058..000000000 --- a/internal/benchmark/README.txt +++ /dev/null @@ -1,31 +0,0 @@ -Directions for compiling and running the benchmark with Ubuntu Linux: - -Install Intel's Threading Building Blocks library (TBB): -$ sudo apt-get install libtbb-dev - -Compile the benchmark: -$ nvcc -O3 -arch=sm_20 bench.cu -ltbb -o bench - -Run the benchmark: -$ ./bench - -Typical output (Tesla C2050): - -Benchmarking with input size 33554432 -Core Primitive Performance (elements per second) - Algorithm, STL, TBB, Thrust - reduce, 3121746688, 3739585536, 26134038528 - transform, 1869492736, 2347719424, 13804681216 - scan, 1394143744, 1439394816, 5039195648 - sort, 11070660, 34622352, 673543168 -Sorting Performance (keys per second) - Type, STL, TBB, Thrust - char, 24050078, 62987040, 2798874368 - short, 15644141, 41275164, 1428603008 - int, 11062616, 33478628, 682295744 - long, 11249874, 33972564, 219719184 - float, 9850043, 29011806, 692407232 -double, 9700181, 27153626, 224345568 - -The reported numbers are performance rates in "elements per second" (higher is better). 
- diff --git a/internal/benchmark/bench.cu b/internal/benchmark/bench.cu deleted file mode 100644 index 76fb2c3e3..000000000 --- a/internal/benchmark/bench.cu +++ /dev/null @@ -1,1555 +0,0 @@ -/* - * Copyright 20011-2021 NVIDIA Corporation - * Modifications Copyright© 2020-2024 Advanced Micro Devices, Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include // For CHAR_BIT. -#include // For `sqrt` and `abs`. -#include // For `atoi`. -#include -#include -#include -#include -#include - -#include // For `intN_t`. 
- -#include "random.h" -#include "timer.h" - -#if defined(HAVE_TBB) - #include "tbb_algos.h" -#endif - -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP // Hip clang compiler -#ifdef __HIP_DEVICE_COMPILE__ - using ::abs; - using ::sqrt; -#else - using std::abs; - using std::sqrt; -#endif -#else // Not Hip clang device compiler - using std::abs; - using std::sqrt; -#endif - -#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_HIP - #include // For `thrust::system_error` - #include // For `thrust::hip_category` -#endif - -#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - #include // For `thrust::system_error` - #include // For `thrust::cuda_category` -#endif - -// We don't use THRUST_PP_STRINGIZE and THRUST_PP_CAT because they are new, and -// we want this benchmark to be backwards-compatible to older versions of Thrust. -#define PP_STRINGIZE_(expr) #expr -#define PP_STRINGIZE(expr) PP_STRINGIZE_(expr) - -#define PP_CAT(a, b) a ## b -/////////////////////////////////////////////////////////////////////////////// - -template -struct squared_difference -{ -private: - T const average; - -public: - __host__ __device__ - squared_difference(squared_difference const& rhs) : average(rhs.average) {} - - __host__ __device__ - squared_difference(T average_) : average(average_) {} - - __host__ __device__ - T operator()(T x) const - { - return (x - average) * (x - average); - } -}; - -template -struct value_and_count -{ - T value; - uint64_t count; - - __host__ __device__ - value_and_count(value_and_count const& other) - : value(other.value), count(other.count) {} - - __host__ __device__ - value_and_count(T const& value_) - : value(value_), count(1) {} - - __host__ __device__ - value_and_count(T const& value_, uint64_t count_) - : value(value_), count(count_) {} - - __host__ __device__ - value_and_count& operator=(value_and_count const& other) - { - value = other.value; - count = other.count; - return *this; - } - - __host__ __device__ - value_and_count& operator=(T 
const& value_) - { - value = value_; - count = 1; - return *this; - } -}; - -template -struct counting_op -{ -private: - ReduceOp reduce; - -public: - __host__ __device__ - counting_op() : reduce() {} - - __host__ __device__ - counting_op(counting_op const& other) : reduce(other.reduce) {} - - __host__ __device__ - counting_op(ReduceOp const& reduce_) : reduce(reduce_) {} - - __host__ __device__ - value_and_count operator()( - value_and_count const& x - , T const& y - ) const - { - return value_and_count(reduce(x.value, y), x.count + 1); - } - - __host__ __device__ - value_and_count operator()( - value_and_count const& x - , value_and_count const& y - ) const - { - return value_and_count(reduce(x.value, y.value), x.count + y.count); - } -}; - -template -T arithmetic_mean(InputIt first, InputIt last, T init) -{ - value_and_count init_vc(init, 0); - - counting_op > reduce_vc; - - value_and_count vc - = thrust::reduce(first, last, init_vc, reduce_vc); - - return vc.value / vc.count; -} - -template -typename thrust::iterator_traits::value_type -arithmetic_mean(InputIt first, InputIt last) -{ - typedef typename thrust::iterator_traits::value_type T; - return arithmetic_mean(first, last, T()); -} - -template -T sample_standard_deviation(InputIt first, InputIt last, T average) -{ - value_and_count init_vc(T(), 0); - - counting_op > reduce_vc; - - squared_difference transform(average); - - value_and_count vc - = thrust::transform_reduce(first, last, transform, init_vc, reduce_vc); - - return sqrt(vc.value / T(vc.count - 1)); -} - -template ::value> -struct partition_predicate -{ -}; - -template -struct partition_predicate -{ - __host__ __device__ bool operator()(T x) const - { - return (x % 2) == 0; - } -}; - -template -struct partition_predicate -{ - __host__ __device__ bool operator()(T x) const - { - return x > 0.5; - } -}; - -/////////////////////////////////////////////////////////////////////////////// - -// Formulas for propagation of uncertainty from: -// -// 
https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulas -// -// Even though it's Wikipedia, I trust it as I helped write that table. -// -// XXX Replace with a proper reference. - -// Compute the propagated uncertainty from the multiplication of two uncertain -// values, `A +/- A_unc` and `B +/- B_unc`. Given `f = AB` or `f = A/B`, where -// `A != 0` and `B != 0`, the uncertainty in `f` is approximately: -// -// f_unc = abs(f) * sqrt((A_unc / A) ^ 2 + (B_unc / B) ^ 2) -// -template -__host__ __device__ -T uncertainty_multiplicative( - T const& f - , T const& A, T const& A_unc - , T const& B, T const& B_unc - ) -{ - return abs(f) - * sqrt((A_unc / A) * (A_unc / A) + (B_unc / B) * (B_unc / B)); -} - -// Compute the propagated uncertainty from addition of two uncertain values, -// `A +/- A_unc` and `B +/- B_unc`. Given `f = cA + dB` (where `c` and `d` are -// certain constants), the uncertainty in `f` is approximately: -// -// f_unc = sqrt(c ^ 2 * A_unc ^ 2 + d ^ 2 * B_unc ^ 2) -// -template -__host__ __device__ -T uncertainty_additive( - T const& c, T const& A_unc - , T const& d, T const& B_unc - ) -{ - return sqrt((c * c * A_unc * A_unc) + (d * d * B_unc * B_unc)); -} - -/////////////////////////////////////////////////////////////////////////////// - -// Return the significant digit of `x`. The result is the number of digits -// after the decimal place to round to (negative numbers indicate rounding -// before the decimal place) -template -int find_significant_digit(T x) -{ - if (x == T(0)) return T(0); - return -int(std::floor(std::log10(std::abs(x)))); -} - -// Round `x` to `ndigits` after the decimal place (Python-style). -template -T round_to_precision(T x, N ndigits) -{ - double m = (x < 0.0) ? 
-1.0 : 1.0; - double pwr = std::pow(T(10.0), ndigits); - return (std::floor(x * m * pwr + 0.5) / pwr) * m; -} - -/////////////////////////////////////////////////////////////////////////////// - -void print_experiment_header() -{ // {{{ - std::cout << "Thrust Version" - << "," << "Algorithm" - << "," << "Element Type" - << "," << "Element Size" - << "," << "Elements per Trial" - << "," << "Total Input Size" - << "," << "STL Trials" - << "," << "STL Average Walltime" - << "," << "STL Walltime Uncertainty" - << "," << "STL Average Throughput" - << "," << "STL Throughput Uncertainty" - << "," << "Thrust Trials" - << "," << "Thrust Average Walltime" - << "," << "Thrust Walltime Uncertainty" - << "," << "Thrust Average Throughput" - << "," << "Thrust Throughput Uncertainty" - #if defined(HAVE_TBB) - << "," << "TBB Trials" - << "," << "TBB Average Walltime" - << "," << "TBB Walltime Uncertainty" - << "," << "TBB Average Throughput" - << "," << "TBB Throughput Uncertainty" - #endif - << std::endl; - - std::cout << "" // Thrust Version. - << "," << "" // Algorithm. - << "," << "" // Element Type. - << "," << "bits/element" // Element Size. - << "," << "elements" // Elements per Trial. - << "," << "MiBs" // Total Input Size. - << "," << "trials" // STL Trials. - << "," << "secs" // STL Average Walltime. - << "," << "secs" // STL Walltime Uncertainty. - << "," << "elements/sec" // STL Average Throughput. - << "," << "elements/sec" // STL Throughput Uncertainty. - << "," << "trials" // Thrust Trials. - << "," << "secs" // Thrust Average Walltime. - << "," << "secs" // Thrust Walltime Uncertainty. - << "," << "elements/sec" // Thrust Average Throughput. - << "," << "elements/sec" // Thrust Throughput Uncertainty. - #if defined(HAVE_TBB) - << "," << "trials" // TBB Trials. - << "," << "secs" // TBB Average Walltime. - << "," << "secs" // TBB Walltime Uncertainty. - << "," << "elements/sec" // TBB Average Throughput. - << "," << "elements/sec" // TBB Throughput Uncertainty. 
- #endif - << std::endl; -} // }}} - -/////////////////////////////////////////////////////////////////////////////// - -struct experiment_results -{ - double const average_time; // Arithmetic mean of trial times in seconds. - double const stdev_time; // Sample standard deviation of trial times. - - experiment_results(double average_time_, double stdev_time_) - : average_time(average_time_), stdev_time(stdev_time_) {} -}; - -/////////////////////////////////////////////////////////////////////////////// - -template < - template class Test - , typename ElementMetaType // Has an embedded typedef `type, - // and a static method `name` that - // returns a char const*. - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -struct experiment_driver -{ - typedef typename ElementMetaType::type element_type; - - static char const* const test_name; - static char const* const element_type_name; // Element type name as a string. - - static uint64_t const elements; // # of elements per trial. - static uint64_t const element_size; // Size of each element in bits. - static double const input_size; // `elements` * `element_size` in MiB. - static uint64_t const baseline_trials; // # of baseline trials per experiment. - static uint64_t const regular_trials; // # of regular trials per experiment. 
- - static void run_experiment() - { // {{{ - experiment_results stl = std_experiment(); - experiment_results thrust = thrust_experiment(); - #if defined(HAVE_TBB) - experiment_results tbb = tbb_experiment(); - #endif - - double stl_average_walltime = stl.average_time; - double thrust_average_walltime = thrust.average_time; - #if defined(HAVE_TBB) - double tbb_average_walltime = tbb.average_time; - #endif - - double stl_average_throughput = elements / stl.average_time; - double thrust_average_throughput = elements / thrust.average_time; - #if defined(HAVE_TBB) - double tbb_average_throughput = elements / tbb.average_time; - #endif - - double stl_walltime_uncertainty = stl.stdev_time; - double thrust_walltime_uncertainty = thrust.stdev_time; - #if defined(HAVE_TBB) - double tbb_walltime_uncertainty = tbb.stdev_time; - #endif - - double stl_throughput_uncertainty = uncertainty_multiplicative( - stl_average_throughput - , double(elements), 0.0 - , stl_average_walltime, stl_walltime_uncertainty - ); - double thrust_throughput_uncertainty = uncertainty_multiplicative( - thrust_average_throughput - , double(elements), 0.0 - , thrust_average_walltime, thrust_walltime_uncertainty - ); - - #if defined(HAVE_TBB) - double tbb_throughput_uncertainty = uncertainty_multiplicative( - tbb_average_throughput - , double(elements), 0.0 - , tbb_average_walltime, tbb_walltime_uncertainty - ); - #endif -/* - // Round the average walltime and walltime uncertainty to the - // significant figure of the walltime uncertainty. 
- int stl_walltime_precision = std::max( - find_significant_digit(stl.average_time) - , find_significant_digit(stl.stdev_time) - ); - int thrust_walltime_precision = std::max( - find_significant_digit(thrust.average_time) - , find_significant_digit(thrust.stdev_time) - ); - #if defined(HAVE_TBB) - int tbb_walltime_precision = std::max( - find_significant_digit(tbb.average_time) - , find_significant_digit(tbb.stdev_time) - ); - #endif - - stl_average_walltime = round_to_precision( - stl_average_walltime, stl_walltime_precision - ); - thrust_average_walltime = round_to_precision( - thrust_average_walltime, thrust_walltime_precision - ); - #if defined(HAVE_TBB) - tbb_average_walltime = round_to_precision( - tbb_average_walltime, tbb_walltime_precision - ); - #endif - - stl_walltime_uncertainty = round_to_precision( - stl_walltime_uncertainty, stl_walltime_precision - ); - thrust_walltime_uncertainty = round_to_precision( - thrust_walltime_uncertainty, thrust_walltime_precision - ); - #if defined(HAVE_TBB) - tbb_walltime_uncertainty = round_to_precision( - tbb_walltime_uncertainty, tbb_walltime_precision - ); - #endif - - // Round the average throughput and throughput uncertainty to the - // significant figure of the throughput uncertainty. 
- int stl_throughput_precision = std::max( - find_significant_digit(stl_average_throughput) - , find_significant_digit(stl_throughput_uncertainty) - ); - int thrust_throughput_precision = std::max( - find_significant_digit(thrust_average_throughput) - , find_significant_digit(thrust_throughput_uncertainty) - ); - #if defined(HAVE_TBB) - int tbb_throughput_precision = std::max( - find_significant_digit(tbb_average_throughput) - , find_significant_digit(tbb_throughput_uncertainty) - ); - #endif - - stl_average_throughput = round_to_precision( - stl_average_throughput, stl_throughput_precision - ); - thrust_average_throughput = round_to_precision( - thrust_average_throughput, thrust_throughput_precision - ); - #if defined(HAVE_TBB) - tbb_average_throughput = round_to_precision( - tbb_average_throughput, tbb_throughput_precision - ); - #endif - - stl_throughput_uncertainty = round_to_precision( - stl_throughput_uncertainty, stl_throughput_precision - ); - thrust_throughput_uncertainty = round_to_precision( - thrust_throughput_uncertainty, thrust_throughput_precision - ); - #if defined(HAVE_TBB) - tbb_throughput_uncertainty = round_to_precision( - tbb_throughput_uncertainty, tbb_throughput_precision - ); - #endif -*/ - std::cout << THRUST_VERSION // Thrust Version. - << "," << test_name // Algorithm. - << "," << element_type_name // Element Type. - << "," << element_size // Element Size. - << "," << elements // Elements per Trial. - << "," << input_size // Total Input Size. - << "," << baseline_trials // STL Trials. - << "," << stl_average_walltime // STL Average Walltime. - << "," << stl_walltime_uncertainty // STL Walltime Uncertainty. - << "," << stl_average_throughput // STL Average Throughput. - << "," << stl_throughput_uncertainty // STL Throughput Uncertainty. - << "," << regular_trials // Thrust Trials. - << "," << thrust_average_walltime // Thrust Average Walltime. - << "," << thrust_walltime_uncertainty // Thrust Walltime Uncertainty. 
- << "," << thrust_average_throughput // Thrust Average Throughput. - << "," << thrust_throughput_uncertainty // Thrust Throughput Uncertainty. - #if defined(HAVE_TBB) - << "," << regular_trials // TBB Trials. - << "," << tbb_average_walltime // TBB Average Walltime. - << "," << tbb_walltime_uncertainty // TBB Walltime Uncertainty. - << "," << tbb_average_throughput // TBB Average Throughput. - << "," << tbb_throughput_uncertainty // TBB Throughput Uncertainty. - #endif - << std::endl; - } // }}} - -private: - static experiment_results std_experiment() - { - return experiment::std_trial>(); - } - - static experiment_results thrust_experiment() - { - return experiment::thrust_trial>(); - } - - #if defined(HAVE_TBB) - static experiment_results tbb_experiment() - { - return experiment::tbb_trial>(); - } - #endif - - template - static experiment_results experiment() - { // {{{ - Trial trial; - - // Allocate storage and generate random input for the warmup trial. - trial.setup(elements); - - // Warmup trial. - trial(); - - uint64_t const trials - = trial.is_baseline() ? baseline_trials : regular_trials; - - std::vector times; - times.reserve(trials); - - for (uint64_t t = 0; t < trials; ++t) - { - // Generate random input for next trial. - trial.setup(elements); - - steady_timer e; - - // Benchmark. 
- e.start(); - trial(); - e.stop(); - - times.push_back(e.seconds_elapsed()); - } - - double average_time - = arithmetic_mean(times.begin(), times.end()); - - double stdev_time - = sample_standard_deviation(times.begin(), times.end(), average_time); - - return experiment_results(average_time, stdev_time); - } // }}} -}; - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -char const* const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::test_name - = Test::test_name(); - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -char const* const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::element_type_name - = ElementMetaType::name(); - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -uint64_t const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::element_size - = CHAR_BIT * sizeof(typename ElementMetaType::type); - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -uint64_t const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::elements - = Elements; - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -double const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::input_size - = double( Elements /* [elements] */ - * sizeof(typename ElementMetaType::type) /* [bytes/element] */ - ) - / double(1024 * 1024 /* [bytes/MiB] */); - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , 
uint64_t BaselineTrials - , uint64_t RegularTrials -> -uint64_t const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::baseline_trials - = BaselineTrials; - -template < - template class Test - , typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -uint64_t const -experiment_driver< - Test, ElementMetaType, Elements, BaselineTrials, RegularTrials ->::regular_trials - = RegularTrials; - -/////////////////////////////////////////////////////////////////////////////// - -// Never create variables, pointers or references of any of the `*_trial_base` -// classes. They are purely mixin base classes and do not have vtables and -// virtual destructors. Using them for polymorphism instead of composition will -// probably cause slicing. - -struct baseline_trial {}; -struct regular_trial {}; - -template -struct trial_base; - -template <> -struct trial_base -{ - static bool is_baseline() { return true; } -}; - -template <> -struct trial_base -{ - static bool is_baseline() { return false; } -}; - -template -struct inplace_trial_base : trial_base -{ - Container input; - - void setup(uint64_t elements) - { - input.resize(elements); - - randomize(input); - } -}; - -template -struct copy_trial_base : trial_base -{ - Container input; - Container output; - - void setup(uint64_t elements) - { - input.resize(elements); - output.resize(elements); - - randomize(input); - } -}; - -template -struct shuffle_trial_base : trial_base -{ - Container input; - - void setup(uint64_t elements) - { - input.resize(elements); - - randomize(input); - } -}; - -template -struct partition_trial_base : trial_base -{ - Container input; - - void setup(uint64_t elements) - { - input.resize(elements); - - randomize(input); - } -}; - -template -struct partition_copy_trial_base : trial_base -{ - Container input; - Container out_true; - Container out_false; - - void setup(uint64_t elements) - { - input.resize(elements); - 
out_true.resize(elements); - out_false.resize(elements); - - randomize(input); - } -}; - -template -struct partition_stencil_trial_base : trial_base -{ - Container input; - Container stencil; - - void setup(uint64_t elements) - { - input.resize(elements); - stencil.resize(elements); - - randomize(input); - randomize(stencil); - } -}; - -template -struct partition_copy_stencil_trial_base : trial_base -{ - Container input; - Container out_true; - Container out_false; - Container stencil; - - void setup(uint64_t elements) - { - input.resize(elements); - out_true.resize(elements); - out_false.resize(elements); - stencil.resize(elements); - - randomize(input); - randomize(stencil); - } -}; - -/////////////////////////////////////////////////////////////////////////////// - -template -struct reduce_tester -{ - static char const* test_name() { return "reduce"; } - - struct std_trial : inplace_trial_base, baseline_trial> - { - void operator()() - { - if (std::accumulate(this->input.begin(), this->input.end(), T(0)) == 0) - // Prevent optimizer from removing body. 
- std::cout << "xyz"; - } - }; - - struct thrust_trial : inplace_trial_base > - { - void operator()() - { - thrust::reduce(this->input.begin(), this->input.end()); - } - }; - - #if defined(HAVE_TBB) - struct tbb_trial : inplace_trial_base > - { - void operator()() - { - tbb_reduce(this->input); - } - }; - #endif -}; - -template -struct sort_tester -{ - static char const* test_name() { return "sort"; } - - struct std_trial : inplace_trial_base, baseline_trial> - { - void operator()() - { - std::sort(this->input.begin(), this->input.end()); - } - }; - - struct thrust_trial : inplace_trial_base > - { - void operator()() - { - thrust::sort(this->input.begin(), this->input.end()); - #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw thrust::error_code(err, thrust::cuda_category()); - #endif - } - }; - - #if defined(HAVE_TBB) - struct tbb_trial : inplace_trial_base > - { - void operator()() - { - tbb_sort(this->input); - } - } - #endif -}; - - -template -struct transform_inplace_tester -{ - static char const* test_name() { return "transform_inplace"; } - - struct std_trial : inplace_trial_base, baseline_trial> - { - void operator()() - { - std::transform( - this->input.begin(), this->input.end(), this->input.begin() - , thrust::negate() - ); - } - }; - - struct thrust_trial : inplace_trial_base > - { - void operator()() - { - thrust::transform( - this->input.begin(), this->input.end(), this->input.begin() - , thrust::negate() - ); - #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw thrust::error_code(err, thrust::cuda_category()); - #endif - } - }; - - #if defined(HAVE_TBB) - struct tbb_trial : inplace_trial_base > - { - void operator()() - { - tbb_transform(this->input); - } - }; - #endif -}; - -template -struct inclusive_scan_inplace_tester -{ - static char const* test_name() { return 
"inclusive_scan_inplace"; } - - struct std_trial : inplace_trial_base, baseline_trial> - { - void operator()() - { - std::partial_sum( - this->input.begin(), this->input.end(), this->input.begin() - ); - } - }; - - struct thrust_trial : inplace_trial_base > - { - void operator()() - { - thrust::inclusive_scan( - this->input.begin(), this->input.end(), this->input.begin() - ); - #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw thrust::error_code(err, thrust::cuda_category()); - #endif - } - }; - - #if defined(HAVE_TBB) - struct tbb_trial : inplace_trial_base > - { - void operator()() - { - tbb_scan(this->input); - } - }; - #endif -}; - -template -struct copy_tester -{ - static char const* test_name() { return "copy"; } - - struct std_trial : copy_trial_base > - { - void operator()() - { - std::copy(this->input.begin(), this->input.end(), this->output.begin()); - } - }; - - struct thrust_trial : copy_trial_base > - { - void operator()() - { - thrust::copy(this->input.begin(), this->input.end(), this->input.begin()); - #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw thrust::error_code(err, thrust::cuda_category()); - #endif - } - }; - - #if defined(HAVE_TBB) - struct tbb_trial : copy_trial_base > - { - void operator()() - { - tbb_copy(this->input, this->output); - } - }; - #endif -}; - -template -struct shuffle_tester -{ - static char const* test_name() { return "shuffle"; } - - struct std_trial : shuffle_trial_base, baseline_trial> - { - std::default_random_engine g; - void operator()() - { - std::shuffle(this->input.begin(), this->input.end(), this->g); - } - }; - - struct thrust_trial : shuffle_trial_base > - { - thrust::default_random_engine g; - void operator()() - { - thrust::shuffle(this->input.begin(), this->input.end(), this->g); - #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - 
cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw thrust::error_code(err, thrust::cuda_category()); - #endif - } - }; -}; - -template -struct partition_tester -{ - static char const* test_name() - { - return "partition"; - } - - struct std_trial : partition_trial_base> - { - void operator()() - { - std::partition(this->input.begin(), this->input.end(), partition_predicate {}); - } - }; - - struct thrust_trial : partition_trial_base> - { - void operator()() - { - thrust::partition(this->input.begin(), this->input.end(), partition_predicate {}); - } - }; -}; - -template -struct partition_copy_tester -{ - static char const* test_name() - { - return "partition_copy"; - } - - struct std_trial : partition_copy_trial_base> - { - void operator()() - { - std::partition_copy(this->input.begin(), - this->input.end(), - this->out_true.begin(), - this->out_false.begin(), - partition_predicate {}); - } - }; - struct thrust_trial : partition_copy_trial_base> - { - void operator()() - { - thrust::partition_copy(this->input.begin(), - this->input.end(), - this->out_true.begin(), - this->out_false.begin(), - partition_predicate {}); - } - }; -}; - -template -struct partition_stencil_tester -{ - static char const* test_name() - { - return "partition_stencil"; - } - - struct std_trial : partition_stencil_trial_base> - { - void operator()() - { - std::vector> zipped(this->input.size()); - std::transform(this->input.begin(), this->input.end(), this->stencil.begin(), zipped.begin(), [](T a, T b) { - return std::tuple {a, b}; - }); - std::partition(zipped.begin(), zipped.end(), [](std::tuple t) { - return partition_predicate {}(std::get<1>(t)); - }); - std::transform(zipped.begin(), zipped.end(), this->input.begin(), [](std::tuple t) { - return std::get<0>(t); - }); - } - }; - - struct thrust_trial : partition_stencil_trial_base> - { - void operator()() - { - thrust::partition( - this->input.begin(), this->input.end(), this->stencil.begin(), partition_predicate 
{}); -#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if(err != cudaSuccess) - throw thrust::error_code(err, thrust::cuda_category()); -#endif - } - }; -}; - -template -struct partition_copy_stencil_tester -{ - static char const* test_name() - { - return "partition_copy_stencil"; - } - - struct std_trial : partition_copy_stencil_trial_base> - { - void operator()() - { - std::vector> zipped(this->input.size()); - std::vector> zipped_true{}; - std::vector> zipped_false{}; - - std::transform(this->input.begin(), this->input.end(), this->stencil.begin(), zipped.begin(), [](T a, T b) { - return std::tuple {a, b}; - }); - std::partition_copy(zipped.begin(), zipped.end(), std::back_inserter(zipped_true), std::back_inserter(zipped_false), [](std::tuple t) { - return partition_predicate {}(std::get<1>(t)); - }); - std::transform(zipped_true.begin(), zipped_true.end(), this->out_true.begin(), [](std::tuple t) { - return std::get<0>(t); - }); - std::transform(zipped_false.begin(), zipped_false.end(), this->out_false.begin(), [](std::tuple t) { - return std::get<0>(t); - }); - } - }; - struct thrust_trial : partition_copy_stencil_trial_base> - { - void operator()() - { - thrust::partition_copy(this->input.begin(), - this->input.end(), - this->stencil.begin(), - this->out_true.begin(), - this->out_false.begin(), - partition_predicate {}); - } - }; -}; - -/////////////////////////////////////////////////////////////////////////////// - -template < - typename ElementMetaType - , uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -void run_core_primitives_experiments_for_type() -{ - experiment_driver< - reduce_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - transform_inplace_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , 
RegularTrials - >::run_experiment(); - - experiment_driver< - inclusive_scan_inplace_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - sort_tester - , ElementMetaType -// , Elements / sizeof(typename ElementMetaType::type) - , (Elements >> 6) // Sorting is more sensitive to element count than - // memory footprint. - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - copy_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - shuffle_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - partition_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - partition_copy_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - partition_stencil_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); - - experiment_driver< - partition_copy_stencil_tester - , ElementMetaType - , Elements / sizeof(typename ElementMetaType::type) - , BaselineTrials - , RegularTrials - >::run_experiment(); -} - -/////////////////////////////////////////////////////////////////////////////// - -#define DEFINE_ELEMENT_META_TYPE(T) \ - struct PP_CAT(T, _meta) \ - { \ - typedef T type; \ - \ - static char const* name() { return PP_STRINGIZE(T); } \ - }; \ - /**/ - -DEFINE_ELEMENT_META_TYPE(char); -DEFINE_ELEMENT_META_TYPE(int); -DEFINE_ELEMENT_META_TYPE(int8_t); -DEFINE_ELEMENT_META_TYPE(int16_t); -DEFINE_ELEMENT_META_TYPE(int32_t); 
-DEFINE_ELEMENT_META_TYPE(int64_t); -DEFINE_ELEMENT_META_TYPE(float); -DEFINE_ELEMENT_META_TYPE(double); - -/////////////////////////////////////////////////////////////////////////////// - -template < - uint64_t Elements - , uint64_t BaselineTrials - , uint64_t RegularTrials -> -void run_core_primitives_experiments() -{ - run_core_primitives_experiments_for_type< - char_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - int_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - int8_t_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - int16_t_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - int32_t_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - int64_t_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - float_meta, Elements, BaselineTrials, RegularTrials - >(); - run_core_primitives_experiments_for_type< - double_meta, Elements, BaselineTrials, RegularTrials - >(); -} - -/////////////////////////////////////////////////////////////////////////////// - -// XXX Use `std::string_view` when possible. 
-std::vector split(std::string const& str, std::string const& delim) -{ - std::vector tokens; - std::string::size_type prev = 0, pos = 0; - do - { - pos = str.find(delim, prev); - if (pos == std::string::npos) pos = str.length(); - std::string token = str.substr(prev, pos - prev); - if (!token.empty()) tokens.push_back(token); - prev = pos + delim.length(); - } - while (pos < str.length() && prev < str.length()); - return tokens; -} - -/////////////////////////////////////////////////////////////////////////////// - -struct command_line_option_error : std::exception -{ - virtual ~command_line_option_error() noexcept {} - virtual const char* what() const noexcept = 0; -}; - -struct only_one_option_allowed : command_line_option_error -{ - // Construct a new `only_one_option_allowed` exception. `key` is the - // option name and `[first, last)` is a sequence of - // `std::pair`s (the values). - template - only_one_option_allowed(std::string const& key, InputIt first, InputIt last) - : message() - { - message = "Only one `--"; - message += key; - message += "` option is allowed, but multiple were received: "; - - for (; first != last; ++first) - { - message += "`"; - message += (*first).second; - message += "` "; - } - - // Remove the trailing space added by the last iteration of the above loop. - message.erase(message.size() - 1, 1); - - message += "."; - } - - virtual ~only_one_option_allowed() noexcept {} - - virtual const char* what() const noexcept - { - return message.c_str(); - } - -private: - std::string message; -}; - -struct required_option_missing : command_line_option_error -{ - // Construct a new `requirement_option_missing` exception. `key` is the - // option name. 
- required_option_missing(std::string const& key) - : message() - { - message = "`--"; - message += key; - message += "` option is required."; - } - - virtual ~required_option_missing() noexcept {} - - virtual const char* what() const noexcept - { - return message.c_str(); - } - -private: - std::string message; -}; - -struct command_line_processor -{ - typedef std::vector positional_options_type; - - typedef std::multimap keyword_options_type; - - typedef std::pair< - keyword_options_type::const_iterator - , keyword_options_type::const_iterator - > keyword_option_values; - - command_line_processor(int argc, char** argv) - : pos_args(), kw_args() - { // {{{ - for (int i = 1; i < argc; ++i) - { - std::string arg(argv[i]); - - // Look for --key or --key=value options. - if (arg.substr(0, 2) == "--") - { - std::string::size_type n = arg.find('=', 2); - - keyword_options_type::value_type key_value; - - if (n == std::string::npos) // --key - kw_args.insert(keyword_options_type::value_type( - arg.substr(2), "" - )); - else // --key=value - kw_args.insert(keyword_options_type::value_type( - arg.substr(2, n - 2), arg.substr(n + 1) - )); - - kw_args.insert(key_value); - } - else // Assume it's positional. - pos_args.push_back(arg); - } - } // }}} - - // Return the value for option `key`. - // - // Throws: - // * `only_one_option_allowed` if there is more than one value for `key`. - // * `required_option_missing` if there is no value for `key`. - std::string operator()(std::string const& key) const - { - keyword_option_values v = kw_args.equal_range(key); - - keyword_options_type::difference_type d = std::distance(v.first, v.second); - - if (1 < d) // Too many options. - throw only_one_option_allowed(key, v.first, v.second); - else if (0 == d) // No option. - throw required_option_missing(key); - - return (*v.first).second; - } - - // Return the value for option `key`, or `dflt` if `key` has no value. 
- // - // Throws: `only_one_option_allowed` if there is more than one value for `key`. - std::string operator()(std::string const& key, std::string const& dflt) const - { - keyword_option_values v = kw_args.equal_range(key); - - keyword_options_type::difference_type d = std::distance(v.first, v.second); - - if (1 < d) // Too many options. - throw only_one_option_allowed(key, v.first, v.second); - - if (0 == d) // No option. - return dflt; - else // 1 option. - return (*v.first).second; - } - - // Returns `true` if the option `key` was specified at least once. - bool has(std::string const& key) const - { - return kw_args.count(key) > 0; - } - -private: - positional_options_type pos_args; - keyword_options_type kw_args; -}; - -/////////////////////////////////////////////////////////////////////////////// - -int main(int argc, char** argv) -{ - command_line_processor clp(argc, argv); - - #if defined(HAVE_TBB) - tbb::task_scheduler_init init; - - test_tbb(); - #endif - - #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - // Set the CUDA device to use for the benchmark - `0` by default. - - int device = std::atoi(clp("device", "0").c_str()); - // `std::atoi` returns 0 if the conversion fails. 
- - cudaSetDevice(device); - #endif - - if (!clp.has("no-header")) - print_experiment_header(); - - /* Elements | Trials */ - /* | Baseline | Regular */ -//run_core_primitives_experiments< 1LLU << 21LLU , 4 , 16 >(); -//run_core_primitives_experiments< 1LLU << 22LLU , 4 , 16 >(); -//run_core_primitives_experiments< 1LLU << 23LLU , 4 , 16 >(); -//run_core_primitives_experiments< 1LLU << 24LLU , 4 , 16 >(); -//run_core_primitives_experiments< 1LLU << 25LLU , 4 , 16 >(); - run_core_primitives_experiments< 1LLU << 26LLU , 4 , 16 >(); - run_core_primitives_experiments< 1LLU << 27LLU , 4 , 16 >(); -//run_core_primitives_experiments< 1LLU << 28LLU , 4 , 16 >(); -//run_core_primitives_experiments< 1LLU << 29LLU , 4 , 16 >(); - - return 0; -} - -// TODO: Add different input sizes and half precision diff --git a/internal/benchmark/combine_benchmark_results.py b/internal/benchmark/combine_benchmark_results.py deleted file mode 100644 index f82b21f80..000000000 --- a/internal/benchmark/combine_benchmark_results.py +++ /dev/null @@ -1,817 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -############################################################################### -# Copyright (c) 2012-7 Bryce Adelstein Lelbach aka wash -# -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -############################################################################### - -############################################################################### -# Copyright (c) 2018 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -############################################################################### - -# XXX Put code shared with `compare_benchmark_results.py` in a common place. - -# XXX Relative uncertainty. - -from sys import exit, stdout - -from os.path import splitext - -from itertools import imap # Lazy map. - -from math import sqrt, log10, floor - -from collections import deque - -from argparse import ArgumentParser as argument_parser - -from csv import DictReader as csv_dict_reader -from csv import DictWriter as csv_dict_writer - -from re import compile as regex_compile - -############################################################################### - -def unpack_tuple(f): - """Return a unary function that calls `f` with its argument unpacked.""" - return lambda args: f(*iter(args)) - -def strip_dict(d): - """Strip leading and trailing whitespace from all keys and values in `d`.""" - d.update({key: value.strip() for (key, value) in d.items()}) - -def merge_dicts(d0, d1): - """Create a new `dict` that is the union of `dict`s `d0` and `d1`.""" - d = d0.copy() - d.update(d1) - return d - -def strip_list(l): - """Strip leading and trailing whitespace from all values in `l`.""" - for i, value in enumerate(l): l[i] = value.strip() - -############################################################################### - -def int_or_float(x): - """Convert `x` to either `int` or `float`, preferring `int`. 
- - Raises: - ValueError : If `x` is not convertible to either `int` or `float` - """ - try: - return int(x) - except ValueError: - return float(x) - -def try_int_or_float(x): - """Try to convert `x` to either `int` or `float`, preferring `int`. `x` is - returned unmodified if conversion fails. - """ - try: - return int_or_float(x) - except ValueError: - return x - -############################################################################### - -def find_significant_digit(x): - """Return the significant digit of the number x. The result is the number of - digits after the decimal place to round to (negative numbers indicate rounding - before the decimal place).""" - if x == 0: return 0 - return -int(floor(log10(abs(x)))) - -def round_with_int_conversion(x, ndigits = None): - """Rounds `x` to `ndigits` after the the decimal place. If `ndigits` is less - than 1, convert the result to `int`. If `ndigits` is `None`, the significant - digit of `x` is used.""" - if ndigits is None: ndigits = find_significant_digit(x) - x_rounded = round(x, ndigits) - return int(x_rounded) if ndigits < 1 else x_rounded - -############################################################################### - -class measured_variable(object): - """A meta-variable representing measured data. It is composed of three raw - variables plus units meta-data. - - Attributes: - quantity (`str`) : - Name of the quantity variable of this object. - uncertainty (`str`) : - Name of the uncertainty variable of this object. - sample_size (`str`) : - Name of the sample size variable of this object. - units (units class or `None`) : - The units the value is measured in. 
- """ - - def __init__(self, quantity, uncertainty, sample_size, units = None): - self.quantity = quantity - self.uncertainty = uncertainty - self.sample_size = sample_size - self.units = units - - def as_tuple(self): - return (self.quantity, self.uncertainty, self.sample_size, self.units) - - def __iter__(self): - return iter(self.as_tuple()) - - def __str__(self): - return str(self.as_tuple()) - - def __repr__(self): - return str(self) - -class measured_value(object): - """An object that represents a value determined by multiple measurements. - - Attributes: - quantity (scalar) : - The quantity of the value, e.g. the arithmetic mean. - uncertainty (scalar) : - The measurement uncertainty, e.g. the sample standard deviation. - sample_size (`int`) : - The number of observations contributing to the value. - units (units class or `None`) : - The units the value is measured in. - """ - - def __init__(self, quantity, uncertainty, sample_size = 1, units = None): - self.quantity = quantity - self.uncertainty = uncertainty - self.sample_size = sample_size - self.units = units - - def as_tuple(self): - return (self.quantity, self.uncertainty, self.sample_size, self.units) - - def __iter__(self): - return iter(self.as_tuple()) - - def __str__(self): - return str(self.as_tuple()) - - def __repr__(self): - return str(self) - -############################################################################### - -def arithmetic_mean(X): - """Computes the arithmetic mean of the sequence `X`. - - Let: - - * `n = len(X)`. - * `u` denote the arithmetic mean of `X`. - - .. math:: - - u = \frac{\sum_{i = 0}^{n - 1} X_i}{n} - """ - return sum(X) / len(X) - -def sample_variance(X, u = None): - """Computes the sample variance of the sequence `X`. - - Let: - - * `n = len(X)`. - * `u` denote the arithmetic mean of `X`. - * `s` denote the sample standard deviation of `X`. - - .. math:: - - v = \frac{\sum_{i = 0}^{n - 1} (X_i - u)^2}{n - 1} - - Args: - X (`Iterable`) : The sequence of values. 
- u (number) : The arithmetic mean of `X`. - """ - if u is None: u = arithmetic_mean(X) - return sum(imap(lambda X_i: (X_i - u) ** 2, X)) / (len(X) - 1) - -def sample_standard_deviation(X, u = None, v = None): - """Computes the sample standard deviation of the sequence `X`. - - Let: - - * `n = len(X)`. - * `u` denote the arithmetic mean of `X`. - * `v` denote the sample variance of `X`. - * `s` denote the sample standard deviation of `X`. - - .. math:: - - s &= \sqrt{v} - &= \sqrt{\frac{\sum_{i = 0}^{n - 1} (X_i - u)^2}{n - 1}} - - Args: - X (`Iterable`) : The sequence of values. - u (number) : The arithmetic mean of `X`. - v (number) : The sample variance of `X`. - """ - if u is None: u = arithmetic_mean(X) - if v is None: v = sample_variance(X, u) - return sqrt(v) - -def combine_sample_size(As): - """Computes the combined sample variance of a group of `measured_value`s. - - Let: - - * `g = len(As)`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - - .. math:: - - n = \sum{i = 0}^{g - 1} n_i - """ - return sum(imap(unpack_tuple(lambda u_i, s_i, n_i, t_i: n_i), As)) - -def combine_arithmetic_mean(As, n = None): - """Computes the combined arithmetic mean of a group of `measured_value`s. - - Let: - - * `g = len(As)`. - * `u_i = As[i].quantity`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - * `u` denote the arithmetic mean of the quantities of `As`. - - .. math:: - - u = \frac{\sum{i = 0}^{g - 1} n_i u_i}{n} - """ - if n is None: n = combine_sample_size(As) - return sum(imap(unpack_tuple(lambda u_i, s_i, n_i, t_i: n_i * u_i), As)) / n - -def combine_sample_variance(As, n = None, u = None): - """Computes the combined sample variance of a group of `measured_value`s. - - Let: - - * `g = len(As)`. - * `u_i = As[i].quantity`. - * `s_i = As[i].uncertainty`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - * `u` denote the arithmetic mean of the quantities of `As`. 
- * `v` denote the sample variance of `X`. - - .. math:: - - v = \frac{(\sum_{i = 0}^{g - 1} n_i (u_i - u)^2 + s_i^2 (n_i - 1))}{n - 1} - - Args: - As (`Iterable` of `measured_value`s) : The sequence of values. - n (number) : The combined sample sizes of `As`. - u (number) : The combined arithmetic mean of `As`. - """ - if n <= 1: return 0 - if n is None: n = combine_sample_size(As) - if u is None: u = combine_arithmetic_mean(As, n) - return sum(imap(unpack_tuple( - lambda u_i, s_i, n_i, t_i: n_i * (u_i - u) ** 2 + (s_i ** 2) * (n_i - 1) - ), As)) / (n - 1) - -def combine_sample_standard_deviation(As, n = None, u = None, v = None): - """Computes the combined sample standard deviation of a group of - `measured_value`s. - - Let: - - * `g = len(As)`. - * `u_i = As[i].quantity`. - * `s_i = As[i].uncertainty`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - * `u` denote the arithmetic mean of the quantities of `As`. - * `v` denote the sample variance of `X`. - * `s` denote the sample standard deviation of `X`. - - .. math:: - - s &= \sqrt{v} - &= \sqrt{\frac{(\sum_{i = 0}^{g - 1} n_i (u_i - u)^2 + s_i^2 (n_i - 1))}{n - 1}} - - Args: - As (`Iterable` of `measured_value`s) : The sequence of values. - n (number) : The combined sample sizes of `As`. - u (number) : The combined arithmetic mean of `As`. - v (number) : The combined sample variance of `As`. - """ - if n <= 1: return 0 - if n is None: n = combine_sample_size(As) - if u is None: u = combine_arithmetic_mean(As, n) - if v is None: v = combine_sample_variance(As, n, u) - return sqrt(v) - -############################################################################### - -def process_program_arguments(): - ap = argument_parser( - description = ( - "Aggregates the results of multiple runs of benchmark results stored in " - "CSV format." - ) - ) - - ap.add_argument( - "-d", "--dependent-variable", - help = ("Treat the specified three variables as a dependent variable. 
The " - "1st variable is the measured quantity, the 2nd is the uncertainty " - "of the measurement and the 3rd is the sample size. The defaults " - "are the dependent variables of Thrust's benchmark suite. May be " - "specified multiple times."), - action = "append", type = str, dest = "dependent_variables", - metavar = "QUANTITY,UNCERTAINTY,SAMPLES" - ) - - ap.add_argument( - "-p", "--preserve-whitespace", - help = ("Don't trim leading and trailing whitespace from each CSV cell."), - action = "store_true", default = False - ) - - ap.add_argument( - "-o", "--output-file", - help = ("The file that results are written to. If `-`, results are " - "written to stdout."), - action = "store", type = str, default = "-", - metavar = "OUTPUT" - ) - - ap.add_argument( - "input_files", - help = ("Input CSV files. The first two rows should be a header. The 1st " - "header row specifies the name of each variable, and the 2nd " - "header row specifies the units for that variable."), - type = str, nargs = "+", - metavar = "INPUTS" - ) - - return ap.parse_args() - -############################################################################### - -def filter_comments(f, s = "#"): - """Return an iterator to the file `f` which filters out all lines beginning - with `s`.""" - return filter(lambda line: not line.startswith(s), f) - -############################################################################### - -class io_manager(object): - """Manages I/O operations and represents the input data as an `Iterable` - sequence of `dict`s. - - It is `Iterable` and an `Iterator`. It can be used with `with`. - - Attributes: - preserve_whitespace (`bool`) : - If `False`, leading and trailing whitespace is stripped from each CSV cell. - writer (`csv_dict_writer`) : - CSV writer object that the output is written to. - output_file (`file` or `stdout`) : - The output `file` object. - readers (`list` of `csv_dict_reader`s) : - List of input files as CSV reader objects. 
- input_files (list of `file`s) : - List of input `file` objects. - variable_names (`list` of `str`s) : - Names of the variables, in order. - variable_units (`list` of `str`s) : - Units of the variables, in order. - """ - - def __init__(self, input_files, output_file, preserve_whitespace = True): - """Read input files and open the output file and construct a new `io_manager` - object. - - If `preserve_whitespace` is `False`, leading and trailing whitespace is - stripped from each CSV cell. - - Raises - AssertionError : - If `len(input_files) <= 0` or `type(preserve_whitespace) != bool`. - """ - assert len(input_files) > 0, "No input files provided." - - assert type(preserve_whitespace) == bool - - self.preserve_whitespace = preserve_whitespace - - self.readers = deque() - - self.variable_names = None - self.variable_units = None - - self.input_files = deque() - - for input_file in input_files: - input_file_object = open(input_file) - reader = csv_dict_reader(filter_comments(input_file_object)) - - if not self.preserve_whitespace: - strip_list(reader.fieldnames) - - if self.variable_names is None: - self.variable_names = reader.fieldnames - else: - # Make sure all inputs have the same schema. - assert self.variable_names == reader.fieldnames, \ - "Input file (`" + input_file + "`) variable schema `" + \ - str(reader.fieldnames) + "` does not match the variable schema `" + \ - str(self.variable_names) + "`." - - # Consume the next row, which should be the second line of the header. - variable_units = reader.next() - - if not self.preserve_whitespace: - strip_dict(variable_units) - - if self.variable_units is None: - self.variable_units = variable_units - else: - # Make sure all inputs have the same units schema. - assert self.variable_units == variable_units, \ - "Input file (`" + input_file + "`) units schema `" + \ - str(variable_units) + "` does not match the units schema `" + \ - str(self.variable_units) + "`." 
- - self.readers.append(reader) - self.input_files.append(input_file_object) - - if output_file == "-": # Output to stdout. - self.output_file = stdout - else: # Output to user-specified file. - self.output_file = open(output_file, "w") - - self.writer = csv_dict_writer( - self.output_file, fieldnames = self.variable_names - ) - - def __enter__(self): - """Called upon entering a `with` statement.""" - return self - - def __exit__(self, *args): - """Called upon exiting a `with` statement.""" - if self.output_file is stdout: - self.output_file = None - elif self.output_file is not None: - self.output_file.__exit__(*args) - - for input_file in self.input_files: - input_file.__exit__(*args) - - ############################################################################# - # Input Stream. - - def __iter__(self): - """Return an iterator to the input sequence. - - This is a requirement for the `Iterable` protocol. - """ - return self - - def next(self): - """Consume and return the next record (a `dict` representing a CSV row) in - the input. - - This is a requirement for the `Iterator` protocol. - - Raises: - StopIteration : If there is no more input. - """ - if len(self.readers) == 0: - raise StopIteration() - - try: - row = self.readers[0].next() - if not self.preserve_whitespace: strip_dict(row) - return row - except StopIteration: - # The current reader is empty, so pop it, pop it's input file, close the - # input file, and then call ourselves again. - self.readers.popleft() - self.input_files.popleft().close() - return self.next() - - ############################################################################# - # Output. - - def write_header(self): - """Write the header for the output CSV file.""" - # Write the first line of the header. - self.writer.writeheader() - - # Write the second line of the header. 
- self.writer.writerow(self.variable_units) - - def write(self, d): - """Write a record (a `dict`) to the output CSV file.""" - self.writer.writerow(d) - -############################################################################### - -class dependent_variable_parser(object): - """Parses a `--dependent-variable=AVG,STDEV,TRIALS` command line argument.""" - - ############################################################################# - # Grammar - - # Parse a variable_name. - variable_name_rule = r'[^,]+' - - # Parse a variable classification. - dependent_variable_rule = r'(' + variable_name_rule + r')' \ - + r',' \ - + r'(' + variable_name_rule + r')' \ - + r',' \ - + r'(' + variable_name_rule + r')' - - engine = regex_compile(dependent_variable_rule) - - ############################################################################# - - def __call__(self, s): - """Parses the string `s` with the form "AVG,STDEV,TRIALS". - - Returns: - A `measured_variable`. - - Raises: - AssertionError : If parsing fails. - """ - - match = self.engine.match(s) - - assert match is not None, \ - "Dependent variable (-d) `" +s+ "` is invalid, the format is " + \ - "`AVG,STDEV,TRIALS`." - - return measured_variable(match.group(1), match.group(2), match.group(3)) - -############################################################################### - -class record_aggregator(object): - """Consumes and combines records and represents the result as an `Iterable` - sequence of `dict`s. - - It is `Iterable` and an `Iterator`. - - Attributes: - dependent_variables (`list` of `measured_variable`s) : - A list of dependent variables provided on the command line. - dataset (`dict`) : - A mapping of distinguishing (e.g. control + independent) values (`tuple`s - of variable-quantity pairs) to `list`s of dependent values (`dict`s from - variables to lists of cells). - in_order_dataset_keys : - A list of unique dataset keys (e.g. distinguishing variables) in order of - appearance. 
- """ - - parse_dependent_variable = dependent_variable_parser() - - def __init__(self, raw_dependent_variables): - """Parse dependent variables and construct a new `record_aggregator` object. - - Raises: - AssertionError : If parsing of dependent variables fails. - """ - self.dependent_variables = [] - - if raw_dependent_variables is not None: - for variable in raw_dependent_variables: - self.dependent_variables.append(self.parse_dependent_variable(variable)) - - self.dataset = {} - - self.in_order_dataset_keys = deque() - - ############################################################################# - # Insertion. - - def append(self, record): - """Add `record` to the dataset. - - Raises: - ValueError : If any `str`-to-numeric conversions fail. - """ - # The distinguishing variables are the control and independent variables. - # They form the key for each record in the dataset. Records with the same - # distinguishing variables are treated as observations of the same data - # point. - dependent_values = {} - - # To allow the same sample size variable to be used for multiple dependent - # variables, we don't pop sample size variables until we're done processing - # all variables. - sample_size_variables = [] - - # Separate the dependent values from the distinguishing variables and - # perform `str`-to-numeric conversions. - for variable in self.dependent_variables: - quantity, uncertainty, sample_size, units = variable.as_tuple() - - dependent_values[quantity] = [int_or_float(record.pop(quantity))] - dependent_values[uncertainty] = [int_or_float(record.pop(uncertainty))] - dependent_values[sample_size] = [int(record[sample_size])] - - sample_size_variables.append(sample_size) - - # Pop sample size variables. - for sample_size_variable in sample_size_variables: - # Allowed to fail, as we may have duplicates. - record.pop(sample_size_variable, None) - - # `dict`s aren't hashable, so create a tuple of key-value pairs. 
- distinguishing_values = tuple(record.items()) - - if distinguishing_values in self.dataset: - # These distinguishing values already exist, so get the `dict` they're - # mapped to, look up each key in `dependent_values` in the `dict`, and - # add the corresponding quantity in `dependent_values` to the list in the - # the `dict`. - for variable, columns in dependent_values.iteritems(): - self.dataset[distinguishing_values][variable] += columns - else: - # These distinguishing values aren't in the dataset, so add them and - # record them in `in_order_dataset_keys`. - self.dataset[distinguishing_values] = dependent_values - self.in_order_dataset_keys.append(distinguishing_values) - - ############################################################################# - # Postprocessing. - - def combine_dependent_values(self, dependent_values): - """Takes a mapping of dependent variables to lists of cells and returns - a new mapping with the cells combined. - - Raises: - AssertionError : If class invariants were violated. - """ - combined_dependent_values = dependent_values.copy() - - for variable in self.dependent_variables: - quantity, uncertainty, sample_size, units = variable.as_tuple() - - quantities = dependent_values[quantity] - uncertainties = dependent_values[uncertainty] - sample_sizes = dependent_values[sample_size] - - if type(sample_size) is list: - # Sample size hasn't been combined yet. - assert len(quantities) == len(uncertainties) \ - and len(uncertainties) == len(sample_sizes), \ - "Length of quantities list `(" + str(len(quantities)) + ")`, " + \ - "length of uncertainties list `(" + str(len(uncertainties)) + \ - "),` and length of sample sizes list `(" + str(len(sample_sizes)) + \ - ")` are not the same." - else: - # Another dependent variable that uses our sample size has combined it - # already. 
- assert len(quantities) == len(uncertainties), \ - "Length of quantities list `(" + str(len(quantities)) + ")` and " + \ - "length of uncertainties list `(" + str(len(uncertainties)) + \ - ")` are not the same." - - # Convert the three separate `list`s into one list of `measured_value`s. - measured_values = [] - - for i in range(len(quantities)): - mv = measured_value( - quantities[i], uncertainties[i], sample_sizes[i], units - ) - - measured_values.append(mv) - - # Combine the `measured_value`s. - combined_sample_size = combine_sample_size( - measured_values - ) - - combined_arithmetic_mean = combine_arithmetic_mean( - measured_values, combined_sample_size - ) - - combined_sample_standard_deviation = combine_sample_standard_deviation( - measured_values, combined_sample_size, combined_arithmetic_mean - ) - - # Round the quantity and uncertainty to the significant digit of - # uncertainty and insert the combined values into the results. - sigdig = find_significant_digit(combined_sample_standard_deviation) - -# combined_arithmetic_mean = round_with_int_conversion( -# combined_arithmetic_mean, sigdig -# ) - -# combined_sample_standard_deviation = round_with_int_conversion( -# combined_sample_standard_deviation, sigdig -# ) - - combined_dependent_values[quantity] = combined_arithmetic_mean - combined_dependent_values[uncertainty] = combined_sample_standard_deviation - combined_dependent_values[sample_size] = combined_sample_size - - return combined_dependent_values - - ############################################################################# - # Output Stream. - - def __iter__(self): - """Return an iterator to the output sequence of separated distinguishing - variables and dependent variables (a tuple of two `dict`s). - - This is a requirement for the `Iterable` protocol. - """ - return self - - def records(self): - """Return an iterator to the output sequence of CSV rows (`dict`s of - variables to values). 
- """ - return imap(unpack_tuple(lambda dist, dep: merge_dicts(dist, dep)), self) - - def next(self): - """Produce the components of the next output record - a tuple of two - `dict`s. The first `dict` is a mapping of distinguishing variables to - distinguishing values, the second `dict` is a mapping of dependent - variables to combined dependent values. Combining the two dicts forms a - CSV row suitable for output. - - This is a requirement for the `Iterator` protocol. - - Raises: - StopIteration : If there is no more output. - AssertionError : If class invariants were violated. - """ - assert len(self.dataset.keys()) == len(self.in_order_dataset_keys), \ - "Number of dataset keys (`" + str(len(self.dataset.keys())) + \ - "`) is not equal to the number of keys in the ordering list (`" + \ - str(len(self.in_order_dataset_keys)) + "`)." - - if len(self.in_order_dataset_keys) == 0: - raise StopIteration() - - # Get the next set of distinguishing values and convert them to a `dict`. - raw_distinguishing_values = self.in_order_dataset_keys.popleft() - distinguishing_values = dict(raw_distinguishing_values) - - dependent_values = self.dataset.pop(raw_distinguishing_values) - - combined_dependent_values = self.combine_dependent_values(dependent_values) - - return (distinguishing_values, combined_dependent_values) - -############################################################################### - -args = process_program_arguments() - -if args.dependent_variables is None: - args.dependent_variables = [ - "STL Average Walltime,STL Walltime Uncertainty,STL Trials", - "STL Average Throughput,STL Throughput Uncertainty,STL Trials", - "Thrust Average Walltime,Thrust Walltime Uncertainty,Thrust Trials", - "Thrust Average Throughput,Thrust Throughput Uncertainty,Thrust Trials" - ] - -# Read input files and open the output file. -with io_manager(args.input_files, - args.output_file, - args.preserve_whitespace) as iom: - # Parse dependent variable options. 
- ra = record_aggregator(args.dependent_variables) - - # Add all input data to the `record_aggregator`. - for record in iom: - ra.append(record) - - iom.write_header() - - # Write combined results out. - for record in ra.records(): - iom.write(record) - diff --git a/internal/benchmark/compare_benchmark_results.py b/internal/benchmark/compare_benchmark_results.py deleted file mode 100755 index 22e7be8cf..000000000 --- a/internal/benchmark/compare_benchmark_results.py +++ /dev/null @@ -1,1308 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -############################################################################### -# Copyright (c) 2012-7 Bryce Adelstein Lelbach aka wash -# -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -############################################################################### - -############################################################################### -# Copyright (c) 2018 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -############################################################################### - -# XXX Put code shared with `combine_benchmark_results.py` in a common place. - -# XXX Relative uncertainty. - -# XXX Create uncertain value class which is quantity + uncertainty. - -from sys import exit, stdout - -from os.path import splitext - -from itertools import imap # Lazy map. 
- -from math import sqrt, log10, floor - -from collections import deque - -from argparse import ArgumentParser as argument_parser -from argparse import Action as argument_action - -from csv import DictReader as csv_dict_reader -from csv import DictWriter as csv_dict_writer - -from re import compile as regex_compile - -############################################################################### - -def unpack_tuple(f): - """Return a unary function that calls `f` with its argument unpacked.""" - return lambda args: f(*iter(args)) - -def strip_dict(d): - """Strip leading and trailing whitespace from all keys and values in `d`. - - Returns: - The modified dict `d`. - """ - d.update({key: value.strip() for (key, value) in d.items()}) - return d - -def merge_dicts(d0, d1): - """Create a new `dict` that is the union of `dict`s `d0` and `d1`.""" - d = d0.copy() - d.update(d1) - return d - -def change_key_in_dict(d, old_key, new_key): - """Change the key of the entry in `d` with key `old_key` to `new_key`. If - there is an existing entry - - Returns: - The modified dict `d`. - - Raises: - KeyError : If `old_key` is not in `d`. - """ - d[new_key] = d.pop(old_key) - return d - -def key_from_dict(d): - """Create a hashable key from a `dict` by converting the `dict` to a tuple.""" - return tuple(sorted(d.items())) - -def strip_list(l): - """Strip leading and trailing whitespace from all values in `l`.""" - for i, value in enumerate(l): l[i] = value.strip() - return l - -def remove_from_list(l, item): - """Remove the first occurence of `item` from list `l` and return a tuple of - the index that was removed and the element that was removed. - - Raises: - ValueError : If `item` is not in `l`. - """ - idx = l.index(item) - item = l.pop(idx) - return (idx, item) - -############################################################################### - -def int_or_float(x): - """Convert `x` to either `int` or `float`, preferring `int`. 
- - Raises: - ValueError : If `x` is not convertible to either `int` or `float` - """ - try: - return int(x) - except ValueError: - return float(x) - -def try_int_or_float(x): - """Try to convert `x` to either `int` or `float`, preferring `int`. `x` is - returned unmodified if conversion fails. - """ - try: - return int_or_float(x) - except ValueError: - return x - -############################################################################### - -def ranges_overlap(x1, x2, y1, y2): - """Returns true if the ranges `[x1, x2]` and `[y1, y2]` overlap, - where `x1 <= x2` and `y1 <= y2`. - - Raises: - AssertionError : If `x1 > x2` or `y1 > y2`. - """ - assert x1 <= x2 - assert y1 <= y2 - return x1 <= y2 and y1 <= x2 - -def ranges_overlap_uncertainty(x, x_unc, y, y_unc): - """Returns true if the ranges `[x - x_unc, x + x_unc]` and - `[y - y_unc, y + y_unc]` overlap, where `x_unc >= 0` and `y_unc >= 0`. - - Raises: - AssertionError : If `x_unc < 0` or `y_unc < 0`. - """ - assert x_unc >= 0 - assert y_unc >= 0 - return ranges_overlap(x - x_unc, x + x_unc, y - y_unc, y + y_unc) - -############################################################################### - -# Formulas for propagation of uncertainty from: -# -# https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulas -# -# Even though it's Wikipedia, I trust it as I helped write that table. -# -# XXX Replace with a proper reference. - -def uncertainty_multiplicative(f, A, A_abs_unc, B, B_abs_unc): - """Compute the propagated uncertainty from the multiplication of two - uncertain values, `A +/- A_abs_unc` and `B +/- B_abs_unc`. Given `f = AB` or - `f = A/B`, where `A != 0` and `B != 0`, the uncertainty in `f` is - approximately: - - .. math:: - - \sigma_f = |f| \sqrt{\frac{\sigma_A}{A} ^ 2 + \frac{\sigma_B}{B} ^ 2} - - Raises: - ZeroDivisionError : If `A == 0` or `B == 0`. 
- """ - return abs(f) * sqrt((A_abs_unc / A) ** 2 + (B_abs_unc / B) ** 2); - -def uncertainty_additive(c, A_abs_unc, d, B_abs_unc): - """Compute the propagated uncertainty from addition of two uncertain values, - `A +/- A_abs_unc` and `B +/- B_abs_unc`. Given `f = cA + dB`, where `c` and - `d` are certain constants, the uncertainty in `f` is approximately: - - .. math:: - - f_{\sigma} = \sqrt{c ^ 2 * A_{\sigma} ^ 2 + d ^ 2 * B_{\sigma} ^ 2} - """ - return sqrt(((c ** 2) * (A_abs_unc ** 2)) + ((d ** 2) * (B_abs_unc ** 2))) - -############################################################################### - -# XXX Create change class. - -def absolute_change(old, new): - """Computes the absolute change from old to new: - - .. math:: - - absolute_change = new - old - """ - return new - old - -def absolute_change_uncertainty(old, old_unc, new, new_unc): - """Computes the uncertainty in the absolute change from old to new and returns - a tuple of the absolute change and the absolute change uncertainty. - """ - absolute_change = new - old - absolute_change_unc = uncertainty_additive(1.0, new_unc, -1.0, old_unc) - - return (absolute_change, absolute_change_unc) - -def percent_change(old, new): - """Computes the percent change from old to new: - - .. math:: - - percent_change = 100 \frac{new - old}{abs(old)} - """ - return float(new - old) / abs(old) - -def percent_change_uncertainty(old, old_unc, new, new_unc): - """Computes the uncertainty in the percent change from old to new and returns - a tuple of the absolute change, the absolute change uncertainty, the percent - change and the percent change uncertainty. - """ - # Let's break this down into a few sub-operations: - # - # absolute_change = new - old <- Additive propagation. - # relative_change = change / abs(old) <- Multiplicative propagation. - # percent_change = 100 * y <- Multiplicative propagation. - - if old == 0: - # We can't compute relative change because the old value is 0. 
- return (float("nan"), float("nan"), float("nan"), float("nan")) - - (absolute_change, absolute_change_unc) = absolute_change_uncertainty( - old, old_unc, new, new_unc - ) - - if absolute_change == 0: - # We can't compute relative change uncertainty because the relative - # uncertainty of a value of 0 is undefined. - return (absolute_change, absolute_change_unc, float("nan"), float("nan")) - - relative_change = float(absolute_change) / abs(old) - relative_change_unc = uncertainty_multiplicative( - relative_change, absolute_change, absolute_change_unc, old, old_unc - ) - - percent_change = 100.0 * relative_change - percent_change_unc = uncertainty_multiplicative( - percent_change, 100.0, 0.0, relative_change, relative_change_unc - ) - - return ( - absolute_change, absolute_change_unc, percent_change, percent_change_unc - ) - -############################################################################### - -def find_significant_digit(x): - """Return the significant digit of the number x. The result is the number of - digits after the decimal place to round to (negative numbers indicate rounding - before the decimal place).""" - if x == 0: return 0 - return -int(floor(log10(abs(x)))) - -def round_with_int_conversion(x, ndigits = None): - """Rounds `x` to `ndigits` after the the decimal place. If `ndigits` is less - than 1, convert the result to `int`. If `ndigits` is `None`, the significant - digit of `x` is used.""" - if ndigits is None: ndigits = find_significant_digit(x) - x_rounded = round(x, ndigits) - return int(x_rounded) if ndigits < 1 else x_rounded - -############################################################################### - -class measured_variable(object): - """A meta-variable representing measured data. It is composed of three raw - variables plus units meta-data. - - Attributes: - quantity (`str`) : - Name of the quantity variable of this object. - uncertainty (`str`) : - Name of the uncertainty variable of this object. 
- sample_size (`str`) : - Name of the sample size variable of this object. - units (units class or `None`) : - The units the value is measured in. - """ - - def __init__(self, quantity, uncertainty, sample_size, units = None): - self.quantity = quantity - self.uncertainty = uncertainty - self.sample_size = sample_size - self.units = units - - def as_tuple(self): - return (self.quantity, self.uncertainty, self.sample_size, self.units) - - def __iter__(self): - return iter(self.as_tuple()) - - def __str__(self): - return str(self.as_tuple()) - - def __repr__(self): - return str(self) - -class measured_value(object): - """An object that represents a value determined by multiple measurements. - - Attributes: - quantity (scalar) : - The quantity of the value, e.g. the arithmetic mean. - uncertainty (scalar) : - The measurement uncertainty, e.g. the sample standard deviation. - sample_size (`int`) : - The number of observations contributing to the value. - units (units class or `None`) : - The units the value is measured in. - """ - - def __init__(self, quantity, uncertainty, sample_size = 1, units = None): - self.quantity = quantity - self.uncertainty = uncertainty - self.sample_size = sample_size - self.units = units - - def as_tuple(self): - return (self.quantity, self.uncertainty, self.sample_size, self.units) - - def __iter__(self): - return iter(self.as_tuple()) - - def __str__(self): - return str(self.as_tuple()) - - def __repr__(self): - return str(self) - -############################################################################### - -def arithmetic_mean(X): - """Computes the arithmetic mean of the sequence `X`. - - Let: - - * `n = len(X)`. - * `u` denote the arithmetic mean of `X`. - - .. math:: - - u = \frac{\sum_{i = 0}^{n - 1} X_i}{n} - """ - return sum(X) / len(X) - -def sample_variance(X, u = None): - """Computes the sample variance of the sequence `X`. - - Let: - - * `n = len(X)`. - * `u` denote the arithmetic mean of `X`. 
- * `s` denote the sample standard deviation of `X`. - - .. math:: - - v = \frac{\sum_{i = 0}^{n - 1} (X_i - u)^2}{n - 1} - - Args: - X (`Iterable`) : The sequence of values. - u (number) : The arithmetic mean of `X`. - """ - if u is None: u = arithmetic_mean(X) - return sum(imap(lambda X_i: (X_i - u) ** 2, X)) / (len(X) - 1) - -def sample_standard_deviation(X, u = None, v = None): - """Computes the sample standard deviation of the sequence `X`. - - Let: - - * `n = len(X)`. - * `u` denote the arithmetic mean of `X`. - * `v` denote the sample variance of `X`. - * `s` denote the sample standard deviation of `X`. - - .. math:: - - s &= \sqrt{v} - &= \sqrt{\frac{\sum_{i = 0}^{n - 1} (X_i - u)^2}{n - 1}} - - Args: - X (`Iterable`) : The sequence of values. - u (number) : The arithmetic mean of `X`. - v (number) : The sample variance of `X`. - """ - if u is None: u = arithmetic_mean(X) - if v is None: v = sample_variance(X, u) - return sqrt(v) - -def combine_sample_size(As): - """Computes the combined sample variance of a group of `measured_value`s. - - Let: - - * `g = len(As)`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - - .. math:: - - n = \sum{i = 0}^{g - 1} n_i - """ - return sum(imap(unpack_tuple(lambda u_i, s_i, n_i, t_i: n_i), As)) - -def combine_arithmetic_mean(As, n = None): - """Computes the combined arithmetic mean of a group of `measured_value`s. - - Let: - - * `g = len(As)`. - * `u_i = As[i].quantity`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - * `u` denote the arithmetic mean of the quantities of `As`. - - .. math:: - - u = \frac{\sum{i = 0}^{g - 1} n_i u_i}{n} - """ - if n is None: n = combine_sample_size(As) - return sum(imap(unpack_tuple(lambda u_i, s_i, n_i, t_i: n_i * u_i), As)) / n - -def combine_sample_variance(As, n = None, u = None): - """Computes the combined sample variance of a group of `measured_value`s. - - Let: - - * `g = len(As)`. - * `u_i = As[i].quantity`. 
- * `s_i = As[i].uncertainty`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - * `u` denote the arithmetic mean of the quantities of `As`. - * `v` denote the sample variance of `X`. - - .. math:: - - v = \frac{(\sum_{i = 0}^{g - 1} n_i (u_i - u)^2 + s_i^2 (n_i - 1))}{n - 1} - - Args: - As (`Iterable` of `measured_value`s) : The sequence of values. - n (number) : The combined sample sizes of `As`. - u (number) : The combined arithmetic mean of `As`. - """ - if n <= 1: return 0 - if n is None: n = combine_sample_size(As) - if u is None: u = combine_arithmetic_mean(As, n) - return sum(imap(unpack_tuple( - lambda u_i, s_i, n_i, t_i: n_i * (u_i - u) ** 2 + (s_i ** 2) * (n_i - 1) - ), As)) / (n - 1) - -def combine_sample_standard_deviation(As, n = None, u = None, v = None): - """Computes the combined sample standard deviation of a group of - `measured_value`s. - - Let: - - * `g = len(As)`. - * `u_i = As[i].quantity`. - * `s_i = As[i].uncertainty`. - * `n_i = As[i].samples`. - * `n` denote the combined sample size of `As`. - * `u` denote the arithmetic mean of the quantities of `As`. - * `v` denote the sample variance of `X`. - * `s` denote the sample standard deviation of `X`. - - .. math:: - v &= \frac{(\sum_{i = 0}^{g - 1} n_i (u_i - u)^2 + s_i^2 (n_i - 1))}{n - 1} - - s &= \sqrt{v} - - Args: - As (`Iterable` of `measured_value`s) : The sequence of values. - n (number) : The combined sample sizes of `As`. - u (number) : The combined arithmetic mean of `As`. - v (number) : The combined sample variance of `As`. 
- """ - if n <= 1: return 0 - if n is None: n = combine_sample_size(As) - if u is None: u = combine_arithmetic_mean(As, n) - if v is None: v = combine_sample_variance(As, n, u) - return sqrt(v) - -############################################################################### - -def store_const_multiple(const, *destinations): - """Returns an `argument_action` class that sets multiple argument - destinations (`destinations`) to `const`.""" - class store_const_multiple_action(argument_action): - def __init__(self, *args, **kwargs): - super(store_const_multiple_action, self).__init__( - metavar = None, nargs = 0, const = const, *args, **kwargs - ) - - def __call__(self, parser, namespace, values, option_string = None): - for destination in destinations: - setattr(namespace, destination, const) - - return store_const_multiple_action - -def store_true_multiple(*destinations): - """Returns an `argument_action` class that sets multiple argument - destinations (`destinations`) to `True`.""" - return store_const_multiple(True, *destinations) - -def store_false_multiple(*destinations): - """Returns an `argument_action` class that sets multiple argument - destinations (`destinations`) to `False`.""" - return store_const_multiple(False, *destinations) - -############################################################################### - -def process_program_arguments(): - ap = argument_parser( - description = ( - "Compares two sets of combined performance results and identifies " - "statistically significant changes." - ) - ) - - ap.add_argument( - "baseline_input_file", - help = ("CSV file containing the baseline performance results. The first " - "two rows should be a header. The 1st header row specifies the " - "name of each variable, and the 2nd header row specifies the units " - "for that variable. The baseline results may be a superset of the " - "observed performance results, but the reverse is not true. 
The " - "baseline results must contain data for every datapoint in the " - "observed performance results."), - type = str - ) - - ap.add_argument( - "observed_input_file", - help = ("CSV file containing the observed performance results. The first " - "two rows should be a header. The 1st header row specifies the name " - "of header row specifies the units for that variable."), - type = str - ) - - ap.add_argument( - "-o", "--output-file", - help = ("The file that results are written to. If `-`, results are " - "written to stdout."), - action = "store", type = str, default = "-", - metavar = "OUTPUT" - ) - - ap.add_argument( - "-c", "--control-variable", - help = ("Treat the specified variable as a control variable. This means " - "it will be filtered out when forming dataset keys. For example, " - "this could be used to ignore a timestamp variable that is " - "different in the baseline and observed results. May be specified " - "multiple times."), - action = "append", type = str, dest = "control_variables", default = [], - metavar = "QUANTITY" - ) - - ap.add_argument( - "-d", "--dependent-variable", - help = ("Treat the specified three variables as a dependent variable. The " - "1st variable is the measured quantity, the 2nd is the uncertainty " - "of the measurement and the 3rd is the sample size. The defaults " - "are the dependent variables of Thrust's benchmark suite. May be " - "specified multiple times."), - action = "append", type = str, dest = "dependent_variables", default = [], - metavar = "QUANTITY,UNCERTAINTY,SAMPLES" - ) - - ap.add_argument( - "-t", "--change-threshold", - help = ("Treat relative changes less than this amount (a percentage) as " - "statistically insignificant. 
The default is 5%%."), - action = "store", type = float, default = 5, - metavar = "PERCENTAGE" - ) - - ap.add_argument( - "-p", "--preserve-whitespace", - help = ("Don't trim leading and trailing whitespace from each CSV cell."), - action = "store_true", default = False - ) - - ap.add_argument( - "--output-all-variables", - help = ("Don't omit original absolute values in output."), - action = "store_true", default = False - ) - - ap.add_argument( - "--output-all-datapoints", - help = ("Don't omit datapoints that are statistically indistinguishable " - "in output."), - action = "store_true", default = False - ) - - ap.add_argument( - "-a", "--output-all", - help = ("Equivalent to `--output-all-variables --output-all-datapoints`."), - action = store_true_multiple("output_all_variables", "output_all_datapoints") - ) - - return ap.parse_args() - -############################################################################### - -def filter_comments(f, s = "#"): - """Return an iterator to the file `f` which filters out all lines beginning - with `s`.""" - return filter(lambda line: not line.startswith(s), f) - -############################################################################### - -class io_manager(object): - """Manages I/O operations and represents the input data as an `Iterable` - sequence of `dict`s. - - It is `Iterable` and an `Iterator`. It can be used with `with`. - - Attributes: - preserve_whitespace (`bool`) : - If `False`, leading and trailing whitespace is stripped from each CSV cell. - writer (`csv_dict_writer`) : - CSV writer object that the output is written to. - output_file (`file` or `stdout`) : - The output `file` object. - baseline_reader (`csv_dict_reader`) : - CSV reader object for the baseline results. - observed_reader (`csv_dict_reader`) : - CSV reader object for the observed results. - baseline_input_file (`file`) : - `file` object for the baseline results. - observed_input_file (`file`) : - `file` object for the observed results.. 
- variable_names (`list` of `str`s) : - Names of the variables, in order. - variable_units (`list` of `str`s) : - Units of the variables, in order. - """ - - def __init__(self, - baseline_input_file, observed_input_file, - output_file, - preserve_whitespace = False): - """Read input files and open the output file and construct a new `io_manager` - object. - - If `preserve_whitespace` is `False`, leading and trailing whitespace is - stripped from each CSV cell. - - Raises - AssertionError : - If `type(preserve_whitespace) != bool`. - """ - assert type(preserve_whitespace) == bool - - self.preserve_whitespace = preserve_whitespace - - # Open baseline results. - self.baseline_input_file = open(baseline_input_file) - self.baseline_reader = csv_dict_reader( - filter_comments(self.baseline_input_file) - ) - - if not self.preserve_whitespace: - strip_list(self.baseline_reader.fieldnames) - - self.variable_names = list(self.baseline_reader.fieldnames) # Copy. - self.variable_units = self.baseline_reader.next() - - if not self.preserve_whitespace: - strip_dict(self.variable_units) - - # Open observed results. - self.observed_input_file = open(observed_input_file) - self.observed_reader = csv_dict_reader( - filter_comments(self.observed_input_file) - ) - - if not self.preserve_whitespace: - strip_list(self.observed_reader.fieldnames) - - # Make sure all inputs have the same variables schema. - assert self.variable_names == self.observed_reader.fieldnames, \ - "Observed results input file (`" + observed_input_file + "`) " + \ - "variable schema `" + str(self.observed_reader.fieldnames) + "` does " + \ - "not match the baseline results input file (`" + baseline_input_file + \ - "`) variable schema `" + str(self.variable_names) + "`." - - # Consume the next row, which should be the second line of the header. 
- observed_variable_units = self.observed_reader.next() - - if not self.preserve_whitespace: - strip_dict(observed_variable_units) - - # Make sure all inputs have the same units schema. - assert self.variable_units == observed_variable_units, \ - "Observed results input file (`" + observed_input_file + "`) " + \ - "units schema `" + str(observed_variable_units) + "` does not " + \ - "match the baseline results input file (`" + baseline_input_file + \ - "`) units schema `" + str(self.variable_units) + "`." - - if output_file == "-": # Output to stdout. - self.output_file = stdout - else: # Output to user-specified file. - self.output_file = open(output_file, "w") - - self.writer = csv_dict_writer( - self.output_file, fieldnames = self.variable_names - ) - - def __enter__(self): - """Called upon entering a `with` statement.""" - return self - - def __exit__(self, *args): - """Called upon exiting a `with` statement.""" - if self.output_file is stdout: - self.output_file = None - elif self.output_file is not None: - self.output_file.__exit__(*args) - - self.baseline_input_file.__exit__(*args) - self.observed_input_file.__exit__(*args) - - def append_variable(self, name, units): - """Add a new variable to the output schema.""" - self.variable_names.append(name) - self.variable_units.update({name : units}) - - # Update CSV writer field names. - self.writer.fieldnames = self.variable_names - - def insert_variable(self, idx, name, units): - """Insert a new variable into the output schema at index `idx`.""" - self.variable_names.insert(idx, name) - self.variable_units.update({name : units}) - - # Update CSV writer field names. - self.writer.fieldnames = self.variable_names - - def remove_variable(self, name): - """Remove variable from the output schema and return a tuple of the variable - index and the variable units. - - Raises: - ValueError : If `name` is not in the output schema. 
- """ - # Remove the variable and get its index, which we'll need to remove the - # corresponding units entry. - (idx, item) = remove_from_list(self.variable_names, name) - - # Remove the units entry. - units = self.variable_units.pop(item) - - # Update CSV writer field names. - self.writer.fieldnames = self.variable_names - - return (idx, units) - - ############################################################################# - # Input Stream. - - def baseline(self): - """Return an iterator to the baseline results input sequence.""" - return imap(lambda row: strip_dict(row), self.baseline_reader) - - def observed(self): - """Return an iterator to the observed results input sequence.""" - return imap(lambda row: strip_dict(row), self.observed_reader) - - ############################################################################# - # Output. - - def write_header(self): - """Write the header for the output CSV file.""" - # Write the first line of the header. - self.writer.writeheader() - - # Write the second line of the header. - self.writer.writerow(self.variable_units) - - def write(self, d): - """Write a record (a `dict`) to the output CSV file.""" - self.writer.writerow(d) - -############################################################################### - -class dependent_variable_parser(object): - """Parses a `--dependent-variable=AVG,STDEV,TRIALS` command line argument.""" - - ############################################################################# - # Grammar - - # Parse a variable_name. - variable_name_rule = r'[^,]+' - - # Parse a variable classification. - dependent_variable_rule = r'(' + variable_name_rule + r')' \ - + r',' \ - + r'(' + variable_name_rule + r')' \ - + r',' \ - + r'(' + variable_name_rule + r')' - - engine = regex_compile(dependent_variable_rule) - - ############################################################################# - - def __call__(self, s): - """Parses the string `s` with the form "AVG,STDEV,TRIALS". 
- - Returns: - A `measured_variable`. - - Raises: - AssertionError : If parsing fails. - """ - - match = self.engine.match(s) - - assert match is not None, \ - "Dependent variable (-d) `" +s+ "` is invalid, the format is " + \ - "`AVG,STDEV,TRIALS`." - - return measured_variable(match.group(1), match.group(2), match.group(3)) - -############################################################################### - -class record_aggregator(object): - """Consumes and combines records and represents the result as an `Iterable` - sequence of `dict`s. - - It is `Iterable` and an `Iterator`. - - Attributes: - dependent_variables (`list` of `measured_variable`s) : - A list of dependent variables provided on the command line. - control_variables (`list` of `str`s) : - A list of control variables provided on the command line. - dataset (`dict`) : - A mapping of distinguishing (e.g. control + independent) values (`tuple`s - of variable-quantity pairs) to `list`s of dependent values (`dict`s from - variables to lists of cells). - in_order_dataset_keys : - A list of unique dataset keys (e.g. distinguishing variables) in order of - appearance. - """ - - def __init__(self, dependent_variables, control_variables): - """Construct a new `record_aggregator` object. - - Raises: - AssertionError : If parsing of dependent variables fails. - """ - self.dependent_variables = dependent_variables - self.control_variables = control_variables - - self.dataset = {} - - self.in_order_dataset_keys = deque() - - ############################################################################# - # Insertion. - - def key_from_dict(self, d): - """Create a hashable key from a `dict` by filtering out control variables - and then converting the `dict` to a tuple. - - Raises: - AssertionError : If any control variable was not found in `d`. - """ - distinguishing_values = d.copy() - - # Filter out control variables. 
- for var in self.control_variables: - distinguishing_values.pop(var, None) - - return key_from_dict(distinguishing_values) - - def append(self, record): - """Add `record` to the dataset. - - Raises: - ValueError : If any `str`-to-numeric conversions fail. - """ - # The distinguishing variables are the control and independent variables. - # They form the key for each record in the dataset. Records with the same - # distinguishing variables are treated as observations of the same - # datapoint. - dependent_values = {} - - # To allow the same sample size variable to be used for multiple dependent - # variables, we don't pop sample size variables until we're done processing - # all variables. - sample_size_variables = [] - - # Separate the dependent values from the distinguishing variables and - # perform `str`-to-numeric conversions. - for var in self.dependent_variables: - quantity, uncertainty, sample_size, units = var.as_tuple() - - dependent_values[quantity] = [int_or_float(record.pop(quantity))] - dependent_values[uncertainty] = [int_or_float(record.pop(uncertainty))] - dependent_values[sample_size] = [int(record[sample_size])] - - sample_size_variables.append(sample_size) - - # Pop sample size variables. - for var in sample_size_variables: - # Allowed to fail, as we may have duplicates. - record.pop(var, None) - - distinguishing_values = self.key_from_dict(record) - - if distinguishing_values in self.dataset: - # These distinguishing values already exist, so get the `dict` they're - # mapped to, look up each key in `dependent_values` in the `dict`, and - # add the corresponding quantity in `dependent_values` to the list in the - # the `dict`. - for var, columns in dependent_values.iteritems(): - self.dataset[distinguishing_values][var] += columns - else: - # These distinguishing values aren't in the dataset, so add them and - # record them in `in_order_dataset_keys`. 
- self.dataset[distinguishing_values] = dependent_values - self.in_order_dataset_keys.append(distinguishing_values) - - ############################################################################# - # Postprocessing. - - def combine_dependent_values(self, dependent_values): - """Takes a mapping of dependent variables to lists of cells and returns - a new mapping with the cells combined. - - Raises: - AssertionError : If class invariants were violated. - """ - combined_dependent_values = dependent_values.copy() - - for var in self.dependent_variables: - quantity, uncertainty, sample_size, units = var.as_tuple() - - quantities = dependent_values[quantity] - uncertainties = dependent_values[uncertainty] - sample_sizes = dependent_values[sample_size] - - if type(sample_size) is list: - # Sample size hasn't been combined yet. - assert len(quantities) == len(uncertainties) \ - and len(uncertainties) == len(sample_sizes), \ - "Length of quantities list `(" + str(len(quantities)) + ")`, " + \ - "length of uncertainties list `(" + str(len(uncertainties)) + \ - "),` and length of sample sizes list `(" + str(len(sample_sizes)) + \ - ")` are not the same." - else: - # Another dependent variable that uses our sample size has combined it - # already. - assert len(quantities) == len(uncertainties), \ - "Length of quantities list `(" + str(len(quantities)) + ")` and " + \ - "length of uncertainties list `(" + str(len(uncertainties)) + \ - ")` are not the same." - - # Convert the three separate `list`s into one list of `measured_value`s. - measured_values = [] - - for i in range(len(quantities)): - mv = measured_value( - quantities[i], uncertainties[i], sample_sizes[i], units - ) - - measured_values.append(mv) - - # Combine the `measured_value`s. 
- combined_sample_size = combine_sample_size( - measured_values - ) - - combined_arithmetic_mean = combine_arithmetic_mean( - measured_values, combined_sample_size - ) - - combined_sample_standard_deviation = combine_sample_standard_deviation( - measured_values, combined_sample_size, combined_arithmetic_mean - ) - - # Round the quantity and uncertainty to the significant digit of - # uncertainty and insert the combined values into the results. - sigdig = find_significant_digit(combined_sample_standard_deviation) - -# combined_arithmetic_mean = round_with_int_conversion( -# combined_arithmetic_mean, sigdig -# ) - -# combined_sample_standard_deviation = round_with_int_conversion( -# combined_sample_standard_deviation, sigdig -# ) - - combined_dependent_values[quantity] = combined_arithmetic_mean - combined_dependent_values[uncertainty] = combined_sample_standard_deviation - combined_dependent_values[sample_size] = combined_sample_size - - return combined_dependent_values - - ############################################################################# - # Output Stream. - - def __iter__(self): - """Return an iterator to the output sequence of separated distinguishing - variables and dependent variables (a tuple of two `dict`s). - - This is a requirement for the `Iterable` protocol. - """ - return self - - def records(self): - """Return an iterator to the output sequence of CSV rows (`dict`s of - variables to values). - """ - return imap(unpack_tuple(lambda dist, dep: merge_dicts(dist, dep)), self) - - def next(self): - """Produce the components of the next output record - a tuple of two - `dict`s. The first `dict` is a mapping of distinguishing variables to - distinguishing values, the second `dict` is a mapping of dependent - variables to combined dependent values. Combining the two dicts forms a - CSV row suitable for output. - - This is a requirement for the `Iterator` protocol. - - Raises: - StopIteration : If there is no more output. 
- AssertionError : If class invariants were violated. - """ - assert len(self.dataset.keys()) == len(self.in_order_dataset_keys), \ - "Number of dataset keys (`" + str(len(self.dataset.keys())) + \ - "`) is not equal to the number of keys in the ordering list (`" + \ - str(len(self.in_order_dataset_keys)) + "`)." - - if len(self.in_order_dataset_keys) == 0: - raise StopIteration() - - # Get the next set of distinguishing values and convert them to a `dict`. - raw_distinguishing_values = self.in_order_dataset_keys.popleft() - distinguishing_values = dict(raw_distinguishing_values) - - dependent_values = self.dataset.pop(raw_distinguishing_values) - - combined_dependent_values = self.combine_dependent_values(dependent_values) - - return (distinguishing_values, combined_dependent_values) - - def __getitem__(self, distinguishing_values): - """Produce the dependent component, a `dict` mapping dependent variables to - combined dependent values, associated with `distinguishing_values`. - - Args: - distinguishing_values (`dict`) : - A `dict` mapping distinguishing variables to distinguishing values. - - Raises: - KeyError : If `distinguishing_values` is not in the dataset. - """ - raw_distinguishing_values = self.key_from_dict(distinguishing_values) - - dependent_values = self.dataset[raw_distinguishing_values] - - combined_dependent_values = self.combine_dependent_values(dependent_values) - - return combined_dependent_values - -############################################################################### - -args = process_program_arguments() - -if len(args.dependent_variables) == 0: - args.dependent_variables = [ - "STL Average Walltime,STL Walltime Uncertainty,STL Trials", - "STL Average Throughput,STL Throughput Uncertainty,STL Trials", - "Thrust Average Walltime,Thrust Walltime Uncertainty,Thrust Trials", - "Thrust Average Throughput,Thrust Throughput Uncertainty,Thrust Trials" - ] - -# Parse dependent variable options. 
-dependent_variables = [] - -parse_dependent_variable = dependent_variable_parser() - -#if args.dependent_variables is not None: -for var in args.dependent_variables: - dependent_variables.append(parse_dependent_variable(var)) - -# Read input files and open the output file. -with io_manager(args.baseline_input_file, - args.observed_input_file, - args.output_file, - args.preserve_whitespace) as iom: - - # Create record aggregators. - baseline_ra = record_aggregator(dependent_variables, args.control_variables) - observed_ra = record_aggregator(dependent_variables, args.control_variables) - - # Duplicate dependent variables: one for baseline results, one for observed - # results. - baseline_suffix = " - `{0}`".format( - args.baseline_input_file - ) - observed_suffix = " - `{0}`".format( - args.observed_input_file - ) - - for var in dependent_variables: - # Remove the existing quantity variable: - # - # [ ..., a, b, c, ... ] - # ^- remove b at index i - # - (quantity_idx, quantity_units) = iom.remove_variable(var.quantity) - - # If the `--output-all-variables` option was specified, add the new baseline - # and observed quantity variables. Note that we insert in the reverse of - # the order we desire (which is baseline then observed): - # - # [ ..., a, b_1, c, ... ] - # ^- insert b_1 at index i - # - # [ ..., a, b_0, b_1, c, ... ] - # ^- insert b_0 at index i - # - if args.output_all_variables: - iom.insert_variable( - quantity_idx, var.quantity + observed_suffix, quantity_units - ) - iom.insert_variable( - quantity_idx, var.quantity + baseline_suffix, quantity_units - ) - - # Remove the existing uncertainty variable. - (uncertainty_idx, uncertainty_units) = iom.remove_variable(var.uncertainty) - - # If the `--output-all-variables` option was specified, add the new baseline - # and observed uncertainty variables. 
- if args.output_all_variables: - iom.insert_variable( - uncertainty_idx, var.uncertainty + observed_suffix, uncertainty_units - ) - iom.insert_variable( - uncertainty_idx, var.uncertainty + baseline_suffix, uncertainty_units - ) - - try: - # Remove the existing sample size variable. - (sample_size_idx, sample_size_units) = iom.remove_variable(var.sample_size) - - # If the `--output-all-variables` option was specified, add the new - # baseline and observed sample size variables. - if args.output_all_variables: - iom.insert_variable( - sample_size_idx, var.sample_size + observed_suffix, sample_size_units - ) - iom.insert_variable( - sample_size_idx, var.sample_size + baseline_suffix, sample_size_units - ) - except ValueError: - # This is alright, because dependent variables may share the same sample - # size variable. - pass - - for var in args.control_variables: - iom.remove_variable(var) - - # Add change variables. - absolute_change_suffix = " - Change (`{0}` - `{1}`)".format( - args.observed_input_file, args.baseline_input_file - ) - - percent_change_suffix = " - % Change (`{0}` to `{1}`)".format( - args.observed_input_file, args.baseline_input_file - ) - - for var in dependent_variables: - iom.append_variable(var.quantity + absolute_change_suffix, var.units) - iom.append_variable(var.uncertainty + absolute_change_suffix, var.units) - iom.append_variable(var.quantity + percent_change_suffix, "") - iom.append_variable(var.uncertainty + percent_change_suffix, "") - - # Add all baseline input data to the `record_aggregator`. - for record in iom.baseline(): - baseline_ra.append(record) - - for record in iom.observed(): - observed_ra.append(record) - - iom.write_header() - - # Compare and output results. 
- for distinguishing_values, observed_dependent_values in observed_ra: - try: - baseline_dependent_values = baseline_ra[distinguishing_values] - except KeyError: - assert False, \ - "Distinguishing value `" + \ - str(baseline_ra.key_from_dict(distinguishing_values)) + \ - "` was not found in the baseline results." - - statistically_significant_change = False - - record = distinguishing_values.copy() - - # Compute changes, add the values and changes to the record, and identify - # changes that are statistically significant. - for var in dependent_variables: - # Compute changes. - baseline_quantity = baseline_dependent_values[var.quantity] - baseline_uncertainty = baseline_dependent_values[var.uncertainty] - baseline_sample_size = baseline_dependent_values[var.sample_size] - - observed_quantity = observed_dependent_values[var.quantity] - observed_uncertainty = observed_dependent_values[var.uncertainty] - observed_sample_size = observed_dependent_values[var.sample_size] - - (abs_change, abs_change_unc, per_change, per_change_unc) = \ - percent_change_uncertainty( - baseline_quantity, baseline_uncertainty, - observed_quantity, observed_uncertainty - ) - - # Round the change quantities and uncertainties to the significant digit - # of uncertainty. - try: - abs_change_sigdig = max( - find_significant_digit(abs_change), - find_significant_digit(abs_change_unc), - ) - -# abs_change = round_with_int_conversion( -# abs_change, abs_change_sigdig -# ) -# abs_change_unc = round_with_int_conversion( -# abs_change_unc, abs_change_sigdig -# ) - except: - # Any value errors should be due to NaNs returned by - # `percent_change_uncertainty` because quantities or change in - # quantities was 0. We can ignore these. 
- pass - - try: - per_change_sigdig = max( - find_significant_digit(per_change), - find_significant_digit(per_change_unc) - ) - -# per_change = round_with_int_conversion( -# per_change, per_change_sigdig -# ) -# per_change_unc = round_with_int_conversion( -# per_change_unc, per_change_sigdig -# ) - except: - # Any value errors should be due to NaNs returned by - # `percent_change_uncertainty` because quantities or change in - # quantities was 0. We can ignore these. - pass - - # Add the values (if the `--output-all-variables` option was specified) - # and the changes to the record. Note that the record's schema is - # different from the original schema. If multiple dependent variables - # share the same sample size variable, it's fine - they will overwrite - # each other, but with the same value. - if args.output_all_variables: - record[var.quantity + baseline_suffix] = baseline_quantity - record[var.uncertainty + baseline_suffix] = baseline_uncertainty - record[var.sample_size + baseline_suffix] = baseline_sample_size - record[var.quantity + observed_suffix] = observed_quantity - record[var.uncertainty + observed_suffix] = observed_uncertainty - record[var.sample_size + observed_suffix] = observed_sample_size - - record[var.quantity + absolute_change_suffix] = abs_change - record[var.uncertainty + absolute_change_suffix] = abs_change_unc - record[var.quantity + percent_change_suffix] = per_change - record[var.uncertainty + percent_change_suffix] = per_change_unc - - # If the range of uncertainties overlap don't overlap and the percentage - # change is greater than the change threshold, then change is - # statistically significant. 
- overlap = ranges_overlap_uncertainty( - baseline_quantity, baseline_uncertainty, - observed_quantity, observed_uncertainty - ) - if not overlap and per_change >= args.change_threshold: - statistically_significant_change = True - - # Print the record if a statistically significant change was found or if the - # `--output-all-datapoints` option was specified. - if args.output_all_datapoints or statistically_significant_change: - iom.write(record) - diff --git a/internal/benchmark/random.h b/internal/benchmark/random.h deleted file mode 100644 index 719588771..000000000 --- a/internal/benchmark/random.h +++ /dev/null @@ -1,100 +0,0 @@ -#pragma once - -#include -#include - -struct hash32 -{ - __host__ __device__ - unsigned int operator()(unsigned int h) const - { - h = ~h + (h << 15); - h = h ^ (h >> 12); - h = h + (h << 2); - h = h ^ (h >> 4); - h = h + (h << 3) + (h << 11); - h = h ^ (h >> 16); - return h; - } -}; - -struct hash64 -{ - __host__ __device__ - unsigned long long operator()(unsigned long long h) const - { - h = ~h + (h << 21); - h = h ^ (h >> 24); - h = (h + (h << 3)) + (h << 8); - h = h ^ (h >> 14); - h = (h + (h << 2)) + (h << 4); - h = h ^ (h >> 28); - h = h + (h << 31); - return h; - } -}; - -struct hashtofloat -{ - __host__ __device__ - float operator()(unsigned int h) const - { - return static_cast(hash32()(h)) / 4294967296.0f; - } -}; - -struct hashtodouble -{ - __host__ __device__ - double operator()(unsigned long long h) const - { - return static_cast(hash64()(h)) / 18446744073709551616.0; - } -}; - - - -template -void _randomize(Vector& v, T) -{ - thrust::transform(thrust::counting_iterator(0), - thrust::counting_iterator(0) + v.size(), - v.begin(), - hash32()); -} - -template -void _randomize(Vector& v, long long) -{ - thrust::transform(thrust::counting_iterator(0), - thrust::counting_iterator(0) + v.size(), - v.begin(), - hash64()); -} - -template -void _randomize(Vector& v, float) -{ - thrust::transform(thrust::counting_iterator(0), - 
thrust::counting_iterator(0) + v.size(), - v.begin(), - hashtofloat()); -} - -template -void _randomize(Vector& v, double) -{ - thrust::transform(thrust::counting_iterator(0), - thrust::counting_iterator(0) + v.size(), - v.begin(), - hashtodouble()); -} - -// fill Vector with random values -template -void randomize(Vector& v) -{ - _randomize(v, typename Vector::value_type()); -} - - diff --git a/internal/benchmark/tbb_algos.h b/internal/benchmark/tbb_algos.h deleted file mode 100644 index a50a1cd2f..000000000 --- a/internal/benchmark/tbb_algos.h +++ /dev/null @@ -1,195 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include // For std::size_t. - -#include - -template -struct NegateBody -{ - void operator()(T& x) const - { - x = -x; - } -}; - -template -struct ForBody -{ - typedef typename Vector::value_type T; - -private: - Vector& v; - -public: - ForBody(Vector& x) : v(x) {} - - void operator()(tbb::blocked_range const& r) const - { - for (std::size_t i = r.begin(); i != r.end(); ++i) - v[i] = -v[i]; - } -}; - -template -struct ReduceBody -{ - typedef typename Vector::value_type T; - -private: - Vector& v; - -public: - T sum; - - ReduceBody(Vector& x) : v(x), sum(0) {} - - ReduceBody(ReduceBody& x, tbb::split) : v(x.v), sum(0) {} - - void operator()(tbb::blocked_range const& r) - { - for (std::size_t i = r.begin(); i != r.end(); ++i) - sum += v[i]; - } - - void join(ReduceBody const& x) { sum += x.sum; } -}; - -template -struct ScanBody -{ - typedef typename Vector::value_type T; - -private: - Vector& v; - -public: - T sum; - - ScanBody(Vector& x) : sum(0), v(x) {} - - ScanBody(ScanBody& x, tbb::split) : v(x.v), sum(0) {} - - template - void operator()(tbb::blocked_range const& r, Tag) - { - T temp = sum; - for (std::size_t i = r.begin(); i < r.end(); ++i) - { - temp = temp + x[i]; - if (Tag::is_final_scan()) - x[i] = temp; - } - sum = temp; - } - - void assign(ScanBody const& x) { sum = x.sum; } - - T get_sum() 
const { return sum; } - - void reverse_join(ScanBody const& x) { sum = x.sum + sum;} -}; - -template -struct CopyBody -{ - typedef typename Vector::value_type T; - -private: - Vector &v; - Vector &u; - -public: - CopyBody(Vector& x, Vector& y) : v(x), u(y) {} - - void operator()(tbb::blocked_range const& r) const - { - for (std::size_t i = r.begin(); i != r.end(); ++i) - v[i] = u[i]; - } -}; - -template -typename Vector::value_type tbb_reduce(Vector& v) -{ - ReduceBody body(v); - tbb::parallel_reduce(tbb::blocked_range(0, v.size()), body); - return body.sum; -} - -template -void tbb_sort(Vector& v) -{ - tbb::parallel_sort(v.begin(), v.end()); -} - -template -void tbb_transform(Vector& v) -{ - ForBody body(v); - tbb::parallel_for(tbb::blocked_range(0, v.size()), body); -} - -template -void tbb_scan(Vector& v) -{ - ScanBody body(v); - tbb::parallel_scan(tbb::blocked_range(0, v.size()), body); -} - -template -void tbb_copy(Vector& v, Vector& u) -{ - CopyBody body(v, u); - tbb::parallel_for(tbb::blocked_range(0, v.size()), body); -} - -void test_tbb() -{ - std::size_t elements = 1 << 20; - - std::vector A(elements); - std::vector B(elements); - std::vector C(elements); - std::vector D(elements); - - randomize(A); - randomize(B); - assert(std::accumulate(A.begin(), A.end(), 0) == tbb_reduce(A)); - - randomize(A); - randomize(B); - std::transform(A.begin(), A.end(), A.begin(), thrust::negate()); - tbb_transform(B); - assert(A == B); - - randomize(A); - randomize(B); - std::partial_sum(A.begin(), A.end(), A.begin()); - tbb_scan(B); - assert(A == B); - - randomize(A); - randomize(B); - std::sort(A.begin(), A.end()); - tbb_sort(B); - assert(A == B); - - randomize(A); - randomize(B); - randomize(C); - randomize(D); - std::copy(A.begin(), A.end(), C.begin()); - tbb_copy(B, D); - assert(A == B); - assert(C == D); -} - diff --git a/internal/benchmark/timer.h b/internal/benchmark/timer.h deleted file mode 100644 index cd0128e67..000000000 --- a/internal/benchmark/timer.h +++ 
/dev/null @@ -1,82 +0,0 @@ -#pragma once - -#include - -#include - -#if(THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC || defined(_WIN32)) -#include - -class steady_timer -{ - LARGE_INTEGER start_; - LARGE_INTEGER stop_; - LARGE_INTEGER frequency_; // Cached to avoid system calls. - - public: - steady_timer() : start_(), stop_(), frequency_() - { - BOOL const r = QueryPerformanceFrequency(&frequency_); - assert(0 != r); - (void)r; // Silence unused variable 'r' in Release builds, when - // the assertion evaporates. - } - - void start() - { - BOOL const r = QueryPerformanceCounter(&start_); - assert(0 != r); - (void)r; // Silence unused variable 'r' in Release builds, when - // the assertion evaporates. - } - - - void stop() - { - BOOL const r = QueryPerformanceCounter(&stop_); - assert(0 != r); - (void)r; // Silence unused variable 'r' in Release builds, when - // the assertion evaporates. - } - - double seconds_elapsed() - { - return double(stop_.QuadPart - start_.QuadPart) - / double(frequency_.QuadPart); - } -}; - -#else -#include - -class steady_timer -{ - timespec start_; - timespec stop_; - - public: - steady_timer() : start_(), stop_() {} - - void start() - { - int const r = clock_gettime(CLOCK_MONOTONIC, &start_); - assert(0 == r); - (void)r; // Silence unused variable 'r' in Release builds, when - // the assertion evaporates. - } - - void stop() - { - int const r = clock_gettime(CLOCK_MONOTONIC, &stop_); - assert(0 == r); - (void)r; // Silence unused variable 'r' in Release builds, when - // the assertion evaporates. - } - - double seconds_elapsed() - { - return double(stop_.tv_sec - start_.tv_sec) - + double(stop_.tv_nsec - start_.tv_nsec) * 1.0e-9; - } -}; -#endif diff --git a/rmake.py b/rmake.py index 2bb8834a1..0d1dff5fd 100644 --- a/rmake.py +++ b/rmake.py @@ -1,5 +1,5 @@ #!/usr/bin/python3 -"""Copyright 2020-2023 Advanced Micro Devices, Inc. +"""Copyright 2020-2025 Advanced Micro Devices, Inc. 
Manage build and installation""" import re diff --git a/rtest.py b/rtest.py index d35b89e3b..228fe559b 100644 --- a/rtest.py +++ b/rtest.py @@ -1,5 +1,5 @@ #!/usr/bin/python3 -"""Copyright 2021 Advanced Micro Devices, Inc. +"""Copyright 2021-2025 Advanced Micro Devices, Inc. Run tests on build""" import re diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ec5145908..62ee5974f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2019-2024 Advanced Micro Devices, Inc. +# Copyright 2019-2025 Advanced Micro Devices, Inc. # ######################################################################## set(INSTALL_TEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/install_CTestTestfile.cmake") @@ -55,6 +55,7 @@ function(add_rocthrust_test TEST) set(TEST_TARGET "${TEST}.hip") add_executable(${TEST_TARGET} ${TEST_SOURCE}) + target_compile_options(${TEST_TARGET} PRIVATE ${COMPILE_OPTIONS}) target_include_directories(${TEST_TARGET} SYSTEM BEFORE PUBLIC $ diff --git a/test/bitwise_repro/bwr_utils.hpp b/test/bitwise_repro/bwr_utils.hpp index fbfe288aa..fc414db27 100644 --- a/test/bitwise_repro/bwr_utils.hpp +++ b/test/bitwise_repro/bwr_utils.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ #ifndef BWR_UTILS_HPP #define BWR_UTILS_HPP -#include "../thrust/include/rocthrust_version.hpp" +#include #include #include @@ -502,4 +502,3 @@ class TokenHelper } // end namespace bwr_utils #endif // BRW_UTILS_HPP - diff --git a/test/hipstdpar/CMakeLists.txt b/test/hipstdpar/CMakeLists.txt index fa9479f0f..f02bc3edf 100644 --- a/test/hipstdpar/CMakeLists.txt +++ b/test/hipstdpar/CMakeLists.txt @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright 2024 Advanced Micro Devices, Inc. +# Copyright 2024-2025 Advanced Micro Devices, Inc. # ######################################################################## function(add_hipstdpar_test TEST TEST_TYPE INTERPOSE_ALLOC) @@ -10,12 +10,17 @@ function(add_hipstdpar_test TEST TEST_TYPE INTERPOSE_ALLOC) set(TEST_TARGET "${TEST}_${TEST_TYPE}.hip") add_executable(${TEST_TARGET} ${TEST_SOURCE}) + target_compile_options(${TEST_TARGET} PRIVATE ${COMPILE_OPTIONS}) target_compile_options(${TEST_TARGET} PRIVATE --hipstdpar --hipstdpar-path=${HIPSTDPAR_LOCATION} --hipstdpar-thrust-path=${THRUST_LOCATION} - --hipstdpar-prim-path=${ROCPRIM_LOCATION}) + --hipstdpar-prim-path=${ROCPRIM_LOCATION} + # --hipstdpar-thrust-path is not enough because thrust/rocthrust_version.hpp is + # generated in the binary directory + -idirafter ${THRUST_BINARY_LOCATION} + ) if(INTERPOSE_ALLOC) target_compile_options(${TEST_TARGET} PRIVATE @@ -88,8 +93,10 @@ set(CMAKE_CXX_STANDARD 17) # Dependencies find_package(Threads REQUIRED) -# Define where to find rocThrust, hipstdpar and rocPRIM headers +# Define where to find rocThrust, rocPRIM and hipstdpar headers set(THRUST_LOCATION ${PROJECT_SOURCE_DIR}) +set(THRUST_BINARY_LOCATION ${PROJECT_BINARY_DIR}/thrust/include) +set(ROCPRIM_LOCATION ${rocprim_INCLUDE_DIR}) 
set(HIPSTDPAR_LOCATION ${THRUST_LOCATION}/thrust/system/hip/hipstdpar) set(ROCPRIM_LOCATION ${ROCPRIM_INCLUDE_DIR}) diff --git a/test/test_adjacent_difference.cpp b/test/test_adjacent_difference.cpp index d8f6f6466..351ff7c00 100644 --- a/test/test_adjacent_difference.cpp +++ b/test/test_adjacent_difference.cpp @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2025 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,7 +79,7 @@ TYPED_TEST(AdjacentDifferenceVariableTests, TestAdjacentDifference) SCOPED_TRACE(testing::Message() << "with seed= " << seed); thrust::host_vector h_input = get_random_data( - size, std::numeric_limits::min(), std::numeric_limits::max(), seed); + size, get_default_limits::min(), get_default_limits::max(), seed); thrust::device_vector d_input = h_input; thrust::host_vector h_output(size); @@ -135,7 +135,7 @@ TYPED_TEST(AdjacentDifferenceVariableTests, TestAdjacentDifferenceInPlaceWithRel SCOPED_TRACE(testing::Message() << "with seed= " << seed); thrust::host_vector h_input = get_random_data( - size, std::numeric_limits::min(), std::numeric_limits::max(), seed); + size, get_default_limits::min(), get_default_limits::max(), seed); thrust::device_vector d_input = h_input; thrust::host_vector h_output(size); @@ -178,7 +178,7 @@ TYPED_TEST(AdjacentDifferenceVariableTests, TestAdjacentDifferenceDiscardIterato SCOPED_TRACE(testing::Message() << "with seed= " << seed); thrust::host_vector h_input = get_random_data( - size, std::numeric_limits::min(), std::numeric_limits::max(), seed); + size, get_default_limits::min(), get_default_limits::max(), seed); thrust::device_vector d_input = h_input; thrust::discard_iterator<> h_result; diff --git a/test/test_advance.cpp b/test/test_advance.cpp 
index 576eb04fa..b9b53aab5 100644 --- a/test/test_advance.cpp +++ b/test/test_advance.cpp @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2025 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ TYPED_TEST(AdvanceVectorTests, TestAdvance) using Vector = typename TestFixture::input_type; using T = typename Vector::value_type; - typedef typename Vector::iterator Iterator; + using Iterator = typename Vector::iterator; SCOPED_TRACE(testing::Message() << "with device_id= " << test::set_device_from_ctest()); @@ -54,7 +54,7 @@ TYPED_TEST(AdvanceVectorTests, TestNext) using Vector = typename TestFixture::input_type; using T = typename Vector::value_type; - typedef typename Vector::iterator Iterator; + using Iterator = typename Vector::iterator; Vector v(10); thrust::sequence(v.begin(), v.end()); @@ -85,7 +85,7 @@ TYPED_TEST(AdvanceVectorTests, TestPrev) using Vector = typename TestFixture::input_type; using T = typename Vector::value_type; - typedef typename Vector::iterator Iterator; + using Iterator = typename Vector::iterator; Vector v(10); thrust::sequence(v.begin(), v.end()); diff --git a/test/test_allocator.cpp b/test/test_allocator.cpp index f0ba34647..0ee93a6c2 100644 --- a/test/test_allocator.cpp +++ b/test/test_allocator.cpp @@ -1,6 +1,6 @@ /* * Copyright 2008-2018 NVIDIA Corporation - * Modifications Copyright© 2019-2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2025 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -117,8 +117,7 @@ struct my_allocator_with_custom_destroy return !(*this == other); } - typedef thrust::detail::true_type is_always_equal; - + using is_always_equal = thrust::detail::true_type; // use composition rather than inheritance @@ -141,12 +140,12 @@ TEST(AllocatorTests, TestAllocatorCustomDestroy) struct my_minimal_allocator { - typedef int value_type; + using value_type = int; // XXX ideally, we shouldn't require // these two typedefs - typedef int& reference; - typedef const int& const_reference; + using reference = int&; + using const_reference = const int&; __host__ __device__ my_minimal_allocator() {} diff --git a/test/test_allocator_aware_policies.cpp b/test/test_allocator_aware_policies.cpp index 5d62226f0..644420168 100644 --- a/test/test_allocator_aware_policies.cpp +++ b/test/test_allocator_aware_policies.cpp @@ -1,6 +1,6 @@ /* * Copyright 2008-2018 NVIDIA Corporation - * Modifications Copyright© 2019-2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2025 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,12 +47,12 @@ struct test_memory_resource_t final : thrust::mr::memory_resource<> template class CRTPBase> struct policy_info { - typedef Policy policy; + using policy = Policy; template