From d9dfbb40b79b0cab944a7031bf2b0feab142104a Mon Sep 17 00:00:00 2001 From: jjwilke Date: Wed, 29 Apr 2020 19:37:24 -0700 Subject: [PATCH] add spack nvcc_wrapper --- CMakeLists.txt | 17 ++ README.md | 13 +- nvcc_wrapper.in | 477 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 505 insertions(+), 2 deletions(-) create mode 100644 CMakeLists.txt create mode 100755 nvcc_wrapper.in diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b1fe276 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,17 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.8 FATAL_ERROR) +PROJECT(KokkosNVCCWrapper CXX CUDA) +SET(KokkosNVCCWrapper_VERSION_MAJOR 2) +SET(KokkosNVCCWrapper_VERSION_MINOR 7) +SET(KokkosNVCCWrapper_VERSION_PATCH 4) + +IF (CMAKE_CUDA_HOST_COMPILER) +SET(KOKKOS_CXX ${CMAKE_CUDA_HOST_COMPILER}) +ELSE() +SET(KOKKOS_CXX ${CMAKE_CXX_COMPILER}) +ENDIF() + +CONFIGURE_FILE(nvcc_wrapper.in nvcc_wrapper @ONLY) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/nvcc_wrapper + DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE) + diff --git a/README.md b/README.md index c4646b0..b6a2b02 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,12 @@ -# THIS REPOSITORY HAS BEEN DEPRECATED +# Kokkos NVCC Wrapper + +This wraps an nvcc, allowing it to be treated as a real C++ compiler with all the usual flags. +Simple run `cmake` as usual to configure the compiler. The only two flags that need to be set are: +```` +-DCMAKE_CXX_COMPILER= +-DCMAKE_CUDA_COMPILER= +```` +so that the wrapper knows which underlying `nvcc` and which underlying `g++` to use. +This is configured as both a CXX and CUDA project with CMake, which means a valid +CUDA installation must be available. -Use `bin/nvcc_wrapper` from [kokkos/kokkos](https://github.com/kokkos/kokkos) instead diff --git a/nvcc_wrapper.in b/nvcc_wrapper.in new file mode 100755 index 0000000..275ffdc --- /dev/null +++ b/nvcc_wrapper.in @@ -0,0 +1,477 @@ +#!/bin/bash +# +# This shell script (nvcc_wrapper) wraps both the host compiler and +# NVCC, if you are building legacy C or C++ code with CUDA enabled. +# The script remedies some differences between the interface of NVCC +# and that of the host compiler, in particular for linking. +# It also means that a legacy code doesn't need separate .cu files; +# it can just use .cpp files. +# +# Default settings: change those according to your machine. For +# example, you may have have two different wrappers with either icpc +# or g++ as their back-end compiler. The defaults can be overwritten +# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). + +default_arch="sm_35" +#default_arch="sm_50" + +# +# The default C++ compiler. +# +if [[ -z "${KOKKOS_CXX}" ]]; then + host_compiler=@KOKKOS_CXX@ +else + host_compiler=${KOKKOS_CXX} +fi + +#host_compiler="icpc" +#host_compiler="/usr/local/gcc/4.8.3/bin/g++" +#host_compiler="/usr/local/gcc/4.9.1/bin/g++" + +# +# Internal variables +# + +# C++ files +cpp_files="" + +# Host compiler arguments +xcompiler_args="" + +# Cuda (NVCC) only arguments +cuda_args="" + +# Arguments for both NVCC and Host compiler +shared_args="" + +# Argument -c +compile_arg="" + +# Argument -o +output_arg="" + +# Linker arguments +xlinker_args="" + +# Object files passable to NVCC +object_files="" + +# Link objects for the host linker only +object_files_xlinker="" + +# Shared libraries with version numbers are not handled correctly by NVCC +shared_versioned_libraries_host="" +shared_versioned_libraries="" + +# Does the User set the architecture +arch_set=0 + +# Does the user overwrite the host compiler +ccbin_set=0 + +#Error code of compilation +error_code=0 + +# Do a dry run without actually compiling +dry_run=0 + +# Skip NVCC compilation and use host compiler directly +host_only=0 +host_only_args="" + +# Just run version on host compiler +get_host_version=0 + +# Enable workaround for CUDA 6.5 for pragma ident +replace_pragma_ident=0 + +# Mark first host compiler argument +first_xcompiler_arg=1 + +temp_dir=${TMPDIR:-/tmp} + +# optimization flag added as a command-line argument +optimization_flag="" + +# std standard flag added as a command-line argument +std_flag="" + +# Run nvcc a second time to generate dependencies if needed +depfile_separate=0 +depfile_output_arg="" +depfile_target_arg="" + +# Option to remove duplicate libraries and object files +remove_duplicate_link_files=0 + +function warn_std_flag() { + echo "nvcc_wrapper - *warning* you have set multiple standard flags (-std=c++1* or --std=c++1*), only the last is used because nvcc can only accept a single std setting" +} + +#echo "Arguments: $# $@" + +while [ $# -gt 0 ] +do + case $1 in + #show the executed command + --show|--nvcc-wrapper-show) + dry_run=1 + ;; + #run host compilation only + --host-only) + host_only=1 + ;; + #get the host version only + --host-version) + get_host_version=1 + ;; + #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros + --replace-pragma-ident) + replace_pragma_ident=1 + ;; + #remove duplicate link files + --remove-duplicate-link-files) + remove_duplicate_link_files=1 + ;; + #handle source files to be compiled as cuda files + *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) + cpp_files="$cpp_files $1" + ;; + # Ensure we only have one optimization flag because NVCC doesn't allow muliple + -O*) + if [ -n "$optimization_flag" ]; then + echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the last is used because nvcc can only accept a single optimization setting." + shared_args=${shared_args/ $optimization_flag/} + fi + if [ "$1" = "-O" ]; then + optimization_flag="-O2" + else + optimization_flag=$1 + fi + shared_args="$shared_args $optimization_flag" + ;; + #Handle shared args (valid for both nvcc and the host compiler) + -D*) + unescape_commas=`echo "$1" | sed -e 's/\\\,/,/g'` + arg=`printf "%q" $unescape_commas` + shared_args="$shared_args $arg" + ;; + -I*|-L*|-l*|-g|--help|--version|-E|-M|-shared|-w) + shared_args="$shared_args $1" + ;; + #Handle compilation argument + -c) + compile_arg="$1" + ;; + #Handle output argument + -o) + output_arg="$output_arg $1 $2" + shift + ;; + # Handle depfile arguments. We map them to a separate call to nvcc. + -MD|-MMD) + depfile_separate=1 + host_only_args="$host_only_args $1" + ;; + -MF) + depfile_output_arg="-o $2" + host_only_args="$host_only_args $1 $2" + shift + ;; + -MT) + depfile_target_arg="$1 $2" + host_only_args="$host_only_args $1 $2" + shift + ;; + #Handle known nvcc args + --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*|--fmad*) + cuda_args="$cuda_args $1" + ;; + #Handle more known nvcc args + --expt-extended-lambda|--expt-relaxed-constexpr) + cuda_args="$cuda_args $1" + ;; + #Handle known nvcc args that have an argument + -rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad) + cuda_args="$cuda_args $1 $2" + shift + ;; + -rdc=*|-maxrregcount*|--maxrregcount*) + cuda_args="$cuda_args $1" + ;; + #Handle unsupported standard flags + --std=c++1y|-std=c++1y|--std=c++1z|-std=c++1z|--std=gnu++1y|-std=gnu++1y|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a|--std=c++17|-std=c++17) + fallback_std_flag="-std=c++14" + # this is hopefully just occurring in a downstream project during CMake feature tests + # we really have no choice here but to accept the flag and change to an accepted C++ standard + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + std_flag=$fallback_std_flag + shared_args="$shared_args $std_flag" + ;; + -std=gnu*) + corrected_std_flag=${1/gnu/c} + echo "nvcc_wrapper has been given GNU extension standard flag $1 - reverting flag to $corrected_std_flag" + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + std_flag=$corrected_std_flag + shared_args="$shared_args $std_flag" + ;; + --std=c++11|-std=c++11|--std=c++14|-std=c++14) + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + std_flag=$1 + shared_args="$shared_args $std_flag" + ;; + + #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 + -std=c++98|--std=c++98) + ;; + #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor + -pedantic|-Wpedantic|-ansi) + ;; + #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" + -Woverloaded-virtual) + ;; + #strip -Xcompiler because we add it + -Xcompiler) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$2" + fi + shift + ;; + #strip of "-x cu" because we add that + -x) + if [[ $2 != "cu" ]]; then + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,$2" + fi + fi + shift + ;; + #Handle -+ (same as -x c++, specifically used for xl compilers, but mutually exclusive with -x. So replace it with -x c++) + -+) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,c++" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,c++" + fi + ;; + #Handle -ccbin (if its not set we can set it to a default value) + -ccbin) + cuda_args="$cuda_args $1 $2" + ccbin_set=1 + host_compiler=$2 + shift + ;; + + #Handle -arch argument (if its not set use a default) this is the version with = sign + -arch*|-gencode*) + cuda_args="$cuda_args $1" + arch_set=1 + ;; + #Handle -code argument (if its not set use a default) this is the version with = sign + -code*) + cuda_args="$cuda_args $1" + ;; + #Handle -arch argument (if its not set use a default) this is the version without = sign + -arch|-gencode) + cuda_args="$cuda_args $1 $2" + arch_set=1 + shift + ;; + #Handle -code argument (if its not set use a default) this is the version without = sign + -code) + cuda_args="$cuda_args $1 $2" + shift + ;; + #Handle -Xcudafe argument + -Xcudafe) + cuda_args="$cuda_args -Xcudafe $2" + shift + ;; + #Handle -Xlinker argument + -Xlinker) + xlinker_args="$xlinker_args -Xlinker $2" + shift + ;; + #Handle args that should be sent to the linker + -Wl,*) + xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" + host_linker_args="$host_linker_args ${1:4:${#1}}" + ;; + #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking + *.a|*.so|*.o|*.obj) + object_files="$object_files $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking + @*|*.dylib) + object_files="$object_files -Xlinker $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle shared libraries with *.so.* names which nvcc can't do. + *.so.*) + shared_versioned_libraries_host="$shared_versioned_libraries_host $1" + shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" + ;; + #All other args are sent to the host compiler + *) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args=$1 + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$1" + fi + ;; + esac + + shift +done + +# Only print host compiler version +if [ $get_host_version -eq 1 ]; then + $host_compiler --version + exit +fi + +#Remove duplicate object files +if [ $remove_duplicate_link_files -eq 1 ]; then +for obj in $object_files +do + object_files_reverse="$obj $object_files_reverse" +done + +object_files_reverse_clean="" +for obj in $object_files_reverse +do + exists=false + for obj2 in $object_files_reverse_clean + do + if [ "$obj" == "$obj2" ] + then + exists=true + echo "Exists: $obj" + fi + done + if [ "$exists" == "false" ] + then + object_files_reverse_clean="$object_files_reverse_clean $obj" + fi +done + +object_files="" +for obj in $object_files_reverse_clean +do + object_files="$obj $object_files" +done +fi + +#Add default host compiler if necessary +if [ $ccbin_set -ne 1 ]; then + cuda_args="$cuda_args -ccbin $host_compiler" +fi + +#Add architecture command +if [ $arch_set -ne 1 ]; then + cuda_args="$cuda_args -arch=$default_arch" +fi + +#Compose compilation command +nvcc_command="@CMAKE_CUDA_COMPILER@ $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" +if [ $first_xcompiler_arg -eq 0 ]; then + nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" +fi + +#Replace all commas in xcompiler_args with a space for the host only command +xcompiler_args=${xcompiler_args//,/" "} + +#Compose host only command +host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host" + +#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' +if [ $replace_pragma_ident -eq 1 ]; then + cpp_files2="" + for file in $cpp_files + do + var=`grep pragma ${file} | grep ident | grep "#"` + if [ "${#var}" -gt 0 ] + then + sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file + cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" + else + cpp_files2="$cpp_files2 $file" + fi + done + cpp_files=$cpp_files2 + #echo $cpp_files +fi + +if [ "$cpp_files" ]; then + nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" +else + nvcc_command="$nvcc_command $object_files" +fi + +if [ "$cpp_files" ]; then + host_command="$host_command $object_files $cpp_files" +else + host_command="$host_command $object_files" +fi + +if [ $depfile_separate -eq 1 ]; then + # run nvcc a second time to generate dependencies (without compiling) + nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg" +else + nvcc_depfile_command="" +fi + +nvcc_command="$nvcc_command $compile_arg $output_arg" + +#Print command for dryrun +if [ $dry_run -eq 1 ]; then + if [ $host_only -eq 1 ]; then + echo $host_command + elif [ -n "$nvcc_depfile_command" ]; then + echo $nvcc_command "&&" $nvcc_depfile_command + else + echo $nvcc_command + fi + exit 0 +fi + +#Run compilation command +if [ $host_only -eq 1 ]; then + if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then + echo "$host_command" + fi + $host_command +elif [ -n "$nvcc_depfile_command" ]; then + if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then + echo "$nvcc_command && $nvcc_depfile_command" + fi + $nvcc_command && $nvcc_depfile_command +else + if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then + echo "$nvcc_command" + fi + $nvcc_command +fi +error_code=$? + +#Report error code +exit $error_code