Skip to content

Commit

Permalink
Merge Pull Request trilinos#7151 from bartlettroscoe/Trilinos/cdofa-1…
Browse files Browse the repository at this point in the history
…05-atdm-van1-tx2

Automatically Merged using Trilinos Pull Request AutoTester
PR Title: ATDM: Add new 'van-tx2' env to support ASTRA systems
PR Author: bartlettroscoe
  • Loading branch information
trilinos-autotester authored Apr 13, 2020
2 parents 977d337 + 02303b7 commit 2827cd5
Show file tree
Hide file tree
Showing 15 changed files with 410 additions and 7 deletions.
1 change: 1 addition & 0 deletions cmake/ctest/drivers/atdm/ctest-s-driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ echo " ==> `date`"
echo

echo "Loading env and running ctest -S comamnd to configure, build, and test ..."
echo

source ${WORKSPACE}/Trilinos/cmake/ctest/drivers/atdm/utils/setup_env.sh

Expand Down
2 changes: 2 additions & 0 deletions cmake/ctest/drivers/atdm/utils/setup_env.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
set +x

#
# A) Load the env
#
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Driver script for a 'van1-tx2' build.  Defaults Trilinos_TRACK to
# 'Experimental' (only when it is unset or empty) and then runs the shared
# van1-tx2/local-driver.sh script found under ${WORKSPACE}.
#

# Optional override of the 'salloc --time' limit read by local-driver.sh:
#export SALLOC_CTEST_LIMIT_MINUTES=1:00:00

if [[ -z "${Trilinos_TRACK}" ]] ; then
  export Trilinos_TRACK=Experimental
fi

# Quoted so that a WORKSPACE path containing whitespace is not word-split
"${WORKSPACE}/Trilinos/cmake/ctest/drivers/atdm/van1-tx2/local-driver.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Driver script for a 'van1-tx2' build.  Defaults Trilinos_TRACK to
# 'Specialized' (only when it is unset or empty) and then runs the shared
# van1-tx2/local-driver.sh script found under ${WORKSPACE}.
#

# Optional override of the 'salloc --time' limit read by local-driver.sh:
#export SALLOC_CTEST_LIMIT_MINUTES=1:00:00

if [[ -z "${Trilinos_TRACK}" ]] ; then
  export Trilinos_TRACK=Specialized
fi

# Quoted so that a WORKSPACE path containing whitespace is not word-split
"${WORKSPACE}/Trilinos/cmake/ctest/drivers/atdm/van1-tx2/local-driver.sh"
51 changes: 51 additions & 0 deletions cmake/ctest/drivers/atdm/van1-tx2/local-driver-on-allocation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
#
# This script is designed to run inside of an allocation which then runs the
# build and tests on the same allocated compute node.  (See
# van1-tx2/local-driver.sh and atdm/README.md for details.)
#
# Environment variables read:
#   WORKSPACE                   Base directory containing the Trilinos/ clone
#   JOB_NAME                    Printed in the banner below
#   ATDM_CONFIG_CDASH_HOSTNAME  Exported as CTEST_SITE
#

set +x

echo
echo "======================================================================"
echo ""
echo " Running ${JOB_NAME}"
echo " in salloc allocation from node '$(hostname)'"
echo ""
echo "======================================================================"
echo

# Must use same site name for build and test results so they match up on
# CDash!
export CTEST_SITE="${ATDM_CONFIG_CDASH_HOSTNAME}"

echo
echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
echo "***"
echo "*** Running start, update, configure and build on a compute node"
echo "***"
echo

# First pass: everything except the tests, launched through 'srun'
set -x
env \
  CTEST_DO_TEST=FALSE \
  srun -N 1 \
  "${WORKSPACE}/Trilinos/cmake/ctest/drivers/atdm/ctest-s-driver.sh"
set +x

echo
echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
echo "***"
echo "*** Running tests on compute node launched from the login node"
echo "***"
echo

# Second pass: start/update/configure/build are switched off so that only the
# remaining steps run; this time the driver is invoked directly (no 'srun').
set -x
env \
  CTEST_DO_NEW_START=FALSE \
  CTEST_DO_UPDATES=FALSE \
  CTEST_DO_CONFIGURE=FALSE \
  CTEST_DO_BUILD=FALSE \
  "${WORKSPACE}/Trilinos/cmake/ctest/drivers/atdm/ctest-s-driver.sh"
set +x
32 changes: 32 additions & 0 deletions cmake/ctest/drivers/atdm/van1-tx2/local-driver.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash -l
#
# Driver for the 'van1-tx2' system: loads the ATDM env for ${JOB_NAME} and then
# runs van1-tx2/local-driver-on-allocation.sh inside a single-node 'salloc'
# allocation.
#
# Environment variables read:
#   WORKSPACE                   Base directory containing the Trilinos/ clone
#   JOB_NAME                    Passed to load-env.sh
#   SALLOC_CTEST_LIMIT_MINUTES  Value for 'salloc --time' (default 4:00:00)
#   ATDM_CONFIG_WCID_ACCOUNT    Value for 'salloc --account'; falls back to
#                               ATDM_CONFIG_WCID_ACCOUNT_DEFAULT when empty
#

set +x

if [[ -z "${SALLOC_CTEST_LIMIT_MINUTES}" ]] ; then
  SALLOC_CTEST_LIMIT_MINUTES=4:00:00
  # From prior builds on 'stria', it looks like 4 hours should be plenty of
  # time to do the build and tests as it has taken only about 2 1/2 hours to
  # do everything.
fi

source "${WORKSPACE}/Trilinos/cmake/std/atdm/load-env.sh" "${JOB_NAME}"
echo

if [[ -z "${ATDM_CONFIG_WCID_ACCOUNT}" ]] ; then
  export ATDM_CONFIG_WCID_ACCOUNT="${ATDM_CONFIG_WCID_ACCOUNT_DEFAULT}"
fi

set -x

salloc -N 1 --time="${SALLOC_CTEST_LIMIT_MINUTES}" -p short,batch \
  --account="${ATDM_CONFIG_WCID_ACCOUNT}" \
  "${WORKSPACE}/Trilinos/cmake/ctest/drivers/atdm/van1-tx2/local-driver-on-allocation.sh"

# Turn tracing back off (pairs with the 'set -x' before the salloc command)
set +x

# NOTE: Above, we get a single compute-node allocation using 'salloc' and then
# run the build and tests from inside of that.

# NOTE: We might need to switch from salloc to the more complex sbatch
# approach in the function atdm_run_script_on_compute_node.  If we see random
# ODTE errors then that is what we will need to do.
6 changes: 6 additions & 0 deletions cmake/std/atdm/ATDMDevEnvSettings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ ATDM_SET_ATDM_VAR_FROM_ENV_AND_DEFAULT(FPIC OFF)
ATDM_SET_ATDM_VAR_FROM_ENV_AND_DEFAULT(COMPLEX OFF)

ATDM_SET_ATDM_VAR_FROM_ENV_AND_DEFAULT(Kokkos_ENABLE_SERIAL ON)
#ATDM_SET_ATDM_VAR_FROM_ENV_AND_DEFAULT(HDF5_NO_SYSTEM_PATHS OFF)

IF (ATDM_USE_PTHREADS)
MESSAGE(FATAL_ERROR "Error, the Kokkos Pthreads backend is no longer supported!"
Expand Down Expand Up @@ -273,6 +274,7 @@ ATDM_SET_CACHE(MPI_EXEC_POST_NUMPROCS_FLAGS "$ENV{ATDM_CONFIG_MPI_POST_FLAGS}"
CACHE STRING)
ATDM_SET_CACHE(Trilinos_VERBOSE_CONFIGURE OFF CACHE BOOL)
ATDM_SET_CACHE(Trilinos_ENABLE_EXPLICIT_INSTANTIATION ON CACHE BOOL)
#ATDM_SET_CACHE(HDF5_NO_SYSTEM_PATHS ${ATDM_HDF5_NO_SYSTEM_PATHS} CACHE BOOL)
ATDM_SET_CACHE(Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES ON CACHE BOOL)
ATDM_SET_CACHE(Trilinos_ENABLE_DEVELOPMENT_MODE OFF CACHE BOOL)
ATDM_SET_CACHE(Trilinos_ASSERT_MISSING_PACKAGES ON CACHE BOOL)
Expand All @@ -294,6 +296,7 @@ ATDM_SET_CACHE(EpetraExt_ENABLE_HDF5 OFF CACHE BOOL)
ATDM_SET_CACHE(Panzer_ENABLE_FADTYPE "Sacado::Fad::DFad<RealType>" CACHE STRING)
ATDM_SET_CACHE(Phalanx_KOKKOS_DEVICE_TYPE "${ATDM_NODE_TYPE}" CACHE STRING)
ATDM_SET_CACHE(Phalanx_SHOW_DEPRECATED_WARNINGS OFF CACHE BOOL)
ATDM_SET_CACHE(DAKOTA_ENABLE_TESTS OFF CACHE BOOL)
ATDM_SET_CACHE(Tpetra_INST_CUDA "${ATDM_USE_CUDA}" CACHE BOOL)
ATDM_SET_CACHE(Tpetra_INST_SERIAL "${ATDM_INST_SERIAL}" CACHE BOOL)
ATDM_SET_CACHE(Tpetra_INST_INT_INT OFF CACHE BOOL)
Expand Down Expand Up @@ -398,6 +401,9 @@ ATDM_SET_CACHE(TPL_LAPACK_LIBRARIES "$ENV{ATDM_CONFIG_LAPACK_LIBS}" CACHE FILEPA
ATDM_SET_ENABLE(TPL_ENABLE_CGNS ${ATDM_ENABLE_SPARC_SETTINGS})
ATDM_SET_CACHE(CGNS_INCLUDE_DIRS "$ENV{CGNS_ROOT}/include" CACHE FILEPATH)
ATDM_SET_CACHE(CGNS_LIBRARY_DIRS "$ENV{CGNS_ROOT}/lib" CACHE FILEPATH)
IF (NOT "$ENV{ATDM_CONFIG_CGNS_LIBRARY_NAMES}" STREQUAL "")
ATDM_SET_CACHE(CGNS_LIBRARY_NAMES "$ENV{ATDM_CONFIG_CGNS_LIBRARY_NAMES}" CACHE FILEPATH)
ENDIF()
ATDM_SET_CACHE(TPL_CGNS_LIBRARIES "$ENV{ATDM_CONFIG_CGNS_LIBS}" CACHE FILEPATH)

# HDF5
Expand Down
86 changes: 85 additions & 1 deletion cmake/std/atdm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ example, skip the configure, skip the build, skip running tests, etc.
* <a href="#cee-rhel6-and-rhel7-environment">CEE RHEL6 and RHEL7 Environment</a>
* <a href="#waterman">waterman</a>
* <a href="#ats-2">ATS-2</a>
* <a href="#astra-vanguard-arm-system">ASTRA (Vanguard ARM System)</a>
### ride/white
Expand Down Expand Up @@ -729,7 +730,7 @@ $ make NP=16
$ salloc -N1 --time=0:20:00 --account=<YOUR_WCID> ctest -j4
```
To get information on <YOUR_WCID> used above, there is a WC tool tab on
To get information on `<YOUR_WCID>` used above, there is a WC tool tab on
computing.sandia.gov
**NOTE:** Unlike some of the other machines, one must load the environment,
Expand Down Expand Up @@ -1162,6 +1163,89 @@ $ ./checkin-test-atdm.sh <buildname0> <buildname1> ... \
use caution when doing so as not to disturb the job running.


### ASTRA (Vanguard ARM System)

Once logged onto a supported Vanguard ARM system (system name 'van1-tx2')
like 'stria', one can configure and build on a login node.

To configure, build and run the tests for the default `arm-20.0` build for
`Tpetra` (after cloning Trilinos on the 'develop' branch), run the following
from a login node on 'stria':

```bash
$ cd <some_build_dir>/

# List available environments
$ source $TRILINOS_DIR/cmake/std/atdm/load-env.sh help

# Load the arm-20.0 env and configure on the login node
$ source $TRILINOS_DIR/cmake/std/atdm/load-env.sh arm-20.0
$ cmake -G Ninja \
-DTrilinos_CONFIGURE_OPTIONS_FILE:STRING=cmake/std/atdm/ATDMDevEnv.cmake \
-DTrilinos_ENABLE_TESTS=ON \
-DTrilinos_ENABLE_Tpetra=ON \
$TRILINOS_DIR

$ make NP=8 # This is a shared node!

# Get a node allocation and run ctest
$ salloc -N 1 --time=2:00:00 -p short,batch --account=<YOUR_WCID> ctest -j4
```

One can also get an allocation first and then configure, build on a compute
node, and then run the test suite using:

```bash
$ salloc -N 1 --time=4:00:00 -p short,batch --account=<YOUR_WCID> bash
# NOTE: After the above runs, hostname=stria-login<n> but now a compute node
# has been allocated for immediate use.

$ source $TRILINOS_DIR/cmake/std/atdm/load-env.sh arm-20.0

$ cmake -G Ninja \
-DTrilinos_CONFIGURE_OPTIONS_FILE:STRING=cmake/std/atdm/ATDMDevEnv.cmake \
-DTrilinos_ENABLE_TESTS=ON \
-DTrilinos_ENABLE_Tpetra=ON \
$TRILINOS_DIR

$ srun -N 1 make NP=20 # We have the entire compute node to ourselves!

$ ctest -j4
```

The advantage of the latter approach is that one just waits once for a node
allocation and then one can immediately run fast parallel builds on the compute
node (taking up the entire node). Then one can run the test suite multiple
times without waiting for a new allocation.

One can also directly build on a compute node from the login node in one
command:

```bash
$ srun -N 1 --time=2:00:00 -p short,batch --account=<YOUR_WCID> make NP=20
```

To use the `ctest-s-local-test-driver.sh` script, one must set one's WCID
account using:

```
$ export ATDM_CONFIG_WCID_ACCOUNT=<YOUR_WCID>
```

If `ATDM_CONFIG_WCID_ACCOUNT` is not set, then a default account will be used.
(But if the user is not approved for that account, then the allocation will
fail.)

**NOTES:**
- To get information on `<YOUR_WCID>` used above, there is a WC tool tab on
computing.sandia.gov.
- CTest runs everything using the `mpirun` command and this must be run from
inside a `salloc` or `sbatch` allocation and can **not** be directly
launched from a compute node. For example, one cannot get an interactive
shell directly on a compute node using `srun ... bash` and then run `mpirun`
from there.


## Building and installing Trilinos for ATDM Applications

See the following internal SNL wiki page for instructions on building and
Expand Down
2 changes: 1 addition & 1 deletion cmake/std/atdm/load-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ fi
source ${ATDM_CONFIG_SCRIPT_DIR}/utils/get_system_info.sh

if [[ $ATDM_CONFIG_SYSTEM_NAME == "" ]] ; then
echo "Error, could not determine a system configuration, aborting env loading script!"
echo "Error, could not determine a system configuration for hostname='$realHostname', aborting env loading script!"
return
fi

Expand Down
31 changes: 26 additions & 5 deletions cmake/std/atdm/utils/get_known_system_info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ fi
source ${ATDM_CONFIG_SCRIPT_DIR}/utils/get_system_info_utils.sh

realHostname=`hostname`
if [[ "${ATDM_CONFIG_GET_KNOW_SYSTEM_INFO_REAL_HOSTNAME_OVERRIDE_FOR_UNIT_TESTING}" ]] ; then
echo
echo "***"
echo "*** WARNING: realHostname=$realHostname overriden to value of"
echo "*** ATDM_CONFIG_GET_KNOW_SYSTEM_INFO_REAL_HOSTNAME_OVERRIDE_FOR_UNIT_TESTING='${ATDM_CONFIG_GET_KNOW_SYSTEM_INFO_REAL_HOSTNAME_OVERRIDE_FOR_UNIT_TESTING}'"
echo "*** in <trilinos-dir>/cmake/std/atdm/utils/get_known_system_info.sh."
echo "*** This variable should only be set for unit testing purposes!"
echo "***"
echo
realHostname=${ATDM_CONFIG_GET_KNOW_SYSTEM_INFO_REAL_HOSTNAME_OVERRIDE_FOR_UNIT_TESTING}
fi
#echo "Hostname = '$realHostname'"

#
Expand All @@ -52,6 +63,7 @@ ATDM_KNOWN_SYSTEM_NAMES_LIST=(
mutrino # Will be repalced by 'ats1'
waterman
ats2
van1-tx2
cts1
tlcc2
sems-rhel7
Expand Down Expand Up @@ -124,10 +136,10 @@ fi
# matching system type will be selected.
#

# TLCC2 systems
if [[ $SNLSYSTEM == "tlcc2"* ]] ; then
systemNameTypeMatchedList+=(tlcc2)
systemNameTypeMatchedListHostNames[tlcc2]=$SNLCLUSTER
# ASTRA/Van1-Tx2 systems
if [[ $SNLSYSTEM == "astra"* ]] ; then
systemNameTypeMatchedList+=(van1-tx2)
systemNameTypeMatchedListHostNames[van1-tx2]=$SNLCLUSTER
fi

# CTS1 systems
Expand All @@ -136,13 +148,22 @@ if [[ $SNLSYSTEM == "cts1" ]] ; then
systemNameTypeMatchedListHostNames[cts1]=$SNLCLUSTER
fi

# TLCC2 systems
if [[ $SNLSYSTEM == "tlcc2"* ]] ; then
systemNameTypeMatchedList+=(tlcc2)
systemNameTypeMatchedListHostNames[tlcc2]=$SNLCLUSTER
fi

# SEMS RHEL6 and RHEL7 systems
if [[ "${SEMS_PLATFORM}" == "rhel6-x86_64" ]] ; then
systemNameTypeMatchedList+=(sems-rhel6)
systemNameTypeMatchedListHostNames[sems-rhel6]=sems-rhel6
elif [[ "${SEMS_PLATFORM}" == "rhel7-x86_64" ]] ; then
systemNameTypeMatchedList+=(sems-rhel7)
systemNameTypeMatchedListHostNames[sems-rhel7]=sems-rhel7
elif [[ "${SNLSYSTEM}" == "astra" ]] ; then
echo "Don't call get-platform on 'astra' systems" > /dev/null
# Above logic avoids an 'ERROR: Unrecognized cluster <name>' on these systems
elif [[ -f /projects/sems/modulefiles/utils/get-platform ]] ; then
ATDM_SYSTEM_NAME=`source /projects/sems/modulefiles/utils/get-platform`
if [[ $ATDM_SYSTEM_NAME == "rhel6-x86_64" ]] ; then
Expand Down Expand Up @@ -210,7 +231,7 @@ fi
#

if [[ $ATDM_SYSTEM_NAME != "" ]] ; then
echo "Hostname '$realHostname' matches known ATDM host '$ATDM_HOSTNAME' and system '$ATDM_SYSTEM_NAME'"
echo "Hostname '$realHostname' matches known ATDM host '$realHostname' and system '$ATDM_SYSTEM_NAME'"
export ATDM_CONFIG_REAL_HOSTNAME=$realHostname
export ATDM_CONFIG_CDASH_HOSTNAME=$ATDM_HOSTNAME
export ATDM_CONFIG_SYSTEM_NAME=$ATDM_SYSTEM_NAME
Expand Down
2 changes: 2 additions & 0 deletions cmake/std/atdm/utils/set_build_options.sh
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ elif [[ $ATDM_CONFIG_BUILD_NAME == *"-Volta72"* ]]; then
export ATDM_CONFIG_KOKKOS_ARCH=Volta72
elif [[ $ATDM_CONFIG_BUILD_NAME == *"-WSM"* ]]; then
export ATDM_CONFIG_KOKKOS_ARCH=WSM
elif [[ $ATDM_CONFIG_BUILD_NAME == *"-TX2"* ]]; then
export ATDM_CONFIG_KOKKOS_ARCH=ARMv8-TX2
else
export ATDM_CONFIG_KOKKOS_ARCH=DEFAULT
if [[ $ATDM_CONFIG_VERBOSE == "1" ]] ; then
Expand Down
1 change: 1 addition & 0 deletions cmake/std/atdm/utils/unset_atdm_config_vars_environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ unset ATDM_CONFIG_SUPERLUDIST_INCLUDE_DIRS
unset ATDM_CONFIG_SUPERLUDIST_LIBS
unset ATDM_CONFIG_METIS_LIBS
unset ATDM_CONFIG_PARMETIS_LIBS
unset ATDM_CONFIG_CGNS_LIBRARY_NAMES
unset ATDM_CONFIG_CGNS_LIBS
unset ATDM_CONFIG_MPI_EXEC
unset ATDM_CONFIG_MPI_PRE_FLAGS
Expand Down
8 changes: 8 additions & 0 deletions cmake/std/atdm/van1-tx2/all_supported_builds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Sourced (not executed) to publish the supported builds for 'van1-tx2'.
#
# Sets and exports:
#   ATDM_CONFIG_CTEST_S_BUILD_NAME_PREFIX  Build-name prefix string
#   ATDM_CONFIG_ALL_SUPPORTED_BUILDS       Array of supported build names

ATDM_CONFIG_CTEST_S_BUILD_NAME_PREFIX="Trilinos-atdm-"

ATDM_CONFIG_ALL_SUPPORTED_BUILDS=(
  "van1-tx2_arm-20.0_openmpi-4.0.2_openmp_static_opt"
  "van1-tx2_arm-20.0_openmpi-4.0.2_openmp_static_dbg"
)

export ATDM_CONFIG_CTEST_S_BUILD_NAME_PREFIX
export ATDM_CONFIG_ALL_SUPPORTED_BUILDS
24 changes: 24 additions & 0 deletions cmake/std/atdm/van1-tx2/custom_builds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#
# Compiler selection for the van1-tx2 env
#
# Exports ATDM_CONFIG_COMPILER when ATDM_CONFIG_BUILD_NAME contains one of the
# recognized compiler keywords (or ends with 'default').  Otherwise an error
# banner is printed and control returns to the caller via 'return' (so this
# file is meant to be sourced).
#

case "${ATDM_CONFIG_BUILD_NAME}" in
  *"arm-20.0-openmpi-4.0.2"* | *"arm-20.0_openmpi-4.0.2"* \
  | *"arm-20.0"* | *"arm-20"* | *"default")
    export ATDM_CONFIG_COMPILER=ARM-20.0_OPENMPI-4.0.2
    ;;
  *)
    # No known compiler keyword found in the build name
    echo
    echo "***"
    echo "*** ERROR: A supported compiler was not selected for 'van1-tx2' (stria) env"
    echo "***"
    echo "*** Supported compilers include:"
    echo "***"
    echo "**** arm-20.0-openmpi-4.0.2 (arm-20.0, default)"
    echo "***"
    return
    ;;
esac
Loading

0 comments on commit 2827cd5

Please sign in to comment.