From 16cfd141ca2ecd0d7e246aae4fbb11d5024ca327 Mon Sep 17 00:00:00 2001 From: "Chan-Hoo.Jeon-NOAA" <60152248+chan-hoo@users.noreply.github.com> Date: Fri, 15 Jan 2021 14:33:00 -0500 Subject: [PATCH] develop: Fix to run workflow tasks on WCOSS (#390) Fix to run workflow tasks on the WCOSS dell and cray --- .../tasks/wcoss_cray/get_extrn_ics.local | 3 ++ .../tasks/wcoss_cray/get_extrn_lbcs.local | 3 ++ modulefiles/tasks/wcoss_cray/make_grid.local | 35 ++++---------- modulefiles/tasks/wcoss_cray/make_ics.local | 35 ++++---------- modulefiles/tasks/wcoss_cray/make_lbcs.local | 35 ++++---------- modulefiles/tasks/wcoss_cray/make_orog.local | 12 +++++ .../tasks/wcoss_cray/make_sfc_climo.local | 12 +++++ modulefiles/tasks/wcoss_cray/run_fcst.local | 16 ++++++- modulefiles/tasks/wcoss_cray/run_post.local | 22 +++++---- .../tasks/wcoss_dell_p3/get_extrn_ics.local | 3 +- .../tasks/wcoss_dell_p3/get_extrn_lbcs.local | 3 +- .../tasks/wcoss_dell_p3/make_grid.local | 4 ++ .../tasks/wcoss_dell_p3/make_ics.local | 4 ++ .../tasks/wcoss_dell_p3/make_lbcs.local | 4 ++ .../tasks/wcoss_dell_p3/run_fcst.local | 4 ++ .../tasks/wcoss_dell_p3/run_post.local | 5 ++ scripts/exregional_make_orog.sh | 48 ++++--------------- scripts/exregional_run_fcst.sh | 21 +++++--- ush/launch_FV3LAM_wflow.sh | 10 ++++ ush/link_fix.sh | 2 +- ush/load_modules_run_task.sh | 4 +- ush/templates/FV3LAM_wflow.xml | 20 ++++++++ 22 files changed, 164 insertions(+), 141 deletions(-) create mode 100644 modulefiles/tasks/wcoss_cray/make_orog.local create mode 100644 modulefiles/tasks/wcoss_cray/make_sfc_climo.local diff --git a/modulefiles/tasks/wcoss_cray/get_extrn_ics.local b/modulefiles/tasks/wcoss_cray/get_extrn_ics.local index e82aa51538..d540aea2cc 100644 --- a/modulefiles/tasks/wcoss_cray/get_extrn_ics.local +++ b/modulefiles/tasks/wcoss_cray/get_extrn_ics.local @@ -4,5 +4,8 @@ module purge +module use /usrx/local/prod/modulefiles module load hpss/4.1.0.3 +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_cray/get_extrn_lbcs.local b/modulefiles/tasks/wcoss_cray/get_extrn_lbcs.local index 49db5c6669..d7ec3794e4 100644 --- a/modulefiles/tasks/wcoss_cray/get_extrn_lbcs.local +++ b/modulefiles/tasks/wcoss_cray/get_extrn_lbcs.local @@ -4,5 +4,8 @@ module purge +module use /usrx/local/prod/modulefiles module load hpss/4.1.0.3 +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_cray/make_grid.local b/modulefiles/tasks/wcoss_cray/make_grid.local index 2790204cb1..edee5d7249 100644 --- a/modulefiles/tasks/wcoss_cray/make_grid.local +++ b/modulefiles/tasks/wcoss_cray/make_grid.local @@ -1,34 +1,15 @@ #%Module -module load modules -module load xt-lsfhpc -module load ncep +module use /opt/cray/ari/modulefiles module load alps -module load dvs -module load xpmem -module load ugni -module load craype-network-aries -module load switch -module load rca module load gni-headers +module load pmi/5.0.11 +module load rca module load udreg -module load hpss - -module load prod_util -module load g2tmpl-intel/1.4.0 -module load crtm-intel/2.2.6 -module load iobuf/2.0.7 -module load gempak/7.3.0 - -module load nco-gnu-sandybridge/4.4.4 -module load NetCDF-intel-sandybridge/4.2 -module load cfp-intel-sandybridge/1.1.0 -export USE_CFP=YES - -module load grib_util/1.1.0 - -module use -a /gpfs/hps3/emc/nems/noscrub/emc.nemspara/soft/modulefiles -module load esmf/8.0.0 +module load ugni +module load xpmem -module load python/3.6.3 +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 +module list diff --git a/modulefiles/tasks/wcoss_cray/make_ics.local b/modulefiles/tasks/wcoss_cray/make_ics.local index 2790204cb1..04eb686e14 100644 --- a/modulefiles/tasks/wcoss_cray/make_ics.local +++ b/modulefiles/tasks/wcoss_cray/make_ics.local @@ -1,34 +1,17 @@ #%Module -module load modules -module load xt-lsfhpc -module load ncep +module use /opt/cray/ari/modulefiles module load alps -module load dvs -module load xpmem -module load ugni -module load craype-network-aries -module load switch -module load rca module load gni-headers +module load pmi/5.0.11 +module load rca module load udreg -module load hpss - -module load prod_util -module load g2tmpl-intel/1.4.0 -module load crtm-intel/2.2.6 -module load iobuf/2.0.7 -module load gempak/7.3.0 - -module load nco-gnu-sandybridge/4.4.4 -module load NetCDF-intel-sandybridge/4.2 -module load cfp-intel-sandybridge/1.1.0 -export USE_CFP=YES - -module load grib_util/1.1.0 +module load ugni +module load xpmem -module use -a /gpfs/hps3/emc/nems/noscrub/emc.nemspara/soft/modulefiles -module load esmf/8.0.0 +module use /usrx/local/prod/modulefiles +module load hpss/4.1.0.3 -module load python/3.6.3 +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_cray/make_lbcs.local b/modulefiles/tasks/wcoss_cray/make_lbcs.local index 2790204cb1..04eb686e14 100644 --- a/modulefiles/tasks/wcoss_cray/make_lbcs.local +++ b/modulefiles/tasks/wcoss_cray/make_lbcs.local @@ -1,34 +1,17 @@ #%Module -module load modules -module load xt-lsfhpc -module load ncep +module use /opt/cray/ari/modulefiles module load alps -module load dvs -module load xpmem -module load ugni -module load craype-network-aries -module load switch -module load rca module load gni-headers +module load pmi/5.0.11 +module load rca module load udreg -module load hpss - -module load prod_util -module load g2tmpl-intel/1.4.0 -module load crtm-intel/2.2.6 -module load iobuf/2.0.7 -module load gempak/7.3.0 - -module load nco-gnu-sandybridge/4.4.4 -module load NetCDF-intel-sandybridge/4.2 -module load cfp-intel-sandybridge/1.1.0 -export USE_CFP=YES - -module load grib_util/1.1.0 +module load ugni +module load xpmem -module use -a /gpfs/hps3/emc/nems/noscrub/emc.nemspara/soft/modulefiles -module load esmf/8.0.0 +module use /usrx/local/prod/modulefiles +module load hpss/4.1.0.3 -module load python/3.6.3 +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_cray/make_orog.local b/modulefiles/tasks/wcoss_cray/make_orog.local new file mode 100644 index 0000000000..c431755327 --- /dev/null +++ b/modulefiles/tasks/wcoss_cray/make_orog.local @@ -0,0 +1,12 @@ +#%Module + +module use /opt/cray/ari/modulefiles +module load alps +module load gni-headers +module load pmi/5.0.11 +module load rca +module load udreg +module load ugni +module load xpmem + +module list diff --git a/modulefiles/tasks/wcoss_cray/make_sfc_climo.local b/modulefiles/tasks/wcoss_cray/make_sfc_climo.local new file mode 100644 index 0000000000..c431755327 --- /dev/null +++ b/modulefiles/tasks/wcoss_cray/make_sfc_climo.local @@ -0,0 +1,12 @@ +#%Module + +module use /opt/cray/ari/modulefiles +module load alps +module load gni-headers +module load pmi/5.0.11 +module load rca +module load udreg +module load ugni +module load xpmem + +module list diff --git a/modulefiles/tasks/wcoss_cray/run_fcst.local b/modulefiles/tasks/wcoss_cray/run_fcst.local index 525e87aae6..edee5d7249 100644 --- a/modulefiles/tasks/wcoss_cray/run_fcst.local +++ b/modulefiles/tasks/wcoss_cray/run_fcst.local @@ -1,3 +1,15 @@ #%Module -module unload python/2.7.14 -module load python/3.6.3 + +module use /opt/cray/ari/modulefiles +module load alps +module load gni-headers +module load pmi/5.0.11 +module load rca +module load udreg +module load ugni +module load xpmem + +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 + +module list diff --git a/modulefiles/tasks/wcoss_cray/run_post.local b/modulefiles/tasks/wcoss_cray/run_post.local index 5d1e72c237..4529dd2f37 100644 --- a/modulefiles/tasks/wcoss_cray/run_post.local +++ b/modulefiles/tasks/wcoss_cray/run_post.local @@ -1,14 +1,18 @@ #%Module -module load modules -module load xt-lsfhpc -module load ncep +module use /opt/cray/ari/modulefiles module load alps -module load dvs -module load xpmem -module load ugni -module load craype-network-aries -module load switch -module load rca module load gni-headers +module load pmi/5.0.11 +module load rca module load udreg +module load ugni +module load xpmem + +module use /usrx/local/prod/modulefiles +module load hpss/4.1.0.3 + +module use /usrx/local/nceplibs/modulefiles +module load srw-app-python/1.0.0 + + diff --git a/modulefiles/tasks/wcoss_dell_p3/get_extrn_ics.local b/modulefiles/tasks/wcoss_dell_p3/get_extrn_ics.local index f0c321901a..1502c04d56 100644 --- a/modulefiles/tasks/wcoss_dell_p3/get_extrn_ics.local +++ b/modulefiles/tasks/wcoss_dell_p3/get_extrn_ics.local @@ -3,6 +3,7 @@ ############################################################# module purge - module load HPSS/5.0.2.5 +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_dell_p3/get_extrn_lbcs.local b/modulefiles/tasks/wcoss_dell_p3/get_extrn_lbcs.local index 8211cb9364..74e8f878c1 100644 --- a/modulefiles/tasks/wcoss_dell_p3/get_extrn_lbcs.local +++ b/modulefiles/tasks/wcoss_dell_p3/get_extrn_lbcs.local @@ -3,6 +3,7 @@ ############################################################# module purge - module load HPSS/5.0.2.5 +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_dell_p3/make_grid.local b/modulefiles/tasks/wcoss_dell_p3/make_grid.local index 7f651f92ea..0e25550649 100644 --- a/modulefiles/tasks/wcoss_dell_p3/make_grid.local +++ b/modulefiles/tasks/wcoss_dell_p3/make_grid.local @@ -1,3 +1,7 @@ #%Module + module load lsf/10.1 module load python/3.6.3 + +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_dell_p3/make_ics.local b/modulefiles/tasks/wcoss_dell_p3/make_ics.local index 7f651f92ea..0e25550649 100644 --- a/modulefiles/tasks/wcoss_dell_p3/make_ics.local +++ b/modulefiles/tasks/wcoss_dell_p3/make_ics.local @@ -1,3 +1,7 @@ #%Module + module load lsf/10.1 module load python/3.6.3 + +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_dell_p3/make_lbcs.local b/modulefiles/tasks/wcoss_dell_p3/make_lbcs.local index 7f651f92ea..0e25550649 100644 --- a/modulefiles/tasks/wcoss_dell_p3/make_lbcs.local +++ b/modulefiles/tasks/wcoss_dell_p3/make_lbcs.local @@ -1,3 +1,7 @@ #%Module + module load lsf/10.1 module load python/3.6.3 + +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_dell_p3/run_fcst.local b/modulefiles/tasks/wcoss_dell_p3/run_fcst.local index 525e87aae6..5b62d935fd 100644 --- a/modulefiles/tasks/wcoss_dell_p3/run_fcst.local +++ b/modulefiles/tasks/wcoss_dell_p3/run_fcst.local @@ -1,3 +1,7 @@ #%Module + module unload python/2.7.14 module load python/3.6.3 + +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 diff --git a/modulefiles/tasks/wcoss_dell_p3/run_post.local b/modulefiles/tasks/wcoss_dell_p3/run_post.local index 529df93fd9..9e6f62305a 100644 --- a/modulefiles/tasks/wcoss_dell_p3/run_post.local +++ b/modulefiles/tasks/wcoss_dell_p3/run_post.local @@ -1,2 +1,7 @@ #%Module + module load lsf/10.1 + +module use /usrx/local/nceplibs/dev/modulefiles +module load srw-app-python/1.0.0 + diff --git a/scripts/exregional_make_orog.sh b/scripts/exregional_make_orog.sh index 2d4cdd0b56..873fab97d1 100755 --- a/scripts/exregional_make_orog.sh +++ b/scripts/exregional_make_orog.sh @@ -287,50 +287,12 @@ cat "${input_redirect_fn}" print_info_msg "$VERBOSE" " Starting orography file generation..." -case $MACHINE in - - "WCOSS_CRAY") -# -# On WCOSS and WCOSS_C, use cfp to run multiple tiles simulatneously for -# the orography. For now, we have only one tile in the regional case, -# but in the future we will have more. First, create an input file for -# cfp. -# - ufs_utils_ushdir="${UFS_UTILS_DIR}/ush" - res="0" # What should this be set to??? - printf "%s\n" "\ -${ufs_utils_ushdir}/${orog_gen_scr} \ -$res \ -${TILE_RGNL} \ -${FIXLAM} \ -${raw_dir} \ -${UFS_UTILS_DIR} \ -${TOPO_DIR} \ -${tmp_dir}" \ - >> ${tmp_dir}/orog.file1 - aprun -j 1 -n 4 -N 4 -d 6 -cc depth cfp ${tmp_dir}/orog.file1 - rm_vrfy ${tmp_dir}/orog.file1 - ;; - - "WCOSS_DELL_P3") - ufs_utils_ushdir="${UFS_UTILS_DIR}/ush" - res="0" # What should this be set to??? - "${exec_fp}" < "${input_redirect_fn}" || \ - print_err_msg_exit "\ -Call to executable (exec_fp) that generates the raw orography file returned -with nonzero exit code: - exec_fp = \"${exec_fp}\"" - ;; - - "CHEYENNE" | "HERA" | "ORION" | "JET" | "ODIN" | "STAMPEDE") - $APRUN "${exec_fp}" < "${input_redirect_fn}" || \ +$APRUN "${exec_fp}" < "${input_redirect_fn}" || \ print_err_msg_exit "\ Call to executable (exec_fp) that generates the raw orography file returned with nonzero exit code: exec_fp = \"${exec_fp}\"" - ;; -esac # # Change location to the original directory. # @@ -457,7 +419,13 @@ cp_vrfy "${raw_orog_fp}" "${filtered_orog_fp}" # filtering executable will run) with the same name as the grid file and # point it to the actual grid file specified by grid_fp. # -ln_vrfy -fs --relative "${grid_fp}" "${filter_dir}/${grid_fn}" + +if [ "${MACHINE}" = "WCOSS_CRAY" ]; then + ln_vrfy -fs "${grid_fp}" "${filter_dir}/${grid_fn}" +else + ln_vrfy -fs --relative "${grid_fp}" "${filter_dir}/${grid_fn}" +fi + # # Create the namelist file (in the filter_dir directory) that the orography # filtering executable will read in. diff --git a/scripts/exregional_run_fcst.sh b/scripts/exregional_run_fcst.sh index e1eeeee2b3..7f78b4cf64 100755 --- a/scripts/exregional_run_fcst.sh +++ b/scripts/exregional_run_fcst.sh @@ -92,7 +92,12 @@ case $MACHINE in "WCOSS_CRAY") ulimit -s unlimited ulimit -a - APRUN="aprun -b -j1 -n${PE_MEMBER01} -N24 -d1 -cc depth" + + if [ ${PE_MEMBER01} -gt 24 ];then + APRUN="aprun -b -j1 -n${PE_MEMBER01} -N24 -d1 -cc depth" + else + APRUN="aprun -b -j1 -n24 -N24 -d1 -cc depth" + fi ;; "WCOSS_DELL_P3") @@ -174,7 +179,7 @@ the grid and (filtered) orography files ..." cd_vrfy ${run_dir}/INPUT relative_or_null="" -if [ "${RUN_TASK_MAKE_GRID}" = "TRUE" ]; then +if [ "${RUN_TASK_MAKE_GRID}" = "TRUE" ] && [ "${MACHINE}" != "WCOSS_CRAY" ]; then relative_or_null="--relative" fi @@ -239,7 +244,7 @@ fi relative_or_null="" -if [ "${RUN_TASK_MAKE_OROG}" = "TRUE" ]; then +if [ "${RUN_TASK_MAKE_OROG}" = "TRUE" ] && [ "${MACHINE}" != "WCOSS_CRAY" ] ; then relative_or_null="--relative" fi @@ -345,7 +350,7 @@ static) files in the FIXam directory: run_dir = \"${run_dir}\"" relative_or_null="" -if [ "${RUN_ENVIR}" != "nco" ]; then +if [ "${RUN_ENVIR}" != "nco" ] && [ "${MACHINE}" != "WCOSS_CRAY" ] ; then relative_or_null="--relative" fi @@ -395,7 +400,7 @@ Creating links in the current run directory to cycle-independent model input files in the main experiment directory..." relative_or_null="" -if [ "${RUN_ENVIR}" != "nco" ]; then +if [ "${RUN_ENVIR}" != "nco" ] && [ "${MACHINE}" != "WCOSS_CRAY" ] ; then relative_or_null="--relative" fi @@ -436,7 +441,11 @@ cycle's (cdate) run directory (run_dir) failed: #----------------------------------------------------------------------- # if [ "${DO_ENSEMBLE}" = "TRUE" ]; then - relative_or_null="--relative" + if [ "${MACHINE}" = "WCOSS_CRAY" ]; then + relative_or_null="" + else + relative_or_null="--relative" + fi diag_table_fp="${cycle_dir}/${DIAG_TABLE_FN}" ln_vrfy -sf ${relative_or_null} ${diag_table_fp} ${run_dir} fi diff --git a/ush/launch_FV3LAM_wflow.sh b/ush/launch_FV3LAM_wflow.sh index 14cb1ad19b..702dbdf936 100755 --- a/ush/launch_FV3LAM_wflow.sh +++ b/ush/launch_FV3LAM_wflow.sh @@ -96,6 +96,16 @@ if [ "$MACHINE" = "CHEYENNE" ]; then module load rocoto elif [ "$MACHINE" = "ORION" ]; then module load contrib rocoto +elif [ "$MACHINE" = "WCOSS_DELL_P3" ]; then + module purge + module load lsf/10.1 + module use /gpfs/dell3/usrx/local/dev/emc_rocoto/modulefiles/ + module load ruby/2.5.1 rocoto/1.2.4 +elif [ "$MACHINE" = "WCOSS_CRAY" ]; then + module purge + module load xt-lsfhpc/9.1.3 + module use -a /usrx/local/emc_rocoto/modulefiles + module load rocoto/1.2.4 else module purge module load rocoto diff --git a/ush/link_fix.sh b/ush/link_fix.sh index dd4e008c9c..651f4fbfd0 100755 --- a/ush/link_fix.sh +++ b/ush/link_fix.sh @@ -351,7 +351,7 @@ Please ensure that all files have the same resolution." #----------------------------------------------------------------------- # relative_or_null="" - if [ "${run_task}" = "TRUE" ]; then + if [ "${run_task}" = "TRUE" ] && [ "${MACHINE}" != "WCOSS_CRAY" ] ; then relative_or_null="--relative" fi diff --git a/ush/load_modules_run_task.sh b/ush/load_modules_run_task.sh index 047f42946c..c2e7cbc9a9 100755 --- a/ush/load_modules_run_task.sh +++ b/ush/load_modules_run_task.sh @@ -153,8 +153,8 @@ jjob_fp="$2" #----------------------------------------------------------------------- # machine=${MACHINE,,} -env_fn="README_${machine}_${COMPILER}.txt" -env_fp="${SR_WX_APP_TOP_DIR}/docs/${env_fn}" +env_fn="build_${machine}_${COMPILER}.env" +env_fp="${SR_WX_APP_TOP_DIR}/env/${env_fn}" source "${env_fp}" || print_err_msg_exit "\ Sourcing platform- and compiler-specific environment file (env_fp) for the workflow task specified by task_name failed: diff --git a/ush/templates/FV3LAM_wflow.xml b/ush/templates/FV3LAM_wflow.xml index 2dab388cbf..2f20b7fafb 100644 --- a/ush/templates/FV3LAM_wflow.xml +++ b/ush/templates/FV3LAM_wflow.xml @@ -133,7 +133,11 @@ MODULES_RUN_TASK_FP script. &RSRV_DEFAULT; &LOAD_MODULES_RUN_TASK_FP; "&MAKE_GRID_TN;" "&JOBSDIR;/JREGIONAL_MAKE_GRID" + {% if machine in ["WCOSS_DELL_P3", "WCOSS_CRAY"] %} + {{ nnodes_make_grid }}:ppn=1 + {% else %} {{ nnodes_make_grid }}:ppn={{ ppn_make_grid }} + {% endif %} {{ wtime_make_grid }} &NCORES_PER_NODE; &MAKE_GRID_TN; @@ -153,7 +157,11 @@ MODULES_RUN_TASK_FP script. &RSRV_DEFAULT; &LOAD_MODULES_RUN_TASK_FP; "&MAKE_OROG_TN;" "&JOBSDIR;/JREGIONAL_MAKE_OROG" + {% if machine in ["WCOSS_DELL_P3", "WCOSS_CRAY"] %} + {{ nnodes_make_orog }}:ppn=1 + {% else %} {{ nnodes_make_orog }}:ppn={{ ppn_make_orog }} + {% endif %} {{ wtime_make_orog }} &NCORES_PER_NODE; &MAKE_OROG_TN; @@ -214,7 +222,13 @@ MODULES_RUN_TASK_FP script. &RSRV_HPSS; + {% if machine in ["WCOSS_CRAY"] %} + + {% endif %} &LOAD_MODULES_RUN_TASK_FP; "&GET_EXTRN_ICS_TN;" "&JOBSDIR;/JREGIONAL_GET_EXTRN_MDL_FILES" + {% if machine in ["WCOSS_DELL_P3"] %} + 2048M-R affinity[core] + {% endif %} {{ nnodes_get_extrn_ics }}:ppn={{ ppn_get_extrn_ics }} {{ wtime_get_extrn_ics }} &NCORES_PER_NODE; @@ -236,7 +250,13 @@ MODULES_RUN_TASK_FP script. &RSRV_HPSS; + {% if machine in ["WCOSS_CRAY"] %} + + {% endif %} &LOAD_MODULES_RUN_TASK_FP; "&GET_EXTRN_LBCS_TN;" "&JOBSDIR;/JREGIONAL_GET_EXTRN_MDL_FILES" + {% if machine in ["WCOSS_DELL_P3"] %} + 2048M-R affinity[core] + {% endif %} {{ nnodes_get_extrn_lbcs }}:ppn={{ ppn_get_extrn_lbcs }} {{ wtime_get_extrn_lbcs }} &NCORES_PER_NODE;