From b88503893053a32f490853e3aa206babbde9afa8 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Sun, 17 Jun 2018 11:01:19 +1000 Subject: [PATCH] libaccessom2 intercommunicator profiling support This patch modifies CICE to support changes to libaccessom2 which allow the user to disable intercommunicators, instead using MPI_COMM_WORLD to exchange messages between models. This work is primarily to work around limitations in the Score-P profiler. Specific changes are detailed below. - accessom2%sync_config uses the new API, and requires a libaccessom2 update. - il_commatm has been removed from cpl_interface, since the coupler object (coupler%atm_intercomm and coupler%atm_root) provides this information. - Most of the work in prism_init has been removed and replaced with equivalent operations in coupler%init_begin. - Explicit MPI send/recv calls now use ranks provided by coupler, rather than explicit sends to 0 over OASIS-provided intercommunicators. - Similarly, data sends now happen on the zero rank of CICE, rather than checking if the atmosphere rank of the intercommunicator is zero. Functionally, there should be no difference, but beware the unexpected. - Build scripts for Score-P have been added. Further changes to cpl_interface are possible, since most of the MPI information is provided by the coupler object. 
--- Makefile | 7 +-- bld/Macros.scorep | 81 ++++++++++++++++++++++++++++++ bld/config.scorep.auscom.1440x1080 | 29 +++++++++++ bld/config.scorep.auscom.3600x2700 | 29 +++++++++++ bld/config.scorep.auscom.360x300 | 21 ++++++++ drivers/auscom/CICE_InitMod.F90 | 5 +- drivers/auscom/cpl_interface.F90 | 63 +++++++++-------------- 7 files changed, 190 insertions(+), 45 deletions(-) create mode 100644 bld/Macros.scorep create mode 100644 bld/config.scorep.auscom.1440x1080 create mode 100644 bld/config.scorep.auscom.3600x2700 create mode 100644 bld/config.scorep.auscom.360x300 diff --git a/Makefile b/Makefile index 57162d1..ccc72c6 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,11 @@ +platform ?= nci 1deg: - bld/build.sh nci auscom 360x300 + bld/build.sh $(platform) auscom 360x300 025deg: - bld/build.sh nci auscom 1440x1080 + bld/build.sh $(platform) auscom 1440x1080 01deg: - bld/build.sh nci auscom 3600x2700 + bld/build.sh $(platform) auscom 3600x2700 clean: rm -rf build_* diff --git a/bld/Macros.scorep b/bld/Macros.scorep new file mode 100644 index 0000000..638b673 --- /dev/null +++ b/bld/Macros.scorep @@ -0,0 +1,81 @@ +#============================================================================== +# Makefile macros for xe.nci.org.au, an SGI ALTIX system running Linux +# Note: Use the -mp flag if precision is critical. It slows down the +# code by 25% (or more). +#============================================================================== + +INCLDIR := -I. 
+SLIBS := +ULIBS := +CPP := cpp +FC := scorep mpif90 + +CPPFLAGS := -P -traditional +CPPDEFS := -DLINUX -DPAROPT +CFLAGS := -c -O2 +FIXEDFLAGS := -132 +FREEFLAGS := + +ifeq ($(DEBUG), 1) + FFLAGS := -r8 -i4 -O0 -traceback -g -debug all -check all -no-vec -align all -w -fpe0 -ftz -convert big_endian -assume byterecl -assume nobuffered_io -check noarg_temp_created + CPPDEFS := $(CPPDEFS) -DDEBUG=$(DEBUG) +else + FFLAGS := -r8 -i4 -O2 -traceback -g -align all -xHost -fpe0 -w -ftz -convert big_endian -assume byterecl -assume buffered_io -check noarg_temp_created +endif + +MOD_SUFFIX := mod +LD := $(FC) +LDFLAGS := $(FFLAGS) -v + +CPPDEFS := $(CPPDEFS) -DNXGLOB=$(NXGLOB) -DNYGLOB=$(NYGLOB) \ + -DNUMIN=$(NUMIN) -DNUMAX=$(NUMAX) \ + -DTRAGE=$(TRAGE) -DTRFY=$(TRFY) -DTRLVL=$(TRLVL) \ + -DTRPND=$(TRPND) -DNTRAERO=$(NTRAERO) -DTRBRI=$(TRBRI) \ + -DNBGCLYR=$(NBGCLYR) -DTRBGCS=$(TRBGCS) \ + -DNICECAT=$(NICECAT) -DNICELYR=$(NICELYR) \ + -DNSNWLYR=$(NSNWLYR) \ + -DBLCKX=$(BLCKX) -DBLCKY=$(BLCKY) -DMXBLCKS=$(MXBLCKS) + +ifeq ($(COMMDIR), mpi) + SLIBS := $(SLIBS) -lmpi +endif + +ifeq ($(DITTO), yes) + CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE +endif + +ifeq ($(IO_TYPE), netcdf) + CPPDEFS := $(CPPDEFS) -Dncdf + INCLDIR := $(INCLDIR) -I$(NETCDF_ROOT)/include + SLIBS := $(SLIBS) -L$(NETCDF_ROOT)/lib -lnetcdf -lnetcdff +endif + +ifeq ($(USE_ESMF), yes) + CPPDEFS := $(CPPDEFS) -Duse_esmf + INCLDIR := $(INCLDIR) -I ??? + SLIBS := $(SLIBS) -L ??? 
-lesmf -lcprts -lrt -ldl +endif + +ifeq ($(AusCOM), yes) + CPPDEFS := $(CPPDEFS) -DAusCOM -Dcoupled + INCLDIR := $(INCLDIR) $(CPL_INCS) $(LIBAUSCOM_INCS) + SLIBS := $(SLIBS) -L$(CPLLIBDIR) -laccessom2 +endif + +ifeq ($(UNIT_TESTING), yes) + CPPDEFS := $(CPPDEFS) -DUNIT_TESTING +endif +ifeq ($(ACCESS), yes) + CPPDEFS := $(CPPDEFS) -DACCESS +endif +# standalone CICE with AusCOM mods +ifeq ($(ACCICE), yes) + CPPDEFS := $(CPPDEFS) -DACCICE +endif +# no MOM just CICE+UM +ifeq ($(NOMOM), yes) + CPPDEFS := $(CPPDEFS) -DNOMOM +endif +ifeq ($(OASIS3_MCT), yes) + CPPDEFS := $(CPPDEFS) -DOASIS3_MCT +endif diff --git a/bld/config.scorep.auscom.1440x1080 b/bld/config.scorep.auscom.1440x1080 new file mode 100644 index 0000000..749dd26 --- /dev/null +++ b/bld/config.scorep.auscom.1440x1080 @@ -0,0 +1,29 @@ + +# Recommendations: +# use processor_shape = slenderX1 or slenderX2 in ice_in +# one per processor with distribution_type='cartesian' or +# squarish blocks with distribution_type='rake' +# If BLCKX (BLCKY) does not divide NXGLOB (NYGLOB) evenly, padding +# will be used on the right (top) of the grid. 
+ +setenv NTASK 480 +setenv RES 1440x1080 + +set NXGLOB = `echo $RES | sed s/x.\*//` +set NYGLOB = `echo $RES | sed s/.\*x//` + +setenv BLCKX `expr $NXGLOB / 24` # x-dimension of blocks ( not including ) +setenv BLCKY `expr $NYGLOB / 20` # y-dimension of blocks ( ghost cells ) + +source /etc/profile.d/nf_csh_modules +module purge +module load intel-fc/17.0.1.132 +module load intel-cc/17.0.1.132 +module load netcdf/4.2.1.1 +module load openmpi/1.10.2 +module load scorep/3.1 + +# correct papi module +module unload papi +module use /short/z35/dsr900/tools/Modules +module load papi/5.1.1-patched diff --git a/bld/config.scorep.auscom.3600x2700 b/bld/config.scorep.auscom.3600x2700 new file mode 100644 index 0000000..7c17205 --- /dev/null +++ b/bld/config.scorep.auscom.3600x2700 @@ -0,0 +1,29 @@ + +# Recommendations: +# use processor_shape = slenderX1 or slenderX2 in ice_in +# one per processor with distribution_type='cartesian' or +# squarish blocks with distribution_type='rake' +# If BLCKX (BLCKY) does not divide NXGLOB (NYGLOB) evenly, padding +# will be used on the right (top) of the grid. 
+ +setenv NTASK 1200 +setenv RES 3600x2700 + +set NXGLOB = `echo $RES | sed s/x.\*//` +set NYGLOB = `echo $RES | sed s/.\*x//` + +setenv BLCKX `expr $NXGLOB / 40` # x-dimension of blocks ( not including ) +setenv BLCKY `expr $NYGLOB / 30` # y-dimension of blocks ( ghost cells ) + +source /etc/profile.d/nf_csh_modules +module purge +module load intel-fc/17.0.1.132 +module load intel-cc/17.0.1.132 +module load netcdf/4.2.1.1 +module load openmpi/1.10.2 +module load scorep/3.1 + +# correct papi module +module unload papi +module use /short/z35/dsr900/tools/Modules +module load papi/5.1.1-patched diff --git a/bld/config.scorep.auscom.360x300 b/bld/config.scorep.auscom.360x300 new file mode 100644 index 0000000..35691db --- /dev/null +++ b/bld/config.scorep.auscom.360x300 @@ -0,0 +1,21 @@ + +setenv NTASK 24 +setenv RES 360x300 +set NXGLOB = `echo $RES | sed s/x.\*//` +set NYGLOB = `echo $RES | sed s/.\*x//` + +setenv BLCKX `expr $NXGLOB / 24` # x-dimension of blocks ( not including ) +setenv BLCKY `expr $NYGLOB / 1` # y-dimension of blocks ( ghost cells ) + +source /etc/profile.d/nf_csh_modules +module purge +module load intel-fc/17.0.1.132 +module load intel-cc/17.0.1.132 +module load netcdf/4.2.1.1 +module load openmpi/1.10.2 +module load scorep/3.1 + +# correct papi module +module unload papi +module use /short/z35/dsr900/tools/Modules +module load papi/5.1.1-patched diff --git a/drivers/auscom/CICE_InitMod.F90 b/drivers/auscom/CICE_InitMod.F90 index 0968521..52947b5 100644 --- a/drivers/auscom/CICE_InitMod.F90 +++ b/drivers/auscom/CICE_InitMod.F90 @@ -20,7 +20,8 @@ module CICE_InitMod use cpl_parameters use cpl_parameters, only : read_namelist_parameters, accessom2_config_dir use cpl_forcing_handler, only : get_time0_sstsss, get_u_star - use cpl_interface , only : prism_init, init_cpl, il_commlocal, il_commatm + use cpl_interface , only : prism_init, init_cpl, il_commlocal + use cpl_interface, only: coupler use cpl_arrays_setup, only : gwork, u_star0 use 
ice_gather_scatter @@ -126,7 +127,7 @@ subroutine cice_init(accessom2) num_ocean_to_ice_fields=n_o2i) ! Synchronise accessom2 configuration between all models and PEs - call accessom2%sync_config(il_commatm, -1, -1) + call accessom2%sync_config(coupler) ! Use accessom2 configuration call input_data(accessom2%get_cur_exp_date_array(), & diff --git a/drivers/auscom/cpl_interface.F90 b/drivers/auscom/cpl_interface.F90 index 99ac406..07df216 100644 --- a/drivers/auscom/cpl_interface.F90 +++ b/drivers/auscom/cpl_interface.F90 @@ -45,12 +45,15 @@ module cpl_interface !mpi stuff use ice_broadcast, only : broadcast_array + use coupler_mod, only: coupler_type => coupler + implicit none public :: prism_init, init_cpl, coupler_termination, get_time0_sstsss, & - from_atm, into_ocn, from_ocn, il_commlocal, il_commatm + from_atm, into_ocn, from_ocn, il_commlocal public :: update_halos_from_ocn, update_halos_from_atm public :: write_boundary_checksums + public :: coupler private @@ -66,13 +69,13 @@ module cpl_interface integer(kind=int_kind) :: il_nbcplproc ! Number of processes involved in the coupling integer(kind=int_kind) :: l_ilo, l_ihi, l_jlo, l_jhi !local partition - integer(kind=int_kind) :: il_commatm, my_commatm_task - integer :: sendsubarray, recvsubarray , resizedrecvsubarray integer, dimension(:), allocatable :: counts, disps real(kind=dbl_kind), dimension(:,:), allocatable :: vwork2d + type(coupler_type) :: coupler + contains !====================================================================== @@ -85,6 +88,9 @@ subroutine prism_init(accessom2_config_dir) character(len=12) :: chiceout character(len=6) :: chout + ! NOTE: This function can probably be replaced by coupler%init_begin, but + ! let's move slowly for now. + !----------------------------------- ! 'define' the model global domain: !----------------------------------- @@ -94,38 +100,16 @@ subroutine prism_init(accessom2_config_dir) ! Initialize PSMILe. !------------------- - ! 
Initialise MPI - mpiflag = .FALSE. - call MPI_Initialized (mpiflag, ierror) + call coupler%init_begin('cicexx', config_dir=accessom2_config_dir) - if ( .not. mpiflag ) then - call MPI_INIT(ierror) - endif - - call prism_init_comp_proto(il_comp_id, cp_modnam, & - ierror, config_dir=accessom2_config_dir) + il_commlocal = coupler%localcomm - if (ierror /= PRISM_Ok) then - call prism_abort_proto(il_comp_id, 'cice prism_init', 'STOP 1') - endif - - ! - ! PSMILe attribution of local communicator. - ! - ! Either MPI_COMM_WORLD if MPI2 is used, - ! or a local communicator created by Oasis if MPI1 is used. - ! - call prism_get_localcomm_proto(il_commlocal, ierror) - ! - if (ierror /= PRISM_Ok) then - call prism_abort_proto(il_comp_id, 'cice prism_init', 'STOP 2') - endif ! ! Inquire if model is parallel or not and open the process log file ! call MPI_Comm_Size(il_commlocal, il_nbtotproc, ierror) - call MPI_Comm_Rank(il_commlocal, my_task, ierror) + my_task = coupler%my_local_pe il_nbcplproc = il_nbtotproc !multi-process coupling @@ -144,9 +128,6 @@ subroutine prism_init(accessom2_config_dir) nx_block,ny_block,max_blocks #endif - call prism_get_intercomm(il_commatm, 'matmxx', ierror) - call mpi_comm_rank(il_commatm, my_commatm_task, ierror) - end subroutine prism_init !======================================================================= @@ -408,20 +389,21 @@ subroutine send_grid_to_atm() tag = 0 buf_int(1) = nx_global buf_int(2) = ny_global - call MPI_send(buf_int, 2, MPI_INTEGER, 0, tag, il_commatm, ierror) + call MPI_send(buf_int, 2, MPI_INTEGER, coupler%atm_root, tag, & + coupler%atm_intercomm, ierror) allocate(buf_real(nx_global*ny_global)) buf_real(:) = reshape(tlat_global(:, :), (/ size(tlat_global) /)) - call MPI_send(buf_real, nx_global*ny_global, MPI_DOUBLE, 0, tag, & - il_commatm, ierror) + call MPI_send(buf_real, nx_global*ny_global, MPI_DOUBLE, & + coupler%atm_root, tag, coupler%atm_intercomm, ierror) buf_real(:) = reshape(tlon_global(:, :), (/ 
size(tlon_global) /)) - call MPI_send(buf_real, nx_global*ny_global, MPI_DOUBLE, 0, tag, & - il_commatm, ierror) + call MPI_send(buf_real, nx_global*ny_global, MPI_DOUBLE, & + coupler%atm_root, tag, coupler%atm_intercomm, ierror) buf_real(:) = reshape(mask_global(:, :), (/ size(mask_global) /)) - call MPI_send(buf_real, nx_global*ny_global, MPI_DOUBLE, 0, tag, & - il_commatm, ierror) + call MPI_send(buf_real, nx_global*ny_global, MPI_DOUBLE, & + coupler%atm_root, tag, coupler%atm_intercomm, ierror) deallocate(buf_real) deallocate(tlat_global) @@ -482,10 +464,11 @@ subroutine from_atm(isteps) endif ! Allow atm to progress. It is waiting on a receive. - if (my_commatm_task == 0) then + if (my_task == 0) then request = MPI_REQUEST_NULL tag = 0 - call MPI_Isend(buf, 1, MPI_INTEGER, 0, tag, il_commatm, request, ierror) + call MPI_Isend(buf, 1, MPI_INTEGER, coupler%atm_root, tag, & + coupler%atm_intercomm, request, ierror) endif call ice_timer_stop(timer_from_atm)