From b2665bf25e53def77e0709c14b8cd1dda8465091 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Tue, 6 Feb 2024 16:32:58 -0500 Subject: [PATCH 1/5] +Fix myStats global indexing segmentation faults Revised the interfaces to the myStats routine in the horizontal_regridding module to avoid segmentation faults due to inconsistent horizontal indices and array extents in global indexing mode. Rather than passing in absolute array extents to work on, an ocean grid type is now passed as an argument to myStats, with the new optional full_halo argument used to capture the case where the tracer statistics are being taken over the full data domain. The most frequently encountered problems occurred when the hard-coded debug variable in the horiz_interp_and_extrap_tracer routines are changed from false to true. When global indexing is not used, this revised work exactly as before, but when it is used with global indexing, it avoids segmentation faults that were preventing the model from running in some cases with all debugging enabled. --- src/framework/MOM_horizontal_regridding.F90 | 44 +++++++++++-------- .../MOM_tracer_initialization_from_Z.F90 | 2 +- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/framework/MOM_horizontal_regridding.F90 b/src/framework/MOM_horizontal_regridding.F90 index 34d0b73cb9..205dd6d7be 100644 --- a/src/framework/MOM_horizontal_regridding.F90 +++ b/src/framework/MOM_horizontal_regridding.F90 @@ -44,27 +44,32 @@ module MOM_horizontal_regridding contains !> Write to the terminal some basic statistics about the k-th level of an array -subroutine myStats(array, missing, is, ie, js, je, k, mesg, scale) - real, dimension(:,:), intent(in) :: array !< input array in arbitrary units [A ~> a] - real, intent(in) :: missing !< missing value in arbitrary units [A ~> a] - integer, intent(in) :: is !< Start index in i - integer, intent(in) :: ie !< End index in i - integer, intent(in) :: js !< Start index in j - integer, intent(in) :: je !< End index in j - integer, intent(in) :: k !< Level to calculate statistics for - character(len=*), intent(in) :: mesg !< Label to use in message - real, optional, intent(in) :: scale !< A scaling factor for output [a A-1 ~> 1] +subroutine myStats(array, missing, G, k, mesg, scale, full_halo) + type(ocean_grid_type), intent(in) :: G !< Ocean grid type + real, dimension(SZI_(G),SZJ_(G)), & + intent(in) :: array !< input array in arbitrary units [A ~> a] + real, intent(in) :: missing !< missing value in arbitrary units [A ~> a] + integer, intent(in) :: k !< Level to calculate statistics for + character(len=*), intent(in) :: mesg !< Label to use in message + real, optional, intent(in) :: scale !< A scaling factor for output [a A-1 ~> 1] + logical, optional, intent(in) :: full_halo !< If present and true, test values on the whole + !! array rather than just the computational domain. ! Local variables real :: minA ! Minimum value in the array in the arbitrary units of the input array [A ~> a] real :: maxA ! Maximum value in the array in the arbitrary units of the input array [A ~> a] real :: scl ! A factor for undoing any scaling of the array statistics for output [a A-1 ~> 1] - integer :: i,j + integer :: i, j, is, ie, js, je logical :: found character(len=120) :: lMesg scl = 1.0 ; if (present(scale)) scl = scale minA = 9.E24 / scl ; maxA = -9.E24 / scl ; found = .false. + is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec + if (present(full_halo)) then ; if (full_halo) then + is = G%isd ; ie = G%ied ; js = G%jsd ; je = G%jed + endif ; endif + do j=js,je ; do i=is,ie if (array(i,j) /= array(i,j)) stop 'Nan!' if (abs(array(i,j)-missing) > 1.e-6*abs(missing)) then @@ -309,7 +314,7 @@ subroutine horiz_interp_and_extrap_tracer_record(filename, varnam, recnum, G, tr real, dimension(:,:), allocatable :: tr_in !< A 2-d array for holding input data on its !! native horizontal grid, with units that change !! as the input data is interpreted [a] then [A ~> a] - real, dimension(:,:,:), allocatable :: tr_in_full !< A 3-d array for holding input data on the + real, dimension(:,:,:), allocatable :: tr_in_full !< A 3-d array for holding input data on the !! model horizontal grid, with units that change !! as the input data is interpreted [a] then [A ~> a] real, dimension(:,:), allocatable :: tr_inp !< Native horizontal grid data extended to the poles @@ -332,7 +337,7 @@ subroutine horiz_interp_and_extrap_tracer_record(filename, varnam, recnum, G, tr real :: npole ! The number of points contributing to the pole value [nondim] real :: missing_val_in ! The missing value in the input field [a] real :: roundoff ! The magnitude of roundoff, usually ~2e-16 [nondim] - real :: add_offset, scale_factor ! File-specific conversion factors. + real :: add_offset, scale_factor ! File-specific conversion factors [a] or [nondim] integer :: ans_date ! The vintage of the expressions and order of arithmetic to use logical :: found_attr logical :: add_np @@ -545,7 +550,7 @@ subroutine horiz_interp_and_extrap_tracer_record(filename, varnam, recnum, G, tr endif if (debug) then - call myStats(tr_inp, missing_value, 1, id, 1, jd, k, 'Tracer from file', scale=I_scale) + call myStats(tr_inp, missing_value, G, k, 'Tracer from file', scale=I_scale, full_halo=.true.) endif call run_horiz_interp(Interp, tr_inp, tr_out(is:ie,js:je), missing_value=missing_value) @@ -568,11 +573,12 @@ subroutine horiz_interp_and_extrap_tracer_record(filename, varnam, recnum, G, tr (mask_out(i,j) < 1.0)) & fill(i,j) = 1.0 enddo ; enddo + call pass_var(fill, G%Domain) call pass_var(good, G%Domain) if (debug) then - call myStats(tr_out, missing_value, is, ie, js, je, k, 'variable from horiz_interp()', scale=I_scale) + call myStats(tr_out, missing_value, G, k, 'variable from horiz_interp()', scale=I_scale) endif ! Horizontally homogenize data to produce perfectly "flat" initial conditions @@ -589,7 +595,7 @@ subroutine horiz_interp_and_extrap_tracer_record(filename, varnam, recnum, G, tr call fill_miss_2d(tr_outf, good2, fill2, tr_prev, G, dtr_iter_stop, answer_date=ans_date) if (debug) then - call myStats(tr_outf, missing_value, is, ie, js, je, k, 'field from fill_miss_2d()', scale=I_scale) + call myStats(tr_outf, missing_value, G, k, 'field from fill_miss_2d()', scale=I_scale) endif tr_z(:,:,k) = tr_outf(:,:) * G%mask2dT(:,:) @@ -850,7 +856,7 @@ subroutine horiz_interp_and_extrap_tracer_fms_id(field, Time, G, tr_z, mask_z, & endif if (debug) then - call myStats(tr_inp, missing_value, 1, id, 1, jd, k, 'Tracer from file', scale=I_scale) + call myStats(tr_inp, missing_value, G, k, 'Tracer from file', scale=I_scale, full_halo=.true.) endif tr_out(:,:) = 0.0 @@ -878,7 +884,7 @@ subroutine horiz_interp_and_extrap_tracer_fms_id(field, Time, G, tr_z, mask_z, & call pass_var(good, G%Domain) if (debug) then - call myStats(tr_out, missing_value, is, ie, js, je, k, 'variable from horiz_interp()', scale=I_scale) + call myStats(tr_out, missing_value, G, k, 'variable from horiz_interp()', scale=I_scale) endif ! Horizontally homogenize data to produce perfectly "flat" initial conditions @@ -897,7 +903,7 @@ subroutine horiz_interp_and_extrap_tracer_fms_id(field, Time, G, tr_z, mask_z, & ! if (debug) then ! call hchksum(tr_outf, 'field from fill_miss_2d ', G%HI, scale=I_scale) -! call myStats(tr_outf, missing_value, is, ie, js, je, k, 'field from fill_miss_2d()', scale=I_scale) +! call myStats(tr_outf, missing_value, G, k, 'field from fill_miss_2d()', scale=I_scale) ! endif tr_z(:,:,k) = tr_outf(:,:) * G%mask2dT(:,:) diff --git a/src/initialization/MOM_tracer_initialization_from_Z.F90 b/src/initialization/MOM_tracer_initialization_from_Z.F90 index 5a172b5d97..d28a925c03 100644 --- a/src/initialization/MOM_tracer_initialization_from_Z.F90 +++ b/src/initialization/MOM_tracer_initialization_from_Z.F90 @@ -217,7 +217,7 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_ deallocate( h1 ) do k=1,nz - call myStats(tr(:,:,k), missing_value, is, ie, js, je, k, 'Tracer from ALE()') + call myStats(tr(:,:,k), missing_value, G, k, 'Tracer from ALE()') enddo call cpu_clock_end(id_clock_ALE) endif ! useALEremapping From 0426531baa141a796c83843dc6fb7a113beab286 Mon Sep 17 00:00:00 2001 From: Kate Hedstrom Date: Tue, 14 Nov 2023 19:02:18 -0500 Subject: [PATCH 2/5] Compute dz in the halo for OBC segments - It was blowing up with "forrtl: error (65): floating invalid" when accessing dz in the halo at the boundary, but just sometimes. My default layout is trouble while my testing layout of 48 cores is not. --- src/core/MOM_open_boundary.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/MOM_open_boundary.F90 b/src/core/MOM_open_boundary.F90 index 94320a30c7..8e53980243 100644 --- a/src/core/MOM_open_boundary.F90 +++ b/src/core/MOM_open_boundary.F90 @@ -3883,7 +3883,7 @@ subroutine update_OBC_segment_data(G, GV, US, OBC, tv, h, Time) endif if (OBC%number_of_segments >= 1) then - call thickness_to_dz(h, tv, dz, G, GV, US) + call thickness_to_dz(h, tv, dz, G, GV, US, halo_size=G%Domain%nihalo) call pass_var(dz, G%Domain) endif From 497c50dfe0d6203cde904b1fe916ade94f257160 Mon Sep 17 00:00:00 2001 From: Kate Hedstrom Date: Wed, 15 Nov 2023 15:33:29 -0900 Subject: [PATCH 3/5] More updates for reproducible layouts -even for PPM advection and OBCs. --- src/core/MOM.F90 | 5 ++++- src/tracer/MOM_tracer_advect.F90 | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/core/MOM.F90 b/src/core/MOM.F90 index 9d64b9bf17..7fe321998e 100644 --- a/src/core/MOM.F90 +++ b/src/core/MOM.F90 @@ -1436,8 +1436,11 @@ subroutine step_MOM_tracer_dyn(CS, G, GV, US, h, Time_local) CS%tracer_diff_CSp, CS%tracer_Reg, CS%tv) if (CS%debug) call MOM_tracer_chksum("Post-diffuse ", CS%tracer_Reg, G) if (showCallTree) call callTree_waypoint("finished tracer advection/diffusion (step_MOM)") - call update_segment_tracer_reservoirs(G, GV, CS%uhtr, CS%vhtr, h, CS%OBC, & + if (associated(CS%OBC) .and. allocated(CS%uhtr) .and. allocated(CS%vhtr)) then + call pass_vector(CS%uhtr, CS%vhtr, G%Domain) + call update_segment_tracer_reservoirs(G, GV, CS%uhtr, CS%vhtr, h, CS%OBC, & CS%t_dyn_rel_adv, CS%tracer_Reg) + endif call cpu_clock_end(id_clock_tracer) ; call cpu_clock_end(id_clock_thermo) call cpu_clock_begin(id_clock_other) ; call cpu_clock_begin(id_clock_diagnostics) diff --git a/src/tracer/MOM_tracer_advect.F90 b/src/tracer/MOM_tracer_advect.F90 index efe6397de0..ef2c3125cd 100644 --- a/src/tracer/MOM_tracer_advect.F90 +++ b/src/tracer/MOM_tracer_advect.F90 @@ -123,7 +123,8 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, GV, US, CS, Reg, x_first x_first = (MOD(G%first_direction,2) == 0) ! increase stencil size for Colella & Woodward PPM - if (CS%usePPM .and. .not. CS%useHuynh) stencil = 3 +! if (CS%usePPM .and. .not. CS%useHuynh) stencil = 3 + if (CS%usePPM) stencil = 3 ntr = Reg%ntr Idt = 1.0 / dt From 37f4fb91c98977dc86983d3d8de01efb2eb933d1 Mon Sep 17 00:00:00 2001 From: Kate Hedstrom Date: Thu, 16 Nov 2023 11:35:08 -0900 Subject: [PATCH 4/5] The dz halo is behaving today, I don't know --- src/core/MOM_open_boundary.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/MOM_open_boundary.F90 b/src/core/MOM_open_boundary.F90 index 8e53980243..94320a30c7 100644 --- a/src/core/MOM_open_boundary.F90 +++ b/src/core/MOM_open_boundary.F90 @@ -3883,7 +3883,7 @@ subroutine update_OBC_segment_data(G, GV, US, OBC, tv, h, Time) endif if (OBC%number_of_segments >= 1) then - call thickness_to_dz(h, tv, dz, G, GV, US, halo_size=G%Domain%nihalo) + call thickness_to_dz(h, tv, dz, G, GV, US) call pass_var(dz, G%Domain) endif From 88e12a5886239a713a85d9039b0cbd59eb6b390e Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Fri, 8 Mar 2024 15:03:27 -0500 Subject: [PATCH 5/5] Remove allocated test for CS%uhtr Removed allocated tests for the potentially statically allocated arrays CS%uhtr and CS%vhtr in a newly added line testing whether there are OBCs in use that would require a halo update on these arrays. Even in dynamic memory mode, these arrays are always being allocated, so these tests served no purpose, but in static memory mode they led to compile time errors. All answers are bitwise identical. --- src/core/MOM.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/MOM.F90 b/src/core/MOM.F90 index 7fe321998e..53f20bb10b 100644 --- a/src/core/MOM.F90 +++ b/src/core/MOM.F90 @@ -1436,7 +1436,7 @@ subroutine step_MOM_tracer_dyn(CS, G, GV, US, h, Time_local) CS%tracer_diff_CSp, CS%tracer_Reg, CS%tv) if (CS%debug) call MOM_tracer_chksum("Post-diffuse ", CS%tracer_Reg, G) if (showCallTree) call callTree_waypoint("finished tracer advection/diffusion (step_MOM)") - if (associated(CS%OBC) .and. allocated(CS%uhtr) .and. allocated(CS%vhtr)) then + if (associated(CS%OBC)) then call pass_vector(CS%uhtr, CS%vhtr, G%Domain) call update_segment_tracer_reservoirs(G, GV, CS%uhtr, CS%vhtr, h, CS%OBC, & CS%t_dyn_rel_adv, CS%tracer_Reg)