From a3cef876b2bc96107c97eeebf9053412186d1e91 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Fri, 6 Dec 2024 07:18:33 -0500 Subject: [PATCH] +Add optional unscale argument to reproducing_sum Added the new optional unscale argument to reproducing_sum() and reproducing_sum_EFP(). When this is used, the resulting real sum is restored to the same scaling as the input arrays, but when there is an EFP_type returned it is the same as it would have been had the input array been rescaled before it was passed in. All answers are bitwise identical, but thre is a new optional argument to a two publicly visible interfaces. --- src/framework/MOM_coms.F90 | 169 ++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 59 deletions(-) diff --git a/src/framework/MOM_coms.F90 b/src/framework/MOM_coms.F90 index e4f5235da8..8471dc0b29 100644 --- a/src/framework/MOM_coms.F90 +++ b/src/framework/MOM_coms.F90 @@ -51,7 +51,8 @@ module MOM_coms logical :: NaN_error = .false. !< This becomes true if a NaN is encountered. logical :: debug = .false. !< Making this true enables debugging output. -!> Find an accurate and order-invariant sum of a distributed 2d or 3d field +!> Find an accurate and order-invariant sum of a distributed 2d or 3d field, in some cases after +!! undoing the scaling of the input array and restoring that scaling in the returned value interface reproducing_sum module procedure reproducing_sum_2d, reproducing_sum_3d end interface reproducing_sum @@ -91,8 +92,9 @@ module MOM_coms !! the result returned as an extended fixed point type that can be converted back to a real number !! using EFP_to_real. This technique is described in Hallberg & Adcroft, 2014, Parallel Computing, !! doi:10.1016/j.parco.2014.04.007. -function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, only_on_PE) result(EFP_sum) - real, dimension(:,:), intent(in) :: array !< The array to be summed in arbitrary units [a] +function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, only_on_PE, unscale) result(EFP_sum) + real, dimension(:,:), intent(in) :: array !< The array to be summed in arbitrary units [a], or in + !! arbitrary scaled units [A ~> a] if unscale is present integer, optional, intent(in) :: isr !< The starting i-index of the sum, noting !! that the array indices starts at 1 integer, optional, intent(in) :: ier !< The ending i-index of the sum, noting @@ -102,15 +104,17 @@ function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, integer, optional, intent(in) :: jer !< The ending j-index of the sum, noting !! that the array indices starts at 1 logical, optional, intent(in) :: overflow_check !< If present and false, disable - !! checking for overflows in incremental results. - !! This can speed up calculations if the number - !! of values being summed is small enough - integer, optional, intent(out) :: err !< If present, return an error code instead of - !! triggering any fatal errors directly from - !! this routine. + !! checking for overflows in incremental results. + !! This can speed up calculations if the number + !! of values being summed is small enough + integer, optional, intent(out) :: err !< If present, return an error code instead of + !! triggering any fatal errors directly from + !! this routine. logical, optional, intent(in) :: only_on_PE !< If present and true, do not do the sum - !! across processors, only reporting the local sum - type(EFP_type) :: EFP_sum !< The result in extended fixed point format + !! across processors, only reporting the local sum + real, optional, intent(in) :: unscale !< A factor that is used to undo scaling of array before it is + !! summed, often to compensate for the scaling in [a A-1 ~> 1] + type(EFP_type) :: EFP_sum !< The result in extended fixed point format ! This subroutine uses a conversion to an integer representation ! of real numbers to give order-invariant sums that will reproduce @@ -120,7 +124,8 @@ function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, integer(kind=int64) :: ival, prec_error real :: rs ! The remaining value to add, in arbitrary units [a] real :: max_mag_term ! A running maximum magnitude of the values in arbitrary units [a] - logical :: over_check, do_sum_across_PEs + real :: descale ! A local copy of unscale if it is present [a A-1 ~> 1] or 1 + logical :: over_check, do_sum_across_PEs, do_unscale character(len=256) :: mesg integer :: i, j, n, is, ie, js, je, sgn @@ -130,7 +135,7 @@ function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, prec_error = ((2_int64)**62 + ((2_int64)**62 - 1)) / num_PEs() - is = 1 ; ie = size(array,1) ; js = 1 ; je = size(array,2 ) + is = 1 ; ie = size(array,1) ; js = 1 ; je = size(array,2) if (present(isr)) then if (isr < is) call MOM_error(FATAL, "Value of isr too small in reproducing_EFP_sum_2d.") is = isr @@ -150,32 +155,40 @@ function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, over_check = .true. ; if (present(overflow_check)) over_check = overflow_check do_sum_across_PEs = .true. ; if (present(only_on_PE)) do_sum_across_PEs = .not.only_on_PE + do_unscale = .false. ; if (present(unscale)) do_unscale = (unscale /= 1.0) + descale = 1.0 ; if (do_unscale) descale = unscale overflow_error = .false. ; NaN_error = .false. ; max_mag_term = 0.0 ints_sum(:) = 0 if (over_check) then if ((je+1-js)*(ie+1-is) < max_count_prec) then - do j=js,je ; do i=is,ie - call increment_ints_faster(ints_sum, array(i,j), max_mag_term) - enddo ; enddo + ! This is the most common case, so handle the do_unscale case separately for efficiency. + if (do_unscale) then + do j=js,je ; do i=is,ie + call increment_ints_faster(ints_sum, unscale*array(i,j), max_mag_term) + enddo ; enddo + else + do j=js,je ; do i=is,ie + call increment_ints_faster(ints_sum, array(i,j), max_mag_term) + enddo ; enddo + endif call carry_overflow(ints_sum, prec_error) elseif ((ie+1-is) < max_count_prec) then do j=js,je do i=is,ie - call increment_ints_faster(ints_sum, array(i,j), max_mag_term) + call increment_ints_faster(ints_sum, descale*array(i,j), max_mag_term) enddo call carry_overflow(ints_sum, prec_error) enddo else do j=js,je ; do i=is,ie - call increment_ints(ints_sum, real_to_ints(array(i,j), prec_error), & - prec_error) + call increment_ints(ints_sum, real_to_ints(descale*array(i,j), prec_error), prec_error) enddo ; enddo endif else do j=js,je ; do i=is,ie sgn = 1 ; if (array(i,j)<0.0) sgn = -1 - rs = abs(array(i,j)) + rs = abs(descale*array(i,j)) do n=1,ni ival = int(rs*I_pr(n), kind=int64) rs = rs - ival*pr(n) @@ -213,13 +226,15 @@ function reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, end function reproducing_EFP_sum_2d + !> This subroutine uses a conversion to an integer representation of real numbers to give an !! order-invariant sum of distributed 2-D arrays that reproduces across domain decomposition. !! This technique is described in Hallberg & Adcroft, 2014, Parallel Computing, !! doi:10.1016/j.parco.2014.04.007. function reproducing_sum_2d(array, isr, ier, jsr, jer, EFP_sum, reproducing, & - overflow_check, err, only_on_PE) result(sum) - real, dimension(:,:), intent(in) :: array !< The array to be summed in arbitrary units [a] + overflow_check, err, only_on_PE, unscale) result(sum) + real, dimension(:,:), intent(in) :: array !< The array to be summed in arbitrary units [a], or in + !! arbitrary scaled units [A ~> a] if unscale is present integer, optional, intent(in) :: isr !< The starting i-index of the sum, noting !! that the array indices starts at 1 integer, optional, intent(in) :: ier !< The ending i-index of the sum, noting @@ -228,7 +243,7 @@ function reproducing_sum_2d(array, isr, ier, jsr, jer, EFP_sum, reproducing, & !! that the array indices starts at 1 integer, optional, intent(in) :: jer !< The ending j-index of the sum, noting !! that the array indices starts at 1 - type(EFP_type), optional, intent(out) :: EFP_sum !< The result in extended fixed point format + type(EFP_type), optional, intent(out) :: EFP_sum !< The result in extended fixed point format logical, optional, intent(in) :: reproducing !< If present and false, do the sum !! using the naive non-reproducing approach logical, optional, intent(in) :: overflow_check !< If present and false, disable @@ -240,16 +255,18 @@ function reproducing_sum_2d(array, isr, ier, jsr, jer, EFP_sum, reproducing, & !! this routine. logical, optional, intent(in) :: only_on_PE !< If present and true, do not do the sum !! across processors, only reporting the local sum - real :: sum !< The sum of the values in array in arbitrary units [a] - - ! This subroutine uses a conversion to an integer representation - ! of real numbers to give order-invariant sums that will reproduce - ! across PE count. This idea comes from R. Hallberg and A. Adcroft. + real, optional, intent(in) :: unscale !< A factor that is used to undo scaling of array before it is + !! summed, often to compensate for the scaling in [a A-1 ~> 1] + real :: sum !< The sum of the values in array in the same + !! arbitrary units as array [a] or [A ~> a] + ! Local variables integer(kind=int64), dimension(ni) :: ints_sum integer(kind=int64) :: prec_error - real :: rsum(1) ! The running sum, in arbitrary units [a] - logical :: repro, do_sum_across_PEs + real :: rsum(1) ! The running sum, in arbitrary units [a] + real :: descale ! A local copy of unscale if it is present [a A-1 ~> 1] or 1 + real :: I_unscale ! The reciprocal of unscale [A a-1 ~> 1] + logical :: repro, do_sum_across_PEs, do_unscale character(len=256) :: mesg type(EFP_type) :: EFP_val ! An extended fixed point version of the sum integer :: i, j, is, ie, js, je @@ -260,7 +277,7 @@ function reproducing_sum_2d(array, isr, ier, jsr, jer, EFP_sum, reproducing, & prec_error = ((2_int64)**62 + ((2_int64)**62 - 1)) / num_PEs() - is = 1 ; ie = size(array,1) ; js = 1 ; je = size(array,2 ) + is = 1 ; ie = size(array,1) ; js = 1 ; je = size(array,2) if (present(isr)) then if (isr < is) call MOM_error(FATAL, "Value of isr too small in reproducing_sum_2d.") is = isr @@ -280,19 +297,25 @@ function reproducing_sum_2d(array, isr, ier, jsr, jer, EFP_sum, reproducing, & repro = .true. ; if (present(reproducing)) repro = reproducing do_sum_across_PEs = .true. ; if (present(only_on_PE)) do_sum_across_PEs = .not.only_on_PE + do_unscale = .false. ; if (present(unscale)) do_unscale = (unscale /= 1.0) + descale = 1.0 ; I_unscale = 1.0 + if (do_unscale) then + descale = unscale + if (abs(unscale) > 0.0) I_unscale = 1.0 / unscale + endif if (repro) then - EFP_val = reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, only_on_PE) - sum = ints_to_real(EFP_val%v) + EFP_val = reproducing_EFP_sum_2d(array, isr, ier, jsr, jer, overflow_check, err, only_on_PE, unscale) + sum = ints_to_real(EFP_val%v) * I_unscale if (present(EFP_sum)) EFP_sum = EFP_val if (debug) ints_sum(:) = EFP_sum%v(:) else rsum(1) = 0.0 do j=js,je ; do i=is,ie - rsum(1) = rsum(1) + array(i,j) + rsum(1) = rsum(1) + descale*array(i,j) enddo ; enddo if (do_sum_across_PEs) call sum_across_PEs(rsum,1) - sum = rsum(1) + sum = rsum(1) * I_unscale if (present(err)) then ; err = 0 ; endif @@ -312,7 +335,7 @@ function reproducing_sum_2d(array, isr, ier, jsr, jer, EFP_sum, reproducing, & endif if (debug) then - write(mesg,'("2d RS: ", ES24.16, 6 Z17.16)') sum, ints_sum(1:ni) + write(mesg,'("2d RS: ", ES24.16, 6 Z17.16)') sum*descale, ints_sum(1:ni) call MOM_mesg(mesg, 3) endif @@ -322,9 +345,10 @@ end function reproducing_sum_2d !! order-invariant sum of distributed 3-D arrays that reproduces across domain decomposition. !! This technique is described in Hallberg & Adcroft, 2014, Parallel Computing, !! doi:10.1016/j.parco.2014.04.007. -function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_sums, err, only_on_PE) & +function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_sums, err, only_on_PE, unscale) & result(sum) - real, dimension(:,:,:), intent(in) :: array !< The array to be summed in arbitrary units [a] + real, dimension(:,:,:), intent(in) :: array !< The array to be summed in arbitrary units [a], or in + !! arbitrary scaled units [A ~> a] if unscale is present integer, optional, intent(in) :: isr !< The starting i-index of the sum, noting !! that the array indices starts at 1 integer, optional, intent(in) :: ier !< The ending i-index of the sum, noting @@ -333,28 +357,31 @@ function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_su !! that the array indices starts at 1 integer, optional, intent(in) :: jer !< The ending j-index of the sum, noting !! that the array indices starts at 1 - real, dimension(:), optional, intent(out) :: sums !< The sums by vertical layer in abitrary units [a] + real, dimension(:), optional, intent(out) :: sums !< The sums by vertical layer in the same + !! abitrary units as array [a] or [A ~> a] type(EFP_type), optional, intent(out) :: EFP_sum !< The result in extended fixed point format type(EFP_type), dimension(:), & optional, intent(out) :: EFP_lay_sums !< The sums by vertical layer in EFP format - integer, optional, intent(out) :: err !< If present, return an error code instead of - !! triggering any fatal errors directly from - !! this routine. + integer, optional, intent(out) :: err !< If present, return an error code instead of + !! triggering any fatal errors directly from + !! this routine. logical, optional, intent(in) :: only_on_PE !< If present and true, do not do the sum - !! across processors, only reporting the local sum - real :: sum !< The sum of the values in array in arbitrary units [a] - - ! This subroutine uses a conversion to an integer representation - ! of real numbers to give order-invariant sums that will reproduce - ! across PE count. This idea comes from R. Hallberg and A. Adcroft. + !! across processors, only reporting the local sum + real, optional, intent(in) :: unscale !< A factor that is used to undo scaling of array before it is + !! summed, often to compensate for the scaling in [a A-1 ~> 1] + real :: sum !< The sum of the values in array in the same + !! arbitrary units as array [a] or [A ~> a] + ! Local variables real :: val ! The real number that is extracted in arbitrary units [a] real :: max_mag_term ! A running maximum magnitude of the val's in arbitrary units [a] + real :: descale ! A local copy of unscale if it is present [a A-1 ~> 1] or 1 + real :: I_unscale ! The Adcroft reciprocal of unscale [A a-1 ~> 1] integer(kind=int64), dimension(ni) :: ints_sum integer(kind=int64), dimension(ni,size(array,3)) :: ints_sums integer(kind=int64) :: prec_error character(len=256) :: mesg - logical :: do_sum_across_PEs + logical :: do_sum_across_PEs, do_unscale integer :: i, j, k, is, ie, js, je, ke, isz, jsz, n if (num_PEs() > max_count_prec) call MOM_error(FATAL, & @@ -384,6 +411,7 @@ function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_su jsz = je+1-js; isz = ie+1-is do_sum_across_PEs = .true. ; if (present(only_on_PE)) do_sum_across_PEs = .not.only_on_PE + do_unscale = .false. ; if (present(unscale)) do_unscale = (unscale /= 1.0) if (present(sums) .or. present(EFP_lay_sums)) then if (present(sums)) then ; if (size(sums) < ke) then @@ -396,22 +424,28 @@ function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_su overflow_error = .false. ; NaN_error = .false. ; max_mag_term = 0.0 if (jsz*isz < max_count_prec) then do k=1,ke - do j=js,je ; do i=is,ie - call increment_ints_faster(ints_sums(:,k), array(i,j,k), max_mag_term) - enddo ; enddo + if (do_unscale) then + do j=js,je ; do i=is,ie + call increment_ints_faster(ints_sums(:,k), unscale*array(i,j,k), max_mag_term) + enddo ; enddo + else + do j=js,je ; do i=is,ie + call increment_ints_faster(ints_sums(:,k), array(i,j,k), max_mag_term) + enddo ; enddo + endif call carry_overflow(ints_sums(:,k), prec_error) enddo elseif (isz < max_count_prec) then do k=1,ke ; do j=js,je do i=is,ie - call increment_ints_faster(ints_sums(:,k), array(i,j,k), max_mag_term) + call increment_ints_faster(ints_sums(:,k), descale*array(i,j,k), max_mag_term) enddo call carry_overflow(ints_sums(:,k), prec_error) enddo ; enddo else do k=1,ke ; do j=js,je ; do i=is,ie call increment_ints(ints_sums(:,k), & - real_to_ints(array(i,j,k), prec_error), prec_error) + real_to_ints(descale*array(i,j,k), prec_error), prec_error) enddo ; enddo ; enddo endif if (present(err)) then @@ -458,21 +492,27 @@ function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_su overflow_error = .false. ; NaN_error = .false. ; max_mag_term = 0.0 if (jsz*isz < max_count_prec) then do k=1,ke - do j=js,je ; do i=is,ie - call increment_ints_faster(ints_sum, array(i,j,k), max_mag_term) - enddo ; enddo + if (do_unscale) then + do j=js,je ; do i=is,ie + call increment_ints_faster(ints_sum, unscale*array(i,j,k), max_mag_term) + enddo ; enddo + else + do j=js,je ; do i=is,ie + call increment_ints_faster(ints_sum, array(i,j,k), max_mag_term) + enddo ; enddo + endif call carry_overflow(ints_sum, prec_error) enddo elseif (isz < max_count_prec) then do k=1,ke ; do j=js,je do i=is,ie - call increment_ints_faster(ints_sum, array(i,j,k), max_mag_term) + call increment_ints_faster(ints_sum, descale*array(i,j,k), max_mag_term) enddo call carry_overflow(ints_sum, prec_error) enddo ; enddo else do k=1,ke ; do j=js,je ; do i=is,ie - call increment_ints(ints_sum, real_to_ints(array(i,j,k), prec_error), & + call increment_ints(ints_sum, real_to_ints(descale*array(i,j,k), prec_error), & prec_error) enddo ; enddo ; enddo endif @@ -504,6 +544,15 @@ function reproducing_sum_3d(array, isr, ier, jsr, jer, sums, EFP_sum, EFP_lay_su endif endif + if (do_unscale) then + ! Revise the sum to restore the scaling of input array before it is returned + I_unscale = 0.0 ; if (abs(unscale) > 0.0) I_unscale = 1.0 / unscale + sum = sum * I_unscale + if (present(sums)) then + do k=1,ke ; sums(k) = sums(k) * I_unscale ; enddo + endif + endif + end function reproducing_sum_3d !> Convert a real number into the array of integers constitute its extended-fixed-point representation @@ -516,9 +565,11 @@ function real_to_ints(r, prec_error, overflow) result(ints) logical, optional, intent(inout) :: overflow !< Returns true if the conversion is being !! done on a value that is too large to be represented integer(kind=int64), dimension(ni) :: ints + ! This subroutine converts a real number to an equivalent representation ! using several long integers. + ! Local variables real :: rs ! The remaining value to add, in arbitrary units [a] character(len=80) :: mesg integer(kind=int64) :: ival, prec_err