From 60ce8b510296b1b37a90b8581a58a711ccb99ac9 Mon Sep 17 00:00:00 2001 From: Eric Kemp Date: Wed, 18 Dec 2019 12:11:03 -0500 Subject: [PATCH 1/2] 557 post mode now uses Welford algorithm for standard deviation. The previous, straight-forward calculation of standard deviation was prone to run-off error, which could lead to negative variances and imaginary standard deviation. This new formulation uses the Welford algorithm which is more resistent to loss of precision. Note, however, that the algorithm is only used when processing multiple ensemble members; if a single deterministic run is processed, standard deviation is just set to missing. --- lvt/core/LVT_DataStreamsMod.F90 | 209 ++++++++++++++------------------ 1 file changed, 94 insertions(+), 115 deletions(-) diff --git a/lvt/core/LVT_DataStreamsMod.F90 b/lvt/core/LVT_DataStreamsMod.F90 index 4605c6a48..728b83d04 100644 --- a/lvt/core/LVT_DataStreamsMod.F90 +++ b/lvt/core/LVT_DataStreamsMod.F90 @@ -410,6 +410,10 @@ subroutine LVT_writeDataStreams character*10 :: stepType_mean character*10 :: stepType_ssdev + ! EMK...For Welford algorithm + integer :: count + real :: mean, m2, stddev, new_value + ! EMK...This is only used when LVT is run in "557 post" mode. if (trim(LVT_rc%runmode) .ne. "557 post") return @@ -1529,22 +1533,12 @@ subroutine LVT_writeDataStreams enddo ! r ! Apply the smoother - if(.not.((dataEntry%short_name.eq."Landcover").or.& - (dataEntry%short_name.eq."Landmask").or.& - (dataEntry%short_name.eq."Soiltype").or.& - (dataEntry%short_name.eq.'Elevation').or. & - (dataEntry%short_name.eq."Greenness").or.& - (dataEntry%short_name.eq."Tair_f_min").or.& - (dataEntry%short_name.eq."Tair_f_max").or.& - (dataEntry%short_name.eq."TotalPrecip").or.& - (dataEntry%short_name.eq."Wind_f").or.& - (dataEntry%short_name.eq."Tair_f").or.& - (dataEntry%short_name.eq."SWdown_f").or.& - (dataEntry%short_name.eq."LWdown_f").or.& - (dataEntry%short_name.eq."RHMin"))) then - if(LVT_rc%applyNoiseReductionFilter.eq.1) then - call applyNoiseReductionFilter(gtmp1_1d_mem) - endif + ! EMK...Removed the hardwired exceptions to + ! smoothing. The original exception list did not + ! consider forcing perturbations. It seams best + ! to just trust the setting in the lvt.config file. + if(LVT_rc%applyNoiseReductionFilter.eq.1) then + call applyNoiseReductionFilter(gtmp1_1d_mem) endif ! Now provide smoothed field to ensemble mean and @@ -1553,15 +1547,25 @@ subroutine LVT_writeDataStreams do c=1,LVT_rc%lnc if(LVT_domain%gindex(c,r).ne.-1) then gid = LVT_domain%gindex(c,r) - gtmp1_1d(c+(r-1)*LVT_rc%lnc) = & - gtmp1_1d(c+(r-1)*LVT_rc%lnc) + & - gtmp1_1d_mem(c+(r-1)*LVT_rc%lnc) - ngtmp1_1d(c+(r-1)*LVT_rc%lnc) = & - ngtmp1_1d(c+(r-1)*LVT_rc%lnc) + 1 + if (LVT_rc%nensem > 1) then - gtmp1_ss(c+(r-1)*LVT_rc%lnc) = & - gtmp1_ss(c+(r-1)*LVT_rc%lnc) + & - (gtmp1_1d_mem(c+(r-1)*LVT_rc%lnc))**2 + ! Use Welford algorithm to calculate + ! mean and standard deviation + count = ngtmp1_1d(c+(r-1)*LVT_rc%lnc) + mean = gtmp1_1d(c+(r-1)*LVT_rc%lnc) + m2 = gtmp1_ss(c+(r-1)*LVT_rc%lnc) + new_value = gtmp1_1d_mem(c+(r-1)*LVT_rc%lnc) + call welford_update(count, mean, m2, & + new_value) + ngtmp1_1d(c+(r-1)*LVT_rc%lnc) = count + gtmp1_1d(c+(r-1)*LVT_rc%lnc) = mean + gtmp1_ss(c+(r-1)*LVT_rc%lnc) = m2 + else + gtmp1_1d(c+(r-1)*LVT_rc%lnc) = & + gtmp1_1d(c+(r-1)*LVT_rc%lnc) + & + gtmp1_1d_mem(c+(r-1)*LVT_rc%lnc) + ngtmp1_1d(c+(r-1)*LVT_rc%lnc) = & + ngtmp1_1d(c+(r-1)*LVT_rc%lnc) + 1 end if endif enddo ! c @@ -1572,103 +1576,34 @@ subroutine LVT_writeDataStreams ! Finalize the ensemble mean and spread do r=1,LVT_rc%lnr do c=1,LVT_rc%lnc - if(ngtmp1_1d(c+(r-1)*LVT_rc%lnc).gt.0) then - gtmp1_1d(c+(r-1)*LVT_rc%lnc) = & - gtmp1_1d(c+(r-1)*LVT_rc%lnc)/& - ngtmp1_1d(c+(r-1)*LVT_rc%lnc) - - if (LVT_rc%nensem > 1) then - variance = gtmp1_ss(c+(r-1)*LVT_rc%lnc)/& - ngtmp1_1d(c+(r-1)*LVT_rc%lnc) - & - gtmp1_1d(c+(r-1)*LVT_rc%lnc)**2 - if(variance.gt.0) then - gtmp1_ss(c+(r-1)*LVT_rc%lnc) = & - sqrt(variance) - else - gtmp1_ss(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef - endif - else + if (LVT_rc%nensem > 1) then + ! Use Welford algorithm to calculate mean and + ! standard deviation + count = ngtmp1_1d(c+(r-1)*LVT_rc%lnc) + if (count < 1) then + gtmp1_1d(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef gtmp1_ss(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef + else + mean = gtmp1_1d(c+(r-1)*LVT_rc%lnc) + m2 = gtmp1_ss(c+(r-1)*LVT_rc%lnc) + call welford_finalize(count, mean, m2, stddev) + gtmp1_1d(c+(r-1)*LVT_rc%lnc) = mean + gtmp1_ss(c+(r-1)*LVT_rc%lnc) = stddev + end if + else ! No ensembles, just calculate mean + if(ngtmp1_1d(c+(r-1)*LVT_rc%lnc).gt.0) then + gtmp1_1d(c+(r-1)*LVT_rc%lnc) = & + gtmp1_1d(c+(r-1)*LVT_rc%lnc)/& + ngtmp1_1d(c+(r-1)*LVT_rc%lnc) + else + gtmp1_1d(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef end if - else - gtmp1_1d(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef gtmp1_ss(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef - endif + end if enddo ! c enddo ! r ! EMK END...k loop ends further down - -! do k=1,dataEntry%vlevels -! gtmp1_1d = 0.0 -! ngtmp1_1d = 0 -! gtmp1_ss = 0.0 - -! do m=1,LVT_rc%nensem -! do r=1,LVT_rc%lnr -! do c=1,LVT_rc%lnc -! if(LVT_domain%gindex(c,r).ne.-1) then -! gid = LVT_domain%gindex(c,r) -! gtmp1_1d(c+(r-1)*LVT_rc%lnc) = & -! gtmp1_1d(c+(r-1)*LVT_rc%lnc) + & -! dataEntry%value(gid,m,k) -! ngtmp1_1d(c+(r-1)*LVT_rc%lnc) = & -! ngtmp1_1d(c+(r-1)*LVT_rc%lnc) + 1 - -! gtmp1_ss(c+(r-1)*LVT_rc%lnc) = & -! gtmp1_ss(c+(r-1)*LVT_rc%lnc) + & -! dataEntry%value(gid,m,k)**2 - -! endif -! enddo -! enddo -! enddo - -! do r=1,LVT_rc%lnr -! do c=1,LVT_rc%lnc -! if(ngtmp1_1d(c+(r-1)*LVT_rc%lnc).gt.0) then -! gtmp1_1d(c+(r-1)*LVT_rc%lnc) = & -! gtmp1_1d(c+(r-1)*LVT_rc%lnc)/& -! ngtmp1_1d(c+(r-1)*LVT_rc%lnc) -! variance = gtmp1_ss(c+(r-1)*LVT_rc%lnc)/& -! ngtmp1_1d(c+(r-1)*LVT_rc%lnc) - & -! gtmp1_1d(c+(r-1)*LVT_rc%lnc)**2 - -! if(variance.gt.0) then -! gtmp1_ss(c+(r-1)*LVT_rc%lnc) = & -! sqrt(variance) -! else -! gtmp1_ss(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef -! endif -! else -! gtmp1_1d(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef -! gtmp1_ss(c+(r-1)*LVT_rc%lnc) = LVT_rc%udef -! endif -! enddo -! enddo -! ! open(100,file='test.bin',form='unformatted') -! ! write(100) gtmp1_1d -! if(.not.((dataEntry%short_name.eq."Landcover").or.& -! (dataEntry%short_name.eq."Landmask").or.& -! (dataEntry%short_name.eq."Soiltype").or.& -! (dataEntry%short_name.eq."Greenness").or.& -! (dataEntry%short_name.eq."Tair_f_min").or.& -! (dataEntry%short_name.eq."Tair_f_max").or.& -! (dataEntry%short_name.eq."TotalPrecip").or.& -! (dataEntry%short_name.eq."Wind_f").or.& -! (dataEntry%short_name.eq."Tair_f").or.& -! (dataEntry%short_name.eq."SWdown_f").or.& -! (dataEntry%short_name.eq."LWdown_f").or.& -! (dataEntry%short_name.eq."RHMin"))) then -! if(LVT_rc%applyNoiseReductionFilter.eq.1) then -! call applyNoiseReductionFilter(gtmp1_1d) -! endif -! endif - -! ! write(100) gtmp1_1d -! ! close(100) -! ! stop - - + if(LVT_rc%lvt_out_format.eq."grib2") then call writeSingleGrib2Var(ftn_mean,& @@ -4243,4 +4178,48 @@ subroutine construct_hycom_cice_filename(rootdir, & return end subroutine construct_hycom_cice_filename + ! EMK...Calculate mean and standard deviation using Welford algorithm. + ! See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + ! Intent is to avoid catastrophic cancellation when calculating variance, + ! which can result in negative variance and imaginary standard deviation. + ! REFERENCES: + ! Chan, T F, G H Golub, and R J LeVeque, 1983: Algorithms for computing + ! the sample variance: Analysis and recommendations. The American + ! Statistician, 37, 242-247. doi:10.1080/00031305.1983.10483115. + ! Knuth, D E, 1998: The Art of Computer Programming, volume 2: + ! Seminumerical Algorithms. Third Edition, p 232. Boston: + ! Addison-Wesley. + ! Ling, R F, 1974: Comparisons of several algorithms for computing + ! sample means and variances. Journal of the American Statistical + ! Association, 69, 859-866. doi:10.2307/2286154. + ! Welford, B P, 1962: Note on a method for calculating corrected sums of + ! squares and products. Technometrics, 4, 419-420. doi:10.2307/1266577. + + subroutine welford_update(count, mean, m2, new_value) + implicit none + integer, intent(inout) :: count + real, intent(inout) :: mean + real, intent(inout) :: m2 + real, intent(in) :: new_value + real :: delta, delta2 + count = count + 1 + delta = new_value - mean + mean = mean + (delta / real(count)) + delta2 = new_value - mean + m2 = m2 + (delta * delta2) + end subroutine welford_update + subroutine welford_finalize(count, mean, m2, stddev) + implicit none + integer, intent(in) :: count + real, intent(inout) :: mean + real, intent(in) :: m2 + real, intent(out) :: stddev + if (count < 1) then + mean = 0 + stddev = 0 + else + stddev = sqrt(m2 / real(count)) + end if + end subroutine welford_finalize + end module LVT_DataStreamsMod From 9f59863c8e793a682a9b4f947f5bdb4532be5187 Mon Sep 17 00:00:00 2001 From: Eric Kemp Date: Mon, 6 Jan 2020 17:17:15 -0500 Subject: [PATCH 2/2] Restored smoothing exception list for categorical variables. Since smoothing categorical variables (such as soil type) makes no physical sense, these variables are prohibited from being smoothed. The prohibition for other variables is *not* reintroduced at this time, pending further discussions. --- lvt/core/LVT_DataStreamsMod.F90 | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lvt/core/LVT_DataStreamsMod.F90 b/lvt/core/LVT_DataStreamsMod.F90 index 728b83d04..deeaaecf3 100644 --- a/lvt/core/LVT_DataStreamsMod.F90 +++ b/lvt/core/LVT_DataStreamsMod.F90 @@ -1537,9 +1537,16 @@ subroutine LVT_writeDataStreams ! smoothing. The original exception list did not ! consider forcing perturbations. It seams best ! to just trust the setting in the lvt.config file. - if(LVT_rc%applyNoiseReductionFilter.eq.1) then - call applyNoiseReductionFilter(gtmp1_1d_mem) - endif + ! EMK...Restored exception list for categorical + ! variables, since smoothing makes no physical sense + if (.not. ( & + (dataEntry%short_name .eq. "Landcover") .or. & + (dataEntry%short_name .eq. "Landmask") .or. & + (dataEntry%short_name .eq. "Soiltype"))) then + if(LVT_rc%applyNoiseReductionFilter.eq.1) then + call applyNoiseReductionFilter(gtmp1_1d_mem) + end if + end if ! Now provide smoothed field to ensemble mean and ! spread