From 5c3554d536f792c2cabf11fc1ab211242ee8aae6 Mon Sep 17 00:00:00 2001
From: Brewster Malevich <bmalevich@rhg.com>
Date: Wed, 3 Nov 2021 18:01:30 -0700
Subject: [PATCH 1/4] Quick fix validation reading entire zarr store for QC
 check

---
 HISTORY.rst    |  1 +
 dodola/core.py | 48 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 7557a5e..7b5a712 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -6,6 +6,7 @@ History
 0.X.X (XXXX-XX-XX)
 ------------------
 * Add pre-training slicing options to train-qdm and train-aiqpd. (PR #123, @brews)
+* Quick fix validation reading entire zarr store for check. (@brews)
 
 
 0.7.0 (2021-11-02)
diff --git a/dodola/core.py b/dodola/core.py
index 7aecedd..562943c 100644
--- a/dodola/core.py
+++ b/dodola/core.py
@@ -4,8 +4,9 @@
 """
 
 
-import numpy as np
 import logging
+import dask
+import numpy as np
 from skdownscale.spatial_models import SpatialDisaggregator
 import xarray as xr
 from xclim import sdba, set_options
@@ -509,21 +510,44 @@ def validate_dataset(ds, var, data_type, time_period="future"):
     time_period : {"historical", "future"}
         Time period of data that will be validated.
     """
+    # This is pretty rough but works to communicate the idea.
+    # Should have this raise something like ValidationError rather than
+    # AssertionErrors.
 
-    # validation for all variables
+    # These only read in Zarr Store metadata -- not memory intensive.
     _test_variable_names(ds, var)
-    _test_for_nans(ds, var)
     _test_timesteps(ds, data_type, time_period)
 
-    # variable specific validation
-    if var == "tasmin" or var == "tasmax":
-        _test_temp_range(ds, var)
-    if var == "dtr":
-        _test_dtr_range(ds, var)
-    if var == "dtr" or var == "pr":
-        _test_negative_values(ds, var)
-    if var == "pr":
-        _test_maximum_precip(ds, var)
+    # Other test are done on annual selections with dask.delayed to
+    # avoid large memory errors. xr.map_blocks had trouble with this.
+    @dask.delayed
+    def clear_memory_intensive_tests(ds, v, t):
+        d = ds.sel(time=str(t))
+
+        _test_for_nans(d, v)
+
+        if v == "tasmin" or v == "tasmax":
+            _test_temp_range(d, v)
+        if v == "dtr":
+            _test_dtr_range(d, v)
+        if v == "dtr" or v == "pr":
+            _test_negative_values(d, v)
+        if v == "pr":
+            _test_maximum_precip(d, v)
+        else:
+            raise ValueError(f"Argument {v=} not recognized")
+
+        # Assumes error thrown if had problem before this.
+        return True
+
+    tasks = []
+    for t in np.unique(ds["time"].dt.year.data):
+        logger.debug(f"Validating for year {t}")
+        test_results = clear_memory_intensive_tests(ds, var, t)
+        tasks.append(test_results)
+    tasks = dask.compute(*tasks)
+    assert all(tasks)  # Likely don't need this
+    return True
 
 
 def _test_for_nans(ds, var):

From e2451581cca0b19e49a5153dce8ffcf11f6acad5 Mon Sep 17 00:00:00 2001
From: Brewster Malevich <bmalevich@rhg.com>
Date: Wed, 3 Nov 2021 18:03:11 -0700
Subject: [PATCH 2/4] Add PR number to HISTORY

---
 HISTORY.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 7b5a712..31c7ed1 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -6,7 +6,7 @@ History
 0.X.X (XXXX-XX-XX)
 ------------------
 * Add pre-training slicing options to train-qdm and train-aiqpd. (PR #123, @brews)
-* Quick fix validation reading entire zarr store for check. (@brews)
+* Quick fix validation reading entire zarr store for check. (PR #124, @brews)
 
 
 0.7.0 (2021-11-02)

From 6658bf0ca1699974f7f31849b0bcc54f27538256 Mon Sep 17 00:00:00 2001
From: Brewster Malevich <bmalevich@rhg.com>
Date: Wed, 3 Nov 2021 18:23:01 -0700
Subject: [PATCH 3/4] Clean up sloppy variable check, make per-variable check
 cleaner

---
 dodola/core.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/dodola/core.py b/dodola/core.py
index 562943c..e549e15 100644
--- a/dodola/core.py
+++ b/dodola/core.py
@@ -511,8 +511,8 @@ def validate_dataset(ds, var, data_type, time_period="future"):
         Time period of data that will be validated.
     """
     # This is pretty rough but works to communicate the idea.
-    # Should have this raise something like ValidationError rather than
-    # AssertionErrors.
+    # Consider having failed tests raise something like ValidationError rather
+    # than AssertionErrors.
 
     # These only read in Zarr Store metadata -- not memory intensive.
     _test_variable_names(ds, var)
@@ -521,18 +521,20 @@ def validate_dataset(ds, var, data_type, time_period="future"):
     # Other test are done on annual selections with dask.delayed to
     # avoid large memory errors. xr.map_blocks had trouble with this.
     @dask.delayed
-    def clear_memory_intensive_tests(ds, v, t):
+    def memory_intensive_tests(ds, v, t):
         d = ds.sel(time=str(t))
 
         _test_for_nans(d, v)
 
-        if v == "tasmin" or v == "tasmax":
+        if v == "tasmin":
             _test_temp_range(d, v)
-        if v == "dtr":
+        elif v == "tasmax":
+            _test_temp_range(d, v)
+        elif v == "dtr":
             _test_dtr_range(d, v)
-        if v == "dtr" or v == "pr":
             _test_negative_values(d, v)
-        if v == "pr":
+        elif v == "pr":
+            _test_negative_values(d, v)
             _test_maximum_precip(d, v)
         else:
             raise ValueError(f"Argument {v=} not recognized")
@@ -543,7 +545,7 @@ def clear_memory_intensive_tests(ds, v, t):
     tasks = []
     for t in np.unique(ds["time"].dt.year.data):
         logger.debug(f"Validating for year {t}")
-        test_results = clear_memory_intensive_tests(ds, var, t)
+        test_results = memory_intensive_tests(ds, var, t)
         tasks.append(test_results)
     tasks = dask.compute(*tasks)
     assert all(tasks)  # Likely don't need this

From 4824c595a3595ce70397992131c1106c7bc93e29 Mon Sep 17 00:00:00 2001
From: Brewster Malevich <bmalevich@rhg.com>
Date: Wed, 3 Nov 2021 18:30:16 -0700
Subject: [PATCH 4/4] Make variables pretty, minor cleanup

---
 dodola/core.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dodola/core.py b/dodola/core.py
index e549e15..f641448 100644
--- a/dodola/core.py
+++ b/dodola/core.py
@@ -542,13 +542,12 @@ def memory_intensive_tests(ds, v, t):
         # Assumes error thrown if had problem before this.
         return True
 
-    tasks = []
+    results = []
     for t in np.unique(ds["time"].dt.year.data):
-        logger.debug(f"Validating for year {t}")
-        test_results = memory_intensive_tests(ds, var, t)
-        tasks.append(test_results)
-    tasks = dask.compute(*tasks)
-    assert all(tasks)  # Likely don't need this
+        logger.debug(f"Validating year {t}")
+        results.append(memory_intensive_tests(ds, var, t))
+    results = dask.compute(*results)
+    assert all(results)  # Likely don't need this
     return True