From 7b7b13240b6cd1e869761dcb30f8121f0a0a98f4 Mon Sep 17 00:00:00 2001
From: Chuck Daniels <chuck@developmentseed.org>
Date: Mon, 4 Nov 2024 16:07:51 -0500
Subject: [PATCH 1/6] Fix pqdm_kwargs integration tests

---
 earthaccess/api.py     | 40 +++++++++---------
 earthaccess/store.py   | 94 +++++++++++++++++++++++-------------------
 tests/unit/test_api.py | 52 +++++++++++------------
 3 files changed, 97 insertions(+), 89 deletions(-)

diff --git a/earthaccess/api.py b/earthaccess/api.py
index 5ad75ed5..6b758aa2 100644
--- a/earthaccess/api.py
+++ b/earthaccess/api.py
@@ -1,4 +1,5 @@
 import logging
+from pathlib import Path
 
 import requests
 import s3fs
@@ -202,9 +203,10 @@ def login(strategy: str = "all", persist: bool = False, system: System = PROD) -
 
 def download(
     granules: Union[DataGranule, List[DataGranule], str, List[str]],
-    local_path: Optional[str],
+    local_path: Optional[Union[Path, str]] = None,
     provider: Optional[str] = None,
     threads: int = 8,
+    *,
     pqdm_kwargs: Optional[Mapping[str, Any]] = None,
 ) -> List[str]:
     """Retrieves data granules from a remote storage system.
@@ -215,7 +217,11 @@ def download(
 
     Parameters:
         granules: a granule, list of granules, a granule link (HTTP), or a list of granule links (HTTP)
-        local_path: local directory to store the remote data granules
+        local_path: Local directory to store the remote data granules.  If not
+            supplied, defaults to a subdirectory of the current working directory
+            of the form `data/YYYY-MM-DD-UUID`, where `YYYY-MM-DD` is the year,
+            month, and day of the current date, and `UUID` is the first 6
+            characters of a UUID4 hex string.
         provider: if we download a list of URLs, we need to specify the provider.
         threads: parallel number of threads to use to download the files, adjust as necessary, default = 8
         pqdm_kwargs: Additional keyword arguments to pass to pqdm, a parallel processing library.
@@ -228,31 +234,29 @@ def download(
     Raises:
         Exception: A file download failed.
     """
-    provider = _normalize_location(provider)
-    pqdm_kwargs = {
-        "exception_behavior": "immediate",
-        "n_jobs": threads,
-        **(pqdm_kwargs or {}),
-    }
+    provider = _normalize_location(provider)
+
     if isinstance(granules, DataGranule):
         granules = [granules]
     elif isinstance(granules, str):
         granules = [granules]
+
     try:
-        results = earthaccess.__store__.get(
-            granules, local_path, provider, threads, pqdm_kwargs
+        return earthaccess.__store__.get(
+            granules, local_path, provider, threads, pqdm_kwargs=pqdm_kwargs
         )
     except AttributeError as err:
         logger.error(
             f"{err}: You must call earthaccess.login() before you can download data"
         )
-        return []
-    return results
+
+    return []
 
 
 def open(
     granules: Union[List[str], List[DataGranule]],
     provider: Optional[str] = None,
+    *,
     pqdm_kwargs: Optional[Mapping[str, Any]] = None,
 ) -> List[AbstractFileSystem]:
     """Returns a list of file-like objects that can be used to access files
@@ -269,15 +273,11 @@ def open(
     Returns:
         A list of "file pointers" to remote (i.e. s3 or https) files.
     """
-    provider = _normalize_location(provider)
-    pqdm_kwargs = {
-        "exception_behavior": "immediate",
-        **(pqdm_kwargs or {}),
-    }
-    results = earthaccess.__store__.open(
-        granules=granules, provider=provider, pqdm_kwargs=pqdm_kwargs
+    return earthaccess.__store__.open(
+        granules=granules,
+        provider=_normalize_location(provider),
+        pqdm_kwargs=pqdm_kwargs,
     )
-    return results
 
 
 def get_s3_credentials(
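Taken together, the changes above make `pqdm_kwargs` keyword-only and give
`local_path` a default. A minimal call-site sketch (hypothetical search
parameters; mirrors the docstring above):

    import earthaccess

    earthaccess.login()
    granules = earthaccess.search_data(short_name="ATL06", count=3)

    # local_path may now be omitted; a directory such as data/2024-11-04-3f9a1c
    # (current date plus the first 6 hex characters of a UUID4) is created
    # under the current working directory.
    files = earthaccess.download(
        granules,
        pqdm_kwargs={"exception_behaviour": "deferred"},  # keyword-only
    )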
diff --git a/earthaccess/store.py b/earthaccess/store.py
index f7b5c85e..58ac9f59 100644
--- a/earthaccess/store.py
+++ b/earthaccess/store.py
@@ -63,7 +63,7 @@ def __repr__(self) -> str:
 def _open_files(
     url_mapping: Mapping[str, Union[DataGranule, None]],
     fs: fsspec.AbstractFileSystem,
-    threads: int = 8,
+    *,
     pqdm_kwargs: Optional[Mapping[str, Any]] = None,
 ) -> List[fsspec.spec.AbstractBufferedFile]:
     def multi_thread_open(data: tuple[str, Optional[DataGranule]]) -> EarthAccessFile:
@@ -71,14 +71,12 @@ def multi_thread_open(data: tuple[str, Optional[DataGranule]]) -> EarthAccessFil
         return EarthAccessFile(fs.open(url), granule)  # type: ignore
 
     pqdm_kwargs = {
-        "exception_behavior": "immediate",
+        "exception_behaviour": "immediate",
+        "n_jobs": 8,
         **(pqdm_kwargs or {}),
     }
 
-    fileset = pqdm(
-        url_mapping.items(), multi_thread_open, n_jobs=threads, **pqdm_kwargs
-    )
-    return fileset
+    return pqdm(url_mapping.items(), multi_thread_open, **pqdm_kwargs)
 
 
 def make_instance(
@@ -344,6 +342,7 @@ def open(
         self,
         granules: Union[List[str], List[DataGranule]],
         provider: Optional[str] = None,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[fsspec.spec.AbstractBufferedFile]:
         """Returns a list of file-like objects that can be used to access files
@@ -361,7 +360,7 @@ def open(
             A list of "file pointers" to remote (i.e. s3 or https) files.
         """
         if len(granules):
-            return self._open(granules, provider, pqdm_kwargs)
+            return self._open(granules, provider, pqdm_kwargs=pqdm_kwargs)
         return []
 
     @singledispatchmethod
@@ -369,6 +368,7 @@ def _open(
         self,
         granules: Union[List[str], List[DataGranule]],
         provider: Optional[str] = None,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[Any]:
         raise NotImplementedError("granules should be a list of DataGranule or URLs")
@@ -378,7 +378,8 @@ def _open_granules(
         self,
         granules: List[DataGranule],
         provider: Optional[str] = None,
-        threads: int = 8,
+        *,
+        pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[Any]:
         fileset: List = []
         total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
@@ -411,7 +412,7 @@ def _open_granules(
                     fileset = _open_files(
                         url_mapping,
                         fs=s3_fs,
-                        threads=threads,
+                        pqdm_kwargs=pqdm_kwargs,
                     )
                 except Exception as e:
                     raise RuntimeError(
@@ -420,19 +421,19 @@ def _open_granules(
                         f"Exception: {traceback.format_exc()}"
                     ) from e
             else:
-                fileset = self._open_urls_https(url_mapping, threads=threads)
-            return fileset
+                fileset = self._open_urls_https(url_mapping, pqdm_kwargs=pqdm_kwargs)
         else:
             url_mapping = _get_url_granule_mapping(granules, access="on_prem")
-            fileset = self._open_urls_https(url_mapping, threads=threads)
-            return fileset
+            fileset = self._open_urls_https(url_mapping, pqdm_kwargs=pqdm_kwargs)
+
+        return fileset
 
     @_open.register
     def _open_urls(
         self,
         granules: List[str],
         provider: Optional[str] = None,
-        threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[Any]:
         fileset: List = []
@@ -460,7 +461,6 @@ def _open_urls(
                         fileset = _open_files(
                             url_mapping,
                             fs=s3_fs,
-                            threads=threads,
                             pqdm_kwargs=pqdm_kwargs,
                         )
                     except Exception as e:
@@ -481,15 +481,16 @@ def _open_urls(
                 raise ValueError(
                     "We cannot open S3 links when we are not in-region, try using HTTPS links"
                 )
-            fileset = self._open_urls_https(url_mapping, threads, pqdm_kwargs)
+            fileset = self._open_urls_https(url_mapping, pqdm_kwargs=pqdm_kwargs)
             return fileset
 
     def get(
         self,
         granules: Union[List[DataGranule], List[str]],
-        local_path: Union[Path, str, None] = None,
+        local_path: Optional[Union[Path, str]] = None,
         provider: Optional[str] = None,
         threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[str]:
         """Retrieves data granules from a remote storage system.
@@ -503,7 +504,11 @@ def get(
 
         Parameters:
             granules: A list of granules(DataGranule) instances or a list of granule links (HTTP).
-            local_path: Local directory to store the remote data granules.
+            local_path: Local directory to store the remote data granules.  If not
+                supplied, defaults to a subdirectory of the current working directory
+                of the form `data/YYYY-MM-DD-UUID`, where `YYYY-MM-DD` is the year,
+                month, and day of the current date, and `UUID` is the first 6
+                characters of a UUID4 hex string.
             provider: a valid cloud provider, each DAAC has a provider code for their cloud distributions
             threads: Parallel number of threads to use to download the files;
                 adjust as necessary, default = 8.
@@ -514,18 +519,20 @@ def get(
         Returns:
             List of downloaded files
         """
+        if not granules:
+            raise ValueError("List of URLs or DataGranule instances expected")
+
         if local_path is None:
-            today = datetime.datetime.today().strftime("%Y-%m-%d")
+            today = datetime.datetime.now().strftime("%Y-%m-%d")
             uuid = uuid4().hex[:6]
             local_path = Path.cwd() / "data" / f"{today}-{uuid}"
-        elif isinstance(local_path, str):
-            local_path = Path(local_path)
 
-        if len(granules):
-            files = self._get(granules, local_path, provider, threads, pqdm_kwargs)
-            return files
-        else:
-            raise ValueError("List of URLs or DataGranule instances expected")
+        pqdm_kwargs = {
+            "n_jobs": threads,
+            **(pqdm_kwargs or {}),
+        }
+
+        return self._get(granules, Path(local_path), provider, pqdm_kwargs=pqdm_kwargs)
 
     @singledispatchmethod
     def _get(
@@ -533,7 +540,7 @@ def _get(
         granules: Union[List[DataGranule], List[str]],
         local_path: Path,
         provider: Optional[str] = None,
-        threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[str]:
         """Retrieves data granules from a remote storage system.
@@ -566,7 +573,7 @@ def _get_urls(
         granules: List[str],
         local_path: Path,
         provider: Optional[str] = None,
-        threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[str]:
         data_links = granules
@@ -590,7 +597,7 @@ def _get_urls(
         else:
             # if we are not in AWS
             return self._download_onprem_granules(
-                data_links, local_path, threads, pqdm_kwargs
+                data_links, local_path, pqdm_kwargs=pqdm_kwargs
             )
 
     @_get.register
@@ -599,7 +606,7 @@ def _get_granules(
         granules: List[DataGranule],
         local_path: Path,
         provider: Optional[str] = None,
-        threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[str]:
         data_links: List = []
@@ -615,7 +622,7 @@ def _get_granules(
                 for granule in granules
             )
         )
-        total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
+        total_size = round(sum(granule.size() for granule in granules) / 1024, 2)
         logger.info(
             f" Getting {len(granules)} granules, approx download size: {total_size} GB"
         )
@@ -642,7 +649,7 @@ def _get_granules(
             # if the data are cloud-based, but we are not in AWS,
             # it will be downloaded as if it was on prem
             return self._download_onprem_granules(
-                data_links, local_path, threads, pqdm_kwargs
+                data_links, local_path, pqdm_kwargs=pqdm_kwargs
             )
 
     def _download_file(self, url: str, directory: Path) -> str:
@@ -684,7 +691,7 @@ def _download_onprem_granules(
         self,
         urls: List[str],
         directory: Path,
-        threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[Any]:
         """Downloads a list of URLS into the data directory.
@@ -711,25 +718,26 @@ def _download_onprem_granules(
 
         arguments = [(url, directory) for url in urls]
 
-        results = pqdm(
-            arguments,
-            self._download_file,
-            n_jobs=threads,
-            argument_type="args",
-            **pqdm_kwargs,
-        )
-        return results
+        pqdm_kwargs = {
+            "exception_behaviour": "immediate",
+            **(pqdm_kwargs or {}),
+            # We don't want a user to be able to override the following kwarg,
+            # which is why it appears *after* spreading pqdm_kwargs above.
+            "argument_type": "args",
+        }
+
+        return pqdm(arguments, self._download_file, **pqdm_kwargs)
 
     def _open_urls_https(
         self,
         url_mapping: Mapping[str, Union[DataGranule, None]],
-        threads: int = 8,
+        *,
         pqdm_kwargs: Optional[Mapping[str, Any]] = None,
     ) -> List[fsspec.AbstractFileSystem]:
         https_fs = self.get_fsspec_session()
 
         try:
-            return _open_files(url_mapping, https_fs, threads, pqdm_kwargs)
+            return _open_files(url_mapping, https_fs, pqdm_kwargs=pqdm_kwargs)
         except Exception:
             logger.exception(
                 "An exception occurred while trying to access remote files via HTTPS"
diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py
index 20980e35..0f50fe93 100644
--- a/tests/unit/test_api.py
+++ b/tests/unit/test_api.py
@@ -1,10 +1,15 @@
-from unittest.mock import Mock
+from pathlib import Path
+from unittest.mock import patch
 
 import earthaccess
 import pytest
 
 
-def test_download_immediate_failure(monkeypatch):
+def fail_to_download_file(*args, **kwargs):
+    raise IOError("Download failed")
+
+
+def test_download_immediate_failure(tmp_path: Path):
     earthaccess.login()
 
     results = earthaccess.search_data(
@@ -14,37 +19,32 @@ def test_download_immediate_failure(monkeypatch):
         count=10,
     )
 
-    def mock_get(*args, **kwargs):
-        raise Exception("Download failed")
-
-    mock_store = Mock()
-    monkeypatch.setattr(earthaccess, "__store__", mock_store)
-    monkeypatch.setattr(mock_store, "get", mock_get)
+    with patch.object(earthaccess.__store__, "_download_file", fail_to_download_file):
+        with pytest.raises(IOError, match="Download failed"):
+            earthaccess.download(results, str(tmp_path))
 
-    with pytest.raises(Exception, match="Download failed"):
-        earthaccess.download(results, "/home/download-folder")
 
-
-def test_download_deferred_failure(monkeypatch):
+def test_download_deferred_failure(tmp_path: Path):
     earthaccess.login()
 
+    count = 3
     results = earthaccess.search_data(
         short_name="ATL06",
         bounding_box=(-10, 20, 10, 50),
         temporal=("1999-02", "2019-03"),
-        count=10,
-    )
-
-    def mock_get(*args, **kwargs):
-        return [Exception("Download failed")] * len(results)
-
-    mock_store = Mock()
-    monkeypatch.setattr(earthaccess, "__store__", mock_store)
-    monkeypatch.setattr(mock_store, "get", mock_get)
-
-    results = earthaccess.download(
-        results, "/home/download-folder", None, 8, {"exception_behavior": "deferred"}
+        count=count,
     )
 
-    assert all(isinstance(e, Exception) for e in results)
-    assert len(results) == 10
+    with patch.object(earthaccess.__store__, "_download_file", fail_to_download_file):
+        with pytest.raises(Exception) as exc_info:
+            earthaccess.download(
+                results,
+                tmp_path,
+                None,
+                8,
+                pqdm_kwargs={"exception_behaviour": "deferred"},
+            )
+
+    errors = exc_info.value.args
+    assert len(errors) == count
+    assert all(isinstance(e, IOError) and str(e) == "Download failed" for e in errors)

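The dict merging in `_download_onprem_granules` above deserves a note:
user-supplied `pqdm_kwargs` override the defaults, but `argument_type` is
pinned because it is spread last. A standalone sketch of the merge semantics
(hypothetical values):

    defaults = {"exception_behaviour": "immediate", "n_jobs": 8}
    user_supplied = {"exception_behaviour": "deferred", "disable": True}

    # Later keys win: the user's exception_behaviour takes effect, while
    # argument_type cannot be overridden because it appears after the spread.
    merged = {**defaults, **user_supplied, "argument_type": "args"}
    assert merged == {
        "exception_behaviour": "deferred",
        "n_jobs": 8,
        "disable": True,
        "argument_type": "args",
    }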
From cf7799022160d84237f6ca6fe56c410456eb3a10 Mon Sep 17 00:00:00 2001
From: Chuck Daniels <chuck@developmentseed.org>
Date: Tue, 5 Nov 2024 11:36:05 -0500
Subject: [PATCH 2/6] Change pqdm download tests to integration tests

---
 .github/workflows/test.yml    |  2 +-
 scripts/integration-test.sh   |  2 +-
 tests/integration/test_api.py | 46 ++++++++++++++++++++++++++++++++
 tests/unit/test_api.py        | 50 -----------------------------------
 4 files changed, 48 insertions(+), 52 deletions(-)
 delete mode 100644 tests/unit/test_api.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 1d6250d4..cd7b6cd3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -27,7 +27,7 @@ jobs:
         run: mypy
 
       - name: Test
-        run: pytest tests/unit --cov=earthaccess --cov=tests --cov-report=term-missing --capture=no --tb=native --log-cli-level=INFO
+        run: pytest tests/unit --verbose --cov=earthaccess --cov-report=term-missing --capture=no --tb=native --log-cli-level=INFO
 
       - name: Upload coverage
         # Don't upload coverage when using the `act` tool to run the workflow locally
diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh
index 506976ad..1a8ad4f6 100755
--- a/scripts/integration-test.sh
+++ b/scripts/integration-test.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 set -x
-pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing "${@}" --capture=no --tb=native --log-cli-level=INFO
+pytest tests/integration --cov=earthaccess --cov-report=term-missing "${@}" --capture=no --tb=native --log-cli-level=INFO
 RET=$?
 set +x
 
diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py
index f0fdd219..f6ec1fcd 100644
--- a/tests/integration/test_api.py
+++ b/tests/integration/test_api.py
@@ -1,6 +1,7 @@
 import logging
 import os
 from pathlib import Path
+from unittest.mock import patch
 
 import earthaccess
 import pytest
@@ -77,6 +78,51 @@ def test_download(tmp_path, selection, use_url):
     assert all(Path(f).exists() for f in files)
 
 
+def fail_to_download_file(*args, **kwargs):
+    raise IOError("Download failed")
+
+
+def test_download_immediate_failure(tmp_path: Path):
+    results = earthaccess.search_data(
+        short_name="ATL06",
+        bounding_box=(-10, 20, 10, 50),
+        temporal=("1999-02", "2019-03"),
+        count=3,
+    )
+
+    with patch.object(earthaccess.__store__, "_download_file", fail_to_download_file):
+        with pytest.raises(IOError, match="Download failed"):
+            # By default, we set pqdm exception_behaviour to "immediate" so that
+            # it simply propagates the first download error it encounters, halting
+            # any further downloads.
+            earthaccess.download(results, tmp_path, pqdm_kwargs=dict(disable=True))
+
+
+def test_download_deferred_failure(tmp_path: Path):
+    count = 3
+    results = earthaccess.search_data(
+        short_name="ATL06",
+        bounding_box=(-10, 20, 10, 50),
+        temporal=("1999-02", "2019-03"),
+        count=count,
+    )
+
+    with patch.object(earthaccess.__store__, "_download_file", fail_to_download_file):
+        # With "deferred" exceptions, pqdm catches all exceptions, then at the end
+        # raises a single generic Exception, passing the sequence of caught exceptions
+        # as arguments to the Exception constructor.
+        with pytest.raises(Exception) as exc_info:
+            earthaccess.download(
+                results,
+                tmp_path,
+                pqdm_kwargs=dict(exception_behaviour="deferred", disable=True),
+            )
+
+    errors = exc_info.value.args
+    assert len(errors) == count
+    assert all(isinstance(e, IOError) and str(e) == "Download failed" for e in errors)
+
+
 def test_auth_environ():
     earthaccess.login(strategy="environment")
     environ = earthaccess.auth_environ()
diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py
deleted file mode 100644
index 0f50fe93..00000000
--- a/tests/unit/test_api.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from pathlib import Path
-from unittest.mock import patch
-
-import earthaccess
-import pytest
-
-
-def fail_to_download_file(*args, **kwargs):
-    raise IOError("Download failed")
-
-
-def test_download_immediate_failure(tmp_path: Path):
-    earthaccess.login()
-
-    results = earthaccess.search_data(
-        short_name="ATL06",
-        bounding_box=(-10, 20, 10, 50),
-        temporal=("1999-02", "2019-03"),
-        count=10,
-    )
-
-    with patch.object(earthaccess.__store__, "_download_file", fail_to_download_file):
-        with pytest.raises(IOError, match="Download failed"):
-            earthaccess.download(results, str(tmp_path))
-
-
-def test_download_deferred_failure(tmp_path: Path):
-    earthaccess.login()
-
-    count = 3
-    results = earthaccess.search_data(
-        short_name="ATL06",
-        bounding_box=(-10, 20, 10, 50),
-        temporal=("1999-02", "2019-03"),
-        count=count,
-    )
-
-    with patch.object(earthaccess.__store__, "_download_file", fail_to_download_file):
-        with pytest.raises(Exception) as exc_info:
-            earthaccess.download(
-                results,
-                tmp_path,
-                None,
-                8,
-                pqdm_kwargs={"exception_behaviour": "deferred"},
-            )
-
-    errors = exc_info.value.args
-    assert len(errors) == count
-    assert all(isinstance(e, IOError) and str(e) == "Download failed" for e in errors)
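Both tests hinge on pqdm's exception handling: with "immediate" the first
error propagates and halts remaining downloads, while with "deferred" pqdm
collects every error and raises a single `Exception` whose args are the caught
exceptions. A standalone sketch (assumes pqdm is installed):

    from pqdm.threads import pqdm

    def boom(_item):
        raise IOError("Download failed")

    try:
        pqdm(range(3), boom, n_jobs=2, exception_behaviour="deferred", disable=True)
    except Exception as exc:
        assert all(isinstance(e, IOError) for e in exc.args)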

From bc73b03c69948d77828398742fdc2cc0dcec17e2 Mon Sep 17 00:00:00 2001
From: Chuck Daniels <chuck@developmentseed.org>
Date: Tue, 5 Nov 2024 13:16:53 -0500
Subject: [PATCH 3/6] Checkout merge commit on pull_request_target event

---
 .github/workflows/integration-test.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index b868549d..e4514bc2 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -73,6 +73,8 @@ jobs:
 
       - name: Checkout source
         uses: actions/checkout@v4
+        with:
+          ref: "${{ github.event.pull_request.merge_commit_sha }}"
 
       - name: Install package with dependencies
         uses: ./.github/actions/install-pkg

From b3a1ebf826e7d02b9cee7bcc3c103f48839f8325 Mon Sep 17 00:00:00 2001
From: Chuck Daniels <chuck@developmentseed.org>
Date: Tue, 5 Nov 2024 13:23:07 -0500
Subject: [PATCH 4/6] Checkout merge commit on pull_request_target event

---
 .github/workflows/integration-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index e4514bc2..cd143fc3 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -74,7 +74,7 @@ jobs:
       - name: Checkout source
         uses: actions/checkout@v4
         with:
-          ref: "${{ github.event.pull_request.merge_commit_sha }}"
+          ref: refs/pull/${{ github.event.number }}/merge
 
       - name: Install package with dependencies
         uses: ./.github/actions/install-pkg

From e615b43bc3b0d0fc330df78fbeb28735198de921 Mon Sep 17 00:00:00 2001
From: Chuck Daniels <chuck@developmentseed.org>
Date: Tue, 5 Nov 2024 13:44:59 -0500
Subject: [PATCH 5/6] Debug int test checkout

---
 .github/workflows/integration-test.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index cd143fc3..0554fd77 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -71,10 +71,13 @@ jobs:
           echo "See [Keeping your GitHub Actions and workflows secure Part 1: Preventing pwn requests](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests)." >> $GITHUB_STEP_SUMMARY
           exit 1
 
+      - name: Check merge commit SHA
+        run: echo "github.event.pull_request.merge_commit_sha = ${{ github.event.pull_request.merge_commit_sha }}"
+
       - name: Checkout source
         uses: actions/checkout@v4
         with:
-          ref: refs/pull/${{ github.event.number }}/merge
+          ref: "${{ github.event.pull_request.merge_commit_sha }}"
 
       - name: Install package with dependencies
         uses: ./.github/actions/install-pkg

From cf73e1d4593aa59c1c8a15d467365077fa3fa996 Mon Sep 17 00:00:00 2001
From: Chuck Daniels <chuck@developmentseed.org>
Date: Tue, 5 Nov 2024 15:53:37 -0500
Subject: [PATCH 6/6] Checkout correct commit for int. tests

---
 .github/workflows/integration-test.yml | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 0554fd77..6b563135 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -71,13 +71,23 @@ jobs:
           echo "See [Keeping your GitHub Actions and workflows secure Part 1: Preventing pwn requests](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests)." >> $GITHUB_STEP_SUMMARY
           exit 1
 
-      - name: Check merge commit SHA
-        run: echo "github.event.pull_request.merge_commit_sha = ${{ github.event.pull_request.merge_commit_sha }}"
-
       - name: Checkout source
         uses: actions/checkout@v4
         with:
-          ref: "${{ github.event.pull_request.merge_commit_sha }}"
+          # Getting the correct commit for a pull_request_target event appears to be
+          # a known, problematic issue: https://github.com/actions/checkout/issues/518
+          # Ideally, we want github.event.pull_request.merge_commit_sha, but it is
+          # not reliable and can sometimes be a null value.  Based upon the comment
+          # linked below, this appears to be the most reasonable way to ensure that
+          # we are checking out the same code that triggered this workflow:
+          # https://github.com/actions/checkout/issues/518#issuecomment-1661941548
+          ref: "refs/pull/${{ github.event.number }}/merge"
+          fetch-depth: 2
+
+      - name: Sanity check
+        # Continuing from the comment in the checkout step above: the second
+        # parent of the merge commit should be the PR head commit.
+        run: |
+          [[ "$(git rev-parse 'HEAD^2')" == "${{ github.event.pull_request.head.sha }}" ]]
 
       - name: Install package with dependencies
         uses: ./.github/actions/install-pkg