support new directory structure created by radiant-mlhub>0.5
SpontaneousDuck committed Feb 9, 2023
1 parent d897e33 commit 2286ec1
Showing 14 changed files with 9 additions and 33 deletions.
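
For orientation, here are the two on-disk layouts involved, reconstructed from the paths visible in the diffs below (only top-level entries are shown; the contents of the source and labels folders are unchanged by this commit).

Old layout, radiant-mlhub>=0.2.1,<0.5 (two tarballs downloaded into root and extracted in place):

    root/
    ├── nasa_marine_debris_source.tar.gz
    ├── nasa_marine_debris_labels.tar.gz
    ├── nasa_marine_debris_source/
    └── nasa_marine_debris_labels/

New layout, radiant-mlhub>0.5 (a single already-extracted dataset directory nested under root):

    root/
    └── nasa_marine_debris/
        ├── nasa_marine_debris_source/
        └── nasa_marine_debris_labels/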
environment.yml (2 changes: 1 addition & 1 deletion)

@@ -38,7 +38,7 @@ dependencies:
   - pytorch-lightning>=1.5.1
   - git+https://github.com/pytorch/pytorch_sphinx_theme
   - pyupgrade>=2.4
-  - radiant-mlhub>=0.2.1,<0.5
+  - radiant-mlhub>0.5
   - rtree>=1
   - scikit-image>=0.18
   - scikit-learn>=0.22
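
The version bump above is the whole environment change. As a quick sanity check outside conda, the installed client can be validated at runtime with a snippet like this (importlib.metadata is stdlib since Python 3.8, packaging is a common third-party helper; neither import is part of this commit):

    from importlib.metadata import version

    from packaging.version import Version

    # Mirrors the "radiant-mlhub>0.5" constraint from environment.yml.
    assert Version(version("radiant-mlhub")) > Version("0.5")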
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
tests/datasets/test_nasa_marine_debris.py (18 changes: 5 additions & 13 deletions)

@@ -17,9 +17,9 @@

 class Dataset:
     def download(self, output_dir: str, **kwargs: str) -> None:
-        glob_path = os.path.join("tests", "data", "nasa_marine_debris", "*.tar.gz")
-        for tarball in glob.iglob(glob_path):
-            shutil.copy(tarball, output_dir)
+        ds_folder = os.path.join("tests", "data", "nasa_marine_debris", "nasa_marine_debris")
+        output_dir = os.path.join(output_dir, "nasa_marine_debris")
+        shutil.copytree(ds_folder, output_dir, dirs_exist_ok=True)
 
 
 def fetch(dataset_id: str, **kwargs: str) -> Dataset:
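
A minimal standalone sketch of what the new mock does, assuming the pre-extracted fixture tree tests/data/nasa_marine_debris/nasa_marine_debris exists in the repository (dirs_exist_ok requires Python >= 3.8):

    import os
    import shutil
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        src = os.path.join("tests", "data", "nasa_marine_debris", "nasa_marine_debris")
        dst = os.path.join(tmp, "nasa_marine_debris")
        # Stage the fixture as a nested directory, mirroring what
        # radiant-mlhub>0.5 leaves on disk after a real download.
        shutil.copytree(src, dst, dirs_exist_ok=True)
        print(sorted(os.listdir(tmp)))  # expected: ['nasa_marine_debris']

Because the client now delivers an already-extracted tree, the old glob-and-copy of per-asset tarballs no longer mirrors what a real download produces, which is why the mock switched to copytree.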
@@ -29,9 +29,9 @@ def fetch(dataset_id: str, **kwargs: str) -> Dataset:
 class TestNASAMarineDebris:
     @pytest.fixture()
     def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> NASAMarineDebris:
-        radiant_mlhub = pytest.importorskip("radiant_mlhub", minversion="0.2.1")
+        radiant_mlhub = pytest.importorskip("radiant_mlhub", minversion="0.5.0")
         monkeypatch.setattr(radiant_mlhub.Dataset, "fetch", fetch)
-        md5s = ["fe8698d1e68b3f24f0b86b04419a797d", "d8084f5a72778349e07ac90ec1e1d990"]
+        md5s = ["29dc40158bb6a7c53daa6b815d3821c7"]
         monkeypatch.setattr(NASAMarineDebris, "md5s", md5s)
         root = str(tmp_path)
         transforms = nn.Identity()
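
The fixture now patches in a single checksum, matching the consolidated filenames list in the dataset class further down. If the test archive is ever regenerated, a replacement hash can be computed with a helper along these lines (the helper name and example path are illustrative, not part of the commit):

    import hashlib

    def md5_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Return the MD5 hex digest of a file, read in 1 MiB chunks."""
        digest = hashlib.md5()
        with open(path, "rb") as f:
            for block in iter(lambda: f.read(chunk_size), b""):
                digest.update(block)
        return digest.hexdigest()

    # e.g. md5_of("tests/data/nasa_marine_debris/nasa_marine_debris.tar.gz")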
@@ -53,14 +53,6 @@ def test_already_downloaded
     ) -> None:
         NASAMarineDebris(root=str(tmp_path), download=True)
 
-    def test_already_downloaded_not_extracted(
-        self, dataset: NASAMarineDebris, tmp_path: Path
-    ) -> None:
-        shutil.rmtree(dataset.root)
-        os.makedirs(str(tmp_path), exist_ok=True)
-        Dataset().download(output_dir=str(tmp_path))
-        NASAMarineDebris(root=str(tmp_path), download=False)
-
     def test_not_downloaded(self, tmp_path: Path) -> None:
         err = "Dataset not found in `root` directory and `download=False`, "
         "either specify a different `root` directory or use `download=True` "
torchgeo/datasets/nasa_marine_debris.py (22 changes: 3 additions & 19 deletions)

@@ -52,9 +52,9 @@ class NASAMarineDebris(NonGeoDataset):
"""

dataset_id = "nasa_marine_debris"
directories = ["nasa_marine_debris_source", "nasa_marine_debris_labels"]
filenames = ["nasa_marine_debris_source.tar.gz", "nasa_marine_debris_labels.tar.gz"]
md5s = ["fe8698d1e68b3f24f0b86b04419a797d", "d8084f5a72778349e07ac90ec1e1d990"]
directories = ["nasa_marine_debris/nasa_marine_debris_source", "nasa_marine_debris/nasa_marine_debris_labels"]
filenames = ["nasa_marine_debris.tar.gz"]
md5s = ["29dc40158bb6a7c53daa6b815d3821c7"]
class_label = "marine_debris"

def __init__(
@@ -187,19 +187,6 @@ def _verify(self) -> None:
         if all(exists):
             return
 
-        # Check if zip file already exists (if so then extract)
-        exists = []
-        for filename in self.filenames:
-            filepath = os.path.join(self.root, filename)
-            if os.path.exists(filepath):
-                exists.append(True)
-                extract_archive(filepath)
-            else:
-                exists.append(False)
-
-        if all(exists):
-            return
-
         # Check if the user requested to download the dataset
         if not self.download:
             raise RuntimeError(
@@ -211,9 +198,6 @@ def _verify(self) -> None:
         # TODO: need a checksum check in here post downloading
         # Download and extract the dataset
         download_radiant_mlhub_dataset(self.dataset_id, self.root, self.api_key)
-        for filename in self.filenames:
-            filepath = os.path.join(self.root, filename)
-            extract_archive(filepath)
 
     def plot(
         self,
