From bee17337d30650317c9918dc6ffe7a2610554d90 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 11 Apr 2023 18:10:58 +0200 Subject: [PATCH 01/13] basic working version --- torchgeo/datasets/__init__.py | 2 + .../western_usa_live_fuel_moisture.py | 326 ++++++++++++++++++ 2 files changed, 328 insertions(+) create mode 100644 torchgeo/datasets/western_usa_live_fuel_moisture.py diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py index 2eff64d8696..ae25e0795ce 100644 --- a/torchgeo/datasets/__init__.py +++ b/torchgeo/datasets/__init__.py @@ -111,6 +111,7 @@ ) from .vaihingen import Vaihingen2D from .vhr10 import VHR10 +from .western_usa_live_fuel_moisture import WesternUSALiveFuelMoisture from .xview import XView2 from .zuericrop import ZueriCrop @@ -203,6 +204,7 @@ "USAVars", "Vaihingen2D", "VHR10", + "WesternUSALiveFuelMoisture", "XView2", "ZueriCrop", # Base classes diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py new file mode 100644 index 00000000000..b9a2bd34cb5 --- /dev/null +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -0,0 +1,326 @@ +"""Western USA Live Fuel Moisture Dataset.""" + +import glob +import json +import os +from typing import Any, Callable, Dict, List, Optional + +import torch +from torch import Tensor + +from .geo import NonGeoDataset +from .utils import download_radiant_mlhub_collection, extract_archive + + +class WesternUSALiveFuelMoisture(NonGeoDataset): + """Western USA Live Fuel Moisture Dataset. + + This tabular style dataset contains fuel moisture + (mass of water in vegetation) and remotely sensed variables + in the western United States. For more details see the + `dataset page `_. + + If you use this dataset in your research, please cite the following paper: + + * https://doi.org/10.1016/j.rse.2020.111797 + + .. note:: + + This dataset requires the following additional library to be installed: + + * `radiant-mlhub `_ to download the + imagery and labels from the Radiant Earth MLHub + """ + + collection_id = "su_sar_moisture_content" + + label_name = "percent(t)" + + all_variable_names = [ + # "date", + "slope(t)", + "elevation(t)", + "canopy_height(t)", + "forest_cover(t)", + "silt(t)", + "sand(t)", + "clay(t)", + "vv(t)", + "vh(t)", + "red(t)", + "green(t)", + "blue(t)", + "swir(t)", + "nir(t)", + "ndvi(t)", + "ndwi(t)", + "nirv(t)", + "vv_red(t)", + "vv_green(t)", + "vv_blue(t)", + "vv_swir(t)", + "vv_nir(t)", + "vv_ndvi(t)", + "vv_ndwi(t)", + "vv_nirv(t)", + "vh_red(t)", + "vh_green(t)", + "vh_blue(t)", + "vh_swir(t)", + "vh_nir(t)", + "vh_ndvi(t)", + "vh_ndwi(t)", + "vh_nirv(t)", + "vh_vv(t)", + "slope(t-1)", + "elevation(t-1)", + "canopy_height(t-1)", + "forest_cover(t-1)", + "silt(t-1)", + "sand(t-1)", + "clay(t-1)", + "vv(t-1)", + "vh(t-1)", + "red(t-1)", + "green(t-1)", + "blue(t-1)", + "swir(t-1)", + "nir(t-1)", + "ndvi(t-1)", + "ndwi(t-1)", + "nirv(t-1)", + "vv_red(t-1)", + "vv_green(t-1)", + "vv_blue(t-1)", + "vv_swir(t-1)", + "vv_nir(t-1)", + "vv_ndvi(t-1)", + "vv_ndwi(t-1)", + "vv_nirv(t-1)", + "vh_red(t-1)", + "vh_green(t-1)", + "vh_blue(t-1)", + "vh_swir(t-1)", + "vh_nir(t-1)", + "vh_ndvi(t-1)", + "vh_ndwi(t-1)", + "vh_nirv(t-1)", + "vh_vv(t-1)", + "slope(t-2)", + "elevation(t-2)", + "canopy_height(t-2)", + "forest_cover(t-2)", + "silt(t-2)", + "sand(t-2)", + "clay(t-2)", + "vv(t-2)", + "vh(t-2)", + "red(t-2)", + "green(t-2)", + "blue(t-2)", + "swir(t-2)", + "nir(t-2)", + "ndvi(t-2)", + "ndwi(t-2)", + "nirv(t-2)", + "vv_red(t-2)", + "vv_green(t-2)", + "vv_blue(t-2)", + "vv_swir(t-2)", + "vv_nir(t-2)", + "vv_ndvi(t-2)", + "vv_ndwi(t-2)", + "vv_nirv(t-2)", + "vh_red(t-2)", + "vh_green(t-2)", + "vh_blue(t-2)", + "vh_swir(t-2)", + "vh_nir(t-2)", + "vh_ndvi(t-2)", + "vh_ndwi(t-2)", + "vh_nirv(t-2)", + "vh_vv(t-2)", + "slope(t-3)", + "elevation(t-3)", + "canopy_height(t-3)", + "forest_cover(t-3)", + "silt(t-3)", + "sand(t-3)", + "clay(t-3)", + "vv(t-3)", + "vh(t-3)", + "red(t-3)", + "green(t-3)", + "blue(t-3)", + "swir(t-3)", + "nir(t-3)", + "ndvi(t-3)", + "ndwi(t-3)", + "nirv(t-3)", + "vv_red(t-3)", + "vv_green(t-3)", + "vv_blue(t-3)", + "vv_swir(t-3)", + "vv_nir(t-3)", + "vv_ndvi(t-3)", + "vv_ndwi(t-3)", + "vv_nirv(t-3)", + "vh_red(t-3)", + "vh_green(t-3)", + "vh_blue(t-3)", + "vh_swir(t-3)", + "vh_nir(t-3)", + "vh_ndvi(t-3)", + "vh_ndwi(t-3)", + "vh_nirv(t-3)", + "vh_vv(t-3)", + "lat", + "lon", + ] + + def __init__( + self, + root: str = "data", + input_variables: List[str] = all_variable_names, + transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, + download: bool = False, + api_key: Optional[str] = None, + checksum: bool = False, + ) -> None: + """Initialize a new Tropical Cyclone Wind Estimation Competition Dataset. + + Args: + root: root directory where dataset can be found + transforms: a function/transform that takes input sample and its target as + entry and returns a transformed version + download: if True, download dataset and store it in the root directory + api_key: a RadiantEarth MLHub API key to use for downloading the dataset + checksum: if True, check the MD5 of the downloaded files (may be slow) + + Raises: + AssertionError: if ``split`` argument is invalid + RuntimeError: if ``download=False`` but dataset is missing or checksum fails + RuntimeError: if ``input_variables`` contains invalid variable names + ImportError: if pandas are are not installed + """ + super().__init__() + + self.root = root + self.transforms = transforms + self.checksum = checksum + + if download: + self._download(api_key) + + if not self._check_integrity(): + raise RuntimeError( + "Dataset not found or corrupted. " + + "You can use download=True to download it" + ) + + try: + import pandas as pd # noqa: F401 + except ImportError: + raise ImportError( + "pandas is not installed and is required to use this dataset" + ) + + assert all( + input in self.all_variable_names for input in input_variables + ), "Invalid input variable name." + self.input_variables = input_variables + + self.collection = self._retrieve_collection() + + self.dataframe = self._load_data() + + def _retrieve_collection(self) -> List[str]: + """Retrieve dataset collection that maps samples to paths. + + Returns: + list of sample paths + """ + return glob.glob( + os.path.join(self.root, self.collection_id, "**", "labels.geojson") + ) + + def __len__(self) -> int: + """Return the number of data points in the dataset. + + Returns: + length of the dataset + """ + return len(self.dataframe) + + def __getitem__(self, index: int) -> Dict[str, Any]: + """Return an index within the dataset. + + Args: + index: index to return + + Returns: + data at that index + """ + data = self.dataframe.iloc[index, :] + + sample: Dict[str, Tensor] = { + "input": torch.tensor(data.drop([self.label_name]), dtype=torch.float32), + "label": torch.tensor(data[self.label_name], dtype=torch.float32), + } + + if self.transforms is not None: + sample = self.transforms(sample) + + return sample + + def _load_data(self): + """Load data from individual files into pandas dataframe. + + Returns: + the features and label + """ + import pandas as pd + + data_rows = [] + for path in self.collection: + with open(path) as f: + content = json.load(f) + data_dict = content["properties"] + data_dict["lat"] = content["geometry"]["coordinates"][0] + data_dict["lon"] = content["geometry"]["coordinates"][1] + data_rows.append(data_dict) + + df: pd.DataFrame = pd.DataFrame(data_rows) + df = df[self.input_variables + [self.label_name]] + return df + + def _check_integrity(self) -> bool: + """Check integrity of dataset. + + Returns: + True if dataset files are found and/or MD5s match, else False + """ + # for split, resources in self.md5s.items(): + # for resource_type, md5 in resources.items(): + # filename = "_".join([self.collection_id, split, resource_type]) + # filename = os.path.join(self.root, filename + ".tar.gz") + # if not check_integrity(filename, md5 if self.checksum else None): + # return False + return True + + def _download(self, api_key: Optional[str] = None) -> None: + """Download the dataset and extract it. + + Args: + api_key: a RadiantEarth MLHub API key to use for downloading the dataset + + Raises: + RuntimeError: if download doesn't work correctly or checksums don't match + """ + if self._check_integrity(): + print("Files already downloaded and verified") + return + + download_radiant_mlhub_collection(self.collection_id, self.root, api_key) + filename = os.path.join(self.root, self.collection_id) + ".tar.gz" + extract_archive(filename, self.root) From 56887b8c167a74c6abd543883bf2c81faa60bf34 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 11:23:53 +0200 Subject: [PATCH 02/13] format data.py --- .../western_usa_live_fuel_moisture/data.py | 223 ++++++++++++++++++ .../su_sar_moisture_content.tar.gz | Bin 0 -> 2152 bytes .../su_sar_moisture_content_0/labels.geojson | 1 + .../su_sar_moisture_content_0/stac.json | 1 + .../su_sar_moisture_content_1/labels.geojson | 1 + .../su_sar_moisture_content_1/stac.json | 1 + .../su_sar_moisture_content_2/labels.geojson | 1 + .../su_sar_moisture_content_2/stac.json | 1 + .../test_western_usa_live_fuel_moisture.py | 88 +++++++ .../western_usa_live_fuel_moisture.py | 62 +++-- 10 files changed, 356 insertions(+), 23 deletions(-) create mode 100644 tests/data/western_usa_live_fuel_moisture/data.py create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content.tar.gz create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/labels.geojson create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/stac.json create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/labels.geojson create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/stac.json create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/labels.geojson create mode 100644 tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/stac.json create mode 100644 tests/datasets/test_western_usa_live_fuel_moisture.py diff --git a/tests/data/western_usa_live_fuel_moisture/data.py b/tests/data/western_usa_live_fuel_moisture/data.py new file mode 100644 index 00000000000..d64c445ed41 --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/data.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 + +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import hashlib +import json +import os +import shutil + +NUM_SAMPLES = 3 + + +data_dir = "su_sar_moisture_content" + +LABELS = { + "type": "Feature", + "properties": { + "percent(t)": 132.6666667, + "site": "Blackstone", + "date": "6/30/15", + "slope(t)": 0.599961042, + "elevation(t)": 1522.0, + "canopy_height(t)": 0.0, + "forest_cover(t)": 130.0, + "silt(t)": 36.0, + "sand(t)": 38.0, + "clay(t)": 26.0, + "vv(t)": -12.80108143, + "vh(t)": -20.86413967, + "red(t)": 2007.5, + "green(t)": 1669.5, + "blue(t)": 1234.5, + "swir(t)": 3226.5, + "nir(t)": 2764.5, + "ndvi(t)": 0.158611467, + "ndwi(t)": -0.07713057, + "nirv(t)": 438.5596345, + "vv_red(t)": -0.006376628, + "vv_green(t)": -0.007667614, + "vv_blue(t)": -0.010369446, + "vv_swir(t)": -0.003967482, + "vv_nir(t)": -0.004630523, + "vv_ndvi(t)": -80.70716267, + "vv_ndwi(t)": 165.9663796, + "vv_nirv(t)": -0.029188919, + "vh_red(t)": -0.010393096, + "vh_green(t)": -0.012497238, + "vh_blue(t)": -0.016900883, + "vh_swir(t)": -0.006466493, + "vh_nir(t)": -0.007547166, + "vh_ndvi(t)": -131.5424422, + "vh_ndwi(t)": 270.5041557, + "vh_nirv(t)": -0.047574236, + "vh_vv(t)": -8.063058239, + "slope(t-1)": 0.599961042, + "elevation(t-1)": 1522.0, + "canopy_height(t-1)": 0.0, + "forest_cover(t-1)": 130.0, + "silt(t-1)": 36.0, + "sand(t-1)": 38.0, + "clay(t-1)": 26.0, + "vv(t-1)": -12.93716855, + "vh(t-1)": -20.92368901, + "red(t-1)": 1792.0, + "green(t-1)": 1490.0, + "blue(t-1)": 1102.5, + "swir(t-1)": 3047.0, + "nir(t-1)": 2574.0, + "ndvi(t-1)": 0.179116009, + "ndwi(t-1)": -0.084146807, + "nirv(t-1)": 461.0691997, + "vv_red(t-1)": -0.007219402, + "vv_green(t-1)": -0.008682663, + "vv_blue(t-1)": -0.011734393, + "vv_swir(t-1)": -0.004245871, + "vv_nir(t-1)": -0.005026095, + "vv_ndvi(t-1)": -72.22787422, + "vv_ndwi(t-1)": 153.7452097, + "vv_nirv(t-1)": -0.02805906, + "vh_red(t-1)": -0.011676166, + "vh_green(t-1)": -0.014042744, + "vh_blue(t-1)": -0.018978403, + "vh_swir(t-1)": -0.00686698, + "vh_nir(t-1)": -0.008128861, + "vh_ndvi(t-1)": -116.8164094, + "vh_ndwi(t-1)": 248.6569562, + "vh_nirv(t-1)": -0.0453808, + "vh_vv(t-1)": -7.986520458, + "slope(t-2)": 0.599961042, + "elevation(t-2)": 1522.0, + "canopy_height(t-2)": 0.0, + "forest_cover(t-2)": 130.0, + "silt(t-2)": 36.0, + "sand(t-2)": 38.0, + "clay(t-2)": 26.0, + "vv(t-2)": -13.07325567, + "vh(t-2)": -20.98323835, + "red(t-2)": 1721.5, + "green(t-2)": 1432.0, + "blue(t-2)": 1056.5, + "swir(t-2)": 2950.0, + "nir(t-2)": 2476.0, + "ndvi(t-2)": 0.179768568, + "ndwi(t-2)": -0.087357002, + "nirv(t-2)": 445.0984812, + "vv_red(t-2)": -0.007594107, + "vv_green(t-2)": -0.009129368, + "vv_blue(t-2)": -0.012374118, + "vv_swir(t-2)": -0.004431612, + "vv_nir(t-2)": -0.00527999, + "vv_ndvi(t-2)": -72.72270011, + "vv_ndwi(t-2)": 149.6532084, + "vv_nirv(t-2)": -0.029371603, + "vh_red(t-2)": -0.012188927, + "vh_green(t-2)": -0.014653099, + "vh_blue(t-2)": -0.019861087, + "vh_swir(t-2)": -0.007112962, + "vh_nir(t-2)": -0.008474652, + "vh_ndvi(t-2)": -116.7236217, + "vh_ndwi(t-2)": 240.2009889, + "vh_nirv(t-2)": -0.047142912, + "vh_vv(t-2)": -7.909982677, + "slope(t-3)": 0.599961042, + "elevation(t-3)": 1522.0, + "canopy_height(t-3)": 0.0, + "forest_cover(t-3)": 130.0, + "silt(t-3)": 36.0, + "sand(t-3)": 38.0, + "clay(t-3)": 26.0, + "vv(t-3)": -12.35794964, + "vh(t-3)": -20.25746909, + "red(t-3)": 1367.333333, + "green(t-3)": 1151.0, + "blue(t-3)": 827.3333333, + "swir(t-3)": 2349.333333, + "nir(t-3)": 2051.0, + "ndvi(t-3)": 0.216978329, + "ndwi(t-3)": -0.050717071, + "nirv(t-3)": 413.3885932, + "vv_red(t-3)": -0.009037993, + "vv_green(t-3)": -0.010736707, + "vv_blue(t-3)": -0.014937087, + "vv_swir(t-3)": -0.005260194, + "vv_nir(t-3)": -0.006025329, + "vv_ndvi(t-3)": -56.95476465, + "vv_ndwi(t-3)": 243.6644995, + "vv_nirv(t-3)": -0.029894269, + "vh_red(t-3)": -0.014815311, + "vh_green(t-3)": -0.017599886, + "vh_blue(t-3)": -0.024485257, + "vh_swir(t-3)": -0.008622646, + "vh_nir(t-3)": -0.009876874, + "vh_ndvi(t-3)": -93.36171601, + "vh_ndwi(t-3)": 399.4211186, + "vh_nirv(t-3)": -0.049003454, + "vh_vv(t-3)": -7.899519455, + }, + "geometry": {"type": "Point", "coordinates": [-115.8855556, 42.44111111]}, +} + +STAC = { + "assets": { + "documentation": { + "href": "../_common/documentation.pdf", + "type": "application/pdf", + }, + "labels": {"href": "labels.geojson", "type": "application/geo+json"}, + "training_features_descriptions": { + "href": "../_common/training_features_descriptions.csv", + "title": "Training Features Descriptions", + "type": "text/csv", + }, + }, + "bbox": [-115.8855556, 42.44111111, -115.8855556, 42.44111111], + "collection": "su_sar_moisture_content", + "geometry": {"coordinates": [-115.8855556, 42.44111111], "type": "Point"}, + "id": "su_sar_moisture_content_0001", + "links": [ + {"href": "../collection.json", "rel": "collection"}, + {"href": "../collection.json", "rel": "parent"}, + ], + "properties": { + "datetime": "2015-06-30T00:00:00Z", + "label:description": "", + "label:properties": ["percent(t)"], + "label:type": "vector", + }, + "stac_extensions": ["label"], + "stac_version": "1.0.0-beta.2", + "type": "Feature", +} + + +def create_file(path: str) -> None: + label_path = os.path.join(path, "labels.geojson") + with open(label_path, "w") as f: + json.dump(LABELS, f) + + stac_path = os.path.join(path, "stac.json") + with open(stac_path, "w") as f: + json.dump(STAC, f) + + +if __name__ == "__main__": + # Remove old data + if os.path.isdir(data_dir): + shutil.rmtree(data_dir) + + os.makedirs(os.path.join(os.getcwd(), data_dir)) + + for i in range(NUM_SAMPLES): + sample_dir = os.path.join(data_dir, data_dir + f"_{i}") + os.makedirs(sample_dir) + create_file(sample_dir) + + # Compress data + shutil.make_archive(data_dir, "gztar", ".", data_dir) + + # Compute checksums + with open(data_dir + ".tar.gz", "rb") as f: + md5 = hashlib.md5(f.read()).hexdigest() + print(f"{data_dir}.tar.gz: {md5}") diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content.tar.gz b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..fe38d993d677c7f7e38e9b8f8699f9cc591284b1 GIT binary patch literal 2152 zcma)-X*kr67ROOEmTJn#s407lVv;37METpZw;1MXq^1~};ZKR7EM<#OVi?&Oq@wKe z@2L#NzL$o^h_a2fG3)R0;=aB2p65KD7w7wY&zp0ea}<+6pf1mIeqsPV6ovQ2p#p<2 z_>fTC4HP;E8*&31q7&ka6Isfk2Qdu~{`$9+rsH-SUGLSP3$isk3b+5TBvEregyIe;Y-KqN7SUiq{%`tU#a@bY{fM7A1*@T};29qC2W zFT{myA6tRB6$Bq4@BCXyg0z^c+`{j{wsI;I{{)5;np3Oq!^fYUm>Oo)(#Cg@Gx1LN zLOQ5TN6H`uk?-s;NgX{h zx@2fDKK>gkfE!vom|oQtvg568tbTukl$v_*A}7Yf(_RNz`=0C;7VYB&58(8VY#QOK zy>B(s&D89TK?*rsQm+1GQKPp(x2cCtojo^f5uBnfhJdXcK z^G6JrmJkoQisyO`T@V~`kS;P9FYoe)WtN}Lp_LQMmP3h>OU1)3XC}at>-H+z7red7 z(|k%T)JFV9Qf_4V#@_+?i_>o@j8)G*f(o zRd^YGC%pYysk@9mUaL$Sv)5guJ!d9eHK^$6W2ybfe-)WEzKJoWWunho>5yLUKim|n z<7Q8Mn|_+AGj#1{m7jR{OS5m+mFgAlaa3@%P$>S7br`ehEz*A0&k40AhB;AP=XMw; zoK4kTt?lmRr$}EhCf|J$ys0O~FpUX*Zee7r#FvO!AzFkxDZykxa3uyt*`fl9WgU;x z=$Dd7vUJ?tJ}{85j@GWX6E%)`lMc_5h0gte&_Kz7gTIIt*&u!Bk-9v|*e4dMnZBYW zbuQ=CODeP99n4eG0Ke^6RJ&QK7VK%V&*_#I7_@GyoILaW(C%KA9lT+dgLwg$gKor$ zSwKbgZInuzTSQK2ldZ}i(V}G{;909aYF~PWD%yBU_JD)!kE-Y62KXAK;L$=abPZZc z>|N+ggR6p8+@Rf~NUInn2hg-HSvL;>cEJ9l)tv)#YO>gFk!Ei?tNaLT4H^K;P;*UNNCx=ZtwN>Iqx zDo8$|Djr3*`CYopK+n(;leatXW+5k5`xu${!nyahj58=&r+^sod{E0-JyV6IxI#NW z59?M#jzZx8nw-#Q61aZQmQ9gw0=(IgW}lNLm!j5#pEkqZ{_4k#jj#k) zeLU<#VtBmB=99-N_f4>cj}`m)n~d5!hUe=yv`Dbe=1>Sdi(q9mB3en2x<5thug-ms zF|V4WM$AXmSMwalVEiIMMQ1*N{B7f@?~4C;+yKGmDXNUNBIHgyNDIif65PGQ9V78) zut99azEVRJiqw1GGQMtNB)4upNp?{?Rkm7w&f{wf+I4pBdq7`u{Th-h8s^Az`_+17 z@?P6WLyULpdRO;B*i0Pa=xE*t)^GYc#ca!5eQUkbTmq_v&2BrexTk$GmSC=9y(c0g zu!<1w2{CKy0diKc_x+Lie-VGQNiS@lEUBq literal 0 HcmV?d00001 diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/labels.geojson b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/labels.geojson new file mode 100644 index 00000000000..9492503feae --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/labels.geojson @@ -0,0 +1 @@ +{"type": "Feature", "properties": {"percent(t)": 132.6666667, "site": "Blackstone", "date": "6/30/15", "slope(t)": 0.599961042, "elevation(t)": 1522.0, "canopy_height(t)": 0.0, "forest_cover(t)": 130.0, "silt(t)": 36.0, "sand(t)": 38.0, "clay(t)": 26.0, "vv(t)": -12.80108143, "vh(t)": -20.86413967, "red(t)": 2007.5, "green(t)": 1669.5, "blue(t)": 1234.5, "swir(t)": 3226.5, "nir(t)": 2764.5, "ndvi(t)": 0.158611467, "ndwi(t)": -0.07713057, "nirv(t)": 438.5596345, "vv_red(t)": -0.006376628, "vv_green(t)": -0.007667614, "vv_blue(t)": -0.010369446, "vv_swir(t)": -0.003967482, "vv_nir(t)": -0.004630523, "vv_ndvi(t)": -80.70716267, "vv_ndwi(t)": 165.9663796, "vv_nirv(t)": -0.029188919, "vh_red(t)": -0.010393096, "vh_green(t)": -0.012497238, "vh_blue(t)": -0.016900883, "vh_swir(t)": -0.006466493, "vh_nir(t)": -0.007547166, "vh_ndvi(t)": -131.5424422, "vh_ndwi(t)": 270.5041557, "vh_nirv(t)": -0.047574236, "vh_vv(t)": -8.063058239, "slope(t-1)": 0.599961042, "elevation(t-1)": 1522.0, "canopy_height(t-1)": 0.0, "forest_cover(t-1)": 130.0, "silt(t-1)": 36.0, "sand(t-1)": 38.0, "clay(t-1)": 26.0, "vv(t-1)": -12.93716855, "vh(t-1)": -20.92368901, "red(t-1)": 1792.0, "green(t-1)": 1490.0, "blue(t-1)": 1102.5, "swir(t-1)": 3047.0, "nir(t-1)": 2574.0, "ndvi(t-1)": 0.179116009, "ndwi(t-1)": -0.084146807, "nirv(t-1)": 461.0691997, "vv_red(t-1)": -0.007219402, "vv_green(t-1)": -0.008682663, "vv_blue(t-1)": -0.011734393, "vv_swir(t-1)": -0.004245871, "vv_nir(t-1)": -0.005026095, "vv_ndvi(t-1)": -72.22787422, "vv_ndwi(t-1)": 153.7452097, "vv_nirv(t-1)": -0.02805906, "vh_red(t-1)": -0.011676166, "vh_green(t-1)": -0.014042744, "vh_blue(t-1)": -0.018978403, "vh_swir(t-1)": -0.00686698, "vh_nir(t-1)": -0.008128861, "vh_ndvi(t-1)": -116.8164094, "vh_ndwi(t-1)": 248.6569562, "vh_nirv(t-1)": -0.0453808, "vh_vv(t-1)": -7.986520458, "slope(t-2)": 0.599961042, "elevation(t-2)": 1522.0, "canopy_height(t-2)": 0.0, "forest_cover(t-2)": 130.0, "silt(t-2)": 36.0, "sand(t-2)": 38.0, "clay(t-2)": 26.0, "vv(t-2)": -13.07325567, "vh(t-2)": -20.98323835, "red(t-2)": 1721.5, "green(t-2)": 1432.0, "blue(t-2)": 1056.5, "swir(t-2)": 2950.0, "nir(t-2)": 2476.0, "ndvi(t-2)": 0.179768568, "ndwi(t-2)": -0.087357002, "nirv(t-2)": 445.0984812, "vv_red(t-2)": -0.007594107, "vv_green(t-2)": -0.009129368, "vv_blue(t-2)": -0.012374118, "vv_swir(t-2)": -0.004431612, "vv_nir(t-2)": -0.00527999, "vv_ndvi(t-2)": -72.72270011, "vv_ndwi(t-2)": 149.6532084, "vv_nirv(t-2)": -0.029371603, "vh_red(t-2)": -0.012188927, "vh_green(t-2)": -0.014653099, "vh_blue(t-2)": -0.019861087, "vh_swir(t-2)": -0.007112962, "vh_nir(t-2)": -0.008474652, "vh_ndvi(t-2)": -116.7236217, "vh_ndwi(t-2)": 240.2009889, "vh_nirv(t-2)": -0.047142912, "vh_vv(t-2)": -7.909982677, "slope(t-3)": 0.599961042, "elevation(t-3)": 1522.0, "canopy_height(t-3)": 0.0, "forest_cover(t-3)": 130.0, "silt(t-3)": 36.0, "sand(t-3)": 38.0, "clay(t-3)": 26.0, "vv(t-3)": -12.35794964, "vh(t-3)": -20.25746909, "red(t-3)": 1367.333333, "green(t-3)": 1151.0, "blue(t-3)": 827.3333333, "swir(t-3)": 2349.333333, "nir(t-3)": 2051.0, "ndvi(t-3)": 0.216978329, "ndwi(t-3)": -0.050717071, "nirv(t-3)": 413.3885932, "vv_red(t-3)": -0.009037993, "vv_green(t-3)": -0.010736707, "vv_blue(t-3)": -0.014937087, "vv_swir(t-3)": -0.005260194, "vv_nir(t-3)": -0.006025329, "vv_ndvi(t-3)": -56.95476465, "vv_ndwi(t-3)": 243.6644995, "vv_nirv(t-3)": -0.029894269, "vh_red(t-3)": -0.014815311, "vh_green(t-3)": -0.017599886, "vh_blue(t-3)": -0.024485257, "vh_swir(t-3)": -0.008622646, "vh_nir(t-3)": -0.009876874, "vh_ndvi(t-3)": -93.36171601, "vh_ndwi(t-3)": 399.4211186, "vh_nirv(t-3)": -0.049003454, "vh_vv(t-3)": -7.899519455}, "geometry": {"type": "Point", "coordinates": [-115.8855556, 42.44111111]}} \ No newline at end of file diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/stac.json b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/stac.json new file mode 100644 index 00000000000..469f98574d9 --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_0/stac.json @@ -0,0 +1 @@ +{"assets": {"documentation": {"href": "../_common/documentation.pdf", "type": "application/pdf"}, "labels": {"href": "labels.geojson", "type": "application/geo+json"}, "training_features_descriptions": {"href": "../_common/training_features_descriptions.csv", "title": "Training Features Descriptions", "type": "text/csv"}}, "bbox": [-115.8855556, 42.44111111, -115.8855556, 42.44111111], "collection": "su_sar_moisture_content", "geometry": {"coordinates": [-115.8855556, 42.44111111], "type": "Point"}, "id": "su_sar_moisture_content_0001", "links": [{"href": "../collection.json", "rel": "collection"}, {"href": "../collection.json", "rel": "parent"}], "properties": {"datetime": "2015-06-30T00:00:00Z", "label:description": "", "label:properties": ["percent(t)"], "label:type": "vector"}, "stac_extensions": ["label"], "stac_version": "1.0.0-beta.2", "type": "Feature"} \ No newline at end of file diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/labels.geojson b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/labels.geojson new file mode 100644 index 00000000000..9492503feae --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/labels.geojson @@ -0,0 +1 @@ +{"type": "Feature", "properties": {"percent(t)": 132.6666667, "site": "Blackstone", "date": "6/30/15", "slope(t)": 0.599961042, "elevation(t)": 1522.0, "canopy_height(t)": 0.0, "forest_cover(t)": 130.0, "silt(t)": 36.0, "sand(t)": 38.0, "clay(t)": 26.0, "vv(t)": -12.80108143, "vh(t)": -20.86413967, "red(t)": 2007.5, "green(t)": 1669.5, "blue(t)": 1234.5, "swir(t)": 3226.5, "nir(t)": 2764.5, "ndvi(t)": 0.158611467, "ndwi(t)": -0.07713057, "nirv(t)": 438.5596345, "vv_red(t)": -0.006376628, "vv_green(t)": -0.007667614, "vv_blue(t)": -0.010369446, "vv_swir(t)": -0.003967482, "vv_nir(t)": -0.004630523, "vv_ndvi(t)": -80.70716267, "vv_ndwi(t)": 165.9663796, "vv_nirv(t)": -0.029188919, "vh_red(t)": -0.010393096, "vh_green(t)": -0.012497238, "vh_blue(t)": -0.016900883, "vh_swir(t)": -0.006466493, "vh_nir(t)": -0.007547166, "vh_ndvi(t)": -131.5424422, "vh_ndwi(t)": 270.5041557, "vh_nirv(t)": -0.047574236, "vh_vv(t)": -8.063058239, "slope(t-1)": 0.599961042, "elevation(t-1)": 1522.0, "canopy_height(t-1)": 0.0, "forest_cover(t-1)": 130.0, "silt(t-1)": 36.0, "sand(t-1)": 38.0, "clay(t-1)": 26.0, "vv(t-1)": -12.93716855, "vh(t-1)": -20.92368901, "red(t-1)": 1792.0, "green(t-1)": 1490.0, "blue(t-1)": 1102.5, "swir(t-1)": 3047.0, "nir(t-1)": 2574.0, "ndvi(t-1)": 0.179116009, "ndwi(t-1)": -0.084146807, "nirv(t-1)": 461.0691997, "vv_red(t-1)": -0.007219402, "vv_green(t-1)": -0.008682663, "vv_blue(t-1)": -0.011734393, "vv_swir(t-1)": -0.004245871, "vv_nir(t-1)": -0.005026095, "vv_ndvi(t-1)": -72.22787422, "vv_ndwi(t-1)": 153.7452097, "vv_nirv(t-1)": -0.02805906, "vh_red(t-1)": -0.011676166, "vh_green(t-1)": -0.014042744, "vh_blue(t-1)": -0.018978403, "vh_swir(t-1)": -0.00686698, "vh_nir(t-1)": -0.008128861, "vh_ndvi(t-1)": -116.8164094, "vh_ndwi(t-1)": 248.6569562, "vh_nirv(t-1)": -0.0453808, "vh_vv(t-1)": -7.986520458, "slope(t-2)": 0.599961042, "elevation(t-2)": 1522.0, "canopy_height(t-2)": 0.0, "forest_cover(t-2)": 130.0, "silt(t-2)": 36.0, "sand(t-2)": 38.0, "clay(t-2)": 26.0, "vv(t-2)": -13.07325567, "vh(t-2)": -20.98323835, "red(t-2)": 1721.5, "green(t-2)": 1432.0, "blue(t-2)": 1056.5, "swir(t-2)": 2950.0, "nir(t-2)": 2476.0, "ndvi(t-2)": 0.179768568, "ndwi(t-2)": -0.087357002, "nirv(t-2)": 445.0984812, "vv_red(t-2)": -0.007594107, "vv_green(t-2)": -0.009129368, "vv_blue(t-2)": -0.012374118, "vv_swir(t-2)": -0.004431612, "vv_nir(t-2)": -0.00527999, "vv_ndvi(t-2)": -72.72270011, "vv_ndwi(t-2)": 149.6532084, "vv_nirv(t-2)": -0.029371603, "vh_red(t-2)": -0.012188927, "vh_green(t-2)": -0.014653099, "vh_blue(t-2)": -0.019861087, "vh_swir(t-2)": -0.007112962, "vh_nir(t-2)": -0.008474652, "vh_ndvi(t-2)": -116.7236217, "vh_ndwi(t-2)": 240.2009889, "vh_nirv(t-2)": -0.047142912, "vh_vv(t-2)": -7.909982677, "slope(t-3)": 0.599961042, "elevation(t-3)": 1522.0, "canopy_height(t-3)": 0.0, "forest_cover(t-3)": 130.0, "silt(t-3)": 36.0, "sand(t-3)": 38.0, "clay(t-3)": 26.0, "vv(t-3)": -12.35794964, "vh(t-3)": -20.25746909, "red(t-3)": 1367.333333, "green(t-3)": 1151.0, "blue(t-3)": 827.3333333, "swir(t-3)": 2349.333333, "nir(t-3)": 2051.0, "ndvi(t-3)": 0.216978329, "ndwi(t-3)": -0.050717071, "nirv(t-3)": 413.3885932, "vv_red(t-3)": -0.009037993, "vv_green(t-3)": -0.010736707, "vv_blue(t-3)": -0.014937087, "vv_swir(t-3)": -0.005260194, "vv_nir(t-3)": -0.006025329, "vv_ndvi(t-3)": -56.95476465, "vv_ndwi(t-3)": 243.6644995, "vv_nirv(t-3)": -0.029894269, "vh_red(t-3)": -0.014815311, "vh_green(t-3)": -0.017599886, "vh_blue(t-3)": -0.024485257, "vh_swir(t-3)": -0.008622646, "vh_nir(t-3)": -0.009876874, "vh_ndvi(t-3)": -93.36171601, "vh_ndwi(t-3)": 399.4211186, "vh_nirv(t-3)": -0.049003454, "vh_vv(t-3)": -7.899519455}, "geometry": {"type": "Point", "coordinates": [-115.8855556, 42.44111111]}} \ No newline at end of file diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/stac.json b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/stac.json new file mode 100644 index 00000000000..469f98574d9 --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_1/stac.json @@ -0,0 +1 @@ +{"assets": {"documentation": {"href": "../_common/documentation.pdf", "type": "application/pdf"}, "labels": {"href": "labels.geojson", "type": "application/geo+json"}, "training_features_descriptions": {"href": "../_common/training_features_descriptions.csv", "title": "Training Features Descriptions", "type": "text/csv"}}, "bbox": [-115.8855556, 42.44111111, -115.8855556, 42.44111111], "collection": "su_sar_moisture_content", "geometry": {"coordinates": [-115.8855556, 42.44111111], "type": "Point"}, "id": "su_sar_moisture_content_0001", "links": [{"href": "../collection.json", "rel": "collection"}, {"href": "../collection.json", "rel": "parent"}], "properties": {"datetime": "2015-06-30T00:00:00Z", "label:description": "", "label:properties": ["percent(t)"], "label:type": "vector"}, "stac_extensions": ["label"], "stac_version": "1.0.0-beta.2", "type": "Feature"} \ No newline at end of file diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/labels.geojson b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/labels.geojson new file mode 100644 index 00000000000..9492503feae --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/labels.geojson @@ -0,0 +1 @@ +{"type": "Feature", "properties": {"percent(t)": 132.6666667, "site": "Blackstone", "date": "6/30/15", "slope(t)": 0.599961042, "elevation(t)": 1522.0, "canopy_height(t)": 0.0, "forest_cover(t)": 130.0, "silt(t)": 36.0, "sand(t)": 38.0, "clay(t)": 26.0, "vv(t)": -12.80108143, "vh(t)": -20.86413967, "red(t)": 2007.5, "green(t)": 1669.5, "blue(t)": 1234.5, "swir(t)": 3226.5, "nir(t)": 2764.5, "ndvi(t)": 0.158611467, "ndwi(t)": -0.07713057, "nirv(t)": 438.5596345, "vv_red(t)": -0.006376628, "vv_green(t)": -0.007667614, "vv_blue(t)": -0.010369446, "vv_swir(t)": -0.003967482, "vv_nir(t)": -0.004630523, "vv_ndvi(t)": -80.70716267, "vv_ndwi(t)": 165.9663796, "vv_nirv(t)": -0.029188919, "vh_red(t)": -0.010393096, "vh_green(t)": -0.012497238, "vh_blue(t)": -0.016900883, "vh_swir(t)": -0.006466493, "vh_nir(t)": -0.007547166, "vh_ndvi(t)": -131.5424422, "vh_ndwi(t)": 270.5041557, "vh_nirv(t)": -0.047574236, "vh_vv(t)": -8.063058239, "slope(t-1)": 0.599961042, "elevation(t-1)": 1522.0, "canopy_height(t-1)": 0.0, "forest_cover(t-1)": 130.0, "silt(t-1)": 36.0, "sand(t-1)": 38.0, "clay(t-1)": 26.0, "vv(t-1)": -12.93716855, "vh(t-1)": -20.92368901, "red(t-1)": 1792.0, "green(t-1)": 1490.0, "blue(t-1)": 1102.5, "swir(t-1)": 3047.0, "nir(t-1)": 2574.0, "ndvi(t-1)": 0.179116009, "ndwi(t-1)": -0.084146807, "nirv(t-1)": 461.0691997, "vv_red(t-1)": -0.007219402, "vv_green(t-1)": -0.008682663, "vv_blue(t-1)": -0.011734393, "vv_swir(t-1)": -0.004245871, "vv_nir(t-1)": -0.005026095, "vv_ndvi(t-1)": -72.22787422, "vv_ndwi(t-1)": 153.7452097, "vv_nirv(t-1)": -0.02805906, "vh_red(t-1)": -0.011676166, "vh_green(t-1)": -0.014042744, "vh_blue(t-1)": -0.018978403, "vh_swir(t-1)": -0.00686698, "vh_nir(t-1)": -0.008128861, "vh_ndvi(t-1)": -116.8164094, "vh_ndwi(t-1)": 248.6569562, "vh_nirv(t-1)": -0.0453808, "vh_vv(t-1)": -7.986520458, "slope(t-2)": 0.599961042, "elevation(t-2)": 1522.0, "canopy_height(t-2)": 0.0, "forest_cover(t-2)": 130.0, "silt(t-2)": 36.0, "sand(t-2)": 38.0, "clay(t-2)": 26.0, "vv(t-2)": -13.07325567, "vh(t-2)": -20.98323835, "red(t-2)": 1721.5, "green(t-2)": 1432.0, "blue(t-2)": 1056.5, "swir(t-2)": 2950.0, "nir(t-2)": 2476.0, "ndvi(t-2)": 0.179768568, "ndwi(t-2)": -0.087357002, "nirv(t-2)": 445.0984812, "vv_red(t-2)": -0.007594107, "vv_green(t-2)": -0.009129368, "vv_blue(t-2)": -0.012374118, "vv_swir(t-2)": -0.004431612, "vv_nir(t-2)": -0.00527999, "vv_ndvi(t-2)": -72.72270011, "vv_ndwi(t-2)": 149.6532084, "vv_nirv(t-2)": -0.029371603, "vh_red(t-2)": -0.012188927, "vh_green(t-2)": -0.014653099, "vh_blue(t-2)": -0.019861087, "vh_swir(t-2)": -0.007112962, "vh_nir(t-2)": -0.008474652, "vh_ndvi(t-2)": -116.7236217, "vh_ndwi(t-2)": 240.2009889, "vh_nirv(t-2)": -0.047142912, "vh_vv(t-2)": -7.909982677, "slope(t-3)": 0.599961042, "elevation(t-3)": 1522.0, "canopy_height(t-3)": 0.0, "forest_cover(t-3)": 130.0, "silt(t-3)": 36.0, "sand(t-3)": 38.0, "clay(t-3)": 26.0, "vv(t-3)": -12.35794964, "vh(t-3)": -20.25746909, "red(t-3)": 1367.333333, "green(t-3)": 1151.0, "blue(t-3)": 827.3333333, "swir(t-3)": 2349.333333, "nir(t-3)": 2051.0, "ndvi(t-3)": 0.216978329, "ndwi(t-3)": -0.050717071, "nirv(t-3)": 413.3885932, "vv_red(t-3)": -0.009037993, "vv_green(t-3)": -0.010736707, "vv_blue(t-3)": -0.014937087, "vv_swir(t-3)": -0.005260194, "vv_nir(t-3)": -0.006025329, "vv_ndvi(t-3)": -56.95476465, "vv_ndwi(t-3)": 243.6644995, "vv_nirv(t-3)": -0.029894269, "vh_red(t-3)": -0.014815311, "vh_green(t-3)": -0.017599886, "vh_blue(t-3)": -0.024485257, "vh_swir(t-3)": -0.008622646, "vh_nir(t-3)": -0.009876874, "vh_ndvi(t-3)": -93.36171601, "vh_ndwi(t-3)": 399.4211186, "vh_nirv(t-3)": -0.049003454, "vh_vv(t-3)": -7.899519455}, "geometry": {"type": "Point", "coordinates": [-115.8855556, 42.44111111]}} \ No newline at end of file diff --git a/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/stac.json b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/stac.json new file mode 100644 index 00000000000..469f98574d9 --- /dev/null +++ b/tests/data/western_usa_live_fuel_moisture/su_sar_moisture_content/su_sar_moisture_content_2/stac.json @@ -0,0 +1 @@ +{"assets": {"documentation": {"href": "../_common/documentation.pdf", "type": "application/pdf"}, "labels": {"href": "labels.geojson", "type": "application/geo+json"}, "training_features_descriptions": {"href": "../_common/training_features_descriptions.csv", "title": "Training Features Descriptions", "type": "text/csv"}}, "bbox": [-115.8855556, 42.44111111, -115.8855556, 42.44111111], "collection": "su_sar_moisture_content", "geometry": {"coordinates": [-115.8855556, 42.44111111], "type": "Point"}, "id": "su_sar_moisture_content_0001", "links": [{"href": "../collection.json", "rel": "collection"}, {"href": "../collection.json", "rel": "parent"}], "properties": {"datetime": "2015-06-30T00:00:00Z", "label:description": "", "label:properties": ["percent(t)"], "label:type": "vector"}, "stac_extensions": ["label"], "stac_version": "1.0.0-beta.2", "type": "Feature"} \ No newline at end of file diff --git a/tests/datasets/test_western_usa_live_fuel_moisture.py b/tests/datasets/test_western_usa_live_fuel_moisture.py new file mode 100644 index 00000000000..490e523cbbf --- /dev/null +++ b/tests/datasets/test_western_usa_live_fuel_moisture.py @@ -0,0 +1,88 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import builtins +import os +import shutil +from pathlib import Path +from typing import Any + +import pytest +import torch +import torch.nn as nn +from _pytest.fixtures import SubRequest +from _pytest.monkeypatch import MonkeyPatch + +from torchgeo.datasets import WesternUSALiveFuelMoisture + + +class Collection: + def download(self, output_dir: str, **kwargs: str) -> None: + tarball_path = os.path.join( + "tests", + "data", + "western_usa_live_fuel_moisture", + "su_sar_moisture_content.tar.gz", + ) + shutil.copy(tarball_path, output_dir) + + +def fetch(collection_id: str, **kwargs: str) -> Collection: + return Collection() + + +class TestWesternUSALiveFuelMoisture: + @pytest.fixture + def dataset( + self, monkeypatch: MonkeyPatch, tmp_path: Path + ) -> WesternUSALiveFuelMoisture: + radiant_mlhub = pytest.importorskip("radiant_mlhub", minversion="0.2.1") + monkeypatch.setattr(radiant_mlhub.Collection, "fetch", fetch) + md5 = "ecbc9269dd27c4efe7aa887960054351" + monkeypatch.setattr(WesternUSALiveFuelMoisture, "md5", md5) + root = str(tmp_path) + transforms = nn.Identity() + return WesternUSALiveFuelMoisture( + root, transforms=transforms, download=True, api_key="", checksum=True + ) + + @pytest.mark.parametrize("index", [0, 1, 2]) + def test_getitem(self, dataset: WesternUSALiveFuelMoisture, index: int) -> None: + x = dataset[index] + assert isinstance(x, dict) + assert isinstance(x["input"], torch.Tensor) + assert isinstance(x["label"], torch.Tensor) + + def test_len(self, dataset: WesternUSALiveFuelMoisture) -> None: + assert len(dataset) == 3 + + def test_already_downloaded(self, dataset: WesternUSALiveFuelMoisture) -> None: + WesternUSALiveFuelMoisture(root=dataset.root, download=True, api_key="") + + def test_not_downloaded(self, tmp_path: Path) -> None: + with pytest.raises(RuntimeError, match="Dataset not found in"): + WesternUSALiveFuelMoisture(str(tmp_path)) + + @pytest.fixture(params=["pandas"]) + def mock_missing_module(self, monkeypatch: MonkeyPatch, request: SubRequest) -> str: + import_orig = builtins.__import__ + package = str(request.param) + + def mocked_import(name: str, *args: Any, **kwargs: Any) -> Any: + if name == package: + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mocked_import) + return package + + def test_mock_missing_module( + self, dataset: WesternUSALiveFuelMoisture, mock_missing_module: str + ) -> None: + package = mock_missing_module + if package == "pandas": + with pytest.raises( + ImportError, + match=f"{package} is not installed and is required to use this dataset", + ): + WesternUSALiveFuelMoisture(dataset.root) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index b9a2bd34cb5..e4647e38507 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + """Western USA Live Fuel Moisture Dataset.""" import glob @@ -34,6 +37,8 @@ class WesternUSALiveFuelMoisture(NonGeoDataset): collection_id = "su_sar_moisture_content" + md5 = "a6c0721f06a3a0110b7d1243b18614f0" + label_name = "percent(t)" all_variable_names = [ @@ -208,15 +213,10 @@ def __init__( self.root = root self.transforms = transforms self.checksum = checksum + self.download = download + self.api_key = api_key - if download: - self._download(api_key) - - if not self._check_integrity(): - raise RuntimeError( - "Dataset not found or corrupted. " - + "You can use download=True to download it" - ) + self._verify() try: import pandas as pd # noqa: F401 @@ -294,19 +294,39 @@ def _load_data(self): df = df[self.input_variables + [self.label_name]] return df - def _check_integrity(self) -> bool: - """Check integrity of dataset. + def _verify(self) -> None: + """Verify the integrity of the dataset. - Returns: - True if dataset files are found and/or MD5s match, else False + Raises: + RuntimeError: if ``download=False`` but dataset is missing or checksum fails """ - # for split, resources in self.md5s.items(): - # for resource_type, md5 in resources.items(): - # filename = "_".join([self.collection_id, split, resource_type]) - # filename = os.path.join(self.root, filename + ".tar.gz") - # if not check_integrity(filename, md5 if self.checksum else None): - # return False - return True + # Check if the extracted files already exist + pathname = os.path.join(self.root, self.collection_id) + if os.path.exists(pathname): + return + + # Check if the zip files have already been downloaded + pathname = os.path.join(self.root, self.collection_id) + ".tar.gz" + if os.path.exists(pathname): + self._extract() + return + + # Check if the user requested to download the dataset + if not self.download: + raise RuntimeError( + f"Dataset not found in `root={self.root}` and `download=False`, " + "either specify a different `root` directory or use `download=True` " + "to automatically download the dataset." + ) + + # Download the dataset + self._download() + self._extract() + + def _extract(self) -> None: + """Extract the dataset.""" + pathname = os.path.join(self.root, self.collection_id) + ".tar.gz" + extract_archive(pathname, self.root) def _download(self, api_key: Optional[str] = None) -> None: """Download the dataset and extract it. @@ -317,10 +337,6 @@ def _download(self, api_key: Optional[str] = None) -> None: Raises: RuntimeError: if download doesn't work correctly or checksums don't match """ - if self._check_integrity(): - print("Files already downloaded and verified") - return - download_radiant_mlhub_collection(self.collection_id, self.root, api_key) filename = os.path.join(self.root, self.collection_id) + ".tar.gz" extract_archive(filename, self.root) From 4bac291441148b11a6e9366a91ae795ac5ccb6d8 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 11:28:59 +0200 Subject: [PATCH 03/13] add test for features variables --- tests/datasets/test_western_usa_live_fuel_moisture.py | 4 ++++ torchgeo/datasets/western_usa_live_fuel_moisture.py | 11 ++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/datasets/test_western_usa_live_fuel_moisture.py b/tests/datasets/test_western_usa_live_fuel_moisture.py index 490e523cbbf..465d2919e39 100644 --- a/tests/datasets/test_western_usa_live_fuel_moisture.py +++ b/tests/datasets/test_western_usa_live_fuel_moisture.py @@ -63,6 +63,10 @@ def test_not_downloaded(self, tmp_path: Path) -> None: with pytest.raises(RuntimeError, match="Dataset not found in"): WesternUSALiveFuelMoisture(str(tmp_path)) + def test_invalid_features(self, dataset: WesternUSALiveFuelMoisture) -> None: + with pytest.raises(AssertionError, match="Invalid input variable name."): + WesternUSALiveFuelMoisture(dataset.root, input_features=["foo"]) + @pytest.fixture(params=["pandas"]) def mock_missing_module(self, monkeypatch: MonkeyPatch, request: SubRequest) -> str: import_orig = builtins.__import__ diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index e4647e38507..f433b8ef1b2 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -186,7 +186,7 @@ class WesternUSALiveFuelMoisture(NonGeoDataset): def __init__( self, root: str = "data", - input_variables: List[str] = all_variable_names, + input_features: List[str] = all_variable_names, transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, download: bool = False, api_key: Optional[str] = None, @@ -196,6 +196,7 @@ def __init__( Args: root: root directory where dataset can be found + input_features: which input features to include transforms: a function/transform that takes input sample and its target as entry and returns a transformed version download: if True, download dataset and store it in the root directory @@ -205,7 +206,7 @@ def __init__( Raises: AssertionError: if ``split`` argument is invalid RuntimeError: if ``download=False`` but dataset is missing or checksum fails - RuntimeError: if ``input_variables`` contains invalid variable names + RuntimeError: if ``input_features`` contains invalid variable names ImportError: if pandas are are not installed """ super().__init__() @@ -226,9 +227,9 @@ def __init__( ) assert all( - input in self.all_variable_names for input in input_variables + input in self.all_variable_names for input in input_features ), "Invalid input variable name." - self.input_variables = input_variables + self.input_features = input_features self.collection = self._retrieve_collection() @@ -291,7 +292,7 @@ def _load_data(self): data_rows.append(data_dict) df: pd.DataFrame = pd.DataFrame(data_rows) - df = df[self.input_variables + [self.label_name]] + df = df[self.input_features + [self.label_name]] return df def _verify(self) -> None: From bfb905e45c4aba5ae9bb785bd1dabaf4329ee97c Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 13:23:32 +0200 Subject: [PATCH 04/13] pyupgrade --- .../datasets/western_usa_live_fuel_moisture.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index f433b8ef1b2..907a06b5c11 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -6,7 +6,7 @@ import glob import json import os -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Optional import torch from torch import Tensor @@ -186,8 +186,8 @@ class WesternUSALiveFuelMoisture(NonGeoDataset): def __init__( self, root: str = "data", - input_features: List[str] = all_variable_names, - transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, + input_features: list[str] = all_variable_names, + transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None, download: bool = False, api_key: Optional[str] = None, checksum: bool = False, @@ -207,7 +207,7 @@ def __init__( AssertionError: if ``split`` argument is invalid RuntimeError: if ``download=False`` but dataset is missing or checksum fails RuntimeError: if ``input_features`` contains invalid variable names - ImportError: if pandas are are not installed + ImportError: if pandas is not installed """ super().__init__() @@ -235,7 +235,7 @@ def __init__( self.dataframe = self._load_data() - def _retrieve_collection(self) -> List[str]: + def _retrieve_collection(self) -> list[str]: """Retrieve dataset collection that maps samples to paths. Returns: @@ -253,18 +253,18 @@ def __len__(self) -> int: """ return len(self.dataframe) - def __getitem__(self, index: int) -> Dict[str, Any]: + def __getitem__(self, index: int) -> dict[str, Any]: """Return an index within the dataset. Args: index: index to return Returns: - data at that index + input features and target at that index """ data = self.dataframe.iloc[index, :] - sample: Dict[str, Tensor] = { + sample: dict[str, Tensor] = { "input": torch.tensor(data.drop([self.label_name]), dtype=torch.float32), "label": torch.tensor(data[self.label_name], dtype=torch.float32), } From 7406145891980106fee76f81e502840f9d6e9520 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 13:29:27 +0200 Subject: [PATCH 05/13] add docs --- docs/api/datasets.rst | 5 +++++ docs/api/non_geo_datasets.csv | 1 + torchgeo/datasets/western_usa_live_fuel_moisture.py | 7 ++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index 362f3b2a375..e0b7bcebb8f 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -329,6 +329,11 @@ VHR-10 .. autoclass:: VHR10 +WesternUSALiveFuelMoisture +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: WesternUSALiveFuelMoisture + xView2 ^^^^^^ diff --git a/docs/api/non_geo_datasets.csv b/docs/api/non_geo_datasets.csv index 035c8cba9d7..2dd3e2d866f 100644 --- a/docs/api/non_geo_datasets.csv +++ b/docs/api/non_geo_datasets.csv @@ -33,5 +33,6 @@ Dataset,Task,Source,# Samples,# Classes,Size (px),Resolution (m),Bands `USAVars`_,R,NAIP Aerial,100K,-,-,4,"RGB, NIR" `Vaihingen`_,S,Aerial,33,6,"1,281--3,816",0.09,RGB `VHR-10`_,I,"Google Earth, Vaihingen",800,10,"358--1,728",0.08--2,RGB +`WesternUSALiveFuelMoisture`_,R,"Landsat8, Sentinel-1",2615,-,-,- `xView2`_,CD,Maxar,"3,732",4,"1,024x1,024",0.8,RGB `ZueriCrop`_,"I, T",Sentinel-2,116K,48,24x24,10,MSI diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index 907a06b5c11..057a0d99e75 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -20,7 +20,8 @@ class WesternUSALiveFuelMoisture(NonGeoDataset): This tabular style dataset contains fuel moisture (mass of water in vegetation) and remotely sensed variables - in the western United States. For more details see the + in the western United States. It contains 2615 datapoints and 138 + variables. For more details see the `dataset page `_. If you use this dataset in your research, please cite the following paper: @@ -234,6 +235,10 @@ def __init__( self.collection = self._retrieve_collection() self.dataframe = self._load_data() + import pdb + + pdb.set_trace() + print(0) def _retrieve_collection(self) -> list[str]: """Retrieve dataset collection that maps samples to paths. From bf2a89ff11ce5618207fef7505fed665685bef8f Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 13:32:24 +0200 Subject: [PATCH 06/13] additional doc --- torchgeo/datasets/western_usa_live_fuel_moisture.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index 057a0d99e75..886c1448b17 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -24,6 +24,15 @@ class WesternUSALiveFuelMoisture(NonGeoDataset): variables. For more details see the `dataset page `_. + Dataset Format: + + * .geojson file for each datapoint + + Dataset Features: + + * 138 remote sensing derived variables, some with a time dependency + * 2615 datapoints with regression target of predicting fuel moisture + If you use this dataset in your research, please cite the following paper: * https://doi.org/10.1016/j.rse.2020.111797 From 5d2dca72017beb7d03b0f71c3f3aa141e19d1a92 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 16:13:04 +0200 Subject: [PATCH 07/13] forgot pdb --- torchgeo/datasets/western_usa_live_fuel_moisture.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index 886c1448b17..70124fa2a4d 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -244,10 +244,6 @@ def __init__( self.collection = self._retrieve_collection() self.dataframe = self._load_data() - import pdb - - pdb.set_trace() - print(0) def _retrieve_collection(self) -> list[str]: """Retrieve dataset collection that maps samples to paths. From d15f925635b5b0ac5d73532592cf3f68638236e5 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 18:42:22 +0200 Subject: [PATCH 08/13] requested changes --- torchgeo/datasets/western_usa_live_fuel_moisture.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index 70124fa2a4d..45160413702 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -43,6 +43,8 @@ class WesternUSALiveFuelMoisture(NonGeoDataset): * `radiant-mlhub `_ to download the imagery and labels from the Radiant Earth MLHub + + .. versionadded:: 0.5 """ collection_id = "su_sar_moisture_content" @@ -202,7 +204,7 @@ def __init__( api_key: Optional[str] = None, checksum: bool = False, ) -> None: - """Initialize a new Tropical Cyclone Wind Estimation Competition Dataset. + """Initialize a new Western USA Live Fuel Moisture Dataset. Args: root: root directory where dataset can be found @@ -284,7 +286,7 @@ def __getitem__(self, index: int) -> dict[str, Any]: return sample - def _load_data(self): + def _load_data(self) -> "pd.DataFrame": """Load data from individual files into pandas dataframe. Returns: From a06234d08b1b488c489769ae666c460118135d75 Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 18:58:33 +0200 Subject: [PATCH 09/13] ignore pd return type --- torchgeo/datasets/western_usa_live_fuel_moisture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index 45160413702..2c6e1c55c2a 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -286,7 +286,7 @@ def __getitem__(self, index: int) -> dict[str, Any]: return sample - def _load_data(self) -> "pd.DataFrame": + def _load_data(self) -> "pd.DataFrame": # type: ignore[name-defined] # noqa: F821 """Load data from individual files into pandas dataframe. Returns: From 976c6a117b02e848b2ad9a016f0d0b79b693a5de Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Tue, 18 Apr 2023 19:42:57 +0200 Subject: [PATCH 10/13] fill coverage gap --- .../datasets/test_western_usa_live_fuel_moisture.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/datasets/test_western_usa_live_fuel_moisture.py b/tests/datasets/test_western_usa_live_fuel_moisture.py index 465d2919e39..ad79ca7937c 100644 --- a/tests/datasets/test_western_usa_live_fuel_moisture.py +++ b/tests/datasets/test_western_usa_live_fuel_moisture.py @@ -56,8 +56,16 @@ def test_getitem(self, dataset: WesternUSALiveFuelMoisture, index: int) -> None: def test_len(self, dataset: WesternUSALiveFuelMoisture) -> None: assert len(dataset) == 3 - def test_already_downloaded(self, dataset: WesternUSALiveFuelMoisture) -> None: - WesternUSALiveFuelMoisture(root=dataset.root, download=True, api_key="") + def test_already_downloaded(self, tmp_path: Path) -> None: + pathname = os.path.join( + "tests", + "data", + "western_usa_live_fuel_moisture", + "su_sar_moisture_content.tar.gz", + ) + root = str(tmp_path) + shutil.copy(pathname, root) + WesternUSALiveFuelMoisture(root) def test_not_downloaded(self, tmp_path: Path) -> None: with pytest.raises(RuntimeError, match="Dataset not found in"): From e69b349862cff17333386bcf13386eaf9125064f Mon Sep 17 00:00:00 2001 From: Nils Lehmann Date: Wed, 19 Apr 2023 10:35:10 +0200 Subject: [PATCH 11/13] split dataset name for docs --- docs/api/datasets.rst | 4 ++-- docs/api/non_geo_datasets.csv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index e0b7bcebb8f..6668316a64e 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -329,8 +329,8 @@ VHR-10 .. autoclass:: VHR10 -WesternUSALiveFuelMoisture -^^^^^^^^^^^^^^^^^^^^^^^^^^ +Western USA Live Fuel Moisture +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: WesternUSALiveFuelMoisture diff --git a/docs/api/non_geo_datasets.csv b/docs/api/non_geo_datasets.csv index 2dd3e2d866f..69fc866296b 100644 --- a/docs/api/non_geo_datasets.csv +++ b/docs/api/non_geo_datasets.csv @@ -33,6 +33,6 @@ Dataset,Task,Source,# Samples,# Classes,Size (px),Resolution (m),Bands `USAVars`_,R,NAIP Aerial,100K,-,-,4,"RGB, NIR" `Vaihingen`_,S,Aerial,33,6,"1,281--3,816",0.09,RGB `VHR-10`_,I,"Google Earth, Vaihingen",800,10,"358--1,728",0.08--2,RGB -`WesternUSALiveFuelMoisture`_,R,"Landsat8, Sentinel-1",2615,-,-,- +`Western USA Live Fuel Moisture`_,R,"Landsat8, Sentinel-1",2615,-,-,- `xView2`_,CD,Maxar,"3,732",4,"1,024x1,024",0.8,RGB `ZueriCrop`_,"I, T",Sentinel-2,116K,48,24x24,10,MSI From 0b4361704ecbebe76641402a93dff1ba8214c836 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Wed, 19 Apr 2023 09:32:47 -0500 Subject: [PATCH 12/13] Add missing column --- docs/api/non_geo_datasets.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/non_geo_datasets.csv b/docs/api/non_geo_datasets.csv index 69fc866296b..4f7731e934f 100644 --- a/docs/api/non_geo_datasets.csv +++ b/docs/api/non_geo_datasets.csv @@ -33,6 +33,6 @@ Dataset,Task,Source,# Samples,# Classes,Size (px),Resolution (m),Bands `USAVars`_,R,NAIP Aerial,100K,-,-,4,"RGB, NIR" `Vaihingen`_,S,Aerial,33,6,"1,281--3,816",0.09,RGB `VHR-10`_,I,"Google Earth, Vaihingen",800,10,"358--1,728",0.08--2,RGB -`Western USA Live Fuel Moisture`_,R,"Landsat8, Sentinel-1",2615,-,-,- +`Western USA Live Fuel Moisture`_,R,"Landsat8, Sentinel-1",2615,-,-,-,- `xView2`_,CD,Maxar,"3,732",4,"1,024x1,024",0.8,RGB `ZueriCrop`_,"I, T",Sentinel-2,116K,48,24x24,10,MSI From c34f0dba7485f52c281d4267258c154369159d45 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Wed, 19 Apr 2023 09:37:39 -0500 Subject: [PATCH 13/13] Fix error list --- torchgeo/datasets/western_usa_live_fuel_moisture.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torchgeo/datasets/western_usa_live_fuel_moisture.py b/torchgeo/datasets/western_usa_live_fuel_moisture.py index 2c6e1c55c2a..63fd8fbeccc 100644 --- a/torchgeo/datasets/western_usa_live_fuel_moisture.py +++ b/torchgeo/datasets/western_usa_live_fuel_moisture.py @@ -216,10 +216,9 @@ def __init__( checksum: if True, check the MD5 of the downloaded files (may be slow) Raises: - AssertionError: if ``split`` argument is invalid - RuntimeError: if ``download=False`` but dataset is missing or checksum fails - RuntimeError: if ``input_features`` contains invalid variable names + AssertionError: if ``input_features`` contains invalid variable names ImportError: if pandas is not installed + RuntimeError: if ``download=False`` but dataset is missing or checksum fails """ super().__init__()