Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding South America Soybean Dataset #1668

Merged
merged 82 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from 81 commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
e510a06
Created file for South America Soybean dataset and added it to __init…
cookie-kyu Oct 16, 2023
c67cae4
Updated south_america_soybean.py
cookie-kyu Oct 25, 2023
5153b0c
Merge branch 'microsoft:main' into main
cookie-kyu Nov 8, 2023
79e6970
Added tests
cookie-kyu Nov 8, 2023
5b0e053
Merge branch 'main' of https://github.com/cookie-kyu/torchgeo
cookie-kyu Nov 8, 2023
ac40ec1
Updated data.py
cookie-kyu Nov 14, 2023
7655805
Merge branch 'main' into main
cookie-kyu Nov 14, 2023
fd01011
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
46db62b
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
f53d822
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
1ed0c28
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
7a8e0cf
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
6b8b143
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
38bc7cb
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
95f5368
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
b6c9efc
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
5bf910b
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
2c2cc42
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
3eb2e33
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
355d8f1
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
430674a
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
5cc76e7
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
6cc7e7c
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
4f41e13
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
45b306e
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
558cf44
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
4c7a5aa
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
48f1b63
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
281fda2
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
12d1ea3
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
bed3b03
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
c66359b
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
3886f0b
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
824ce88
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
557b169
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
55c1720
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
6749974
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
d1e7a01
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
e788624
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
1580964
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
a5262ce
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
d8ca8d6
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
0a256a3
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
c0070ed
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
1fe3a16
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
8a6d0b0
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Nov 18, 2023
1385b4c
Merge branch 'main' into main
cookie-kyu Nov 18, 2023
74356ee
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
6b59cfd
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
61579dd
Update tests/datasets/test_south_america_soybean.py
cookie-kyu Nov 18, 2023
e67613f
Updated tests
cookie-kyu Dec 1, 2023
97ba0fc
fixed an error in init
cookie-kyu Jan 4, 2024
305ea44
fixed some path inconsistencies
cookie-kyu Jan 15, 2024
d3e82ed
fixed all errors
cookie-kyu Jan 22, 2024
4ddaf70
Fix comments
cookie-kyu Jan 23, 2024
ddecb3c
added dataset to datasets.rst
cookie-kyu Jan 26, 2024
7d8eefc
edit datasets.rst
cookie-kyu Jan 26, 2024
243d488
pushed again
cookie-kyu Jan 26, 2024
66d0e93
Delete tests/data/south_america_soybean/.DS_Store
cookie-kyu Jan 26, 2024
4278c23
Update docs/api/datasets.rst
cookie-kyu Jan 26, 2024
3b9d606
Edited datasets.rst
cookie-kyu Jan 26, 2024
83af882
Edited datasets.rst
cookie-kyu Jan 26, 2024
c900488
Fixed styling
cookie-kyu Jan 26, 2024
4a506ef
Merge branch 'main' into main
adamjstewart Jan 27, 2024
d147b7f
Fix docstring formatting
adamjstewart Jan 27, 2024
0326926
Fix whitespace
adamjstewart Jan 27, 2024
f739867
Add blank line
adamjstewart Jan 27, 2024
fb5ea8d
Merge branch 'main' into main
cookie-kyu Jan 27, 2024
abe019a
Fixed download urls
cookie-kyu Jan 28, 2024
ffa24d1
Merge branch 'main' of https://github.com/cookie-kyu/torchgeo
cookie-kyu Jan 28, 2024
f130c0c
Update geo_datasets.csv
cookie-kyu Jan 28, 2024
50c8460
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Jan 29, 2024
d265ea7
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Jan 29, 2024
96af9e5
Update torchgeo/datasets/south_america_soybean.py
cookie-kyu Jan 29, 2024
773bd41
Updated geo_datasets.csv and added years parameter to class
cookie-kyu Jan 31, 2024
7bc9ea9
Merge branch 'main' of https://github.com/cookie-kyu/torchgeo
cookie-kyu Jan 31, 2024
d84a52a
Delete tests/data/.DS_Store
cookie-kyu Jan 31, 2024
5e76506
Delete tests/.DS_Store
cookie-kyu Jan 31, 2024
f78d63d
Merge branch 'main' into main
cookie-kyu Jan 31, 2024
7dca57b
Update south_america_soybean.py
cookie-kyu Feb 6, 2024
c48bbad
Merge branch 'main' into main
cookie-kyu Feb 6, 2024
477ddd6
Fix docstring formatting
adamjstewart Feb 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ Sentinel
.. autoclass:: Sentinel1
.. autoclass:: Sentinel2


South America Soybean
^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: SouthAmericaSoybean


.. _Non-geospatial Datasets:

Non-geospatial Datasets
Expand Down
1 change: 1 addition & 0 deletions docs/api/geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ Dataset,Type,Source,License,Size (px),Resolution (m)
`Open Buildings`_,Geometries,"Maxar, CNES/Airbus","CC-BY-4.0 OR ODbL-1.0",-,-
`PRISMA`_,Imagery,PRISMA,-,512x512,5--30
`Sentinel`_,Imagery,Sentinel,"CC-BY-SA-3.0-IGO","10,000x10,000",10
`South America Soybean`_,Masks,"Landsat, MODIS",-,-,30
Binary file not shown.
Binary file not shown.
Binary file not shown.
66 changes: 66 additions & 0 deletions tests/data/south_america_soybean/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import hashlib
import os
import shutil

import numpy as np
import rasterio
from rasterio.crs import CRS
from rasterio.transform import Affine

SIZE = 32


np.random.seed(0)
files = ["South_America_Soybean_2002.tif", "South_America_Soybean_2021.tif"]


def create_file(path: str, dtype: str):
    """Write a small random binary raster used as test data.

    The file is a single-band, LZW-compressed GeoTIFF of SIZE x SIZE pixels in
    EPSG:4326 whose pixel values are drawn at random from {0, 1}.

    Args:
        path: location of the GeoTIFF to create
        dtype: data type recorded in the raster profile
    """
    # Geotransform of the fake raster (pixel size and upper-left corner).
    transform = Affine(
        0.0002499999999999943131,
        0.0,
        -82.0005000000000024,
        0.0,
        -0.0002499999999999943131,
        0.0005000000000000,
    )
    profile = dict(
        driver="GTiff",
        dtype=dtype,
        count=1,
        crs=CRS.from_epsg(4326),
        transform=transform,
        height=SIZE,
        width=SIZE,
        compress="lzw",
        predictor=2,
    )

    # Only the two class values of the dataset are ever written.
    allowed_values = [0, 1]
    mask = np.random.choice(allowed_values, size=(SIZE, SIZE))

    with rasterio.open(path, "w", **profile) as dst:
        dst.write(mask, 1)


if __name__ == "__main__":
    # Name shared by the data directory and the archive built from it.
    name = "SouthAmericaSoybean"
    data_dir = os.path.join(os.getcwd(), name)

    # Start from an empty directory so stale files never end up in the archive.
    if os.path.isdir(data_dir):
        shutil.rmtree(data_dir)

    os.makedirs(data_dir, exist_ok=True)

    for filename in files:
        create_file(os.path.join(data_dir, filename), dtype="int8")

    # Compress data. ``base_dir`` must be given relative to ``root_dir``;
    # an absolute path would embed the full filesystem path in the archive.
    shutil.make_archive(name, "zip", root_dir=".", base_dir=name)

    # Compute checksums
    with open(f"{name}.zip", "rb") as f:
        md5 = hashlib.md5(f.read()).hexdigest()
        print(f"{name}.zip: {md5}")
104 changes: 104 additions & 0 deletions tests/datasets/test_south_america_soybean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import os
adamjstewart marked this conversation as resolved.
Show resolved Hide resolved
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from pytest import MonkeyPatch
from rasterio.crs import CRS

import torchgeo.datasets.utils
from torchgeo.datasets import (
BoundingBox,
DatasetNotFoundError,
IntersectionDataset,
SouthAmericaSoybean,
UnionDataset,
)


def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
    """Stand-in for the real downloader: copy the local file *url* into *root*.

    Any additional positional or keyword arguments are accepted and ignored.
    """
    source, destination = url, root
    shutil.copy(source, destination)


class TestSouthAmericaSoybean:
    """Tests for the SouthAmericaSoybean dataset."""

    @pytest.fixture
    def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> SouthAmericaSoybean:
        """Return a dataset "downloaded" from the local test rasters."""
        # Swap the downloader for a plain local file copy.
        monkeypatch.setattr(
            torchgeo.datasets.south_america_soybean, "download_url", download_url
        )

        # Point the URL template at the rasters shipped with the test suite.
        data_dir = os.path.join(
            "tests", "data", "south_america_soybean", "SouthAmericaSoybean"
        )
        local_url = os.path.join(data_dir, "South_America_Soybean_{}.tif")
        monkeypatch.setattr(SouthAmericaSoybean, "url", local_url)

        return SouthAmericaSoybean(
            paths=str(tmp_path),
            years=[2002, 2021],
            transforms=nn.Identity(),
            download=True,
            checksum=True,
        )

    def test_getitem(self, dataset: SouthAmericaSoybean) -> None:
        """Indexing with the full bounds yields a dict with a CRS and a mask."""
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)
        assert isinstance(sample["crs"], CRS)
        assert isinstance(sample["mask"], torch.Tensor)

    def test_and(self, dataset: SouthAmericaSoybean) -> None:
        """``&`` produces an IntersectionDataset."""
        combined = dataset & dataset
        assert isinstance(combined, IntersectionDataset)

    def test_or(self, dataset: SouthAmericaSoybean) -> None:
        """``|`` produces a UnionDataset."""
        combined = dataset | dataset
        assert isinstance(combined, UnionDataset)

    def test_already_extracted(self, dataset: SouthAmericaSoybean) -> None:
        """Re-instantiating over an already populated directory succeeds."""
        SouthAmericaSoybean(dataset.paths, download=True)

    def test_already_downloaded(self, tmp_path: Path) -> None:
        """A raster placed in the root by hand is picked up without download."""
        raster = os.path.join(
            "tests",
            "data",
            "south_america_soybean",
            "SouthAmericaSoybean",
            "South_America_Soybean_2002.tif",
        )
        target_dir = str(tmp_path)
        shutil.copy(raster, target_dir)
        SouthAmericaSoybean(target_dir)

    def test_plot(self, dataset: SouthAmericaSoybean) -> None:
        """Plotting a plain sample works."""
        sample = dataset[dataset.bounds]
        dataset.plot(sample, suptitle="Test")
        plt.close()

    def test_plot_prediction(self, dataset: SouthAmericaSoybean) -> None:
        """Plotting a sample that also carries a prediction works."""
        sample = dataset[dataset.bounds]
        sample["prediction"] = sample["mask"].clone()
        dataset.plot(sample, suptitle="Prediction")
        plt.close()

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """An empty root without download=True raises DatasetNotFoundError."""
        with pytest.raises(DatasetNotFoundError, match="Dataset not found"):
            SouthAmericaSoybean(str(tmp_path))

    def test_invalid_query(self, dataset: SouthAmericaSoybean) -> None:
        """A query outside the dataset bounds raises IndexError."""
        outside = BoundingBox(0, 0, 0, 0, 0, 0)
        with pytest.raises(
            IndexError, match="query: .* not found in index with bounds:"
        ):
            dataset[outside]
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
from .sentinel import Sentinel, Sentinel1, Sentinel2
from .skippd import SKIPPD
from .so2sat import So2Sat
from .south_america_soybean import SouthAmericaSoybean
from .spacenet import (
SpaceNet,
SpaceNet1,
Expand Down Expand Up @@ -185,6 +186,7 @@
"Sentinel",
"Sentinel1",
"Sentinel2",
"SouthAmericaSoybean",
# NonGeoDataset
"ADVANCE",
"BeninSmallHolderCashews",
Expand Down
174 changes: 174 additions & 0 deletions torchgeo/datasets/south_america_soybean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""South America Soybean Dataset."""

from collections.abc import Iterable
from typing import Any, Callable, Optional, Union

import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from rasterio.crs import CRS

from .geo import RasterDataset
from .utils import DatasetNotFoundError, download_url


class SouthAmericaSoybean(RasterDataset):
    """South America Soybean Dataset.

    This dataset produced annual 30-m soybean maps of South America from 2001 to 2021.

    Link: https://www.nature.com/articles/s41893-021-00729-z

    Dataset contains 2 classes:

    0. nodata
    1. soybean

    Dataset Format:

    * 21 .tif files


    If you use this dataset in your research, please cite the following paper:

    * https://doi.org/10.1038/s41893-021-00729-z

    .. versionadded:: 0.6
    """

    # Files fetched from ``url`` are named ``SouthAmerica_Soybean_<year>.tif``,
    # whereas the previously accepted spelling is
    # ``South_America_Soybean_<year>.tif``. Both are matched so that freshly
    # downloaded files are discovered as well as existing local copies.
    filename_glob = "South*America_Soybean_*.*"
    filename_regex = r"South_?America_Soybean_(?P<year>\d{4})"

    date_format = "%Y"
    is_image = False
    url = "https://glad.umd.edu/projects/AnnualClassMapsV1/SouthAmerica_Soybean_{}.tif"

    # MD5 checksum of the raster of each year.
    # NOTE(review): the entries for 2021, 2020 and 2016 are only 31 hex digits
    # long, so they cannot be complete MD5 digests -- confirm against the
    # published files before relying on ``checksum=True`` for those years.
    md5s = {
        2021: "edff3ada13a1a9910d1fe844d28ae4f",
        2020: "0709dec807f576c9707c8c7e183db31",
        2019: "441836493bbcd5e123cff579a58f5a4f",
        2018: "503c2d0a803c2a2629ebbbd9558a3013",
        2017: "4d0487ac1105d171e5f506f1766ea777",
        2016: "770c558f6ac40550d0e264da5e44b3e",
        2015: "6beb96a61fe0e9ce8c06263e500dde8f",
        2014: "824ff91c62a4ba9f4ccfd281729830e5",
        2013: "0263e19b3cae6fdaba4e3b450cef985e",
        2012: "9f3a71097c9836fcff18a13b9ba608b2",
        2011: "b73352ebea3d5658959e9044ec526143",
        2010: "9264532d36ffa93493735a6e44caef0d",
        2009: "341387c1bb42a15140c80702e4cca02d",
        2008: "96fc3f737ab3ce9bcd16cbf7761427e2",
        2007: "bb8549b6674163fe20ffd47ec4ce8903",
        2006: "eabaa525414ecbff89301d3d5c706f0b",
        2005: "89faae27f9b5afbd06935a465e5fe414",
        2004: "f9882ca9c70e054e50172835cb75a8c3",
        2003: "cad5ed461ff4ab45c90177841aaecad2",
        2002: "8a4a9dcea54b3ec7de07657b9f2c0893",
        2001: "2914b0af7590a0ca4dfa9ccefc99020f",
    }

    def __init__(
        self,
        paths: Union[str, Iterable[str]] = "data",
        crs: Optional[CRS] = None,
        res: Optional[float] = None,
        years: list[int] = [2021],
        transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
        cache: bool = True,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            paths: one or more root directories to search or files to load
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS
                (defaults to the resolution of the first file found)
            years: list of years for which to use the South America Soybean layer
            transforms: a function/transform that takes an input sample
                and returns a transformed version
            cache: if True, cache file handle to speed up repeated sampling
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 after downloading files (may be slow)

        Raises:
            DatasetNotFoundError: If dataset is not found and *download* is False.
        """
        self.paths = paths
        self.download = download
        self.checksum = checksum
        # Copy the list so that neither the shared default nor the caller's
        # list is aliased by this instance.
        self.years = list(years)
        self._verify()

        super().__init__(paths, crs, res, transforms=transforms, cache=cache)

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Returns immediately when matching files are already present; otherwise
        downloads them if requested.

        Raises:
            DatasetNotFoundError: If dataset is not found and *download* is False.
        """
        # Check if the extracted files already exist
        if self.files:
            return

        # Check if the user requested to download the dataset
        if not self.download:
            raise DatasetNotFoundError(self)

        # Downloading needs a single root directory to store the files in
        assert isinstance(self.paths, str)

        # Download the dataset
        self._download()

    def _download(self) -> None:
        """Download the raster of every requested year into the root directory."""
        for year in self.years:
            download_url(
                self.url.format(year),
                self.paths,
                md5=self.md5s[year] if self.checksum else None,
            )

    def plot(
        self,
        sample: dict[str, Any],
        show_titles: bool = True,
        suptitle: Optional[str] = None,
    ) -> Figure:
        """Plot a sample from the dataset.

        Args:
            sample: a sample returned by :meth:`RasterDataset.__getitem__`
            show_titles: flag indicating whether to show titles above each panel
            suptitle: optional string to use as a suptitle

        Returns:
            a matplotlib Figure with the rendered sample
        """
        mask = sample["mask"].squeeze()
        ncols = 1

        # A second panel is only drawn when the sample carries a prediction
        showing_predictions = "prediction" in sample
        if showing_predictions:
            pred = sample["prediction"].squeeze()
            ncols = 2

        fig, axs = plt.subplots(
            nrows=1, ncols=ncols, figsize=(ncols * 4, 4), squeeze=False
        )

        axs[0, 0].imshow(mask, interpolation="none")
        axs[0, 0].axis("off")

        if show_titles:
            axs[0, 0].set_title("Mask")

        if showing_predictions:
            axs[0, 1].imshow(pred, interpolation="none")
            axs[0, 1].axis("off")
            if show_titles:
                axs[0, 1].set_title("Prediction")

        if suptitle is not None:
            # Title the figure that is returned, not whichever one is current
            fig.suptitle(suptitle)

        return fig
Loading