From 70ce46bbed9ae01c413934ffd0423d6cf05e1d2d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 12 Sep 2022 21:16:52 +0200
Subject: [PATCH 1/4] Bump actions/upload-artifact from 2 to 3 (#879)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 2 to 3.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v2...v3)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/ci_test-base.yml | 2 +-
 .github/workflows/ci_test-full.yml | 2 +-
 .github/workflows/docs-check.yml   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci_test-base.yml b/.github/workflows/ci_test-base.yml
index f91911c8ae..1b1dc1cf69 100644
--- a/.github/workflows/ci_test-base.yml
+++ b/.github/workflows/ci_test-base.yml
@@ -71,7 +71,7 @@ jobs:
         python -m pytest pl_bolts -v --cov=pl_bolts --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml --ignore=pl_bolts/datamodules --ignore=pl_bolts/datasets --ignore=pl_bolts/models/self_supervised/amdim/transforms.py --ignore=pl_bolts/models/rl
 
     - name: Upload pytest test results
-      uses: actions/upload-artifact@master
+      uses: actions/upload-artifact@v3
       with:
         name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}
         path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml
index f3896de9ec..95acca5875 100644
--- a/.github/workflows/ci_test-full.yml
+++ b/.github/workflows/ci_test-full.yml
@@ -83,7 +83,7 @@ jobs:
         python -m pytest pl_bolts tests -v --cov=pl_bolts --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
 
     - name: Upload pytest test results
-      uses: actions/upload-artifact@v2
+      uses: actions/upload-artifact@v3
       with:
         name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}
         path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml
index c285a396cf..76de0a1a4b 100644
--- a/.github/workflows/docs-check.yml
+++ b/.github/workflows/docs-check.yml
@@ -82,7 +82,7 @@ jobs:
         make html --debug --jobs 2 SPHINXOPTS="-W"
 
     - name: Upload built docs
-      uses: actions/upload-artifact@v2
+      uses: actions/upload-artifact@v3
       with:
         name: docs-results-${{ github.sha }}
         path: docs/build/html/

From a4855d02c1d6935b88660afc2c4b94ba54faba27 Mon Sep 17 00:00:00 2001
From: Akihiro Nitta
Date: Thu, 15 Sep 2022 18:48:18 +0900
Subject: [PATCH 2/4] CI: Reuse check schema (#868)

* Reuse check-schema

* Update azure path

Co-authored-by: Jirka Borovec
---
 .github/workflows/ci-schema.yml | 13 +++++++++++++
 .github/workflows/ci_schema.yml | 24 ------------------------
 2 files changed, 13 insertions(+), 24 deletions(-)
 create mode 100644 .github/workflows/ci-schema.yml
 delete mode 100644 .github/workflows/ci_schema.yml

diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml
new file mode 100644
index 0000000000..b2613bcdf9
--- /dev/null
+++ b/.github/workflows/ci-schema.yml
@@ -0,0 +1,13 @@
+name: Check Schema
+
+on:
+  push:
+    branches: [master, "release/*"]
+  pull_request:
+    branches: [master, "release/*"]
+
+jobs:
+  validate-schema:
+    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.1.0
+    with:
+      azure-dir: ''
diff --git a/.github/workflows/ci_schema.yml b/.github/workflows/ci_schema.yml
deleted file mode 100644
index 76c8ea9936..0000000000
--- a/.github/workflows/ci_schema.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: CI action schema
-on:  # Trigger the workflow on push or pull request, but only for the master branch
-  push: {}
-  pull_request:
-    branches: [master, "release/*"]
-
-jobs:
-  validate-schema:
-    runs-on: ubuntu-20.04
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Install pkg
-        run: |
-          pip install check-jsonschema
-
-      - name: GH Workflows
-        run: |
-          check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow"
-
-      - name: Azure Pipelines
-        run: |
-          check-jsonschema azure-pipelines.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"

From 9f3dd69be38bd16957fee308cfd56e7ee99f6f93 Mon Sep 17 00:00:00 2001
From: Shion Matsumoto
Date: Thu, 15 Sep 2022 06:28:03 -0400
Subject: [PATCH 3/4] FashionMNIST/EMNIST Datamodules (#871)

---
 pl_bolts/datamodules/emnist_datamodule.py     | 57 ++++++++-----------
 .../datamodules/fashion_mnist_datamodule.py   | 32 +++++------
 tests/datamodules/test_datamodules.py         |  6 +-
 3 files changed, 40 insertions(+), 55 deletions(-)

diff --git a/pl_bolts/datamodules/emnist_datamodule.py b/pl_bolts/datamodules/emnist_datamodule.py
index fb9831f88c..1c76cd2050 100644
--- a/pl_bolts/datamodules/emnist_datamodule.py
+++ b/pl_bolts/datamodules/emnist_datamodule.py
@@ -3,7 +3,6 @@
 from pl_bolts.datamodules.vision_datamodule import VisionDataModule
 from pl_bolts.transforms.dataset_normalizations import emnist_normalization
 from pl_bolts.utils import _TORCHVISION_AVAILABLE
-from pl_bolts.utils.stability import under_review
 from pl_bolts.utils.warnings import warn_missing_pkg
 
 if _TORCHVISION_AVAILABLE:
@@ -14,7 +13,6 @@
     EMNIST = object
 
 
-@under_review()
 class EMNISTDataModule(VisionDataModule):
     """
     .. figure:: https://user-images.githubusercontent.com/4632336/123210742-4d6b3380-d477-11eb-80da-3e9a74a18a07.png
@@ -76,6 +74,23 @@ class EMNISTDataModule(VisionDataModule):
 
     |
 
+    Args:
+        data_dir: Root directory of dataset.
+        split: The dataset has 6 different splits: ``byclass``, ``bymerge``,
+            ``balanced``, ``letters``, ``digits`` and ``mnist``.
+            This argument is passed to :class:`torchvision.datasets.EMNIST`.
+        val_split: Percent (float) or number (int) of samples to use for the validation split.
+        num_workers: How many workers to use for loading data
+        normalize: If ``True``, applies image normalize.
+        batch_size: How many samples per batch to load.
+        seed: Random seed to be used for train/val/test splits.
+        shuffle: If ``True``, shuffles the train data every epoch.
+        pin_memory: If ``True``, the data loader will copy Tensors into
+            CUDA pinned memory before returning them.
+        drop_last: If ``True``, drops the last incomplete batch.
+        strict_val_split: If ``True``, uses the validation split defined in the paper and ignores ``val_split``.
+            Note that it only works with ``"balanced"``, ``"digits"``, ``"letters"``, ``"mnist"`` splits.
+
     Here is the default EMNIST, train, val, test-splits and transforms.
 
     Transforms::
 
@@ -87,8 +102,10 @@ class EMNISTDataModule(VisionDataModule):
 
     Example::
 
         from pl_bolts.datamodules import EMNISTDataModule
+
         dm = EMNISTDataModule('.')
         model = LitModel()
+
         Trainer().fit(model, datamodule=dm)
     """
@@ -119,25 +136,6 @@ def __init__(
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        """
-        Args:
-            data_dir: Where to save/load the data.
-            split: The dataset has 6 different splits: ``byclass``, ``bymerge``,
-                ``balanced``, ``letters``, ``digits`` and ``mnist``.
-                This argument is passed to :class:`torchvision.datasets.EMNIST`.
-            val_split: Percent (float) or number (int) of samples
-                to use for the validation split.
-            num_workers: How many workers to use for loading data
-            normalize: If ``True``, applies image normalize.
-            batch_size: How many samples per batch to load.
-            seed: Random seed to be used for train/val/test splits.
-            shuffle: If ``True``, shuffles the train data every epoch.
-            pin_memory: If ``True``, the data loader will copy Tensors into
-                CUDA pinned memory before returning them.
-            drop_last: If ``True``, drops the last incomplete batch.
-            strict_val_split: If ``True``, uses the validation split defined in the paper and ignores ``val_split``.
-                Note that it only works with ``"balanced"``, ``"digits"``, ``"letters"``, ``"mnist"`` splits.
-        """
         if not _TORCHVISION_AVAILABLE:  # pragma: no cover
             raise ModuleNotFoundError(
                 "You want to use MNIST dataset loaded from `torchvision` which is not installed yet."
@@ -183,13 +181,11 @@ def num_classes(self) -> int:
 
     def prepare_data(self, *args: Any, **kwargs: Any) -> None:
         """Saves files to ``data_dir``."""
-
         self.dataset_cls(self.data_dir, split=self.split, train=True, download=True)
         self.dataset_cls(self.data_dir, split=self.split, train=False, download=True)
 
     def setup(self, stage: Optional[str] = None) -> None:
         """Creates train, val, and test dataset."""
-
         if stage == "fit" or stage is None:
             train_transforms = self.default_transforms() if self.train_transforms is None else self.train_transforms
             val_transforms = self.default_transforms() if self.val_transforms is None else self.val_transforms
@@ -212,14 +208,9 @@ def setup(self, stage: Optional[str] = None) -> None:
             )
 
     def default_transforms(self) -> Callable:
+        if self.normalize:
+            emnist_transforms = transform_lib.Compose([transform_lib.ToTensor(), emnist_normalization(self.split)])
+        else:
+            emnist_transforms = transform_lib.Compose([transform_lib.ToTensor()])
 
-        return (
-            transform_lib.Compose(
-                [
-                    transform_lib.ToTensor(),
-                    emnist_normalization(self.split),
-                ]
-            )
-            if self.normalize
-            else transform_lib.Compose([transform_lib.ToTensor()])
-        )
+        return emnist_transforms
diff --git a/pl_bolts/datamodules/fashion_mnist_datamodule.py b/pl_bolts/datamodules/fashion_mnist_datamodule.py
index bbb4a5a875..102e567f8a 100644
--- a/pl_bolts/datamodules/fashion_mnist_datamodule.py
+++ b/pl_bolts/datamodules/fashion_mnist_datamodule.py
@@ -2,7 +2,6 @@
 from pl_bolts.datamodules.vision_datamodule import VisionDataModule
 from pl_bolts.utils import _TORCHVISION_AVAILABLE
-from pl_bolts.utils.stability import under_review
 from pl_bolts.utils.warnings import warn_missing_pkg
 
 if _TORCHVISION_AVAILABLE:
@@ -13,7 +12,6 @@
     FashionMNIST = None
 
 
-@under_review()
 class FashionMNISTDataModule(VisionDataModule):
     """
     .. figure:: https://3qeqpr26caki16dnhd19sv6by6v-wpengine.netdna-ssl.com/
        wp-content/uploads/2019/02/Plot-of-a-Subset-of-Images-from-the-Fashion-MNIST-Dataset.png
        :width: 400
        :alt: Fashion MNIST
 
+    Args:
+        data_dir: Root directory of dataset.
+        val_split: Percent (float) or number (int) of samples to use for the validation split.
+        num_workers: Number of workers to use for loading data.
+        normalize: If ``True``, applies image normalization.
+        batch_size: Number of samples per batch to load.
+        seed: Random seed to be used for train/val/test splits.
+        shuffle: If ``True``, shuffles the train data every epoch.
+        pin_memory: If ``True``, the data loader will copy Tensors into CUDA pinned memory before
+            returning them.
+        drop_last: If ``True``, drops the last incomplete batch.
+
     Specs:
         - 10 classes (1 per type)
         - Each image is (1 x 28 x 28)
@@ -61,19 +71,6 @@ def __init__(
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        """
-        Args:
-            data_dir: Where to save/load the data
-            val_split: Percent (float) or number (int) of samples to use for the validation split
-            num_workers: How many workers to use for loading data
-            normalize: If true applies image normalize
-            batch_size: How many samples per batch to load
-            seed: Random seed to be used for train/val/test splits
-            shuffle: If true shuffles the train data every epoch
-            pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before
-                returning them
-            drop_last: If true drops the last incomplete batch
-        """
         if not _TORCHVISION_AVAILABLE:  # pragma: no cover
             raise ModuleNotFoundError(
                 "You want to use FashionMNIST dataset loaded from `torchvision` which is not installed yet."
@@ -95,10 +92,7 @@ def __init__(
 
     @property
    def num_classes(self) -> int:
-        """
-        Return:
-            10
-        """
+        """Returns the number of classes."""
         return 10
 
     def default_transforms(self) -> Callable:
diff --git a/tests/datamodules/test_datamodules.py b/tests/datamodules/test_datamodules.py
index c2ac9842cf..c81db6406b 100644
--- a/tests/datamodules/test_datamodules.py
+++ b/tests/datamodules/test_datamodules.py
@@ -112,7 +112,7 @@ def test_sr_datamodule(datadir):
 
 @pytest.mark.parametrize("split", ["byclass", "bymerge", "balanced", "letters", "digits", "mnist"])
 @pytest.mark.parametrize("dm_cls", [BinaryEMNISTDataModule, EMNISTDataModule])
-def test_emnist_datamodules(datadir, dm_cls, split):
+def test_emnist_datamodules(datadir, catch_warnings, dm_cls, split):
     """Test BinaryEMNIST and EMNIST datamodules download data and have the correct shape."""
     dm = _create_dm(dm_cls, datadir, split=split)
     train_loader = dm.train_dataloader()
@@ -129,7 +129,7 @@
 
 
 @pytest.mark.parametrize("dm_cls", [BinaryEMNISTDataModule, EMNISTDataModule])
-def test_emnist_datamodules_with_invalid_split(datadir, dm_cls):
+def test_emnist_datamodules_with_invalid_split(datadir, catch_warnings, dm_cls):
     """Test EMNIST datamodules raise an exception if the provided `split` doesn't exist."""
 
     with pytest.raises(ValueError, match="Unknown value"):
@@ -148,7 +148,7 @@
         ("mnist", 10_000),
     ],
 )
-def test_emnist_datamodules_with_strict_val_split(datadir, dm_cls, split, expected_val_split):
+def test_emnist_datamodules_with_strict_val_split(datadir, catch_warnings, dm_cls, split, expected_val_split):
     """Test EMNIST datamodules when strict_val_split is specified to use the validation set defined in the paper.
 
     Refer to https://arxiv.org/abs/1702.05373 for `expected_val_split` values.
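
A minimal usage sketch for the two datamodules touched by this patch (``LitModel`` is a placeholder for any user-defined LightningModule; the arguments shown are only the ones documented in the docstrings above)::

    import pytorch_lightning as pl

    from pl_bolts.datamodules import EMNISTDataModule, FashionMNISTDataModule

    # EMNIST: pick one of the six splits; strict_val_split selects the paper's validation split
    emnist_dm = EMNISTDataModule(".", split="balanced", strict_val_split=True, batch_size=32, num_workers=4)

    # FashionMNIST: the usual VisionDataModule-style arguments
    fashion_dm = FashionMNISTDataModule(".", val_split=0.1, batch_size=32, num_workers=4)

    model = LitModel()  # placeholder LightningModule that consumes (1, 28, 28) images
    pl.Trainer(max_epochs=1).fit(model, datamodule=emnist_dm)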
From 2a1f1346dde4b067d32d05ad303a7133a25acc4b Mon Sep 17 00:00:00 2001
From: Arnol Fokam
Date: Fri, 16 Sep 2022 10:51:25 +0200
Subject: [PATCH 4/4] Revision of SimCLR transforms (#857)

Co-authored-by: otaj
Co-authored-by: arnol
---
 .../self_supervised/simclr/transforms.py      | 105 ++++++------------
 tests/conftest.py                             |   3 +
 tests/models/self_supervised/unit/__init__.py |   0
 .../self_supervised/unit/test_transforms.py   |  55 +++++++++
 4 files changed, 90 insertions(+), 73 deletions(-)
 create mode 100644 tests/models/self_supervised/unit/__init__.py
 create mode 100644 tests/models/self_supervised/unit/test_transforms.py

diff --git a/pl_bolts/models/self_supervised/simclr/transforms.py b/pl_bolts/models/self_supervised/simclr/transforms.py
index aecc1388cc..37eccfd6c6 100644
--- a/pl_bolts/models/self_supervised/simclr/transforms.py
+++ b/pl_bolts/models/self_supervised/simclr/transforms.py
@@ -1,7 +1,4 @@
-import numpy as np
-
-from pl_bolts.utils import _OPENCV_AVAILABLE, _TORCHVISION_AVAILABLE
-from pl_bolts.utils.stability import under_review
+from pl_bolts.utils import _TORCHVISION_AVAILABLE
 from pl_bolts.utils.warnings import warn_missing_pkg
 
 if _TORCHVISION_AVAILABLE:
@@ -9,15 +6,9 @@
 else:  # pragma: no cover
     warn_missing_pkg("torchvision")
 
-if _OPENCV_AVAILABLE:
-    import cv2
-else:  # pragma: no cover
-    warn_missing_pkg("cv2", pypi_name="opencv-python")
-
 
-@under_review()
 class SimCLRTrainDataTransform:
-    """Transforms for SimCLR.
+    """Transforms for SimCLR during training step of the pre-training stage.
 
     Transform::
 
         RandomResizedCrop(size=self.input_height)
         RandomHorizontalFlip()
         RandomApply([color_jitter], p=0.8)
         RandomGrayscale(p=0.2)
-        GaussianBlur(kernel_size=int(0.1 * self.input_height))
+        RandomApply([GaussianBlur(kernel_size=int(0.1 * self.input_height))], p=0.5)
         transforms.ToTensor()
 
     Example::
 
         from pl_bolts.models.self_supervised.simclr.transforms import SimCLRTrainDataTransform
 
         transform = SimCLRTrainDataTransform(input_height=32)
         x = sample()
-        (xi, xj) = transform(x)
+        (xi, xj, xk) = transform(x)  # xk is only for the online evaluator if used
     """
 
     def __init__(
@@ -68,16 +59,16 @@ def __init__(
             if kernel_size % 2 == 0:
                 kernel_size += 1
 
-            data_transforms.append(GaussianBlur(kernel_size=kernel_size, p=0.5))
+            data_transforms.append(transforms.RandomApply([transforms.GaussianBlur(kernel_size=kernel_size)], p=0.5))
 
-        data_transforms = transforms.Compose(data_transforms)
+        self.data_transforms = transforms.Compose(data_transforms)
 
         if normalize is None:
             self.final_transform = transforms.ToTensor()
        else:
             self.final_transform = transforms.Compose([transforms.ToTensor(), normalize])
 
-        self.train_transform = transforms.Compose([data_transforms, self.final_transform])
+        self.train_transform = transforms.Compose([self.data_transforms, self.final_transform])
 
         # add online train transform of the size of global view
         self.online_transform = transforms.Compose(
@@ -93,9 +84,8 @@ def __call__(self, sample):
 
         return xi, xj, self.online_transform(sample)
 
 
-@under_review()
 class SimCLREvalDataTransform(SimCLRTrainDataTransform):
-    """Transforms for SimCLR.
+    """Transforms for SimCLR during the validation step of the pre-training stage.
 
     Transform::
 
@@ -109,7 +99,7 @@ class SimCLREvalDataTransform(SimCLRTrainDataTransform):
 
         transform = SimCLREvalDataTransform(input_height=32)
         x = sample()
-        (xi, xj) = transform(x)
+        (xi, xj, xk) = transform(x)  # xk is only for the online evaluator if used
     """
 
     def __init__(
@@ -129,70 +119,39 @@ def __init__(
         )
 
 
-@under_review()
-class SimCLRFinetuneTransform:
+class SimCLRFinetuneTransform(SimCLRTrainDataTransform):
+    """Transforms for SimCLR during the fine-tuning stage.
+
+    Transform::
+
+        Resize(input_height + 10, interpolation=3)
+        transforms.CenterCrop(input_height)
+        transforms.ToTensor()
+
+    Example::
+
+        from pl_bolts.models.self_supervised.simclr.transforms import SimCLRFinetuneTransform
+
+        transform = SimCLRFinetuneTransform(input_height=32)
+        x = sample()
+        xk = transform(x)
+    """
+
     def __init__(
         self, input_height: int = 224, jitter_strength: float = 1.0, normalize=None, eval_transform: bool = False
     ) -> None:
-        self.jitter_strength = jitter_strength
-        self.input_height = input_height
-        self.normalize = normalize
-
-        self.color_jitter = transforms.ColorJitter(
-            0.8 * self.jitter_strength,
-            0.8 * self.jitter_strength,
-            0.8 * self.jitter_strength,
-            0.2 * self.jitter_strength,
+        super().__init__(
+            normalize=normalize, input_height=input_height, gaussian_blur=None, jitter_strength=jitter_strength
         )
 
-        if not eval_transform:
-            data_transforms = [
-                transforms.RandomResizedCrop(size=self.input_height),
-                transforms.RandomHorizontalFlip(p=0.5),
-                transforms.RandomApply([self.color_jitter], p=0.8),
-                transforms.RandomGrayscale(p=0.2),
-            ]
-        else:
-            data_transforms = [
+        if eval_transform:
+            self.data_transforms = [
                 transforms.Resize(int(self.input_height + 0.1 * self.input_height)),
                 transforms.CenterCrop(self.input_height),
             ]
 
-        if normalize is None:
-            final_transform = transforms.ToTensor()
-        else:
-            final_transform = transforms.Compose([transforms.ToTensor(), normalize])
-
-        data_transforms.append(final_transform)
-        self.transform = transforms.Compose(data_transforms)
+        self.transform = transforms.Compose([self.data_transforms, self.final_transform])
 
     def __call__(self, sample):
         return self.transform(sample)
-
-
-@under_review()
-class GaussianBlur:
-    # Implements Gaussian blur as described in the SimCLR paper
-    def __init__(self, kernel_size, p=0.5, min=0.1, max=2.0):
-        if not _TORCHVISION_AVAILABLE:  # pragma: no cover
-            raise ModuleNotFoundError("You want to use `GaussianBlur` from `cv2` which is not installed yet.")
-
-        self.min = min
-        self.max = max
-
-        # kernel size is set to be 10% of the image height/width
-        self.kernel_size = kernel_size
-        self.p = p
-
-    def __call__(self, sample):
-        sample = np.array(sample)
-
-        # blur the image with a 50% chance
-        prob = np.random.random_sample()
-
-        if prob < self.p:
-            sigma = (self.max - self.min) * np.random.random_sample() + self.min
-            sample = cv2.GaussianBlur(sample, (self.kernel_size, self.kernel_size), sigma)
-
-        return sample
diff --git a/tests/conftest.py b/tests/conftest.py
index bf233e2185..6b63d39b70 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,6 +8,7 @@
 from pytorch_lightning.trainer.connectors.signal_connector import SignalConnector
 from pytorch_lightning.utilities.imports import _IS_WINDOWS
 
+from pl_bolts.utils import _TORCHVISION_AVAILABLE, _TORCHVISION_LESS_THAN_0_13
 from pl_bolts.utils.stability import UnderReviewWarning
 
 # GitHub Actions use this path to cache datasets.
@@ -27,6 +28,8 @@ def catch_warnings():
     with warnings.catch_warnings():
         warnings.simplefilter("error")
         warnings.simplefilter("ignore", UnderReviewWarning)
+        if _TORCHVISION_AVAILABLE and _TORCHVISION_LESS_THAN_0_13:
+            warnings.filterwarnings("ignore", "FLIP_LEFT_RIGHT is deprecated", DeprecationWarning)
         yield
 
diff --git a/tests/models/self_supervised/unit/__init__.py b/tests/models/self_supervised/unit/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/models/self_supervised/unit/test_transforms.py b/tests/models/self_supervised/unit/test_transforms.py
new file mode 100644
index 0000000000..737af74bcb
--- /dev/null
+++ b/tests/models/self_supervised/unit/test_transforms.py
@@ -0,0 +1,55 @@
+import numpy as np
+import pytest
+import torch
+from PIL import Image
+
+from pl_bolts.models.self_supervised.simclr.transforms import (
+    SimCLREvalDataTransform,
+    SimCLRFinetuneTransform,
+    SimCLRTrainDataTransform,
+)
+
+
+@pytest.mark.parametrize(
+    "transform_cls",
+    [pytest.param(SimCLRTrainDataTransform, id="train-data"), pytest.param(SimCLREvalDataTransform, id="eval-data")],
+)
+def test_simclr_train_data_transform(catch_warnings, transform_cls):
+    # dummy image
+    img = np.random.randint(low=0, high=255, size=(32, 32, 3), dtype=np.uint8)
+    img = Image.fromarray(img)
+
+    # size of the generated views
+    input_height = 96
+    transform = transform_cls(input_height=input_height)
+    views = transform(img)
+
+    # the transform must output a list or a tuple of images
+    assert isinstance(views, (list, tuple))
+
+    # the transform must output three images
+    # (1st view, 2nd view, online evaluation view)
+    assert len(views) == 3
+
+    # all views are tensors
+    assert all(torch.is_tensor(v) for v in views)
+
+    # all views have expected sizes
+    assert all(v.size(1) == v.size(2) == input_height for v in views)
+
+
+def test_simclr_finetune_transform(catch_warnings):
+    # dummy image
+    img = np.random.randint(low=0, high=255, size=(32, 32, 3), dtype=np.uint8)
+    img = Image.fromarray(img)
+
+    # size of the generated views
+    input_height = 96
+    transform = SimCLRFinetuneTransform(input_height=input_height)
+    view = transform(img)
+
+    # the view generated is a tensor
+    assert torch.is_tensor(view)
+
+    # view has expected size
+    assert view.size(1) == view.size(2) == input_height
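
For context, the revised pre-training transforms return three views per call (two augmented views plus the view consumed by the online evaluator). A minimal sketch of wiring them into a ``torchvision`` dataset; CIFAR-10 and ``cifar10_normalization`` are illustrative choices here, not part of this patch::

    from torchvision import datasets

    from pl_bolts.models.self_supervised.simclr.transforms import (
        SimCLREvalDataTransform,
        SimCLRTrainDataTransform,
    )
    from pl_bolts.transforms.dataset_normalizations import cifar10_normalization

    # each dataset item yields ((xi, xj, x_online), label)
    train_ds = datasets.CIFAR10(
        ".",
        train=True,
        download=True,
        transform=SimCLRTrainDataTransform(input_height=32, normalize=cifar10_normalization()),
    )
    val_ds = datasets.CIFAR10(
        ".",
        train=False,
        download=True,
        transform=SimCLREvalDataTransform(input_height=32, normalize=cifar10_normalization()),
    )

    (xi, xj, x_online), y = train_ds[0]  # two augmented views plus the online-evaluator view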