Mux up callback #1241

Merged
merged 47 commits on Jul 15, 2021

Changes from 33 commits

Commits (47)
ffb45ff
add DynamicBalanceClassSampler
Oct 7, 2020
2146b89
add DynamicBalanceClassSampler: add usage example
Oct 11, 2020
93a9d92
add DynamicBalanceClassSampler: add tests
Oct 11, 2020
8573676
Update catalyst/data/tests/test_sampler.py
Scitator Oct 13, 2020
f4b21ae
Update catalyst/data/tests/test_sampler.py
Scitator Oct 13, 2020
a12a05c
add DynamicBalanceClassSampler: debag tests
Oct 15, 2020
79332e1
update sampler: add mode
Nov 7, 2020
ef33956
add example notebook
Nov 7, 2020
2ad65c6
Merge remote-tracking branch 'original_C/master'
Nov 7, 2020
d61fc8f
sampler: fixes
Nov 8, 2020
2be40b3
samler: docs
Nov 8, 2020
594328f
Merge remote-tracking branch 'original_C/master'
Nov 8, 2020
7c6a68e
DynamicBalanceClassSampler: fixes
Nov 9, 2020
f5dafe4
change import order
Nov 9, 2020
070a4ad
change import order
Nov 9, 2020
3363054
Merge with master
Mar 28, 2021
458ae51
Merge remote-tracking branch 'original_C/master'
Apr 6, 2021
1815863
Merge remote-tracking branch 'original_C/master'
May 8, 2021
de8e681
Delete sampler
Jun 19, 2021
7ab855a
Return mixup callback
Jun 19, 2021
1fbabd2
CodeStyle
Jun 19, 2021
296d98c
Update MixUp: fix batch_size, keys, example
Jun 20, 2021
3ded77b
Merge branch 'master' into MuxUPCallback
Dokholyan Jun 20, 2021
4e94b35
CodeStyle and docstrings
Jul 2, 2021
00c5a48
Merge branch 'master' into MuxUPCallback
Dokholyan Jul 2, 2021
678a85e
MixUp tests
Jul 4, 2021
e48fb70
Merge branch 'MuxUPCallback' of https://github.com/Dokholyan/catalyst…
Jul 4, 2021
710167f
Fix beta type bag
Jul 4, 2021
6bea00c
Merge branch 'master' into MuxUPCallback
Dokholyan Jul 5, 2021
62a2209
Fix bag with devices
Jul 5, 2021
65ed442
Fix import order
Jul 5, 2021
3e7c358
Move the logic into a function
Jul 5, 2021
19d0d2d
Codestyle: fix imports
Jul 5, 2021
af583b9
Fix imports
Jul 5, 2021
6c53dd1
Add docs
Jul 5, 2021
b2fe666
Merge branch 'master' into MuxUPCallback
Dokholyan Jul 5, 2021
ac106d5
Update catalyst/callbacks/mixup.py
Dokholyan Jul 8, 2021
45597e8
Fix docstrings
Dokholyan Jul 8, 2021
59b4460
Update catalyst/utils/mixup.py
Dokholyan Jul 8, 2021
f264535
Update catalyst/utils/mixup.py
Dokholyan Jul 8, 2021
2769b9f
Codestyle fixes
Jul 8, 2021
66351e3
Merge branch 'MuxUPCallback' of https://github.com/Dokholyan/catalyst…
Jul 8, 2021
44eab9d
Fix Docstrings
Jul 8, 2021
4c5ca2e
Fix indexes generation
Jul 8, 2021
c0f36cc
Merge branch 'master' into MuxUPCallback
Dokholyan Jul 8, 2021
aabed03
Simplification MixUp utils
Jul 12, 2021
a4698ca
Merge branch 'MuxUPCallback' of https://github.com/Dokholyan/catalyst…
Jul 12, 2021
1 change: 1 addition & 0 deletions catalyst/callbacks/__init__.py
@@ -36,6 +36,7 @@
IEpochMetricHandlerCallback,
EarlyStoppingCallback,
)
from catalyst.callbacks.mixup import MixupCallback
from catalyst.callbacks.optimizer import IOptimizerCallback, OptimizerCallback

if SETTINGS.onnx_required:
269 changes: 168 additions & 101 deletions catalyst/callbacks/mixup.py
@@ -1,101 +1,168 @@
# from typing import List
#
# import numpy as np
# import torch
#
# from catalyst.callbacks.criterion import CriterionCallback
# from catalyst.core.runner import IRunner
#
#
# class MixupCallback(CriterionCallback):
# """Callback to do mixup augmentation.
#
# More details about mixin can be found in the paper
# `mixup: Beyond Empirical Risk Minimization`_.
#
# .. warning::
# `catalyst.contrib.callbacks.MixupCallback` is inherited from
# `catalyst.callbacks.CriterionCallback` and does its work.
# You may not use them together.
#
# .. _mixup\: Beyond Empirical Risk Minimization: # noqa: W605
# https://arxiv.org/abs/1710.09412
# """
#
# def __init__(
# self,
# input_key: str = "targets",
# output_key: str = "logits",
# fields: List[str] = ("features"),
# alpha=1.0,
# on_train_only=True,
# **kwargs
# ):
# """
# Args:
# fields: list of features which must be affected.
# alpha: beta distribution a=b parameters.
# Must be >=0. The more alpha closer to zero
# the less effect of the mixup.
# on_train_only: Apply to train only.
# As the mixup use the proxy inputs, the targets are also proxy.
# We are not interested in them, are we?
# So, if on_train_only is True, use a standard output/metric
# for validation.
# """
# assert isinstance(input_key, str) and isinstance(output_key, str)
# assert len(fields) > 0, "At least one field for MixupCallback is required"
# assert alpha >= 0, "alpha must be>=0"
#
# super().__init__(input_key=input_key, input_key=output_key, **kwargs)
#
# self.on_train_only = on_train_only
# self.fields = fields
# self.alpha = alpha
# self.lam = 1
# self.index = None
# self.is_needed = True
#
# def _compute_loss_value(self, runner: "IRunner", criterion):
# if not self.is_needed:
# return super()._compute_loss_value(runner, criterion)
#
# pred = runner.output[self.input_key]
# y_a = runner.input[self.input_key]
# y_b = runner.input[self.input_key][self.index]
#
# loss = self.lam * criterion(pred, y_a) + (1 - self.lam) * criterion(pred, y_b)
# return loss
#
# def on_loader_start(self, runner: "IRunner"):
# """Loader start hook.
#
# Args:
# runner: current runner
# """
# self.is_needed = not self.on_train_only or runner.is_train_loader
#
# def on_batch_start(self, runner: "IRunner") -> None:
# """Batch start hook.
#
# Args:
# runner: current runner
# """
# if not self.is_needed:
# return
#
# if self.alpha > 0:
# self.lam = np.random.beta(self.alpha, self.alpha)
# else:
# self.lam = 1
#
# self.index = torch.randperm(runner.input[self.fields[0]].shape[0])
# self.index.to(runner.device)
#
# for f in self.fields:
# runner.input[f] = (
# self.lam * runner.input[f] + (1 - self.lam) * runner.input[f][self.index]
# )
#
#
# __all__ = ["MixupCallback"]
from typing import List, Union

from catalyst.core import Callback, CallbackOrder, IRunner
from catalyst.utils import mixup_batch


class MixupCallback(Callback):
"""
Callback to do mixup augmentation. More details about mixup can be found in the paper
`mixup: Beyond Empirical Risk Minimization`: https://arxiv.org/abs/1710.09412 .

Examples:

.. code-block:: python

from typing import Any, Dict
import os

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader

from catalyst import dl
from catalyst.callbacks import MixupCallback
from catalyst.contrib.datasets import MNIST
from catalyst.data.transforms import ToTensor


class SimpleNet(nn.Module):
def __init__(self, in_channels, in_hw, out_features):
super().__init__()
self.encoder = nn.Sequential(nn.Conv2d(in_channels,
in_channels, 3, 1, 1), nn.Tanh())
self.clf = nn.Linear(in_channels * in_hw * in_hw, out_features)

def forward(self, x):
features = self.encoder(x)
features = features.view(features.size(0), -1)
logits = self.clf(features)
return logits


class SimpleDataset(torch.utils.data.Dataset):
def __init__(self, train: bool = False):
self.mnist = MNIST(os.getcwd(), train=train, download=True, transform=ToTensor())

def __len__(self) -> int:
return len(self.mnist)

def __getitem__(self, idx: int) -> Dict[str, Any]:
x, y = self.mnist.__getitem__(idx)
y_one_hot = np.zeros(10)
y_one_hot[y] = 1
return {"image": x,
"clf_targets": y,
"clf_targets_one_hot": torch.Tensor(y_one_hot)}


model = SimpleNet(1, 28, 10)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

loaders = {
"train": DataLoader(SimpleDataset(train=True), batch_size=32),
"valid": DataLoader(SimpleDataset(train=False), batch_size=32),
}


class CustomRunner(dl.Runner):
def handle_batch(self, batch):
image = batch["image"]
clf_logits = self.model(image)
self.batch["clf_logits"] = clf_logits


runner = CustomRunner()
runner.train(
loaders=loaders,
model=model,
criterion=criterion,
optimizer=optimizer,
logdir="./logdir14",
num_epochs=2,
verbose=True,
valid_loader="valid",
valid_metric="loss",
minimize_valid_metric=True,
callbacks={
"mixup": MixupCallback(keys=["image", "clf_targets_one_hot"]),
"criterion": dl.CriterionCallback(
metric_key="loss", input_key="clf_logits", target_key="clf_targets_one_hot",
),
"optimizer": dl.OptimizerCallback(metric_key="loss"),
"classification": dl.ControlFlowCallback(
dl.PrecisionRecallF1SupportCallback(
input_key="clf_logits", target_key="clf_targets", num_classes=10,
),
ignore_loaders="train",
),
},
)

.. note::
By running this callback, many metrics (for example, accuracy) become undefined, so
use ControlFlowCallback in order to evaluate the model (see the example above).
"""

def __init__(
self, keys: Union[str, List[str]], alpha=0.2, mode="replace", on_train_only=True, **kwargs,
):
"""

Args:
keys: batch keys to which you want to apply augmentation
alpha: beta distribution a=b parameters. Must be >= 0. The closer alpha is to zero, the
weaker the effect of the mixup.
mode: method of applying the augmentation. Must be one of ["replace", "add"]. If "replace",
the batch is replaced with its mixed version and the batch size does not change.
If "add", the mixed examples are concatenated to the current ones and the batch size
doubles.
on_train_only: apply to the train loader only. As mixup uses proxy inputs, the targets are
also proxy. We are not interested in them, are we? So, if on_train_only is True,
a standard output/metric is used for validation.
**kwargs:
"""
assert isinstance(keys, (str, list)), f"keys must be str or List[str], got: {type(keys)}"
assert alpha >= 0, "alpha must be >= 0"
assert mode in ["add", "replace"], f"mode must be one of 'add', 'replace', got: {mode}"
I'd appreciate it if we could use tuple or set here instead of a list

super().__init__(order=CallbackOrder.Internal)
if isinstance(keys, str):
keys = [keys]
self.keys = keys
self.on_train_only = on_train_only
self.alpha = alpha
self.mode = mode
self.required = True

def _handle_batch(self, runner: "IRunner") -> None:
"""
Applies mixup augmentation for a batch

Args:
runner: runner for the experiment.
"""
runner.batch = mixup_batch(runner.batch, self.keys, alpha=self.alpha, mode=self.mode)

def on_loader_start(self, runner: "IRunner") -> None:
"""
Loader start hook.

Args:
runner: current runner
"""
self.required = not self.on_train_only or runner.is_train_loader

def on_batch_start(self, runner: "IRunner") -> None:
"""
On batch start action.

Args:
runner: runner for the experiment.
"""
if self.required:
self._handle_batch(runner)


__all__ = ["MixupCallback"]
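
The class docstring above only demonstrates the default mode="replace". Below is a minimal sketch, not part of the diff, of what mode="add" does to the batch; _FakeRunner is a hypothetical stand-in that exposes only the batch attribute the callback touches, and the doubling follows from the mixup_batch utility added further down.

import torch
from catalyst.callbacks import MixupCallback

class _FakeRunner:
    """Bare-minimum stand-in for IRunner: just the attribute MixupCallback reads and writes."""
    def __init__(self, batch):
        self.batch = batch

batch = {
    "image": torch.rand(8, 1, 28, 28),
    "clf_targets_one_hot": torch.eye(10)[torch.randint(0, 10, (8,))],
}
runner = _FakeRunner(batch)

callback = MixupCallback(keys=["image", "clf_targets_one_hot"], alpha=0.2, mode="add")
callback.on_batch_start(runner)  # `required` defaults to True, so no on_loader_start is needed here

assert runner.batch["image"].shape[0] == 16  # "add" doubles the batch size
assert runner.batch["clf_targets_one_hot"].shape == (16, 10)

Because the mixed examples are appended to the originals, every downstream callback (criterion, metrics) will see the doubled batch.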
1 change: 1 addition & 0 deletions catalyst/utils/__init__.py
@@ -37,6 +37,7 @@
get_by_keys,
convert_labels2list,
)
from catalyst.utils.mixup import mixup_batch
from catalyst.utils.numpy import get_one_hot

from catalyst.utils.onnx import onnx_export
47 changes: 47 additions & 0 deletions catalyst/utils/mixup.py
@@ -0,0 +1,47 @@
from typing import Dict, List

import numpy as np
import torch


def mixup_batch(
batch: Dict[str, torch.Tensor], keys: List[str], alpha: float = 0.2, mode: str = "replace"
) -> Dict[str, torch.Tensor]:
"""

Args:
batch: batch to which you want to apply augmentation
keys: batch keys to which you want to apply augmentation
alpha: beta distribution a=b parameters. Must be >= 0. The closer alpha is to zero, the
weaker the effect of the mixup.
mode: method of applying the augmentation. Must be one of ["replace", "add"]. If "replace",
the batch is replaced with its mixed version and the batch size does not change.
If "add", the mixed examples are concatenated to the current ones and the batch size
doubles.

Returns:
augmented batch

"""
assert isinstance(keys, list), f"keys must be List[str], got: {type(keys)}"
I'd appreciate it if we could check not for lists only, but for other iterables too, for example, tuples

assert alpha >= 0, "alpha must be >= 0"
assert mode in ["add", "replace"], f"mode must be one of 'add', 'replace', got: {mode}"
I'd appreciate it if we could use tuple or set here instead of a list


batch_size = batch[keys[0]].shape[0]
beta = np.random.beta(alpha, alpha, batch_size).astype(np.float32)
indexes = np.array(list(range(batch_size)))
# index shift by 1
indexes_2 = (indexes + 1) % batch_size
for key in keys:
targets = batch[key]
device = targets.device
targets_shape = [batch_size] + [1] * len(targets.shape[1:])
key_beta = torch.Tensor(beta.reshape(targets_shape)).to(device)
targets = targets * key_beta + targets[indexes_2] * (1 - key_beta)

if mode == "replace":
batch[key] = targets
else:
# mode == 'add'
batch[key] = torch.cat([batch[key], targets])
return batch
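
A minimal standalone sketch of mixup_batch in both modes (not part of the diff; it only assumes the catalyst.utils.mixup_batch import path registered in the __init__.py change above). Note that mode="replace" writes the mixed tensors back into the dict it receives, hence the defensive copies.

import torch
from catalyst.utils import mixup_batch

batch = {
    "image": torch.rand(4, 3, 8, 8),
    "targets_one_hot": torch.eye(5)[torch.tensor([0, 1, 2, 3])],
}

# "replace": batch size stays the same, each sample is blended with its right neighbour
mixed = mixup_batch(
    {k: v.clone() for k, v in batch.items()},
    keys=["image", "targets_one_hot"], alpha=0.2, mode="replace",
)
assert mixed["image"].shape == (4, 3, 8, 8)
assert mixed["targets_one_hot"].max(1)[0].mean() <= 1  # one-hot rows become soft labels

# "add": mixed examples are concatenated to the originals, so the batch size doubles
doubled = mixup_batch(
    {k: v.clone() for k, v in batch.items()},
    keys=["image", "targets_one_hot"], alpha=0.2, mode="add",
)
assert doubled["image"].shape[0] == 8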
31 changes: 31 additions & 0 deletions tests/catalyst/callbacks/test_mixup.py
@@ -0,0 +1,31 @@
# flake8: noqa
from typing import Tuple

import torch
from torch.utils.data import DataLoader, TensorDataset

from catalyst import dl, utils
from catalyst.callbacks import MixupCallback


class DymmyRunner(dl.Runner):
def handle_batch(self, batch: Tuple[torch.Tensor]):
self.batch = {"image": batch[0], "clf_targets_one_hot": batch[1]}


def test_mixup_1():
utils.set_global_seed(42)
num_samples, num_features, num_classes = int(1e4), int(1e1), 4
X = torch.rand(num_samples, num_features)
y = (torch.rand(num_samples,) * num_classes).to(torch.int64)
y = torch.nn.functional.one_hot(y, num_classes).double()
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}
runner = DymmyRunner()
callback = MixupCallback(keys=["image", "clf_targets_one_hot"])
for loader_name in ["train", "valid"]:
for batch in loaders[loader_name]:
runner.handle_batch(batch)
callback.on_batch_start(runner)
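# after mixup each one-hot row becomes a convex combination of two (usually different) labels,
# so the mean of the per-row maxima is expected to drop below 1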
assert runner.batch["clf_targets_one_hot"].max(1)[0].mean() < 1