HazyResearch · ajratner · Oct 12, 2018 · Aug 28, 2018 · Aug 28, 2018 · Aug 28, 2018
diff --git a/.isort.cfg b/.isort.cfg
@@ -4,4 +4,4 @@ include_trailing_comma=True
 force_grid_wrap=0
 combine_as_imports=True
 line_length=80
-known_third_party=matplotlib,networkx,nltk,numpy,pandas,scipy,setuptools,sklearn,torch,torchtext
+known_third_party=matplotlib,networkx,nltk,numpy,pandas,scipy,setuptools,sklearn,torch,torchtext,tqdm
diff --git a/environment.yml b/environment.yml
@@ -13,4 +13,5 @@ dependencies:
   - pandas
   - pytorch=0.4.1
   - runipy
-  - scipy
+  - scipy
+  - tqdm
diff --git a/metal/classifier.py b/metal/classifier.py
@@ -5,6 +5,8 @@
 import torch
 import torch.nn as nn
 import torch.optim as optim
+from torch.utils.data.dataloader import DataLoader
+from tqdm import tqdm
 
 from metal.analysis import confusion_matrix
 from metal.metrics import metric_score
@@ -110,21 +112,21 @@ def train(self, *args, **kwargs):
         """
         raise NotImplementedError
 
-    def _train(self, train_loader, loss_fn, X_dev=None, Y_dev=None):
+    def _train(self, train_loader, loss_fn, dev_loader=None):
         """The internal training routine called by train() after initial setup
 
         Args:
             train_loader: a torch DataLoader of X (data) and Y (labels) for
                 the train split
             loss_fn: the loss function to minimize (maps *data -> loss)
-            X_dev: the dev set model input
-            Y_dev: the dev set target labels
+            dev_loader: a torch DataLoader of X (data) and Y (labels) for
+                the dev split
 
-        If either of X_dev or Y_dev is not provided, then no checkpointing or
+        If dev_loader is not provided, then no checkpointing or
         evaluation on the dev set will occur.
         """
         train_config = self.config["train_config"]
-        evaluate_dev = X_dev is not None and Y_dev is not None
+        evaluate_dev = dev_loader is not None
 
         # Set the optimizer
         optimizer_config = train_config["optimizer_config"]
@@ -142,10 +144,27 @@ def _train(self, train_loader, loss_fn, X_dev=None, Y_dev=None):
                 model_class, **checkpoint_config, verbose=self.config["verbose"]
             )
 
+        # Moving model to GPU
+        if train_config["use_cuda"]:
+
+            if self.config["verbose"]:
+                print("Using GPU...")
+
+            self.cuda()
+
         # Train the model
         for epoch in range(train_config["n_epochs"]):
             epoch_loss = 0.0
-            for data in train_loader:
+            for batch, data in tqdm(
+                enumerate(train_loader),
+                total=len(train_loader),
+                disable=train_config["disable_prog_bar"],
+            ):
+
+                # moving data to GPU
+                if train_config["use_cuda"]:
+                    data = [d.cuda() for d in data]
+
                 # Zero the parameter gradients
                 optimizer.zero_grad()
 
@@ -195,8 +214,9 @@ def _train(self, train_loader, loss_fn, X_dev=None, Y_dev=None):
             if evaluate_dev and (epoch % train_config["validation_freq"] == 0):
                 val_metric = train_config["validation_metric"]
                 dev_score = self.score(
-                    X_dev, Y_dev, metric=val_metric, verbose=False
+                    dev_loader, metric=val_metric, verbose=False
                 )
+
                 if train_config["checkpoint"]:
                     checkpointer.checkpoint(self, epoch, dev_score)
 
@@ -229,7 +249,8 @@ def _train(self, train_loader, loss_fn, X_dev=None, Y_dev=None):
             print("Finished Training")
 
             if evaluate_dev:
-                Y_p_dev = self.predict(X_dev)
+                # Currently use default random break ties in evaluate
+                Y_p_dev, Y_dev = self.evaluate(dev_loader)
 
                 if not self.multitask:
                     print("Confusion Matrix (Dev)")
@@ -271,10 +292,84 @@ def _set_scheduler(self, scheduler_config, optimizer):
             )
         return lr_scheduler
 
+    def _batch_evaluate(self, loader, break_ties="random", **kwargs):
+        """Runs prediction over a DataLoader one minibatch at a time
+
+        Args:
+            loader: Pytorch DataLoader yielding (X, Y) batches:
+                X: The input for the predict method
+                Y: An [n] or [n, 1] torch.Tensor or np.ndarray of target labels
+                    in {1,...,k}
+            break_ties: tie-breaking policy, forwarded to predict() and to
+                _break_ties() for any label batch with more than one dimension
+            **kwargs: extra keyword arguments forwarded to predict()
+
+        Returns:
+            Y_p: an np.ndarray of predictions
+            Y: an np.ndarray of ground truth labels
+        """
+        labels = []
+        preds = []
+        # Only the model input is moved to the GPU; labels become numpy arrays
+        for X_batch, Y_batch in loader:
+            if self.config["train_config"]["use_cuda"]:
+                X_batch = X_batch.cuda()
+
+            Y_batch = self._to_numpy(Y_batch)
+            # Labels with more than one dimension go through _break_ties
+            if Y_batch.ndim > 1:
+                Y_batch = self._break_ties(Y_batch, break_ties)
+            labels.append(Y_batch)
+
+            Y_p_batch = self.predict(X_batch, break_ties=break_ties, **kwargs)
+            preds.append(self._to_numpy(Y_p_batch))
+
+        # Stitch the per-batch arrays back together in loader order
+        Y = np.hstack(labels)
+        Y_p = np.hstack(preds)
+
+        return Y_p, Y
+
+    def evaluate(self, data, break_ties="random", **kwargs):
+        """Evaluates the model
+
+        Args:
+            data: either a Pytorch DataLoader or tuple supplying (X,Y):
+                X: The input for the predict method
+                Y: An [n] or [n, 1] torch.Tensor or np.ndarray of target labels
+                    in {1,...,k}
+            break_ties: How to break ties when making predictions
+            **kwargs: Extra keyword arguments forwarded to predict()
+
+        Returns:
+            Y_p: an np.ndarray of predictions
+            Y: an np.ndarray of ground truth labels
+
+        Raises:
+            ValueError: if data is neither a tuple nor a DataLoader
+        """
+        # isinstance (not an exact type match) so subclasses are accepted too
+        if isinstance(data, tuple):
+            X, Y = data
+            if self.config["train_config"]["use_cuda"]:
+                X = X.cuda()
+            Y = self._to_numpy(Y)
+            if Y.ndim > 1:
+                Y = self._break_ties(Y, break_ties)
+            Y_p = self.predict(X, break_ties=break_ties, **kwargs)
+        elif isinstance(data, DataLoader):
+            # Forward kwargs here as well so both input kinds predict alike
+            Y_p, Y = self._batch_evaluate(data, break_ties=break_ties, **kwargs)
+        else:
+            raise ValueError(
+                "Unrecognized input data structure, use tuple or DataLoader!"
+            )
+
+        return Y_p, Y
+
     def score(
         self,
-        X,
-        Y,
+        data,
         metric=["accuracy"],
         break_ties="random",
         verbose=True,
@@ -283,9 +378,10 @@ def score(
         """Scores the predictive performance of the Classifier on all tasks
 
         Args:
-            X: The input for the predict method
-            Y: An [n] or [n, 1] torch.Tensor or np.ndarray of target labels in
-                {1,...,k}
+            data: either a Pytorch DataLoader or tuple supplying (X,Y):
+                X: The input for the predict method
+                Y: An [n] or [n, 1] torch.Tensor or np.ndarray of target labels
+                    in {1,...,k}
             metric: A metric (string) with which to score performance or a
                 list of such metrics
             break_ties: How to break ties when making predictions
@@ -295,9 +391,8 @@ def score(
         Returns:
             scores: A (float) score
         """
-        Y = self._to_numpy(Y)
-        Y_p = self.predict(X, break_ties=break_ties, **kwargs)
 
+        Y_p, Y = self.evaluate(data, break_ties=break_ties)
         metric_list = metric if isinstance(metric, list) else [metric]
         scores = []
         for metric in metric_list:
@@ -373,7 +468,7 @@ def _to_numpy(Z):
         elif isinstance(Z, list):
             return np.array(Z)
         elif isinstance(Z, torch.Tensor):
-            return Z.numpy()
+            return Z.cpu().numpy()
         else:
             msg = (
                 f"Expected None, list, numpy.ndarray or torch.Tensor, "

diff --git a/metal/contrib/featurizers/requirements.txt b/metal/contrib/featurizers/requirements.txt
@@ -1,3 +1,3 @@
 torchtext==0.2.3
-ntlk
-scikit-learn
+nltk
+scikit-learn
diff --git a/metal/end_model/em_defaults.py b/metal/end_model/em_defaults.py
@@ -18,6 +18,7 @@
     "train_config": {
         # Display
         "print_every": 1,  # Print after this many epochs
+        "disable_prog_bar": False,  # Disable progress bar each epoch
         # GPU
         "use_cuda": False,
         # Dataloader

diff --git a/metal/end_model/end_model.py b/metal/end_model/end_model.py
@@ -13,7 +13,6 @@
 class EndModel(Classifier):
     """A dynamically constructed discriminative classifier
 
-    Args:
         layer_out_dims: a list of integers corresponding to the output sizes
             of the layers of your network. The first element is the
             dimensionality of the input layer, the last element is the
@@ -72,7 +71,9 @@ def _build(self, input_module, middle_modules, head_module):
             self.network = nn.Sequential(input_layer, *middle_layers, head)
 
         # Construct loss module
-        self.criteria = SoftCrossEntropyLoss(reduction="sum")
+        self.criteria = SoftCrossEntropyLoss(
+            reduction="sum", use_cuda=self.config["train_config"]["use_cuda"]
+        )
 
     def _build_input_layer(self, input_module):
         if input_module is None:
@@ -164,19 +165,39 @@ def _make_data_loader(self, X, Y, data_loader_config):
         return data_loader
 
     def _get_loss_fn(self):
-        loss_fn = lambda X, Y: self.criteria(self.forward(X), Y)
+        """Returns loss_fn(X, Y), moving the criterion to GPU if use_cuda."""
+        # use_cuda is nested under train_config; config is indexed as a dict
+        if self.config["train_config"]["use_cuda"]:
+            criteria = self.criteria.cuda()
+        else:
+            criteria = self.criteria
+        loss_fn = lambda X, Y: criteria(self.forward(X), Y)
         return loss_fn
 
-    def train(self, X_train, Y_train, X_dev=None, Y_dev=None, **kwargs):
-        self.config = recursive_merge_dicts(self.config, kwargs)
-        train_config = self.config["train_config"]
+    def _convert_input_data(self, data):
+        """Returns a DataLoader for data: an (X, Y) tuple or a DataLoader."""
+        if isinstance(data, DataLoader):
+            return data
+        if not isinstance(data, tuple):
+            raise ValueError(
+                "Unrecognized input data structure, use tuple or DataLoader."
+            )
+        X, Y = data
+        # Labels are cast to a float tensor before the loader is built
+        Y = self._to_torch(Y, dtype=torch.FloatTensor)
+        loader_config = self.config["train_config"]["data_loader_config"]
+        return self._make_data_loader(X, Y, loader_config)
+
+    def train(self, train_data, dev_data=None, **kwargs):
 
-        Y_train = self._to_torch(Y_train, dtype=torch.FloatTensor)
-        Y_dev = self._to_torch(Y_dev)
+        self.config = recursive_merge_dicts(self.config, kwargs)
 
-        # Make data loaders
-        loader_config = train_config["data_loader_config"]
-        train_loader = self._make_data_loader(X_train, Y_train, loader_config)
+        # Convert input data to data loaders
+        train_loader = self._convert_input_data(train_data)
+        if dev_data is not None:
+            dev_loader = self._convert_input_data(dev_data)
+        else:
+            dev_loader = None
 
         # Initialize the model
         self.reset()
@@ -185,7 +206,7 @@ def train(self, X_train, Y_train, X_dev=None, Y_dev=None, **kwargs):
         loss_fn = self._get_loss_fn()
 
         # Execute training procedure
-        self._train(train_loader, loss_fn, X_dev=X_dev, Y_dev=Y_dev)
+        self._train(train_loader, loss_fn, dev_loader=dev_loader)
 
     def predict_proba(self, X):
         """Returns a [n, k] tensor of soft (float) predictions."""

diff --git a/metal/end_model/loss.py b/metal/end_model/loss.py
@@ -18,17 +18,24 @@ class SoftCrossEntropyLoss(nn.Module):
         target: An [n, k] float tensor of target probabilities
     """
 
-    def __init__(self, weight=None, reduction="elementwise_mean"):
+    def __init__(
+        self, weight=None, reduction="elementwise_mean", use_cuda=False
+    ):
         super().__init__()
         assert weight is None or isinstance(weight, torch.FloatTensor)
         self.weight = weight
         self.reduction = reduction
+        self.use_cuda = use_cuda  # forward() moves its temporaries to GPU
 
     def forward(self, input, target):
         n, k = input.shape
         cum_losses = torch.zeros(n)
+        if self.use_cuda:
+            cum_losses = cum_losses.cuda()
         for y in range(k):
             cls_idx = torch.full((n,), y, dtype=torch.long)
+            if self.use_cuda:
+                cls_idx = cls_idx.cuda()
             y_loss = F.cross_entropy(input, cls_idx, reduction="none")
             if self.weight is not None:
                 y_loss = y_loss * self.weight[y]

diff --git a/metal/label_model/lm_defaults.py b/metal/label_model/lm_defaults.py
@@ -26,5 +26,8 @@
         # Train loop
         "n_epochs": 100,
         "print_every": 10,
+        "disable_prog_bar": True,  # Disable progress bar each epoch
+        # GPU
+        "use_cuda": False,
     },
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,4 +13,5 @@ dependencies: @@
       - pandas
       - pytorch=0.4.1
       - runipy
-      - scipy
+      - scipy
+      - tqdm