Commit

Merge branch 'develop' into layer_improvement
AnFreTh authored Jul 26, 2024
2 parents 283a10b + cc92798 commit 19b760c
Showing 7 changed files with 145 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-publish-pypi.yml
@@ -3,7 +3,7 @@ name: Publish Package to PyPi
 on:
   push:
     branches:
-      - master
+      - release

 jobs:
   publish:
2 changes: 1 addition & 1 deletion mambular/__version__.py
@@ -1,4 +1,4 @@
 """Version information."""

 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.1.6"
+__version__ = "0.1.7"
6 changes: 3 additions & 3 deletions mambular/base_models/lightning_wrapper.py
@@ -37,7 +37,7 @@ def __init__(
         lss=False,
         family=None,
         loss_fct: callable = None,
-        **kwargs
+        **kwargs,
     ):
         super().__init__()
         self.num_classes = num_classes
@@ -126,7 +126,7 @@ def compute_loss(self, predictions, y_true):
             Computed loss.
         """
         if self.lss:
-            return self.family.compute_loss(predictions, y_true)
+            return self.family.compute_loss(predictions, y_true.squeeze(-1))
         else:
             loss = self.loss_fct(predictions, y_true)
             return loss
@@ -300,7 +300,7 @@ def configure_optimizers(self):
             A dictionary containing the optimizer and lr_scheduler configurations.
         """
         optimizer = torch.optim.Adam(
-            self.parameters(),
+            self.model.parameters(),
             lr=self.lr,
             weight_decay=self.weight_decay,
         )
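The y_true.squeeze(-1) change drops a trailing singleton dimension from the targets before they reach the distribution family's loss. A minimal sketch of the shape issue (illustrative only, not taken from the repository; it assumes targets arrive from the dataloader with shape (batch, 1) while the loss expects a 1-D tensor):

    import torch

    y_true = torch.randn(32, 1)        # targets as a DataLoader often yields them: shape (32, 1)
    y_flat = y_true.squeeze(-1)        # shape (32,), one target per sample
    print(y_true.shape, y_flat.shape)  # torch.Size([32, 1]) torch.Size([32])

The optimizer change from self.parameters() to self.model.parameters() means only the wrapped model's weights are optimized, rather than any other parameters registered on the Lightning wrapper.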
80 changes: 58 additions & 22 deletions mambular/models/sklearn_base_classifier.py
@@ -9,6 +9,8 @@
 from ..data_utils.datamodule import MambularDataModule
 from ..preprocessing import Preprocessor
 import numpy as np
+from lightning.pytorch.callbacks import ModelSummary
+from sklearn.metrics import log_loss


 class SklearnBaseClassifier(BaseEstimator):
@@ -49,23 +51,22 @@ def __init__(self, model, config, **kwargs):

     def get_params(self, deep=True):
         """
-        Get parameters for this estimator. Overrides the BaseEstimator method.
+        Get parameters for this estimator.
         Parameters
         ----------
         deep : bool, default=True
-            If True, returns the parameters for this estimator and contained sub-objects that are estimators.
+            If True, will return the parameters for this estimator and contained subobjects that are estimators.
         Returns
         -------
         params : dict
             Parameter names mapped to their values.
         """
-        params = self.config_kwargs  # Parameters used to initialize DefaultConfig
+        params = {}
+        params.update(self.config_kwargs)

         # If deep=True, include parameters from nested components like preprocessor
         if deep:
-            # Assuming Preprocessor has a get_params method
             preprocessor_params = {
                 "preprocessor__" + key: value
                 for key, value in self.preprocessor.get_params().items()
@@ -76,35 +77,36 @@ def get_params(self, deep=True):

     def set_params(self, **parameters):
         """
-        Set the parameters of this estimator. Overrides the BaseEstimator method.
+        Set the parameters of this estimator.
         Parameters
         ----------
         **parameters : dict
-            Estimator parameters to be set.
+            Estimator parameters.
         Returns
         -------
         self : object
-            The instance with updated parameters.
+            Estimator instance.
         """
-        # Update config_kwargs with provided parameters
-        valid_config_keys = self.config_kwargs.keys()
-        config_updates = {k: v for k, v in parameters.items() if k in valid_config_keys}
-        self.config_kwargs.update(config_updates)
-
-        # Update the config object
-        for key, value in config_updates.items():
-            setattr(self.config, key, value)
-
-        # Handle preprocessor parameters (prefixed with 'preprocessor__')
+        config_params = {
+            k: v for k, v in parameters.items() if not k.startswith("preprocessor__")
+        }
         preprocessor_params = {
             k.split("__")[1]: v
             for k, v in parameters.items()
             if k.startswith("preprocessor__")
         }

+        if config_params:
+            self.config_kwargs.update(config_params)
+            if self.config is not None:
+                for key, value in config_params.items():
+                    setattr(self.config, key, value)
+            else:
+                self.config = self.config_class(**self.config_kwargs)

         if preprocessor_params:
-            # Assuming Preprocessor has a set_params method
             self.preprocessor.set_params(**preprocessor_params)

         return self
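The reworked set_params splits incoming keyword arguments on the "preprocessor__" prefix: prefixed keys are stripped and forwarded to the preprocessor, while everything else updates config_kwargs and the existing config (or builds a fresh one from config_kwargs when no config is set). A standalone sketch of that routing, using illustrative parameter names that are not taken from this diff:

    # Illustrative only: "lr" and "n_bins" are hypothetical parameter names.
    parameters = {"lr": 1e-4, "preprocessor__n_bins": 32}
    config_params = {
        k: v for k, v in parameters.items() if not k.startswith("preprocessor__")
    }
    preprocessor_params = {
        k.split("__")[1]: v for k, v in parameters.items() if k.startswith("preprocessor__")
    }
    print(config_params)        # {'lr': 0.0001}
    print(preprocessor_params)  # {'n_bins': 32}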
@@ -368,12 +370,16 @@ def fit(
         )

         # Initialize the trainer and train the model
-        trainer = pl.Trainer(
+        self.trainer = pl.Trainer(
             max_epochs=max_epochs,
-            callbacks=[early_stop_callback, checkpoint_callback],
+            callbacks=[
+                early_stop_callback,
+                checkpoint_callback,
+                ModelSummary(max_depth=2),
+            ],
             **trainer_kwargs
         )
-        trainer.fit(self.model, self.data_module)
+        self.trainer.fit(self.model, self.data_module)

         best_model_path = checkpoint_callback.best_model_path
         if best_model_path:
@@ -555,3 +561,33 @@ def evaluate(self, X, y_true, metrics=None):
             scores[metric_name] = metric_func(y_true, predictions)

         return scores
+
+    def score(self, X, y, metric=(log_loss, True)):
+        """
+        Calculate the score of the model using the specified metric.
+        Parameters
+        ----------
+        X : array-like or pd.DataFrame of shape (n_samples, n_features)
+            The input samples to predict.
+        y : array-like of shape (n_samples,)
+            The true class labels against which to evaluate the predictions.
+        metric : tuple, default=(log_loss, True)
+            A tuple containing the metric function and a boolean indicating whether the metric requires probability scores (True) or class labels (False).
+        Returns
+        -------
+        score : float
+            The score calculated using the specified metric.
+        """
+        metric_func, use_proba = metric
+
+        if not isinstance(X, pd.DataFrame):
+            X = pd.DataFrame(X)
+
+        if use_proba:
+            probabilities = self.predict_proba(X)
+            return metric_func(y, probabilities)
+        else:
+            predictions = self.predict(X)
+            return metric_func(y, predictions)
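The new score method takes a (metric_function, needs_probabilities) tuple and defaults to sklearn's log_loss on predicted probabilities. A usage sketch (assuming clf is a fitted estimator from this package and X_test / y_test are held-out data; these names are illustrative):

    from sklearn.metrics import accuracy_score

    ll = clf.score(X_test, y_test)                                   # default: log_loss on predict_proba output
    acc = clf.score(X_test, y_test, metric=(accuracy_score, False))  # label-based metric is evaluated on predict()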
49 changes: 27 additions & 22 deletions mambular/models/sklearn_base_lss.py
@@ -31,6 +31,7 @@
     PoissonDistribution,
     StudentTDistribution,
 )
+from lightning.pytorch.callbacks import ModelSummary


 class SklearnBaseLSS(BaseEstimator):
@@ -70,23 +71,22 @@ def __init__(self, model, config, **kwargs):

     def get_params(self, deep=True):
         """
-        Get parameters for this estimator. Overrides the BaseEstimator method.
+        Get parameters for this estimator.
         Parameters
         ----------
         deep : bool, default=True
-            If True, returns the parameters for this estimator and contained sub-objects that are estimators.
+            If True, will return the parameters for this estimator and contained subobjects that are estimators.
         Returns
         -------
         params : dict
             Parameter names mapped to their values.
         """
-        params = self.config_kwargs  # Parameters used to initialize DefaultConfig
+        params = {}
+        params.update(self.config_kwargs)

         # If deep=True, include parameters from nested components like preprocessor
         if deep:
-            # Assuming Preprocessor has a get_params method
             preprocessor_params = {
                 "preprocessor__" + key: value
                 for key, value in self.preprocessor.get_params().items()
@@ -97,35 +97,36 @@ def get_params(self, deep=True):

     def set_params(self, **parameters):
         """
-        Set the parameters of this estimator. Overrides the BaseEstimator method.
+        Set the parameters of this estimator.
         Parameters
         ----------
         **parameters : dict
-            Estimator parameters to be set.
+            Estimator parameters.
         Returns
         -------
         self : object
-            The instance with updated parameters.
+            Estimator instance.
         """
-        # Update config_kwargs with provided parameters
-        valid_config_keys = self.config_kwargs.keys()
-        config_updates = {k: v for k, v in parameters.items() if k in valid_config_keys}
-        self.config_kwargs.update(config_updates)
-
-        # Update the config object
-        for key, value in config_updates.items():
-            setattr(self.config, key, value)
-
-        # Handle preprocessor parameters (prefixed with 'preprocessor__')
+        config_params = {
+            k: v for k, v in parameters.items() if not k.startswith("preprocessor__")
+        }
         preprocessor_params = {
             k.split("__")[1]: v
             for k, v in parameters.items()
             if k.startswith("preprocessor__")
         }

+        if config_params:
+            self.config_kwargs.update(config_params)
+            if self.config is not None:
+                for key, value in config_params.items():
+                    setattr(self.config, key, value)
+            else:
+                self.config = self.config_class(**self.config_kwargs)

         if preprocessor_params:
-            # Assuming Preprocessor has a set_params method
             self.preprocessor.set_params(**preprocessor_params)

         return self
@@ -409,12 +410,16 @@ def fit(
         )

         # Initialize the trainer and train the model
-        trainer = pl.Trainer(
+        self.trainer = pl.Trainer(
             max_epochs=max_epochs,
-            callbacks=[early_stop_callback, checkpoint_callback],
+            callbacks=[
+                early_stop_callback,
+                checkpoint_callback,
+                ModelSummary(max_depth=2),
+            ],
             **trainer_kwargs
         )
-        trainer.fit(self.model, self.data_module)
+        self.trainer.fit(self.model, self.data_module)

         best_model_path = checkpoint_callback.best_model_path
         if best_model_path:
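In both estimators, fit now keeps the Trainer on the instance (self.trainer) and registers Lightning's ModelSummary callback so nested submodules are listed when training starts. A minimal standalone sketch of that callback (assumes the lightning package is installed; not code from this repository):

    import lightning.pytorch as pl
    from lightning.pytorch.callbacks import ModelSummary

    trainer = pl.Trainer(
        max_epochs=10,
        callbacks=[ModelSummary(max_depth=2)],  # print submodules two levels deep at the start of fit()
    )
    # Keeping a reference (self.trainer = trainer) lets the estimator reuse the
    # trainer after training, e.g. trainer.validate(...) or checkpoint inspection.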