diff --git a/.github/workflows/build-publish-pypi.yml b/.github/workflows/build-publish-pypi.yml index d6d0816..b369bf8 100644 --- a/.github/workflows/build-publish-pypi.yml +++ b/.github/workflows/build-publish-pypi.yml @@ -3,7 +3,7 @@ name: Publish Package to PyPi on: push: branches: - - master + - release jobs: publish: diff --git a/mambular/__version__.py b/mambular/__version__.py index 7408bb0..ba9f224 100644 --- a/mambular/__version__.py +++ b/mambular/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.1.6" +__version__ = "0.1.7" diff --git a/mambular/base_models/lightning_wrapper.py b/mambular/base_models/lightning_wrapper.py index 39002d9..b26c643 100644 --- a/mambular/base_models/lightning_wrapper.py +++ b/mambular/base_models/lightning_wrapper.py @@ -37,7 +37,7 @@ def __init__( lss=False, family=None, loss_fct: callable = None, - **kwargs + **kwargs, ): super().__init__() self.num_classes = num_classes @@ -126,7 +126,7 @@ def compute_loss(self, predictions, y_true): Computed loss. """ if self.lss: - return self.family.compute_loss(predictions, y_true) + return self.family.compute_loss(predictions, y_true.squeeze(-1)) else: loss = self.loss_fct(predictions, y_true) return loss @@ -300,7 +300,7 @@ def configure_optimizers(self): A dictionary containing the optimizer and lr_scheduler configurations. """ optimizer = torch.optim.Adam( - self.parameters(), + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay, ) diff --git a/mambular/models/sklearn_base_classifier.py b/mambular/models/sklearn_base_classifier.py index c2d6f17..ec39edc 100644 --- a/mambular/models/sklearn_base_classifier.py +++ b/mambular/models/sklearn_base_classifier.py @@ -9,6 +9,8 @@ from ..data_utils.datamodule import MambularDataModule from ..preprocessing import Preprocessor import numpy as np +from lightning.pytorch.callbacks import ModelSummary +from sklearn.metrics import log_loss class SklearnBaseClassifier(BaseEstimator): @@ -49,23 +51,22 @@ def __init__(self, model, config, **kwargs): def get_params(self, deep=True): """ - Get parameters for this estimator. Overrides the BaseEstimator method. + Get parameters for this estimator. Parameters ---------- deep : bool, default=True - If True, returns the parameters for this estimator and contained sub-objects that are estimators. + If True, will return the parameters for this estimator and contained subobjects that are estimators. Returns ------- params : dict Parameter names mapped to their values. """ - params = self.config_kwargs # Parameters used to initialize DefaultConfig + params = {} + params.update(self.config_kwargs) - # If deep=True, include parameters from nested components like preprocessor if deep: - # Assuming Preprocessor has a get_params method preprocessor_params = { "preprocessor__" + key: value for key, value in self.preprocessor.get_params().items() @@ -76,35 +77,36 @@ def get_params(self, deep=True): def set_params(self, **parameters): """ - Set the parameters of this estimator. Overrides the BaseEstimator method. + Set the parameters of this estimator. Parameters ---------- **parameters : dict - Estimator parameters to be set. + Estimator parameters. Returns ------- self : object - The instance with updated parameters. + Estimator instance. """ - # Update config_kwargs with provided parameters - valid_config_keys = self.config_kwargs.keys() - config_updates = {k: v for k, v in parameters.items() if k in valid_config_keys} - self.config_kwargs.update(config_updates) - - # Update the config object - for key, value in config_updates.items(): - setattr(self.config, key, value) - - # Handle preprocessor parameters (prefixed with 'preprocessor__') + config_params = { + k: v for k, v in parameters.items() if not k.startswith("preprocessor__") + } preprocessor_params = { k.split("__")[1]: v for k, v in parameters.items() if k.startswith("preprocessor__") } + + if config_params: + self.config_kwargs.update(config_params) + if self.config is not None: + for key, value in config_params.items(): + setattr(self.config, key, value) + else: + self.config = self.config_class(**self.config_kwargs) + if preprocessor_params: - # Assuming Preprocessor has a set_params method self.preprocessor.set_params(**preprocessor_params) return self @@ -368,12 +370,16 @@ def fit( ) # Initialize the trainer and train the model - trainer = pl.Trainer( + self.trainer = pl.Trainer( max_epochs=max_epochs, - callbacks=[early_stop_callback, checkpoint_callback], + callbacks=[ + early_stop_callback, + checkpoint_callback, + ModelSummary(max_depth=2), + ], **trainer_kwargs ) - trainer.fit(self.model, self.data_module) + self.trainer.fit(self.model, self.data_module) best_model_path = checkpoint_callback.best_model_path if best_model_path: @@ -555,3 +561,33 @@ def evaluate(self, X, y_true, metrics=None): scores[metric_name] = metric_func(y_true, predictions) return scores + + def score(self, X, y, metric=(log_loss, True)): + """ + Calculate the score of the model using the specified metric. + + Parameters + ---------- + X : array-like or pd.DataFrame of shape (n_samples, n_features) + The input samples to predict. + y : array-like of shape (n_samples,) + The true class labels against which to evaluate the predictions. + metric : tuple, default=(log_loss, True) + A tuple containing the metric function and a boolean indicating whether the metric requires probability scores (True) or class labels (False). + + Returns + ------- + score : float + The score calculated using the specified metric. + """ + metric_func, use_proba = metric + + if not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) + + if use_proba: + probabilities = self.predict_proba(X) + return metric_func(y, probabilities) + else: + predictions = self.predict(X) + return metric_func(y, predictions) diff --git a/mambular/models/sklearn_base_lss.py b/mambular/models/sklearn_base_lss.py index 3298cff..62f2d3a 100644 --- a/mambular/models/sklearn_base_lss.py +++ b/mambular/models/sklearn_base_lss.py @@ -31,6 +31,7 @@ PoissonDistribution, StudentTDistribution, ) +from lightning.pytorch.callbacks import ModelSummary class SklearnBaseLSS(BaseEstimator): @@ -70,23 +71,22 @@ def __init__(self, model, config, **kwargs): def get_params(self, deep=True): """ - Get parameters for this estimator. Overrides the BaseEstimator method. + Get parameters for this estimator. Parameters ---------- deep : bool, default=True - If True, returns the parameters for this estimator and contained sub-objects that are estimators. + If True, will return the parameters for this estimator and contained subobjects that are estimators. Returns ------- params : dict Parameter names mapped to their values. """ - params = self.config_kwargs # Parameters used to initialize DefaultConfig + params = {} + params.update(self.config_kwargs) - # If deep=True, include parameters from nested components like preprocessor if deep: - # Assuming Preprocessor has a get_params method preprocessor_params = { "preprocessor__" + key: value for key, value in self.preprocessor.get_params().items() @@ -97,35 +97,36 @@ def get_params(self, deep=True): def set_params(self, **parameters): """ - Set the parameters of this estimator. Overrides the BaseEstimator method. + Set the parameters of this estimator. Parameters ---------- **parameters : dict - Estimator parameters to be set. + Estimator parameters. Returns ------- self : object - The instance with updated parameters. + Estimator instance. """ - # Update config_kwargs with provided parameters - valid_config_keys = self.config_kwargs.keys() - config_updates = {k: v for k, v in parameters.items() if k in valid_config_keys} - self.config_kwargs.update(config_updates) - - # Update the config object - for key, value in config_updates.items(): - setattr(self.config, key, value) - - # Handle preprocessor parameters (prefixed with 'preprocessor__') + config_params = { + k: v for k, v in parameters.items() if not k.startswith("preprocessor__") + } preprocessor_params = { k.split("__")[1]: v for k, v in parameters.items() if k.startswith("preprocessor__") } + + if config_params: + self.config_kwargs.update(config_params) + if self.config is not None: + for key, value in config_params.items(): + setattr(self.config, key, value) + else: + self.config = self.config_class(**self.config_kwargs) + if preprocessor_params: - # Assuming Preprocessor has a set_params method self.preprocessor.set_params(**preprocessor_params) return self @@ -409,12 +410,16 @@ def fit( ) # Initialize the trainer and train the model - trainer = pl.Trainer( + self.trainer = pl.Trainer( max_epochs=max_epochs, - callbacks=[early_stop_callback, checkpoint_callback], + callbacks=[ + early_stop_callback, + checkpoint_callback, + ModelSummary(max_depth=2), + ], **trainer_kwargs ) - trainer.fit(self.model, self.data_module) + self.trainer.fit(self.model, self.data_module) best_model_path = checkpoint_callback.best_model_path if best_model_path: diff --git a/mambular/models/sklearn_base_regressor.py b/mambular/models/sklearn_base_regressor.py index c128914..30bedb9 100644 --- a/mambular/models/sklearn_base_regressor.py +++ b/mambular/models/sklearn_base_regressor.py @@ -8,11 +8,13 @@ from ..base_models.lightning_wrapper import TaskModel from ..data_utils.datamodule import MambularDataModule from ..preprocessing import Preprocessor +from lightning.pytorch.callbacks import ModelSummary +from dataclasses import asdict, is_dataclass class SklearnBaseRegressor(BaseEstimator): def __init__(self, model, config, **kwargs): - preprocessor_arg_names = [ + self.preprocessor_arg_names = [ "n_bins", "numerical_preprocessing", "use_decision_tree_bins", @@ -25,16 +27,18 @@ def __init__(self, model, config, **kwargs): ] self.config_kwargs = { - k: v for k, v in kwargs.items() if k not in preprocessor_arg_names + k: v for k, v in kwargs.items() if k not in self.preprocessor_arg_names } self.config = config(**self.config_kwargs) preprocessor_kwargs = { - k: v for k, v in kwargs.items() if k in preprocessor_arg_names + k: v for k, v in kwargs.items() if k in self.preprocessor_arg_names } self.preprocessor = Preprocessor(**preprocessor_kwargs) + self.base_model = model self.model = None + self.built = False # Raise a warning if task is set to 'classification' if preprocessor_kwargs.get("task") == "classification": @@ -43,28 +47,24 @@ def __init__(self, model, config, **kwargs): UserWarning, ) - self.base_model = model - self.built = False - def get_params(self, deep=True): """ - Get parameters for this estimator. Overrides the BaseEstimator method. + Get parameters for this estimator. Parameters ---------- deep : bool, default=True - If True, returns the parameters for this estimator and contained sub-objects that are estimators. + If True, will return the parameters for this estimator and contained subobjects that are estimators. Returns ------- params : dict Parameter names mapped to their values. """ - params = self.config_kwargs # Parameters used to initialize DefaultConfig + params = {} + params.update(self.config_kwargs) - # If deep=True, include parameters from nested components like preprocessor if deep: - # Assuming Preprocessor has a get_params method preprocessor_params = { "preprocessor__" + key: value for key, value in self.preprocessor.get_params().items() @@ -75,35 +75,36 @@ def get_params(self, deep=True): def set_params(self, **parameters): """ - Set the parameters of this estimator. Overrides the BaseEstimator method. + Set the parameters of this estimator. Parameters ---------- **parameters : dict - Estimator parameters to be set. + Estimator parameters. Returns ------- self : object - The instance with updated parameters. + Estimator instance. """ - # Update config_kwargs with provided parameters - valid_config_keys = self.config_kwargs.keys() - config_updates = {k: v for k, v in parameters.items() if k in valid_config_keys} - self.config_kwargs.update(config_updates) - - # Update the config object - for key, value in config_updates.items(): - setattr(self.config, key, value) - - # Handle preprocessor parameters (prefixed with 'preprocessor__') + config_params = { + k: v for k, v in parameters.items() if not k.startswith("preprocessor__") + } preprocessor_params = { k.split("__")[1]: v for k, v in parameters.items() if k.startswith("preprocessor__") } + + if config_params: + self.config_kwargs.update(config_params) + if self.config is not None: + for key, value in config_params.items(): + setattr(self.config, key, value) + else: + self.config = self.config_class(**self.config_kwargs) + if preprocessor_params: - # Assuming Preprocessor has a set_params method self.preprocessor.set_params(**preprocessor_params) return self @@ -362,12 +363,16 @@ def fit( ) # Initialize the trainer and train the model - trainer = pl.Trainer( + self.trainer = pl.Trainer( max_epochs=max_epochs, - callbacks=[early_stop_callback, checkpoint_callback], + callbacks=[ + early_stop_callback, + checkpoint_callback, + ModelSummary(max_depth=2), + ], **trainer_kwargs ) - trainer.fit(self.model, self.data_module) + self.trainer.fit(self.model, self.data_module) best_model_path = checkpoint_callback.best_model_path if best_model_path: @@ -472,3 +477,24 @@ def evaluate(self, X, y_true, metrics=None): scores[metric_name] = metric_func(y_true, predictions) return scores + + def score(self, X, y, metric=mean_squared_error): + """ + Calculate the score of the model using the specified metric. + + Parameters + ---------- + X : array-like or pd.DataFrame of shape (n_samples, n_features) + The input samples to predict. + y : array-like of shape (n_samples,) or (n_samples, n_outputs) + The true target values against which to evaluate the predictions. + metric : callable, default=mean_squared_error + The metric function to use for evaluation. Must be a callable with the signature `metric(y_true, y_pred)`. + + Returns + ------- + score : float + The score calculated using the specified metric. + """ + predictions = self.predict(X) + return metric(y, predictions) diff --git a/requirements.txt b/requirements.txt index 0b2da13..35e749d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy +numpy<=1.26.4 pandas lightning scikit_learn