diff --git a/mambular/models/sklearn_classifier.py b/mambular/models/sklearn_classifier.py index 72bb0df..55a5e63 100644 --- a/mambular/models/sklearn_classifier.py +++ b/mambular/models/sklearn_classifier.py @@ -90,6 +90,10 @@ class MambularClassifier(BaseEstimator): Defines the strategy for binning numerical features. Default is 'uniform'. task : str, optional Indicates the type of machine learning task ('regression' or 'classification'). Default is 'regression'. + cat_cutoff : float or int, optional + Indicates the cutoff up to which integer columns are treated as categorical. If float, it's treated as a percentage. If int, it's the maximum number of unique values for a column to be considered categorical. Default is 3%. + treat_all_integers_as_numerical : bool, optional + If True, all integer columns will be treated as numerical, regardless of their unique value count or proportion. Default is False. @@ -140,7 +144,10 @@ def __init__(self, **kwargs): "use_decision_tree_bins", "binning_strategy", "task", + "cat_cutoff", + "treat_all_integers_as_numerical", ] + self.config_kwargs = {k: v for k, v in kwargs.items() if k in config_arg_names} self.config = DefaultMambularConfig(**self.config_kwargs) diff --git a/mambular/models/sklearn_distributional.py b/mambular/models/sklearn_distributional.py index 98962c3..06155bc 100644 --- a/mambular/models/sklearn_distributional.py +++ b/mambular/models/sklearn_distributional.py @@ -102,6 +102,10 @@ class MambularLSS(BaseEstimator): Defines the strategy for binning numerical features. Default is 'uniform'. task : str, optional Indicates the type of machine learning task ('regression' or 'classification'). Default is 'regression'. + cat_cutoff : float or int, optional + Indicates the cutoff up to which integer columns are treated as categorical. If float, it's treated as a percentage. If int, it's the maximum number of unique values for a column to be considered categorical. 
Default is 3%. + treat_all_integers_as_numerical : bool, optional + If True, all integer columns will be treated as numerical, regardless of their unique value count or proportion. Default is False. @@ -152,7 +156,10 @@ def __init__(self, **kwargs): "use_decision_tree_bins", "binning_strategy", "task", + "cat_cutoff", + "treat_all_integers_as_numerical", ] + self.config_kwargs = {k: v for k, v in kwargs.items() if k in config_arg_names} self.config = DefaultMambularConfig(**self.config_kwargs) diff --git a/mambular/models/sklearn_embedding_classifier.py b/mambular/models/sklearn_embedding_classifier.py index 59213f8..1e7f477 100644 --- a/mambular/models/sklearn_embedding_classifier.py +++ b/mambular/models/sklearn_embedding_classifier.py @@ -91,6 +91,10 @@ class EmbeddingMambularClassifier(BaseEstimator): Defines the strategy for binning numerical features. Default is 'uniform'. task : str, optional Indicates the type of machine learning task ('regression' or 'classification'). Default is 'regression'. + cat_cutoff : float or int, optional + Indicates the cutoff up to which integer columns are treated as categorical. If float, it's treated as a percentage. If int, it's the maximum number of unique values for a column to be considered categorical. Default is 3%. + treat_all_integers_as_numerical : bool, optional + If True, all integer columns will be treated as numerical, regardless of their unique value count or proportion. 
Default is False. Attributes @@ -140,6 +144,8 @@ def __init__(self, **kwargs): "use_decision_tree_bins", "binning_strategy", "task", + "cat_cutoff", + "treat_all_integers_as_numerical", ] self.config_kwargs = {k: v for k, v in kwargs.items() if k in config_arg_names} diff --git a/mambular/models/sklearn_embedding_regressor.py b/mambular/models/sklearn_embedding_regressor.py index 4e9abbe..b8b72f9 100644 --- a/mambular/models/sklearn_embedding_regressor.py +++ b/mambular/models/sklearn_embedding_regressor.py @@ -88,6 +88,10 @@ class EmbeddingMambularRegressor(BaseEstimator): Defines the strategy for binning numerical features. Default is 'uniform'. task : str, optional Indicates the type of machine learning task ('regression' or 'classification'). Default is 'regression'. + cat_cutoff : float or int, optional + Indicates the cutoff up to which integer columns are treated as categorical. If float, it's treated as a percentage. If int, it's the maximum number of unique values for a column to be considered categorical. Default is 3%. + treat_all_integers_as_numerical : bool, optional + If True, all integer columns will be treated as numerical, regardless of their unique value count or proportion. 
Default is False. Attributes @@ -137,6 +141,8 @@ def __init__(self, **kwargs): "use_decision_tree_bins", "binning_strategy", "task", + "cat_cutoff", + "treat_all_integers_as_numerical", ] self.config_kwargs = {k: v for k, v in kwargs.items() if k in config_arg_names} diff --git a/mambular/models/sklearn_regressor.py b/mambular/models/sklearn_regressor.py index 7ed892f..4f70d9e 100644 --- a/mambular/models/sklearn_regressor.py +++ b/mambular/models/sklearn_regressor.py @@ -7,7 +7,7 @@ from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader import warnings - +import numpy as np from ..base_models.regressor import BaseMambularRegressor from ..utils.dataset import MambularDataModule, MambularDataset from ..utils.preprocessor import Preprocessor @@ -88,6 +88,10 @@ class MambularRegressor(BaseEstimator): Defines the strategy for binning numerical features. Default is 'uniform'. task : str, optional Indicates the type of machine learning task ('regression' or 'classification'). Default is 'regression'. + cat_cutoff : float or int, optional + Indicates the cutoff up to which integer columns are treated as categorical. If float, it's treated as a percentage. If int, it's the maximum number of unique values for a column to be considered categorical. Default is 3%. + treat_all_integers_as_numerical : bool, optional + If True, all integer columns will be treated as numerical, regardless of their unique value count or proportion. Default is False. @@ -138,6 +142,8 @@ def __init__(self, **kwargs): "use_decision_tree_bins", "binning_strategy", "task", + "cat_cutoff", + "treat_all_integers_as_numerical", ] self.config_kwargs = {k: v for k, v in kwargs.items() if k in config_arg_names}