diff --git a/autots/evaluator/auto_model.py b/autots/evaluator/auto_model.py index 5a8c010a..0be2cedd 100644 --- a/autots/evaluator/auto_model.py +++ b/autots/evaluator/auto_model.py @@ -64,6 +64,7 @@ ) from autots.models.arch import ARCH from autots.models.matrix_var import RRVAR, MAR, TMF, LATC, DMD +from autots.models.sklearn import RollingRegression, WindowRegression, MultivariateRegression, DatepartRegression, UnivariateRegression, ComponentAnalysis, PreprocessingRegression def create_model_id( @@ -191,8 +192,6 @@ def ModelMonster( return model elif model == 'RollingRegression': - from autots.models.sklearn import RollingRegression - model = RollingRegression( frequency=frequency, prediction_interval=prediction_interval, @@ -204,8 +203,6 @@ def ModelMonster( ) return model elif model == 'UnivariateRegression': - from autots.models.sklearn import UnivariateRegression - model = UnivariateRegression( frequency=frequency, prediction_interval=prediction_interval, @@ -219,8 +216,6 @@ def ModelMonster( return model elif model == 'MultivariateRegression': - from autots.models.sklearn import MultivariateRegression - model = MultivariateRegression( frequency=frequency, prediction_interval=prediction_interval, @@ -316,8 +311,6 @@ def ModelMonster( ) return model elif model == 'WindowRegression': - from autots.models.sklearn import WindowRegression - model = WindowRegression( frequency=frequency, prediction_interval=prediction_interval, @@ -382,8 +375,6 @@ def ModelMonster( ) return model elif model == 'ComponentAnalysis': - from autots.models.sklearn import ComponentAnalysis - if parameters == {}: model = ComponentAnalysis( frequency=frequency, @@ -408,8 +399,6 @@ def ModelMonster( ) return model elif model == 'DatepartRegression': - from autots.models.sklearn import DatepartRegression - model = DatepartRegression( frequency=frequency, prediction_interval=prediction_interval, @@ -624,8 +613,6 @@ def ModelMonster( **parameters, ) elif model == "PreprocessingRegression": - from autots.models.sklearn import PreprocessingRegression - return PreprocessingRegression( frequency=frequency, prediction_interval=prediction_interval, diff --git a/autots/models/sklearn.py b/autots/models/sklearn.py index 0c52f440..999c439d 100644 --- a/autots/models/sklearn.py +++ b/autots/models/sklearn.py @@ -14,6 +14,8 @@ try: from sklearn import config_context from sklearn.multioutput import MultiOutputRegressor, RegressorChain + from sklearn.linear_model import ElasticNet, MultiTaskElasticNet, LinearRegression, Ridge + from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeRegressor except Exception: pass from autots.models.base import ModelObject, PredictionObject @@ -36,6 +38,19 @@ def ppf(x): # norm.ppf((1 + 0.95) / 2) +# for numba engine more is required, optional +try: + import numba # noqa + NUMBA_AVAILABLE = True +except ImportError: + NUMBA_AVAILABLE = False + +# Check if the pandas version is 1.3 or greater and if numba is installed +if pd.__version__ >= '1.3' and NUMBA_AVAILABLE: + engine = 'numba' +else: + engine = None + def rolling_x_regressor( df, @@ -84,26 +99,26 @@ def rolling_x_regressor( ) ) # backfill should fill last values safely if str(mean_rolling_periods).isdigit(): - temp = local_df.rolling(int(mean_rolling_periods), min_periods=1).median() + temp = local_df.rolling(int(mean_rolling_periods), min_periods=1).median(engine=engine) X.append(temp) if str(macd_periods).isdigit(): # says mean, but median because it's been that way for ages - temp = local_df.rolling(int(macd_periods), min_periods=1).median() - temp + temp = local_df.rolling(int(macd_periods), min_periods=1).median(engine=engine) - temp temp.columns = ['macd' for col in temp.columns] X.append(temp) if isinstance(mean_rolling_periods, list): for mrp in mean_rolling_periods: - if isinstance(mrp, tuple): + if isinstance(mrp, (tuple, list)): lag = mrp[0] mean_roll = mrp[1] - temp = local_df.shift(lag).rolling(int(mean_roll), min_periods=1).mean().bfill() + temp = local_df.shift(lag).rolling(int(mean_roll), min_periods=1).mean(engine=engine).bfill() temp.columns = [f'rollingmean_{lag}_{mean_roll}_' + str(col) for col in temp.columns] else: - temp = local_df.rolling(int(mrp), min_periods=1).mean() + temp = local_df.rolling(int(mrp), min_periods=1).mean(engine=engine) temp.columns = ['rollingmean_' + str(col) for col in temp.columns] X.append(temp) if str(macd_periods).isdigit(): - temp = local_df.rolling(int(macd_periods), min_periods=1).mean() - temp + temp = local_df.rolling(int(macd_periods), min_periods=1).mean(engine=engine) - temp temp.columns = ['macd' for col in temp.columns] X.append(temp) if str(std_rolling_periods).isdigit(): @@ -305,19 +320,13 @@ def retrieve_regressor( model_param_dict = regression_model.get("model_params", {}) if model_class == 'ElasticNet': if multioutput: - from sklearn.linear_model import MultiTaskElasticNet - regr = MultiTaskElasticNet( alpha=1.0, random_state=random_seed, **model_param_dict ) else: - from sklearn.linear_model import ElasticNet - regr = ElasticNet(alpha=1.0, random_state=random_seed, **model_param_dict) return regr elif model_class == 'DecisionTree': - from sklearn.tree import DecisionTreeRegressor - regr = DecisionTreeRegressor(random_state=random_seed, **model_param_dict) return regr elif model_class == 'MLP': @@ -418,8 +427,6 @@ def retrieve_regressor( random_state=random_seed, ) elif regression_model["model_params"]['estimator'] == 'LinReg': - from sklearn.linear_model import LinearRegression - linreg = LinearRegression() regr = AdaBoostRegressor( estimator=linreg, @@ -429,8 +436,6 @@ def retrieve_regressor( random_state=random_seed, ) elif regression_model["model_params"]['estimator'] == 'ElasticNet': - from sklearn.linear_model import LinearRegression - linreg = ElasticNet() regr = AdaBoostRegressor( estimator=linreg, @@ -440,8 +445,6 @@ def retrieve_regressor( random_state=random_seed, ) elif regression_model["model_params"]['estimator'] == 'ExtraTree': - from sklearn.tree import ExtraTreeRegressor - linreg = ExtraTreeRegressor(max_depth=regression_model["model_params"].get("max_depth", 3)) regr = AdaBoostRegressor( estimator=linreg, @@ -451,8 +454,6 @@ def retrieve_regressor( random_state=random_seed, ) elif regression_model["model_params"].get("max_depth", None) is not None: - from sklearn.tree import DecisionTreeRegressor - linreg = DecisionTreeRegressor(max_depth=regression_model["model_params"].get("max_depth")) regr = AdaBoostRegressor( estimator=linreg, @@ -494,12 +495,8 @@ def retrieve_regressor( regr = LinearSVR(verbose=verbose_bool, **model_param_dict) return regr elif model_class == 'Ridge': - from sklearn.linear_model import Ridge - return Ridge(random_state=random_seed, **model_param_dict) elif model_class == "FastRidge": - from sklearn.linear_model import Ridge - return Ridge(alpha=1e-9, solver="cholesky", fit_intercept=False, copy_X=False) elif model_class == 'BayesianRidge': from sklearn.linear_model import BayesianRidge @@ -536,8 +533,6 @@ def retrieve_regressor( return RANSACRegressor(random_state=random_seed, **model_param_dict) elif model_class == "LinearRegression": - from sklearn.linear_model import LinearRegression - return LinearRegression(**model_param_dict) elif model_class == "GaussianProcessRegressor": from sklearn.gaussian_process import GaussianProcessRegressor @@ -611,8 +606,6 @@ def retrieve_classifier( n_jobs=n_jobs, random_state=random_seed, **model_param_dict ) elif model_class == 'DecisionTree': - from sklearn.tree import DecisionTreeClassifier - return DecisionTreeClassifier(random_state=random_seed, **model_param_dict) elif model_class in ['xgboost', 'XGBClassifier']: import xgboost as xgb diff --git a/autots/tools/fir_filter.py b/autots/tools/fir_filter.py index 2bec7e75..51efe0e1 100644 --- a/autots/tools/fir_filter.py +++ b/autots/tools/fir_filter.py @@ -32,8 +32,8 @@ def apply_fir_filter_to_timeseries( """ # Ensure the data has the correct shape: (observations, series) - if data.shape[0] < data.shape[1]: - data = data.T # Transpose if necessary to match (observations, series) + # if data.shape[0] < data.shape[1]: + # data = data.T # Transpose if necessary to match (observations, series) # Normalize the cutoff frequency with respect to the Nyquist frequency nyquist_frequency = 0.5 * sampling_frequency @@ -59,8 +59,8 @@ def apply_fir_filter_time_domain( This function has padding issues currently. """ # Ensure the data has the correct shape: (observations, series) - if data.shape[0] < data.shape[1]: - data = data.T # Transpose if necessary to match (observations, series) + # if data.shape[0] < data.shape[1]: + # data = data.T # Transpose if necessary to match (observations, series) # Normalize the cutoff frequency with respect to the Nyquist frequency nyquist_frequency = 0.5 * sampling_frequency @@ -97,8 +97,8 @@ def fft_fir_filter_to_timeseries( - filtered_data: The filtered version of the input data """ # Ensure the data has the correct shape: (observations, series) - if data.shape[0] < data.shape[1]: - data = data.T # Transpose if necessary to match (observations, series) + # if data.shape[0] < data.shape[1]: + # data = data.T # Transpose if necessary to match (observations, series) # Normalize the cutoff frequency with respect to the Nyquist frequency nyquist_frequency = 0.5 * sampling_frequency diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 6a7224ad..c001039b 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -6,7 +6,7 @@ from autots.datasets import ( load_daily, load_monthly, load_artificial, load_sine ) -from autots.tools.transform import ThetaTransformer +from autots.tools.transform import ThetaTransformer, FIRFilter class TestTransforms(unittest.TestCase): @@ -18,22 +18,26 @@ def test_theta(self): 'series2': np.random.randn(100).cumsum(), }, index=dates) - # Initialize the transformer with custom theta values - theta_values = [0, 1, 2] # Example of custom theta values + theta_values = [0, 1, 2] theta_transformer = ThetaTransformer(theta_values=theta_values) params = theta_transformer.get_new_params() self.assertTrue(params) - # Fit the transformer theta_transformer.fit(data) - - # Transform the data transformed_data = theta_transformer.transform(data) - - # Inverse transform to reconstruct the original data reconstructed_data = theta_transformer.inverse_transform(transformed_data) - - # Verify that the reconstructed data matches the original data - # Note: Due to numerical precision, a small tolerance is acceptable self.assertTrue(np.allclose(data.values, reconstructed_data.values, atol=1e-8)) + + def test_firfilter(self): + df = load_daily(long=False) + transformer = FIRFilter() + transformed = transformer.fit_transform(df) + inverse = transformer.inverse_transform(transformed) # noqa + + if False: + col = df.columns[0] + pd.concat([df[col], transformed[col].rename("transformed")], axis=1).plot() + + self.assertCountEqual(transformed.index.tolist(), df.index.tolist()) + self.assertCountEqual(transformed.columns.tolist(), df.columns.tolist())