From 619f64bfbf0023933284ef94ca99463850603597 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 27 Dec 2023 10:29:56 +0300 Subject: [PATCH 01/23] add window size selector --- .../data_operations/ts_transformations.py | 7 + .../data/default_operation_params.json | 3 +- fedot/utilities/window_size_selector.py | 234 ++++++++++++++++++ 3 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 fedot/utilities/window_size_selector.py diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 614c52bb2a..0428c5ed4c 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -3,6 +3,8 @@ import numpy as np import pandas as pd + +from fedot.utilities.window_size_selector import WindowSizeSelector from golem.core.log import default_log from scipy.ndimage import gaussian_filter from sklearn.decomposition import TruncatedSVD @@ -115,6 +117,11 @@ def _check_and_correct_window_size(self, time_series: np.array, forecast_length: """ + if self.params.get('autotune_window', 0) == 1: + new = int(WindowSizeSelector(method='hac', window_range=(5, 25)) + .get_window_size(time_series) * len(time_series) / 100) + self.params.update(window_size=new, autotune_window=0) + # Maximum threshold if self.window_size + forecast_length > len(time_series): raise ValueError(f"Window size is to high ({self.window_size}) for provided data len {len(time_series)}") diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index c3aeb5208c..1dcbaa732c 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -50,7 +50,8 @@ "verbose": -1 }, "lagged": { - "window_size": 10 + "window_size": 10, + "autotune_window": 1 }, "diff_filter": { "window_size": 3, diff --git a/fedot/utilities/window_size_selector.py b/fedot/utilities/window_size_selector.py new file mode 100644 index 0000000000..6fd58b594c --- /dev/null +++ b/fedot/utilities/window_size_selector.py @@ -0,0 +1,234 @@ +import math +from typing import Union + +import numpy as np +import pandas as pd +from scipy.signal import find_peaks +from statsmodels.tsa.stattools import acf + + +class WindowSizeSelector: + """Class to select appropriate window size to catch periodicity for time series analysis. + There are two group of algorithms implemented: + Whole-Series-Based (WSB): + 1. 'hac' - highest_autocorrelation + 2. 'dff' - dominant_fourier_frequency + Subsequence-based (SB): + 1. 'mwf' - multi_window_finder + 2. 'sss' - summary_statistics_subsequence + Args: + method: by ``default``, it is 'dff'. + You can choose between: 'hac', 'dff', 'sss' or 'mwf'. + window_range: % of time series length, by ``default`` it is (5, 50). + Attributes: + length_ts(int): length of the time_series. + window_max(int): maximum window size in real values. + window_min(int): minimum window size in real values. + dict_methods(dict): dictionary with all implemented methods. + Example: + To find window size for single time series:: + ts = np.random.rand(1000) + ws_selector = WindowSizeSelector(method='hac') + window_size = ws_selector.get_window_size(time_series=ts) + To find window size for multiple time series:: + ts = np.random.rand(1000, 10) + ws_selector = WindowSizeSelector(method='hac') + window_size = ws_selector.apply(time_series=ts, average='median') + Reference: + (c) "Windows Size Selection in Unsupervised Time Series Analytics: A Review and Benchmark. Arik Ermshaus, + Patrick Schafer, and Ulf Leser. 2022" + """ + + def __init__(self, method: str = 'dff', window_range: tuple = (5, 50)): + + assert window_range[0] < window_range[1], 'Upper bound of window range should be bigger than lower bound' + + self.dict_methods = {'hac': self.autocorrelation, + 'dff': self.dominant_fourier_frequency, + 'mwf': self.mwf, + 'sss': self.summary_statistics_subsequence} + self.wss_algorithm = method + self.window_range = window_range + self.window_max = None + self.window_min = None + self.length_ts = None + + def apply(self, time_series: Union[pd.DataFrame, np.array], average: str = 'median') -> int: + """Method to run WSS class over selected time series in parallel mode via joblib + Args: + time_series: time series to study + average: 'mean' or 'median' to average window size over all time series + Returns: + window_size_selected: value which has been chosen as appropriate window size + """ + methods = {'mean': np.mean, 'median': np.median} + assert average in methods.keys(), 'Hyperparameters error: `average` should be mean or median' + + if isinstance(time_series, pd.DataFrame): + time_series = time_series.values + + window_list = [self.get_window_size(ts) for ts in time_series] + return round(methods[average](window_list)) + + def get_window_size(self, time_series: np.array) -> int: + """Main function to run WSS class over selected time series + Note: + One of the reason of ValueError is that time series size can be equal or smaller than 50. + In case of it try to initially set window_size min and max. + Returns: + window_size_selected: value which has been chosen as appropriate window size + """ + if time_series.shape[0] == 1: # If time series is a part of multivariate one + time_series = np.array(time_series[0]) + self.length_ts = len(time_series) + + self.window_max = int(round(self.length_ts * self.window_range[1] / 100)) # in real values + self.window_min = int(round(self.length_ts * self.window_range[0] / 100)) # in real values + + window_size_selected = self.dict_methods[self.wss_algorithm](time_series=time_series) + return round(window_size_selected * 100 / self.length_ts) # in % + + def dominant_fourier_frequency(self, time_series: np.array) -> int: + """ + Method to find dominant fourier frequency in time series and return appropriate window size. It is based on + the assumption that the dominant frequency is the one with the highest magnitude in the Fourier transform. The + window size is then the inverse of the dominant frequency. + """ + fourier = np.fft.fft(time_series) + freq = np.fft.fftfreq(time_series.shape[0], 1) + + magnitudes, window_sizes = [], [] + + for coef, freq in zip(fourier, freq): + if coef and freq > 0: + window_size = int(1 / freq) + mag = math.sqrt(coef.real * coef.real + coef.imag * coef.imag) + + if self.window_min <= window_size < self.window_max: + window_sizes.append(window_size) + magnitudes.append(mag) + + return window_sizes[np.argmax(magnitudes)] + + def autocorrelation(self, time_series: np.array) -> int: + """Method to find the highest autocorrelation in time series and return appropriate window size. It is based on + the assumption that the lag of highest autocorrelation coefficient corresponds to the window size that best + captures the periodicity of the time series. + """ + ts_len = time_series.shape[0] + acf_values = acf(time_series, fft=True, nlags=int(ts_len / 2)) + + peaks, _ = find_peaks(acf_values) + peaks = peaks[np.logical_and(peaks >= self.window_min, peaks < self.window_max)] + corrs = acf_values[peaks] + + if peaks.shape[0] == 0: # if there is no peaks in range (window_min, window_max) return window_min + return self.window_min + return peaks[np.argmax(corrs)] + + def mwf(self, time_series: np.array) -> int: + """ Method to find the window size that minimizes the moving average residual. It is based on the assumption + that the window size that best captures the periodicity of the time series is the one that minimizes the + difference between the moving average and the time series. + """ + + all_averages, window_sizes = [], [] + + for w in range(self.window_min, self.window_max, 1): + movingAvg = np.array(self.movmean(time_series, w)) + all_averages.append(movingAvg) + window_sizes.append(w) + + movingAvgResiduals = [] + + for i, w in enumerate(window_sizes): + moving_avg = all_averages[i][:len(all_averages[-1])] + movingAvgResidual = np.log(abs(moving_avg - (moving_avg).mean()).sum()) + movingAvgResiduals.append(movingAvgResidual) + + b = (np.diff(np.sign(np.diff(movingAvgResiduals))) > 0).nonzero()[0] + 1 # local min + + if len(b) == 0: + return self.window_min + if len(b) < 3: + return window_sizes[b[0]] + + reswin = np.array([window_sizes[b[i]] / (i + 1) for i in range(3)]) + w = np.mean(reswin) + + return int(w) + + def movmean(self, ts, w): + """Fast moving average function""" + moving_avg = np.cumsum(ts, dtype=float) + moving_avg[w:] = moving_avg[w:] - moving_avg[:-w] + return moving_avg[w - 1:] / w + + def summary_statistics_subsequence(self, time_series: np.array, threshold=.89) -> int: + """Method to find the window size that maximizes the subsequence unsupervised similarity score (SUSS). It is + based on the assumption that the window size that best captures the periodicity of the time series is the one + that maximizes the similarity between subsequences of the time series. + """ + # lbound = self.window_min + time_series = (time_series - time_series.min()) / (time_series.max() - time_series.min()) + + ts_mean = np.mean(time_series) + ts_std = np.std(time_series) + ts_min_max = np.max(time_series) - np.min(time_series) + + stats = (ts_mean, ts_std, ts_min_max) + + max_score = self.suss_score(time_series=time_series, window_size=1, stats=stats) + min_score = self.suss_score(time_series=time_series, window_size=time_series.shape[0] - 1, stats=stats) + + exp = 0 + + # exponential search (to find window size interval) + while True: + window_size = 2 ** exp + + if window_size < self.window_min: + exp += 1 + continue + + score = 1 - (self.suss_score(time_series, window_size, stats) - min_score) / (max_score - min_score) + + if score > threshold: + break + + exp += 1 + + lbound, ubound = max(self.window_min, 2 ** (exp - 1)), 2 ** exp + 1 + + # binary search (to find window size in interval) + while lbound <= ubound: + window_size = int((lbound + ubound) / 2) + score = 1 - (self.suss_score(time_series, window_size, stats) - min_score) / (max_score - min_score) + + if score < threshold: + lbound = window_size + 1 + elif score > threshold: + ubound = window_size - 1 + else: + break + + return 2 * lbound + + def suss_score(self, time_series, window_size, stats): + roll = pd.Series(time_series).rolling(window_size) + ts_mean, ts_std, ts_min_max = stats + + roll_mean = roll.mean().to_numpy()[window_size:] + roll_std = roll.std(ddof=0).to_numpy()[window_size:] + roll_min = roll.min().to_numpy()[window_size:] + roll_max = roll.max().to_numpy()[window_size:] + + X = np.array([ + roll_mean - ts_mean, + roll_std - ts_std, + (roll_max - roll_min) - ts_min_max + ]) + + X = np.sqrt(np.sum(np.square(X), axis=0)) / np.sqrt(window_size) + + return np.mean(X) \ No newline at end of file From a0ec26a1d53673f7cb8ee6736335ff5e66f89341 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 27 Dec 2023 10:43:32 +0300 Subject: [PATCH 02/23] forbid lagged window mutation in `parameter_change_mutation` --- fedot/core/composer/gp_composer/specific_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/composer/gp_composer/specific_operators.py b/fedot/core/composer/gp_composer/specific_operators.py index 7799fe432f..4d54750974 100644 --- a/fedot/core/composer/gp_composer/specific_operators.py +++ b/fedot/core/composer/gp_composer/specific_operators.py @@ -19,7 +19,7 @@ def parameter_change_mutation(pipeline: Pipeline, requirements, graph_gen_params node_mutation_probability = get_mutation_prob(mut_id=parameters.mutation_strength, node=pipeline.root_node) for node in pipeline.nodes: - if random() < node_mutation_probability: + if node.operation.metadata.id != 'lagged' and random() < node_mutation_probability: operation_name = node.operation.operation_type current_params = node.parameters From 05b344e94d4d59ef4a79f8623a26027eb93655ac Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 27 Dec 2023 10:51:07 +0300 Subject: [PATCH 03/23] fix error with `ndim > 1` --- .../data_operations/ts_transformations.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 0428c5ed4c..22c488124f 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -105,7 +105,7 @@ def transform_for_fit(self, input_data: InputData) -> OutputData: self._update_column_types(output_data) return output_data - def _check_and_correct_window_size(self, time_series: np.array, forecast_length: int): + def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_length: int): """ Method check if the length of the time series is not enough for lagged transformation - clip it @@ -118,8 +118,15 @@ def _check_and_correct_window_size(self, time_series: np.array, forecast_length: """ if self.params.get('autotune_window', 0) == 1: - new = int(WindowSizeSelector(method='hac', window_range=(5, 25)) - .get_window_size(time_series) * len(time_series) / 100) + def get_window(ts: np.ndarray): + return int(WindowSizeSelector(method='hac', window_range=(5, 60)) + .get_window_size(ts) * len(ts) / 100) + + if time_series.ndim > 1: + new = np.mean([get_window(time_series[:, i].ravel()) for i in range(time_series.shape[1])]) + else: + new = get_window(time_series) + self.params.update(window_size=new, autotune_window=0) # Maximum threshold From 06d2d8fa96d6024c74e01d11386897e3eb9e23b8 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 27 Dec 2023 11:20:22 +0300 Subject: [PATCH 04/23] another way to default window size definition --- .../data_operations/ts_transformations.py | 6 +++--- fedot/core/repository/data/default_operation_params.json | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 22c488124f..6b530ae712 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -117,7 +117,7 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt """ - if self.params.get('autotune_window', 0) == 1: + if self.window_size == 0: def get_window(ts: np.ndarray): return int(WindowSizeSelector(method='hac', window_range=(5, 60)) .get_window_size(ts) * len(ts) / 100) @@ -126,8 +126,8 @@ def get_window(ts: np.ndarray): new = np.mean([get_window(time_series[:, i].ravel()) for i in range(time_series.shape[1])]) else: new = get_window(time_series) - - self.params.update(window_size=new, autotune_window=0) + + self.params.update(window_size=new) # Maximum threshold if self.window_size + forecast_length > len(time_series): diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index 1dcbaa732c..7ebed0ffd4 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -50,8 +50,7 @@ "verbose": -1 }, "lagged": { - "window_size": 10, - "autotune_window": 1 + "window_size": 0 }, "diff_filter": { "window_size": 3, From b9c3d220ad9a28f9cbbe63f2be2e3baa939c47be Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 27 Dec 2023 11:36:32 +0300 Subject: [PATCH 05/23] fix test --- test/unit/optimizer/gp_operators/test_mutation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/optimizer/gp_operators/test_mutation.py b/test/unit/optimizer/gp_operators/test_mutation.py index 8cbdf06a82..d090c47477 100644 --- a/test/unit/optimizer/gp_operators/test_mutation.py +++ b/test/unit/optimizer/gp_operators/test_mutation.py @@ -131,8 +131,6 @@ def test_boosting_mutation_for_non_lagged_ts_model(): """ graph = PipelineAdapter().restore(get_ts_forecasting_graph()) - - boosting_graph = get_ts_forecasting_graph_with_boosting() requirements = PipelineComposerRequirements(primary=['ridge'], secondary=['ridge']) pipeline = boosting_mutation(graph, @@ -143,7 +141,11 @@ def test_boosting_mutation_for_non_lagged_ts_model(): data_train, data_test = get_ts_data() pipeline.fit(data_train) result = pipeline.predict(data_test) - assert boosting_graph.descriptive_id == pipeline.descriptive_id + + boosting_pipeline = PipelineAdapter().restore(get_ts_forecasting_graph_with_boosting()) + boosting_pipeline.fit(data_train) + + assert boosting_pipeline.descriptive_id == pipeline.descriptive_id assert result is not None From 7b0850e1de7a076ba0f56d64b59fc76f297dd4a8 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 27 Dec 2023 14:23:21 +0300 Subject: [PATCH 06/23] fix problems with unproper window and fitted assumption --- fedot/api/api_utils/api_composer.py | 3 ++- .../data_operations/ts_transformations.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fedot/api/api_utils/api_composer.py b/fedot/api/api_utils/api_composer.py index 5a0545a85b..efc34d3a2a 100644 --- a/fedot/api/api_utils/api_composer.py +++ b/fedot/api/api_utils/api_composer.py @@ -1,5 +1,6 @@ import datetime import gc +from copy import deepcopy from typing import List, Optional, Sequence, Tuple, Union from golem.core.log import default_log @@ -97,7 +98,7 @@ def propose_and_fit_initial_assumption(self, train_data: InputData) -> Tuple[Seq with self.timer.launch_assumption_fit(): fitted_assumption = \ - assumption_handler.fit_assumption_and_check_correctness(initial_assumption[0], + assumption_handler.fit_assumption_and_check_correctness(deepcopy(initial_assumption[0]), pipelines_cache=self.pipelines_cache, preprocessing_cache=self.preprocessing_cache, eval_n_jobs=self.params.n_jobs) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 6b530ae712..281d59d186 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -107,7 +107,7 @@ def transform_for_fit(self, input_data: InputData) -> OutputData: def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_length: int): """ Method check if the length of the time series is not enough for - lagged transformation - clip it + lagged transformation Args: time_series: time series for transformation @@ -116,6 +116,7 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt Returns: """ + max_allowed_window_size = len(time_series) - forecast_length if self.window_size == 0: def get_window(ts: np.ndarray): @@ -126,11 +127,11 @@ def get_window(ts: np.ndarray): new = np.mean([get_window(time_series[:, i].ravel()) for i in range(time_series.shape[1])]) else: new = get_window(time_series) - + new = min(max_allowed_window_size, new) self.params.update(window_size=new) # Maximum threshold - if self.window_size + forecast_length > len(time_series): + if self.window_size > max_allowed_window_size: raise ValueError(f"Window size is to high ({self.window_size}) for provided data len {len(time_series)}") # Minimum threshold From 1089e136f78f61321162460c5d1cf4709b63d9ae Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 11:36:06 +0300 Subject: [PATCH 07/23] fix lagged names in mutation --- fedot/core/composer/gp_composer/specific_operators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fedot/core/composer/gp_composer/specific_operators.py b/fedot/core/composer/gp_composer/specific_operators.py index 4d54750974..4d122e68e3 100644 --- a/fedot/core/composer/gp_composer/specific_operators.py +++ b/fedot/core/composer/gp_composer/specific_operators.py @@ -19,7 +19,8 @@ def parameter_change_mutation(pipeline: Pipeline, requirements, graph_gen_params node_mutation_probability = get_mutation_prob(mut_id=parameters.mutation_strength, node=pipeline.root_node) for node in pipeline.nodes: - if node.operation.metadata.id != 'lagged' and random() < node_mutation_probability: + if (node.operation.metadata.id not in ('lagged', 'sparse_lagged', 'exog_ts') and + random() < node_mutation_probability): operation_name = node.operation.operation_type current_params = node.parameters From bdd819199576c7a31a30b54fb9df45937f11b71f Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 11:41:53 +0300 Subject: [PATCH 08/23] pep8 --- fedot/utilities/window_size_selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/utilities/window_size_selector.py b/fedot/utilities/window_size_selector.py index 6fd58b594c..d9096b3a83 100644 --- a/fedot/utilities/window_size_selector.py +++ b/fedot/utilities/window_size_selector.py @@ -231,4 +231,4 @@ def suss_score(self, time_series, window_size, stats): X = np.sqrt(np.sum(np.square(X), axis=0)) / np.sqrt(window_size) - return np.mean(X) \ No newline at end of file + return np.mean(X) From e87ff90a815cbdda6511ac81d5f00ecf04928b04 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 12:35:03 +0300 Subject: [PATCH 09/23] add tests --- .../data_operations/ts_transformations.py | 12 +--- fedot/utilities/window_size_selector.py | 63 ++++++++++++------- .../test_time_series_operations.py | 38 +++++++++++ test/unit/utilities/window_size_selector.py | 17 +++++ 4 files changed, 97 insertions(+), 33 deletions(-) create mode 100644 test/unit/utilities/window_size_selector.py diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 281d59d186..55ad036f12 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from fedot.utilities.window_size_selector import WindowSizeSelector +from fedot.utilities.window_size_selector import WindowSizeSelector, WindowSizeSelectorMethodsEnum from golem.core.log import default_log from scipy.ndimage import gaussian_filter from sklearn.decomposition import TruncatedSVD @@ -119,14 +119,8 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt max_allowed_window_size = len(time_series) - forecast_length if self.window_size == 0: - def get_window(ts: np.ndarray): - return int(WindowSizeSelector(method='hac', window_range=(5, 60)) - .get_window_size(ts) * len(ts) / 100) - - if time_series.ndim > 1: - new = np.mean([get_window(time_series[:, i].ravel()) for i in range(time_series.shape[1])]) - else: - new = get_window(time_series) + selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) + new = int(selector.apply(time_series) * time_series.shape[0] * 0.01) new = min(max_allowed_window_size, new) self.params.update(window_size=new) diff --git a/fedot/utilities/window_size_selector.py b/fedot/utilities/window_size_selector.py index d9096b3a83..f6ef266ff0 100644 --- a/fedot/utilities/window_size_selector.py +++ b/fedot/utilities/window_size_selector.py @@ -1,4 +1,5 @@ import math +from enum import Enum, auto from typing import Union import numpy as np @@ -7,18 +8,24 @@ from statsmodels.tsa.stattools import acf +class WindowSizeSelectorMethodsEnum(Enum): + DFF = auto() + HAC = auto() + MWF = auto() + SSS = auto() + + class WindowSizeSelector: """Class to select appropriate window size to catch periodicity for time series analysis. There are two group of algorithms implemented: Whole-Series-Based (WSB): - 1. 'hac' - highest_autocorrelation - 2. 'dff' - dominant_fourier_frequency + 1. WindowSizeSelectorMethodsEnum.HAC - highest_autocorrelation + 2. WindowSizeSelectorMethodsEnum.DFF - dominant_fourier_frequency Subsequence-based (SB): - 1. 'mwf' - multi_window_finder - 2. 'sss' - summary_statistics_subsequence + 1. WindowSizeSelectorMethodsEnum.MWF - multi_window_finder + 2. WindowSizeSelectorMethodsEnum.SSS - summary_statistics_subsequence Args: - method: by ``default``, it is 'dff'. - You can choose between: 'hac', 'dff', 'sss' or 'mwf'. + method: by ``default``, it is WindowSizeSelectorMethodsEnum.DFF. window_range: % of time series length, by ``default`` it is (5, 50). Attributes: length_ts(int): length of the time_series. @@ -39,21 +46,24 @@ class WindowSizeSelector: Patrick Schafer, and Ulf Leser. 2022" """ - def __init__(self, method: str = 'dff', window_range: tuple = (5, 50)): + def __init__(self, + method: WindowSizeSelectorMethodsEnum = WindowSizeSelectorMethodsEnum.DFF, + window_range: tuple = (5, 50)): - assert window_range[0] < window_range[1], 'Upper bound of window range should be bigger than lower bound' + if window_range[0] >= window_range[1]: + raise ValueError('Upper bound of window range should be bigger than lower bound') - self.dict_methods = {'hac': self.autocorrelation, - 'dff': self.dominant_fourier_frequency, - 'mwf': self.mwf, - 'sss': self.summary_statistics_subsequence} + self.dict_methods = {WindowSizeSelectorMethodsEnum.HAC: self.autocorrelation, + WindowSizeSelectorMethodsEnum.DFF: self.dominant_fourier_frequency, + WindowSizeSelectorMethodsEnum.MWF: self.mwf, + WindowSizeSelectorMethodsEnum.SSS: self.summary_statistics_subsequence} self.wss_algorithm = method self.window_range = window_range self.window_max = None self.window_min = None self.length_ts = None - def apply(self, time_series: Union[pd.DataFrame, np.array], average: str = 'median') -> int: + def apply(self, time_series: np.ndarray, average: str = 'median') -> int: """Method to run WSS class over selected time series in parallel mode via joblib Args: time_series: time series to study @@ -62,15 +72,12 @@ def apply(self, time_series: Union[pd.DataFrame, np.array], average: str = 'medi window_size_selected: value which has been chosen as appropriate window size """ methods = {'mean': np.mean, 'median': np.median} - assert average in methods.keys(), 'Hyperparameters error: `average` should be mean or median' - - if isinstance(time_series, pd.DataFrame): - time_series = time_series.values - - window_list = [self.get_window_size(ts) for ts in time_series] + if time_series.ndim == 1: + time_series = time_series.reshape((-1, 1)) + window_list = [self.get_window_size(time_series[:, i].ravel()) for i in range(time_series.shape[1])] return round(methods[average](window_list)) - def get_window_size(self, time_series: np.array) -> int: + def get_window_size(self, time_series: np.ndarray) -> int: """Main function to run WSS class over selected time series Note: One of the reason of ValueError is that time series size can be equal or smaller than 50. @@ -86,9 +93,12 @@ def get_window_size(self, time_series: np.array) -> int: self.window_min = int(round(self.length_ts * self.window_range[0] / 100)) # in real values window_size_selected = self.dict_methods[self.wss_algorithm](time_series=time_series) - return round(window_size_selected * 100 / self.length_ts) # in % + window_size_selected = round(window_size_selected * 100 / self.length_ts) + window_size_selected = max(self.window_range[0], window_size_selected) + window_size_selected = min(self.window_range[1], window_size_selected) + return window_size_selected - def dominant_fourier_frequency(self, time_series: np.array) -> int: + def dominant_fourier_frequency(self, time_series: np.ndarray) -> int: """ Method to find dominant fourier frequency in time series and return appropriate window size. It is based on the assumption that the dominant frequency is the one with the highest magnitude in the Fourier transform. The @@ -107,8 +117,10 @@ def dominant_fourier_frequency(self, time_series: np.array) -> int: if self.window_min <= window_size < self.window_max: window_sizes.append(window_size) magnitudes.append(mag) - - return window_sizes[np.argmax(magnitudes)] + if window_sizes and magnitudes: + return window_sizes[np.argmax(magnitudes)] + else: + return self.window_min def autocorrelation(self, time_series: np.array) -> int: """Method to find the highest autocorrelation in time series and return appropriate window size. It is based on @@ -187,6 +199,9 @@ def summary_statistics_subsequence(self, time_series: np.array, threshold=.89) - while True: window_size = 2 ** exp + if window_size > self.window_max: + break + if window_size < self.window_min: exp += 1 continue diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index 3c7b071fe1..ef6ce399f5 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -2,6 +2,7 @@ import pytest from fedot.core.data.data_split import train_test_data_setup +from fedot.core.pipelines.pipeline_builder import PipelineBuilder from golem.core.log import default_log from fedot.core.data.data import InputData @@ -238,3 +239,40 @@ def test_lagged_node(length, features_count, target_count, window_size): predict = node.predict(test) assert np.all(predict.predict[-1, :] == np.reshape(test.features[-window_size:].T, (-1, ))) + + +def test_lagged_window_size_selector_tune_window_by_default(): + ts = get_timeseries(length=1000) + pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() + origin_window_size = pipeline.nodes[-1].parameters['window_size'] + pipeline.fit(ts) + new_window_size = pipeline.nodes[-1].parameters['window_size'] + + assert origin_window_size != new_window_size + assert 0 < new_window_size < ts.features.shape[0] + + +@pytest.mark.parametrize('origin_window_size', [10, 20, 100]) +def test_lagged_window_size_selector_does_not_tune_set_window(origin_window_size): + ts = get_timeseries(length=1000) + pipeline = (PipelineBuilder() + .add_node('lagged', params={'window_size': origin_window_size}) + .add_node('ridge').build()) + assert origin_window_size == pipeline.nodes[-1].parameters['window_size'] + pipeline.fit(ts) + assert origin_window_size == pipeline.nodes[-1].parameters['window_size'] + + +@pytest.mark.parametrize('freq', [5, 10, 20]) +def test_lagged_window_size_selector_adequate(freq): + ts = get_timeseries(length=1000) + time = np.linspace(0, 1, ts.features.shape[0]) + ts.features = np.sin(2 * np.pi * freq * time) + + pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() + pipeline.fit(ts) + + window = pipeline.nodes[-1].parameters['window_size'] + expected_window = ts.features.shape[0] / (freq * 2) + + assert expected_window / 2 <= window <= expected_window * 2 diff --git a/test/unit/utilities/window_size_selector.py b/test/unit/utilities/window_size_selector.py new file mode 100644 index 0000000000..350ffd636c --- /dev/null +++ b/test/unit/utilities/window_size_selector.py @@ -0,0 +1,17 @@ +from itertools import combinations + +import numpy as np +import pytest + +from fedot.utilities.window_size_selector import WindowSizeSelector, WindowSizeSelectorMethodsEnum + + +@pytest.mark.parametrize('method', WindowSizeSelectorMethodsEnum) +@pytest.mark.parametrize(['window_min', 'window_max'], + [sorted(x) for x in combinations(map(int, np.random.rand(5) * 100), 2)] + + [(1, 2), (98, 99), (1, 99)]) +def test_window_size_selector(method, window_min, window_max): + selector = WindowSizeSelector(method=method, window_range=(window_min, window_max)) + ts = np.random.rand(1000) + + assert window_min <= selector.apply(time_series=ts) <= window_max From d99a6301e550c522d118311725df0a015b6965b8 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 13:18:38 +0300 Subject: [PATCH 10/23] add test --- fedot/utilities/window_size_selector.py | 4 ++++ test/unit/utilities/window_size_selector.py | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fedot/utilities/window_size_selector.py b/fedot/utilities/window_size_selector.py index f6ef266ff0..c11461c8dc 100644 --- a/fedot/utilities/window_size_selector.py +++ b/fedot/utilities/window_size_selector.py @@ -52,6 +52,10 @@ def __init__(self, if window_range[0] >= window_range[1]: raise ValueError('Upper bound of window range should be bigger than lower bound') + if window_range[0] < 0: + raise ValueError('Lower bound of window range should be bigger or equal to 0') + if window_range[1] > 100: + raise ValueError('Upper bound of window range should be lower or equal to 100') self.dict_methods = {WindowSizeSelectorMethodsEnum.HAC: self.autocorrelation, WindowSizeSelectorMethodsEnum.DFF: self.dominant_fourier_frequency, diff --git a/test/unit/utilities/window_size_selector.py b/test/unit/utilities/window_size_selector.py index 350ffd636c..9dd9c5136c 100644 --- a/test/unit/utilities/window_size_selector.py +++ b/test/unit/utilities/window_size_selector.py @@ -13,5 +13,17 @@ def test_window_size_selector(method, window_min, window_max): selector = WindowSizeSelector(method=method, window_range=(window_min, window_max)) ts = np.random.rand(1000) - + assert window_min <= selector.apply(time_series=ts) <= window_max + + +@pytest.mark.parametrize(['window_min', 'window_max'], + list(combinations(map(int, np.random.rand(10) * 200 - 50), 2)) + + [[-1, 10], [10, 5], [95, 105], [-10, -9], [105, 110]]) +def test_window_size_selector_with_uncorrect_window_params(window_min, window_max): + error = window_min < 0 + error |= window_max > 100 + error |= window_min >= window_max + if error: + with pytest.raises(ValueError): + WindowSizeSelector(window_range=(window_min, window_max)) From 9a0c5ffc61a9d8604a15ac23ef61d68a1545c67f Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 14:47:35 +0300 Subject: [PATCH 11/23] new tests --- .../data_operations/ts_transformations.py | 2 + fedot/utilities/window_size_selector.py | 2 - .../test_time_series_operations.py | 89 ++++++++++++++++++- 3 files changed, 88 insertions(+), 5 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 55ad036f12..e1727c275f 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -122,6 +122,8 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) new = int(selector.apply(time_series) * time_series.shape[0] * 0.01) new = min(max_allowed_window_size, new) + self.log.info((f"Window size of lagged transformation was changed " + f"by WindowSizeSelector from {self.params.get('window_size')} to {new}")) self.params.update(window_size=new) # Maximum threshold diff --git a/fedot/utilities/window_size_selector.py b/fedot/utilities/window_size_selector.py index c11461c8dc..62f291db12 100644 --- a/fedot/utilities/window_size_selector.py +++ b/fedot/utilities/window_size_selector.py @@ -89,8 +89,6 @@ def get_window_size(self, time_series: np.ndarray) -> int: Returns: window_size_selected: value which has been chosen as appropriate window size """ - if time_series.shape[0] == 1: # If time series is a part of multivariate one - time_series = np.array(time_series[0]) self.length_ts = len(time_series) self.window_max = int(round(self.length_ts * self.window_range[1] / 100)) # in real values diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index ef6ce399f5..05c239e6df 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -1,8 +1,13 @@ +import logging + import numpy as np import pytest from fedot.core.data.data_split import train_test_data_setup +from fedot.core.optimisers.objective import MetricsObjective, PipelineObjectiveEvaluate +from fedot.core.optimisers.objective.data_source_splitter import DataSourceSplitter from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from golem.core.log import default_log from fedot.core.data.data import InputData @@ -21,6 +26,26 @@ _FORECAST_LENGTH = 4 +def prepare_logging(): + old_factory = logging.getLogRecordFactory() + records = [] + + def record_factory(*args, **kwargs): + record = old_factory(*args, **kwargs) + records.append(record) + return record + + logging.setLogRecordFactory(record_factory) + return records + +def check_window_size_selector_logging(records): + return [hasattr(record, 'message') and + record.message.startswith('LaggedTransformationImplementation') and + 'WindowSizeSelector' in record.message + for record in records] + + + def synthetic_univariate_ts(): """ Method returns InputData for classical time series forecasting task """ task = Task(TaskTypesEnum.ts_forecasting, @@ -47,10 +72,15 @@ def synthetic_univariate_ts(): def get_timeseries(length=10, features_count=1, - target_count=1, forecast_length=_FORECAST_LENGTH): + target_count=1, forecast_length=_FORECAST_LENGTH, + random=False): task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=forecast_length)) - features = np.arange(0, length * features_count) * 10 + if random: + features = np.random.rand(length, features_count) * 10 + features = features.ravel() if features_count == 1 else features + else: + features = np.arange(0, length * features_count) * 10 if features_count > 1: features = np.reshape(features, (features_count, length)).T for i in range(features_count): @@ -253,7 +283,7 @@ def test_lagged_window_size_selector_tune_window_by_default(): @pytest.mark.parametrize('origin_window_size', [10, 20, 100]) -def test_lagged_window_size_selector_does_not_tune_set_window(origin_window_size): +def test_lagged_window_size_selector_does_not_tune_defined_window(origin_window_size): ts = get_timeseries(length=1000) pipeline = (PipelineBuilder() .add_node('lagged', params={'window_size': origin_window_size}) @@ -263,6 +293,15 @@ def test_lagged_window_size_selector_does_not_tune_set_window(origin_window_size assert origin_window_size == pipeline.nodes[-1].parameters['window_size'] +@pytest.mark.parametrize('window_size', [10, 20, 100]) +def test_lagged_window_size_selector_does_not_tune_manual_defined_window(window_size): + ts = get_timeseries(length=1000) + pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() + pipeline.nodes[-1].parameters = {'window_size': window_size} + pipeline.fit(ts) + assert window_size == pipeline.nodes[-1].parameters['window_size'] + + @pytest.mark.parametrize('freq', [5, 10, 20]) def test_lagged_window_size_selector_adequate(freq): ts = get_timeseries(length=1000) @@ -276,3 +315,47 @@ def test_lagged_window_size_selector_adequate(freq): expected_window = ts.features.shape[0] / (freq * 2) assert expected_window / 2 <= window <= expected_window * 2 + + +@pytest.mark.parametrize('n_jobs', (1, -1)) +def test_evaluation_correctly_work_with_window_size_selector(n_jobs): + ts = get_timeseries(length=1000) + data_splitter = DataSourceSplitter(cv_folds=3) + data_producer = data_splitter.build(ts) + objective = MetricsObjective('rmse', False) + objective_evaluator = PipelineObjectiveEvaluate(objective=objective, + data_producer=data_producer, + validation_blocks=data_splitter.validation_blocks, + eval_n_jobs=n_jobs) + objective_function = objective_evaluator.evaluate + + pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() + + # prepare factory to get all records + records = prepare_logging() + + # run objective function + objective_function(pipeline) + + # check that WindowSizeSelector runs once + assert sum(check_window_size_selector_logging(records)) == 1 + + +def test_tuner_correctly_work_with_window_size_selector(): + ts = get_timeseries(length=1000, random=True) + + autotuned_pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() + autotuned_pipeline.fit(ts) + autotuned_window = autotuned_pipeline.nodes[-1].parameters['window_size'] + + # prepare factory to get all records + records = prepare_logging() + + tuner_tuned_pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() + tuner = TunerBuilder(task=ts.task).with_iterations(10).build(data=ts) + tuned_pipeline = tuner.tune(graph=tuner_tuned_pipeline, show_progress=False) + tuner_tuned_window = tuned_pipeline.nodes[-1].parameters['window_size'] + + assert autotuned_window != tuner_tuned_window + # check that WindowSizeSelector runs twice due to tuner graph copying in initialization + assert sum(check_window_size_selector_logging(records)) == 2 From 96742e5882083c1e5f8aa8c0b826754e18d3349d Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 15:03:51 +0300 Subject: [PATCH 12/23] fix --- test/unit/data_operations/test_time_series_operations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index 05c239e6df..202cea7f22 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -39,9 +39,9 @@ def record_factory(*args, **kwargs): return records def check_window_size_selector_logging(records): - return [hasattr(record, 'message') and - record.message.startswith('LaggedTransformationImplementation') and - 'WindowSizeSelector' in record.message + return [hasattr(record, 'msg') and + 'LaggedTransformationImplementation' in record.msg and + 'WindowSizeSelector' in record.msg for record in records] From 4c593a0021e824bf6024215886ae812ee63c9086 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 15:09:56 +0300 Subject: [PATCH 13/23] fix --- test/unit/data_operations/test_time_series_operations.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index 202cea7f22..67f686ba08 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -39,9 +39,8 @@ def record_factory(*args, **kwargs): return records def check_window_size_selector_logging(records): - return [hasattr(record, 'msg') and - 'LaggedTransformationImplementation' in record.msg and - 'WindowSizeSelector' in record.msg + return ['LaggedTransformationImplementation' in str(record) and + 'WindowSizeSelector' in str(record) for record in records] From 3c2a552ea962bcbd347122ccc42a8b287fdb7db6 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 15:18:06 +0300 Subject: [PATCH 14/23] experiment --- .../data_operations/ts_transformations.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index e1727c275f..91279211a4 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -118,6 +118,9 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt """ max_allowed_window_size = len(time_series) - forecast_length + self.log.info((f"Window size of lagged transformation was changed " + f"by WindowSizeSelector from {self.params.get('window_size')} to {new}")) + if self.window_size == 0: selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) new = int(selector.apply(time_series) * time_series.shape[0] * 0.01) From caf7076a389f5c60b6d70476278e08c9c20bec9c Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 15:18:20 +0300 Subject: [PATCH 15/23] experiment --- .../data_operations/ts_transformations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 91279211a4..a32d350227 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -119,7 +119,7 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt max_allowed_window_size = len(time_series) - forecast_length self.log.info((f"Window size of lagged transformation was changed " - f"by WindowSizeSelector from {self.params.get('window_size')} to {new}")) + f"by WindowSizeSelector from ")) if self.window_size == 0: selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) From 59efb3ba27f3dafa71572e8fd817d90ff83ff74e Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 16:59:29 +0300 Subject: [PATCH 16/23] experiment --- .../data_operations/ts_transformations.py | 3 --- test/unit/data_operations/test_time_series_operations.py | 7 ++++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index a32d350227..e1727c275f 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -118,9 +118,6 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt """ max_allowed_window_size = len(time_series) - forecast_length - self.log.info((f"Window size of lagged transformation was changed " - f"by WindowSizeSelector from ")) - if self.window_size == 0: selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) new = int(selector.apply(time_series) * time_series.shape[0] * 0.01) diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index 67f686ba08..05385c00c6 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -39,9 +39,14 @@ def record_factory(*args, **kwargs): return records def check_window_size_selector_logging(records): - return ['LaggedTransformationImplementation' in str(record) and + print('---------------------------'*20) + print(records) + res = ['LaggedTransformationImplementation' in str(record) and 'WindowSizeSelector' in str(record) for record in records] + print(res) + print('---------------------------' * 20) + return res From afeab258a5ec869289587d0f82731dbf03a3062f Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 17:28:35 +0300 Subject: [PATCH 17/23] experiment --- .../data_operations/ts_transformations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index e1727c275f..32c696b7a6 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -122,8 +122,8 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) new = int(selector.apply(time_series) * time_series.shape[0] * 0.01) new = min(max_allowed_window_size, new) - self.log.info((f"Window size of lagged transformation was changed " - f"by WindowSizeSelector from {self.params.get('window_size')} to {new}")) + self.log.message((f"Window size of lagged transformation was changed " + f"by WindowSizeSelector from {self.params.get('window_size')} to {new}")) self.params.update(window_size=new) # Maximum threshold From adf68f9a705dcaec51c6b299e5a6ea9dfb4c3f83 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 28 Dec 2023 17:35:56 +0300 Subject: [PATCH 18/23] fix test --- test/unit/data_operations/test_time_series_operations.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index 05385c00c6..67f686ba08 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -39,14 +39,9 @@ def record_factory(*args, **kwargs): return records def check_window_size_selector_logging(records): - print('---------------------------'*20) - print(records) - res = ['LaggedTransformationImplementation' in str(record) and + return ['LaggedTransformationImplementation' in str(record) and 'WindowSizeSelector' in str(record) for record in records] - print(res) - print('---------------------------' * 20) - return res From f582def0f2c6296c69a1c659d674173b27324f8f Mon Sep 17 00:00:00 2001 From: Sergey Date: Thu, 4 Jan 2024 10:43:38 +0300 Subject: [PATCH 19/23] Update fedot/core/composer/gp_composer/specific_operators.py --- fedot/core/composer/gp_composer/specific_operators.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fedot/core/composer/gp_composer/specific_operators.py b/fedot/core/composer/gp_composer/specific_operators.py index 4d122e68e3..ed8e11ba7c 100644 --- a/fedot/core/composer/gp_composer/specific_operators.py +++ b/fedot/core/composer/gp_composer/specific_operators.py @@ -19,8 +19,9 @@ def parameter_change_mutation(pipeline: Pipeline, requirements, graph_gen_params node_mutation_probability = get_mutation_prob(mut_id=parameters.mutation_strength, node=pipeline.root_node) for node in pipeline.nodes: - if (node.operation.metadata.id not in ('lagged', 'sparse_lagged', 'exog_ts') and - random() < node_mutation_probability): + lagged = node.operation.metadata.id in ('lagged', 'sparse_lagged', 'exog_ts') + do_mutation = random() < (node_mutation_probability * (0.5 if lagged else 1)) + if do_mutation: operation_name = node.operation.operation_type current_params = node.parameters From 195b06f63cd07414f96721b723544e821809ab61 Mon Sep 17 00:00:00 2001 From: Sergey Date: Thu, 4 Jan 2024 11:14:33 +0300 Subject: [PATCH 20/23] Apply suggestions to lagged implementation from code review --- .../data_operations/ts_transformations.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 32c696b7a6..93a9d7837c 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -116,7 +116,7 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt Returns: """ - max_allowed_window_size = len(time_series) - forecast_length + max_allowed_window_size = max(1, len(time_series) - forecast_length - 1) if self.window_size == 0: selector = WindowSizeSelector(method=WindowSizeSelectorMethodsEnum.HAC, window_range=(5, 60)) @@ -128,7 +128,11 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt # Maximum threshold if self.window_size > max_allowed_window_size: - raise ValueError(f"Window size is to high ({self.window_size}) for provided data len {len(time_series)}") + new = int(np.random.rand() * max_allowed_window_size) + new = min(new, max_allowed_window_size) + new = max(new, self.window_size_minimum) + self.log.message((f"Window size of lagged transformation was changed from {self.params.get('window_size')} to {new}")) + self.params.update(window_size=new) # Minimum threshold if self.window_size < self.window_size_minimum: From 70d30569b754636a3dba03ea8d0a639b57d5975a Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 4 Jan 2024 11:32:44 +0300 Subject: [PATCH 21/23] FIx test --- .../data_operations/ts_transformations.py | 5 +++-- test/unit/data_operations/test_data_operation_params.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 93a9d7837c..cd58027af7 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -1,4 +1,5 @@ from copy import copy, deepcopy +from random import random from typing import Optional, Union import numpy as np @@ -128,10 +129,10 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt # Maximum threshold if self.window_size > max_allowed_window_size: - new = int(np.random.rand() * max_allowed_window_size) + new = int(random() * max_allowed_window_size) new = min(new, max_allowed_window_size) new = max(new, self.window_size_minimum) - self.log.message((f"Window size of lagged transformation was changed from {self.params.get('window_size')} to {new}")) + self.log.info((f"Window size of lagged transformation was changed from {self.params.get('window_size')} to {new}")) self.params.update(window_size=new) # Minimum threshold diff --git a/test/unit/data_operations/test_data_operation_params.py b/test/unit/data_operations/test_data_operation_params.py index 082037d743..c18774e510 100644 --- a/test/unit/data_operations/test_data_operation_params.py +++ b/test/unit/data_operations/test_data_operation_params.py @@ -54,8 +54,9 @@ def test_lagged_with_invalid_params_fit_correctly(): pipeline = get_ts_pipeline(window_size) # Fit it - with pytest.raises(ValueError): - pipeline.fit(ts_input) + pipeline.fit(ts_input) + assert 1 <= pipeline.nodes[-1].parameters.window_size <= len(time_series) - len_forecast + def test_ransac_with_invalid_params_fit_correctly(): From d9e414b3491f863ab597a489835a6937b1af21c5 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 4 Jan 2024 11:40:46 +0300 Subject: [PATCH 22/23] pep8 --- .../data_operations/ts_transformations.py | 3 ++- fedot/utilities/window_size_selector.py | 1 - test/unit/data_operations/test_data_operation_params.py | 2 -- test/unit/data_operations/test_time_series_operations.py | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index cd58027af7..7222872be4 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -132,7 +132,8 @@ def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_lengt new = int(random() * max_allowed_window_size) new = min(new, max_allowed_window_size) new = max(new, self.window_size_minimum) - self.log.info((f"Window size of lagged transformation was changed from {self.params.get('window_size')} to {new}")) + self.log.info(("Window size of lagged transformation was changed " + f"from {self.params.get('window_size')} to {new}")) self.params.update(window_size=new) # Minimum threshold diff --git a/fedot/utilities/window_size_selector.py b/fedot/utilities/window_size_selector.py index 62f291db12..f60302c30c 100644 --- a/fedot/utilities/window_size_selector.py +++ b/fedot/utilities/window_size_selector.py @@ -1,6 +1,5 @@ import math from enum import Enum, auto -from typing import Union import numpy as np import pandas as pd diff --git a/test/unit/data_operations/test_data_operation_params.py b/test/unit/data_operations/test_data_operation_params.py index c18774e510..dc1d86e0fa 100644 --- a/test/unit/data_operations/test_data_operation_params.py +++ b/test/unit/data_operations/test_data_operation_params.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd -import pytest from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup @@ -56,7 +55,6 @@ def test_lagged_with_invalid_params_fit_correctly(): # Fit it pipeline.fit(ts_input) assert 1 <= pipeline.nodes[-1].parameters.window_size <= len(time_series) - len_forecast - def test_ransac_with_invalid_params_fit_correctly(): diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index 67f686ba08..39ad5e7851 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -38,13 +38,13 @@ def record_factory(*args, **kwargs): logging.setLogRecordFactory(record_factory) return records + def check_window_size_selector_logging(records): return ['LaggedTransformationImplementation' in str(record) and 'WindowSizeSelector' in str(record) for record in records] - def synthetic_univariate_ts(): """ Method returns InputData for classical time series forecasting task """ task = Task(TaskTypesEnum.ts_forecasting, From fe4d61a7e12aeec98940ec50fdd0974c23fe7083 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Thu, 4 Jan 2024 11:41:48 +0300 Subject: [PATCH 23/23] Fix test --- test/unit/data_operations/test_data_operation_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/data_operations/test_data_operation_params.py b/test/unit/data_operations/test_data_operation_params.py index dc1d86e0fa..303b2741c5 100644 --- a/test/unit/data_operations/test_data_operation_params.py +++ b/test/unit/data_operations/test_data_operation_params.py @@ -54,7 +54,7 @@ def test_lagged_with_invalid_params_fit_correctly(): # Fit it pipeline.fit(ts_input) - assert 1 <= pipeline.nodes[-1].parameters.window_size <= len(time_series) - len_forecast + assert 1 <= pipeline.nodes[-1].parameters['window_size'] <= len(time_series) - len_forecast def test_ransac_with_invalid_params_fit_correctly():