From 0feea5d016ef03bfde822e6f6816a19d38f1d884 Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Wed, 29 Nov 2023 21:55:33 +0300 Subject: [PATCH 01/14] added preparatory functions for fast_train check --- test/integration/models/test_model.py | 75 ++++++++++++++++++++------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 55af186a14..10927b427e 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -1,7 +1,10 @@ import pickle +import matplotlib.pyplot as plt + from collections import defaultdict from copy import deepcopy from time import perf_counter +from typing import Tuple, Optional import numpy as np import pytest @@ -10,6 +13,7 @@ from sklearn.metrics import mean_absolute_error, mean_squared_error, roc_auc_score as roc_auc from sklearn.preprocessing import MinMaxScaler +from fedot.core.constants import FAST_TRAIN_PRESET_NAME from fedot.core.data.data import InputData, OutputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.supplementary_data import SupplementaryData @@ -26,7 +30,7 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.dataset_types import DataTypesEnum -from fedot.core.repository.operation_types_repository import OperationTypesRepository +from fedot.core.repository.operation_types_repository import OperationMetaInfo, OperationTypesRepository from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams from test.unit.common_tests import is_predict_ignores_target from test.unit.data_operations.test_time_series_operations import synthetic_univariate_ts @@ -475,31 +479,62 @@ def test_operations_are_serializable(): def test_operations_are_fast(): + """ + Test ensures that all operations with fast_train preset meet sustainability expectation. + Test defines operation complexity as polynomial function of data size. + If complexity function grows fast, then operation should not have fast_train tag. + """ # models that raise exception to_skip = ['custom', 'decompose', 'class_decompose'] - time_limits = defaultdict(lambda *args: 0.5, {'expensive': 2, 'non-default': 100}) + data_lengths = tuple(map(int, np.logspace(2, 4, 6))) for operation in OperationTypesRepository('all')._repo: if operation.id in to_skip: continue - time_limit = [time_limits[tag] for tag in time_limits if tag in operation.tags] - time_limit = max(time_limit) if time_limit else time_limits.default_factory() - for task_type in operation.task_type: - for data_type in operation.input_types: + if operation.presets is not None and FAST_TRAIN_PRESET_NAME in operation.presets: + perfomance_values = get_operation_perfomance(operation) + # TODO: filter out operations by perfomance values + if perfomance_values is not None: + plot_operation_perfomance(data_lengths[1:], perfomance_values[1:]) + assert True + + +def get_operation_perfomance(operation: OperationMetaInfo) -> Optional[Tuple[float]]: + """ + Helper function to check perfomance of only the first valid operation pair (task_type, input_type). + """ + data_lengths = tuple(map(int, np.logspace(2, 4, 6))) + + for task_type in operation.task_type: + for data_type in operation.input_types: + perfomance_values = [] + for length in data_lengths: data = get_data_for_testing(task_type, data_type, - length=100, features_count=2, + length=length, features_count=2, random=True) if data is not None: - try: - nodes_from = [] - if task_type is TaskTypesEnum.ts_forecasting: - if 'non_lagged' not in operation.tags: - nodes_from = [PipelineNode('lagged')] - node = PipelineNode(operation.id, nodes_from=nodes_from) - pipeline = Pipeline(node) - start_time = perf_counter() - pipeline.fit(data) - stop_time = perf_counter() - start_time - assert stop_time <= time_limit or True - except NotImplementedError: - pass + nodes_from = [] + if task_type is TaskTypesEnum.ts_forecasting: + if 'non_lagged' not in operation.tags: + nodes_from = [PipelineNode('lagged')] + node = PipelineNode(operation.id, nodes_from=nodes_from) + pipeline = Pipeline(node) + start_time = perf_counter() + pipeline.fit(data) + stop_time = perf_counter() - start_time + perfomance_values.append(stop_time) + if perfomance_values and len(perfomance_values) == len(data_lengths): + return tuple(perfomance_values) + + +def plot_operation_perfomance(data_lengths: Tuple[float], perfomance_values: Tuple[float]) -> None: + """ + Temporary function for plotting perfomance values and their approximate function. + """ + coefficients = np.polyfit(data_lengths, perfomance_values, 2) + approx_data_lengths = np.linspace(data_lengths[0], data_lengths[-1], 1000) + approx_perfomance_values = np.poly1d(coefficients)(approx_data_lengths) + with plt.ion(): + plt.scatter(data_lengths, perfomance_values, label='Perfomance values') + plt.plot(approx_data_lengths, approx_perfomance_values, label='Approximation') + plt.pause(0.1) From f96b1458c7fcbc6cf5540f3b329730e6cc1f71f2 Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Fri, 1 Dec 2023 16:10:30 +0300 Subject: [PATCH 02/14] Remove fast_train preset and non-default tag from ransac_non_lin_reg --- fedot/core/repository/data/data_operation_repository.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json index 27dc9ef572..028a738985 100644 --- a/fedot/core/repository/data/data_operation_repository.json +++ b/fedot/core/repository/data/data_operation_repository.json @@ -219,7 +219,7 @@ }, "ransac_non_lin_reg": { "meta": "regression_preprocessing", - "presets": ["fast_train", "*tree"], + "presets": ["*tree"], "tags": ["affects_target", "non_linear", "filtering", "correct_params", "non_applicable_for_ts", "non-default"] }, From 8389c08e1fb8669d65d29454753fc98fbf4672df Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Fri, 1 Dec 2023 16:17:45 +0300 Subject: [PATCH 03/14] Remove non-default tag from ransac_non_lin_reg --- fedot/core/repository/data/data_operation_repository.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json index 028a738985..aa3087769f 100644 --- a/fedot/core/repository/data/data_operation_repository.json +++ b/fedot/core/repository/data/data_operation_repository.json @@ -221,7 +221,7 @@ "meta": "regression_preprocessing", "presets": ["*tree"], "tags": ["affects_target", "non_linear", "filtering", - "correct_params", "non_applicable_for_ts", "non-default"] + "correct_params", "non_applicable_for_ts"] }, "isolation_forest_reg": { "meta": "regression_preprocessing", From a02ba2a14963af22a5b58948567744c77024e489 Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Fri, 1 Dec 2023 19:23:40 +0300 Subject: [PATCH 04/14] Remove fast_train tag from diff_filter --- fedot/core/repository/data/data_operation_repository.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json index aa3087769f..32ae7884c3 100644 --- a/fedot/core/repository/data/data_operation_repository.json +++ b/fedot/core/repository/data/data_operation_repository.json @@ -293,7 +293,7 @@ }, "diff_filter": { "meta": "custom_time_series_transformation", - "presets": ["fast_train", "ts"], + "presets": ["ts"], "tags": [ "differential", "non_lagged", From 538f3ea66827a4a8421cf6db60c957e8808263ab Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Fri, 1 Dec 2023 20:45:54 +0300 Subject: [PATCH 05/14] Form a test for checking fast_train preset --- test/integration/models/test_model.py | 47 ++++++++++++--------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 10927b427e..9356143c56 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -1,7 +1,6 @@ import pickle import matplotlib.pyplot as plt -from collections import defaultdict from copy import deepcopy from time import perf_counter from typing import Tuple, Optional @@ -487,24 +486,33 @@ def test_operations_are_fast(): # models that raise exception to_skip = ['custom', 'decompose', 'class_decompose'] data_lengths = tuple(map(int, np.logspace(2, 4, 6))) + reference_operations = ['rf', 'rfr'] + reference_max_time, reference_second_max_time = float('inf'), float('inf') for operation in OperationTypesRepository('all')._repo: - if operation.id in to_skip: - continue - if operation.presets is not None and FAST_TRAIN_PRESET_NAME in operation.presets: - perfomance_values = get_operation_perfomance(operation) - # TODO: filter out operations by perfomance values + if operation.id in reference_operations: + perfomance_values = get_operation_perfomance(operation, data_lengths) if perfomance_values is not None: - plot_operation_perfomance(data_lengths[1:], perfomance_values[1:]) - assert True - - -def get_operation_perfomance(operation: OperationMetaInfo) -> Optional[Tuple[float]]: + reference_max_time = min(reference_max_time, perfomance_values[-1]) + reference_second_max_time = min(reference_second_max_time, perfomance_values[-2]) + + if reference_max_time < float('inf'): + for operation in OperationTypesRepository('all')._repo: + if operation.id in to_skip or operation.id in reference_operations: + continue + if operation.presets is not None and FAST_TRAIN_PRESET_NAME in operation.presets: + perfomance_values = get_operation_perfomance(operation, data_lengths) + if perfomance_values is not None: + max_time = perfomance_values[-1] + second_max_time = perfomance_values[-2] + assert max_time <= reference_max_time and second_max_time <= reference_second_max_time, \ + f'operation {operation.id} should not have fast_train preset' + + +def get_operation_perfomance(operation: OperationMetaInfo, data_lengths: Tuple[float, ...]) -> Optional[Tuple[float, ...]]: """ Helper function to check perfomance of only the first valid operation pair (task_type, input_type). """ - data_lengths = tuple(map(int, np.logspace(2, 4, 6))) - for task_type in operation.task_type: for data_type in operation.input_types: perfomance_values = [] @@ -525,16 +533,3 @@ def get_operation_perfomance(operation: OperationMetaInfo) -> Optional[Tuple[flo perfomance_values.append(stop_time) if perfomance_values and len(perfomance_values) == len(data_lengths): return tuple(perfomance_values) - - -def plot_operation_perfomance(data_lengths: Tuple[float], perfomance_values: Tuple[float]) -> None: - """ - Temporary function for plotting perfomance values and their approximate function. - """ - coefficients = np.polyfit(data_lengths, perfomance_values, 2) - approx_data_lengths = np.linspace(data_lengths[0], data_lengths[-1], 1000) - approx_perfomance_values = np.poly1d(coefficients)(approx_data_lengths) - with plt.ion(): - plt.scatter(data_lengths, perfomance_values, label='Perfomance values') - plt.plot(approx_data_lengths, approx_perfomance_values, label='Approximation') - plt.pause(0.1) From c9b9b893de5a9b9d2240ae7ee0282acb188d49f4 Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Mon, 4 Dec 2023 11:27:26 +0300 Subject: [PATCH 06/14] Add secondary perfomance check for operations with fast_train preset --- test/integration/models/test_model.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 9356143c56..1db764fc10 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -1,5 +1,4 @@ import pickle -import matplotlib.pyplot as plt from copy import deepcopy from time import perf_counter @@ -483,7 +482,8 @@ def test_operations_are_fast(): Test defines operation complexity as polynomial function of data size. If complexity function grows fast, then operation should not have fast_train tag. """ - # models that raise exception + # models that fail fast_train check + fail_operations = [] to_skip = ['custom', 'decompose', 'class_decompose'] data_lengths = tuple(map(int, np.logspace(2, 4, 6))) reference_operations = ['rf', 'rfr'] @@ -505,11 +505,22 @@ def test_operations_are_fast(): if perfomance_values is not None: max_time = perfomance_values[-1] second_max_time = perfomance_values[-2] - assert max_time <= reference_max_time and second_max_time <= reference_second_max_time, \ - f'operation {operation.id} should not have fast_train preset' + if max_time > reference_max_time or second_max_time > reference_second_max_time: + fail_operations.append(operation) + + for operation in fail_operations: + perfomance_values = get_operation_perfomance(operation, data_lengths) + max_time = perfomance_values[-1] + second_max_time = perfomance_values[-2] + if max_time <= reference_max_time and second_max_time <= reference_second_max_time: + fail_operations.remove(operation) + + assert len(fail_operations) == 0, \ + f'operations {[op for op in fail_operations]} should not have fast_train preset' -def get_operation_perfomance(operation: OperationMetaInfo, data_lengths: Tuple[float, ...]) -> Optional[Tuple[float, ...]]: +def get_operation_perfomance(operation: OperationMetaInfo, + data_lengths: Tuple[float, ...]) -> Optional[Tuple[float, ...]]: """ Helper function to check perfomance of only the first valid operation pair (task_type, input_type). """ From c29027cdda55b3110163e1448a23b332656020ff Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Mon, 4 Dec 2023 12:04:53 +0300 Subject: [PATCH 07/14] Fix secondary fast_train check list copy --- test/integration/models/test_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 1db764fc10..a36f9c1d80 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -508,7 +508,7 @@ def test_operations_are_fast(): if max_time > reference_max_time or second_max_time > reference_second_max_time: fail_operations.append(operation) - for operation in fail_operations: + for operation in fail_operations.copy(): perfomance_values = get_operation_perfomance(operation, data_lengths) max_time = perfomance_values[-1] second_max_time = perfomance_values[-2] @@ -516,7 +516,7 @@ def test_operations_are_fast(): fail_operations.remove(operation) assert len(fail_operations) == 0, \ - f'operations {[op for op in fail_operations]} should not have fast_train preset' + f'operations {[operation.id for operation in fail_operations]} should not have fast_train preset' def get_operation_perfomance(operation: OperationMetaInfo, From e702f6cb6a35315b99abda31e6d19dbb61a90300 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 4 Dec 2023 12:49:01 +0300 Subject: [PATCH 08/14] speedup naive average --- .../models/ts_implementations/naive.py | 55 ++++--------------- 1 file changed, 11 insertions(+), 44 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py b/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py index 7b6ae81e2e..8a547b4e8b 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py @@ -104,9 +104,9 @@ def predict(self, input_data: InputData) -> OutputData: """ Get desired part of time series for averaging and calculate mean value """ forecast_length = input_data.task.task_params.forecast_length - elements_to_take = self._how_many_elements_use_for_averaging(input_data.features) + window = self._window(input_data.features) # Prepare single forecast - mean_value = np.nanmean(input_data.features[-elements_to_take:]) + mean_value = np.nanmean(input_data.features[-window:]) forecast = np.array([mean_value] * forecast_length).reshape((1, -1)) output_data = self._convert_to_output(input_data, @@ -117,9 +117,13 @@ def predict(self, input_data: InputData) -> OutputData: def predict_for_fit(self, input_data: InputData) -> OutputData: input_data = copy(input_data) forecast_length = input_data.task.task_params.forecast_length - parts = split_rolling_slices(input_data) - mean_values_for_chunks = self.average_by_axis(parts) - forecast = np.repeat(mean_values_for_chunks.reshape((-1, 1)), forecast_length, axis=1) + features = input_data.features + shape = features.shape[0] + + window = self._window(features) + mean_values = np.array([np.mean(features[-window-shape+i:i+1]) for i in range(shape)]) + + forecast = np.repeat(mean_values.reshape((-1, 1)), forecast_length, axis=1) # Update target new_idx, transformed_target = ts_to_table(idx=input_data.idx, time_series=input_data.target, @@ -133,42 +137,5 @@ def predict_for_fit(self, input_data: InputData) -> OutputData: data_type=DataTypesEnum.table) return output_data - def average_by_axis(self, parts: np.array): - """ Perform averaging for each column using last part of it """ - mean_values_for_chunks = np.apply_along_axis(self._average, 1, parts) - return mean_values_for_chunks - - def _average(self, row: np.array): - row = row[np.logical_not(np.isnan(row))] - if len(row) == 1: - return row - - elements_to_take = self._how_many_elements_use_for_averaging(row) - return np.mean(row[-elements_to_take:]) - - def _how_many_elements_use_for_averaging(self, time_series: np.array): - elements_to_take = round(len(time_series) * self.part_for_averaging) - elements_to_take = fix_elements_number(elements_to_take) - return elements_to_take - - -def split_rolling_slices(input_data: InputData): - """ Prepare slices for features series. - Example of result for time series [0, 1, 2, 3]: - [[0, nan, nan, nan], - [0, 1, nan, nan], - [0, 1, 2, nan], - [0, 1, 2, 3]] - """ - nan_mask = np.triu(np.ones_like(input_data.features, dtype=bool), k=1) - final_matrix = np.tril(input_data.features, k=0) - final_matrix = np.array(final_matrix, dtype=float) - final_matrix[nan_mask] = np.nan - - return final_matrix - - -def fix_elements_number(elements_to_take: int): - if elements_to_take < 2: - return 2 - return elements_to_take + def _window(self, time_series: np.ndarray): + return max(2, round(time_series.shape[0] * self.part_for_averaging)) From aea107acd4ca2e213678d3b2c54a42ae35292ed9 Mon Sep 17 00:00:00 2001 From: Sergey Kasyanov Date: Mon, 4 Dec 2023 14:36:17 +0300 Subject: [PATCH 09/14] update test --- test/integration/models/test_model.py | 83 +++++++++++++-------------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index a36f9c1d80..afacb72953 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -482,48 +482,50 @@ def test_operations_are_fast(): Test defines operation complexity as polynomial function of data size. If complexity function grows fast, then operation should not have fast_train tag. """ - # models that fail fast_train check - fail_operations = [] - to_skip = ['custom', 'decompose', 'class_decompose'] - data_lengths = tuple(map(int, np.logspace(2, 4, 6))) + + data_lengths = tuple(map(int, np.logspace(2.2, 4, 6))) reference_operations = ['rf', 'rfr'] - reference_max_time, reference_second_max_time = float('inf'), float('inf') + to_skip = ['custom', 'decompose', 'class_decompose', 'kmeans'] + reference_operations + reference_time = (float('inf'), ) * len(data_lengths) + # tries for time measuring + attempt = 2 for operation in OperationTypesRepository('all')._repo: if operation.id in reference_operations: - perfomance_values = get_operation_perfomance(operation, data_lengths) - if perfomance_values is not None: - reference_max_time = min(reference_max_time, perfomance_values[-1]) - reference_second_max_time = min(reference_second_max_time, perfomance_values[-2]) - - if reference_max_time < float('inf'): - for operation in OperationTypesRepository('all')._repo: - if operation.id in to_skip or operation.id in reference_operations: - continue - if operation.presets is not None and FAST_TRAIN_PRESET_NAME in operation.presets: - perfomance_values = get_operation_perfomance(operation, data_lengths) - if perfomance_values is not None: - max_time = perfomance_values[-1] - second_max_time = perfomance_values[-2] - if max_time > reference_max_time or second_max_time > reference_second_max_time: - fail_operations.append(operation) + perfomance_values = get_operation_perfomance(operation, data_lengths, attempt) + reference_time = tuple(map(min, zip(perfomance_values, reference_time))) - for operation in fail_operations.copy(): - perfomance_values = get_operation_perfomance(operation, data_lengths) - max_time = perfomance_values[-1] - second_max_time = perfomance_values[-2] - if max_time <= reference_max_time and second_max_time <= reference_second_max_time: - fail_operations.remove(operation) - - assert len(fail_operations) == 0, \ - f'operations {[operation.id for operation in fail_operations]} should not have fast_train preset' + for operation in OperationTypesRepository('all')._repo: + if (operation.id not in to_skip + and operation.presets + and FAST_TRAIN_PRESET_NAME in operation.presets): + for _ in range(attempt): + perfomance_values = get_operation_perfomance(operation, data_lengths) + # if attempt is successful then stop + if all(x >= y for x, y in zip(reference_time, perfomance_values)): + break + else: + raise Exception(f"Operation {operation.id} cannot have ``fast-train`` tag") def get_operation_perfomance(operation: OperationMetaInfo, - data_lengths: Tuple[float, ...]) -> Optional[Tuple[float, ...]]: + data_lengths: Tuple[float, ...], + times: int = 1) -> Optional[Tuple[float, ...]]: """ Helper function to check perfomance of only the first valid operation pair (task_type, input_type). """ + def fit_time_for_operation(operation: OperationMetaInfo, + data: InputData): + nodes_from = [] + if task_type is TaskTypesEnum.ts_forecasting: + if 'non_lagged' not in operation.tags: + nodes_from = [PipelineNode('lagged')] + node = PipelineNode(operation.id, nodes_from=nodes_from) + pipeline = Pipeline(node) + start_time = perf_counter() + pipeline.fit(data) + return perf_counter() - start_time + for task_type in operation.task_type: for data_type in operation.input_types: perfomance_values = [] @@ -532,15 +534,10 @@ def get_operation_perfomance(operation: OperationMetaInfo, length=length, features_count=2, random=True) if data is not None: - nodes_from = [] - if task_type is TaskTypesEnum.ts_forecasting: - if 'non_lagged' not in operation.tags: - nodes_from = [PipelineNode('lagged')] - node = PipelineNode(operation.id, nodes_from=nodes_from) - pipeline = Pipeline(node) - start_time = perf_counter() - pipeline.fit(data) - stop_time = perf_counter() - start_time - perfomance_values.append(stop_time) - if perfomance_values and len(perfomance_values) == len(data_lengths): - return tuple(perfomance_values) + min_evaluated_time = min(fit_time_for_operation(operation, data) for _ in range(times)) + perfomance_values.append(min_evaluated_time) + if perfomance_values: + if len(perfomance_values) != len(data_lengths): + raise ValueError('not all measurements have been proceeded') + return tuple(perfomance_values) + raise Exception(f"Fit time for operation ``{operation.id}`` cannot be measured") From a68b3144c31e3b3b91af726fcff46f8cc880ac62 Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Mon, 4 Dec 2023 15:04:16 +0300 Subject: [PATCH 10/14] Move perfomance evaluating function above the tests --- test/integration/models/test_model.py | 70 +++++++++++++-------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index afacb72953..272e231997 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -137,6 +137,41 @@ def get_pca_incorrect_data(): return input_data +def get_operation_perfomance(operation: OperationMetaInfo, + data_lengths: Tuple[float, ...], + times: int = 1) -> Optional[Tuple[float, ...]]: + """ + Helper function to check perfomance of only the first valid operation pair (task_type, input_type). + """ + def fit_time_for_operation(operation: OperationMetaInfo, + data: InputData): + nodes_from = [] + if task_type is TaskTypesEnum.ts_forecasting: + if 'non_lagged' not in operation.tags: + nodes_from = [PipelineNode('lagged')] + node = PipelineNode(operation.id, nodes_from=nodes_from) + pipeline = Pipeline(node) + start_time = perf_counter() + pipeline.fit(data) + return perf_counter() - start_time + + for task_type in operation.task_type: + for data_type in operation.input_types: + perfomance_values = [] + for length in data_lengths: + data = get_data_for_testing(task_type, data_type, + length=length, features_count=2, + random=True) + if data is not None: + min_evaluated_time = min(fit_time_for_operation(operation, data) for _ in range(times)) + perfomance_values.append(min_evaluated_time) + if perfomance_values: + if len(perfomance_values) != len(data_lengths): + raise ValueError('not all measurements have been proceeded') + return tuple(perfomance_values) + raise Exception(f"Fit time for operation ``{operation.id}`` cannot be measured") + + @pytest.fixture() def classification_dataset(): samples = 1000 @@ -506,38 +541,3 @@ def test_operations_are_fast(): break else: raise Exception(f"Operation {operation.id} cannot have ``fast-train`` tag") - - -def get_operation_perfomance(operation: OperationMetaInfo, - data_lengths: Tuple[float, ...], - times: int = 1) -> Optional[Tuple[float, ...]]: - """ - Helper function to check perfomance of only the first valid operation pair (task_type, input_type). - """ - def fit_time_for_operation(operation: OperationMetaInfo, - data: InputData): - nodes_from = [] - if task_type is TaskTypesEnum.ts_forecasting: - if 'non_lagged' not in operation.tags: - nodes_from = [PipelineNode('lagged')] - node = PipelineNode(operation.id, nodes_from=nodes_from) - pipeline = Pipeline(node) - start_time = perf_counter() - pipeline.fit(data) - return perf_counter() - start_time - - for task_type in operation.task_type: - for data_type in operation.input_types: - perfomance_values = [] - for length in data_lengths: - data = get_data_for_testing(task_type, data_type, - length=length, features_count=2, - random=True) - if data is not None: - min_evaluated_time = min(fit_time_for_operation(operation, data) for _ in range(times)) - perfomance_values.append(min_evaluated_time) - if perfomance_values: - if len(perfomance_values) != len(data_lengths): - raise ValueError('not all measurements have been proceeded') - return tuple(perfomance_values) - raise Exception(f"Fit time for operation ``{operation.id}`` cannot be measured") From 2ace0a11b4102377d780e3518f2dc1dfeafc58a2 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 12 Dec 2023 14:19:01 +0300 Subject: [PATCH 11/14] add `resample` and `one_hot_encoding` to skip list.py --- test/integration/models/test_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 272e231997..f745108493 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -520,7 +520,8 @@ def test_operations_are_fast(): data_lengths = tuple(map(int, np.logspace(2.2, 4, 6))) reference_operations = ['rf', 'rfr'] - to_skip = ['custom', 'decompose', 'class_decompose', 'kmeans'] + reference_operations + to_skip = ['custom', 'decompose', 'class_decompose', 'kmeans', + 'resample', 'one_hot_encoding'] + reference_operations reference_time = (float('inf'), ) * len(data_lengths) # tries for time measuring attempt = 2 From 7968b09dd7ef4862b84c43878daa8c07d624f083 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 12 Dec 2023 14:37:03 +0300 Subject: [PATCH 12/14] pep8 --- .../operation_implementations/models/ts_implementations/naive.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py b/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py index 8a547b4e8b..f3600b094d 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/naive.py @@ -1,5 +1,4 @@ from copy import copy -from typing import Optional import numpy as np From e02e7d8c8e090023dd7dd9bacc73cb4807a27648 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 12 Dec 2023 14:38:02 +0300 Subject: [PATCH 13/14] pep8 --- test/integration/models/test_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index f745108493..b8ad954837 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -532,9 +532,9 @@ def test_operations_are_fast(): reference_time = tuple(map(min, zip(perfomance_values, reference_time))) for operation in OperationTypesRepository('all')._repo: - if (operation.id not in to_skip - and operation.presets - and FAST_TRAIN_PRESET_NAME in operation.presets): + if (operation.id not in to_skip and + operation.presets and + FAST_TRAIN_PRESET_NAME in operation.presets): for _ in range(attempt): perfomance_values = get_operation_perfomance(operation, data_lengths) # if attempt is successful then stop From 90f45cfbde923b99765fb535fa195ef6a817b5bc Mon Sep 17 00:00:00 2001 From: Lopa10ko Date: Tue, 12 Dec 2023 15:41:43 +0300 Subject: [PATCH 14/14] pep8 --- test/integration/models/test_model.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index b8ad954837..4719f9efa5 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -532,9 +532,7 @@ def test_operations_are_fast(): reference_time = tuple(map(min, zip(perfomance_values, reference_time))) for operation in OperationTypesRepository('all')._repo: - if (operation.id not in to_skip and - operation.presets and - FAST_TRAIN_PRESET_NAME in operation.presets): + if (operation.id not in to_skip and operation.presets and FAST_TRAIN_PRESET_NAME in operation.presets): for _ in range(attempt): perfomance_values = get_operation_perfomance(operation, data_lengths) # if attempt is successful then stop