Strict exception handling in tests (#856)

* Add stricter exception handling in tests * Fix the random seed for model fitting * Add debug procedures
aimclub · Sep 8, 2022 · 6487966 · 6487966
1 parent 4fd5516
commit 6487966
Show file tree

Hide file tree

Showing 16 changed files with 163 additions and 47 deletions.
diff --git a/examples/simple/classification/api_classification.py b/examples/simple/classification/api_classification.py
@@ -8,14 +8,14 @@ def run_classification_example(timeout: float = None):
     train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
     test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'
 
-    baseline_model = Fedot(problem=problem, timeout=timeout)
+    baseline_model = Fedot(problem=problem, timeout=timeout, seed=42)
     baseline_model.fit(features=train_data_path, target='target', predefined_model='rf')
 
     baseline_model.predict(features=test_data_path)
     print(baseline_model.get_metrics())
 
-    auto_model = Fedot(problem=problem, seed=42, timeout=timeout, n_jobs=-1,
-                       max_pipeline_fit_time=1, metric='roc_auc')
+    auto_model = Fedot(problem=problem, seed=42, timeout=timeout, n_jobs=-1, preset='best_quality',
+                       max_pipeline_fit_time=5, metric='roc_auc')
     auto_model.fit(features=train_data_path, target='target')
     prediction = auto_model.predict_proba(features=test_data_path)
     print(auto_model.get_metrics())
@@ -24,4 +24,4 @@ def run_classification_example(timeout: float = None):
 
 
 if __name__ == '__main__':
-    run_classification_example(timeout=2)
+    run_classification_example(timeout=10.0)
diff --git a/fedot/api/api_utils/params.py b/fedot/api/api_utils/params.py
@@ -9,6 +9,7 @@
 from fedot.core.data.multi_modal import MultiModalData
 from fedot.core.log import default_log, Log
 from fedot.core.repository.tasks import Task, TaskParams, TaskTypesEnum, TsForecastingParams
+from fedot.core.utilities.random import RandomStateHandler
 
 
 class ApiParams:
@@ -93,9 +94,12 @@ def _parse_input_params(self, input_params: Dict[str, Any]):
         self.api_params.update(evo_params)
         if 'preset' not in input_params['composer_tuner_params']:
             self.api_params['preset'] = 'auto'
-        if input_params['seed'] is not None:
-            np.random.seed(input_params['seed'])
-            random.seed(input_params['seed'])
+
+        specified_seed = input_params['seed']
+        if specified_seed is not None:
+            np.random.seed(specified_seed)
+            random.seed(specified_seed)
+            RandomStateHandler.MODEL_FITTING_SEED = specified_seed
 
         if self.api_params['problem'] == 'ts_forecasting' and input_params['task_params'] is None:
             self.log.warning(f'The value of the forecast depth was set to {DEFAULT_FORECAST_LENGTH}.')
@@ -120,12 +124,12 @@ def get_default_evo_params(problem: str):
                   'preset': AUTO_PRESET_NAME,
                   'genetic_scheme': None,
                   'history_folder': None,
-                  'stopping_after_n_generation': 10,
+                  'stopping_after_n_generation': 30,
                   'use_pipelines_cache': True,
                   'use_preprocessing_cache': True}
 
         if problem in ['classification', 'regression']:
-            params['cv_folds'] = 3
+            params['cv_folds'] = 5
         elif problem == 'ts_forecasting':
             params['cv_folds'] = 3
             params['validation_blocks'] = 2

diff --git a/fedot/core/caching/pipelines_cache.py b/fedot/core/caching/pipelines_cache.py
@@ -4,6 +4,7 @@
 from fedot.core.caching.pipelines_cache_db import OperationsCacheDB
 from fedot.core.pipelines.node import Node
 from fedot.core.utilities.data_structures import ensure_wrapped_in_sequence
+from fedot.utilities.debug import is_test_session
 
 if TYPE_CHECKING:
     from fedot.core.pipelines.pipeline import Pipeline
@@ -34,6 +35,8 @@ def save_nodes(self, nodes: Union[Node, List[Node]], fold_id: Optional[int] = No
             self._db.add_operations(mapped)
         except Exception as ex:
             self.log.warning(f'Nodes can not be saved: {ex}. Continue')
+            if is_test_session():
+                raise ex
 
     def save_pipeline(self, pipeline: 'Pipeline', fold_id: Optional[int] = None):
         """
@@ -60,6 +63,8 @@ def try_load_nodes(self, nodes: Union[Node, List[Node]], fold_id: Optional[int]
                     nodes_lst[idx].fitted_operation = None
         except Exception as ex:
             self.log.warning(f'Cache can not be loaded: {ex}. Continue.')
+            if is_test_session():
+                raise ex
 
     def try_load_into_pipeline(self, pipeline: 'Pipeline', fold_id: Optional[int] = None):
         """

diff --git a/fedot/core/caching/preprocessing_cache.py b/fedot/core/caching/preprocessing_cache.py
@@ -2,6 +2,7 @@
 
 from fedot.core.caching.base_cache import BaseCache
 from fedot.core.caching.preprocessing_cache_db import PreprocessingCacheDB
+from fedot.utilities.debug import is_test_session
 
 if TYPE_CHECKING:
     from fedot.core.pipelines.pipeline import Pipeline
@@ -30,8 +31,10 @@ def try_load_preprocessor(self, pipeline: 'Pipeline', fold_id: Union[int, None])
             processors = self._db.get_preprocessor(structural_id)
             if processors:
                 pipeline.encoder, pipeline.imputer = processors
-        except Exception as exc:
-            self.log.warning(f'Preprocessor search error: {exc}')
+        except Exception as ex:
+            self.log.warning(f'Preprocessor search error: {ex}')
+            if is_test_session():
+                raise ex
 
     def add_preprocessor(self, pipeline: 'Pipeline', fold_id: Optional[Union[int, None]] = None):
         """

diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py
@@ -5,6 +5,8 @@
 from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, SkLearnEvaluationStrategy
 from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose \
     import DecomposerClassImplementation
+from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters \
+    import IsolationForestClassImplementation
 from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_imbalanced_class import \
     ResampleImplementation
 from fedot.core.operations.evaluation.operation_implementations. \
@@ -15,8 +17,7 @@
     keras import FedotCNNImplementation
 from fedot.core.operations.evaluation.operation_implementations.models.knn import FedotKnnClassImplementation
 from fedot.core.operations.evaluation.operation_implementations.models.svc import FedotSVCImplementation
-from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters \
-    import IsolationForestClassImplementation
+from fedot.core.utilities.random import RandomStateHandler
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
@@ -66,7 +67,8 @@ def fit(self, train_data: InputData):
         else:
             operation_implementation = self.operation_impl()
 
-        operation_implementation.fit(train_data)
+        with RandomStateHandler():
+            operation_implementation.fit(train_data)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:
@@ -129,8 +131,8 @@ def fit(self, train_data: InputData):
             operation_implementation = self.operation_impl(**self.params_for_fit)
         else:
             operation_implementation = self.operation_impl()
-
-        operation_implementation.fit(train_data)
+        with RandomStateHandler():
+            operation_implementation.fit(train_data)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/fedot/core/operations/evaluation/clustering.py b/fedot/core/operations/evaluation/clustering.py
@@ -5,6 +5,7 @@
 
 from fedot.core.data.data import InputData, OutputData
 from fedot.core.operations.evaluation.evaluation_interfaces import SkLearnEvaluationStrategy
+from fedot.core.utilities.random import RandomStateHandler
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
@@ -31,8 +32,8 @@ def fit(self, train_data: InputData):
             operation_implementation = self.operation_impl(**self.params_for_fit)
         else:
             operation_implementation = self.operation_impl(n_clusters=2)
-
-        operation_implementation.fit(train_data.features)
+        with RandomStateHandler():
+            operation_implementation.fit(train_data.features)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/fedot/core/operations/evaluation/common_preprocessing.py b/fedot/core/operations/evaluation/common_preprocessing.py
@@ -3,11 +3,12 @@
 
 from fedot.core.data.data import InputData, OutputData
 from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy
+from fedot.core.operations.evaluation.operation_implementations.data_operations.categorical_encoders import \
+    OneHotEncodingImplementation, LabelEncodingImplementation
 from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \
     ImputationImplementation, KernelPCAImplementation, NormalizationImplementation, PCAImplementation, \
     PolyFeaturesImplementation, ScalingImplementation, FastICAImplementation
-from fedot.core.operations.evaluation.operation_implementations.data_operations.categorical_encoders import \
-    OneHotEncodingImplementation, LabelEncodingImplementation
+from fedot.core.utilities.random import RandomStateHandler
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
@@ -41,8 +42,8 @@ def fit(self, train_data: InputData):
             operation_implementation = self.operation_impl(**self.params_for_fit)
         else:
             operation_implementation = self.operation_impl()
-
-        operation_implementation.fit(train_data)
+        with RandomStateHandler():
+            operation_implementation.fit(train_data)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/fedot/core/operations/evaluation/evaluation_interfaces.py b/fedot/core/operations/evaluation/evaluation_interfaces.py
@@ -32,6 +32,7 @@
 from fedot.core.repository.dataset_types import DataTypesEnum
 from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operation_type_from_id
 from fedot.core.repository.tasks import TaskTypesEnum
+from fedot.core.utilities.random import RandomStateHandler
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
@@ -181,7 +182,7 @@ def fit(self, train_data: InputData):
         current_task = train_data.task.task_type
         models_repo = OperationTypesRepository()
         non_multi_models = models_repo.suitable_operation(task_type=current_task,
-                                                             tags=['non_multi'])
+                                                          tags=['non_multi'])
         is_model_not_support_multi = self.operation_type in non_multi_models
 
         # Multi-output task or not
@@ -191,7 +192,8 @@ def fit(self, train_data: InputData):
             operation_implementation = convert_to_multivariate_model(operation_implementation,
                                                                      train_data)
         else:
-            operation_implementation.fit(train_data.features, train_data.target)
+            with RandomStateHandler():
+                operation_implementation.fit(train_data.features, train_data.target)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/fedot/core/operations/evaluation/gpu/clustering.py b/fedot/core/operations/evaluation/gpu/clustering.py
@@ -1,5 +1,6 @@
 import warnings
 
+from fedot.core.utilities.random import RandomStateHandler
 from fedot.utilities.requirements_notificator import warn_requirement
 
 try:
@@ -42,7 +43,8 @@ def fit(self, train_data: InputData):
             operation_implementation = self.operation_impl(n_clusters=2)
 
         features = cudf.DataFrame(train_data.features.astype('float32'))
-        operation_implementation.fit(features)
+        with RandomStateHandler():
+            operation_implementation.fit(features)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/fedot/core/operations/evaluation/gpu/common.py b/fedot/core/operations/evaluation/gpu/common.py
@@ -1,12 +1,13 @@
 import warnings
 from typing import Optional
 
+from fedot.core.utilities.random import RandomStateHandler
 from fedot.utilities.requirements_notificator import warn_requirement
 
 try:
     import cudf
     import cuml
-    from cuml import KMeans, Ridge, LogisticRegression, Lasso, ElasticNet, \
+    from cuml import Ridge, LogisticRegression, Lasso, ElasticNet, \
         MBSGDClassifier, MBSGDRegressor, CD
     from cuml.ensemble import RandomForestClassifier, RandomForestRegressor
     from cuml.svm import SVC
@@ -86,7 +87,8 @@ def fit(self, train_data: InputData):
             raise NotImplementedError('Not supported for GPU yet')
             # TODO Manually wrap the regressor into multi-output model
         else:
-            operation_implementation.fit(features, target)
+            with RandomStateHandler():
+                operation_implementation.fit(features, target)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/...t/core/operations/evaluation/operation_implementations/data_operations/sklearn_filters.py b/...t/core/operations/evaluation/operation_implementations/data_operations/sklearn_filters.py
@@ -4,18 +4,17 @@
 import numpy as np
 import sklearn
 from pkg_resources import parse_version
-
+from sklearn.ensemble import IsolationForest
 from sklearn.linear_model import LinearRegression, RANSACRegressor
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.tree import DecisionTreeRegressor
-from sklearn.ensemble import IsolationForest
 
+from fedot.core.data.data import InputData, OutputData
 from fedot.core.log import default_log
 from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import (
     DataOperationImplementation
 )
-from fedot.core.data.data import InputData, OutputData
 
 
 class FilterImplementation(DataOperationImplementation):

diff --git a/fedot/core/operations/evaluation/regression.py b/fedot/core/operations/evaluation/regression.py
@@ -5,13 +5,14 @@
 from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, SkLearnEvaluationStrategy
 from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose \
     import DecomposerRegImplementation
+from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters \
+    import IsolationForestRegImplementation
 from fedot.core.operations.evaluation.operation_implementations. \
     data_operations.sklearn_filters import LinearRegRANSACImplementation, NonLinearRegRANSACImplementation
 from fedot.core.operations.evaluation.operation_implementations. \
     data_operations.sklearn_selectors import LinearRegFSImplementation, NonLinearRegFSImplementation
 from fedot.core.operations.evaluation.operation_implementations.models.knn import FedotKnnRegImplementation
-from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters \
-    import IsolationForestRegImplementation
+from fedot.core.utilities.random import RandomStateHandler
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
@@ -61,8 +62,8 @@ def fit(self, train_data: InputData):
             operation_implementation = self.operation_impl(**self.params_for_fit)
         else:
             operation_implementation = self.operation_impl()
-
-        operation_implementation.fit(train_data)
+        with RandomStateHandler():
+            operation_implementation.fit(train_data)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:
@@ -117,8 +118,8 @@ def fit(self, train_data: InputData):
             operation_implementation = self.operation_impl(**self.params_for_fit)
         else:
             operation_implementation = self.operation_impl()
-
-        operation_implementation.fit(train_data)
+        with RandomStateHandler():
+            operation_implementation.fit(train_data)
         return operation_implementation
 
     def predict(self, trained_operation, predict_data: InputData) -> OutputData:

diff --git a/fedot/core/optimisers/objective/data_objective_eval.py b/fedot/core/optimisers/objective/data_objective_eval.py
@@ -1,3 +1,4 @@
+import traceback
 from datetime import timedelta
 from typing import Callable, Iterable, Optional, Tuple
 
@@ -9,9 +10,11 @@
 from fedot.core.log import default_log
 from fedot.core.operations.model import Model
 from fedot.core.optimisers.fitness import Fitness
-from fedot.core.optimisers.objective.objective import Objective, to_fitness
-from fedot.core.optimisers.objective.objective_eval import ObjectiveEvaluate
 from fedot.core.pipelines.pipeline import Pipeline
+from fedot.utilities.debug import is_test_session, is_recording_mode
+from fedot.utilities.debug import save_debug_info_for_pipeline
+from .objective import Objective, to_fitness
+from .objective_eval import ObjectiveEvaluate
 
 DataSource = Callable[[], Iterable[Tuple[InputData, InputData]]]
 
@@ -58,10 +61,14 @@ def evaluate(self, graph: Pipeline) -> Fitness:
         folds_metrics = []
         for fold_id, (train_data, test_data) in enumerate(self._data_producer()):
             try:
-                graph.unfit()
                 prepared_pipeline = self.prepare_graph(graph, train_data, fold_id, self._eval_n_jobs)
             except Exception as ex:
                 self._log.warning(f'Continuing after pipeline fit error <{ex}> for graph: {graph_id}')
+                if is_test_session() and not isinstance(ex, TimeoutError):
+                    stack_trace = traceback.format_exc()
+                    save_debug_info_for_pipeline(graph, train_data, test_data, ex, stack_trace)
+                    if not is_recording_mode():
+                        raise ex
                 continue
             evaluated_fitness = self._objective(prepared_pipeline,
                                                 reference_data=test_data,
@@ -70,7 +77,10 @@ def evaluate(self, graph: Pipeline) -> Fitness:
                 folds_metrics.append(evaluated_fitness.values)
             else:
                 self._log.warning(f'Continuing after objective evaluation error for graph: {graph_id}')
-                continue
+                if is_test_session():
+                    raise ValueError(f'Fitness {evaluated_fitness} is not valid')
+                else:
+                    continue
             graph.unfit()
         if folds_metrics:
             folds_metrics = tuple(np.mean(folds_metrics, axis=0))  # averages for each metric over folds
@@ -88,6 +98,7 @@ def prepare_graph(self, graph: Pipeline, train_data: InputData,
         :param fold_id: id of the fold in cross-validation, used for cache requests.
         :param n_jobs: number of parallel jobs for preparation
         """
+        graph.unfit()
         # load preprocessing
         graph.try_load_from_cache(self._pipelines_cache, self._preprocessing_cache, fold_id)
         graph.fit(