- table and text preprocessing are now distinguished for easier readability
aimclub · Aug 30, 2022 · a2da375 · a2da375
1 parent 1147eb2
commit a2da375
Showing 1 changed file with 5 additions and 10 deletions.
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
@@ -187,10 +187,13 @@ def _prepare_obligatory_unimodal_for_fit(self, data: InputData, source_name: str
         if data.supplementary_data.was_preprocessed:
             # Preprocessing was already done - return data
             return data
+        # Wrap indices in numpy array
+        data.idx = np.array(data.idx)
+        # Fix tables / time series sizes
+        data = self._correct_shapes(data)
+
         # TODO andreygetmanov target encoding must be obligatory for all data types
         if data_type_is_text(data):
-            # Fix tables / time series sizes
-            data = self._correct_shapes(data)
             replace_inf_with_nans(data)
 
             # Find incorrect features which must be removed
@@ -209,12 +212,7 @@ def _prepare_obligatory_unimodal_for_fit(self, data: InputData, source_name: str
             self._train_target_encoder(data, source_name)
             data.target = self._apply_target_encoding(data, source_name)
 
-            # Wrap indices in numpy array
-            data.idx = np.array(data.idx)
-
         if data_type_is_table(data):
-            # Fix tables / time series sizes
-            data = self._correct_shapes(data)
             replace_inf_with_nans(data)
 
             # Find incorrect features which must be removed
@@ -232,9 +230,6 @@ def _prepare_obligatory_unimodal_for_fit(self, data: InputData, source_name: str
             data.target = self._apply_target_encoding(data, source_name)
             data = self._clean_extra_spaces(data)
 
-            # Wrap indices in numpy array
-            data.idx = np.array(data.idx)
-
             # Process binary categorical features
             self.binary_categorical_processors[source_name].fit(data)
             data = self.binary_categorical_processors[source_name].transform(data)