Skip to content

Commit

Permalink
- table and text preprocessing are now distinguished for easier readability
Browse files Browse the repository at this point in the history
  • Loading branch information
andreygetmanov committed Aug 30, 2022
1 parent 1147eb2 commit a2da375
Showing 1 changed file with 5 additions and 10 deletions.
15 changes: 5 additions & 10 deletions fedot/preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,13 @@ def _prepare_obligatory_unimodal_for_fit(self, data: InputData, source_name: str
if data.supplementary_data.was_preprocessed:
# Preprocessing was already done - return data
return data
# Wrap indices in numpy array
data.idx = np.array(data.idx)
# Fix tables / time series sizes
data = self._correct_shapes(data)

# TODO andreygetmanov target encoding must be obligatory for all data types
if data_type_is_text(data):
# Fix tables / time series sizes
data = self._correct_shapes(data)
replace_inf_with_nans(data)

# Find incorrect features which must be removed
Expand All @@ -209,12 +212,7 @@ def _prepare_obligatory_unimodal_for_fit(self, data: InputData, source_name: str
self._train_target_encoder(data, source_name)
data.target = self._apply_target_encoding(data, source_name)

# Wrap indices in numpy array
data.idx = np.array(data.idx)

if data_type_is_table(data):
# Fix tables / time series sizes
data = self._correct_shapes(data)
replace_inf_with_nans(data)

# Find incorrect features which must be removed
Expand All @@ -232,9 +230,6 @@ def _prepare_obligatory_unimodal_for_fit(self, data: InputData, source_name: str
data.target = self._apply_target_encoding(data, source_name)
data = self._clean_extra_spaces(data)

# Wrap indices in numpy array
data.idx = np.array(data.idx)

# Process binary categorical features
self.binary_categorical_processors[source_name].fit(data)
data = self.binary_categorical_processors[source_name].transform(data)
Expand Down

0 comments on commit a2da375

Please sign in to comment.