Add topofeatures #1241

Merged 22 commits on Jan 15, 2024
17 changes: 8 additions & 9 deletions cases/dataset_preparation.py
@@ -8,24 +8,23 @@ def unpack_archived_data(archive_name: str):
            os.path.basename(archive_name) not in os.listdir(os.path.dirname(archive_path))):
        with tarfile.open(archive_path) as file:
            def is_within_directory(directory, target):
                abs_directory = os.path.abspath(directory)
                abs_target = os.path.abspath(target)
                prefix = os.path.commonprefix([abs_directory, abs_target])
                return prefix == abs_directory

            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
                # reject any member whose resolved path escapes the target directory
                for member in tar.getmembers():
                    member_path = os.path.join(path, member.name)
                    if not is_within_directory(path, member_path):
                        raise Exception("Attempted Path Traversal in Tar File")
                tar.extractall(path, members, numeric_owner=numeric_owner)

            safe_extract(file, path=os.path.dirname(archive_path))
        print('Unpacking finished')
    else:
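
The safe_extract wrapper above is the standard mitigation for tar path traversal (CVE-2007-4559): a crafted member name containing '..' would otherwise be written outside the extraction directory. A standalone illustration of the check, using hypothetical paths:

import os

path = '/tmp/data'                          # extraction directory
member_name = '../../etc/passwd'            # hypothetical malicious archive entry
member_path = os.path.join(path, member_name)

abs_directory = os.path.abspath(path)       # '/tmp/data'
abs_target = os.path.abspath(member_path)   # '/etc/passwd'

# the common prefix is '/' rather than '/tmp/data', so extraction is refused
print(os.path.commonprefix([abs_directory, abs_target]) == abs_directory)  # False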
2 changes: 1 addition & 1 deletion cases/time_series_gapfilling_case.py
@@ -80,7 +80,7 @@ def get_composite_pipeline():
    node_linear_2 = PipelineNode('linear', nodes_from=[node_2])

    node_final = PipelineNode('ridge', nodes_from=[node_linear_1,
                                                   node_linear_2])
    pipeline = Pipeline(node_final)
    return pipeline

@@ -69,6 +69,7 @@ FEDOT supports a bunch of dimensionality preprocessing operations that can be a
`one_hot_encoding`,One-Hot Encoder, Feature encoding
`label_encoding`,Label Encoder, Feature encoding
`resample`,Imbalanced binary class transformation in classification, Data transformation
+`topological_features`,"Calculation of topological features, only for time series",Data transformation


.. csv-table:: Feature transformation operations implementations
@@ -104,6 +105,7 @@ FEDOT supports a bunch of dimensionality preprocessing operations that can be a
`one_hot_encoding`,`sklearn.preprocessing.OneHotEncoder`,
`label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree`
`resample`,`FEDOT model using sklearn.utils.resample`,
+`topological_features`,FEDOT model,`ts`
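
To show where the new operation could sit, here is a minimal sketch (not part of this diff; the node layout is an assumption, only the operation names come from the tables above) of a pipeline that feeds lagged time-series windows through `topological_features` into a ridge regressor:

from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

# 'lagged' windows the series into a feature table; topological features
# are computed from those windows and passed to the final ridge model
node_lagged = PipelineNode('lagged')
node_topo = PipelineNode('topological_features', nodes_from=[node_lagged])
node_final = PipelineNode('ridge', nodes_from=[node_topo])
pipeline = Pipeline(node_final)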


Models used
20 changes: 9 additions & 11 deletions examples/advanced/automl/tpot_vs_fedot.py
@@ -1,18 +1,16 @@
-from fedot.core.data.data import InputData
-from fedot.core.pipelines.node import PipelineNode
-from fedot.core.pipelines.pipeline import Pipeline
-from tpot.export_utils import set_param_recursive
-from tpot.builtins import StackingEstimator
-from sklearn.pipeline import make_pipeline
-from sklearn.naive_bayes import BernoulliNB
-from sklearn.metrics import roc_auc_score as roc_auc
-from sklearn.ensemble import RandomForestClassifier
import numpy

numpy.float = numpy.float64  # tmp patch before TPOT could fix this: https://github.com/EpistasisLab/tpot/issues/1281

+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import roc_auc_score as roc_auc
+from sklearn.naive_bayes import BernoulliNB
+from sklearn.pipeline import make_pipeline
+from tpot.builtins import StackingEstimator
+from tpot.export_utils import set_param_recursive

+from fedot.core.pipelines.pipeline import Pipeline
+from fedot.core.pipelines.node import PipelineNode
+from fedot.core.data.data import InputData

def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    train_data = InputData.from_csv(train_file_path)
8 changes: 1 addition & 7 deletions examples/advanced/time_series_forecasting/multistep.py
@@ -4,6 +4,7 @@
import pandas as pd

from examples.advanced.time_series_forecasting.composing_pipelines import get_border_line_info
+from examples.simple.time_series_forecasting.api_forecasting import TS_DATASETS
from examples.simple.time_series_forecasting.ts_pipelines import ts_ar_pipeline
from examples.simple.time_series_forecasting.tuning_pipelines import visualise
from fedot.core.data.data import InputData
@@ -19,13 +20,6 @@

_TS_EXAMPLES_DATA_PATH = fedot_project_root().joinpath('examples/data/ts')

-TS_DATASETS = {
-    'australia': _TS_EXAMPLES_DATA_PATH.joinpath('australia.csv'),
-    'beer': _TS_EXAMPLES_DATA_PATH.joinpath('beer.csv'),
-    'salaries': _TS_EXAMPLES_DATA_PATH.joinpath('salaries.csv'),
-    'stackoverflow': _TS_EXAMPLES_DATA_PATH.joinpath('stackoverflow.csv'),
-    'test_sea': fedot_project_root().joinpath('test', 'data', 'simple_sea_level.csv')}
-

def run_multistep(dataset: str, pipeline: Pipeline, step_forecast: int = 10, future_steps: int = 5,
                  visualisation=False):
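
With the local dictionary removed, dataset paths now come from the shared TS_DATASETS mapping imported above. A hypothetical invocation (the key and pipeline choice are assumptions based on this file's imports):

if __name__ == '__main__':
    # 'australia' is one of the keys the removed local dict also defined
    run_multistep('australia', ts_ar_pipeline(), step_forecast=10,
                  future_steps=5, visualisation=True)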
32 changes: 16 additions & 16 deletions examples/advanced/time_series_forecasting/nemo.py
@@ -117,11 +117,11 @@ def return_working_pipeline():

    pipeline = get_arima_pipeline()
    train_dataset = MultiModalData({
        'arima': deepcopy(train_input),
    })
    predict_dataset = MultiModalData({
        'arima': deepcopy(predict_input),
    })
    pipeline.fit_from_scratch(train_dataset)
    predicted_values = pipeline.predict(predict_dataset)
    predicted_values = predicted_values.predict
@@ -140,13 +140,13 @@ def return_working_pipeline():
    # arima with nemo ensemble
    pipeline = return_working_pipeline()
    train_dataset = MultiModalData({
        'lagged/1': deepcopy(train_input),
        'exog_ts': deepcopy(train_input_exog)
    })
    predict_dataset = MultiModalData({
        'lagged/1': deepcopy(predict_input),
        'exog_ts': deepcopy(predict_input_exog)
    })
    pipeline.fit_from_scratch(train_dataset)
    predicted_values = pipeline.predict(predict_dataset).predict

@@ -165,13 +165,13 @@ def return_working_pipeline():
    # arima with nemo ensemble
    pipeline = get_arima_nemo_pipeline()
    train_dataset = MultiModalData({
        'arima': deepcopy(train_input),
        'exog_ts': deepcopy(train_input_exog)
    })
    predict_dataset = MultiModalData({
        'arima': deepcopy(predict_input),
        'exog_ts': deepcopy(predict_input_exog)
    })
    pipeline.fit_from_scratch(train_dataset)
    predicted_values = pipeline.predict(predict_dataset).predict

@@ -35,10 +35,10 @@ def build_pred_ints(start=5000, end=7000, horizon=200):
                           task=task,
                           data_type=DataTypesEnum.ts)
    model = Fedot(problem='ts_forecasting',
                  task_params=task.task_params,
                  timeout=3,
                  preset='ts',
                  show_progress=False)

    model.fit(train_input)
    model.forecast()