Skip to content

Commit

Permalink
MultiModalData class improvement (#789)
Browse files Browse the repository at this point in the history
* MultiModalData class improvement

CSV files that contain both text and tabular data can now be read in a single call.

- added the `MultiModalData.from_csv` method
- added a MultiModalData detection module
- text fields are now identified automatically
- added tests
- refactored parts of preprocessing.py
  • Loading branch information
andreygetmanov authored Sep 5, 2022
1 parent 9ae9151 commit e708a83
Show file tree
Hide file tree
Showing 18 changed files with 1,320 additions and 1,675 deletions.
54 changes: 19 additions & 35 deletions examples/advanced/multimodal_text_num_example.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,33 @@
import os
from pathlib import Path

from fedot.api.main import Fedot

from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root


def prepare_multi_modal_data(files_path: str, task: Task) -> MultiModalData:
def run_multi_modal_example(file_path: str, is_visualise=True) -> float:
"""
Imports data from 2 different sources (table and text)
:param files_path: path to data
:param task: task to solve
:return: MultiModalData object which contains table and text data
This is an example of FEDOT use on multimodal data.
The data is taken and adapted from Wine Reviews dataset (winemag-data_first150k):
https://www.kaggle.com/datasets/zynicide/wine-reviews
and contains information about wine country, region, price, etc.
Column that contains text features is 'description'.
Other columns contain numerical and categorical features.
The aim is to predict wine variety, so it's a classification task.
:param file_path: path to the file with multimodal data
:param is_visualise: if True, then final pipeline will be visualised
:return: F1 metrics of the model
"""

path = os.path.join(str(fedot_project_root()), files_path)

# import of table data
path_table = os.path.join(path, 'multimodal_wine_table.csv')
data_num = InputData.from_csv(path_table, task=task, target_columns='variety')

# import of text data
path_text = os.path.join(path, 'multimodal_wine_text.csv')
data_text = InputData.from_csv(path_text, data_type=DataTypesEnum.text, task=task, target_columns='variety')

data = MultiModalData({
'data_source_table': data_num,
'data_source_text': data_text
})

return data


def run_multi_modal_example(files_path: str, is_visualise=True) -> float:
task = Task(TaskTypesEnum.classification)

data = prepare_multi_modal_data(files_path, task)
task = 'classification'
path = Path(fedot_project_root(), file_path)
data = MultiModalData.from_csv(file_path=path, task=task, target_columns='variety', index_col=None)
fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=0.7)

automl_model = Fedot(problem='classification', timeout=10)
automl_model = Fedot(problem=task, timeout=10)
automl_model.fit(features=fit_data,
target=fit_data.target)

Expand All @@ -59,4 +43,4 @@ def run_multi_modal_example(files_path: str, is_visualise=True) -> float:


if __name__ == '__main__':
run_multi_modal_example(files_path='examples/data/multimodal_wine', is_visualise=True)
run_multi_modal_example(file_path='examples/data/multimodal_wine.csv', is_visualise=True)
780 changes: 780 additions & 0 deletions examples/data/multimodal_wine.csv

Large diffs are not rendered by default.

Loading

0 comments on commit e708a83

Please sign in to comment.