Adaptation of API for multimodal data (#663)
* API run of multimodal cases
- added checks for text and image features in the data during preprocessing building
- multi_modal_pipeline.py now runs via the API
- the preprocessing builder can now process each source of multimodal data separately

* - improved the data_has_text_features function (see the first sketch below for the general idea)

* - changed the data_has_categorical_features function due to the new multimodal assumptions builder mechanics
- added a new test for text data

* - fixed the failing test_assumptions_builder_for_multimodal_data
- data_preprocessing now works with unimodal data only; multimodal data is supported by iteratively preprocessing each data source node

* - the multimodal data strategy now defines data sources based on data type

* - added a text + tabular data multimodal example with a dataset
- fixed a multimodal data preprocessing bug
- removed duplicated categorical encoding from the preprocessor

* - fixed a bug in the CNN initial assumption
- updated the CNN tests

* - added a text vectorization operation based on pretrained word2vec models

* - added a test for data with empty text fields

* - changed the multimodal example and case to run via the API

* - the CNN node is now added by processing_builder, not by preprocessing

* - added a test for the new MultimodalStrategy data definition functionality

* - modified test_text_features_processed_correctly

* - modified test_correct_api_dataset_with_text_preprocessing

* - added a test checking that DataDefiner works on multimodal data
- added some docstrings
- removed the multi_modal_genre_prediction.py case

* - added default_operation_params for word2vec_pretrained and tfidf
- added search_space params for word2vec_pretrained and tfidf

* - added tests for various kinds of text data
- modified the data_has_text_features function in the preprocessing builder

* - changed the phrase vectorization method from summing to averaging word vectors (see the second sketch below)
- moved text vectorizer parameter initialisation to the class __init__

* - embedding download info is now written to the logger

* - fixed a bug where the tuner did not work on pipelines with a Keras CNN

* - MultiModalAssumptionsBuilder refactoring

* - MultiModalAssumptionsBuilder refactoring [2]
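
The text-feature checks mentioned above (data_has_text_features) come down to deciding whether a column holds free text rather than categorical labels. Below is a purely illustrative Python heuristic; the function name, thresholds, and input format are assumptions made for this summary and do not reproduce FEDOT's actual implementation.

import numpy as np


# Illustrative only: a column "looks like text" if its values are mostly unique
# strings that contain several words on average.
def looks_like_text_column(column: np.ndarray,
                           min_unique_ratio: float = 0.5,
                           min_avg_words: float = 2.0) -> bool:
    values = [str(v) for v in column if v is not None and str(v).strip()]
    if not values:
        return False
    unique_ratio = len(set(values)) / len(values)
    avg_words = np.mean([len(v.split()) for v in values])
    return unique_ratio >= min_unique_ratio and avg_words >= min_avg_words


print(looks_like_text_column(np.array(['dry red wine', 'sweet white blend', 'crisp rose'])))  # True
print(looks_like_text_column(np.array(['red', 'white', 'red'])))                              # False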
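
The switch from summing to averaging phrase vectors can be pictured with the second sketch below, built on gensim's pretrained-embedding downloader; the model name, helper function, and out-of-vocabulary handling are assumptions rather than the actual word2vec_pretrained operation. Averaging keeps the phrase representation on a comparable scale regardless of how many words a text contains, which is the motivation for the change.

import numpy as np
import gensim.downloader as api

# Any small pretrained embedding model works for the illustration.
model = api.load('glove-wiki-gigaword-50')


def vectorize_phrase(phrase: str) -> np.ndarray:
    # Collect embeddings of the in-vocabulary words of the phrase.
    vectors = [model[word] for word in phrase.lower().split() if word in model]
    if not vectors:
        # Empty or fully out-of-vocabulary text maps to a zero vector.
        return np.zeros(model.vector_size)
    # Average (rather than sum) the word vectors.
    return np.mean(vectors, axis=0)


print(vectorize_phrase('dry red wine with cherry notes').shape)  # (50,)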
andreygetmanov authored Jul 15, 2022
1 parent 703f7b3 commit 128c6e6
Showing 31 changed files with 1,990 additions and 322 deletions.
82 changes: 0 additions & 82 deletions cases/multi_modal_genre_prediction.py

This file was deleted.

67 changes: 7 additions & 60 deletions examples/advanced/multi_modal_pipeline.py
@@ -1,11 +1,11 @@
import os
from typing import Union

+from fedot.api.main import Fedot

from sklearn.metrics import f1_score as f1

from cases.dataset_preparation import unpack_archived_data
-from fedot.core.pipelines.pipeline import Pipeline
-from fedot.core.pipelines.node import PrimaryNode, SecondaryNode
from fedot.core.data.data import InputData, OutputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
@@ -69,70 +69,17 @@ def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (
    return data


-def generate_initial_pipeline_and_data(data: Union[InputData, MultiModalData],
-                                       with_split=True) -> tuple:
-    """
-    Generates initial pipeline for data from 3 different sources (table, images and text)
-    Each source is the primary node for its subpipeline
-    :param data: multimodal data (from 3 different sources: table, text, image)
-    :param with_split: if True, splits the sample on train/test
-    :return: pipeline object, 2 multimodal data objects (fit and predict)
-    """
-
-    # Identifying a number of classes for CNN params
-    if data.target.shape[1] > 1:
-        num_classes = data.target.shape[1]
-    else:
-        num_classes = data.num_classes
-    # image
-    images_size = data['data_source_img'].features.shape[1:4]
-    ds_image = PrimaryNode('data_source_img')
-    image_node = SecondaryNode('cnn', nodes_from=[ds_image])
-    image_node.custom_params = {'image_shape': images_size,
-                                'architecture_type': 'vgg16',
-                                'num_classes': num_classes,
-                                'epochs': 2,
-                                'batch_size': 16,
-                                'optimizer_parameters': {'loss': "binary_crossentropy",
-                                                         'optimizer': "adam",
-                                                         'metrics': 'categorical_crossentropy'}
-                                }
-
-    # table
-    ds_table = PrimaryNode('data_source_table')
-    numeric_node = SecondaryNode('scaling', nodes_from=[ds_table])
-
-    # text
-    ds_text = PrimaryNode('data_source_text')
-    node_text_clean = SecondaryNode('text_clean', nodes_from=[ds_text])
-    text_node = SecondaryNode('tfidf', nodes_from=[node_text_clean])
-    text_node.custom_params = {'ngram_range': (1, 3), 'min_df': 0.001, 'max_df': 0.9}
-
-    # combining all sources together
-    logit_node = SecondaryNode('logit', nodes_from=[image_node, numeric_node, text_node])
-    logit_node.custom_params = {'max_iter': 100000, 'random_state': 42}
-    pipeline = Pipeline(logit_node)
-
-    # train/test ratio
-    ratio = 0.6
-    if with_split:
-        fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=ratio)
-    else:
-        fit_data, predict_data = data, data
-
-    return pipeline, fit_data, predict_data


-def run_multi_modal_pipeline(files_path: str, is_visualise=False) -> float:
+def run_multi_modal_pipeline(files_path: str, is_visualise=True) -> float:
    task = Task(TaskTypesEnum.classification)
    images_size = (224, 224)

    data = prepare_multi_modal_data(files_path, task, images_size)

-    pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(data, with_split=True)
+    fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=0.6)

-    pipeline.fit(input_data=fit_data)
+    automl_model = Fedot(problem='classification', timeout=15)
+    pipeline = automl_model.fit(features=fit_data,
+                                target=fit_data.target)

    if is_visualise:
        pipeline.show()
62 changes: 62 additions & 0 deletions examples/advanced/multimodal_text_num_example.py
@@ -0,0 +1,62 @@
import os

from fedot.api.main import Fedot

from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root


def prepare_multi_modal_data(files_path: str, task: Task) -> MultiModalData:
"""
Imports data from 2 different sources (table and text)
:param files_path: path to data
:param task: task to solve
:return: MultiModalData object which contains table and text data
"""

path = os.path.join(str(fedot_project_root()), files_path)

# import of table data
path_table = os.path.join(path, 'multimodal_wine_table.csv')
data_num = InputData.from_csv(path_table, task=task, target_columns='variety')

# import of text data
path_text = os.path.join(path, 'multimodal_wine_text.csv')
data_text = InputData.from_csv(path_text, data_type=DataTypesEnum.text, task=task, target_columns='variety')

data = MultiModalData({
'data_source_table': data_num,
'data_source_text': data_text
})

return data


def run_multi_modal_example(files_path: str, is_visualise=True) -> float:
    task = Task(TaskTypesEnum.classification)

    data = prepare_multi_modal_data(files_path, task)
    fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=0.7)

    automl_model = Fedot(problem='classification', timeout=10)
    automl_model.fit(features=fit_data,
                     target=fit_data.target)

    prediction = automl_model.predict(predict_data)
    metrics = automl_model.get_metrics()

    if is_visualise:
        automl_model.current_pipeline.show()

    print(f'F1 for validation sample is {round(metrics["f1"], 3)}')

    return metrics["f1"]


if __name__ == '__main__':
    run_multi_modal_example(files_path='examples/data/multimodal_wine', is_visualise=True)