Skip to content

Commit

Permalink
- multimodal data is now prepared as a unified MultiModalData object
Browse files Browse the repository at this point in the history
- removed warning during scaling image data
- minor changes for readability
- test_multi_modal.py is changed according to the new structure of multi_modal_pipeline.py
  • Loading branch information
andreygetmanov committed Apr 5, 2022
1 parent e001cf4 commit 6cd90a3
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 73 deletions.
11 changes: 4 additions & 7 deletions cases/multi_modal_genre_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,10 @@ def run_multi_modal_case(files_path, is_visualise=True, timeout=datetime.timedel
task = Task(TaskTypesEnum.classification)
images_size = (128, 128)

train_num, test_num, train_img, test_img, train_text, test_text = \
prepare_multi_modal_data(files_path, task, images_size)
data = prepare_multi_modal_data(files_path, task, images_size)

initial_pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size,
train_num, test_num,
train_img, test_img,
train_text, test_text)
initial_pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size, data,
with_split=True)

# the search of the models provided by the framework that can be used as nodes in a pipeline for the selected task
available_model_types = get_operations_for_task(task=task, mode='model')
Expand Down Expand Up @@ -59,7 +56,7 @@ def run_multi_modal_case(files_path, is_visualise=True, timeout=datetime.timedel
pipeline_evo_composed.show()

prediction = pipeline_evo_composed.predict(predict_data, output_mode='labels')
err = calculate_validation_metric(test_text, prediction)
err = calculate_validation_metric(predict_data, prediction)

print(f'F1 micro for validation sample is {err}')
return err
Expand Down
82 changes: 34 additions & 48 deletions examples/advanced/multi_modal_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from typing import Union

from sklearn.metrics import f1_score as f1

Expand All @@ -13,7 +14,7 @@
from fedot.core.utils import fedot_project_root


def calculate_validation_metric(valid: InputData, pred: OutputData) -> float:
def calculate_validation_metric(valid: Union[InputData, MultiModalData], pred: OutputData) -> float:
"""
Calculates F1 score for predicted data
Expand All @@ -30,15 +31,14 @@ def calculate_validation_metric(valid: InputData, pred: OutputData) -> float:
return round(err, 2)


def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (128, 128), with_split=True) -> tuple:
def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (128, 128)) -> MultiModalData:
"""
Imports data from 3 different sources (table, images and text)
:param files_path: path to data
:param task: task to solve
:param images_size: the requested size in pixels, as a 2-tuple of (width, height)
:param with_split: if True, splits the sample on train/test
:return: 6 OutputData objects (2 with table data, 2 with images, 2 with text)
:return: MultiModalData object which contains table, text and image data
"""

path = os.path.join(str(fedot_project_root()), files_path)
Expand All @@ -49,8 +49,6 @@ def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (
label='genres', task=task, is_multilabel=True, shuffle=False)

class_labels = data_num.target
# train/test ratio
ratio = 0.6

img_files_path = f'{files_path}/*.jpeg'
img_path = os.path.join(str(fedot_project_root()), img_files_path)
Expand All @@ -62,44 +60,39 @@ def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (
label='genres', task=task,
data_type=DataTypesEnum.text, is_multilabel=True, shuffle=False)

if with_split:

train_num, test_num = train_test_data_setup(data_num, shuffle_flag=True, split_ratio=ratio)
train_img, test_img = train_test_data_setup(data_img, shuffle_flag=True, split_ratio=ratio)
train_text, test_text = train_test_data_setup(data_text, shuffle_flag=True, split_ratio=ratio)
else:

train_num, test_num = data_num, data_num
train_img, test_img = data_img, data_img
train_text, test_text = data_text, data_text
data = MultiModalData({
'data_source_img': data_img,
'data_source_table': data_num,
'data_source_text': data_text
})

return train_num, test_num, train_img, test_img, train_text, test_text
return data


def generate_initial_pipeline_and_data(images_size: tuple,
train_num: InputData, test_num: InputData,
train_img: InputData, test_img: InputData,
train_text: InputData, test_text: InputData) -> tuple:
data: Union[InputData, MultiModalData],
with_split=True) -> tuple:
"""
Generates initial pipeline for data from 3 different sources (table, images and text)
Each source is the primary node for its subpipeline
:param images_size: the requested size in pixels, as a 2-tuple of (width, height)
:param train_num: train sample of table data
:param test_num: test sample of table data
:param train_img: train sample of image data
:param test_img: test sample of image data
:param train_text: train sample of text data
:param test_text: test sample of text data
:param data: multimodal data (from 3 different sources: table, text, image)
:param with_split: if True, splits the sample on train/test
:return: pipeline object, 2 multimodal data objects (fit and predict)
"""

# Identifying a number of classes for CNN params
if data.target.shape[1] > 1:
num_classes = data.target.shape[1]
else:
num_classes = data.num_classes
# image
ds_image = PrimaryNode('data_source_img/1')
ds_image = PrimaryNode('data_source_img')
image_node = SecondaryNode('cnn', nodes_from=[ds_image])
image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
'architecture': 'simplified',
'num_classes': 5,
'architecture_type': 'simplified',
'num_classes': num_classes,
'epochs': 10,
'batch_size': 16,
'optimizer_parameters': {'loss': "binary_crossentropy",
Expand All @@ -108,11 +101,11 @@ def generate_initial_pipeline_and_data(images_size: tuple,
}

# table
ds_table = PrimaryNode('data_source_table/2')
ds_table = PrimaryNode('data_source_table')
numeric_node = SecondaryNode('scaling', nodes_from=[ds_table])

# text
ds_text = PrimaryNode('data_source_text/3')
ds_text = PrimaryNode('data_source_text')
node_text_clean = SecondaryNode('text_clean', nodes_from=[ds_text])
text_node = SecondaryNode('tfidf', nodes_from=[node_text_clean])
text_node.custom_params = {'ngram_range': (1, 3), 'min_df': 0.001, 'max_df': 0.9}
Expand All @@ -122,16 +115,12 @@ def generate_initial_pipeline_and_data(images_size: tuple,
logit_node.custom_params = {'max_iter': 100000, 'random_state': 42}
pipeline = Pipeline(logit_node)

fit_data = MultiModalData({
'data_source_img/1': train_img,
'data_source_table/2': train_num,
'data_source_text/3': train_text
})
predict_data = MultiModalData({
'data_source_img/1': test_img,
'data_source_table/2': test_num,
'data_source_text/3': test_text
})
# train/test ratio
ratio = 0.6
if with_split:
fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=ratio)
else:
fit_data, predict_data = data, data

return pipeline, fit_data, predict_data

Expand All @@ -140,13 +129,10 @@ def run_multi_modal_pipeline(files_path: str, is_visualise=False) -> float:
task = Task(TaskTypesEnum.classification)
images_size = (128, 128)

train_num, test_num, train_img, test_img, train_text, test_text = \
prepare_multi_modal_data(files_path, task, images_size)
data = prepare_multi_modal_data(files_path, task, images_size)

pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size,
train_num, test_num,
train_img, test_img,
train_text, test_text)
pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size, data,
with_split=True)

pipeline.fit(input_data=fit_data)

Expand All @@ -155,7 +141,7 @@ def run_multi_modal_pipeline(files_path: str, is_visualise=False) -> float:

prediction = pipeline.predict(predict_data, output_mode='labels')

err = calculate_validation_metric(test_text, prediction)
err = calculate_validation_metric(predict_data, prediction)

print(f'F1 micro for validation sample is {err}')

Expand Down
18 changes: 9 additions & 9 deletions fedot/core/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ def from_csv_multi_time_series(task: Task,

return input_data


@staticmethod
def from_image(images: Union[str, np.ndarray] = None,
labels: Union[str, np.ndarray] = None,
Expand Down Expand Up @@ -248,18 +247,19 @@ def from_json_files(files_path: str,

if len(fields_to_use) > 1:
fields_to_combine = []
for f in fields_to_use:
fields_to_combine.append(np.array(df_data[f]))
if isinstance(df_data[f][0], list):
df_data[f] = [' '.join(v) for v in df_data[f]]
for field in fields_to_use:
fields_to_combine.append(np.array(df_data[field]))
# Unite if the element of text data is divided into strings
if isinstance(df_data[field][0], list):
df_data[field] = [' '.join(piece) for piece in df_data[field]]

features = np.column_stack(tuple(fields_to_combine))
else:
val = df_data[fields_to_use[0]]
field = df_data[fields_to_use[0]]
# process field with nested list
if isinstance(val[0], list):
val = [' '.join(v) for v in val]
features = np.array(val)
if isinstance(field[0], list):
field = [' '.join(piece) for piece in field]
features = np.array(field)

if is_multilabel:
target = df_data[label]
Expand Down
1 change: 1 addition & 0 deletions fedot/core/operations/evaluation/evaluation_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def implementation_info(self) -> str:

def _sklearn_compatible_prediction(self, trained_operation, features):
is_multi_output_target = isinstance(trained_operation.classes_, list)
# Check if target is multilabel (has 2 or more columns)
if is_multi_output_target:
n_classes = len(trained_operation.classes_[0])
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def fit_cnn(train_data: InputData,
logger = default_log(__name__)

if transform_flag:
logger.warn('Train data set was not scaled. The data was divided by 255.')
logger.debug('Train data set was not scaled. The data was divided by 255.')

if len(x_train.shape) < 4:
transformed_x_train = np.expand_dims(x_train, -1)
Expand Down
9 changes: 1 addition & 8 deletions test/unit/pipelines/test_multi_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,7 @@ def test_multi_modal_pipeline():
task = Task(TaskTypesEnum.classification)
images_size = (128, 128)

train_num, _, train_img, _, train_text, _ = \
prepare_multi_modal_data(path, task, images_size, with_split=False)

fit_data = MultiModalData({
'data_source_img': train_img,
'data_source_table': train_num,
'data_source_text': train_text
})
fit_data = prepare_multi_modal_data(path, task, images_size)

pipeline.fit(fit_data)
prediction = pipeline.predict(fit_data)
Expand Down

0 comments on commit 6cd90a3

Please sign in to comment.