From 7692609dc0b880e63d995ed8574f499d0357defc Mon Sep 17 00:00:00 2001
From: Andrey Getmanov <91334765+andreygetmanov@users.noreply.github.com>
Date: Mon, 18 Apr 2022 20:04:47 +0300
Subject: [PATCH] Image processing improvement (#640)

- fixed an occasional error in test_image_classification_quality
- the image resize function now supports RGB images and identifies the number of channels automatically
- added VGG16 to the models list
---
 cases/multi_modal_genre_prediction.py         |  5 ++--
 examples/advanced/multi_modal_pipeline.py     | 16 +++++-------
 fedot/core/data/data.py                       | 19 +++++++-------
 .../operation_implementations/models/keras.py | 26 ++++++++++++++-----
 requirements.txt                              |  3 ++-
 test/unit/multimodal/test_multi_modal.py      | 17 ++++--------
 6 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/cases/multi_modal_genre_prediction.py b/cases/multi_modal_genre_prediction.py
index ed321c2033..89a1a5b328 100644
--- a/cases/multi_modal_genre_prediction.py
+++ b/cases/multi_modal_genre_prediction.py
@@ -16,12 +16,11 @@ def run_multi_modal_case(files_path, is_visualise=True, timeout=datetime.timedelta(minutes=1)):
     task = Task(TaskTypesEnum.classification)
-    images_size = (128, 128)
+    images_size = (224, 224)
 
     data = prepare_multi_modal_data(files_path, task, images_size)
 
-    initial_pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size, data,
-                                                                                  with_split=True)
+    initial_pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(data, with_split=True)
 
     # the search of the models provided by the framework that can be used as nodes in a pipeline for the selected task
     available_model_types = get_operations_for_task(task=task, mode='model')
diff --git a/examples/advanced/multi_modal_pipeline.py b/examples/advanced/multi_modal_pipeline.py
index 9c874578d6..23cecadb1f 100644
--- a/examples/advanced/multi_modal_pipeline.py
+++ b/examples/advanced/multi_modal_pipeline.py
@@ -69,14 +69,12 @@ def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (
     return data
 
 
-def generate_initial_pipeline_and_data(images_size: tuple,
-                                       data: Union[InputData, MultiModalData],
+def generate_initial_pipeline_and_data(data: Union[InputData, MultiModalData],
                                        with_split=True) -> tuple:
     """
     Generates initial pipeline for data from 3 different sources (table, images and text)
     Each source is the primary node for its subpipeline
 
-    :param images_size: the requested size in pixels, as a 2-tuple of (width, height)
     :param data: multimodal data (from 3 different sources: table, text, image)
     :param with_split: if True, splits the sample on train/test
     :return: pipeline object, 2 multimodal data objects (fit and predict)
     """
@@ -88,12 +86,13 @@
     else:
         num_classes = data.num_classes
     # image
+    images_size = data['data_source_img'].features.shape[1:4]
     ds_image = PrimaryNode('data_source_img')
     image_node = SecondaryNode('cnn', nodes_from=[ds_image])
-    image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
-                                'architecture_type': 'simplified',
+    image_node.custom_params = {'image_shape': images_size,
+                                'architecture_type': 'vgg16',
                                 'num_classes': num_classes,
-                                'epochs': 10,
+                                'epochs': 2,
                                 'batch_size': 16,
                                 'optimizer_parameters': {'loss': "binary_crossentropy",
                                                          'optimizer': "adam",
@@ -127,12 +126,11 @@ def run_multi_modal_pipeline(files_path: str, is_visualise=False) -> float:
     task = Task(TaskTypesEnum.classification)
-    images_size = (128, 128)
+    images_size = (224, 224)
 
     data = prepare_multi_modal_data(files_path, task, images_size)
 
-    pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size, data,
-                                                                          with_split=True)
+    pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(data, with_split=True)
 
     pipeline.fit(input_data=fit_data)
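For context on the `images_size = data['data_source_img'].features.shape[1:4]` line above: the image shape is now inferred from the loaded data instead of being passed in, which is what lets the pipeline handle both grayscale and RGB sources. A minimal sketch of what that slice yields, assuming the image source stores features as a 4-D numpy array of (batch, height, width, channels); the array here is hypothetical:

    import numpy as np

    # hypothetical batch of 100 RGB images, shaped (batch, height, width, channels)
    features = np.zeros((100, 224, 224, 3))
    image_shape = features.shape[1:4]  # (224, 224, 3), used as 'image_shape' for the cnn node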
diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index 487b8afa45..ebcbb42f2a 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -1,13 +1,12 @@
 import glob
 import os
+import cv2
 from copy import copy, deepcopy
 from dataclasses import dataclass, field
 from typing import List, Optional, Tuple, Union
 
-import imageio
 import numpy as np
 import pandas as pd
-from PIL import Image
 
 from fedot.core.data.array_utilities import atleast_2d
 from fedot.core.data.load_data import JSONBatchLoader, TextBatchLoader
@@ -406,15 +405,16 @@ class OutputData(Data):
     target: Optional[np.array] = None
 
 
-def _resize_image(file_path: str, target_size: tuple):
-    im = Image.open(file_path)
-    im_resized = im.resize(target_size, Image.NEAREST)
-    im_resized.save(file_path, 'jpeg')
+def _resize_image(file_path: str, target_size: Tuple[int, int]):
+    """
+    Resizes the input image to the target size and rewrites the source file
+    """
 
-    img = np.asarray(imageio.imread(file_path, 'jpeg'))
-    if len(img.shape) == 3:
-        # TODO refactor for multi-color
-        img = img[..., 0] + img[..., 1] + img[..., 2]
+    img = cv2.imread(file_path)
+    # cv2.imread returns (height, width, channels), while target_size is (width, height)
+    if img.shape[:2] != (target_size[1], target_size[0]):
+        img = cv2.resize(img, (target_size[0], target_size[1]))
+        cv2.imwrite(file_path, img)
     return img
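A note on the OpenCV conventions `_resize_image` relies on, and the reason for the transposed comparison above: `cv2.imread` returns an array shaped (height, width, channels), while `cv2.resize` takes its `dsize` argument as (width, height). A minimal sketch with a hypothetical non-square image:

    import cv2
    import numpy as np

    img = np.zeros((100, 200, 3), dtype=np.uint8)  # a 200x100-pixel image: shape is (height, width, channels)
    resized = cv2.resize(img, (224, 224))          # dsize is (width, height)
    assert resized.shape == (224, 224, 3)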
diff --git a/fedot/core/operations/evaluation/operation_implementations/models/keras.py b/fedot/core/operations/evaluation/operation_implementations/models/keras.py
index fd3928ac29..0de2575452 100644
--- a/fedot/core/operations/evaluation/operation_implementations/models/keras.py
+++ b/fedot/core/operations/evaluation/operation_implementations/models/keras.py
@@ -56,13 +56,23 @@ def create_simple_cnn(input_shape: tuple,
             tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
             tf.keras.layers.Flatten(),
             tf.keras.layers.Dropout(0.5),
-            tf.keras.layers.Dense(num_classes, activation="sigmoid"),
+            tf.keras.layers.Dense(num_classes, activation="softmax"),
         ]
     )
 
     return model
 
 
+def create_vgg16(input_shape: tuple,
+                 num_classes: int):
+    model = tf.keras.applications.vgg16.VGG16(include_top=True,
+                                              weights=None,
+                                              input_shape=input_shape,
+                                              classes=num_classes,
+                                              classifier_activation='sigmoid')
+    return model
+
+
 def fit_cnn(train_data: InputData,
             model,
             epochs: int = 10,
@@ -78,7 +88,7 @@ def fit_cnn(train_data: InputData,
     if transform_flag:
         logger.debug('Train data set was not scaled. The data was divided by 255.')
 
-    if len(x_train.shape) < 4:
+    if len(x_train.shape) == 3:
         transformed_x_train = np.expand_dims(x_train, -1)
 
     if len(train_data.target.shape) < 2:
@@ -117,11 +127,14 @@ def predict_cnn(trained_model, predict_data: InputData, output_mode: str = 'labe
     if np.max(transformed_x_test) > 1:
         logger.warn('Test data set was not scaled. The data was divided by 255.')
-    transformed_x_test = np.expand_dims(x_test, -1)
+
+    if len(x_test.shape) == 3:
+        transformed_x_test = np.expand_dims(x_test, -1)
+
     if output_mode == 'labels':
-        prediction = trained_model.predict(transformed_x_test)
+        prediction = np.round(trained_model.predict(transformed_x_test))
     elif output_mode in ['probs', 'full_probs', 'default']:
-        prediction = trained_model.predict_proba(transformed_x_test)
+        prediction = trained_model.predict(transformed_x_test)
         if trained_model.num_classes < 2:
             logger.error('Data set contain only 1 target class. Please reformat your data.')
             raise NotImplementedError()
@@ -133,7 +146,8 @@
 
 
 cnn_model_dict = {'deep': create_deep_cnn,
-                  'simplified': create_simple_cnn}
+                  'simplified': create_simple_cnn,
+                  'vgg16': create_vgg16}
 
 
 class FedotCNNImplementation(ModelImplementation):
diff --git a/requirements.txt b/requirements.txt
index b2a8af87da..8c6a14e15c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,4 +30,5 @@ torch>=1.9.0
 gensim>=4.1.2
 tqdm
 requests>=2.*
-func_timeout==4.3.5
\ No newline at end of file
+func_timeout==4.3.5
+opencv-python>=4.5.5.64
\ No newline at end of file
diff --git a/test/unit/multimodal/test_multi_modal.py b/test/unit/multimodal/test_multi_modal.py
index 94be52698d..03ae02ff5e 100644
--- a/test/unit/multimodal/test_multi_modal.py
+++ b/test/unit/multimodal/test_multi_modal.py
@@ -5,23 +5,16 @@
 from fedot.core.pipelines.pipeline import Pipeline
 from fedot.core.repository.tasks import Task, TaskTypesEnum
 from fedot.core.utils import fedot_project_root
+from fedot.core.data.multi_modal import MultiModalData
 
 
-def generate_multi_modal_pipeline():
-    images_size = (128, 128)
-
-    # image
-    image_node = PrimaryNode('cnn')
-    image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
-                                'architecture': 'simplified',
-                                'num_classes': 2,
-                                'epochs': 1,
-                                'batch_size': 128}
+def generate_multi_modal_pipeline(data: MultiModalData):
     # image
+    images_size = data['data_source_img'].features.shape[1:4]
     ds_image = PrimaryNode('data_source_img')
     image_node = SecondaryNode('cnn', nodes_from=[ds_image])
-    image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
+    image_node.custom_params = {'image_shape': images_size,
                                 'architecture': 'simplified',
                                 'num_classes': 2,
                                 'epochs': 15,
@@ -61,7 +54,6 @@ def generate_multi_task_pipeline():
 
 
 def test_multi_modal_pipeline():
-    pipeline = generate_multi_modal_pipeline()
     files_path = os.path.join('test', 'data', 'multi_modal')
     path = os.path.join(str(fedot_project_root()), files_path)
 
     images_size = (128, 128)
 
     fit_data = prepare_multi_modal_data(path, task, images_size)
+    pipeline = generate_multi_modal_pipeline(fit_data)
     pipeline.fit(fit_data)
     prediction = pipeline.predict(fit_data)
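Regarding the `predict_cnn` change above: Keras models in current TensorFlow do not provide `predict_proba`, so the raw `predict` output serves as the probability estimate, and the 'labels' mode now derives hard labels by rounding it. A minimal sketch for a binary task with a sigmoid output layer (as in `create_vgg16`), using hypothetical probability values:

    import numpy as np

    probs = np.array([[0.91], [0.08], [0.55]])  # raw sigmoid outputs from model.predict(...)
    labels = np.round(probs)                    # [[1.], [0.], [1.]] - hard 0/1 labels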