Skip to content

Commit

Permalink
Image processing improvement (#640)
Browse files Browse the repository at this point in the history
- fixed an occasional failure of test_image_classification_quality
- the image resize function now supports RGB images and automatically detects the number of channels
- added VGG16 to the list of models
  • Loading branch information
andreygetmanov authored Apr 18, 2022
1 parent 542b929 commit 7692609
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 41 deletions.
5 changes: 2 additions & 3 deletions cases/multi_modal_genre_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@

def run_multi_modal_case(files_path, is_visualise=True, timeout=datetime.timedelta(minutes=1)):
task = Task(TaskTypesEnum.classification)
images_size = (128, 128)
images_size = (224, 224)

data = prepare_multi_modal_data(files_path, task, images_size)

initial_pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size, data,
with_split=True)
initial_pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(data, with_split=True)

# the search of the models provided by the framework that can be used as nodes in a pipeline for the selected task
available_model_types = get_operations_for_task(task=task, mode='model')
Expand Down
16 changes: 7 additions & 9 deletions examples/advanced/multi_modal_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,12 @@ def prepare_multi_modal_data(files_path: str, task: Task, images_size: tuple = (
return data


def generate_initial_pipeline_and_data(images_size: tuple,
data: Union[InputData, MultiModalData],
def generate_initial_pipeline_and_data(data: Union[InputData, MultiModalData],
with_split=True) -> tuple:
"""
Generates initial pipeline for data from 3 different sources (table, images and text)
Each source is the primary node for its subpipeline
:param images_size: the requested size in pixels, as a 2-tuple of (width, height)
:param data: multimodal data (from 3 different sources: table, text, image)
:param with_split: if True, splits the sample on train/test
:return: pipeline object, 2 multimodal data objects (fit and predict)
Expand All @@ -88,12 +86,13 @@ def generate_initial_pipeline_and_data(images_size: tuple,
else:
num_classes = data.num_classes
# image
images_size = data['data_source_img'].features.shape[1:4]
ds_image = PrimaryNode('data_source_img')
image_node = SecondaryNode('cnn', nodes_from=[ds_image])
image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
'architecture_type': 'simplified',
image_node.custom_params = {'image_shape': images_size,
'architecture_type': 'vgg16',
'num_classes': num_classes,
'epochs': 10,
'epochs': 2,
'batch_size': 16,
'optimizer_parameters': {'loss': "binary_crossentropy",
'optimizer': "adam",
Expand Down Expand Up @@ -127,12 +126,11 @@ def generate_initial_pipeline_and_data(images_size: tuple,

def run_multi_modal_pipeline(files_path: str, is_visualise=False) -> float:
task = Task(TaskTypesEnum.classification)
images_size = (128, 128)
images_size = (224, 224)

data = prepare_multi_modal_data(files_path, task, images_size)

pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(images_size, data,
with_split=True)
pipeline, fit_data, predict_data = generate_initial_pipeline_and_data(data, with_split=True)

pipeline.fit(input_data=fit_data)

Expand Down
19 changes: 9 additions & 10 deletions fedot/core/data/data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import glob
import os
import cv2
from copy import copy, deepcopy
from dataclasses import dataclass, field
from typing import List, Optional, Tuple, Union

import imageio
import numpy as np
import pandas as pd
from PIL import Image

from fedot.core.data.array_utilities import atleast_2d
from fedot.core.data.load_data import JSONBatchLoader, TextBatchLoader
Expand Down Expand Up @@ -406,15 +405,15 @@ class OutputData(Data):
target: Optional[np.array] = None


def _resize_image(file_path: str, target_size: Tuple[int, int]):
    """
    Resize the image stored at ``file_path`` and overwrite the file in place.

    The file is rewritten only when the stored image does not already have
    the requested size.

    :param file_path: path to the image file, read with ``cv2.imread``
    :param target_size: the requested size in pixels, as a 2-tuple of (width, height);
        it is passed in this order as ``dsize`` to ``cv2.resize``
    :return: the image array as loaded by OpenCV, resized if it was needed
    :raises ValueError: if the file cannot be read as an image
    """

    img = cv2.imread(file_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None rather than raising
        raise ValueError(f'Cannot read image file: {file_path}')

    width, height = target_size[0], target_size[1]
    # The array shape is (rows, cols) == (height, width), while cv2.resize takes
    # (width, height), so the order must be swapped before comparing.
    if img.shape[:2] != (height, width):
        img = cv2.resize(img, (width, height))
        cv2.imwrite(file_path, img)
    return img


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,23 @@ def create_simple_cnn(input_shape: tuple,
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(num_classes, activation="sigmoid"),
tf.keras.layers.Dense(num_classes, activation="softmax"),
]
)

return model


def create_vgg16(input_shape: tuple,
                 num_classes: int):
    """
    Build a VGG16 classifier through ``tf.keras.applications``.

    :param input_shape: input shape forwarded to the Keras VGG16 constructor
    :param num_classes: number of classes of the classification head
    :return: the constructed Keras model (it is not compiled here)
    """
    vgg16_settings = dict(include_top=True,
                          weights=None,  # no pre-trained weights are loaded
                          input_shape=input_shape,
                          classes=num_classes,
                          classifier_activation='sigmoid')
    return tf.keras.applications.vgg16.VGG16(**vgg16_settings)


def fit_cnn(train_data: InputData,
model,
epochs: int = 10,
Expand All @@ -78,7 +88,7 @@ def fit_cnn(train_data: InputData,
if transform_flag:
logger.debug('Train data set was not scaled. The data was divided by 255.')

if len(x_train.shape) < 4:
if len(x_train.shape) == 3:
transformed_x_train = np.expand_dims(x_train, -1)

if len(train_data.target.shape) < 2:
Expand Down Expand Up @@ -117,11 +127,14 @@ def predict_cnn(trained_model, predict_data: InputData, output_mode: str = 'labe

if np.max(transformed_x_test) > 1:
logger.warn('Test data set was not scaled. The data was divided by 255.')
transformed_x_test = np.expand_dims(x_test, -1)

if len(x_test.shape) == 3:
transformed_x_test = np.expand_dims(x_test, -1)

if output_mode == 'labels':
prediction = trained_model.predict(transformed_x_test)
prediction = np.round(trained_model.predict(transformed_x_test))
elif output_mode in ['probs', 'full_probs', 'default']:
prediction = trained_model.predict_proba(transformed_x_test)
prediction = trained_model.predict(transformed_x_test)
if trained_model.num_classes < 2:
logger.error('Data set contain only 1 target class. Please reformat your data.')
raise NotImplementedError()
Expand All @@ -133,7 +146,8 @@ def predict_cnn(trained_model, predict_data: InputData, output_mode: str = 'labe


cnn_model_dict = {'deep': create_deep_cnn,
'simplified': create_simple_cnn}
'simplified': create_simple_cnn,
'vgg16': create_vgg16}


class FedotCNNImplementation(ModelImplementation):
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ torch>=1.9.0
gensim>=4.1.2
tqdm
requests>=2.*
func_timeout==4.3.5
func_timeout==4.3.5
opencv-python>=4.5.5.64
17 changes: 5 additions & 12 deletions test/unit/multimodal/test_multi_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,16 @@
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root
from fedot.core.data.multi_modal import MultiModalData


def generate_multi_modal_pipeline():
images_size = (128, 128)

# image
image_node = PrimaryNode('cnn')
image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
'architecture': 'simplified',
'num_classes': 2,
'epochs': 1,
'batch_size': 128}
def generate_multi_modal_pipeline(data: MultiModalData):

# image
images_size = data['data_source_img'].features.shape[1:4]
ds_image = PrimaryNode('data_source_img')
image_node = SecondaryNode('cnn', nodes_from=[ds_image])
image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
image_node.custom_params = {'image_shape': images_size,
'architecture': 'simplified',
'num_classes': 2,
'epochs': 15,
Expand Down Expand Up @@ -61,14 +54,14 @@ def generate_multi_task_pipeline():


def test_multi_modal_pipeline():
pipeline = generate_multi_modal_pipeline()

files_path = os.path.join('test', 'data', 'multi_modal')
path = os.path.join(str(fedot_project_root()), files_path)
task = Task(TaskTypesEnum.classification)
images_size = (128, 128)

fit_data = prepare_multi_modal_data(path, task, images_size)
pipeline = generate_multi_modal_pipeline(fit_data)

pipeline.fit(fit_data)
prediction = pipeline.predict(fit_data)
Expand Down

0 comments on commit 7692609

Please sign in to comment.