From 4b75f19af53566e7ccb1c395e5c85168de302a57 Mon Sep 17 00:00:00 2001
From: Samet
Date: Mon, 14 Nov 2022 11:02:58 -0700
Subject: [PATCH 1/2] Add tiling notebook

---
 docs/source/how_to_guides/index.rst        |   1 +
 notebooks/100_datamodules/104_tiling.ipynb | 249 +++++++++++++++++++++
 2 files changed, 250 insertions(+)
 create mode 100644 notebooks/100_datamodules/104_tiling.ipynb

diff --git a/docs/source/how_to_guides/index.rst b/docs/source/how_to_guides/index.rst
index b9a6bc07d5..5dc6165058 100644
--- a/docs/source/how_to_guides/index.rst
+++ b/docs/source/how_to_guides/index.rst
@@ -10,6 +10,7 @@ How to Guides
    notebooks/100_datamodules/101_btech.ipynb
    notebooks/100_datamodules/102_mvtec.ipynb
    notebooks/100_datamodules/103_folder.ipynb
+   notebooks/100_datamodules/104_tiling.ipynb
    notebooks/200_models/201_fastflow.ipynb
    notebooks/300_benchmarking/301_benchmarking.ipynb
    notebooks/300_benchmarking/302_hpo_wandb.ipynb
diff --git a/notebooks/100_datamodules/104_tiling.ipynb b/notebooks/100_datamodules/104_tiling.ipynb
new file mode 100644
index 0000000000..c1ae323c0d
--- /dev/null
+++ b/notebooks/100_datamodules/104_tiling.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tiling Large Images\n",
+    "In the literature on deep learning, input image sizes typically range from 224 to 768 pixels. In the majority of industrial applications, however, input image sizes are significantly larger. Before the forward pass, these images are resized to a smaller scale before being fed into the models. However, this is problematic in the context of anomaly detection tasks, where the anomaly is typically quite small. The detection of abnormalities becomes extremely difficult when the image is shrunk in size. A common method for addressing this issue is to tile the input images so that no information is lost during the resizing operation.\n",
+    "\n",
+    "This notebook demonstrates how tiling can be done in anomalib."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from torchvision.io import read_image\n",
+    "from torchvision.transforms import Resize, ToPILImage\n",
+    "from torchvision.utils import draw_segmentation_masks, make_grid\n",
+    "\n",
+    "from anomalib.pre_processing.tiler import Tiler"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Effect of Resizing the Input Image "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Assuming that the datasets directory is located in the root directory.\n",
+    "# image = read_image(path=\"../../datasets/MVTec/bottle/test/good/000.png\")\n",
+    "image = read_image(path=\"../../datasets/MVTec/transistor/test/cut_lead/000.png\")\n",
+    "mask = read_image(path=\"../../datasets/MVTec/transistor/ground_truth/cut_lead/000_mask.png\").bool()\n",
+    "overlayed_image = draw_segmentation_masks(image, mask, alpha=0.2, colors=[\"red\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ToPILImage()(overlayed_image)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As can be seen above, the original MVTec image is 1024x1024. Most anomaly detection algorithms resize this input to a smaller size, such as 256x256. As mentioned above, this poses a problem for small anomalous regions, as they become even smaller. Let's visualize this by resizing the input image into a 256x256 image."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "resized_image = Resize((256, 256))(image)\n",
+    "resized_mask = Resize((256, 256))(mask)\n",
+    "resized_overlayed_image = draw_segmentation_masks(resized_image, resized_mask, alpha=0.5, colors=[\"red\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ToPILImage()(resized_overlayed_image)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The figures above demonstrate how resizing affects the input image. The original image is 1024x1024. The resized image is 256x256. The anomaly is now much smaller and is difficult to detect.\n",
+    "\n",
+    "### Effect of Tiling \n",
+    "To avoid losing information with resizing we could instead tile the image into multiple patches. We could utilize anomalib's `Tiler` object to do this. The `Tiler` object takes in the input image and the desired output size. It then tiles the image into patches of the desired size. The `Tiler` object also returns the coordinates of the patches in the original image. This is useful for reconstructing the tiled image back into the original image. Let's create a tiler object, tile the input iamge and visualize t tiled image."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tiler = Tiler(tile_size=(256, 256), stride=256)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "where stride is the parameter defining how many pixels must be crossed in order to take the second tile, and tile size is the size of each tile (patches). Tile size and stride are here defined as (256, 256) and 256, respectively. As a result, we move 256 pixels to the right to take 256x256 patches. This will result in \"non-overlapping tiles\"."
+   ]
+  },
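+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick illustration of the stride parameter, choosing a stride smaller than the tile size would instead produce overlapping tiles. The cell below is a minimal sketch that assumes the same `Tiler` interface and uses a stride of 128, so neighbouring 256x256 tiles overlap by half; the rest of the notebook continues with the non-overlapping `tiler` defined above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: a stride smaller than the tile size yields overlapping tiles,\n",
+    "# so more patches are produced than in the non-overlapping case.\n",
+    "overlapping_tiler = Tiler(tile_size=(256, 256), stride=128)\n",
+    "overlapping_tiles = overlapping_tiler.tile(image)\n",
+    "print(overlapping_tiles.shape)"
+   ]
+  },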
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tiled_image = tiler.tile(image).type(torch.uint8)\n",
+    "tiled_mask = tiler.tile(mask).type(torch.bool)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(tiled_image.shape, tiled_mask.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ToPILImage()(make_grid(tiled_image, nrow=4, padding=5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The image is tiled into non-overlapping patches as seen in the figure above. We have 16 patches in total because the original input size is 1024x1024 and the tile and stride sizes are (256, 256) and 256, respectively. As we can see, the fourteenth tile is the one that contains the anomalous region. We can visualize it by running the following:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "overlayed_tile = draw_segmentation_masks(tiled_image[13], tiled_mask[13], alpha=0.2, colors=[\"red\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ToPILImage()(overlayed_tile)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We could also visualize the resized image and the tiled region by stacking them together. This would show how tiling could preserve the data that resizing operations would otherwise lose."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ToPILImage()(torch.cat([resized_overlayed_image, overlayed_tile], dim=2))"
+   ]
+  },
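+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also go in the other direction and stitch the tiles back into a full-size image. The cell below is a minimal sketch that assumes this anomalib version provides the `Tiler.untile` method, which reuses the layout information recorded during the matching `tile` call."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: tile the image and stitch the tiles straight back together.\n",
+    "# `untile` is assumed to reuse the tile layout stored by the matching `tile` call.\n",
+    "untiled_image = tiler.untile(tiler.tile(image.float())).type(torch.uint8)\n",
+    "print(untiled_image.shape)  # expected to match the original 1024x1024 resolution\n",
+    "ToPILImage()(untiled_image.squeeze())"
+   ]
+  },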
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Overall, tiling could be a useful feature when the input image size is large and the anomalous region size is small. Tiling the input could avoid information loss that would otherwise occur with resizing."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Tiling in Anomalib Training\n",
+    "This section demonstrates how tiling can be enabled in anomalib training. Tiling is configured via the dataset section of the configuration file, which contains a tiling subsection where the tiling parameters are set. Below is an illustration of a tiling configuration:\n",
+    "\n",
+    "```yaml\n",
+    "  tiling:\n",
+    "    apply: false\n",
+    "    tile_size: null\n",
+    "    stride: null\n",
+    "    remove_border_count: 0\n",
+    "    use_random_tiling: False\n",
+    "    random_tile_count: 16\n",
+    "```\n",
+    "For example, to train a dataset with tiling based on 256x256 non-overlapping patches, the following data configuration could be used:\n",
+    "\n",
+    "```yaml\n",
+    "  tiling:\n",
+    "    apply: true\n",
+    "    tile_size: 256\n",
+    "    stride: 256\n",
+    "    remove_border_count: 0\n",
+    "    use_random_tiling: False\n",
+    "    random_tile_count: 16\n",
+    "```\n"
+   ]
+  },
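+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The same settings can also be applied programmatically. The cell below is a minimal sketch that assumes the `get_configurable_parameters` helper of this anomalib version and a PADIM config file at the path used here (relative to this notebook); it simply loads a model config and overrides the `dataset.tiling` section shown above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: enable 256x256 non-overlapping tiling by overriding dataset.tiling in a model config.\n",
+    "# The helper and the config path are assumptions about this anomalib version and repo layout.\n",
+    "from anomalib.config import get_configurable_parameters\n",
+    "\n",
+    "config = get_configurable_parameters(config_path=\"../../anomalib/models/padim/config.yaml\")\n",
+    "config.dataset.tiling.apply = True\n",
+    "config.dataset.tiling.tile_size = 256\n",
+    "config.dataset.tiling.stride = 256\n",
+    "print(config.dataset.tiling)"
+   ]
+  },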
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Depending on the use case, these tiling configurations could potentially improve the performance of anomaly detection models."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.13 ('anomalib')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "f26beec5b578f06009232863ae217b956681fd13da2e828fa5a0ecf8cf2ccd29"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 272daf8fe0281b165eb59ccad8e6c8b12ed006d5 Mon Sep 17 00:00:00 2001
From: Samet Akcay
Date: Fri, 18 Nov 2022 11:46:40 +0000
Subject: [PATCH 2/2] Address the PR comments

---
 notebooks/100_datamodules/104_tiling.ipynb | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/notebooks/100_datamodules/104_tiling.ipynb b/notebooks/100_datamodules/104_tiling.ipynb
index c1ae323c0d..bf4760db7e 100644
--- a/notebooks/100_datamodules/104_tiling.ipynb
+++ b/notebooks/100_datamodules/104_tiling.ipynb
@@ -7,7 +7,7 @@
     "# Tiling Large Images\n",
     "In the literature on deep learning, input image sizes typically range from 224 to 768 pixels. In the majority of industrial applications, however, input image sizes are significantly larger. Before the forward pass, these images are resized to a smaller scale before being fed into the models. However, this is problematic in the context of anomaly detection tasks, where the anomaly is typically quite small. The detection of abnormalities becomes extremely difficult when the image is shrunk in size. A common method for addressing this issue is to tile the input images so that no information is lost during the resizing operation.\n",
     "\n",
-    "This notebook demonstrates how tiling can be done in anomalib."
+    "This notebook demonstrates how tiling works in anomalib, and how tiling can be enabled when training a model using anomalib."
    ]
   },
   {
@@ -87,7 +87,7 @@
     "The figures above demonstrate how resizing affects the input image. The original image is 1024x1024. The resized image is 256x256. The anomaly is now much smaller and is difficult to detect.\n",
     "\n",
     "### Effect of Tiling \n",
-    "To avoid losing information with resizing we could instead tile the image into multiple patches. We could utilize anomalib's `Tiler` object to do this. The `Tiler` object takes in the input image and the desired output size. It then tiles the image into patches of the desired size. The `Tiler` object also returns the coordinates of the patches in the original image. This is useful for reconstructing the tiled image back into the original image. Let's create a tiler object, tile the input iamge and visualize t tiled image."
+    "To avoid losing information with resizing we could instead tile the image into multiple patches. We could utilize anomalib's `Tiler` object to do this. The `Tiler` object takes in the input image and the desired output size. It then tiles the image into patches of the desired size. The `Tiler` object also returns the coordinates of the patches in the original image. This is useful for reconstructing the tiled image back into the original image. Let's create a tiler object, tile the input image and visualize the tiled image."
    ]
   },
   {
@@ -103,7 +103,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "where stride is the parameter defining how many pixels must be crossed in order to take the second tile, and tile size is the size of each tile (patches). Tile size and stride are here defined as (256, 256) and 256, respectively. As a result, we move 256 pixels to the right to take 256x256 patches. This will result in \"non-overlapping tiles\"."
+    "where stride is the parameter defining the spatial distance in pixels between adjacent tiles, and tile size is the size of each tile (patch). Tile size and stride are here defined as (256, 256) and 256, respectively. As a result, we move 256 pixels to the right to take 256x256 patches. This will result in \"non-overlapping tiles\"."
    ]
   },
   {
@@ -221,7 +221,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.13 ('anomalib')",
+   "display_name": "Python 3.8.10",
    "language": "python",
    "name": "python3"
   },
@@ -235,12 +235,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.8.10"
   },
   "orig_nbformat": 4,
   "vscode": {
    "interpreter": {
-    "hash": "f26beec5b578f06009232863ae217b956681fd13da2e828fa5a0ecf8cf2ccd29"
+    "hash": "ec7b890a7ad0a53be38594f25f679d3f514fc4eb4271d1db0284d66c33facb3f"
    }
   }
  },