From 0c7444e5552241df0fbda8e41cbc89874b5ec604 Mon Sep 17 00:00:00 2001
From: Samet Akcay
Date: Thu, 3 Aug 2023 20:36:35 +0100
Subject: [PATCH 1/6] Configure readthedocs via `.readthedocs.yaml` file (#1229)

---
 .readthedocs.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .readthedocs.yaml

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000000..f0e299c8e3
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,32 @@
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+    # You can also specify other tool versions:
+    # nodejs: "20"
+    # rust: "1.70"
+    # golang: "1.20"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+  # builder: "dirhtml"
+  # Fail on all warnings to avoid broken references
+  # fail_on_warning: true
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats:
+  - pdf
+  - epub
+
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+  install:
+    - requirements: requirements/docs.txt

From 784767fc2f19a8f354f152aba7f4338cb628118c Mon Sep 17 00:00:00 2001
From: Ashwin Vaidya
Date: Fri, 4 Aug 2023 17:24:51 +0200
Subject: =?UTF-8?q?=F0=9F=9A=9A=20Refactor=20Benchmarking=20Sc?=
 =?UTF-8?q?ript=20(#1216)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* New printing stuff

* Remove dead code + address codacy issues

* Refactor try/except + log to comet/wandb during runs

* pre-commit error

* third-party configuration

---------

Co-authored-by: Ashwin Vaidya
---
 pyproject.toml                       |   3 +
 requirements/loggers.txt             |   2 +-
 src/anomalib/utils/sweep/__init__.py |   8 +-
 src/anomalib/utils/sweep/utils.py    | 105 ++++++
 tools/benchmarking/benchmark.py      | 469 ++++++++++++++------------
 tools/benchmarking/utils/__init__.py |   4 +-
 tools/benchmarking/utils/metrics.py  | 123 ++++---
 7 files changed, 425 insertions(+), 289 deletions(-)
 create mode 100644 src/anomalib/utils/sweep/utils.py

diff --git a/pyproject.toml b/pyproject.toml
index d25e846269..d896789bba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,6 +58,9 @@ target-version = "py38"
 # Allow imports relative to the "src" and "tests" directories.
 src = ["src", "tests"]
 
+[tool.ruff.isort]
+known-third-party = ["wandb", "comet_ml"]
+
 [tool.ruff.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
diff --git a/requirements/loggers.txt b/requirements/loggers.txt
index f6a4157d0c..c84a876f06 100644
--- a/requirements/loggers.txt
+++ b/requirements/loggers.txt
@@ -1,6 +1,6 @@
 comet-ml>=3.31.7
 gradio>=2.9.4
 tensorboard
-wandb==0.12.17
+wandb>=0.13.0
 GitPython
 ipykernel
diff --git a/src/anomalib/utils/sweep/__init__.py b/src/anomalib/utils/sweep/__init__.py
index 36493a3988..a32a24e50b 100644
--- a/src/anomalib/utils/sweep/__init__.py
+++ b/src/anomalib/utils/sweep/__init__.py
@@ -5,12 +5,16 @@
 
 from .config import flatten_sweep_params, get_run_config, set_in_nested_config
 from .helpers import get_openvino_throughput, get_sweep_callbacks, get_torch_throughput
+from .utils import Status, exception_wrapper, redirect_output
 
 __all__ = [
+    "exception_wrapper",
+    "flatten_sweep_params",
     "get_run_config",
-    "set_in_nested_config",
     "get_sweep_callbacks",
     "get_openvino_throughput",
     "get_torch_throughput",
-    "flatten_sweep_params",
+    "set_in_nested_config",
+    "redirect_output",
+    "Status",
 ]
diff --git a/src/anomalib/utils/sweep/utils.py b/src/anomalib/utils/sweep/utils.py
new file mode 100644
index 0000000000..56a0bb9ccc
--- /dev/null
+++ b/src/anomalib/utils/sweep/utils.py
@@ -0,0 +1,105 @@
+"""Additional utils for sweep."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import functools
+import io
+import logging
+import sys
+from enum import Enum
+from typing import Any, Callable
+
+logger = logging.getLogger(__name__)
+
+
+def redirect_output(func: Callable) -> Callable[..., dict[str, Any]]:
+    """Decorator to redirect output of the function.
+
+    Args:
+        func (function): Hides output of this function.
+
+    Raises:
+        Exception: Incase the execution of function fails, it raises an exception.
+
+    Returns:
+        object of the called function
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> dict[str, Any]:
+        std_out = sys.stdout
+        sys.stdout = buf = io.StringIO()
+        try:
+            value = func(*args, **kwargs)
+            logger.info(buf.getvalue())
+            logger.info(value)
+        except Exception as exp:
+            logger.exception(
+                "Error occurred while computing benchmark %s. Buffer: %s." "\n Method %s, args %s, kwargs %s",
+                exp,
+                buf.getvalue(),
+                func,
+                args,
+                kwargs,
+            )
+            value = {}
+        sys.stdout = std_out
+        return value
+
+    return wrapper
+
+
+class Status(str, Enum):
+    """Status of the benchmarking run."""
+
+    SUCCESS = "success"
+    FAILED = "failed"
+
+
+class Result:
+    def __init__(self, value: Any, status=Status.SUCCESS):
+        self.value = value
+        self.status = status
+
+    def __bool__(self):
+        return self.status == Status.SUCCESS
+
+
+def exception_wrapper(func: Callable) -> Callable[..., Result]:
+    """Wrapper method to handle exceptions.
+
+    Args:
+        func (function): Function to be wrapped.
+
+    Raises:
+        Exception: Incase the execution of function fails, it raises an exception.
+
+    Example:
+        >>> @exception_wrapper
+        ... def func():
+        ...     raise Exception("Exception occurred")
+        >>> func()
+        Exception: Exception occurred
+
+    Returns:
+        object of the called function
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> Result:
+        try:
+            value = Result(value=func(*args, **kwargs))
+        except Exception as exp:
+            logger.exception(
+                "Error occurred while computing benchmark %s. Method %s, args %s, kwargs %s",
+                exp,
+                func,
+                args,
+                kwargs,
+            )
+            value = Result(False, Status.FAILED)
+        return value
+
+    return wrapper
diff --git a/tools/benchmarking/benchmark.py b/tools/benchmarking/benchmark.py
index d55f6c9eaf..775232c9bd 100644
--- a/tools/benchmarking/benchmark.py
+++ b/tools/benchmarking/benchmark.py
@@ -1,87 +1,84 @@
 """Benchmark all the algorithms in the repo."""
 
-# Copyright (C) 2022 Intel Corporation
+# Copyright (C) 2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 from __future__ import annotations
 
-import functools
-import io
 import logging
+from pathlib import Path
+
+# E402 Module level import not at top of file. Disabled as we need to redirect all outputs during the runs.
+# ruff: noqa: E402
+# pylint: disable=wrong-import-position
+
+# File cannot be unique because if we create a unique name based on time,
+# each process will create a new file
+log_file = "runs/benchmark.log"
+Path(log_file).parent.mkdir(exist_ok=True, parents=True)
+logger_file_handler = logging.FileHandler(log_file)
+logger_file_handler.setLevel(logging.INFO)
+
+# Redirect warnings and logs to file that are generated while importing
+logging.captureWarnings(True)
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logger.addHandler(logger_file_handler)
+
+# End of warnings capture | Rest of the imports follow
+
 import math
 import multiprocessing
-import sys
 import time
-import warnings
 from argparse import ArgumentParser
-from concurrent.futures import ProcessPoolExecutor, as_completed
+from concurrent.futures import ProcessPoolExecutor
 from datetime import datetime
-from pathlib import Path
+from multiprocessing.managers import DictProxy
 from tempfile import TemporaryDirectory
-from typing import cast
+from typing import Any, cast
 
 import torch
+import wandb
 from omegaconf import DictConfig, ListConfig, OmegaConf
 from pytorch_lightning import Trainer, seed_everything
-from utils import upload_to_comet, upload_to_wandb, write_metrics
+from rich import print  # pylint: disable=W0622 | disable redefine print warning
+from rich.console import Console
+from rich.progress import Progress, TaskID
+from rich.table import Table
+from utils import write_metrics
 
 from anomalib.config import get_configurable_parameters, update_input_size_config
 from anomalib.data import get_datamodule
 from anomalib.deploy import export
 from anomalib.deploy.export import ExportMode
 from anomalib.models import get_model
-from anomalib.utils.loggers import configure_logger
 from anomalib.utils.sweep import (
-    get_openvino_throughput,
+    exception_wrapper,
     get_run_config,
     get_sweep_callbacks,
-    get_torch_throughput,
+    redirect_output,
     set_in_nested_config,
 )
 
-warnings.filterwarnings("ignore")
-
-logger = logging.getLogger(__name__)
-configure_logger()
-pl_logger = logging.getLogger(__file__)
-for logger_name in ["pytorch_lightning", "torchmetrics", "os"]:
-    logging.getLogger(logger_name).setLevel(logging.ERROR)
-
+# TODO add torch and openvino throughputs.
 
-def hide_output(func):
-    """Decorator to hide output of the function.
+# Redirect future warnings and logs to file from all the imports
+for name in logging.root.manager.loggerDict:
+    for filter_keys in ["lightning", "anomalib", "wandb", "comet", "py.warnings"]:
+        if filter_keys in name:
+            _logger = logging.getLogger(name)
+            _logger.setLevel(logging.WARNING)
+            _logger.handlers = []
+            _logger.addHandler(logger_file_handler)
 
-    Args:
-        func (function): Hides output of this function.
- - Raises: - Exception: Incase the execution of function fails, it raises an exception. - - Returns: - object of the called function - """ - - @functools.wraps(func) - def wrapper(*args, **kwargs): - std_out = sys.stdout - sys.stdout = buf = io.StringIO() - try: - value = func(*args, **kwargs) - except Exception as exp: - raise Exception(buf.getvalue()) from exp - sys.stdout = std_out - return value - return wrapper - - -@hide_output -def get_single_model_metrics(model_config: DictConfig | ListConfig, openvino_metrics: bool = False) -> dict: +@redirect_output +def get_single_model_metrics(model_config: DictConfig | ListConfig) -> dict[str, Any]: """Collects metrics for `model_name` and returns a dict of results. Args: model_config (DictConfig, ListConfig): Configuration for run - openvino_metrics (bool): If True, converts the model to OpenVINO format and gathers inference metrics. Returns: dict: Collection of all the metrics such as time taken, throughput and performance scores. @@ -89,6 +86,8 @@ def get_single_model_metrics(model_config: DictConfig | ListConfig, openvino_met with TemporaryDirectory() as project_path: model_config.project.path = project_path + model_config.trainer.enable_progress_bar = False + model_config.trainer.enable_model_summary = False datamodule = get_datamodule(model_config) model = get_model(model_config) @@ -121,210 +120,218 @@ def get_single_model_metrics(model_config: DictConfig | ListConfig, openvino_met export_root=project_path, ) - throughput = get_torch_throughput( - model_path=project_path, - test_dataset=datamodule.test_dataloader().dataset, - device=model_config.trainer.accelerator, - ) - - # Get OpenVINO metrics - openvino_throughput = float("nan") - if openvino_metrics: - # Create dirs for openvino model export - export( - task=model_config.dataset.task, - transform=trainer.datamodule.test_data.transform.to_dict(), - input_size=model_config.model.input_size, - model=model, - export_mode=ExportMode.OPENVINO, - export_root=project_path, - ) - openvino_throughput = get_openvino_throughput(model_path=project_path, test_dataset=datamodule.test_data) - # arrange the data data = { "Training Time (s)": training_time, "Testing Time (s)": testing_time, - f"Inference Throughput {model_config.trainer.accelerator} (fps)": throughput, - "OpenVINO Inference Throughput (fps)": openvino_throughput, } for key, val in test_results[0].items(): data[key] = float(val) - return data -def compute_on_cpu(sweep_config: DictConfig | ListConfig, folder: str | None = None): - """Compute all run configurations over a sigle CPU.""" - for run_config in get_run_config(sweep_config.grid_search): - model_metrics = sweep(run_config, 0, sweep_config.seed, False) - write_metrics(model_metrics, sweep_config.writer, folder) - - -def compute_on_gpu( - run_configs: list[DictConfig], - device: int, - seed: int, - writers: list[str], - folder: str | None = None, - compute_openvino: bool = False, -): - """Go over each run config and collect the result. +class Benchmark: + """Benchmarking runner Args: - run_configs (DictConfig | ListConfig): List of run configurations. - device (int): The GPU id used for running the sweep. - seed (int): Fix a seed. - writers (list[str]): Destinations to write to. - folder (optional, str): Sub-directory to which runs are written to. Defaults to None. If none writes to root. - compute_openvino (bool, optional): Compute OpenVINO throughput. Defaults to False. + config: (DictConfig | ListConfig): Sweep configuration. 
+ n_gpus: (int): Number of devices to run the benchmark on. If n_gpus is 0, benchmarking is run on all available + GPUs. """ - for run_config in run_configs: - if isinstance(run_config, (DictConfig, ListConfig)): - model_metrics = sweep(run_config, device, seed, compute_openvino) - write_metrics(model_metrics, writers, folder) - else: - raise ValueError( - f"Expecting `run_config` of type DictConfig or ListConfig. Got {type(run_config)} instead." - ) - -def distribute_over_gpus(sweep_config: DictConfig | ListConfig, folder: str | None = None): - """Distribute metric collection over all available GPUs. This is done by splitting the list of configurations.""" - with ProcessPoolExecutor( - max_workers=torch.cuda.device_count(), mp_context=multiprocessing.get_context("spawn") - ) as executor: - run_configs = list(get_run_config(sweep_config.grid_search)) + def __init__(self, config: DictConfig | ListConfig, n_gpus: int = 0): + self.config = config + self.n_gpus = min(n_gpus, torch.cuda.device_count()) if n_gpus > 0 else torch.cuda.device_count() + self.runs_folder = f"runs/{datetime.strftime(datetime.now(), '%Y_%m_%d-%H_%M_%S')}" + Path(self.runs_folder).mkdir(exist_ok=True, parents=True) + self.run_failures: bool = False + + @exception_wrapper + def _sweep(self, device: int, run_config: DictConfig, seed: int = 42) -> dict[str, Any]: + """Run a single sweep on a device.""" + seed_everything(seed, workers=True) + # This assumes that `model_name` is always present in the sweep config. + model_config = get_configurable_parameters(model_name=run_config.model_name) + model_config.project.seed = seed + model_config = cast(DictConfig, model_config) # placate mypy + for param in run_config.keys(): + # grid search keys are always assumed to be strings + param = cast(str, param) # placate mypy + set_in_nested_config(model_config, param.split("."), run_config[param]) # type: ignore + + # convert image size to tuple in case it was updated by run config + model_config = update_input_size_config(model_config) + + # Set device in config. 0 - cpu, [0], [1].. - gpu id + if device != 0: + model_config.trainer.devices = [device - 1] + model_config.trainer.accelerator = "gpu" + + # Remove legacy flags + for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]: + if legacy_device in model_config.trainer: + model_config.trainer[legacy_device] = None + + # Run benchmarking for current config + model_metrics: dict[str, Any] = get_single_model_metrics(model_config=model_config) + output = f"One sweep run complete for model {model_config.model.name}" + output += f" On category {model_config.dataset.category}" if model_config.dataset.category is not None else "" + output += str(model_metrics) + logger.info(output) + + # Append configuration of current run to the collected metrics + for key, value in run_config.items(): + # Skip adding model name to the dataframe + if key != "model_name": + model_metrics[key] = value + + # Add device name to list + model_metrics["device"] = "gpu" + model_metrics["model_name"] = run_config.model_name + + return model_metrics + + @exception_wrapper + def _compute( + self, progress: DictProxy, task_id: TaskID, device: int, run_configs: list[DictConfig] + ) -> dict[str, list[str]]: + """Iterate over configurations and compute & write metrics for single configuration. + + Args: + progress (DictProxy): Shared dict to write progress status for displaying in terminal. + task_id (TaskID): Task id for the current process. Used to identify the progress bar. 
+ device (int): GPU id on which the benchmarking is run. + run_configs (list[DictConfig]): List of run configurations. + + Returns: + dict[str, list[str]]: Dictionary containing the metrics gathered from the sweep. + """ + result = [] + for idx, config in enumerate(run_configs): + output = self._sweep(device, config) + if output: + write_metrics(output.value, self.config.writer, self.runs_folder) + result.append(output.value) + else: + self.run_failures = True + + progress[str(task_id)] = {"completed": idx + 1, "total": len(run_configs)} + # convert list of dicts to dict of lists + return {key: [dic[key] for dic in result] for key in result[0]} + + @exception_wrapper + def _distribute(self): + run_configs = list(get_run_config(self.config.grid_search)) + step_size = math.ceil(len(run_configs) / self.n_gpus) jobs = [] - for device_id, run_split in enumerate( - range(0, len(run_configs), math.ceil(len(run_configs) / torch.cuda.device_count())) - ): - jobs.append( - executor.submit( - compute_on_gpu, - run_configs[run_split : run_split + math.ceil(len(run_configs) / torch.cuda.device_count())], - device_id + 1, - sweep_config.seed, - sweep_config.writer, - folder, - sweep_config.compute_openvino, - ) + results: list[dict[str, list[str]]] = [] + with Progress() as progress: + overall_progress_task = progress.add_task("[green]Overall Progress") + with multiprocessing.Manager() as manager: + _progress = manager.dict() + + with ProcessPoolExecutor( + max_workers=self.n_gpus, mp_context=multiprocessing.get_context("spawn") + ) as executor: + for device_id, run_split in enumerate(range(0, len(run_configs), step_size)): + task_id = progress.add_task(f"Running benchmark on GPU {device_id}") + _progress[str(task_id)] = {"completed": 0, "total": step_size} + jobs.append( + executor.submit( + self._compute, + _progress, + task_id, + device_id, + run_configs[run_split : run_split + step_size], + ) + ) + + # monitor the progress: + while (sum([job.done() for job in jobs])) < len(jobs): + progress.update( + overall_progress_task, + completed=sum([task["completed"] for task in _progress.values()]), + total=len(run_configs), + ) + for task_id, params in _progress.items(): + progress.update(TaskID(int(task_id)), completed=params["completed"], total=params["total"]) + + for job in jobs: + _result = job.result() + if _result: + results.append(_result.value) + else: + self.run_failures = True + + progress.update(overall_progress_task, completed=len(run_configs), total=len(run_configs)) + result = self._gather_results(results) + if result: + self._print_results(result.value) + else: + self.run_failures = True + + @exception_wrapper + def _gather_results(self, results: list[dict[str, list[str]]]) -> dict: + """Gather results from all processes. + + Args: + results (dict): Dictionary containing the results from all processes. + + Returns: + dict: Dictionary containing the results from all processes. + """ + result: dict[str, list] = {key: [] for key in results[0].keys()} + for _result in results: + for key, value in _result.items(): + result[key].extend(value) + return result + + @exception_wrapper + def _print_results(self, result: dict) -> None: + """Print the results in a tabular format. + + Args: + result (dict): Dictionary containing the results from all processes. 
+ """ + console = Console() + table = Table(title="Benchmarking Results", show_header=True, header_style="bold magenta") + for column in result.keys(): + table.add_column(column) + for row in [*zip(*result.values())]: + table.add_row(*[str(value) for value in row]) + console.print(table) + + def run(self): + """Run the benchmarking.""" + logger.info( + "\n%s\n" "Starting benchmarking. %s" "\nDistributing benchmark collection over %s GPUs.", + "-" * 120, + datetime.strftime(datetime.now(), "%Y %m %d-%H %M %S"), + self.n_gpus, + ) + if not torch.cuda.is_available(): + logger.warning("Could not detect any cuda enabled devices") + + self._distribute() + if self.run_failures: + print( + "[bold red]There were some errors while collecting benchmark[/bold red]" + "\nPlease check the log file [magenta]runs/benchmark.log[/magenta]" + " for more details." ) - for job in jobs: - try: - job.result() - except Exception as exc: - raise Exception(f"Error occurred while computing benchmark on GPU {job}") from exc - - -def distribute(config: DictConfig | ListConfig): - """Run all cpu experiments on a single process. Distribute gpu experiments over all available gpus. - - Args: - config: (DictConfig | ListConfig): Sweep configuration. - """ - - runs_folder = datetime.strftime(datetime.now(), "%Y_%m_%d-%H_%M_%S") - devices = config.hardware - if not torch.cuda.is_available() and "gpu" in devices: - pl_logger.warning("Config requested GPU benchmarking but torch could not detect any cuda enabled devices") - elif {"cpu", "gpu"}.issubset(devices): - # Create process for gpu and cpu - with ProcessPoolExecutor(max_workers=2, mp_context=multiprocessing.get_context("spawn")) as executor: - jobs = [ - executor.submit(compute_on_cpu, config, runs_folder), - executor.submit(distribute_over_gpus, config, runs_folder), - ] - for job in as_completed(jobs): - try: - job.result() - except Exception as exception: - raise Exception(f"Error occurred while computing benchmark on device {job}") from exception - elif "cpu" in devices: - compute_on_cpu(config, folder=runs_folder) - elif "gpu" in devices: - distribute_over_gpus(config, folder=runs_folder) - if "wandb" in config.writer: - upload_to_wandb(team="anomalib", folder=runs_folder) - if "comet" in config.writer: - upload_to_comet(folder=runs_folder) - - -def sweep( - run_config: DictConfig | ListConfig, device: int = 0, seed: int = 42, convert_openvino: bool = False -) -> dict[str, str | float]: - """Go over all the values mentioned in `grid_search` parameter of the benchmarking config. - - Args: - run_config: (DictConfig | ListConfig, optional): Configuration for current run. - device (int, optional): Name of the device on which the model is trained. Defaults to 0 "cpu". - convert_openvino (bool, optional): Whether to convert the model to openvino format. Defaults to False. - - Returns: - dict[str, str | float]: Dictionary containing the metrics gathered from the sweep. - """ - seed_everything(seed, workers=True) - # This assumes that `model_name` is always present in the sweep config. 
- model_config = get_configurable_parameters(model_name=run_config.model_name) - model_config.project.seed = seed - - model_config = cast(DictConfig, model_config) # placate mypy - for param in run_config.keys(): - # grid search keys are always assumed to be strings - param = cast(str, param) # placate mypy - set_in_nested_config(model_config, param.split("."), run_config[param]) # type: ignore - - # convert image size to tuple in case it was updated by run config - model_config = update_input_size_config(model_config) - - # Set device in config. 0 - cpu, [0], [1].. - gpu id - if device != 0: - model_config.trainer.devices = [device - 1] - model_config.trainer.accelerator = "gpu" - else: - model_config.trainer.accelerator = "cpu" - - # Remove legacy flags - for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]: - if legacy_device in model_config.trainer: - model_config.trainer[legacy_device] = None - - if run_config.model_name in ["patchcore", "cflow"]: - convert_openvino = False # `torch.cdist` is not supported by onnx version 11 - # TODO Remove this line when issue #40 is fixed https://github.com/openvinotoolkit/anomalib/issues/40 - if model_config.model.input_size != (224, 224): - return {} # go to next run - - # Run benchmarking for current config - model_metrics = get_single_model_metrics(model_config=model_config, openvino_metrics=convert_openvino) - output = f"One sweep run complete for model {model_config.model.name}" - output += f" On category {model_config.dataset.category}" if model_config.dataset.category is not None else "" - output += str(model_metrics) - logger.info(output) - - # Append configuration of current run to the collected metrics - for key, value in run_config.items(): - # Skip adding model name to the dataframe - if key != "model_name": - model_metrics[key] = value - - # Add device name to list - model_metrics["device"] = "gpu" if device > 0 else "cpu" - model_metrics["model_name"] = run_config.model_name - - return model_metrics + logger.info("Benchmarking complete \n%s", "-" * 120) if __name__ == "__main__": - # Benchmarking entry point. - # Spawn multiple processes one for cpu and rest for the number of gpus available in the system. - # The idea is to distribute metrics collection over all the available devices. - parser = ArgumentParser() parser.add_argument("--config", type=Path, help="Path to sweep configuration") _args = parser.parse_args() - print("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.") + print("[royal_blue1]Benchmarking started. 
This will take a while depending on your configuration.[/royal_blue1]") + _sweep_config = OmegaConf.load(_args.config) - distribute(_sweep_config) - print("Finished gathering results ⚡") + if "wandb" in _sweep_config.writer: + wandb.setup() # this is required when using multiprocessing otherwise wandb hangs + runner = Benchmark(_sweep_config, n_gpus=0) + runner.run() + print("[royal_blue1]Finished gathering results[/royal_blue1] ⚡") diff --git a/tools/benchmarking/utils/__init__.py b/tools/benchmarking/utils/__init__.py index b9eebfed78..3b5007afcc 100644 --- a/tools/benchmarking/utils/__init__.py +++ b/tools/benchmarking/utils/__init__.py @@ -3,6 +3,6 @@ # Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from .metrics import upload_to_comet, upload_to_wandb, write_metrics +from .metrics import write_metrics, write_to_comet, write_to_wandb -__all__ = ["write_metrics", "upload_to_comet", "upload_to_wandb"] +__all__ = ["write_metrics", "write_to_comet", "write_to_wandb"] diff --git a/tools/benchmarking/utils/metrics.py b/tools/benchmarking/utils/metrics.py index b1b7373d65..161c8c4ebb 100644 --- a/tools/benchmarking/utils/metrics.py +++ b/tools/benchmarking/utils/metrics.py @@ -5,9 +5,9 @@ from __future__ import annotations +import logging import random import string -from glob import glob from pathlib import Path import pandas as pd @@ -15,11 +15,13 @@ from comet_ml import Experiment from torch.utils.tensorboard.writer import SummaryWriter +logger = logging.getLogger(__name__) + def write_metrics( model_metrics: dict[str, str | float], writers: list[str], - folder: str | None = None, + folder: str, ): """Writes metrics to destination provided in the sweep config. @@ -32,22 +34,36 @@ def write_metrics( if model_metrics == {} or model_metrics is None: return + result_folder = Path(folder) # Write to CSV - metrics_df = pd.DataFrame(model_metrics, index=[0]) - result_folder = Path("runs") if folder is None else Path(f"runs/{folder}") - result_path = result_folder / f"{model_metrics['model_name']}_{model_metrics['device']}.csv" - Path.mkdir(result_path.parent, parents=True, exist_ok=True) - if not result_path.is_file(): - metrics_df.to_csv(result_path) - else: - metrics_df.to_csv(result_path, mode="a", header=False) + try: + metrics_df = pd.DataFrame(model_metrics, index=[0]) + result_path = result_folder / f"{model_metrics['model_name']}_{model_metrics['device']}.csv" + Path.mkdir(result_path.parent, parents=True, exist_ok=True) + if not result_path.is_file(): + metrics_df.to_csv(result_path) + else: + metrics_df.to_csv(result_path, mode="a", header=False) + except Exception as exception: + logger.exception(f"Could not write to csv. Exception: {exception}") + + project_name = f"benchmarking_{result_folder.name}" + tags = [] + for key, value in model_metrics.items(): + if all(name not in key.lower() for name in ["time", "image", "pixel", "throughput"]): + tags.append(str(value)) if "tensorboard" in writers: - write_to_tensorboard(model_metrics) + write_to_tensorboard(model_metrics, result_folder) + if "wandb" in writers: + write_to_wandb(model_metrics, project_name, tags) + if "comet" in writers: + write_to_comet(model_metrics, project_name, tags) def write_to_tensorboard( model_metrics: dict[str, str | float], + folder: Path, ): """Write model_metrics to tensorboard. 
@@ -63,7 +79,7 @@ def write_to_tensorboard( else: string_metrics[key] = metric scalar_prefixes.append(metric) - writer = SummaryWriter(f"runs/{model_metrics['model_name']}_{model_metrics['device']}") + writer = SummaryWriter(folder / "tfevents") for key, metric in model_metrics.items(): if isinstance(metric, (int, float, bool)): scalar_metrics[key.replace(".", "/")] = metric # need to join by / for tensorboard grouping @@ -90,56 +106,57 @@ def get_unique_key(str_len: int) -> str: return "".join([random.choice(string.ascii_lowercase) for _ in range(str_len)]) # nosec: B311 -def upload_to_wandb( +def write_to_wandb( + model_metrics: dict[str, str | float], + project_name: str, + tags: list[str], team: str = "anomalib", - folder: str | None = None, ): - """Upload the data in csv files to wandb. + """Write model_metrics to wandb. - Creates a project named benchmarking_[two random characters]. This is so that the project names are unique. - One issue is that it does not check for collision + > _Note:_ It is observed that any failure in wandb causes the run to hang. Use wandb writer with caution. Args: + model_metrics (dict[str, str | float]): Dictionary containing collected results. + project_name (str): Name of the project on wandb. + tags (list[str]): List of tags for the run. team (str, optional): Name of the team on wandb. This can also be the id of your personal account. - Defaults to "anomalib". - folder (optional, str): Sub-directory from which runs are picked up. Defaults to None. If none picks from runs. + Defaults to "anomalib". """ - project = f"benchmarking_{get_unique_key(2)}" - tag_list = ["dataset.category", "model_name", "dataset.image_size", "model.backbone", "device"] - search_path = "runs/*.csv" if folder is None else f"runs/{folder}/*.csv" - for csv_file in glob(search_path): - table = pd.read_csv(csv_file) - for index, row in table.iterrows(): - row = dict(row[1:]) # remove index column - tags = [str(row[column]) for column in tag_list if column in row.keys()] - wandb.init( - entity=team, project=project, name=f"{row['model_name']}_{row['dataset.category']}_{index}", tags=tags - ) - wandb.log(row) - wandb.finish() - - -def upload_to_comet( - folder: str | None = None, + for key, value in model_metrics.items(): + if all(name not in key.lower() for name in ["time", "image", "pixel", "throughput"]): + tags.append(str(value)) + run = wandb.init( + entity=team, + project=project_name, + name=f"{'_'.join(tags)}", + tags=tags, + settings={"silent": True, "show_info": False, "show_warnings": False, "show_errors": False}, + ) + run.log(model_metrics) + logger.info(f"Run logged at {run.url}") + run.finish(quiet=True) + + +def write_to_comet( + model_metrics: dict[str, str | float], + project_name: str, + tags: list[str], + team: str = "anomalib", ): - """Upload the data in csv files to comet. + """Write model_metrics to wandb. - Creates a project named benchmarking_[two random characters]. This is so that the project names are unique. - One issue is that it does not check for collision Args: - folder (optional, str): Sub-directory from which runs are picked up. Defaults to None. If none picks from runs. + model_metrics (dict[str, str | float]): Dictionary containing collected results. + project_name (str): Name of the project on comet. + tags (list[str]): List of tags for the run. + team (str, optional): Name of the team on wandb. This can also be the id of your personal account. + Defaults to "anomalib". 
""" - project = f"benchmarking_{get_unique_key(2)}" - tag_list = ["dataset.category", "model_name", "dataset.image_size", "model.backbone", "device"] - search_path = "runs/*.csv" if folder is None else f"runs/{folder}/*.csv" - for csv_file in glob(search_path): - table = pd.read_csv(csv_file) - for index, row in table.iterrows(): - row = dict(row[1:]) # remove index column - tags = [str(row[column]) for column in tag_list if column in row.keys()] - experiment = Experiment(project_name=project) - experiment.set_name(f"{row['model_name']}_{row['dataset.category']}_{index}") - experiment.log_metrics(row, step=1, epoch=1) # populates auto-generated charts on panel view - experiment.add_tags(tags) - experiment.log_table(filename=csv_file) + experiment = Experiment(project_name=project_name, workspace=team) + experiment.set_name(f"{'_'.join(tags)}") + experiment.log_metrics(model_metrics, step=1, epoch=1) # populates auto-generated charts on panel view + experiment.add_tags(tags) + logger.info(f"Run logged at {experiment.url}") + experiment.end() From 5a46d03042b4564a2931000d0fa509a86afb08ee Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Sun, 6 Aug 2023 11:53:00 +0100 Subject: [PATCH 3/6] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5851b45e13..1ebeed9706 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -34,7 +34,7 @@ /src/anomalib/models/cflow @ashwinvaidya17 /src/anomalib/models/csflow @ashwinvaidya17 /src/anomalib/models/dfkde @djdameln -/src/anomalib/models/dfm @djdameln @nahuja-intel +/src/anomalib/models/dfm @djdameln /src/anomalib/models/draem @djdameln /src/anomalib/models/fastflow @samet-akcay /src/anomalib/models/ganomaly @ashwinvaidya17 From 3ffc3f6001479d7471cfed7da9005e34605014eb Mon Sep 17 00:00:00 2001 From: Dick Ameln Date: Mon, 7 Aug 2023 16:33:19 +0200 Subject: [PATCH 4/6] Enable training with only normal images for MVTec (#1241) * ignore mask check when dataset has only normal samples * update changelog --- CHANGELOG.md | 1 + src/anomalib/data/mvtec.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5b17e8d51..7cb433ff24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed +- Enable training with only normal images for MVTecv in https://github.com/openvinotoolkit/anomalib/pull/1241 - Improve default settings of EfficientAD ### Deprecated diff --git a/src/anomalib/data/mvtec.py b/src/anomalib/data/mvtec.py index 6a807aea57..e4aadfce0c 100644 --- a/src/anomalib/data/mvtec.py +++ b/src/anomalib/data/mvtec.py @@ -153,13 +153,14 @@ def make_mvtec_dataset( ] = mask_samples.image_path.values # assert that the right mask files are associated with the right test images - assert ( - samples.loc[samples.label_index == LabelName.ABNORMAL] - .apply(lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, axis=1) - .all() - ), "Mismatch between anomalous images and ground truth masks. Make sure the mask files in 'ground_truth' \ - folder follow the same naming convention as the anomalous images in the dataset (e.g. image: '000.png', \ - mask: '000.png' or '000_mask.png')." 
+    if len(samples.loc[samples.label_index == LabelName.ABNORMAL]):
+        assert (
+            samples.loc[samples.label_index == LabelName.ABNORMAL]
+            .apply(lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, axis=1)
+            .all()
+        ), "Mismatch between anomalous images and ground truth masks. Make sure the mask files in 'ground_truth' \
+            folder follow the same naming convention as the anomalous images in the dataset (e.g. image: \
+            '000.png', mask: '000.png' or '000_mask.png')."
 
     if split:
         samples = samples[samples.split == split].reset_index(drop=True)

From a62cd92e33d8cb9cdb004c249edbb01af5a04739 Mon Sep 17 00:00:00 2001
From: Samet Akcay
Date: Mon, 7 Aug 2023 15:35:10 +0100
Subject: =?UTF-8?q?Revert=20"=F0=9F=9A=9A=20Refactor=20Benchma?=
 =?UTF-8?q?rking=20Script"=20(#1239)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert "🚚 Refactor Benchmarking Script (#1216)"

This reverts commit 784767fc2f19a8f354f152aba7f4338cb628118c.
---
 pyproject.toml                       |   3 -
 requirements/loggers.txt             |   2 +-
 src/anomalib/utils/sweep/__init__.py |   8 +-
 src/anomalib/utils/sweep/utils.py    | 105 ------
 tools/benchmarking/benchmark.py      | 469 +++++++++++++--------------
 tools/benchmarking/utils/__init__.py |   4 +-
 tools/benchmarking/utils/metrics.py  | 123 +++----
 7 files changed, 289 insertions(+), 425 deletions(-)
 delete mode 100644 src/anomalib/utils/sweep/utils.py

diff --git a/pyproject.toml b/pyproject.toml
index d896789bba..d25e846269 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,9 +58,6 @@ target-version = "py38"
 # Allow imports relative to the "src" and "tests" directories.
 src = ["src", "tests"]
 
-[tool.ruff.isort]
-known-third-party = ["wandb", "comet_ml"]
-
 [tool.ruff.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
diff --git a/requirements/loggers.txt b/requirements/loggers.txt
index c84a876f06..f6a4157d0c 100644
--- a/requirements/loggers.txt
+++ b/requirements/loggers.txt
@@ -1,6 +1,6 @@
 comet-ml>=3.31.7
 gradio>=2.9.4
 tensorboard
-wandb>=0.13.0
+wandb==0.12.17
 GitPython
 ipykernel
diff --git a/src/anomalib/utils/sweep/__init__.py b/src/anomalib/utils/sweep/__init__.py
index a32a24e50b..36493a3988 100644
--- a/src/anomalib/utils/sweep/__init__.py
+++ b/src/anomalib/utils/sweep/__init__.py
@@ -5,16 +5,12 @@
 
 from .config import flatten_sweep_params, get_run_config, set_in_nested_config
 from .helpers import get_openvino_throughput, get_sweep_callbacks, get_torch_throughput
-from .utils import Status, exception_wrapper, redirect_output
 
 __all__ = [
-    "exception_wrapper",
-    "flatten_sweep_params",
     "get_run_config",
+    "set_in_nested_config",
     "get_sweep_callbacks",
     "get_openvino_throughput",
     "get_torch_throughput",
-    "set_in_nested_config",
-    "redirect_output",
-    "Status",
+    "flatten_sweep_params",
 ]
diff --git a/src/anomalib/utils/sweep/utils.py b/src/anomalib/utils/sweep/utils.py
deleted file mode 100644
index 56a0bb9ccc..0000000000
--- a/src/anomalib/utils/sweep/utils.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""Additional utils for sweep."""
-
-# Copyright (C) 2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-import functools
-import io
-import logging
-import sys
-from enum import Enum
-from typing import Any, Callable
-
-logger = logging.getLogger(__name__)
-
-
-def redirect_output(func: Callable) -> Callable[..., dict[str, Any]]:
-    """Decorator to redirect output of the function.
-
-    Args:
-        func (function): Hides output of this function.
- - Raises: - Exception: Incase the execution of function fails, it raises an exception. - - Returns: - object of the called function - """ - - @functools.wraps(func) - def wrapper(*args, **kwargs) -> dict[str, Any]: - std_out = sys.stdout - sys.stdout = buf = io.StringIO() - try: - value = func(*args, **kwargs) - logger.info(buf.getvalue()) - logger.info(value) - except Exception as exp: - logger.exception( - "Error occurred while computing benchmark %s. Buffer: %s." "\n Method %s, args %s, kwargs %s", - exp, - buf.getvalue(), - func, - args, - kwargs, - ) - value = {} - sys.stdout = std_out - return value - - return wrapper - - -class Status(str, Enum): - """Status of the benchmarking run.""" - - SUCCESS = "success" - FAILED = "failed" - - -class Result: - def __init__(self, value: Any, status=Status.SUCCESS): - self.value = value - self.status = status - - def __bool__(self): - return self.status == Status.SUCCESS - - -def exception_wrapper(func: Callable) -> Callable[..., Result]: - """Wrapper method to handle exceptions. - - Args: - func (function): Function to be wrapped. - - Raises: - Exception: Incase the execution of function fails, it raises an exception. - - Example: - >>> @exception_wrapper - ... def func(): - ... raise Exception("Exception occurred") - >>> func() - Exception: Exception occurred - - Returns: - object of the called function - """ - - @functools.wraps(func) - def wrapper(*args, **kwargs) -> Result: - try: - value = Result(value=func(*args, **kwargs)) - except Exception as exp: - logger.exception( - "Error occurred while computing benchmark %s. Method %s, args %s, kwargs %s", - exp, - func, - args, - kwargs, - ) - value = Result(False, Status.FAILED) - return value - - return wrapper diff --git a/tools/benchmarking/benchmark.py b/tools/benchmarking/benchmark.py index 775232c9bd..d55f6c9eaf 100644 --- a/tools/benchmarking/benchmark.py +++ b/tools/benchmarking/benchmark.py @@ -1,84 +1,87 @@ """Benchmark all the algorithms in the repo.""" -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import functools +import io import logging -from pathlib import Path - -# E402 Module level import not at top of file. Disabled as we need to redirect all outputs during the runs. 
-# ruff: noqa: E402 -# pylint: disable=wrong-import-position - -# File cannot be unique because if we create a unique name based on time, -# each process will create a new file -log_file = "runs/benchmark.log" -Path(log_file).parent.mkdir(exist_ok=True, parents=True) -logger_file_handler = logging.FileHandler(log_file) -logger_file_handler.setLevel(logging.INFO) - -# Redirect warnings and logs to file that are generated while importing -logging.captureWarnings(True) - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -logger.addHandler(logger_file_handler) - -# End of warnings capture | Rest of the imports follow - import math import multiprocessing +import sys import time +import warnings from argparse import ArgumentParser -from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import ProcessPoolExecutor, as_completed from datetime import datetime -from multiprocessing.managers import DictProxy +from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, cast +from typing import cast import torch -import wandb from omegaconf import DictConfig, ListConfig, OmegaConf from pytorch_lightning import Trainer, seed_everything -from rich import print # pylint: disable=W0622 | disable redefine print warning -from rich.console import Console -from rich.progress import Progress, TaskID -from rich.table import Table -from utils import write_metrics +from utils import upload_to_comet, upload_to_wandb, write_metrics from anomalib.config import get_configurable_parameters, update_input_size_config from anomalib.data import get_datamodule from anomalib.deploy import export from anomalib.deploy.export import ExportMode from anomalib.models import get_model +from anomalib.utils.loggers import configure_logger from anomalib.utils.sweep import ( - exception_wrapper, + get_openvino_throughput, get_run_config, get_sweep_callbacks, - redirect_output, + get_torch_throughput, set_in_nested_config, ) -# TODO add torch and openvino throughputs. +warnings.filterwarnings("ignore") + +logger = logging.getLogger(__name__) +configure_logger() +pl_logger = logging.getLogger(__file__) +for logger_name in ["pytorch_lightning", "torchmetrics", "os"]: + logging.getLogger(logger_name).setLevel(logging.ERROR) + -# Redirect future warnings and logs to file from all the imports -for name in logging.root.manager.loggerDict: - for filter_keys in ["lightning", "anomalib", "wandb", "comet", "py.warnings"]: - if filter_keys in name: - _logger = logging.getLogger(name) - _logger.setLevel(logging.WARNING) - _logger.handlers = [] - _logger.addHandler(logger_file_handler) +def hide_output(func): + """Decorator to hide output of the function. + Args: + func (function): Hides output of this function. + + Raises: + Exception: Incase the execution of function fails, it raises an exception. + + Returns: + object of the called function + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + std_out = sys.stdout + sys.stdout = buf = io.StringIO() + try: + value = func(*args, **kwargs) + except Exception as exp: + raise Exception(buf.getvalue()) from exp + sys.stdout = std_out + return value -@redirect_output -def get_single_model_metrics(model_config: DictConfig | ListConfig) -> dict[str, Any]: + return wrapper + + +@hide_output +def get_single_model_metrics(model_config: DictConfig | ListConfig, openvino_metrics: bool = False) -> dict: """Collects metrics for `model_name` and returns a dict of results. 
Args: model_config (DictConfig, ListConfig): Configuration for run + openvino_metrics (bool): If True, converts the model to OpenVINO format and gathers inference metrics. Returns: dict: Collection of all the metrics such as time taken, throughput and performance scores. @@ -86,8 +89,6 @@ def get_single_model_metrics(model_config: DictConfig | ListConfig) -> dict[str, with TemporaryDirectory() as project_path: model_config.project.path = project_path - model_config.trainer.enable_progress_bar = False - model_config.trainer.enable_model_summary = False datamodule = get_datamodule(model_config) model = get_model(model_config) @@ -120,218 +121,210 @@ def get_single_model_metrics(model_config: DictConfig | ListConfig) -> dict[str, export_root=project_path, ) + throughput = get_torch_throughput( + model_path=project_path, + test_dataset=datamodule.test_dataloader().dataset, + device=model_config.trainer.accelerator, + ) + + # Get OpenVINO metrics + openvino_throughput = float("nan") + if openvino_metrics: + # Create dirs for openvino model export + export( + task=model_config.dataset.task, + transform=trainer.datamodule.test_data.transform.to_dict(), + input_size=model_config.model.input_size, + model=model, + export_mode=ExportMode.OPENVINO, + export_root=project_path, + ) + openvino_throughput = get_openvino_throughput(model_path=project_path, test_dataset=datamodule.test_data) + # arrange the data data = { "Training Time (s)": training_time, "Testing Time (s)": testing_time, + f"Inference Throughput {model_config.trainer.accelerator} (fps)": throughput, + "OpenVINO Inference Throughput (fps)": openvino_throughput, } for key, val in test_results[0].items(): data[key] = float(val) + return data -class Benchmark: - """Benchmarking runner +def compute_on_cpu(sweep_config: DictConfig | ListConfig, folder: str | None = None): + """Compute all run configurations over a sigle CPU.""" + for run_config in get_run_config(sweep_config.grid_search): + model_metrics = sweep(run_config, 0, sweep_config.seed, False) + write_metrics(model_metrics, sweep_config.writer, folder) + + +def compute_on_gpu( + run_configs: list[DictConfig], + device: int, + seed: int, + writers: list[str], + folder: str | None = None, + compute_openvino: bool = False, +): + """Go over each run config and collect the result. Args: - config: (DictConfig | ListConfig): Sweep configuration. - n_gpus: (int): Number of devices to run the benchmark on. If n_gpus is 0, benchmarking is run on all available - GPUs. + run_configs (DictConfig | ListConfig): List of run configurations. + device (int): The GPU id used for running the sweep. + seed (int): Fix a seed. + writers (list[str]): Destinations to write to. + folder (optional, str): Sub-directory to which runs are written to. Defaults to None. If none writes to root. + compute_openvino (bool, optional): Compute OpenVINO throughput. Defaults to False. """ + for run_config in run_configs: + if isinstance(run_config, (DictConfig, ListConfig)): + model_metrics = sweep(run_config, device, seed, compute_openvino) + write_metrics(model_metrics, writers, folder) + else: + raise ValueError( + f"Expecting `run_config` of type DictConfig or ListConfig. Got {type(run_config)} instead." 
+ ) - def __init__(self, config: DictConfig | ListConfig, n_gpus: int = 0): - self.config = config - self.n_gpus = min(n_gpus, torch.cuda.device_count()) if n_gpus > 0 else torch.cuda.device_count() - self.runs_folder = f"runs/{datetime.strftime(datetime.now(), '%Y_%m_%d-%H_%M_%S')}" - Path(self.runs_folder).mkdir(exist_ok=True, parents=True) - self.run_failures: bool = False - - @exception_wrapper - def _sweep(self, device: int, run_config: DictConfig, seed: int = 42) -> dict[str, Any]: - """Run a single sweep on a device.""" - seed_everything(seed, workers=True) - # This assumes that `model_name` is always present in the sweep config. - model_config = get_configurable_parameters(model_name=run_config.model_name) - model_config.project.seed = seed - model_config = cast(DictConfig, model_config) # placate mypy - for param in run_config.keys(): - # grid search keys are always assumed to be strings - param = cast(str, param) # placate mypy - set_in_nested_config(model_config, param.split("."), run_config[param]) # type: ignore - - # convert image size to tuple in case it was updated by run config - model_config = update_input_size_config(model_config) - - # Set device in config. 0 - cpu, [0], [1].. - gpu id - if device != 0: - model_config.trainer.devices = [device - 1] - model_config.trainer.accelerator = "gpu" - - # Remove legacy flags - for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]: - if legacy_device in model_config.trainer: - model_config.trainer[legacy_device] = None - - # Run benchmarking for current config - model_metrics: dict[str, Any] = get_single_model_metrics(model_config=model_config) - output = f"One sweep run complete for model {model_config.model.name}" - output += f" On category {model_config.dataset.category}" if model_config.dataset.category is not None else "" - output += str(model_metrics) - logger.info(output) - - # Append configuration of current run to the collected metrics - for key, value in run_config.items(): - # Skip adding model name to the dataframe - if key != "model_name": - model_metrics[key] = value - - # Add device name to list - model_metrics["device"] = "gpu" - model_metrics["model_name"] = run_config.model_name - - return model_metrics - - @exception_wrapper - def _compute( - self, progress: DictProxy, task_id: TaskID, device: int, run_configs: list[DictConfig] - ) -> dict[str, list[str]]: - """Iterate over configurations and compute & write metrics for single configuration. - - Args: - progress (DictProxy): Shared dict to write progress status for displaying in terminal. - task_id (TaskID): Task id for the current process. Used to identify the progress bar. - device (int): GPU id on which the benchmarking is run. - run_configs (list[DictConfig]): List of run configurations. - - Returns: - dict[str, list[str]]: Dictionary containing the metrics gathered from the sweep. 
- """ - result = [] - for idx, config in enumerate(run_configs): - output = self._sweep(device, config) - if output: - write_metrics(output.value, self.config.writer, self.runs_folder) - result.append(output.value) - else: - self.run_failures = True - - progress[str(task_id)] = {"completed": idx + 1, "total": len(run_configs)} - # convert list of dicts to dict of lists - return {key: [dic[key] for dic in result] for key in result[0]} - - @exception_wrapper - def _distribute(self): - run_configs = list(get_run_config(self.config.grid_search)) - step_size = math.ceil(len(run_configs) / self.n_gpus) + +def distribute_over_gpus(sweep_config: DictConfig | ListConfig, folder: str | None = None): + """Distribute metric collection over all available GPUs. This is done by splitting the list of configurations.""" + with ProcessPoolExecutor( + max_workers=torch.cuda.device_count(), mp_context=multiprocessing.get_context("spawn") + ) as executor: + run_configs = list(get_run_config(sweep_config.grid_search)) jobs = [] - results: list[dict[str, list[str]]] = [] - with Progress() as progress: - overall_progress_task = progress.add_task("[green]Overall Progress") - with multiprocessing.Manager() as manager: - _progress = manager.dict() - - with ProcessPoolExecutor( - max_workers=self.n_gpus, mp_context=multiprocessing.get_context("spawn") - ) as executor: - for device_id, run_split in enumerate(range(0, len(run_configs), step_size)): - task_id = progress.add_task(f"Running benchmark on GPU {device_id}") - _progress[str(task_id)] = {"completed": 0, "total": step_size} - jobs.append( - executor.submit( - self._compute, - _progress, - task_id, - device_id, - run_configs[run_split : run_split + step_size], - ) - ) - - # monitor the progress: - while (sum([job.done() for job in jobs])) < len(jobs): - progress.update( - overall_progress_task, - completed=sum([task["completed"] for task in _progress.values()]), - total=len(run_configs), - ) - for task_id, params in _progress.items(): - progress.update(TaskID(int(task_id)), completed=params["completed"], total=params["total"]) - - for job in jobs: - _result = job.result() - if _result: - results.append(_result.value) - else: - self.run_failures = True - - progress.update(overall_progress_task, completed=len(run_configs), total=len(run_configs)) - result = self._gather_results(results) - if result: - self._print_results(result.value) - else: - self.run_failures = True - - @exception_wrapper - def _gather_results(self, results: list[dict[str, list[str]]]) -> dict: - """Gather results from all processes. - - Args: - results (dict): Dictionary containing the results from all processes. - - Returns: - dict: Dictionary containing the results from all processes. - """ - result: dict[str, list] = {key: [] for key in results[0].keys()} - for _result in results: - for key, value in _result.items(): - result[key].extend(value) - return result - - @exception_wrapper - def _print_results(self, result: dict) -> None: - """Print the results in a tabular format. - - Args: - result (dict): Dictionary containing the results from all processes. - """ - console = Console() - table = Table(title="Benchmarking Results", show_header=True, header_style="bold magenta") - for column in result.keys(): - table.add_column(column) - for row in [*zip(*result.values())]: - table.add_row(*[str(value) for value in row]) - console.print(table) - - def run(self): - """Run the benchmarking.""" - logger.info( - "\n%s\n" "Starting benchmarking. 
%s" "\nDistributing benchmark collection over %s GPUs.", - "-" * 120, - datetime.strftime(datetime.now(), "%Y %m %d-%H %M %S"), - self.n_gpus, - ) - if not torch.cuda.is_available(): - logger.warning("Could not detect any cuda enabled devices") - - self._distribute() - if self.run_failures: - print( - "[bold red]There were some errors while collecting benchmark[/bold red]" - "\nPlease check the log file [magenta]runs/benchmark.log[/magenta]" - " for more details." + for device_id, run_split in enumerate( + range(0, len(run_configs), math.ceil(len(run_configs) / torch.cuda.device_count())) + ): + jobs.append( + executor.submit( + compute_on_gpu, + run_configs[run_split : run_split + math.ceil(len(run_configs) / torch.cuda.device_count())], + device_id + 1, + sweep_config.seed, + sweep_config.writer, + folder, + sweep_config.compute_openvino, + ) ) - logger.info("Benchmarking complete \n%s", "-" * 120) + for job in jobs: + try: + job.result() + except Exception as exc: + raise Exception(f"Error occurred while computing benchmark on GPU {job}") from exc + + +def distribute(config: DictConfig | ListConfig): + """Run all cpu experiments on a single process. Distribute gpu experiments over all available gpus. + + Args: + config: (DictConfig | ListConfig): Sweep configuration. + """ + + runs_folder = datetime.strftime(datetime.now(), "%Y_%m_%d-%H_%M_%S") + devices = config.hardware + if not torch.cuda.is_available() and "gpu" in devices: + pl_logger.warning("Config requested GPU benchmarking but torch could not detect any cuda enabled devices") + elif {"cpu", "gpu"}.issubset(devices): + # Create process for gpu and cpu + with ProcessPoolExecutor(max_workers=2, mp_context=multiprocessing.get_context("spawn")) as executor: + jobs = [ + executor.submit(compute_on_cpu, config, runs_folder), + executor.submit(distribute_over_gpus, config, runs_folder), + ] + for job in as_completed(jobs): + try: + job.result() + except Exception as exception: + raise Exception(f"Error occurred while computing benchmark on device {job}") from exception + elif "cpu" in devices: + compute_on_cpu(config, folder=runs_folder) + elif "gpu" in devices: + distribute_over_gpus(config, folder=runs_folder) + if "wandb" in config.writer: + upload_to_wandb(team="anomalib", folder=runs_folder) + if "comet" in config.writer: + upload_to_comet(folder=runs_folder) + + +def sweep( + run_config: DictConfig | ListConfig, device: int = 0, seed: int = 42, convert_openvino: bool = False +) -> dict[str, str | float]: + """Go over all the values mentioned in `grid_search` parameter of the benchmarking config. + + Args: + run_config: (DictConfig | ListConfig, optional): Configuration for current run. + device (int, optional): Name of the device on which the model is trained. Defaults to 0 "cpu". + convert_openvino (bool, optional): Whether to convert the model to openvino format. Defaults to False. + + Returns: + dict[str, str | float]: Dictionary containing the metrics gathered from the sweep. + """ + seed_everything(seed, workers=True) + # This assumes that `model_name` is always present in the sweep config. 
+ model_config = get_configurable_parameters(model_name=run_config.model_name) + model_config.project.seed = seed + + model_config = cast(DictConfig, model_config) # placate mypy + for param in run_config.keys(): + # grid search keys are always assumed to be strings + param = cast(str, param) # placate mypy + set_in_nested_config(model_config, param.split("."), run_config[param]) # type: ignore + + # convert image size to tuple in case it was updated by run config + model_config = update_input_size_config(model_config) + + # Set device in config. 0 - cpu, [0], [1].. - gpu id + if device != 0: + model_config.trainer.devices = [device - 1] + model_config.trainer.accelerator = "gpu" + else: + model_config.trainer.accelerator = "cpu" + + # Remove legacy flags + for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]: + if legacy_device in model_config.trainer: + model_config.trainer[legacy_device] = None + + if run_config.model_name in ["patchcore", "cflow"]: + convert_openvino = False # `torch.cdist` is not supported by onnx version 11 + # TODO Remove this line when issue #40 is fixed https://github.com/openvinotoolkit/anomalib/issues/40 + if model_config.model.input_size != (224, 224): + return {} # go to next run + + # Run benchmarking for current config + model_metrics = get_single_model_metrics(model_config=model_config, openvino_metrics=convert_openvino) + output = f"One sweep run complete for model {model_config.model.name}" + output += f" On category {model_config.dataset.category}" if model_config.dataset.category is not None else "" + output += str(model_metrics) + logger.info(output) + + # Append configuration of current run to the collected metrics + for key, value in run_config.items(): + # Skip adding model name to the dataframe + if key != "model_name": + model_metrics[key] = value + + # Add device name to list + model_metrics["device"] = "gpu" if device > 0 else "cpu" + model_metrics["model_name"] = run_config.model_name + + return model_metrics if __name__ == "__main__": + # Benchmarking entry point. + # Spawn multiple processes one for cpu and rest for the number of gpus available in the system. + # The idea is to distribute metrics collection over all the available devices. + parser = ArgumentParser() parser.add_argument("--config", type=Path, help="Path to sweep configuration") _args = parser.parse_args() - print("[royal_blue1]Benchmarking started. This will take a while depending on your configuration.[/royal_blue1]") - + print("Benchmarking started 🏃‍♂️. 
This will take a while ⏲ depending on your configuration.") _sweep_config = OmegaConf.load(_args.config) - if "wandb" in _sweep_config.writer: - wandb.setup() # this is required when using multiprocessing otherwise wandb hangs - runner = Benchmark(_sweep_config, n_gpus=0) - runner.run() - print("[royal_blue1]Finished gathering results[/royal_blue1] ⚡") + distribute(_sweep_config) + print("Finished gathering results ⚡") diff --git a/tools/benchmarking/utils/__init__.py b/tools/benchmarking/utils/__init__.py index 3b5007afcc..b9eebfed78 100644 --- a/tools/benchmarking/utils/__init__.py +++ b/tools/benchmarking/utils/__init__.py @@ -3,6 +3,6 @@ # Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from .metrics import write_metrics, write_to_comet, write_to_wandb +from .metrics import upload_to_comet, upload_to_wandb, write_metrics -__all__ = ["write_metrics", "write_to_comet", "write_to_wandb"] +__all__ = ["write_metrics", "upload_to_comet", "upload_to_wandb"] diff --git a/tools/benchmarking/utils/metrics.py b/tools/benchmarking/utils/metrics.py index 161c8c4ebb..b1b7373d65 100644 --- a/tools/benchmarking/utils/metrics.py +++ b/tools/benchmarking/utils/metrics.py @@ -5,9 +5,9 @@ from __future__ import annotations -import logging import random import string +from glob import glob from pathlib import Path import pandas as pd @@ -15,13 +15,11 @@ from comet_ml import Experiment from torch.utils.tensorboard.writer import SummaryWriter -logger = logging.getLogger(__name__) - def write_metrics( model_metrics: dict[str, str | float], writers: list[str], - folder: str, + folder: str | None = None, ): """Writes metrics to destination provided in the sweep config. @@ -34,36 +32,22 @@ def write_metrics( if model_metrics == {} or model_metrics is None: return - result_folder = Path(folder) # Write to CSV - try: - metrics_df = pd.DataFrame(model_metrics, index=[0]) - result_path = result_folder / f"{model_metrics['model_name']}_{model_metrics['device']}.csv" - Path.mkdir(result_path.parent, parents=True, exist_ok=True) - if not result_path.is_file(): - metrics_df.to_csv(result_path) - else: - metrics_df.to_csv(result_path, mode="a", header=False) - except Exception as exception: - logger.exception(f"Could not write to csv. Exception: {exception}") - - project_name = f"benchmarking_{result_folder.name}" - tags = [] - for key, value in model_metrics.items(): - if all(name not in key.lower() for name in ["time", "image", "pixel", "throughput"]): - tags.append(str(value)) + metrics_df = pd.DataFrame(model_metrics, index=[0]) + result_folder = Path("runs") if folder is None else Path(f"runs/{folder}") + result_path = result_folder / f"{model_metrics['model_name']}_{model_metrics['device']}.csv" + Path.mkdir(result_path.parent, parents=True, exist_ok=True) + if not result_path.is_file(): + metrics_df.to_csv(result_path) + else: + metrics_df.to_csv(result_path, mode="a", header=False) if "tensorboard" in writers: - write_to_tensorboard(model_metrics, result_folder) - if "wandb" in writers: - write_to_wandb(model_metrics, project_name, tags) - if "comet" in writers: - write_to_comet(model_metrics, project_name, tags) + write_to_tensorboard(model_metrics) def write_to_tensorboard( model_metrics: dict[str, str | float], - folder: Path, ): """Write model_metrics to tensorboard. 
@@ -79,7 +63,7 @@ def write_to_tensorboard(
         else:
             string_metrics[key] = metric
             scalar_prefixes.append(metric)
-    writer = SummaryWriter(folder / "tfevents")
+    writer = SummaryWriter(f"runs/{model_metrics['model_name']}_{model_metrics['device']}")
     for key, metric in model_metrics.items():
         if isinstance(metric, (int, float, bool)):
             scalar_metrics[key.replace(".", "/")] = metric  # need to join by / for tensorboard grouping
@@ -106,57 +90,56 @@ def get_unique_key(str_len: int) -> str:
     return "".join([random.choice(string.ascii_lowercase) for _ in range(str_len)])  # nosec: B311
 
 
-def write_to_wandb(
-    model_metrics: dict[str, str | float],
-    project_name: str,
-    tags: list[str],
+def upload_to_wandb(
     team: str = "anomalib",
+    folder: str | None = None,
 ):
-    """Write model_metrics to wandb.
+    """Upload the data in csv files to wandb.
 
-    > _Note:_ It is observed that any failure in wandb causes the run to hang. Use wandb writer with caution.
+    Creates a project named benchmarking_[two random characters] so that the project names are unique.
+    Note that it does not check for name collisions.
 
     Args:
-        model_metrics (dict[str, str | float]): Dictionary containing collected results.
-        project_name (str): Name of the project on wandb.
-        tags (list[str]): List of tags for the run.
         team (str, optional): Name of the team on wandb. This can also be the id of your personal account.
-            Defaults to "anomalib".
+        Defaults to "anomalib".
+        folder (str, optional): Sub-directory of "runs" from which csv files are picked up. Defaults to None (csv files are then read directly from "runs").
     """
-    for key, value in model_metrics.items():
-        if all(name not in key.lower() for name in ["time", "image", "pixel", "throughput"]):
-            tags.append(str(value))
-    run = wandb.init(
-        entity=team,
-        project=project_name,
-        name=f"{'_'.join(tags)}",
-        tags=tags,
-        settings={"silent": True, "show_info": False, "show_warnings": False, "show_errors": False},
-    )
-    run.log(model_metrics)
-    logger.info(f"Run logged at {run.url}")
-    run.finish(quiet=True)
-
-
-def write_to_comet(
-    model_metrics: dict[str, str | float],
-    project_name: str,
-    tags: list[str],
-    team: str = "anomalib",
+    project = f"benchmarking_{get_unique_key(2)}"
+    tag_list = ["dataset.category", "model_name", "dataset.image_size", "model.backbone", "device"]
+    search_path = "runs/*.csv" if folder is None else f"runs/{folder}/*.csv"
+    for csv_file in glob(search_path):
+        table = pd.read_csv(csv_file)
+        for index, row in table.iterrows():
+            row = dict(row[1:])  # remove index column
+            tags = [str(row[column]) for column in tag_list if column in row.keys()]
+            wandb.init(
+                entity=team, project=project, name=f"{row['model_name']}_{row['dataset.category']}_{index}", tags=tags
+            )
+            wandb.log(row)
+            wandb.finish()
+
+
+def upload_to_comet(
+    folder: str | None = None,
 ):
-    """Write model_metrics to wandb.
+    """Upload the data in csv files to comet.
 
+    Creates a project named benchmarking_[two random characters] so that the project names are unique.
+    Note that it does not check for name collisions.
     Args:
-        model_metrics (dict[str, str | float]): Dictionary containing collected results.
-        project_name (str): Name of the project on comet.
-        tags (list[str]): List of tags for the run.
-        team (str, optional): Name of the team on wandb. This can also be the id of your personal account.
-            Defaults to "anomalib".
+        folder (str, optional): Sub-directory of "runs" from which csv files are picked up. Defaults to None (csv files are then read directly from "runs").
""" - experiment = Experiment(project_name=project_name, workspace=team) - experiment.set_name(f"{'_'.join(tags)}") - experiment.log_metrics(model_metrics, step=1, epoch=1) # populates auto-generated charts on panel view - experiment.add_tags(tags) - logger.info(f"Run logged at {experiment.url}") - experiment.end() + project = f"benchmarking_{get_unique_key(2)}" + tag_list = ["dataset.category", "model_name", "dataset.image_size", "model.backbone", "device"] + search_path = "runs/*.csv" if folder is None else f"runs/{folder}/*.csv" + for csv_file in glob(search_path): + table = pd.read_csv(csv_file) + for index, row in table.iterrows(): + row = dict(row[1:]) # remove index column + tags = [str(row[column]) for column in tag_list if column in row.keys()] + experiment = Experiment(project_name=project) + experiment.set_name(f"{row['model_name']}_{row['dataset.category']}_{index}") + experiment.log_metrics(row, step=1, epoch=1) # populates auto-generated charts on panel view + experiment.add_tags(tags) + experiment.log_table(filename=csv_file) From 09ad1d4b1e8f634b72f788314275d3aea33815dd Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 8 Aug 2023 11:29:34 +0100 Subject: [PATCH 6/6] Update benchmarking notebook (#1242) * Fix metadata path * Update benchmarking notebook --- .../300_benchmarking/301_benchmarking.ipynb | 125 +++++++++--------- 1 file changed, 66 insertions(+), 59 deletions(-) diff --git a/notebooks/300_benchmarking/301_benchmarking.ipynb b/notebooks/300_benchmarking/301_benchmarking.ipynb index 931f3c8b1b..155e074a58 100644 --- a/notebooks/300_benchmarking/301_benchmarking.ipynb +++ b/notebooks/300_benchmarking/301_benchmarking.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Short walkthrough on Benchmarking in Anomalib" + "# Short walkthrough on Benchmarking in Anomalib\n" ] }, { @@ -22,7 +22,7 @@ "id": "IJlBPLRvOYuv" }, "source": [ - "## Install Anomalib" + "## Install Anomalib\n" ] }, { @@ -37,7 +37,7 @@ }, "outputs": [], "source": [ - "!git clone https://github.com/openvinotoolkit/anomalib.git" + "!git clone https://github.com/openvinotoolkit/anomalib.git --branch main --single-branch" ] }, { @@ -92,7 +92,7 @@ "id": "0NJboi_7XSSN" }, "source": [ - "> Note: Restart Runtime if promted by clicking the button at the end of the install logs" + "> Note: Restart Runtime if promted by clicking the button at the end of the install logs\n" ] }, { @@ -101,7 +101,7 @@ "id": "y4sQOIwOUO0u" }, "source": [ - "## Download and setup dataset" + "## Download and setup dataset\n" ] }, { @@ -151,7 +151,7 @@ "id": "Mb_kkxi-URk7" }, "source": [ - "## Create configuration file for training using Folder Dataset" + "## Create configuration file for training using Folder Dataset\n" ] }, { @@ -188,15 +188,15 @@ " task: segmentation # classification or segmentation\n", " mask: #optional\n", " extensions: null\n", - " split_ratio: 0.2 # ratio of the normal images that will be used to create a test split\n", + " split_ratio: 0.2 # ratio of the normal images that will be used to create a test split\n", "```\n", "\n", - "The complete configuration is in the codeblock below." 
+ "The complete configuration is in the codeblock below.\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "id": "GNSo19XlPixN" }, @@ -210,18 +210,22 @@ " normal_dir: good # name of the folder containing normal images.\n", " abnormal_dir: colour # name of the folder containing abnormal images.\n", " normal_test_dir: null # name of the folder containing normal test images.\n", + " mask_dir: /content/anomalib/datasets/hazelnut_toy/mask/colour # optional\n", " task: segmentation # classification or segmentation\n", - " mask: /content/anomalib/datasets/hazelnut_toy/mask/colour # optional\n", " extensions: null\n", - " split_ratio: 0.2 # ratio of the normal images that will be used to create a test split\n", - " image_size: 256\n", " train_batch_size: 32\n", - " test_batch_size: 32\n", + " eval_batch_size: 32\n", " num_workers: 8\n", + " image_size: 256 # dimensions to which images are resized (mandatory)\n", + " center_crop: null # dimensions to which images are center-cropped after resizing (optional)\n", + " normalization: imagenet # data distribution to which the images will be normalized: [none, imagenet]\n", " transform_config:\n", " train: null\n", - " val: null\n", - " create_validation_set: false\n", + " eval: null\n", + " test_split_mode: from_dir # options: [from_dir, synthetic]\n", + " test_split_ratio: 0.2 # fraction of train images held out testing (usage depends on test_split_mode)\n", + " val_split_mode: same_as_test # options: [same_as_test, from_test, synthetic]\n", + " val_split_ratio: 0.5 # fraction of train/test images held out for validation (usage depends on val_split_mode)\n", " tiling:\n", " apply: false\n", " tile_size: null\n", @@ -233,6 +237,7 @@ "model:\n", " name: padim\n", " backbone: resnet18\n", + " pre_trained: true\n", " layers:\n", " - layer1\n", " - layer2\n", @@ -251,65 +256,67 @@ " pixel_default: 3\n", " adaptive: true\n", "\n", + "visualization:\n", + " show_images: False # show images on the screen\n", + " save_images: True # save images to the file system\n", + " log_images: True # log images to the available loggers (if any)\n", + " image_save_path: null # path to which images will be saved\n", + " mode: full # options: [\"full\", \"simple\"]\n", + "\n", "project:\n", " seed: 42\n", " path: ./results\n", "\n", "logging:\n", - " log_images_to: [\"local\"] # options: [wandb, tensorboard, local].\n", - " logger: [] # options: [tensorboard, wandb, csv] or combinations.\n", + " logger: [] # options: [comet, tensorboard, wandb, csv] or combinations.\n", " log_graph: false # Logs the model graph to respective logger.\n", "\n", "optimization:\n", - " openvino:\n", - " apply: false\n", + " export_mode: null # options: torch, onnx, openvino\n", "\n", "# PL Trainer Args. 
Don't add extra parameter here.\n", "trainer:\n", - " accelerator: auto # <\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">\n", - " accumulate_grad_batches: 1\n", - " amp_backend: native\n", - " auto_lr_find: false\n", - " auto_scale_batch_size: false\n", - " auto_select_gpus: false\n", - " benchmark: false\n", - " check_val_every_n_epoch: 1 # Don't validate before extracting features.\n", + " enable_checkpointing: true\n", " default_root_dir: null\n", - " detect_anomaly: false\n", - " deterministic: false\n", + " gradient_clip_val: 0\n", + " gradient_clip_algorithm: norm\n", + " num_nodes: 1\n", " devices: 1\n", - " enable_checkpointing: true\n", - " enable_model_summary: true\n", " enable_progress_bar: true\n", + " overfit_batches: 0.0\n", + " track_grad_norm: -1\n", + " check_val_every_n_epoch: 1 # Don't validate before extracting features.\n", " fast_dev_run: false\n", - " gpus: null # Set automatically\n", - " gradient_clip_val: 0\n", - " ipus: null\n", - " limit_predict_batches: 1.0\n", - " limit_test_batches: 1.0\n", - " limit_train_batches: 1.0\n", - " limit_val_batches: 1.0\n", - " log_every_n_steps: 50\n", + " accumulate_grad_batches: 1\n", " max_epochs: 1\n", - " max_steps: -1\n", - " max_time: null\n", " min_epochs: null\n", + " max_steps: -1\n", " min_steps: null\n", - " move_metrics_to_cpu: false\n", - " multiple_trainloader_mode: max_size_cycle\n", - " num_nodes: 1\n", - " num_processes: null\n", - " num_sanity_val_steps: 0\n", - " overfit_batches: 0.0\n", - " plugins: null\n", + " max_time: null\n", + " limit_train_batches: 1.0\n", + " limit_val_batches: 1.0\n", + " limit_test_batches: 1.0\n", + " limit_predict_batches: 1.0\n", + " val_check_interval: 1.0 # Don't validate before extracting features.\n", + " log_every_n_steps: 50\n", + " accelerator: auto # <\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">\n", + " strategy: null\n", + " sync_batchnorm: false\n", " precision: 32\n", + " enable_model_summary: true\n", + " num_sanity_val_steps: 0\n", " profiler: null\n", + " benchmark: false\n", + " deterministic: false\n", " reload_dataloaders_every_n_epochs: 0\n", + " auto_lr_find: false\n", " replace_sampler_ddp: true\n", - " sync_batchnorm: false\n", - " tpu_cores: null\n", - " track_grad_norm: -1\n", - " val_check_interval: 1.0 # Don't validate before extracting features.\n", + " detect_anomaly: false\n", + " auto_scale_batch_size: false\n", + " plugins: null\n", + " move_metrics_to_cpu: false\n", + " multiple_trainloader_mode: max_size_cycle\n", + "\n", "\"\"\"\n", "with open(\"config.yaml\", \"w\", encoding=\"utf8\") as f:\n", " f.writelines(folder_padim)" @@ -321,7 +328,7 @@ "id": "jpjtUHyWUXx0" }, "source": [ - "## Train the model to see if it is working" + "## Train the model to see if it is working\n" ] }, { @@ -345,7 +352,7 @@ "id": "Wt6BCkcoUch7" }, "source": [ - "## Create Benchmarking config" + "## Create Benchmarking config\n" ] }, { @@ -356,11 +363,11 @@ "\n", "> Note: Not all models in Anomalib support OpenVINO export.\n", "\n", - "The `hardware` section of the config file is used to pass the list of hardwares on which to compute the benchmarking results. If the host system has multiple GPUs, then the benchmarking computation is distributed across GPUs to speed up collection of results. By default, the results are gathered in a `csv` file but with the `writer` flag, you can also save the results to `tensorboard` and `wandb` loggers. The final section is the `grid_search` section. It has two parameters, _dataset_ and *model_name*. 
The _dataset_ field is used to set the values of grid search while the *model_name* section is used to pass the list of models for which the benchmark is computed.\n",
+    "The `hardware` section of the config file is used to pass the list of hardware on which to compute the benchmarking results. If the host system has multiple GPUs, then the benchmarking computation is distributed across GPUs to speed up collection of results. By default, the results are gathered in a `csv` file but with the `writer` flag, you can also save the results to `tensorboard` and `wandb` loggers. The final section is the `grid_search` section. It has two parameters, _dataset_ and _model_name_. The _dataset_ field is used to set the values of grid search while the _model_name_ section is used to pass the list of models for which the benchmark is computed.\n",
     "\n",
     "In this notebook we are working with a toy dataset, so we also need to tell the benchmarking script to use that particular dataset instead of the default `MVTec` as defined in each of the model config file. We can either update each config file or just pass a list of one value for the fields such as _format_, _path_, etc., as shown below.\n",
     "\n",
-    "For more information about benchmarking, you can look at the [Anomalib Documentation](https://openvinotoolkit.github.io/anomalib/guides/benchmarking.html)."
+    "For more information about benchmarking, you can look at the [Anomalib Documentation](https://openvinotoolkit.github.io/anomalib/guides/benchmarking.html).\n"
    ]
   },
   {
@@ -383,12 +390,12 @@
     "  dataset:\n",
     "    name: [hazelnut]\n",
     "    format: [folder]\n",
-    "    path: [/content/anomalib/datasets/hazelnut_toy]\n",
+    "    root: [/content/anomalib/datasets/hazelnut_toy]\n",
     "    normal_dir: [good]\n",
     "    abnormal_dir: [colour]\n",
     "    normal_test_dir: [null]\n",
     "    task: [segmentation]\n",
-    "    mask: [/content/anomalib/datasets/hazelnut_toy/mask/colour]\n",
+    "    mask_dir: [/content/anomalib/datasets/hazelnut_toy/mask/colour]\n",
     "    extensions: [null]\n",
     "    split_ratio: [0.2]\n",
     "    image_size: [256, 128]\n",
@@ -455,7 +462,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.10.11"
   },
   "vscode": {
    "interpreter": {