diff --git a/.coveragerc b/.coveragerc index 852a3e7e..0cde6974 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,7 +2,6 @@ ignore_errors = True omit = tests/* - gama/dashboard/* exclude_lines = pragma: no cover def __repr__ diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 4db9b6b3..00000000 --- a/.flake8 +++ /dev/null @@ -1,6 +0,0 @@ -[flake8] -max-line-length = 88 -select = C,E,F,W,B -ignore = E203, W503, F821 -per-file-ignores = - *__init__.py:F401 \ No newline at end of file diff --git a/.github/actions/pytest/action.yaml b/.github/actions/pytest/action.yaml new file mode 100644 index 00000000..07008550 --- /dev/null +++ b/.github/actions/pytest/action.yaml @@ -0,0 +1,67 @@ +name: "Pytest" +description: "Run a test suite" + +inputs: + path: + description: "Path to test suite" + required: true + python-version: + description: "Python version to use" + default: "3.10" + required: true + use-cache: + description: "If set, use cached dependencies." + default: true + required: true + os: + description: "OS for runs-on. Only required when cache is set." 
+ default: "ubuntu-latest" + required: false + pip-args: + description: "Additional pip args" + default: "" + required: false + pytest-args: + description: "Additional ptest args" + default: "" + required: false + +runs: + using: "composite" + steps: + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + + - name: Get pip cache dir + id: get-cache-location + if: inputs.use-cache == 'true' + shell: bash + run: | + echo "::set-output name=dir::$(pip cache dir)" + + - uses: actions/cache@v3 + id: cache + if: inputs.use-cache == 'true' + with: + path: ${{ steps.get-cache-location.outputs.dir }} + key: ${{ inputs.os }}-pip-${{ inputs.python-version }}-${{ hashFiles('**/pyproject.toml') }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + pip install ${{ inputs.pip-args }} ".[test]" + + - name: Test with pytest + shell: bash + run: | + echo pytest ${{ inputs.path }} --cov ${{ inputs.pytest-args }} + pytest ${{ inputs.path }} --cov ${{ inputs.pytest-args }} + + - uses: codecov/codecov-action@v3 + with: + flags: unit-tests + fail_ci_if_error: true + verbose: true diff --git a/.github/workflows/build-docs.yaml b/.github/workflows/build-docs.yaml new file mode 100644 index 00000000..c055482d --- /dev/null +++ b/.github/workflows/build-docs.yaml @@ -0,0 +1,49 @@ +name: Docs + +on: + push: + branches: + - master + - main + - "[0-9][0-9].[0-9]+.x" + + pull_request: + branches: + - master + - main + - "[0-9][0-9].[0-9]+.x" + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[doc]" + + - name: Clone GH-Pages + if: ${{ github.event_name != 'pull_request' }} + uses: actions/checkout@v3 + with: + ref: "gh-pages" + path: docs/build/ + + - 
name: Build Docs + run: | + sphinx-build docs/source docs/build/${{ github.ref_name }} + + - name: Push GH-Pages + if: ${{ github.event_name != 'pull_request' }} + run: | + cd docs/build + git config user.name github-actions + git config user.email github-actions@github.com + git add ${{ github.ref_name }} + git commit -m "Autodeploy docs" + git push diff --git a/.github/workflows/changelog.yaml b/.github/workflows/changelog.yaml new file mode 100644 index 00000000..676ad674 --- /dev/null +++ b/.github/workflows/changelog.yaml @@ -0,0 +1,41 @@ +name: Changelog + +on: + pull_request: + branches: + - master + - main + +jobs: + check-and-comment: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Check Releases.rst Updated + run: | + git fetch origin master:master + updated_files=$(git diff master -- docs/source/releases.rst) + if [[ -z $updated_files ]] + then + exit 1 + fi + + - name: Thank you comment + uses: thollander/actions-comment-pull-request@v1 + with: + comment_includes: "changelog" + message: | + Thank you for the pull request! :pray: + If CI reports any errors please address them. + If you believe the errors are unrelated to your PR, please state why. + This will make it easier for the maintainers to assess your PR :) + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Comment PR + if: ${{ failure() }} + uses: thollander/actions-comment-pull-request@v1 + with: + comment_includes: "changelog" # To avoid creating additional comments + message: | + Please include the change for this PR in the changelog (`docs/source/releases.rst`). 
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/precommit.yaml b/.github/workflows/precommit.yaml new file mode 100644 index 00000000..48dc99da --- /dev/null +++ b/.github/workflows/precommit.yaml @@ -0,0 +1,15 @@ +name: pre-commit + +on: + workflow_dispatch: + push: + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml new file mode 100644 index 00000000..d9d1d5e9 --- /dev/null +++ b/.github/workflows/publish.yaml @@ -0,0 +1,126 @@ +name: Publish + +on: + workflow_dispatch: + inputs: + year: + required: true + type: number + macro: + required: true + type: number + micro: + required: true + type: number + suffix: + required: false + type: string + i-know-what-i-am-doing: + required: true + type: boolean + default: false + test-pypi: + required: true + type: boolean + default: true + +jobs: + warn: + if: ${{ ! inputs.i-know-what-i-am-doing }} + runs-on: ubuntu-latest + steps: + - name: "i-know-what-i-am-doing not set" + run: | + exit 1 + + build: + if: ${{ inputs.i-know-what-i-am-doing }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Match version with input + run: | + GAMA_VERSION=$(grep -Po "\d+\.\d+\.\d+((\.dev\d*)|(\.post\d*))?" "gama/__version__.py") + NEW_VERSION=${{ inputs.year }}.${{ inputs.macro }}.${{ inputs.micro }}${{ inputs.suffix }} + echo $GAMA_VERSION $NEW_VERSION + exit $([ $GAMA_VERSION == $NEW_VERSION ]) + - name: Match version with tag + run: | + GAMA_VERSION=$(grep -Po "\d+\.\d+\.\d+((\.dev\d*)|(\.post\d*))?" 
"gama/__version__.py") + NEW_VERSION=${{ github.ref }} + echo refs/tags/v$GAMA_VERSION $NEW_VERSION + exit $([ refs/tags/v$GAMA_VERSION == $NEW_VERSION ]) + - name: Install build & publish dependencies + run: | + python -m pip install build + - name: Build Wheel + run: | + python -m build + - name: Upload Dist + uses: actions/upload-artifact@v3 + with: + name: dist-folder + path: dist + retention-days: 1 + if-no-files-found: error + + test-install: + needs: build + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Download dist + uses: actions/download-artifact@v4.1.7 + with: + name: dist-folder + path: dist + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Wheel + shell: bash + run: | + python -m pip install dist/*.whl + - name: Run example + run: | + curl https://raw.githubusercontent.com/openml-labs/gama/master/examples/classification_example.py > example.py + # First reduce runtime for 3 minutes to 1 minute, does not work for Windows but fails silently. + sed -i.bak "s/180/60/" example.py + sed -i.bak "s/3/1/" example.py + python example.py + + publish: + needs: test-install + runs-on: ubuntu-latest + steps: + - name: Download dist + uses: actions/download-artifact@v4.1.7 + with: + name: dist-folder + path: dist + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install Twine + run: | + python -m pip install twine + - name: Upload to test server + if: ${{ inputs.test-pypi }} + run: | + python -m twine upload --repository testpypi -u __token__ -p ${{ secrets.TEST_PYPI }} dist/* + - name: Upload to real server + if: ${{ ! 
inputs.test-pypi }} + run: | + python -m twine upload -u __token__ -p ${{ secrets.PYPI }} dist/* diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 00000000..9ab532fc --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,41 @@ +name: pytest +on: + workflow_dispatch: + pull_request: + +jobs: + unit: + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - name: Invoke Unit Tests + uses: ./.github/actions/pytest + with: + path: tests/unit + os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} + + system: + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - name: Invoke System Tests + uses: ./.github/actions/pytest + with: + path: tests/system + os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/test-with-pre.yaml b/.github/workflows/test-with-pre.yaml new file mode 100644 index 00000000..6850c1b2 --- /dev/null +++ b/.github/workflows/test-with-pre.yaml @@ -0,0 +1,26 @@ +name: Test --pre +on: + workflow_dispatch: + schedule: + - cron: "00 0 * * 1" + +jobs: + unit: + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - name: Invoke Unit Tests + uses: ./.github/actions/pytest + with: + path: tests/unit + python-version: ${{ matrix.python-version }} + use-cache: false + pip-args: "--pre" + pytest-args: "-W error -W ignore::sklearn.exceptions.ConvergenceWarning" diff --git a/.github/workflows/version-bump.sh b/.github/workflows/version-bump.sh new file mode 100755 index 00000000..d559798c --- 
/dev/null +++ b/.github/workflows/version-bump.sh @@ -0,0 +1,13 @@ +#!/bin/bash +PYPI_INFO=$( curl -Ls https://test.pypi.org/pypi/gama/json | sed "s/ //g") +GAMA_VERSION=$(cat gama/__version__.py | grep -Eo "\d+\.\d+\.\d+\.dev") +NEW_VERSION=$(python -c " +import json, sys; +releases = json.loads(sys.argv[-2])['releases'].keys(); +current_version = sys.argv[-1]; +related_versions = [ver for ver in releases if ver.startswith(current_version)] +last_version = related_versions[-1] if related_versions else None +print(current_version + str(int(last_version.removeprefix(current_version))+1) if last_version else current_version+'0') +" $PYPI_INFO $GAMA_VERSION) +echo "s/$GAMA_VERSION/$NEW_VERSION/" +sed -i '' -r "s/$GAMA_VERSION[0-9]+/$NEW_VERSION/" "gama/__version__.py" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e3b2d3b..de5458f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,16 @@ repos: + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.261 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 23.3.0 hooks: - id: black - language_version: python3.8 + language_version: python3.10 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.761 + rev: v1.2.0 hooks: - id: mypy - files: gama/* - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.9 - hooks: - - id: flake8 - files: gama/* + files: gama/.* diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9a6e421c..00000000 --- a/.travis.yml +++ /dev/null @@ -1,37 +0,0 @@ -language: python -python: - - "3.6" - - "3.7" -env: - - JOB=test SUITE=unit - - JOB=test SUITE=system - -# - perform pre-commit checks only once -# - perform doc build only on commit to develop or master -jobs: - include: - - env: JOB=check - python: 3.8 - - env: JOB=deploy - python: 3.8 - if: (branch = master OR branch =~ /\d\d\.\d+\.x/) AND type = push - -install: - - ./ci_scripts/install.sh 
-script: - - ./ci_scripts/script.sh -after_success: - - ./ci_scripts/after_success.sh - -before_deploy: - - ./ci_scripts/build_docs.sh -deploy: - provider: pages - token: $GITHUB_TOKEN - edge: true - local_dir: docs/build - keep_history: true - skip_cleanup: true - on: - all_branches: true - condition: $JOB = deploy diff --git a/README.md b/README.md index e458a2f6..f766f4bc 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Using this log, insight can be obtained on the behaviour of the search procedure For example, it can produce a graph that shows pipeline fitness over time: ![graph of fitness over time](https://github.com/openml-lab/gama/blob/master/docs/source/technical_guide/images/viz.gif) -For more examples and information on the visualization, see [the technical guide](https://openml-labs.github.io/gama/master/user_guide/index.html#dashboard). +*Note: we temporarily disabled support for the GAMA Dashboard, we will add out-of-the-box visualization again later this year.* ## Installing GAMA @@ -68,18 +68,23 @@ _note_: By default, GamaClassifier optimizes towards `log_loss`. If you want to cite GAMA, please use [our ECML-PKDD 2020 Demo Track publication](https://link.springer.com/chapter/10.1007/978-3-030-67670-4_39). 
```latex -@article{DBLP:journals/corr/abs-2007-04911, - author = {Pieter Gijsbers and - Joaquin Vanschoren}, - title = {{GAMA:} a General Automated Machine learning Assistant}, - journal = {CoRR}, - volume = {abs/2007.04911}, - year = {2020}, - url = {https://arxiv.org/abs/2007.04911}, - eprinttype = {arXiv}, - eprint = {2007.04911}, - timestamp = {Mon, 20 Jul 2020 14:20:39 +0200}, - biburl = {https://dblp.org/rec/journals/corr/abs-2007-04911.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} +@InProceedings{10.1007/978-3-030-67670-4_39, +author="Gijsbers, Pieter and Vanschoren, Joaquin", +editor="Dong, Yuxiao +and Ifrim, Georgiana +and Mladeni{\'{c}}, Dunja +and Saunders, Craig +and Van Hoecke, Sofie", +title="GAMA: A General Automated Machine Learning Assistant", +booktitle="Machine Learning and Knowledge Discovery in Databases. Applied Data Science and Demo Track", +year="2021", +publisher="Springer International Publishing", +address="Cham", +pages="560--564", +abstract="The General Automated Machine learning Assistant (GAMA) is a modular AutoML system developed to empower users to track and control how AutoML algorithms search for optimal machine learning pipelines, and facilitate AutoML research itself. In contrast to current, often black-box systems, GAMA allows users to plug in different AutoML and post-processing techniques, logs and visualizes the search process, and supports easy benchmarking. It currently features three AutoML search algorithms, two model post-processing steps, and is designed to allow for more components to be added.", +isbn="978-3-030-67670-4" } ``` + +## License +The contents of this repository is under an [Apache-2.0 License](https://github.com/openml-labs/gama/blob/master/LICENSE). 
diff --git a/ci_scripts/after_success.sh b/ci_scripts/after_success.sh deleted file mode 100755 index 3d26411b..00000000 --- a/ci_scripts/after_success.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -if [ "$JOB" = "test" ]; then - # codecov will merge reports automatically - bash <(curl -s https://codecov.io/bash) -fi diff --git a/ci_scripts/build_docs.sh b/ci_scripts/build_docs.sh deleted file mode 100755 index 7d2b3595..00000000 --- a/ci_scripts/build_docs.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -set -e - -git clone https://github.com/PGijsbers/gama.git --single-branch --branch gh-pages docs/build - -release_branch_regex="[0-9][0-9].[0-9]+.x" - -if [[ $TRAVIS_BRANCH =~ release_branch_regex ]]; then - echo "Truncating branch name" - branchname=${TRAVIS_BRANCH%.x} - sed -i -E "s/url=\S*\//url=$branchname\//" index.html - sphinx-build -b html docs/source docs/build/$branchname -else - sphinx-build -b html docs/source docs/build/$TRAVIS_BRANCH -fi diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh deleted file mode 100755 index d6ccfd72..00000000 --- a/ci_scripts/install.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -if [ "$JOB" = "test" ]; then - pip install -e .[dev] -fi -if [ "$JOB" = "check" ]; then - pip install pre-commit -fi -if [ "$JOB" = "deploy" ]; then - pip install -e .[all] -fi diff --git a/ci_scripts/script.sh b/ci_scripts/script.sh deleted file mode 100755 index 30841238..00000000 --- a/ci_scripts/script.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -set -e - -if [ "$JOB" = "check" ] || [ "$JOB" = "deploy" ]; then - pre-commit run --all-files -fi -if [ "$JOB" = "test" ]; then - pytest --cov=gama -sv -n 4 tests/"$SUITE"/ -fi -if [ "$JOB" = "deploy" ]; then - pytest -sv -n 4 tests/unit/ - pytest -sv -n 4 tests/system/ -fi diff --git a/codecov.yml b/codecov.yml index 9225017f..3a7cadd2 100644 --- a/codecov.yml +++ b/codecov.yml @@ -4,4 +4,3 @@ codecov: ignore: - "gama/utilities/cli.py" - - "gama/dashboard/**/*" diff --git 
a/docs/source/advanced_guide/logging.rst b/docs/source/advanced_guide/logging.rst index 48d82bc5..fb73ce5b 100644 --- a/docs/source/advanced_guide/logging.rst +++ b/docs/source/advanced_guide/logging.rst @@ -51,5 +51,3 @@ An overview the log levels: - `INFO`: General information about the optimization process. - `WARNING`: Serious errors that do not prohibit GAMA from running to completion (but results could be suboptimal). - `ERROR`: Errors which prevent GAMA from running to completion. - -As described in :ref:`dashboard-section` the files in the output directory can be used to generate visualizations about the optimization process. diff --git a/docs/source/conf.py b/docs/source/conf.py index 081bd140..76e5d2f7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,13 +17,16 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -- Adding the module to document ---------------------------------------- +import datetime +import gama import os +import shutil import sys sys.path.insert(0, os.path.abspath("../..")) # -- Copying Example files over ------------------------------------------- -import shutil + example_dir = "../../examples" example_doc_dir = "./user_guide/examples" @@ -67,8 +70,6 @@ # The master toctree document. master_doc = "index" -import datetime - # General information about the project. project = "gama" copyright = f"2018-{datetime.datetime.now().year}, Pieter Gijsbers" @@ -79,7 +80,6 @@ # built documents. # # The short X.Y version. -import gama version = gama.__version__ # The full version, including alpha/beta/rc tags. @@ -90,7 +90,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
-language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/source/releases.rst b/docs/source/releases.rst index 93302c76..3ad3bc4c 100644 --- a/docs/source/releases.rst +++ b/docs/source/releases.rst @@ -1,6 +1,58 @@ Release Notes ============= +Version 23.0.0.post1 +-------------- + +Fix scikit-learn to version <1.3, because GAMA uses scikit-learn internals that were changed from 1.4. + +Version 23.0.0 +-------------- + +Highlights: + - We (temporarily) removed the Dashboard, it had not been maintained and is best be rebuilt from the ground up. + - The `config` hyperparameter is now called `search_space`. + - Adds back the ``preset`` hyperparameter to all GAMA estimators which let you specify the goal + of your AutoML execution. Currently ``simple`` can be specified to create a + simple model, and ``performance`` can be used to generate the best possible model. + It is still possible to manually set the search and post processing methods. + +Maintenance: + - Bump minimum Python version to Python 3.9, in line with `NEP 29 `_. + - Bump several library dependencies. + - Switch from flake8 to ruff. + +Version 22.0.0 +-------------- + +Maintenance: + - Adopt `NEP 29 `_: + - Drop support for Python 3.6 and 3.7 + - Add support for Python 3.10 + - Use pyproject.toml instead of setup.py + - Transition to Github CI, including: + - pytest on pull request + - pytest CRON job with prerelease versions + - doc build and deploy + - pre-commit check + - check for changelog + - easy release to pypi from github workflow + - Small changes to avoid FutureWarnings and/or DeprecationWarnings. + +Bugfixes: + - #137: raise an output if ``output_directory`` is non-empty. + - #174: Fix an issue where GAMA might freeze in when ending search. + +Features: + - ASHA resources can now also be specified as fraction of the dataset size by using a float in (0, 1]. 
+ +Changes: + - #138: Instead of subsampling the whole dataset before doing CV, the same test set is used across rungs and only + the training folds are subsampled. This makes performance comparable across rungs. + - AsyncEA will try to delay creating new individuals as long as possible. + - AsyncEA will no longer create offspring from pipelines with failed evaluations (e.g., timeout, invalid configuration). + + Version 21.0.1 -------------- diff --git a/docs/source/user_guide/dashboard.rst b/docs/source/user_guide/dashboard.rst deleted file mode 100644 index 0e0e664b..00000000 --- a/docs/source/user_guide/dashboard.rst +++ /dev/null @@ -1,90 +0,0 @@ -:orphan: - - -.. _dashboard-section: - -Dashboard ---------- - -.. note:: - The GAMA Dashboard is not done. - However, it is functional and released to get some early feedback on what users would like to see included. - The near future may see a reimplementation, see `#97 `_. - -GAMA Dashboard is a graphical user interface to start and monitor the AutoML search. -It is available when GAMA has been installed with its visualization optional dependencies (`pip install gama[vis]`). -To start GAMA Dashboard call `gamadash` from the command line. - -Home tab -******** - -Starting GAMA Dashboard will open a new tab in your webbrowser which will show the GAMA Dashboard Home page: - -.. image:: images/DashboardHome.png - -On the left you can configure GAMA, on the right you can select the dataset you want to perform AutoML on. -To provide a dataset, specify the path to the csv or ARFF-file which contains your data. -Once the dataset has been set, the `Go!`-button on the bottom left will be enabled. -When you are satisfied with your chosen configuration, press the `Go!`-button to start GAMA. -This will take you to the 'Running' tab. - -Running tab -*********** - -The running tab will look similar to this: - -.. image:: images/DashboardRunning.png - -You see four main components on this page: - - 1. 
A visualization of search results. In this scatter plot, each scored pipeline is represented by a marker. - The larger markers represent the most recent evaluations. Their location is determined by the pipeline's - length (on the y-axis) and score (on the x-axis). You can hover over the markers to get precise scores, - and click on the pipeline to select it. A selected pipeline is represented with a large red marker. - - 2. Output of the search command. This field provides a textual progress report on GAMA's AutoML process. - - 3. Full specification of the selected pipeline. This view of the selected pipeline specifies hyperparametersettings - for each step in the pipeline. - - 4. A table of each evaluated pipeline. Similar to the plot (1), here you find all pipelines evaluated during search. - It is possible to sort and filter based on performance. - -Selecting a pipeline in the table or through the plot will update the other components. - -Analysis tab -************ - -The analysis tab is also available if you did not start a new GAMA run. -On this tab, you can visualize search results from logs. - -.. image:: images/analysis_empty.png - -Clicking 'Select or drop log(s)' in the top-right corner opens a file explorer which lets you select file(s) to load. -Select both the 'gama.log' and 'evaluation.log' files from your directory together. -For example the the logs found `here `_. -After loading the files, you can toggle its visualization by clicking the checkbox that appears next to the file name. -The first visualization you will see is the best obtained score as a function of the number of evaluated pipelines: - -.. image:: images/analysis_load.png - -In the top right you will find a dropdown 'Visualization Presets' which allow you to see other visualizations. -Below you will find a description of each preset. 
- -Visualization presets include: - -* **#Pipeline by learner** A bar chart of the number of times each estimator is used as the final step in a machine learning pipeline. - -* **#Pipeline by size** A bar chart of the distribution of the number of components per evaluated pipeline. - -* **Best score over time** The best score obtained by any individual pipeline at a given point in time. - -* **Best score over iterations** The best score obtained by any individual pipeline at a given iteration. - -* **Size vs Metric** A scatter plot where each pipeline is represented by a marker, gives an impression of the distribution of scores for evaluated pipelines of different lengths. - -* **Evaluation Times** A bar chart plotting the distribution of time required to evaluate pipelines during optimization. - -* **Evaluations by Rung** (ASHA only) A bar chart plotting the number of evaluations at each ASHA rung. - -* **Time by Rung** (ASHA only) A bar chart plotting the combined time used of all evaluations for each ASHA rung. diff --git a/docs/source/user_guide/index.rst b/docs/source/user_guide/index.rst index a4ab6031..1a860695 100644 --- a/docs/source/user_guide/index.rst +++ b/docs/source/user_guide/index.rst @@ -32,11 +32,6 @@ For ease of use, GAMA provides a `fit`, `predict` and `predict_proba` function a ----- -.. include:: dashboard.rst - :start-line: 1 - ------ - .. include:: hyperparameters.rst :start-line: 1 diff --git a/docs/source/user_guide/installation.rst b/docs/source/user_guide/installation.rst index 0215eeab..5296ae03 100644 --- a/docs/source/user_guide/installation.rst +++ b/docs/source/user_guide/installation.rst @@ -12,18 +12,17 @@ You can install them with:: pip install gama[OPTIONAL] -where `OPTIONAL` is one of: +where `OPTIONAL` is one or more (comma separated): - - `vis`: allows you to use the prototype dash app to visualize optimization traces. - `dev`: sets up all required dependencies for development of GAMA. - - `all`: all of the above. 
+ - `doc`: sets up all required dependencies for building documentation of GAMA. To see exactly what dependencies will be installed, see `setup.py `_. -If you plan on developing GAMA, cloning the repository and installing locally is advised:: +If you plan on developing GAMA, cloning the repository and installing locally with test and doc dependencies is advised:: git clone https://github.com/PGijsbers/gama.git cd gama - pip install -e .[all] + pip install -e ".[doc,test]" This installation will refer to your local GAMA files. Changes to the code directly affect the installed GAMA package without requiring a reinstall. diff --git a/examples/regression_example.py b/examples/regression_example.py index 8746aa14..ad0ea07b 100644 --- a/examples/regression_example.py +++ b/examples/regression_example.py @@ -1,10 +1,10 @@ -from sklearn.datasets import load_boston +from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error from gama import GamaRegressor if __name__ == "__main__": - X, y = load_boston(return_X_y=True) + X, y = load_diabetes(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) automl = GamaRegressor(max_total_time=180, store="nothing", n_jobs=1) diff --git a/gama/GamaClassifier.py b/gama/GamaClassifier.py index c33ec2e6..5ae27493 100644 --- a/gama/GamaClassifier.py +++ b/gama/GamaClassifier.py @@ -13,20 +13,20 @@ class GamaClassifier(Gama): - """ Gama with adaptations for (multi-class) classification. """ + """Gama with adaptations for (multi-class) classification.""" - def __init__(self, config=None, scoring="neg_log_loss", *args, **kwargs): - if not config: + def __init__(self, search_space=None, scoring="neg_log_loss", *args, **kwargs): + if not search_space: # Do this to avoid the whole dictionary being included in the documentation. 
- config = clf_config + search_space = clf_config self._metrics = scoring_to_metric(scoring) if any(metric.requires_probabilities for metric in self._metrics): # we don't want classifiers that do not have `predict_proba`, # because then we have to start doing one hot encodings of predictions etc. - config = { + search_space = { alg: hp - for (alg, hp) in config.items() + for (alg, hp) in search_space.items() if not ( inspect.isclass(alg) and issubclass(alg, ClassifierMixin) @@ -35,10 +35,10 @@ def __init__(self, config=None, scoring="neg_log_loss", *args, **kwargs): } self._label_encoder = None - super().__init__(*args, **kwargs, config=config, scoring=scoring) + super().__init__(*args, search_space=search_space, scoring=scoring, **kwargs) def _predict(self, x: pd.DataFrame): - """ Predict the target for input X. + """Predict the target for input X. Parameters ---------- @@ -52,12 +52,12 @@ def _predict(self, x: pd.DataFrame): """ y = self.model.predict(x) # type: ignore # Decode the predicted labels - necessary only if ensemble is not used. - if y[0] not in self._label_encoder.classes_: + if y[0] not in list(self._label_encoder.classes_): y = self._label_encoder.inverse_transform(y) return y def _predict_proba(self, x: pd.DataFrame): - """ Predict the class probabilities for input x. + """Predict the class probabilities for input x. Predict target for x, using the best found pipeline(s) during the `fit` call. @@ -75,7 +75,7 @@ def _predict_proba(self, x: pd.DataFrame): return self.model.predict_proba(x) # type: ignore def predict_proba(self, x: Union[pd.DataFrame, np.ndarray]): - """ Predict the class probabilities for input x. + """Predict the class probabilities for input x. Predict target for x, using the best found pipeline(s) during the `fit` call. @@ -99,7 +99,7 @@ def predict_proba_from_file( target_column: Optional[str] = None, encoding: Optional[str] = None, ): - """ Predict the class probabilities for input in the arff_file. 
+ """Predict the class probabilities for input in the arff_file. Parameters ---------- @@ -124,13 +124,21 @@ def predict_proba_from_file( return self._predict_proba(x) def fit(self, x, y, *args, **kwargs): - """ Should use base class documentation. """ + """Should use base class documentation.""" y_ = y.squeeze() if isinstance(y, pd.DataFrame) else y self._label_encoder = LabelEncoder().fit(y_) - if any([isinstance(yi, str) for yi in y_]): + if any(isinstance(yi, str) for yi in y_): # If target values are `str` we encode them or scikit-learn will complain. y = self._label_encoder.transform(y_) self._evaluation_library.determine_sample_indices(stratify=y) + + # Add label information for classification to the scorer such that + # the cross validator does not encounter unseen labels in smaller + # data sets during pipeline evaluation. + for m in self._metrics: + if "labels" in inspect.signature(m.scorer._score_func).parameters: + m.scorer._kwargs.update({"labels": y}) + super().fit(x, y, *args, **kwargs) def _encode_labels(self, y): diff --git a/gama/GamaRegressor.py b/gama/GamaRegressor.py index a153bf82..f6e979e7 100644 --- a/gama/GamaRegressor.py +++ b/gama/GamaRegressor.py @@ -5,19 +5,21 @@ class GamaRegressor(Gama): - """ Gama with adaptations for regression. """ + """Gama with adaptations for regression.""" - def __init__(self, config=None, scoring="neg_mean_squared_error", *args, **kwargs): + def __init__( + self, search_space=None, scoring="neg_mean_squared_error", *args, **kwargs + ): """ """ # Empty docstring overwrites base __init__ doc string. # Prevents duplication of the __init__ doc string on the API page. - if not config: - config = reg_config - super().__init__(*args, **kwargs, config=config, scoring=scoring) + if not search_space: + search_space = reg_config + super().__init__(*args, search_space=search_space, scoring=scoring, **kwargs) def _predict(self, x: pd.DataFrame): - """ Predict the target for input X. + """Predict the target for input X. 
Parameters ---------- diff --git a/gama/__version__.py b/gama/__version__.py index 06097df6..b52870e5 100644 --- a/gama/__version__.py +++ b/gama/__version__.py @@ -1,2 +1,2 @@ # format: YY.minor.micro -__version__ = "21.0.1.dev" +__version__ = "23.0.0.post1" diff --git a/gama/configuration/classification.py b/gama/configuration/classification.py index 77df068c..7388cc59 100644 --- a/gama/configuration/classification.py +++ b/gama/configuration/classification.py @@ -1,3 +1,4 @@ +# sourcery skip: de-morgan import numpy as np from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB @@ -100,12 +101,15 @@ "solver": ["lbfgs"], }, Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)}, - FastICA: {"tol": np.arange(0.0, 1.01, 0.05)}, + FastICA: { + "tol": np.arange(0.0, 1.01, 0.05), + "whiten": ["unit-variance"], + }, FeatureAgglomeration: { "linkage": ["ward", "complete", "average"], "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], "param_check": [ - lambda params: (not params["linkage"] == "ward") + lambda params: params["linkage"] != "ward" or params["affinity"] == "euclidean" ], }, diff --git a/gama/configuration/parser.py b/gama/configuration/parser.py index 8f4b9032..705ec268 100644 --- a/gama/configuration/parser.py +++ b/gama/configuration/parser.py @@ -1,13 +1,15 @@ from collections import defaultdict -from typing import Dict, Any +from typing import Dict, Any, Union, List, Callable, Tuple import sklearn from gama.genetic_programming.components import Primitive, Terminal, DATA_TERMINAL -def pset_from_config(configuration): - """ Create a pset for the given configuration dictionary. +def pset_from_config( + configuration: Dict[Union[str, object], Any] +) -> Tuple[Dict[str, List], Dict[str, Callable]]: + """Create a pset for the given configuration dictionary. Given a configuration dictionary specifying operators (e.g. 
sklearn estimators), their hyperparameters and values for each hyperparameter, @@ -18,10 +20,14 @@ def pset_from_config(configuration): Side effect: Imports the classes of each primitive. - Returns the given Pset. + returns: + pset - Dict[str, List]: + maps return-types to a list of Primitives and/or Terminals + parameter_checks - Dict[str, Callable]: + maps Primitive name to a check for the validity of the hp configuration """ - pset = defaultdict(list) + pset: Dict[str, List[Union[Primitive, Terminal]]] = defaultdict(list) parameter_checks = {} # Make sure the str-keys are evaluated first, they describe shared hyperparameters. @@ -33,9 +39,9 @@ def pset_from_config(configuration): # Specification of shared hyperparameters for value in values: pset[key].append(Terminal(value=value, output=key, identifier=key)) - elif isinstance(key, object): + elif isinstance(key, type): # Specification of operator (learner, preprocessor) - hyperparameter_types = [] + hyperparameter_types: List[str] = [] for name, param_values in sorted(values.items()): # We construct a new type for each hyperparameter, so we can specify # it as terminal type, making sure it matches with expected @@ -53,41 +59,37 @@ def pset_from_config(configuration): hyperparameter_types.append(hp_name) for value in param_values: pset[hp_name].append( - Terminal(value=value, output=name, identifier=hp_name,) + Terminal( + value=value, + output=name, + identifier=hp_name, + ) ) # After registering the hyperparameter types, # we can register the operator itself. 
- transformer_tags = [ - "DATA_PREPROCESSING", - "FEATURE_SELECTION", - "DATA_TRANSFORMATION", - ] - if issubclass(key, sklearn.base.TransformerMixin) or ( - hasattr(key, "metadata") - and key.metadata.query()["primitive_family"] in transformer_tags - ): + if issubclass(key, sklearn.base.TransformerMixin): pset[DATA_TERMINAL].append( Primitive( - input=hyperparameter_types, output=DATA_TERMINAL, identifier=key + input=tuple(hyperparameter_types), + output=DATA_TERMINAL, + identifier=key, ) ) - elif issubclass(key, sklearn.base.ClassifierMixin) or ( - hasattr(key, "metadata") - and key.metadata.query()["primitive_family"] == "CLASSIFICATION" - ): + elif issubclass(key, sklearn.base.ClassifierMixin): pset["prediction"].append( Primitive( - input=hyperparameter_types, output="prediction", identifier=key + input=tuple(hyperparameter_types), + output="prediction", + identifier=key, ) ) - elif issubclass(key, sklearn.base.RegressorMixin) or ( - hasattr(key, "metadata") - and key.metadata.query()["primitive_family"] == "REGRESSION" - ): + elif issubclass(key, sklearn.base.RegressorMixin): pset["prediction"].append( Primitive( - input=hyperparameter_types, output="prediction", identifier=key + input=tuple(hyperparameter_types), + output="prediction", + identifier=key, ) ) else: @@ -104,8 +106,8 @@ def pset_from_config(configuration): return pset, parameter_checks -def merge_configurations(c1, c2): - """ Takes two configurations and merges them together. 
""" +def merge_configurations(c1: Dict, c2: Dict) -> Dict: + """Takes two configurations and merges them together.""" # Should refactor out 6 indentation levels merged: Dict[Any, Any] = defaultdict(lambda: None, c1) for algorithm, hparams2 in c2.items(): diff --git a/gama/configuration/regression.py b/gama/configuration/regression.py index 67b8d83d..f9de2fd0 100644 --- a/gama/configuration/regression.py +++ b/gama/configuration/regression.py @@ -48,7 +48,7 @@ }, GradientBoostingRegressor: { "n_estimators": [100], - "loss": ["ls", "lad", "huber", "quantile"], + "loss": ["squared_error", "absolute_error", "huber", "quantile"], "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0], "max_depth": range(1, 11), "min_samples_split": range(2, 21), @@ -90,12 +90,15 @@ }, # Preprocesssors Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)}, - FastICA: {"tol": np.arange(0.0, 1.01, 0.05)}, + FastICA: { + "tol": np.arange(0.0, 1.01, 0.05), + "whiten": ["unit-variance"], + }, FeatureAgglomeration: { "linkage": ["ward", "complete", "average"], "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], "param_check": [ - lambda params: (not params["linkage"] == "ward") + lambda params: params["linkage"] != "ward" or params["affinity"] == "euclidean" ], }, diff --git a/gama/configuration/testconfiguration.py b/gama/configuration/testconfiguration.py index 970e81c8..4c134db9 100644 --- a/gama/configuration/testconfiguration.py +++ b/gama/configuration/testconfiguration.py @@ -100,12 +100,15 @@ }, # Preprocesssors Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)}, - FastICA: {"tol": np.arange(0.0, 1.01, 0.05)}, + FastICA: { + "tol": np.arange(0.0, 1.01, 0.05), + "whiten": ["unit-variance"], + }, FeatureAgglomeration: { "linkage": ["ward", "complete", "average"], "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], "param_check": [ - lambda params: (not params["linkage"] == "ward") + lambda params: params["linkage"] != "ward" or params["affinity"] 
== "euclidean" ], }, diff --git a/gama/dashboard/README.md b/gama/dashboard/README.md deleted file mode 100644 index 9f934e82..00000000 --- a/gama/dashboard/README.md +++ /dev/null @@ -1,17 +0,0 @@ -This is a Dash app to configure and start GAMA through UI. -While this work will be merged with the Visualization app, for now they are separate. -The distinction is that the Visualization app was built to compare multiple runs to each other, -while this app is built to use GAMA and monitor live ML pipeline search. - ---- - -A word on the usage of Dash. This (and the for now separate Visualization app) is my first Dash app. -I was (and still am) not sure what the right tools for developing this GUI are. -Dash looked good for prototyping the tool. - -I'm sure there's lots to be improved. But in particular know that the usage of local python objects for -storing data is wrong from a Dash perspective - as this means state is stored in the server and not the client. -At this point I have no plan to support multiple clients, so I'm okay with this. -Another point of pain is that I'm currently running GAMA as a subprocess and communication -with the GUI is facilitated through reading and parsing the gama log file. -I'd very much like for this to be replaced by communication within Python (e.g. events). diff --git a/gama/dashboard/__init__.py b/gama/dashboard/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gama/dashboard/app.py b/gama/dashboard/app.py deleted file mode 100644 index e79eaf20..00000000 --- a/gama/dashboard/app.py +++ /dev/null @@ -1,139 +0,0 @@ -from typing import List - -try: - import dash - import dash_core_components as dcc - import dash_bootstrap_components as dbc - import dash_html_components as html - from dash.dependencies import Input, Output -except ImportError: - print( - "Unable to import Dash modules. " - "Make sure GAMA is installed with the optional visualization dependencies." 
- "E.g.: pip install gama[vis]" - ) - raise - -from gama.dashboard.controller import Controller -from gama.dashboard.pages.base_page import BasePage - - -external_scripts = [ - "https://code.jquery.com/jquery-3.3.1.min.js", - "https://cdn.datatables.net/v/dt/dt-1.10.18/datatables.min.js", -] -dashboard = dash.Dash( - "GamaDashboard", - external_stylesheets=[dbc.themes.BOOTSTRAP], - external_scripts=external_scripts, -) -dashboard.config.suppress_callback_exceptions = True - -controller = Controller() - - -# === Construct UI elements === - - -def build_app(): - from gama.dashboard.pages import pages - - base = create_generic_layout() - base["tabs"].children = create_tabs(pages) - page_stores = [] - for page in pages: - page.build_page(dashboard, controller) - if hasattr(page, "gama_started"): - controller.gama_started(page.gama_started) - page_stores.append( - dcc.Store(id=f"{page.id}-store", storage_type="session", data={}) - ) - base["page-stores"].children = page_stores - return base - - -def create_generic_layout(): - """ Creates the generic layout of tabs and their content pages. 
""" - tab_banner_style = { - "border-top-left-radius": "3px", - "background-color": "#f9f9f9", - "padding": "0px 24px", - "border-bottom": "1px solid #d6d6d6", - } - - return html.Div( - id="page", - children=[ - html.Div(id="tabs", style=tab_banner_style), - html.Div(id="content"), - html.Div(id="page-stores"), - ], - style={"font-family": "'Open Sans Semi Bold', sans-serif"}, - ) - - -def create_tabs(pages: List[BasePage]): - if pages == []: - raise ValueError("Must have at least one tab.") - - # Sort pages by alignment - sorted_pages = sorted(pages, key=lambda p: p.alignment) - left_aligned = [page for page in sorted_pages if page.alignment >= 0] - right_aligned = [page for page in sorted_pages if page.alignment < 0] - grouped_pages = left_aligned + right_aligned - tabs = [create_tab(page.name, page.starts_hidden) for page in grouped_pages] - return [dcc.Tabs(id="page-tabs", value=tabs[0].value, children=tabs)] - - -def create_tab(name: str, invisible: bool): - tab_style = { - "color": "black", - "width": "10%", - "border-top-left-radius": "3px", # round tab corners - "border-top-right-radius": "3px", - "border-bottom": "0px", # bottom box-shadow still present - "padding": "6px", - "display": "none" if invisible else "inline", - } - selected_tab_style = { - **tab_style, - "border-top": "3px solid #c81818", # Highlight color (TU/e colored) - "box-shadow": "1px 1px 0px white", # removes bottom edge - "display": "inline", - } - return dcc.Tab( - id=f"{name}-tab", - label=name, - value=name, - style=tab_style, - selected_style=selected_tab_style, - ) - - -# === Callbacks === - - -@dashboard.callback([Output("content", "children")], [Input("page-tabs", "value")]) -def display_page_content(page_name): - from gama.dashboard.pages import pages - - page = [page for page in pages if page.name == page_name][0] - page.need_update = True - return [page.content] - - -def main(open_window: bool = True, **server_kwargs): - dashboard.layout = build_app() - # if open_window: - # 
import webbrowser - # from threading import Timer - # - # port = server_kwargs.get("port", 8050) - # localhost = f"http://127.0.0.1:{port}" - # - # Timer(1, webbrowser.open_new, kwargs=dict(url=localhost)).start() - dashboard.run_server(**server_kwargs) - - -if __name__ == "__main__": - main(debug=True) diff --git a/gama/dashboard/components/__init__.py b/gama/dashboard/components/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/gama/dashboard/components/cli_window.py b/gama/dashboard/components/cli_window.py deleted file mode 100644 index 78453ea7..00000000 --- a/gama/dashboard/components/cli_window.py +++ /dev/null @@ -1,112 +0,0 @@ -import shlex -import subprocess -import threading -import queue -from typing import List - -from dash import Dash -from dash.dependencies import Input, Output, State -from dash.exceptions import PreventUpdate -import dash_core_components as dcc -import dash_html_components as html -import visdcc - - -def enqueue_output(out, queue_: queue.Queue): - for line in iter(out.readline, b""): - queue_.put(line) - out.close() - - -class CLIWindow: - """ A Component for Dash App which simulates a console window """ - - def __init__( - self, - id_: str, - app: Dash, - update_interval_s: float = 1.0, - auto_scroll: bool = True, - ): - self._update_interval_s = update_interval_s - self.auto_scroll = auto_scroll - - self.console_id = f"{id_}-text" - self.timer_id = f"{id_}-interval" - self.js_id = f"{id_}-js" - self.id_ = id_ - - self.autoscroll_script = ( - f"document.getElementById('{self.console_id}').scrollTop" - f" = document.getElementById('{self.console_id}').scrollHeight" - ) - self.process = None - self._thread = None - self._queue = None - self._lines: List[str] = [] - - self.html = self._build_component() - self._register_callbacks(app) - - def _build_component(self) -> html.Div: - timer = dcc.Interval( - id=self.timer_id, interval=self._update_interval_s * 1000, n_intervals=0 - ) - scroller = 
visdcc.Run_js(id=self.js_id, run="") - self.console = dcc.Textarea( - id=self.console_id, - contentEditable="false", - style={ - "height": "100%", - "width": "100%", - "borderWidth": "1px", - "borderRadius": "5px", - "borderStyle": "dashed", - }, - persistence_type="session", - persistence=True, - ) - return html.Div( - id=self.id_, - children=[timer, self.console, scroller], - style={"height": "100%"}, - ) - - def _register_callbacks(self, app): - app.callback( - [Output(self.console_id, "value"), Output(self.js_id, "run")], - [Input(self.timer_id, "n_intervals")], - [State(self.console_id, "value")], - )(self.update_console) - - def monitor(self, process): - self.process = process - # Because there are only blocking reads to the pipe, - # we need to read them on a separate thread. - self._queue = queue.Queue() - self._thread = threading.Thread( - target=enqueue_output, args=(self.process.stdout, self._queue), daemon=True - ) - self._thread.start() - - def call(self, command: str): - cmd = shlex.split(command) - process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - self.monitor(process) - - def update_console(self, _, current_text): - if self.process is None: - return [None, None] - - # We want to update the text field if there is new output from the process, - # or if we detect the text value has been reset (due to e.g. switching tabs). - try: - line = self._queue.get_nowait() - self._lines.append(line.decode("utf-8")) - except queue.Empty: - # No new message, update only required if value field had been reset. 
- if current_text is not None: - raise PreventUpdate - - script = self.autoscroll_script if self.auto_scroll else "" - return ["".join(self._lines), script] diff --git a/gama/dashboard/components/headers.py b/gama/dashboard/components/headers.py deleted file mode 100644 index d64c087c..00000000 --- a/gama/dashboard/components/headers.py +++ /dev/null @@ -1,56 +0,0 @@ -from typing import List - -from dash.dependencies import Input, Output, State -import dash_core_components as dcc -import dash_bootstrap_components as dbc -import dash_html_components as html - - -def button_header(text: str, id_: str, level: int = 4): - header = f"{'#' * level} {text}" - return dbc.FormGroup( - [dbc.Button([dcc.Markdown(header)], id=id_, block=True, color="primary")] - ) - - -def markdown_header(text: str, level: int = 4, with_horizontal_rule: bool = True): - header = f"{'#' * level} {text}" - # matching length '-' not required but nicer. - hr = f"\n{'-'*(level + 1 + len(text))}" - return dcc.Markdown(f"{header}{hr if with_horizontal_rule else ''}") - - -class CollapsableSection: - """ A Form with a ButtonHeader which when presses collapses/expands the Form. 
""" - - def __init__( - self, header: str, controls: List[dbc.FormGroup], start_open: bool = True - ): - self._header = header - self._start_open = start_open - self._header_id = f"{header}-header" - self._form_id = f"{header}-form" - self._controls = controls - - self.html = self._build_content() - - def _build_content(self) -> html.Div: - form_header = button_header(self._header, id_=self._header_id) - self.form = dbc.Form(self._controls) - collapsable_form = dbc.Collapse( - id=self._form_id, children=[self.form], is_open=self._start_open - ) - return html.Div([form_header, collapsable_form]) - - def register_callbacks(self, app): - app.callback( - Output(self._form_id, "is_open"), - [Input(self._header_id, "n_clicks")], - [State(self._form_id, "is_open")], - )(_toggle_collapse) - - -def _toggle_collapse(click, is_open: bool): - if click: - return not is_open - return is_open diff --git a/gama/dashboard/components/input_group.py b/gama/dashboard/components/input_group.py deleted file mode 100644 index e5e2cf39..00000000 --- a/gama/dashboard/components/input_group.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Dict - -from dash import Dash -from dash.dependencies import Input, Output, State -import dash_core_components as dcc -import dash_bootstrap_components as dbc - - -def automark_slider(app: Dash, id_: str, label: str, slider_kwargs: Dict): - defaults = dict(min=1, max=10, value=1, updatemode="drag") - defaults.update(slider_kwargs) - marks = {defaults["min"]: defaults["min"], defaults["max"]: defaults["max"]} - - cpu_input = dbc.FormGroup( - [ - dbc.Label(label, html_for=id_, width=6), - dbc.Col(dcc.Slider(id=id_, marks=marks, **defaults)), - ], - row=True, - ) - app.callback.append( - Output(id_, "marks"), - [Input(id_, "value")], - [State(id_, "min"), State(id_, "max")], - )(_update_marks) - return cpu_input - - -def _update_marks(selected_value, min_, max_): - return {min_: str(min_), selected_value: str(selected_value), max_: str(max_)} - - -class 
ToggleButton: - def __init__(self, button_id: str, app: Dash, label: str, start_on: bool = True): - self._button_id = button_id - self.html = self._build_content(label, start_on) - - def _build_content(self, label: str, start_on: bool): - return dbc.FormGroup( - [ - dbc.Label(label, html_for=self._button_id, width=6), - dbc.Col( - dbc.Checklist( - id=self._button_id, - options=[{"label": "", "value": "on"}], - switch=True, - value="on" if start_on else "off", - ) - ), - ], - row=True, - ) diff --git a/gama/dashboard/controller.py b/gama/dashboard/controller.py deleted file mode 100644 index f9084734..00000000 --- a/gama/dashboard/controller.py +++ /dev/null @@ -1,55 +0,0 @@ -""" Provides updates on GAMA's search. - Next step is to call GAMA directly, but the `fit` call has to be made async. """ - -import shlex -import subprocess -from collections import defaultdict - - -class Controller: - def __init__(self): - self._subscribers = defaultdict(list) - - def start_gama( - self, - metric, - regularize, - n_jobs, - max_total_time_h, - max_total_time_m, - max_eval_time_h, - max_eval_time_m, - input_file, - log_dir, - target, - ): - # For some reason, 0 input registers as None. 
- max_total_time_h = 0 if max_total_time_h is None else max_total_time_h - max_total_time_m = 0 if max_total_time_m is None else max_total_time_m - max_eval_time_h = 0 if max_eval_time_h is None else max_eval_time_h - max_eval_time_m = 0 if max_eval_time_m is None else max_eval_time_m - max_total_time = max_total_time_h * 60 + max_total_time_m - max_eval_time = max_eval_time_h * 60 + max_eval_time_m - command = ( - f'gama "{input_file}" -v -n {n_jobs} -t {max_total_time} ' - f'--time_pipeline {max_eval_time} -outdir {log_dir} --target "{target}"' - ) - if regularize != "on": - command += " --long" - if metric != "default": - command += f" -m {metric}" - - command = shlex.split(command) - # fake_command = ['python', '-h'] - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) - self._on_gama_started(process, log_dir) - - def _on_gama_started(self, process, log_file): - for subscriber in self._subscribers["gama_started"]: - subscriber(process, log_file) - - def gama_started(self, callback_function): - self._subscribers["gama_started"].append(callback_function) - - def gama_ended(self, callback_function): - self._subscribers["gama_ended"].append(callback_function) diff --git a/gama/dashboard/pages/__init__.py b/gama/dashboard/pages/__init__.py deleted file mode 100644 index 88605e62..00000000 --- a/gama/dashboard/pages/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .homepage import HomePage -from .runningpage import RunningPage -from .analysispage import AnalysisPage - -pages = [HomePage(), RunningPage(), AnalysisPage()] diff --git a/gama/dashboard/pages/analysispage.py b/gama/dashboard/pages/analysispage.py deleted file mode 100644 index c8c245a1..00000000 --- a/gama/dashboard/pages/analysispage.py +++ /dev/null @@ -1,231 +0,0 @@ -import base64 -import itertools -import os -import shutil -import uuid -from typing import Dict, List, Optional - -import dash_core_components as dcc -import dash_html_components as html -from dash.dependencies import 
Input, Output, State - -from gama.dashboard.pages.base_page import BasePage -from gama.logging.GamaReport import GamaReport -from ..plotting import plot_preset_graph - - -class AnalysisPage(BasePage): - def __init__(self): - super().__init__(name="Analysis", alignment=-1) - self.id = "analysis-page" - self.reports: Dict[str, GamaReport] = {} - - def build_page(self, app, controller): - upload_box = html.Div( - id="upload-container", - children=[ - dcc.Upload( - id="upload-box", - children=html.Div([html.A("Select or drop log(s).")]), - style={ - "width": "100%", - "height": "60px", - "lineHeight": "60px", - "borderWidth": "1px", - "borderStyle": "dashed", - "borderRadius": "5px", - "textAlign": "center", - "display": "inline-block", - }, - multiple=True, - ) - ], - style=dict( - width=f'{len("Select or drop log(s).")}em', - display="inline-block", - float="right", - ), - ) - - # Top - - presets = [ - {"label": "#Pipeline by learner", "value": "number_pipeline_by_learner"}, - {"label": "#Pipeline by size", "value": "number_pipeline_by_size"}, - {"label": "Best score over time", "value": "best_over_time"}, - {"label": "Best score over iterations", "value": "best_over_n"}, - {"label": "Size vs Metric", "value": "size_vs_metric"}, - {"label": "Evaluation Times", "value": "evaluation_times_dist"}, - {"label": "Evaluations by Rung", "value": "n_by_rung"}, - {"label": "Time by Rung", "value": "time_by_rung"}, - # {"label": "Custom", "value": "custom"}, - ] - - preset_container = html.Div( - id="preset-container", - children=[ - html.Div("Visualization Presets"), - dcc.Dropdown( - id="preset-dropdown", - options=presets, - value="best_over_n", - style=dict(width="90%"), - ), - ], - style=dict(width="100%", display="inline-block", float="left"), - ) - - # sep_agg_radio = dcc.RadioItems( - # id="sep-agg-radio", - # options=[ - # {"label": "separate", "value": "separate-line"}, - # {"label": "aggregate", "value": "aggregate"}, - # ], - # value="separate-line", - # 
style={"width": "90%", "display": "inline-block"}, - # ) - - # sep_agg_container = html.Div( - # id="sep_agg_container", - # children=[html.Div("Style"), sep_agg_radio], - # style=dict(display="inline-block", width="50%", float="left"), - # ) - - # left - - dashboard_graph = dcc.Graph(id="dashboard-graph") - - # third_width = {"width": "30%", "display": "inline-block"} - # plot_control_container = html.Div( - # id="plot-controls", - # children=[ - # html.Div( - # [html.Label("x-axis"), dcc.Dropdown(id="x-axis-metric")], - # style=third_width - # ), - # html.Div( - # [html.Label("y-axis"), dcc.Dropdown(id="y-axis-metric")], - # style=third_width - # ), - # html.Div( - # [ - # html.Label("plot type"), - # dcc.Dropdown( - # id="plot-type", - # options=[ - # {"label": "scatter", "value": "markers"}, - # {"label": "line", "value": "lines"}, - # ], - # value="lines", - # ), - # ], - # style=third_width, - # ), - # ], - # style=dict(width="80%", display="none"), - # hidden=True, - # ) - # - # graph_settings_container = html.Div( - # id="graph-settings-container", children=[plot_control_container] - # ) - - # graph_update_timer = dcc.Interval(id="update-timer", interval=2 * 1000) # ms - # - # graph_update_trigger = dcc.Store(id="update-trigger") - - visualization_container = html.Div( - id="visualization-container", - children=[ - dashboard_graph, - # graph_settings_container, - # graph_update_timer, - # graph_update_trigger, - ], - style={"float": "left", "width": "85%"}, - ) - - # right - - file_select = dcc.Checklist(id="select-log-checklist") - - report_select_container = html.Div( - id="report-select-container", - children=[preset_container, upload_box, file_select], - style={"width": "14%", "float": "right", "padding-right": "1%"}, - ) - - self._content = html.Div( - id=self.id, children=[visualization_container, report_select_container], - ) - - app.callback( - Output("select-log-checklist", "options"), - [Input("upload-box", "contents")], - [State("upload-box", 
"filename")], - )(self.load_logs) - - app.callback( - Output("dashboard-graph", "figure"), - [ - Input("select-log-checklist", "value"), - Input("preset-dropdown", "value"), - ], - )(self.update_graph) - - return self._content - - def load_logs(self, list_of_contents, list_of_names): - # global aggregate_dataframe - if list_of_contents is not None: - tmp_dir = f"tmp_{str(uuid.uuid4())}" - os.makedirs(tmp_dir) - for content, filename in zip(list_of_contents, list_of_names): - content_type, content_string = content.split(",") - decoded = base64.b64decode(content_string).decode("utf-8") - with open(os.path.join(tmp_dir, filename), "w") as fh: - fh.write(decoded) - - report = GamaReport(tmp_dir) - report_name = report.search_method - for i in itertools.count(): - if f"{report_name}_{i}" not in self.reports: - break - self.reports[f"{report_name}_{i}"] = report - shutil.rmtree(tmp_dir) - return [{"label": logname, "value": logname} for logname in self.reports] - return [] - - def update_graph(self, logs: List[str], preset_value: Optional[str] = None): - print(logs, preset_value) # , aggregate, xaxis, yaxis, mode, - # if preset_value == "custom": - # if logs is None or logs == [] or xaxis is None or yaxis is None: - # title = "Load and select a log on the right" - # plots = [] - # else: - # title = f"{aggregate} plot of {len(logs)} logs" - # if aggregate == "separate-line": - # plots = [ - # individual_plot(reports[log], xaxis, yaxis, mode) - # for log in logs - # ] - # if aggregate == "aggregate": - # plots = aggregate_plot( - # [reports[log] for log in logs], xaxis, yaxis - # ) - # return { - # "data": plots, - # "layout": { - # "title": title, - # "xaxis": {"title": f"{xaxis}"}, - # "yaxis": {"title": f"{yaxis}"}, - # "hovermode": "closest" if mode == "markers" else "x", - # }, - # } - if logs is not None: - # filtered_aggregate = aggregate_dataframe[ - # aggregate_dataframe.filename.isin(logs) - # ] - return plot_preset_graph([self.reports[log] for log in logs], 
preset_value) - else: - return {} diff --git a/gama/dashboard/pages/base_page.py b/gama/dashboard/pages/base_page.py deleted file mode 100644 index c546fcf0..00000000 --- a/gama/dashboard/pages/base_page.py +++ /dev/null @@ -1,35 +0,0 @@ -import abc - - -class BasePage(abc.ABC): - def __init__(self, name: str, alignment: int, starts_hidden: bool = False): - """ Defines the basic behavior of a page. - - Parameters - ---------- - name: str - Name of the page, displayed in the tab. - - alignment: int - Defines the order of tabs. - Positive numbers are aligned to the left, negative to the right. - Within the groups, bigger numbers are placed to the right. - E.g.: [0][1][2] ... [-2][-1] - - starts_hidden: bool (default=False) - If True, the tab is hidden by default as must be turned visible explicitly. - """ - self.name = name - self.starts_hidden = starts_hidden - self.alignment = alignment - self.need_update = False - self._content = None - - @property - def content(self): - return self._content - - @abc.abstractmethod - def build_page(self, app, controller): - """ Populate the `content` field with html, register any callbacks. 
""" - raise NotImplementedError diff --git a/gama/dashboard/pages/homepage.py b/gama/dashboard/pages/homepage.py deleted file mode 100644 index 68c67293..00000000 --- a/gama/dashboard/pages/homepage.py +++ /dev/null @@ -1,419 +0,0 @@ -import multiprocessing -import os -from typing import Optional, List, Dict, Tuple, Callable - -import dash_core_components as dcc -import dash_bootstrap_components as dbc -import dash_html_components as html -import dash_table -from dash.dependencies import Input, Output, State - -from gama.dashboard.pages.base_page import BasePage -from gama.data_loading import file_to_pandas, load_feature_metadata_from_file - - -class HomePage(BasePage): - callbacks: List[Tuple[Tuple, Callable]] = [] - - def __init__(self): - super().__init__(name="Home", alignment=0) - self.id = "home-page" - - def build_page(self, app, controller): - self._build_content(app, controller) - if app is not None: - self._register_callbacks(app) - - def _build_content(self, app, controller) -> html.Div: - """ Build all the components of the page. 
""" - configuration = build_configuration_menu(app, controller) - configuration.style["width"] = "35%" - configuration.style["float"] = "left" - data_navigator = build_data_navigator() - data_navigator.style["width"] = "65%" - data_navigator.style["float"] = "right" - self._content = html.Div(id=self.id, children=[configuration, data_navigator]) - return self._content - - def _register_callbacks(self, app): - for (io, fn) in HomePage.callbacks: - app.callback(*io)(fn) - HomePage.callbacks = [] - - def load(self): - pass - - def unload(self): - pass - - -# === Configuration Menu === - - -def cpu_slider(): - n_cpus = multiprocessing.cpu_count() - id_ = "cpu_slider" - cpu_input = dbc.FormGroup( - [ - dbc.Label("N Jobs", html_for=id_, width=6), - dbc.Col( - dcc.Slider( - id=id_, - min=1, - max=n_cpus, - updatemode="drag", - value=1, - marks={1: "1", n_cpus: str(n_cpus)}, - persistence_type="session", - persistence=True, - ) - ), - ], - row=True, - ) - HomePage.callbacks.append( - ( - ( - Output(id_, "marks"), - [Input(id_, "value")], - [State(id_, "min"), State(id_, "max")], - ), - update_marks, - ) - ) - return cpu_input - - -def time_nud( - label_text: str, - hour_id: str, - hour_default: int, - minute_id: str, - minute_default: int, -): - return dbc.FormGroup( - [ - dbc.Label(label_text, html_for=hour_id, width=6), - dbc.Col( - dbc.InputGroup( - [ - dbc.Input( - id=hour_id, - type="number", - min=0, - max=99, - step=1, - value=hour_default, - ), - dbc.InputGroupAddon("H", addon_type="append"), - ] - ) - ), - dbc.Col( - dbc.InputGroup( - [ - dbc.Input( - id=minute_id, - type="number", - min=0, - max=59, - step=1, - value=minute_default, - ), - dbc.InputGroupAddon("M", addon_type="append"), - ] - ) - ), - ], - row=True, - ) - - -def toggle_button(label_text: str, id_: str, start_on: bool = True): - return dbc.FormGroup( - [ - dbc.Label(label_text, html_for=id_, width=6), - dbc.Col( - dbc.Checklist( - id=id_, - options=[{"label": "", "value": "on"}], - switch=True, - 
value="on" if start_on else "off", - ) - ), - ], - row=True, - ) - - -def text_input(label_text: str, default_text: str, id_: str): - return dbc.FormGroup( - [ - dbc.Label(label_text, html_for=id_, width=6), - dbc.Col( - dbc.Input( - id=id_, type="text", placeholder=default_text, value=default_text - ) - ), - ], - row=True, - ) - - -def dropdown( - label_text: str, id_: str, options: Dict[str, str], value: Optional[str] = None -): - """ options formatted as {LABEL_KEY: LABEL_TEXT, ...} """ - return dbc.FormGroup( - [ - dbc.Label(label_text, html_for=id_, width=6), - dbc.Col( - dcc.Dropdown( - id=id_, - options=[ - {"label": text, "value": key} for key, text in options.items() - ], - clearable=False, - value=value, - persistence_type="session", - persistence=True, - ), - ), - ], - row=True, - ) - - -def button_header(text: str, id_: str, level: int = 4): - header = f"{'#' * level} {text}" - return dbc.FormGroup( - [dbc.Button([dcc.Markdown(header)], id=id_, block=True, color="primary")] - ) - - -def markdown_header(text: str, level: int = 4, with_horizontal_rule: bool = True): - header = f"{'#' * level} {text}" - hr = f"\n{'-'*(level + 1 + len(text))}" # matching length '-' not required but nice - return dcc.Markdown(f"{header}{hr if with_horizontal_rule else ''}") - - -def toggle_collapse(click, is_open: bool): - if click: - return not is_open - return is_open - - -def collapsable_section( - header: str, controls: List[dbc.FormGroup], start_open: bool = True -): - header_id = f"{header}-header" - form_id = f"{header}-form" - - form_header = button_header(header, id_=header_id) - collapsable_form = dbc.Collapse( - id=form_id, children=[dbc.Form(controls)], is_open=start_open - ) - - HomePage.callbacks.append( - ( - ( - Output(form_id, "is_open"), - [Input(header_id, "n_clicks")], - [State(form_id, "is_open")], - ), - toggle_collapse, - ) - ) - return form_header, collapsable_form - - -def build_configuration_menu(app, controller) -> html.Div: - # Optimization - from 
gama.utilities.metrics import all_metrics - - metrics = {m: m.replace("_", " ") for m in all_metrics} - metrics.update({"default": "default"}) - scoring_input = dropdown( - "Metric", "metric_dropdown", options=metrics, value="default" - ) - regularize_input = toggle_button( - "Prefer short pipelines", "regularize_length_switch" - ) - optimization = collapsable_section( - "Optimization", [scoring_input, regularize_input] - ) - - # Resources - cpu_input = cpu_slider() - max_total_time_input = time_nud( - "Max Runtime", - hour_id="max_total_h", - hour_default=1, - minute_id="max_total_m", - minute_default=0, - ) - max_eval_time_input = time_nud( - "Max time per pipeline", - hour_id="max_eval_h", - hour_default=0, - minute_id="max_eval_m", - minute_default=5, - ) - resources = collapsable_section( - "Resources", [cpu_input, max_total_time_input, max_eval_time_input] - ) - - # Advanced - log_path = text_input("Log Directory", "~/GamaLog", "logpath") - advanced = collapsable_section("Advanced", [log_path], start_open=False) - - # Go! 
- go_button = dbc.Button( - [dcc.Markdown("#### Go!")], - id="go-button", - block=True, - color="success", - disabled=True, - ) - - def start_gama(n_click, running_tab_style, *args): - controller.start_gama(*args) - running_tab_style["display"] = "inline" - return "danger", dcc.Markdown("#### Stop!"), "Running", running_tab_style - - app.callback( - [ - Output("go-button", "color"), - Output("go-button", "children"), - Output("page-tabs", "value"), - Output("Running-tab", "style"), - ], - [Input("go-button", "n_clicks")], - [ - State("Running-tab", "style"), - State("metric_dropdown", "value"), - State("regularize_length_switch", "value"), - State("cpu_slider", "value"), - State("max_total_h", "value"), - State("max_total_m", "value"), - State("max_eval_h", "value"), - State("max_eval_m", "value"), - State("file-path-input", "value"), - State("logpath", "value"), - State("target_dropdown", "value"), - ], - )(start_gama) - - return html.Div( - children=[ - markdown_header("Configure GAMA", level=2), - *optimization, - *resources, - *advanced, - go_button, - ], - style={"box-shadow": "1px 1px 1px black", "padding": "2%"}, - ) - - -def update_marks(selected_value, min_, max_): - return {min_: str(min_), selected_value: str(selected_value), max_: str(max_)} - - -def build_data_navigator() -> html.Div: - upload_file = dbc.Input( - id="file-path-input", - placeholder="Path to data file, e.g. 
~/data/mydata.arff", - type="text", - ) - - modes = ["None", "Small", "All"] - settings = dbc.FormGroup( - [ - dbc.Label("Target", html_for="target_dropdown", width=2), - dbc.Col( - dcc.Dropdown( - id="target_dropdown", - options=[{"label": "-", "value": "a"}], - clearable=False, - value="a", - # persistence_type="session", - # persistence=True, - ), - width=4, - ), - dbc.Label("Preview Mode", html_for="preview_dropdown", width=2), - dbc.Col( - dcc.Dropdown( - id="preview_dropdown", - options=[{"label": m, "value": m.lower()} for m in modes], - clearable=False, - value="none", - # persistence_type="session", - # persistence=True, - ), - width=4, - ), - ], - row=True, - ) - - table_container = html.Div(id="table-container", children=["No data loaded."]) - - data_settings = html.Div( - id="data-settings-container", - children=[settings, table_container], - style={"margin": "10px"}, - ) - - def update_data_table(filename, mode): - if filename is not None and os.path.isfile(filename): - if mode in ["all", "small"]: - df = file_to_pandas(filename) - if mode == "small": - df = df.head(50) - - data_table = dash_table.DataTable( - id="table", - columns=[{"name": c, "id": c} for c in df.columns], - data=df.to_dict("records"), - editable=False, - style_table={"maxHeight": "500px", "overflowY": "scroll"}, - ) - attributes = list(df.columns) - else: - data_table = "Preview not enabled." 
- attributes = list(load_feature_metadata_from_file(filename)) - - target_options = [{"label": c, "value": c} for c in attributes] - default_target = attributes[-1] - - return [data_table], target_options, default_target, False - return ["No data loaded"], [{"label": "-", "value": "a"}], "a", True - - HomePage.callbacks.append( - ( - ( - [ - Output("table-container", "children"), - Output("target_dropdown", "options"), - Output("target_dropdown", "value"), - Output("go-button", "disabled"), - ], - [ - Input("file-path-input", "value"), - Input("preview_dropdown", "value"), - ], - ), - update_data_table, - ) - ) - - return html.Div( - children=[ - markdown_header("Data Navigator", level=2), - upload_file, - data_settings, - ], - style={"box-shadow": "1px 1px 1px black", "padding": "2%"}, - ) diff --git a/gama/dashboard/pages/runningpage.py b/gama/dashboard/pages/runningpage.py deleted file mode 100644 index 5ea07598..00000000 --- a/gama/dashboard/pages/runningpage.py +++ /dev/null @@ -1,240 +0,0 @@ -import os -import time - -import dash_table -import dash_core_components as dcc -import dash_html_components as html -import dash_bootstrap_components as dbc -from dash.exceptions import PreventUpdate -from plotly import graph_objects as go -from dash.dependencies import Input, Output, State - -from gama.dashboard.components.cli_window import CLIWindow -from gama.dashboard.pages.base_page import BasePage -from gama.logging.GamaReport import GamaReport - - -class RunningPage(BasePage): - def __init__(self): - super().__init__(name="Running", alignment=1, starts_hidden=True) - self.cli = None - self.id = "running-page" - self.report = None - self.log_file = None - - def build_page(self, app, controller): - self.cli = CLIWindow("cli", app) - plot_area = self.plot_area() - pl_viz = self.pipeline_viz() - pl_list = self.pipeline_list() - ticker = dcc.Interval(id="ticker", interval=5000) - self._content = html.Div( - id=self.id, - children=[ - dbc.Row([dbc.Col(plot_area, 
width=8), dbc.Col(self.cli.html)]), - dbc.Row([dbc.Col(pl_viz, width=4), dbc.Col(pl_list)]), - ticker, - ], - ) - - app.callback( - [ - Output("evaluation-graph", "figure"), - Output("pipeline-table", "data"), - Output("pl-viz", "children"), - Output("pipeline-table", "selected_rows"), - Output("pipeline-table", "selected_row_ids"), - Output("evaluation-graph", "clickData"), - ], - [Input("ticker", "n_intervals"), Input("running-page-store", "data")], - )(self.update_page) - - app.callback( - [Output("running-page-store", "data")], - [ - Input("evaluation-graph", "clickData"), - Input("pipeline-table", "selected_row_ids"), - ], - [State("running-page-store", "data")], - )(self.update_selection) - - return self._content - - def update_selection(self, click_data, selected_row_ids, page_store): - cell_selected = None if selected_row_ids is None else selected_row_ids[0] - if click_data is None: - click_selected = None - else: - click_selected = click_data["points"][0]["customdata"] - selected = click_selected if click_selected is not None else cell_selected - - # Selected row ids and click data are always set back to None. - # The value that is not None is the new value. - if selected is not None: - self.need_update = True - page_store["selected_pipeline"] = selected - return [page_store] - # First call or sync call. 
- raise PreventUpdate - - def update_page(self, _, page_store): - if self.report is None: - if self.log_file is None or not os.path.exists(self.log_file): - raise PreventUpdate # report does not exist - else: - self.report = GamaReport(self.log_file) - if self.report.evaluations.empty: - raise PreventUpdate - elif not self.report.update() and not self.need_update: - raise PreventUpdate # report is not updated - - start_update = time.time() - selected_pipeline = page_store.get("selected_pipeline", None) - evaluations = self.report.successful_evaluations - - self.need_update = False - scatters = self.scatter_plot( - evaluations, self.report.metrics, selected_pipeline - ) - metric_one, metric_two = self.report.metrics - metric_one_text = metric_one.replace("_", " ") - - figure = { - "data": scatters, - "layout": dict( - hovermode="closest", - clickmode="event+select", - title=f"{metric_one_text} vs {metric_two}", - xaxis=dict(title=metric_one_text), - yaxis=dict( - title=metric_two, tickformat=",d" - ), # tickformat forces integer ticks for length, - uirevision="never_reset_zoom", - ), - } - - pl_table_data = [ - { - "pl": self.report.individuals[id_].short_name(" > "), - "id": id_, - "score": score, - } - for id_, score in zip(evaluations.id, evaluations[metric_one]) - ] - row_id = [i for i, id_ in enumerate(evaluations.id) if id_ == selected_pipeline] - - def format_pipeline(ind): - pipeline_elements = [] - for primitive_node in reversed(ind.primitives): - pipeline_elements.append(html.B(str(primitive_node._primitive))) - pipeline_elements.append(html.Br()) - for terminal in primitive_node._terminals: - pipeline_elements.append(f" {terminal}") - pipeline_elements.append(html.Br()) - return pipeline_elements - - if selected_pipeline is None: - pl_viz_data = None - else: - pl_viz_data = format_pipeline(self.report.individuals[selected_pipeline]) - - print("Update complete in ", time.time() - start_update) - return figure, pl_table_data, pl_viz_data, row_id, None, None 
- - def scatter_plot(self, evaluations, metrics, selected_pipeline: str = None): - metric_one, metric_two = metrics - - # Marker size indicates recency of the evaluations, - # recent evaluations are bigger. - biggest_size = 25 - smallest_size = 5 - selected_size = 30 - d_size_min_max = biggest_size - smallest_size - - sizes = list(range(smallest_size, biggest_size))[-len(evaluations) :] - if len(evaluations) > d_size_min_max: - sizes = [smallest_size] * (len(evaluations) - d_size_min_max) + sizes - if selected_pipeline is not None: - sizes = [ - size if id_ != selected_pipeline else selected_size - for size, id_ in zip(sizes, evaluations.id) - ] - - default_color = "#301cc9" - selected_color = "#c81818" - - colors = [ - default_color if id_ != selected_pipeline else selected_color - for id_ in evaluations.id - ] - - print(evaluations.head()) - print(evaluations[metric_one]) - print(evaluations[metric_two]) - all_scatter = go.Scatter( - x=evaluations[metric_one], - y=-evaluations[metric_two], - mode="markers", - marker={"color": colors, "size": sizes}, - name="all evaluations", - text=[self.report.individuals[id_].short_name() for id_ in evaluations.id], - customdata=evaluations.id, - ) - return [all_scatter] - - def gama_started(self, process, log_file): - self.cli.monitor(process) - self.log_file = os.path.expanduser(log_file) - - def plot_area(self): - scatter = dcc.Graph( - id="evaluation-graph", - figure={ - "data": [], - "layout": dict(hovermode="closest", transition={"duration": 500},), - }, - ) - return html.Div( - scatter, - style={ - "height": "100%", - "box-shadow": "1px 1px 1px black", - "padding": "2%", - }, - ) - - def pipeline_list(self): - ta = dash_table.DataTable( - id="pipeline-table", - columns=[ - {"name": "Pipeline", "id": "pl"}, - {"name": "Score", "id": "score"}, - ], - data=[], - style_table={"maxHeight": "300px", "overflowY": "scroll"}, - filter_action="native", - sort_action="native", - row_selectable="single", - 
persistence_type="session", - persistence=True, - ) - - return html.Div( - ta, - style={ - "height": "100%", - "box-shadow": "1px 1px 1px black", - "padding": "2%", - }, - ) - - def pipeline_viz(self): - return html.Div( - id="pl-viz", - style={ - "height": "100%", - "box-shadow": "1px 1px 1px black", - "padding": "2%", - "whiteSpace": "pre-wrap", - }, - ) diff --git a/gama/dashboard/plotting.py b/gama/dashboard/plotting.py deleted file mode 100644 index c09bd4ed..00000000 --- a/gama/dashboard/plotting.py +++ /dev/null @@ -1,314 +0,0 @@ -from typing import List, Optional - -import pandas as pd -from plotly import graph_objects as go - -from gama.logging.GamaReport import GamaReport - - -def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]): - if reports == []: - return {} - - plots = [] - layout = {} - first_metric = f"{reports[0].metrics[0]}" - first_metric_max = f"{first_metric}_cummax" - - if preset == "best_over_n": - # if aggregate == "separate-line": - plots = [ - individual_plot(report, "n", first_metric_max, "lines") - for report in reports - ] - # elif aggregate == "aggregate": - # plots = aggregate_plot(aggregate_df, "n", first_metric_max) - layout = dict( - title="Best score by iteration", - xaxis=dict(title="n"), - yaxis=dict(title=f"max {first_metric}"), - hovermode="closest", - ) - elif preset == "best_over_time": - # if aggregate == "separate-line": - plots = [ - individual_plot(report, "relative_end", first_metric_max, "lines") - for report in reports - ] - # elif aggregate == "aggregate": - # plots = aggregate_best_over_time(aggregate_df, first_metric_max) - layout = dict( - title=f"Best score over time", - xaxis=dict(title="time (s)"), - yaxis=dict(title=f"max {first_metric}"), - hovermode="closest", - ) - elif preset == "size_vs_metric": - # if aggregate == "separate-line": - plots = [ - individual_plot(report, first_metric, "length", "markers") - for report in reports - ] - # elif aggregate == "aggregate": - # plots = [] - # 
for method in aggregate_df.search_method.unique(): - # method_df = aggregate_df[aggregate_df.search_method == method] - # plots.append( - # go.Scatter( - # x=method_df[first_metric], - # y=method_df.length, - # mode="markers", - # name=method, - # ) - # ) - layout = dict( - title=f"Size vs {first_metric}", - xaxis=dict(title=first_metric), - yaxis=dict(title="pipeline length"), - hovermode="closest", - ) - elif preset == "number_pipeline_by_size": - # if aggregate == "separate-line": - for report in reports: - size_counts = report.evaluations.length.value_counts() - size_ratio = size_counts / len(report.individuals) - plots.append( - go.Bar(x=size_ratio.index.values, y=size_ratio.values, name=report.name) - ) - # elif aggregate == "aggregate": - # for method in aggregate_df.search_method.unique(): - # results_for_method = aggregate_df[aggregate_df.search_method == method] - # size_counts = results_for_method.length.value_counts() - # size_ratio = size_counts / len(results_for_method) - # plots.append( - # go.Bar(x=size_ratio.index.values, y=size_ratio.values, name=method) - # ) - layout = dict( - title=f"Ratio of pipelines by size", - xaxis=dict(title="pipeline length"), - yaxis=dict(title="pipeline count"), - ) - elif preset == "number_pipeline_by_learner": - for report in reports: - main_learners = [ - str(ind.main_node._primitive) for ind in report.individuals.values() - ] - learner_counts = pd.Series(main_learners).value_counts() - learner_ratio = learner_counts / len(report.individuals) - plots.append( - go.Bar( - x=learner_ratio.index.values, - y=learner_ratio.values, - name=report.name, - ) - ) - layout = dict( - title=f"Ratio of pipelines by learner", - xaxis=dict(title="pipeline length"), - yaxis=dict(title="learner"), - ) - elif preset == "evaluation_times_dist": - # if aggregate == "separate-line": - for report in reports: - time_s = report.evaluations.duration.dt.total_seconds() - plots.append(go.Histogram(x=time_s, name=report.name)) - # elif aggregate 
== "aggregate": - # for method in aggregate_df.search_method.unique(): - # time_s = aggregate_df[ - # aggregate_df.search_method == method - # ].duration.dt.total_seconds() - # plots.append(go.Histogram(x=time_s, name=method)) - layout = dict( - title=f"Pipeline Evaluation Times", - xaxis=dict(title="duration (s)"), - yaxis=dict(title="count"), - ) - elif preset == "n_by_rung": - for report in reports: - if report.search_method == "AsynchronousSuccessiveHalving": - count_by_rung = ( - report.evaluations.groupby(by="rung").n.count().reset_index() - ) - plots.append( - go.Bar(x=count_by_rung.rung, y=count_by_rung.n, name=report.name) - ) - layout = dict( - title=f"#Evaluations by Rung", - xaxis=dict(title="rung"), - yaxis=dict(title="count"), - ) - elif preset == "time_by_rung": - for report in reports: - if report.search_method == "AsynchronousSuccessiveHalving": - duration_by_rung = ( - report.evaluations.groupby(by="rung").duration.sum().reset_index() - ) - duration_by_rung.duration = duration_by_rung.duration.dt.total_seconds() - plots.append( - go.Bar( - x=duration_by_rung.rung, - y=duration_by_rung.duration, - name=report.name, - ) - ) - layout = dict( - title=f"Time spent per Rung", - xaxis=dict(title="rung"), - yaxis=dict(title="time (s)"), - ) - return {"data": plots, "layout": layout} - - -def individual_plot(report: GamaReport, x_axis: str, y_axis: str, mode: str): - """ - - :param report: report to pull data from - :param x_axis: metric on the x-axis, column of report.evaluations - :param y_axis: metric on the y-axis, column of report.evaluations - :param mode: See `https://plot.ly/python/reference/#scatter-mode` - :return: - dash graph - """ - print(len(report.evaluations[x_axis])) - print(len(report.evaluations[y_axis])) - return go.Scatter( - name=f"{report.name}", - x=report.evaluations[x_axis], - y=report.evaluations[y_axis], - text=[ind.short_name for ind in report.individuals.values()], - mode=mode, - ) - - -def aggregate_best_over_time(aggregate: 
pd.DataFrame, y_axis: str): - # By method, find the best score per trace at each point in time. - # I'm sure there is a better way to do this... - colors = { - 0: "rgba(255, 0, 0, {a})", - 1: "rgba(0, 255, 0, {a})", - 2: "rgba(0, 0, 255, {a})", - } - soft_colors = {i: color.format(a=0.2) for i, color in colors.items()} - hard_colors = {i: color.format(a=1.0) for i, color in colors.items()} - - aggregate_data: List[go.Scatter] = [] - aggregate = aggregate[aggregate[y_axis] != -float("inf")] - for color_no, method in enumerate(aggregate.search_method.unique()): - method_agg = aggregate[aggregate.search_method == method] - all_end_times = method_agg.relative_end.unique() - end_time_series = pd.Series(sorted(all_end_times), name="relative_end") - - scores_over_time = pd.DataFrame(end_time_series) - for log_no in method_agg.log_no.unique(): - log_results = method_agg[method_agg.log_no == log_no] - best_for_log = pd.merge_asof( - end_time_series, - log_results[["relative_end", y_axis]], - on="relative_end", - ) - scores_over_time[f"log{log_no}"] = best_for_log[y_axis] - scores_over_time = scores_over_time.set_index("relative_end") - mean_scores = scores_over_time.mean(axis=1) - std_scores = scores_over_time.std(axis=1) - scores_over_time["mean"] = mean_scores - scores_over_time["std"] = std_scores - - upper_bound = go.Scatter( - x=scores_over_time.index, - y=scores_over_time["mean"] + scores_over_time["std"], - mode="lines", - marker=dict(color=soft_colors[color_no]), - line=dict(width=0), - fillcolor=soft_colors[color_no], - fill="tonexty", - showlegend=False, - ) - - mean_performance = go.Scatter( - name=method, - x=scores_over_time.index, - y=scores_over_time["mean"], - mode="lines", - line=dict(color=hard_colors[color_no]), - fillcolor=soft_colors[color_no], - fill="tonexty", - ) - - lower_bound = go.Scatter( - x=scores_over_time.index, - y=scores_over_time["mean"] - scores_over_time["std"], - mode="lines", - marker=dict(color=soft_colors[color_no]), - 
line=dict(width=0), - showlegend=False, - ) - aggregate_data += [lower_bound, mean_performance, upper_bound] - return aggregate_data - - -def aggregate_plot(aggregate: pd.DataFrame, x_axis: str, y_axis: str): - """ Create an aggregate plot over multiple reports. - - Aggregates the mean and std of `y_axis` by `x_axis`. - - :param aggregate: dataframe with all evaluations - :param x_axis: column which is grouped by before aggregating `y_axis` - :param y_axis: column over which to calculate the mean/std. - :return: - Three dash Scatter objects which respectively draw the lower bound, mean and - upper bound. - """ - colors = { - 0: "rgba(255, 0, 0, {a})", - 1: "rgba(0, 255, 0, {a})", - 2: "rgba(0, 0, 255, {a})", - } - soft_colors = {i: color.format(a=0.2) for i, color in colors.items()} - hard_colors = {i: color.format(a=1.0) for i, color in colors.items()} - - # concat_df = pd.concat([report.evaluations for report in reports_to_combine]) - # concat_df = concat_df[concat_df[y_axis] != -float('inf')] - # agg_df = concat_df.groupby(by=x_axis).agg({y_axis: ['mean', 'std']}).reset_index() - # agg_df.columns = [x_axis, y_axis, 'std'] - aggregate_data: List[go.Scatter] = [] - aggregate = aggregate[aggregate[y_axis] != -float("inf")] - for color_no, method in enumerate(aggregate.search_method.unique()): - agg_for_method = aggregate[aggregate.search_method == method] - agg_df = ( - agg_for_method.groupby(by=x_axis) - .agg({y_axis: ["mean", "std"]}) - .reset_index() - ) - agg_df.columns = [x_axis, y_axis, "std"] - - upper_bound = go.Scatter( - x=agg_df[x_axis], - y=agg_df[y_axis] + agg_df["std"], - mode="lines", - marker=dict(color=soft_colors[color_no]), - line=dict(width=0), - fillcolor=soft_colors[color_no], - fill="tonexty", - showlegend=False, - ) - - mean_performance = go.Scatter( - name=method, - x=agg_df[x_axis], - y=agg_df[y_axis], - mode="lines", - line=dict(color=hard_colors[color_no]), - fillcolor=soft_colors[color_no], - fill="tonexty", - ) - - lower_bound = 
go.Scatter( - x=agg_df[x_axis], - y=agg_df[y_axis] - agg_df["std"], - mode="lines", - marker=dict(color=soft_colors[color_no]), - line=dict(width=0), - showlegend=False, - ) - aggregate_data += [lower_bound, mean_performance, upper_bound] - return aggregate_data diff --git a/gama/data_formatting.py b/gama/data_formatting.py index 23e1ec04..62628d4e 100644 --- a/gama/data_formatting.py +++ b/gama/data_formatting.py @@ -19,8 +19,8 @@ def series_looks_categorical(series) -> bool: return False -def infer_categoricals_inplace(df): - """ Use simple heuristics to guess which columns should be categorical. """ +def infer_categoricals_inplace(df: pd.DataFrame) -> None: + """Use simple heuristics to convert columns guessed to be categorical.""" for column in df: if series_looks_categorical(df[column]): df[column] = df[column].astype("category") @@ -36,7 +36,7 @@ def numpy_to_dataframe(x: np.ndarray) -> pd.DataFrame: def format_y( y: Union[pd.DataFrame, pd.Series, np.ndarray], y_type: Type = pd.Series ) -> Union[pd.DataFrame, pd.Series]: - """ Transforms a target vector or indicator matrix to a single series (or 1d df) """ + """Transforms a target vector or indicator matrix to a single series (or 1d df)""" if not isinstance(y, (np.ndarray, pd.Series, pd.DataFrame)): raise TypeError("y must be np.ndarray, pd.Series or pd.DataFrame.") if y_type not in [pd.Series, pd.DataFrame]: @@ -44,11 +44,7 @@ def format_y( if isinstance(y, np.ndarray) and y.ndim == 2: # Either indicator matrix or should be a vector. - if y.shape[1] > 1: - y = np.argmax(y, axis=1) - else: - y = y.squeeze() - + y = np.argmax(y, axis=1) if y.shape[1] > 1 else y.squeeze() if y_type == pd.Series: if isinstance(y, pd.DataFrame): y = y.squeeze() @@ -63,7 +59,7 @@ def format_y( def remove_unlabeled_rows( x: pd.DataFrame, y: Union[pd.Series, pd.DataFrame] ) -> Tuple[pd.DataFrame, Union[pd.Series, pd.DataFrame]]: - """ Removes all rows from x and y where y is nan. 
""" + """Removes all rows from x and y where y is nan.""" if isinstance(y, pd.DataFrame): unlabeled = y.iloc[:, 0].isnull() else: @@ -84,7 +80,7 @@ def format_x_y( y_type: Type = pd.Series, remove_unlabeled: bool = True, ) -> Tuple[pd.DataFrame, Union[pd.DataFrame, pd.Series]]: - """ Take (X,y) data and convert it to a (pd.DataFrame, pd.Series) tuple. + """Take (X,y) data and convert it to a (pd.DataFrame, pd.Series) tuple. Parameters ---------- diff --git a/gama/data_loading.py b/gama/data_loading.py index e1d763c5..246e8670 100644 --- a/gama/data_loading.py +++ b/gama/data_loading.py @@ -8,20 +8,20 @@ from gama.data_formatting import infer_categoricals_inplace -CSV_SNIFF_SIZE = 2 ** 12 +CSV_SNIFF_SIZE = 2**12 def sniff_csv_meta(file_path: str) -> Tuple[str, bool]: - """ Determine the csv delimiter and whether it has a header """ + """Determine the csv delimiter and whether it has a header""" with open(file_path, "r") as csv_file: - first_bytes = csv_file.read(2 ** 12) + first_bytes = csv_file.read(CSV_SNIFF_SIZE) sep = csv.Sniffer().sniff(first_bytes).delimiter has_header = csv.Sniffer().has_header(first_bytes) return sep, has_header def load_csv_header(file_path: str, **kwargs) -> List[str]: - """ Return column names in the header, or 0...N if no header is present. """ + """Return column names in the header, or 0...N if no header is present.""" if not file_path.endswith(".csv"): raise ValueError(f"{file_path} is not a file with .csv extension.") sep, has_header = sniff_csv_meta(file_path) @@ -37,7 +37,7 @@ def load_csv_header(file_path: str, **kwargs) -> List[str]: def csv_to_pandas(file_path: str, **kwargs) -> pd.DataFrame: - """ Load data from the csv file into a pd.DataFrame. + """Load data from the csv file into a pd.DataFrame. 
Parameters ---------- @@ -69,7 +69,7 @@ def csv_to_pandas(file_path: str, **kwargs) -> pd.DataFrame: def arff_to_pandas( file_path: str, encoding: Optional[str] = None, **kwargs ) -> pd.DataFrame: - """ Load data from the ARFF file into a pd.DataFrame. + """Load data from the ARFF file into a pd.DataFrame. Parameters ---------- @@ -102,7 +102,7 @@ def arff_to_pandas( def file_to_pandas( file_path: str, encoding: Optional[str] = None, **kwargs ) -> pd.DataFrame: - """ Load ARFF/csv file into pd.DataFrame. + """Load ARFF/csv file into pd.DataFrame. Parameters ---------- @@ -132,7 +132,7 @@ def X_y_from_file( encoding: Optional[str] = None, **kwargs, ) -> Tuple[pd.DataFrame, pd.Series]: - """ Load ARFF/csv file into pd.DataFrame and specified column to pd.Series. + """Load ARFF/csv file into pd.DataFrame and specified column to pd.Series. Parameters ---------- @@ -162,7 +162,7 @@ def X_y_from_file( def load_feature_metadata_from_file(file_path: str) -> Dict[str, str]: - """ Load the header of the csv or ARFF file, return the type of each attribute. + """Load the header of the csv or ARFF file, return the type of each attribute. For csv files, presence of a header is detected with the Python csv parser. If no header is present in the csv file, the columns will be labeled with a number. @@ -177,7 +177,7 @@ def load_feature_metadata_from_file(file_path: str) -> Dict[str, str]: def load_feature_metadata_from_arff(file_path: str) -> Dict[str, str]: - """ Load the header of the ARFF file and return the type of each attribute. """ + """Load the header of the ARFF file and return the type of each attribute.""" data_header = "@data" attribute_indicator = "@attribute" attributes = OrderedDict() diff --git a/gama/gama.py b/gama/gama.py index a5681a5e..c06cbcb5 100644 --- a/gama/gama.py +++ b/gama/gama.py @@ -77,31 +77,37 @@ class Gama(ABC): - """ Wrapper for the toolbox logic surrounding executing the AutoML pipeline. 
""" + """Wrapper for the toolbox logic surrounding executing the AutoML pipeline.""" def __init__( self, + search_space: Dict[Union[str, object], Any], scoring: Union[ str, Metric, Iterable[str], Iterable[Metric] ] = "filled_in_by_child_class", regularize_length: bool = True, max_pipeline_length: Optional[int] = None, - config: Dict = None, random_state: Optional[int] = None, max_total_time: int = 3600, max_eval_time: Optional[int] = None, n_jobs: Optional[int] = None, max_memory_mb: Optional[int] = None, verbosity: int = logging.WARNING, - search: BaseSearch = AsyncEA(), - post_processing: BasePostProcessing = BestFitPostProcessing(), + search: Optional[BaseSearch] = None, + post_processing: Optional[BasePostProcessing] = None, output_directory: Optional[str] = None, store: str = "logs", + config: None = None, + preset: str = "simple", ): """ Parameters ---------- + search_space: Dict + Specifies available components and their valid hyperparameter settings. + For more information, see :ref:`search_space_configuration`. + scoring: str, Metric or Tuple Specifies the/all metric(s) to optimize towards. A string will be converted to Metric. @@ -116,10 +122,6 @@ def __init__( If set, limit the maximum number of steps in any evaluated pipeline. Encoding and imputation are excluded. - config: Dict - Specifies available components and their valid hyperparameter settings. - For more information, see :ref:`search_space_configuration`. - random_state: int, optional (default=None) Seed for the random number generators used in the process. However, with `n_jobs > 1`, @@ -148,16 +150,19 @@ def __init__( verbosity: int (default=logging.WARNING) Sets the level of log messages to be automatically output to terminal. - search: BaseSearch (default=AsyncEA()) + search: BaseSearch, optional Search method to use to find good pipelines. Should be instantiated. + Default depends on ``goal``. 
- post_processing: BasePostProcessing (default=BestFitPostProcessing()) + post_processing: BasePostProcessing, optional Post-processing method to create a model after the search phase. Should be an instantiated subclass of BasePostProcessing. + Default depends on ``goal``. output_directory: str, optional (default=None) Directory to use to save GAMA output. This includes both intermediate results during search and logs. + This directory must be empty or not exist. If set to None, generate a unique name ("gama_HEXCODE"). store: str (default='logs') @@ -166,15 +171,46 @@ def __init__( - 'models': keep only cache with models and predictions - 'logs': keep only the logs - 'all': keep logs and cache with models and predictions + + preset: str (default='simple') + Determines the steps of the AutoML pipeline when they are not + provided explicitly, based on the given goal. + One of: + - simple: Create a simple pipeline with good performance. + - performance: Try to get the best performing model. """ + if config: + warnings.warn( + "Hyperparameter `config` is renamed to `search_space`. 
" + "Using `config` will lead to an error with `gama>=24`.", + FutureWarning, + ) + search_space = config + + if search is None: + search = AsyncEA() + if post_processing is None: + if preset == "simple": + post_processing = BestFitPostProcessing() + elif preset == "performance": + post_processing = EnsemblePostProcessing() + else: + raise ValueError(f"Unknown value for {preset=}'") + if not output_directory: output_directory = f"gama_{str(uuid.uuid4())}" self.output_directory = os.path.abspath(os.path.expanduser(output_directory)) + if not os.path.exists(self.output_directory): os.mkdir(self.output_directory) + elif len(os.listdir(self.output_directory)) > 0: + raise ValueError( + f"""`output_directory` ('{self.output_directory}') + must be empty or non-existent.""" + ) register_stream_log(verbosity) - if store in ["logs", "all"]: + if store in {"logs", "all"}: log_file = os.path.join(self.output_directory, "gama.log") log_handler = logging.FileHandler(log_file) log_handler.setLevel(logging.DEBUG) @@ -265,7 +301,7 @@ def __init__( e = search.logger(os.path.join(self.output_directory, "evaluations.log")) self.evaluation_completed(e.log_evaluation) - self._pset, parameter_checks = pset_from_config(config) + self._pset, parameter_checks = pset_from_config(search_space) if DATA_TERMINAL not in self._pset: if max_pipeline_length is None: @@ -281,7 +317,7 @@ def __init__( ) max_start_length = 3 if max_pipeline_length is None else max_pipeline_length self._operator_set = OperatorSet( - mutate=partial( + mutate=partial( # type: ignore #https://github.com/python/mypy/issues/1484 random_valid_mutation_in_place, primitive_set=self._pset, max_length=max_pipeline_length, @@ -299,7 +335,7 @@ def __init__( completed_evaluations=self._evaluation_library.lookup, ) - def cleanup(self, which="evaluations"): + def cleanup(self, which="evaluations") -> None: cache_directory = os.path.join(self.output_directory, "cache") if not os.path.exists(self.output_directory): return # Cleanup 
has been called previously @@ -314,7 +350,7 @@ def cleanup(self, which="evaluations"): os.rmdir(self.output_directory) def _np_to_matching_dataframe(self, x: np.ndarray) -> pd.DataFrame: - """ Format np array to dataframe whose column types match the training data. """ + """Format np array to dataframe whose column types match the training data.""" if not isinstance(x, np.ndarray): raise TypeError(f"Expected x to be of type 'numpy.ndarray' not {type(x)}.") @@ -323,17 +359,20 @@ def _np_to_matching_dataframe(self, x: np.ndarray) -> pd.DataFrame: x[i] = x[i].astype(dtype) return x - def _prepare_for_prediction(self, x): + def _prepare_for_prediction( + self, x: Union[pd.DataFrame, np.ndarray] + ) -> pd.DataFrame: if isinstance(x, np.ndarray): x = self._np_to_matching_dataframe(x) - x = self._basic_encoding_pipeline.transform(x) + if self._basic_encoding_pipeline: + x = self._basic_encoding_pipeline.transform(x) return x - def _predict(self, x: pd.DataFrame): + def _predict(self, x: pd.DataFrame) -> np.ndarray: raise NotImplementedError("_predict is implemented by base classes.") - def predict(self, x: Union[pd.DataFrame, np.ndarray]): - """ Predict the target for input X. + def predict(self, x: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: + """Predict the target for input X. Parameters ---------- @@ -355,7 +394,7 @@ def predict_from_file( encoding: Optional[str] = None, **kwargs, ) -> np.ndarray: - """ Predict the target for input found in the ARFF file. + """Predict the target for input found in the ARFF file. Parameters ---------- @@ -384,7 +423,7 @@ def predict_from_file( def score( self, x: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] ) -> float: - """ Calculate `self.scoring` metric of the model on (x, y). + """Calculate `self.scoring` metric of the model on (x, y). 
Parameters ---------- @@ -412,7 +451,7 @@ def score_from_file( encoding: Optional[str] = None, **kwargs, ) -> float: - """ Calculate `self.scoring` metric of the model on data in the file. + """Calculate `self.scoring` metric of the model on data in the file. Parameters ---------- @@ -444,7 +483,7 @@ def fit_from_file( warm_start: Optional[List[Individual]] = None, **kwargs, ) -> None: - """ Find and fit a model to predict the target column (last) from other columns. + """Find and fit a model to predict the target column (last) from other columns. Parameters ---------- @@ -471,7 +510,7 @@ def fit( y: Union[pd.DataFrame, pd.Series, np.ndarray], warm_start: Optional[List[Individual]] = None, ) -> "Gama": - """ Find and fit a model to predict target y from X. + """Find and fit a model to predict target y from X. Various possible machine learning pipelines will be fit to the (X,y) data. Using Genetic Programming, the pipelines chosen should lead to gradually @@ -509,10 +548,10 @@ def fit( self._operator_set._compile, preprocessing_steps=self._fixed_pipeline_extension, ) - store_pipelines = ( self._evaluation_library._m is None or self._evaluation_library._m > 0 ) + if store_pipelines and self._x.shape[0] * self._x.shape[1] > 6_000_000: # if m > 0, we are storing models for each evaluation. For this size # KNN will create models of about 76Mb in size, which is too big, so @@ -536,6 +575,13 @@ def fit( if p.identifier not in [PolynomialFeatures] ] + if self._time_manager.total_time_remaining < 0: + pre_time = self._time_manager.activities[-1].stopwatch.elapsed_time + raise RuntimeError( + f"Preprocessing took {pre_time} seconds. " + f"No time remaining (budget: {self._time_manager.total_time} seconds)." 
+ ) + fit_time = int( (1 - self._post_processing.time_fraction) * self._time_manager.total_time_remaining @@ -568,17 +614,17 @@ def fit( self._time_manager.total_time_remaining, best_individuals, ) - if not self._store == "all": + if self._store != "all": to_clean = dict(nothing="all", logs="evaluations", models="logs") self.cleanup(to_clean[self._store]) return self def _search_phase( self, warm_start: Optional[List[Individual]] = None, timeout: float = 1e6 - ): - """ Invoke the search algorithm, populate `final_pop`. """ + ) -> None: + """Invoke the search algorithm, populate `final_pop`.""" if warm_start: - if not all([isinstance(i, Individual) for i in warm_start]): + if not all(isinstance(i, Individual) for i in warm_start): raise TypeError("`warm_start` must be a list of Individual.") pop = warm_start elif warm_start is None and len(self._final_pop) > 0: @@ -598,7 +644,6 @@ def _search_phase( self._operator_set.evaluate = partial( gama.genetic_programming.compilers.scikitlearn.evaluate_individual, - # evaluate_pipeline=evaluate_pipeline, timeout=self._max_eval_time, deadline=deadline, add_length_to_score=self._regularize_length, @@ -617,8 +662,8 @@ def _search_phase( def export_script( self, file: Optional[str] = "gama_pipeline.py", raise_if_exists: bool = False - ): - """ Export a Python script which sets up the best found pipeline. + ) -> str: + """Export a Python script which sets up the best found pipeline. Can only be called after `fit`. @@ -645,6 +690,10 @@ def export_script( raise_if_exists: bool (default=False) If True, raise an error if the file already exists. If False, overwrite `file` if it already exists. 
+ + Returns + ------- + script: str """ if self.model is None: raise RuntimeError(STR_NO_OPTIMAL_PIPELINE) @@ -662,11 +711,10 @@ def export_script( with open(file, "w") as fh: fh.write(script_text) subprocess.call(["black", file]) - else: - return script_text + return script_text - def _safe_outside_call(self, fn): - """ Calls fn logging and ignoring all exceptions except TimeoutException. """ + def _safe_outside_call(self, fn: Callable) -> None: + """Calls fn logging and ignoring all exceptions except TimeoutException.""" try: fn() except stopit.utils.TimeoutException: @@ -678,7 +726,10 @@ def _safe_outside_call(self, fn): # Note KeyboardInterrupts are not exceptions and get elevated to the caller. log.warning("Exception during callback.", exc_info=True) - if self._time_manager.current_activity.exceeded_limit(margin=3.0): + if ( + self._time_manager.current_activity + and self._time_manager.current_activity.exceeded_limit(margin=3.0) + ): # If time exceeds during a safe callback, the timeout exception *might* # have been swallowed. This can result in GAMA running indefinitely. # However in rare conditions it can be that the TimeoutException is still @@ -687,12 +738,12 @@ def _safe_outside_call(self, fn): # since it should have been handled (3 seconds). raise stopit.utils.TimeoutException - def _on_evaluation_completed(self, evaluation: Evaluation): + def _on_evaluation_completed(self, evaluation: Evaluation) -> None: for callback in self._subscribers["evaluation_completed"]: self._safe_outside_call(partial(callback, evaluation)) def evaluation_completed(self, callback: Callable[[Evaluation], Any]) -> None: - """ Register a callback function that is called when an evaluation is completed. + """Register a callback function that is called when an evaluation is completed. 
Parameters ---------- diff --git a/gama/genetic_programming/compilers/scikitlearn.py b/gama/genetic_programming/compilers/scikitlearn.py index f5ecad30..312ee2e9 100644 --- a/gama/genetic_programming/compilers/scikitlearn.py +++ b/gama/genetic_programming/compilers/scikitlearn.py @@ -6,7 +6,12 @@ import stopit from sklearn.base import TransformerMixin, is_classifier -from sklearn.model_selection import ShuffleSplit, cross_validate, check_cv +from sklearn.model_selection import ( + ShuffleSplit, + cross_validate, + check_cv, + StratifiedShuffleSplit, +) from sklearn.pipeline import Pipeline from gama.utilities.evaluation_library import Evaluation @@ -27,20 +32,20 @@ def primitive_node_to_sklearn(primitive_node: PrimitiveNode) -> object: def compile_individual( individual: Individual, - parameter_checks=None, - preprocessing_steps: Sequence[Tuple[str, TransformerMixin]] = None, + _parameter_checks=None, + preprocessing_steps: Optional[Sequence[Tuple[str, TransformerMixin]]] = None, ) -> Pipeline: steps = [ (str(i), primitive_node_to_sklearn(primitive)) for i, primitive in enumerate(individual.primitives) ] if preprocessing_steps: - steps = steps + list(reversed(preprocessing_steps)) + steps += list(reversed(preprocessing_steps)) return Pipeline(list(reversed(steps))) -def object_is_valid_pipeline(o): - """ Determines if object behaves like a scikit-learn pipeline. 
""" +def object_is_valid_pipeline(o: object) -> bool: + """Determines if object behaves like a scikit-learn pipeline.""" return ( o is not None and hasattr(o, "fit") @@ -50,9 +55,15 @@ def object_is_valid_pipeline(o): def evaluate_pipeline( - pipeline, x, y_train, timeout: float, metrics: Tuple[Metric], cv=5, subsample=None, + pipeline, + x, + y_train, + timeout: float, + metrics: Tuple[Metric], + cv=5, + subsample=None, ) -> Tuple: - """ Score `pipeline` with k-fold CV according to `metrics` on (a subsample of) X, y + """Score `pipeline` with k-fold CV according to `metrics` on (a subsample of) X, y Returns ------- @@ -63,36 +74,59 @@ def evaluate_pipeline( error: None if successful, otherwise an Exception """ if not object_is_valid_pipeline(pipeline): - raise TypeError(f"Pipeline must not be None and requires fit, predict, steps.") - if not timeout > 0: + raise TypeError("Pipeline must not be None and requires fit, predict, steps.") + if timeout <= 0: raise ValueError(f"`timeout` must be greater than 0, is {timeout}.") prediction, estimators = None, None # default score for e.g. timeout or failure scores = tuple([float("-inf")] * len(metrics)) + is_classification = is_classifier(pipeline) with stopit.ThreadingTimeout(timeout) as c_mgr: try: - if isinstance(subsample, int) and subsample < len(y_train): - sampler = ShuffleSplit(n_splits=1, train_size=subsample, random_state=0) - idx, _ = next(sampler.split(x)) - x, y_train = x.iloc[idx, :], y_train[idx] + # When splits are generated (i.e., cv is an int), they are deterministic + splitter = check_cv(cv, y_train, classifier=is_classification) + + require_subsample = ( + isinstance(subsample, int) and subsample < len(y_train) + ) or (isinstance(subsample, float) and subsample < 1.0) + + if require_subsample: + # We subsample the training sets, but not the test sets. + # This allows for performance comparisons across subsample levels. 
+ new_splits = [] + for train, test in splitter.split(x, y_train): + if is_classification: + sampler = StratifiedShuffleSplit( + n_splits=1, train_size=subsample, random_state=0 + ) + else: + sampler = ShuffleSplit( + n_splits=1, train_size=subsample, random_state=0 + ) + full_train_x, full_train_y = x.iloc[train, :], y_train[train] + subsample_idx, _ = next(sampler.split(full_train_x, full_train_y)) + new_splits.append((subsample_idx, test)) + splitter = new_splits - splitter = check_cv(cv, y_train, is_classifier(pipeline)) result = cross_validate( pipeline, x, y_train, cv=splitter, return_estimator=True, - scoring=[m.name for m in metrics], + scoring=dict([(m.name, m) for m in metrics]), error_score="raise", ) - scores = tuple([np.mean(result[f"test_{m.name}"]) for m in metrics]) + scores = tuple(np.mean(result[f"test_{m.name}"]) for m in metrics) estimators = result["estimator"] - for (estimator, (_, test)) in zip(estimators, splitter.split(x, y_train)): - if any([m.requires_probabilities for m in metrics]): + splitter = ( + splitter if isinstance(splitter, list) else splitter.split(x, y_train) + ) + for estimator, (_, test) in zip(estimators, splitter): + if any(m.requires_probabilities for m in metrics): fold_pred = estimator.predict_proba(x.iloc[test, :]) else: fold_pred = estimator.predict(x.iloc[test, :]) @@ -104,9 +138,6 @@ def evaluate_pipeline( prediction = np.empty(shape=(len(y_train),)) prediction[test] = fold_pred - # prediction, scores, estimators = cross_val_predict_score( - # pipeline, x, y_train, cv=cv, metrics=metrics - # ) except stopit.TimeoutException: # This exception is handled by the ThreadingTimeout context manager. raise @@ -120,12 +151,11 @@ def evaluate_pipeline( # This indicates that the outer context manager (the ea) timed out. raise stopit.utils.TimeoutException() - if not c_mgr: - # For now we treat an eval timeout the same way as - # e.g. NaN exceptions and use the default score. 
- return prediction, scores, estimators, stopit.TimeoutException() - - return prediction, tuple(scores), estimators, None + return ( + (prediction, tuple(scores), estimators, None) + if c_mgr + else (prediction, scores, estimators, stopit.TimeoutException()) + ) def evaluate_individual( @@ -136,7 +166,7 @@ def evaluate_individual( add_length_to_score: bool = True, **kwargs, ) -> Evaluation: - """ Evaluate the pipeline specified by individual, and record + """Evaluate the pipeline specified by individual, and record Parameters ---------- diff --git a/gama/genetic_programming/components/individual.py b/gama/genetic_programming/components/individual.py index 729afee2..83f161f0 100644 --- a/gama/genetic_programming/components/individual.py +++ b/gama/genetic_programming/components/individual.py @@ -1,13 +1,15 @@ import uuid from typing import List, Callable, Optional, Dict, Any +from sklearn.pipeline import Pipeline + from .fitness import Fitness from .primitive_node import PrimitiveNode from .terminal import Terminal class Individual: - """ Collection of PrimitiveNodes which together specify a machine learning pipeline. + """Collection of PrimitiveNodes which together specify a machine learning pipeline. 
Parameters ---------- @@ -23,44 +25,44 @@ def __init__( ): self.fitness: Optional[Fitness] = None self.main_node = main_node - self.meta: Dict[str, Any] = dict() + self.meta: Dict[str, Any] = {} self._id = uuid.uuid4() self._to_pipeline = to_pipeline - def __eq__(self, other): + def __eq__(self, other) -> bool: return isinstance(other, Individual) and other._id == self._id - def __hash__(self): + def __hash__(self) -> int: return hash(self._id) - def __str__(self): + def __str__(self) -> str: return ( f"Individual {self._id}\n" f"Pipeline: {self.pipeline_str()}\nFitness: {self.fitness}" ) @property - def pipeline(self): - """ Calls the `to_pipeline` method on itself.""" + def pipeline(self) -> Pipeline: + """Calls the `to_pipeline` method on itself.""" if self._to_pipeline is None: raise AttributeError( "pipeline not available because `to_pipeline` was not set on __init__." ) return self._to_pipeline(self) - def short_name(self, step_separator: str = ">"): - """ str: e.g. "Binarizer>BernoulliNB" """ + def short_name(self, step_separator: str = ">") -> str: + """str: e.g. "Binarizer>BernoulliNB" """ return step_separator.join( [str(primitive._primitive) for primitive in reversed(self.primitives)] ) - def pipeline_str(self): - """ str: e.g. "BernoulliNB(Binarizer(data, Binarizer.threshold=0.6), BernoulliNB.alpha=1.0)" """ # noqa: E501 + def pipeline_str(self) -> str: + """str: e.g., "BernoulliNB(Binarizer(data, Binarizer.threshold=0.6), BernoulliNB.alpha=1.0)" """ # noqa: E501 return str(self.main_node) @property def primitives(self) -> List[PrimitiveNode]: - """ Lists all primitive nodes, starting with the Individual's main node. """ + """Lists all primitive nodes, starting with the Individual's main node.""" primitives = [self.main_node] current_node = self.main_node._data_node while isinstance(current_node, PrimitiveNode): # i.e. 
not DATA_TERMINAL @@ -70,11 +72,11 @@ def primitives(self) -> List[PrimitiveNode]: @property def terminals(self) -> List[Terminal]: - """ Lists all terminals connected to the Individual's primitive nodes. """ + """Lists all terminals connected to the Individual's primitive nodes.""" return [terminal for prim in self.primitives for terminal in prim._terminals] - def replace_terminal(self, position: int, new_terminal: Terminal): - """ Replace the terminal at `position` by `new_terminal` in-place. + def replace_terminal(self, position: int, new_terminal: Terminal) -> None: + """Replace the terminal at `position` by `new_terminal` in-place. Parameters ---------- @@ -87,15 +89,14 @@ def replace_terminal(self, position: int, new_terminal: Terminal): for primitive in self.primitives: if scan_position + len(primitive._terminals) > position: terminal_to_be_replaced = primitive._terminals[position - scan_position] - if terminal_to_be_replaced.identifier == new_terminal.identifier: - primitive._terminals[position - scan_position] = new_terminal - return - else: + if terminal_to_be_replaced.identifier != new_terminal.identifier: raise ValueError( f"New terminal does not share output type with the old." f"Old: {terminal_to_be_replaced.identifier}" f"New: {new_terminal.identifier}." ) + primitive._terminals[position - scan_position] = new_terminal + return else: scan_position += len(primitive._terminals) if scan_position < position: @@ -104,7 +105,7 @@ def replace_terminal(self, position: int, new_terminal: Terminal): ) def replace_primitive(self, position: int, new_primitive: PrimitiveNode): - """ Replace the PrimitiveNode at `position` by `new_primitive`. + """Replace the PrimitiveNode at `position` by `new_primitive`. 
Parameters ---------- @@ -131,15 +132,19 @@ def replace_primitive(self, position: int, new_primitive: PrimitiveNode): else: last_primitive._data_node = new_primitive - def copy_as_new(self): - """ Make deep copy of self, but with fitness None and assigned a new id. """ + def copy_as_new(self) -> "Individual": + """Make deep copy of self, but with fitness None and assigned a new id.""" return Individual(self.main_node.copy(), to_pipeline=self._to_pipeline) @classmethod def from_string( - cls, string: str, primitive_set: dict, to_pipeline: Optional[Callable] = None - ): - """ Construct an Individual from its `pipeline_str` representation. + cls, + string: str, + primitive_set: dict, + to_pipeline: Optional[Callable] = None, + strict: bool = True, + ) -> "Individual": + """Construct an Individual from its `pipeline_str` representation. Parameters ---------- @@ -151,10 +156,15 @@ def from_string( The function to convert the Individual into a pipeline representation. If `None`, the individuals `pipeline` property will not be available. + strict: bool (default=True) + Require each primitives has all required terminals present in `string`. + Non-strict matching may be useful when constructing individuals from + and old log with a slightly different search space. + Returns ------- Individual An individual as defined by `str`. """ - expression = PrimitiveNode.from_string(string, primitive_set) + expression = PrimitiveNode.from_string(string, primitive_set, strict) return cls(expression, to_pipeline=to_pipeline) diff --git a/gama/genetic_programming/components/primitive.py b/gama/genetic_programming/components/primitive.py index ad0ebe01..51ad02b8 100644 --- a/gama/genetic_programming/components/primitive.py +++ b/gama/genetic_programming/components/primitive.py @@ -2,18 +2,18 @@ class Primitive(NamedTuple): - """ Defines an operator which takes input and produces output. + """Defines an operator which takes input and produces output. E.g. 
a preprocessing or classification algorithm. """ - input: Tuple[str] + input: Tuple[str, ...] output: str identifier: Callable - def __str__(self): - """ str: e.g. "FastICA" """ + def __str__(self) -> str: + """str: e.g. "FastICA" """ return self.identifier.__name__ - def __repr__(self): + def __repr__(self) -> str: return str(self) diff --git a/gama/genetic_programming/components/primitive_node.py b/gama/genetic_programming/components/primitive_node.py index 4e11b6ba..0a6671c9 100644 --- a/gama/genetic_programming/components/primitive_node.py +++ b/gama/genetic_programming/components/primitive_node.py @@ -1,10 +1,10 @@ -from typing import List, Union +from typing import List, Union, cast from .terminal import DATA_TERMINAL, Terminal from .primitive import Primitive class PrimitiveNode: - """ An instantiation for a Primitive with specific Terminals. + """An instantiation for a Primitive with specific Terminals. Parameters ---------- @@ -26,22 +26,21 @@ def __init__( self._data_node = data_node self._terminals = sorted(terminals, key=lambda t: str(t)) - def __str__(self): - """ Recursively stringify all primitive nodes (primitive and hyperparameters). + def __str__(self) -> str: + """Recursively stringify all primitive nodes (primitive and hyperparameters). Examples: - "GaussianNB(data)" - "BernoulliNB(data, alpha=1.0)" - "BernoulliNB(FastICA(data, tol=0.5), alpha=1.0)" """ - if self._terminals: - terminal_str = ", ".join([repr(terminal) for terminal in self._terminals]) - return f"{self._primitive}({self._data_node}, {terminal_str})" - else: + if not self._terminals: return f"{self._primitive}({self._data_node})" + terminal_str = ", ".join([repr(terminal) for terminal in self._terminals]) + return f"{self._primitive}({self._data_node}, {terminal_str})" @property def str_nonrecursive(self) -> str: - """ Stringify all primitive node without data node (primitive and hyperparameters). 
+ """Stringify primitive node with its hyperparameter configuration Examples: - "GaussianNB()" - "BernoulliNB(alpha=1.0)" @@ -49,11 +48,11 @@ def str_nonrecursive(self) -> str: terminal_str = ", ".join([str(terminal) for terminal in self._terminals]) return f"{self._primitive}({terminal_str})" - def copy(self): - """ Copies the object. Shallow for terminals, deep for data_node. """ - if self._data_node == DATA_TERMINAL: - data_node_copy = DATA_TERMINAL - else: + def copy(self) -> "PrimitiveNode": + """Copies the object. Shallow for terminals, deep for data_node.""" + if isinstance(self._data_node, str) and self._data_node == DATA_TERMINAL: + data_node_copy = DATA_TERMINAL # type: Union[str, PrimitiveNode] + elif isinstance(self._data_node, PrimitiveNode): data_node_copy = self._data_node.copy() return PrimitiveNode( primitive=self._primitive, @@ -62,8 +61,10 @@ def copy(self): ) @classmethod - def from_string(cls, string: str, primitive_set: dict): - """ Create a PrimitiveNode from string formatted like PrimitiveNode.__str__ + def from_string( + cls, string: str, primitive_set: dict, strict: bool = True + ) -> "PrimitiveNode": + """Create a PrimitiveNode from string formatted like PrimitiveNode.__str__ Parameters ---------- @@ -71,6 +72,10 @@ def from_string(cls, string: str, primitive_set: dict): A string formatted similar to PrimitiveNode.__str__ primitive_set: dict The dictionary defining all Terminals and Primitives. + strict: bool (default=True) + Require each primitives has all required terminals present in `string`. + Non-strict matching may be useful when constructing individuals from + and old log with a slightly different search space. 
Returns ------- @@ -97,15 +102,15 @@ def from_string(cls, string: str, primitive_set: dict): for terminal_string in terminal_set.split(", ") ] missing = set(primitive.input) - set(map(lambda t: t.identifier, terminals)) - if missing: + if missing and strict: raise ValueError(f"terminals {missing} for primitive {primitive}") last_node = cls(primitive, last_node, terminals) - return last_node + return cast(PrimitiveNode, last_node) def find_primitive(primitive_set: dict, primitive_string: str) -> Primitive: - """ Find the Primitive that matches `primitive_string` in `primitive_set`. """ + """Find the Primitive that matches `primitive_string` in `primitive_set`.""" all_primitives = primitive_set[DATA_TERMINAL] + primitive_set["prediction"] for primitive in all_primitives: if repr(primitive) == primitive_string: @@ -114,7 +119,7 @@ def find_primitive(primitive_set: dict, primitive_string: str) -> Primitive: def find_terminal(primitive_set: dict, terminal_string: str) -> Terminal: - """ Find the Terminal that matches `terminal_string` in `primitive_set`. """ + """Find the Terminal that matches `terminal_string` in `primitive_set`.""" term_type, _ = terminal_string.split("=") for terminal in primitive_set[term_type]: if repr(terminal) == terminal_string: diff --git a/gama/genetic_programming/components/terminal.py b/gama/genetic_programming/components/terminal.py index a44ce964..d8c96c39 100644 --- a/gama/genetic_programming/components/terminal.py +++ b/gama/genetic_programming/components/terminal.py @@ -4,7 +4,7 @@ class Terminal(NamedTuple): - """ Specifies a specific value for a specific type or input. + """Specifies a specific value for a specific type or input. E.g. a value for a hyperparameter for an algorithm. """ @@ -13,12 +13,12 @@ class Terminal(NamedTuple): output: str identifier: str - def __str__(self): - """ str: e.g. "tol=0.5" """ + def __str__(self) -> str: + """str: e.g. 
"tol=0.5" """ return f"{self.output}={format_hyperparameter_value(self.value)}" - def __repr__(self): - """ str: e.g. "FastICA.tol=0.5". + def __repr__(self) -> str: + """str: e.g. "FastICA.tol=0.5". If the hyperparameter is shared across primitives, there is no prefix. """ @@ -29,6 +29,6 @@ def format_hyperparameter_value(value: object) -> str: if isinstance(value, str): return f"'{value}'" # Quoted elif callable(value) and hasattr(value, "__name__"): - return f"{value.__name__}" # type: ignore + return f"{value.__name__}" else: return str(value) diff --git a/gama/genetic_programming/crossover.py b/gama/genetic_programming/crossover.py index e1e5febf..620cefa3 100644 --- a/gama/genetic_programming/crossover.py +++ b/gama/genetic_programming/crossover.py @@ -8,7 +8,7 @@ def random_crossover( ind1: Individual, ind2: Individual, max_length: Optional[int] = None ) -> Tuple[Individual, Individual]: - """ Random valid crossover between two individuals in-place, if it can be done. + """Random valid crossover between two individuals in-place, if it can be done. Parameters ---------- @@ -45,7 +45,7 @@ def random_crossover( def crossover_primitives( ind1: Individual, ind2: Individual ) -> Tuple[Individual, Individual]: - """ Crossover two individuals by exchanging any number of preprocessing steps. + """Crossover two individuals by exchanging any number of preprocessing steps. Parameters ---------- @@ -63,7 +63,7 @@ def crossover_primitives( def crossover_terminals( ind1: Individual, ind2: Individual ) -> Tuple[Individual, Individual]: - """ Crossover two individuals in-place by exchanging two Terminals. + """Crossover two individuals in-place by exchanging two Terminals. Terminals must share output type but have different values. @@ -87,7 +87,7 @@ def _shared_terminals( with_indices: bool = True, value_match: str = "different", ) -> Iterable: - """ Finds all shared Terminals between two Individuals. + """Finds all shared Terminals between two Individuals. 
Parameters ---------- @@ -127,7 +127,7 @@ def _shared_terminals( def _valid_crossover_functions(ind1: Individual, ind2: Individual) -> List[Callable]: - """ Find all crossover functions that can produce new individuals from this input. + """Find all crossover functions that can produce new individuals from this input. Parameters ---------- diff --git a/gama/genetic_programming/mutation.py b/gama/genetic_programming/mutation.py index 86cc3fcc..ad0457c8 100644 --- a/gama/genetic_programming/mutation.py +++ b/gama/genetic_programming/mutation.py @@ -12,7 +12,7 @@ def mut_replace_terminal(individual: Individual, primitive_set: dict) -> None: - """ Mutates an Individual in-place by replacing one of its Terminals. + """Mutates an Individual in-place by replacing one of its Terminals. Parameters ---------- @@ -26,7 +26,7 @@ def terminal_replaceable(index_terminal): return len(primitive_set[terminal.identifier]) > 1 terminals = list(filter(terminal_replaceable, enumerate(individual.terminals))) - if len(terminals) == 0: + if not terminals: raise ValueError("Individual has no terminals suitable for mutation.") terminal_index, old = random.choice(terminals) @@ -37,7 +37,7 @@ def terminal_replaceable(index_terminal): def mut_replace_primitive(individual: Individual, primitive_set: dict) -> None: - """ Mutates an Individual in-place by replacing one of its Primitives. + """Mutates an Individual in-place by replacing one of its Primitives. 
Parameters ---------- @@ -51,7 +51,7 @@ def primitive_replaceable(index_primitive): return len(primitive_set[primitive._primitive.output]) > 1 primitives = list(filter(primitive_replaceable, enumerate(individual.primitives))) - if len(primitives) == 0: + if not primitives: raise ValueError("Individual has no primitives suitable for replacement.") primitive_index, old_primitive_node = random.choice(primitives) @@ -64,9 +64,11 @@ def primitive_replaceable(index_primitive): def mut_shrink( - individual: Individual, primitive_set: dict = None, shrink_by: Optional[int] = None + individual: Individual, + _primitive_set: Optional[dict] = None, + shrink_by: Optional[int] = None, ) -> None: - """ Mutates an Individual in-place by removing any number of primitive nodes. + """Mutates an Individual in-place by removing any number of primitive nodes. Primitive nodes are removed from the preprocessing end. @@ -74,7 +76,7 @@ def mut_shrink( ---------- individual: Individual Individual to mutate in-place. - primitive_set: dict, optional + _primitive_set: dict, optional Not used. Present to create a matching function signature with other mutations. shrink_by: int, optional (default=None) Number of primitives to remove. @@ -96,7 +98,7 @@ def mut_shrink( def mut_insert(individual: Individual, primitive_set: dict) -> None: - """ Mutate an Individual in-place by inserting a PrimitiveNode at a random location. + """Mutate an Individual in-place by inserting a PrimitiveNode at a random location. The new PrimitiveNode will not be inserted as root node. @@ -117,7 +119,7 @@ def mut_insert(individual: Individual, primitive_set: dict) -> None: def random_valid_mutation_in_place( individual: Individual, primitive_set: dict, max_length: Optional[int] = None ) -> Callable: - """ Apply a random valid mutation in place. + """Apply a random valid mutation in place. 
The random mutation can be one of: diff --git a/gama/genetic_programming/nsga2.py b/gama/genetic_programming/nsga2.py index 31f862e8..4def2379 100644 --- a/gama/genetic_programming/nsga2.py +++ b/gama/genetic_programming/nsga2.py @@ -12,22 +12,24 @@ class NSGAMeta: - def __init__(self, obj, metrics): + """A helper class for comparing data points for NSGA2.""" + + def __init__(self, obj: object, metrics: List[Callable]): self.obj = obj self.values = tuple((m(obj) for m in metrics)) - self.rank = None - self.distance = 0 - self.dominating = [] + self.rank = 0 + self.distance = 0.0 + self.dominating: List["NSGAMeta"] = [] self.domination_counter = 0 - def dominates(self, other: "NSGAMeta"): - for self_val, other_val in zip(self.values, other.values): - if self_val <= other_val: # or maybe bool: + return all( + self_val > other_val + for self_val, other_val in zip(self.values, other.values) + ) - def crowd_compare(self, other: "NSGAMeta"): - """ Favor higher rank, if equal, favor less crowded. """ + def crowd_compare(self, other: "NSGAMeta") -> int: + """Favor higher rank, if equal, favor less crowded.""" self_better = self.rank < other.rank or ( self.rank == other.rank and self.distance > other.distance ) @@ -37,11 +39,11 @@ def crowd_compare(self, other: "NSGAMeta"): def nsga2_select( population: List[Any], n: int, metrics: List[Callable[[Any], float]] ) -> List[Any]: - """ Select n pairs from the population. + """Select n pairs from the population. - Selection is done through binary tournament selection based on crowding distance. - Parent pairs may be repeated, but each parent pair consists of two unique parents. - The population must be at least size 3 (otherwise it is trivial or impossible). + Selection is done through binary tournament selection based on crowding distance. + Parent pairs may be repeated, but each parent pair consists of two unique parents. + The population must be at least size 3 (otherwise it is trivial or impossible). 
""" if len(population) < 3: raise ValueError("population must be at least size 3 for a pair to be selected") @@ -68,7 +70,7 @@ def nsga2( metrics: List[Callable[[Any], float]], return_meta: bool = False, ) -> List[Any]: - """ Selects n individuals from the population for offspring according to NSGA-II. + """Selects n individuals from the population for offspring according to NSGA-II. Parameters ---------- @@ -104,15 +106,18 @@ def nsga2( selection += fronts[i] else: # Only the least crowded remainder is selected - s = sorted(fronts[i], key=cmp_to_key(lambda x, y: x.crowd_compare(y))) + s = sorted( + fronts[i], + key=cmp_to_key(lambda x, y: x.crowd_compare(y)), # type: ignore + ) selection += s[: (n - len(selection))] # Fill up to n i += 1 - return selection if return_meta else [s.obj for s in selection] + return selection if return_meta else [s.obj for s in selection] # type: ignore def fast_non_dominated_sort(P: List[NSGAMeta]) -> List[List[NSGAMeta]]: - """ Sorts P into Pareto fronts. 
""" + """Sorts P into Pareto fronts.""" fronts: List[List[NSGAMeta]] = [[]] for p, q in itertools.combinations(P, 2): if p.dominates(q): @@ -140,9 +145,11 @@ def fast_non_dominated_sort(P: List[NSGAMeta]) -> List[List[NSGAMeta]]: return fronts -def crowding_distance_assignment(I: List[NSGAMeta]) -> None: +def crowding_distance_assignment( + I: List[NSGAMeta], # noqa: E741 'I' is name in paper +) -> None: for m in range(len(I[0].values)): - I = sorted(I, key=lambda x: x.values[m]) # noqa: E741 'I' is name in paper + I = sorted(I, key=lambda x: x.values[m]) # noqa: E741 I[0].distance = I[-1].distance = float("inf") if ( I[-1].values[m] == I[0].values[m] diff --git a/gama/genetic_programming/operations.py b/gama/genetic_programming/operations.py index 9447e3b3..342da41f 100644 --- a/gama/genetic_programming/operations.py +++ b/gama/genetic_programming/operations.py @@ -1,5 +1,5 @@ import random -from typing import List +from typing import List, Optional from gama.genetic_programming.components import ( Primitive, @@ -12,14 +12,14 @@ def random_terminals_for_primitive( primitive_set: dict, primitive: Primitive ) -> List[Terminal]: - """ Return a list with a random Terminal for each required input to Primitive. """ + """Return a list with a random Terminal for each required input to Primitive.""" return [random.choice(primitive_set[term_type]) for term_type in primitive.input] def random_primitive_node( - output_type: str, primitive_set: dict, exclude: Primitive = None + output_type: str, primitive_set: dict, exclude: Optional[Primitive] = None ) -> PrimitiveNode: - """ Create a PrimitiveNode with specified output_type and random terminals. 
""" + """Create a PrimitiveNode with specified output_type and random terminals.""" primitive = random.choice([p for p in primitive_set[output_type] if p != exclude]) terminals = random_terminals_for_primitive(primitive_set, primitive) return PrimitiveNode(primitive, data_node=DATA_TERMINAL, terminals=terminals) @@ -28,7 +28,7 @@ def random_primitive_node( def create_random_expression( primitive_set: dict, min_length: int = 1, max_length: int = 3 ) -> PrimitiveNode: - """ Create at least min_length and at most max_length chained PrimitiveNodes. """ + """Create at least min_length and at most max_length chained PrimitiveNodes.""" individual_length = random.randint(min_length, max_length) learner_node = random_primitive_node( output_type="prediction", primitive_set=primitive_set diff --git a/gama/genetic_programming/operator_set.py b/gama/genetic_programming/operator_set.py index 426ee198..995f7c17 100644 --- a/gama/genetic_programming/operator_set.py +++ b/gama/genetic_programming/operator_set.py @@ -1,4 +1,10 @@ import logging +from typing import Callable, Dict, List, Optional, Tuple, Any +from gama.genetic_programming.components.primitive_node import PrimitiveNode + +from sklearn.pipeline import Pipeline + +from gama.utilities.evaluation_library import Evaluation from .components import Individual @@ -6,94 +12,79 @@ class OperatorSet: - """ Provides a thin layer for ea operators for logging, callbacks and safety. 
""" + """Provides a thin layer for ea operators for logging, callbacks and safety.""" def __init__( self, - mutate, - mate, - create_from_population, - create_new, - compile_, - eliminate, - evaluate_callback, - max_retry=50, - completed_evaluations=None, + mutate: Callable[[Individual], None], + mate: Callable[[Individual, Individual], Tuple[Individual, Individual]], + create_from_population: Callable[[Any], List[Individual]], + create_new: Callable[[], PrimitiveNode], + compile_: Callable[[Individual], Pipeline], + eliminate: Callable[[List[Individual], int], List[Individual]], + evaluate_callback: Callable[[Evaluation], None], + max_retry: int = 50, + completed_evaluations: Optional[Dict[str, Evaluation]] = None, ): - """ - - :param mutate: - :param mate: - :param create: - :param create_new: - """ - self._mutate = mutate self._mate = mate self._create_from_population = create_from_population self._create_new = create_new self._compile = compile_ - self._safe_compile = None + self._safe_compile: Optional[Callable[[Individual], Pipeline]] = None self._eliminate = eliminate self._max_retry = max_retry self._evaluate = None self._evaluate_callback = evaluate_callback - self.evaluate = None + self.evaluate: Optional[Callable[..., Evaluation]] = None self._completed_evaluations = completed_evaluations def wait_next(self, async_evaluator): + """Wrapper for wait_next() to forward evaluation and log exceptions.""" future = async_evaluator.wait_next() - if future.result is not None: - evaluation = future.result - if self._evaluate_callback is not None: - self._evaluate_callback(evaluation) - - elif future.exception is not None: + if future.result and self._evaluate_callback: + self._evaluate_callback(future.result) + elif future.exception: log.warning(f"Error raised during evaluation: {str(future.exception)}.") return future def try_until_new(self, operator, *args, **kwargs): + """Keep executing `operator` until a new individual is created.""" for _ in 
range(self._max_retry): individual = operator(*args, **kwargs) if str(individual.main_node) not in self._completed_evaluations: return individual - else: - log.debug(f"50 iterations of {operator.__name__} did not yield new ind.") - # For progress on solving this, see #11 - return individual + log.debug(f"50 iterations of {operator.__name__} did not yield new ind.") + # For progress on solving this, see #11 + return individual - def mate(self, ind1: Individual, ind2: Individual, *args, **kwargs): + def mate(self, ind1: Individual, ind2: Individual, *args, **kwargs) -> Individual: def mate_with_log(): new_individual1, new_individual2 = ind1.copy_as_new(), ind2.copy_as_new() self._mate(new_individual1, new_individual2, *args, **kwargs) new_individual1.meta = dict(parents=[ind1._id, ind2._id], origin="cx") return new_individual1 - individual = self.try_until_new(mate_with_log) - return individual + return self.try_until_new(mate_with_log) - def mutate(self, ind: Individual, *args, **kwargs): + def mutate(self, ind: Individual, *args, **kwargs) -> Individual: def mutate_with_log(): new_individual = ind.copy_as_new() mutator = self._mutate(new_individual, *args, **kwargs) new_individual.meta = dict(parents=[ind._id], origin=mutator.__name__) return new_individual - ind = self.try_until_new(mutate_with_log) - return ind + return self.try_until_new(mutate_with_log) - def individual(self, *args, **kwargs): + def individual(self, *args, **kwargs) -> Individual: expression = self._create_new(*args, **kwargs) - if self._safe_compile is not None: - compile_ = self._safe_compile - else: - compile_ = self._compile + compile_ = self._safe_compile or self._compile ind = Individual(expression, to_pipeline=compile_) ind.meta["origin"] = "new" return ind - def create(self, *args, **kwargs): + def create(self, *args, **kwargs) -> List[Individual]: return self._create_from_population(self, *args, **kwargs) def eliminate(self, *args, **kwargs): diff --git 
a/gama/genetic_programming/selection.py b/gama/genetic_programming/selection.py index 17d38ff7..dd403de1 100644 --- a/gama/genetic_programming/selection.py +++ b/gama/genetic_programming/selection.py @@ -16,11 +16,11 @@ def create_from_population( cxpb: float, mutpb: float, ) -> List[Individual]: - """ Creates n new individuals based on the population. """ + """Creates n new individuals based on the population.""" offspring = [] metrics = [lambda ind: ind.fitness.values[0], lambda ind: ind.fitness.values[1]] parent_pairs = nsga2_select(pop, n, metrics) - for (ind1, ind2) in parent_pairs: + for ind1, ind2 in parent_pairs: if random.random() < cxpb and len(_valid_crossover_functions(ind1, ind2)) > 0: ind1 = operator_shell.mate(ind1, ind2) else: diff --git a/gama/logging/GamaReport.py b/gama/logging/GamaReport.py index ba3a0076..56e6af4f 100644 --- a/gama/logging/GamaReport.py +++ b/gama/logging/GamaReport.py @@ -14,10 +14,10 @@ class GamaReport: - """ Contains information parsed from a search captured by a GAMA analysis log. """ + """Contains information parsed from a search captured by a GAMA analysis log.""" - def __init__(self, log_directory: str): - """ Parse the logfile or log lines provided. + def __init__(self, log_directory: str, strict: bool = True): + """Parse the logfile or log lines provided. Parameters ---------- @@ -26,13 +26,19 @@ def __init__(self, log_directory: str): - gama.log - evaluations.log - resources.log + + strict: bool (default=True) + Require each primitives has all required terminals present in `string`. + Non-strict matching may be useful when constructing individuals from + and old log with a slightly different search space. 
""" self._log_directory = os.path.expanduser(log_directory) self.name = os.path.split(log_directory)[-1] self.phases: List[Tuple[str, str, datetime, float]] = [] self._last_tell = 0 self.evaluations: pd.DataFrame = pd.DataFrame() - self.individuals: Dict[str, Individual] = dict() + self.individuals: Dict[str, Individual] = {} + self.strict = strict # Parse initialization/progress information from gama.log with open(os.path.join(log_directory, "gama.log")) as fh: @@ -83,11 +89,13 @@ def tuple_to_metrics(tuple_str): return pd.Series([float(value) for value in tuple_str[1:-1].split(",")]) df[self.metrics] = df.score.apply(tuple_to_metrics) - df.start = pd.to_datetime(df.start) # needed? + df.start = pd.to_datetime( + df.start, format="%Y-%m-%d %H:%M:%S,%f" + ) # needed? df.duration = pd.to_timedelta(df.duration, unit="s") new_individuals = { - id_: Individual.from_string(pipeline, pset) + id_: Individual.from_string(pipeline, pset, strict=self.strict) for id_, pipeline in zip(df.id, df.pipeline) } @@ -113,7 +121,7 @@ def tuple_to_metrics(tuple_str): @property def successful_evaluations(self): - """ Return only evaluations that completed successfully """ + """Return only evaluations that completed successfully""" with pd.option_context("mode.use_inf_as_na", True): return self.evaluations[~self.evaluations[self.metrics].isna().any(axis=1)] @@ -125,4 +133,4 @@ def init_to_hps(init_line: str) -> Dict[str, str]: # only supports one nested level - will do proper parsing later for token in ["()", "(", ")", ",,"]: all_arguments = all_arguments.replace(token, ",") - return dict(hp.split("=") for hp in all_arguments.split(",")) # type: ignore + return dict(hp.split("=") for hp in all_arguments.split(",")) diff --git a/gama/logging/evaluation_logger.py b/gama/logging/evaluation_logger.py index 18d92eba..19df488d 100644 --- a/gama/logging/evaluation_logger.py +++ b/gama/logging/evaluation_logger.py @@ -1,13 +1,14 @@ from datetime import datetime from functools import partial 
import operator -from typing import Optional, Dict, Callable, Iterable +from typing import Any, Optional, Dict, Callable, Iterable from gama.logging import TIME_FORMAT from gama.utilities.evaluation_library import Evaluation -def nested_getattr(o, attr): +def _nested_getattr(o: object, attr: str) -> Any: + """Resolved nested properties, e.g., `individual.fitness.score`.""" for a in attr.split("."): o = getattr(o, a) return o @@ -21,7 +22,7 @@ def __init__( fields: Optional[Dict[str, Callable[[Evaluation], str]]] = None, extra_fields: Optional[Dict[str, Callable[[Evaluation], str]]] = None, ): - """ Formats evaluations for output to a csv file. + """Formats evaluations for output to a csv file. Parameters ---------- @@ -43,16 +44,16 @@ def __init__( if fields is None: self.fields: Dict[str, Callable[[Evaluation], str]] = dict( - id=partial(nested_getattr, attr="individual._id"), + id=partial(_nested_getattr, attr="individual._id"), pid=operator.attrgetter("pid"), - t_start=partial(nested_getattr, attr="individual.fitness.start_time"), + t_start=partial(_nested_getattr, attr="individual.fitness.start_time"), t_wallclock=partial( - nested_getattr, attr="individual.fitness.wallclock_time" + _nested_getattr, attr="individual.fitness.wallclock_time" ), t_process=partial( - nested_getattr, attr="individual.fitness.process_time" + _nested_getattr, attr="individual.fitness.process_time" ), - score=partial(nested_getattr, attr="individual.fitness.values"), + score=partial(_nested_getattr, attr="individual.fitness.values"), pipeline=lambda e: e.individual.pipeline_str(), error=operator.attrgetter("error"), ) @@ -64,17 +65,15 @@ def __init__( self.log_line(list(self.fields)) - def log_line(self, values: Iterable[str]): - """ Appends `values` as a row of separated values to the file. 
""" + def log_line(self, values: Iterable[str]) -> None: + """Appends `values` as a row of separated values to the file.""" with open(self._file_path, "a") as evaluations: evaluations.write(self._sep.join(values) + "\n") - def log_evaluation(self, evaluation): + def log_evaluation(self, evaluation) -> None: values = [getter(evaluation) for getter in self.fields.values()] def format_value(v): - if isinstance(v, datetime): - return v.strftime(TIME_FORMAT) - return str(v) + return v.strftime(TIME_FORMAT) if isinstance(v, datetime) else str(v) self.log_line(map(format_value, values)) diff --git a/gama/logging/utility_functions.py b/gama/logging/utility_functions.py index de4ce274..d99c4981 100644 --- a/gama/logging/utility_functions.py +++ b/gama/logging/utility_functions.py @@ -4,7 +4,7 @@ gama_log = logging.getLogger("gama") -def register_stream_log(verbosity): +def register_stream_log(verbosity: int) -> None: previously_registered_handler = [ handler for handler in gama_log.handlers if hasattr(handler, "tag") ] @@ -21,6 +21,6 @@ def register_stream_log(verbosity): ] stdout_streamhandler = logging.StreamHandler(sys.stdout) - stdout_streamhandler.tag = "machine_set" + setattr(stdout_streamhandler, "tag", "machine_set") stdout_streamhandler.setLevel(verbosity) gama_log.addHandler(stdout_streamhandler) diff --git a/gama/postprocessing/__init__.py b/gama/postprocessing/__init__.py index c00cf547..14d5b62d 100644 --- a/gama/postprocessing/__init__.py +++ b/gama/postprocessing/__init__.py @@ -1,15 +1,26 @@ +from typing import Optional, Sequence, Tuple + +from sklearn.base import TransformerMixin + from gama.postprocessing.base_post_processing import BasePostProcessing from gama.postprocessing.best_fit import BestFitPostProcessing from gama.postprocessing.ensemble import EnsemblePostProcessing class NoPostProcessing(BasePostProcessing): - """ Does nothing, no time will be reserved for post-processing. 
""" + """Does nothing, no time will be reserved for post-processing.""" + + def to_code( + self, preprocessing: Optional[Sequence[Tuple[str, TransformerMixin]]] = None + ) -> str: + raise NotImplementedError( + "NoPostProcessing has no `to_code` function, since no model is selected." + ) def __init__(self, time_fraction: float = 0.0): super().__init__(time_fraction) - def post_process(self, *args, **kwargs): + def post_process(self, *args, **kwargs) -> None: return None diff --git a/gama/postprocessing/base_post_processing.py b/gama/postprocessing/base_post_processing.py index e6562253..b775554a 100644 --- a/gama/postprocessing/base_post_processing.py +++ b/gama/postprocessing/base_post_processing.py @@ -1,5 +1,5 @@ from abc import ABC -from typing import List, Union, Dict, Any, Tuple, TYPE_CHECKING, Sequence +from typing import List, Union, Dict, Any, Tuple, TYPE_CHECKING, Sequence, Optional import pandas as pd from sklearn.base import TransformerMixin @@ -12,7 +12,7 @@ class BasePostProcessing(ABC): - """ All post-processing methods should be derived from this class. + """All post-processing methods should be derived from this class. This class should not be directly used to configure GAMA. """ @@ -27,7 +27,7 @@ def __init__(self, time_fraction: float): self.time_fraction: float = time_fraction self._hyperparameters: Dict[str, Tuple[Any, Any]] = {} - def __str__(self): + def __str__(self) -> str: # Not sure if I should report actual used hyperparameters # (i.e. include default), or only those set by user. user_set_hps = { @@ -42,11 +42,11 @@ def __str__(self): @property def hyperparameters(self) -> Dict[str, Any]: - """ Hyperparameter (name, value) pairs. + """Hyperparameter (name, value) pairs. - Value determined by user > dynamic default > static default. - Dynamic default values only considered if `dynamic_defaults` has been called. - """ + Value determined by user > dynamic default > static default. 
+ Dynamic default values only considered if `dynamic_defaults` has been called. + """ return { parameter: set_value if set_value is not None else default for parameter, (set_value, default) in self._hyperparameters.items() @@ -56,7 +56,8 @@ def _overwrite_hyperparameter_default(self, hyperparameter: str, value: Any): set_value, default_value = self._hyperparameters[hyperparameter] self._hyperparameters[hyperparameter] = (set_value, value) - def dynamic_defaults(self, gama: "Gama"): + def dynamic_defaults(self, gama: "Gama") -> None: + """Configure the post-processing technique based on GAMA properties.""" pass def post_process( @@ -86,9 +87,9 @@ def post_process( raise NotImplementedError("Method must be implemented by child class.") def to_code( - self, preprocessing: Sequence[Tuple[str, TransformerMixin]] = None + self, preprocessing: Optional[Sequence[Tuple[str, TransformerMixin]]] = None ) -> str: - """ Generate Python code to reconstruct a pipeline that constructs the model. + """Generate Python code to reconstruct a pipeline that constructs the model. Parameters ---------- diff --git a/gama/postprocessing/best_fit.py b/gama/postprocessing/best_fit.py index d10bbc27..64b19e02 100644 --- a/gama/postprocessing/best_fit.py +++ b/gama/postprocessing/best_fit.py @@ -14,7 +14,7 @@ class BestFitPostProcessing(BasePostProcessing): - """ Post processing technique which trains the best found single pipeline. 
""" + """Post processing technique which trains the best found single pipeline.""" def __init__(self, time_fraction: float = 0.1): super().__init__(time_fraction) @@ -27,7 +27,7 @@ def post_process( return self._selected_individual.pipeline.fit(x, y) def to_code( - self, preprocessing: Sequence[Tuple[str, TransformerMixin]] = None + self, preprocessing: Optional[Sequence[Tuple[str, TransformerMixin]]] = None ) -> str: if self._selected_individual is None: raise RuntimeError("`to_code` can only be called after `post_process`.") @@ -40,5 +40,4 @@ def to_code( imports = imports.union({format_import(t) for _, t in preprocessing}) pipeline_statement = format_pipeline(steps) - script = "\n".join(imports) + "\n\n" + pipeline_statement - return script + return "\n".join(imports) + "\n\n" + pipeline_statement diff --git a/gama/postprocessing/ensemble.py b/gama/postprocessing/ensemble.py index 12ab4636..ef18e589 100644 --- a/gama/postprocessing/ensemble.py +++ b/gama/postprocessing/ensemble.py @@ -35,7 +35,7 @@ def __init__( hillclimb_size: Optional[int] = 10_000, max_models: Optional[int] = 200, ): - """ Ensemble construction per Caruana et al. + """Ensemble construction per Caruana et al. 
Parameters ---------- @@ -82,7 +82,7 @@ def post_process( return self._ensemble def to_code( - self, preprocessing: Sequence[Tuple[str, TransformerMixin]] = None + self, preprocessing: Optional[Sequence[Tuple[str, TransformerMixin]]] = None ) -> str: if isinstance(self._ensemble, EnsembleClassifier): voter = "VotingClassifier" @@ -108,9 +108,9 @@ def to_code( if isinstance(self._ensemble, EnsembleClassifier): if self._ensemble._metric.requires_probabilities: - voting = ",'soft'" + voting = ", voting='soft'" else: - voting = ", 'hard'" + voting = ", voting='hard'" else: voting = "" # This parameter does not exist for VotingRegressor @@ -122,7 +122,7 @@ def to_code( + "\n\n" + "\n\n".join(pipelines) + "\n" - + f"ensemble = {voter}([{estimators}]{voting},{weights})\n" + + f"ensemble = {voter}([{estimators}]{voting}, weights={weights})\n" ) if preprocessing is not None: trans_strs = transformers_to_str([t for _, t in preprocessing]) @@ -137,7 +137,7 @@ def __init__( self, metric, y: pd.DataFrame, - evaluation_library: EvaluationLibrary = None, + evaluation_library: EvaluationLibrary, shrink_on_pickle=True, downsample_to: Optional[int] = 10_000, use_top_n_only: Optional[int] = 200, @@ -166,11 +166,7 @@ def __init__( "metric must be specified as string or `gama.ea.metrics.Metric`." ) - if evaluation_library is None: - raise ValueError( - "`evaluation_library` is None but must be EvaluationLibrary." - ) - elif not isinstance(evaluation_library, EvaluationLibrary): + if not isinstance(evaluation_library, EvaluationLibrary): raise TypeError( "`evaluation_library` must be of type " "gama.utilities.evaluation_library.EvaluationLibrary." 
@@ -227,27 +223,27 @@ def _ensemble_validation_score(self, prediction_to_validate=None): raise NotImplementedError("Must be implemented by child class.") def _total_fit_weights(self): - return sum([weight for (model, weight) in self._fit_models]) + return sum(weight for (model, weight) in self._fit_models) def _total_model_weights(self): - return sum([weight for (model, weight) in self._models.values()]) + return sum(weight for (model, weight) in self._models.values()) def _averaged_validation_predictions(self): - """ Weighted average of predictions of current models on the hillclimb set. """ + """Weighted average of predictions of current models on the hillclimb set.""" weighted_sum_predictions = sum( - [model.predictions * weight for (model, weight) in self._models.values()] + model.predictions * weight for (model, weight) in self._models.values() ) return weighted_sum_predictions / self._total_model_weights() def build_initial_ensemble(self, n: int): - """ Add top n models in EvaluationLibrary to the ensemble. + """Add top n models in EvaluationLibrary to the ensemble. Parameters ---------- n: int Number of models to include. """ - if not n > 0: + if n <= 0: raise ValueError("Ensemble must include at least one model.") if self._models: log.warning( @@ -261,27 +257,25 @@ def build_initial_ensemble(self, n: int): self._add_model(model) log.debug( - "Initial ensemble created with score {}".format( - self._ensemble_validation_score() - ) + f"Initial ensemble created with score {self._ensemble_validation_score()}" ) def _add_model(self, model, add_weight=1): - """ Add a specific model to the ensemble or increases its weight. 
""" + """Add a specific model to the ensemble or increases its weight.""" model, weight = self._models.pop(model.individual._id, (model, 0)) new_weight = weight + add_weight self._models[model.individual._id] = (model, new_weight) log.debug(f"Weight {model.individual.short_name('>')} set to {new_weight}.") def expand_ensemble(self, n: int): - """ Adds new models to the ensemble based on earlier given data. + """Adds new models to the ensemble based on earlier given data. Parameters ---------- n: int Number of models to add to current ensemble. """ - if not n > 0: + if n <= 0: raise ValueError("n must be greater than 0.") for _ in range(n): @@ -303,13 +297,12 @@ def expand_ensemble(self, n: int): self._add_model(best_addition) self._internal_score = best_addition_score log.info( - "Ensemble size {} , best score: {}".format( - self._total_model_weights(), best_addition_score - ) + f"Ensemble size {self._total_model_weights()}, " + f"best score: {best_addition_score}" ) def fit(self, x, y, timeout=1e6): - """ Constructs an Ensemble out of the library of models. + """Constructs an Ensemble out of the library of models. Parameters ---------- @@ -356,7 +349,7 @@ def fit(self, x, y, timeout=1e6): def _get_weighted_mean_predictions(self, X, predict_method="predict"): weighted_predictions = [] - for (model, weight) in self._fit_models: + for model, weight in self._fit_models: target_prediction = getattr(model, predict_method)(X) if self._prediction_transformation: target_prediction = self._prediction_transformation(target_prediction) @@ -390,7 +383,7 @@ def __getstate__(self): def fit_and_weight(args): - """ Fit the pipeline given the data. Update weight to 0 if fitting fails. + """Fit the pipeline given the data. Update weight to 0 if fitting fails. 
Parameters ---------- @@ -445,12 +438,11 @@ def _ensemble_validation_score(self, prediction_to_validate=None): if self._metric.requires_probabilities: return self._metric.maximizable_score(self._y, prediction_to_validate) - else: - # argmax returns (N, 1) matrix, need to squeeze it to (N,) for scoring. - class_predictions = self._one_hot_encoder.inverse_transform( - prediction_to_validate.toarray() - ) - return self._metric.maximizable_score(self._y, class_predictions) + # argmax returns (N, 1) matrix, need to squeeze it to (N,) for scoring. + class_predictions = self._one_hot_encoder.inverse_transform( + prediction_to_validate.toarray() + ) + return self._metric.maximizable_score(self._y, class_predictions) def predict(self, X): if self._metric.requires_probabilities: @@ -475,12 +467,11 @@ def predict(self, X): def predict_proba(self, X): if self._metric.requires_probabilities: return self._get_weighted_mean_predictions(X, "predict_proba") - else: - log.warning( - "Ensemble was tuned with a class label predictions metric, " - "not probabilities. Using weighted mean of class predictions." - ) - return self._get_weighted_mean_predictions(X, "predict").toarray() + log.warning( + "Ensemble was tuned with a class label predictions metric, " + "not probabilities. Using weighted mean of class predictions." + ) + return self._get_weighted_mean_predictions(X, "predict").toarray() class EnsembleRegressor(Ensemble): @@ -502,7 +493,7 @@ def build_fit_ensemble( evaluation_library: EvaluationLibrary, encoder: Optional[object] = None, ) -> Ensemble: - """ Construct an Ensemble of models, optimizing for metric. """ + """Construct an Ensemble of models, optimizing for metric.""" start_build = time.time() log.debug("Building ensemble.") @@ -528,7 +519,7 @@ def build_fit_ensemble( ensemble.expand_ensemble(remainder) build_time = time.time() - start_build - timeout = timeout - build_time + timeout -= build_time log.info(f"Ensemble build took {build_time}s. 
Fit with timeout {timeout}s.") ensemble.fit(x, y, timeout) except Exception as e: diff --git a/gama/search_methods/asha.py b/gama/search_methods/asha.py index dc74e931..66ceb83a 100644 --- a/gama/search_methods/asha.py +++ b/gama/search_methods/asha.py @@ -1,7 +1,7 @@ from functools import partial import logging import math -from typing import List, Optional, Dict, Tuple, Any +from typing import List, Optional, Dict, Tuple, Any, Union import pandas as pd import stopit @@ -16,7 +16,7 @@ class AsynchronousSuccessiveHalving(BaseSearch): - """ Asynchronous Halving Algorithm by Li et al. + """Asynchronous Halving Algorithm by Li et al. paper: https://arxiv.org/abs/1810.05934 @@ -24,43 +24,58 @@ class AsynchronousSuccessiveHalving(BaseSearch): ---------- reduction_factor: int, optional (default=3) Reduction factor of candidates between each rung. - minimum_resource: int, optional (default=100) + minimum_resource: int or float, optional (default=0.125) Number of samples to use in the lowest rung. - maximum_resource: int, optional (default=number of samples in the dataset) + If integer, it specifies the number of rows. + If float, it specifies the fraction of the dataset. + maximum_resource: int or float optional (default=1.0) Number of samples to use in the top rung. - This should not exceed the number of samples in the data. - minimum_early_stopping_rate: int (default=1) + If integer, it specifies the number of rows. + If float, it specifies the fraction of the dataset. + minimum_early_stopping_rate: int (default=0) Number of lowest rungs to skip. 
""" def __init__( self, reduction_factor: Optional[int] = None, - minimum_resource: Optional[int] = None, - maximum_resource: Optional[int] = None, + minimum_resource: Optional[Tuple[int, float]] = None, + maximum_resource: Optional[Tuple[int, float]] = None, minimum_early_stopping_rate: Optional[int] = None, ): super().__init__() # maps hyperparameter -> (set value, default) self._hyperparameters: Dict[str, Tuple[Any, Any]] = dict( reduction_factor=(reduction_factor, 3), - minimum_resource=(minimum_resource, 100), - maximum_resource=(maximum_resource, 100_000), - minimum_early_stopping_rate=(minimum_early_stopping_rate, 1), + minimum_resource=(minimum_resource, 0.125), + maximum_resource=(maximum_resource, 1.0), + minimum_early_stopping_rate=(minimum_early_stopping_rate, 0), ) self.output = [] self.logger = partial( EvaluationLogger, - extra_fields=dict(rung=lambda e: e.individual.meta.get("rung", "unknown")), + extra_fields=dict( + rung=lambda e: e.individual.meta.get("rung", "unknown"), + subsample=lambda e: e.individual.meta.get("subsample", "unknown"), + ), ) - def dynamic_defaults(self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float): - # `maximum_resource` is the number of samples used in the highest rung. - # this typically should be the number of samples in the (training) dataset. - self._overwrite_hyperparameter_default("maximum_resource", len(y)) - - def search(self, operations: OperatorSet, start_candidates: List[Individual]): + def dynamic_defaults( + self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float + ) -> None: + set_max, default = self._hyperparameters["maximum_resource"] + if set_max is not None and len(y) < set_max: + # todo: take into account the evaluation procedure as well. + logging.warning( + f"`maximum_resource` was set to {set_max}, but the dataset only" + f"contains {len(y)} samples. Reverting to default (1.0) instead." 
+ ) + self._hyperparameters["maximum_resource"] = (None, default) + + def search( + self, operations: OperatorSet, start_candidates: List[Individual] + ) -> None: self.output = asha( operations, start_candidates=start_candidates, **self.hyperparameters ) @@ -70,12 +85,12 @@ def asha( operations: OperatorSet, start_candidates: List[Individual], reduction_factor: int = 3, - minimum_resource: int = 100, - maximum_resource: int = 100_000, - minimum_early_stopping_rate: int = 1, + minimum_resource: Union[int, float] = 0.125, + maximum_resource: Union[int, float] = 1.0, + minimum_early_stopping_rate: int = 0, max_full_evaluations: Optional[int] = None, ) -> List[Individual]: - """ Asynchronous Halving Algorithm by Li et al. + """Asynchronous Halving Algorithm by Li et al. paper: https://arxiv.org/abs/1810.05934 @@ -87,11 +102,14 @@ def asha( A list which contains the set of best found individuals during search. reduction_factor: int (default=3) Reduction factor of candidates between each rung. - minimum_resource: int (default=100) + minimum_resource: int or float, optional (default=0.125) Number of samples to use in the lowest rung. - maximum_resource: int (default=100_000) + If integer, it specifies the number of rows. + If float, it specifies the fraction of the dataset. + maximum_resource: int or float optional (default=1.0) Number of samples to use in the top rung. - This should not exceed the number of samples in the data. + If integer, it specifies the number of rows. + If float, it specifies the fraction of the dataset. minimum_early_stopping_rate: int (default=1) Number of lowest rungs to skip. max_full_evaluations: Optional[int] (default=None) @@ -104,6 +122,8 @@ def asha( Individuals of the highest rung in which at least one individual has been evaluated. 
""" + if not isinstance(minimum_resource, type(maximum_resource)): + raise ValueError("Currently minimum and maximum resource must same type.") # Note that here we index the rungs by all possible rungs (0..ceil(log_eta(R/r))), # and ignore the first minimum_early_stopping_rate rungs. @@ -113,7 +133,7 @@ def asha( ) rungs = range(minimum_early_stopping_rate, max_rung + 1) rung_resources = { - rung: min(minimum_resource * (reduction_factor ** rung), maximum_resource) + rung: min(minimum_resource * (reduction_factor**rung), maximum_resource) for rung in rungs } evaluate = partial( @@ -141,10 +161,9 @@ def get_job(): promoted_individuals[rung].append(to_promote) return to_promote[1], rung + 1 - if start_candidates is not None and len(start_candidates) > 0: + if start_candidates: return start_candidates.pop(), minimum_early_stopping_rate - else: - return operations.individual(), minimum_early_stopping_rate + return operations.individual(), minimum_early_stopping_rate try: with AsyncEvaluator() as async_: @@ -191,6 +210,7 @@ def start_new_job(): def evaluate_on_rung(individual, rung, max_rung, evaluate_individual, *args, **kwargs): evaluation = evaluate_individual(individual, *args, **kwargs) evaluation.individual.meta["rung"] = rung + evaluation.individual.meta["subsample"] = kwargs.get("subsample") # We want to avoid saving evaluations that are not on the max rung to disk, # because we only want to use pipelines evaluated on the max rung after search. # We're working on a better way to relay this information, this is temporary. diff --git a/gama/search_methods/async_ea.py b/gama/search_methods/async_ea.py index b4f5792b..b7d6bd6e 100644 --- a/gama/search_methods/async_ea.py +++ b/gama/search_methods/async_ea.py @@ -14,7 +14,7 @@ class AsyncEA(BaseSearch): - """ Perform asynchronous evolutionary optimization. + """Perform asynchronous evolutionary optimization. 
Parameters ---------- @@ -45,7 +45,7 @@ def __init__( self.output = [] def get_parent(evaluation, n) -> str: - """ retrieves the nth parent if it exists, '' otherwise. """ + """retrieves the nth parent if it exists, '' otherwise.""" if len(evaluation.individual.meta.get("parents", [])) > n: return evaluation.individual.meta["parents"][n] return "" @@ -59,10 +59,14 @@ def get_parent(evaluation, n) -> str: ), ) - def dynamic_defaults(self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float): + def dynamic_defaults( + self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float + ) -> None: pass - def search(self, operations: OperatorSet, start_candidates: List[Individual]): + def search( + self, operations: OperatorSet, start_candidates: List[Individual] + ) -> None: self.output = async_ea( operations, self.output, start_candidates, **self.hyperparameters ) @@ -76,7 +80,7 @@ def async_ea( max_n_evaluations: Optional[int] = None, population_size: int = 50, ) -> List[Individual]: - """ Perform asynchronous evolutionary optimization with given operators. + """Perform asynchronous evolutionary optimization with given operators. Parameters ---------- @@ -122,14 +126,19 @@ def async_ea( n_evaluated_individuals < max_n_evaluations ): future = ops.wait_next(async_) - if future.exception is None: - individual = future.result.individual - current_population.append(individual) + if future.exception is None and future.result.error is None: + current_population.append(future.result.individual) if len(current_population) > max_pop_size: to_remove = ops.eliminate(current_population, 1) current_population.remove(to_remove[0]) - if len(current_population) > 2: + if async_.job_queue_size <= 1: + # Technically 0 should work to keep near-100% worker load, + # especially if the dataset is sufficiently large to require + # significant time to evaluate a pipeline. + # Increasing the number decreases the risk of lost compute time, + # but also increases information lag. 
An offspring created too + # early might miss out on a better parent. new_individual = ops.create(current_population, 1)[0] async_.submit(ops.evaluate, new_individual) diff --git a/gama/search_methods/base_search.py b/gama/search_methods/base_search.py index 002b79d9..95e4616d 100644 --- a/gama/search_methods/base_search.py +++ b/gama/search_methods/base_search.py @@ -9,17 +9,17 @@ class BaseSearch(ABC): - """ All search methods should be derived from this class. + """All search methods should be derived from this class. This class should not be directly used to configure GAMA. """ def __init__(self): # hyperparameters can be used to safe/process search hyperparameters - self._hyperparameters: Dict[str, Tuple[Any, Any]] = dict() + self._hyperparameters: Dict[str, Tuple[Any, Any]] = {} self.output: List[Individual] = [] self.logger = EvaluationLogger - def __str__(self): + def __str__(self) -> str: # Not sure if I should report actual used hyperparameters (i.e. include default) # or only those set by user. user_set_hps = { @@ -34,14 +34,14 @@ def __str__(self): @property def hyperparameters(self) -> Dict[str, Any]: - """ Hyperparameter (name, value) pairs as set/determined dynamically/default. - - Values may have been set directly, through dynamic defaults or static defaults. - This is also the order in which the value of a hyperparameter is checked, - i.e. a user set value wil overwrite any other value, and a dynamic default - will overwrite a static one. - Dynamic default values only considered if `dynamic_defaults` has been called. - """ + """Hyperparameter (name, value) pairs as set/determined dynamically/default. + + Values may have been set directly, through dynamic defaults or static defaults. + This is also the order in which the value of a hyperparameter is checked, + i.e. a user set value wil overwrite any other value, and a dynamic default + will overwrite a static one. + Dynamic default values only considered if `dynamic_defaults` has been called. 
+ """ return { parameter: set_value if set_value is not None else default for parameter, (set_value, default) in self._hyperparameters.items() @@ -54,7 +54,7 @@ def _overwrite_hyperparameter_default(self, hyperparameter: str, value: Any): def dynamic_defaults( self, x: pd.DataFrame, y: Union[pd.DataFrame, pd.Series], time_limit: float ) -> None: - """ Set hyperparameter defaults based on the dataset and time-constraints. + """Set hyperparameter defaults based on the dataset and time-constraints. Should be called before `search`. @@ -74,7 +74,7 @@ def dynamic_defaults( raise NotImplementedError("Must be implemented by child class.") def search(self, operations: OperatorSet, start_candidates: List[Individual]): - """ Execute search as configured. + """Execute search as configured. Sets `output` field of this class to the best Individuals. @@ -91,7 +91,7 @@ def search(self, operations: OperatorSet, start_candidates: List[Individual]): def _check_base_search_hyperparameters( toolbox, output: List[Individual], start_candidates: List[Individual] ) -> None: - """ Checks that search hyperparameters are valid. + """Checks that search hyperparameters are valid. :param toolbox: :param output: @@ -103,4 +103,4 @@ def _check_base_search_hyperparameters( f"'start_population' must be a list but was {type(start_candidates)}" ) if not all(isinstance(x, Individual) for x in start_candidates): - raise TypeError(f"Each element in 'start_population' must be Individual.") + raise TypeError("Each element in 'start_population' must be Individual.") diff --git a/gama/search_methods/random_search.py b/gama/search_methods/random_search.py index 143a02c7..ddf5ff60 100644 --- a/gama/search_methods/random_search.py +++ b/gama/search_methods/random_search.py @@ -15,12 +15,16 @@ class RandomSearch(BaseSearch): - """ Perform random search over all possible pipelines. 
""" + """Perform random search over all possible pipelines.""" - def dynamic_defaults(self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float): + def dynamic_defaults( + self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float + ) -> None: pass - def search(self, operations: OperatorSet, start_candidates: List[Individual]): + def search( + self, operations: OperatorSet, start_candidates: List[Individual] + ) -> None: random_search(operations, self.output, start_candidates) @@ -30,7 +34,7 @@ def random_search( start_candidates: List[Individual], max_evaluations: Optional[int] = None, ) -> List[Individual]: - """ Perform random search over all possible pipelines. + """Perform random search over all possible pipelines. Parameters ---------- diff --git a/gama/utilities/cli.py b/gama/utilities/cli.py index 468b86c0..9612a622 100644 --- a/gama/utilities/cli.py +++ b/gama/utilities/cli.py @@ -2,14 +2,16 @@ import logging import os import pickle +from typing import List, Union from pandas.api.types import is_categorical_dtype from gama import GamaClassifier, GamaRegressor from gama.data_loading import X_y_from_file +from gama.gama import Gama -def parse_args(): +def make_parser(): desc = "An AutoML tool that optimizes machine learning pipelines for your data." 
parser = argparse.ArgumentParser(description=desc) @@ -124,11 +126,16 @@ def parse_args(): help="If True, execute without calling fit or exports.", ) - return parser.parse_args() + return parser -def main(): - args = parse_args() +def main(command: Union[str, List[str]] = ""): + parser = make_parser() + + if isinstance(command, str): + command = command.split() + + args = parser.parse_args(command) if command else parser.parse_args() print("CLI: Processing input") if not os.path.exists(args.input_file.lower()): @@ -141,13 +148,12 @@ def main(): kwargs["sep"] = args.seperator x, y = X_y_from_file( - file_path=args.input_file.lower(), split_column=args.target, **kwargs, + file_path=args.input_file.lower(), + split_column=args.target, + **kwargs, ) if args.mode is None: - if is_categorical_dtype(y.dtype): - args.mode = "classification" - else: - args.mode = "regression" + args.mode = "classification" if is_categorical_dtype(y.dtype) else "regression" print(f"Detected a {args.mode} problem.") print("CLI: Initializing GAMA") @@ -165,7 +171,7 @@ def main(): configuration["scoring"] = args.metric if args.mode == "regression": - automl = GamaRegressor(**configuration) + automl: Gama = GamaRegressor(**configuration) elif args.mode == "classification": automl = GamaClassifier(**configuration) else: diff --git a/gama/utilities/evaluation_library.py b/gama/utilities/evaluation_library.py index d8df96fe..ad9ce85f 100644 --- a/gama/utilities/evaluation_library.py +++ b/gama/utilities/evaluation_library.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +from sklearn.base import BaseEstimator from sklearn.model_selection import StratifiedShuffleSplit from gama.genetic_programming.components import Individual @@ -15,7 +16,7 @@ class Evaluation: - """ Record relevant evaluation data of an individual. 
""" + """Record relevant evaluation data of an individual.""" def __init__( self, @@ -25,49 +26,49 @@ def __init__( estimators: Optional[List] = None, start_time: Optional[datetime.datetime] = None, duration: float = -1, - error: str = None, + error: Optional[str] = None, pid: Optional[int] = None, ): self.individual: Individual = individual self.score = score - self._estimators: Optional[List] = [] if estimators is None else estimators + self._estimators: List[BaseEstimator] = [] if estimators is None else estimators self.start_time = start_time self.duration = duration self.error = error self.pid = pid - self._cache_file = None + self._cache_file = "" if isinstance(predictions, (pd.Series, pd.DataFrame)): predictions = predictions.values self._predictions: Optional[np.ndarray] = predictions - def to_disk(self, directory): - self._cache_file = os.path.join(directory, str(self.individual._id) + ".pkl") + def to_disk(self, directory: str) -> None: + """Save Evaluation in the provided directory.""" + self._cache_file = os.path.join(directory, f"{str(self.individual._id)}.pkl") with open(self._cache_file, "wb") as fh: pickle.dump((self._estimators, self._predictions), fh) self._estimators, self._predictions = [], None - def remove_from_disk(self): + def remove_from_disk(self) -> None: + """Remove the related file from disk.""" os.remove(os.path.join(self._cache_file)) - self._cache_file = None + self._cache_file = "" @property - def estimators(self): + def estimators(self) -> List[BaseEstimator]: if self._estimators or not self._cache_file: return self._estimators - else: - with open(self._cache_file, "rb") as fh: - estimators, _ = pickle.load(fh) - return estimators + with open(self._cache_file, "rb") as fh: + estimators, _ = pickle.load(fh) + return estimators @property def predictions(self): if self._predictions is not None or not self._cache_file: return self._predictions - else: - with open(self._cache_file, "rb") as fh: - _, predictions = pickle.load(fh) - 
return predictions + with open(self._cache_file, "rb") as fh: + _, predictions = pickle.load(fh) + return predictions # Is there a better way to do this? # Assignment in __init__ is not preferred even if it saves lines. @@ -91,7 +92,7 @@ def __ge__(self, other): class EvaluationLibrary: - """ Maintains an in-memory record of evaluations. + """Maintains an in-memory record of evaluations. The main function of the EvaluationLibrary is to maintain a fast lookup for the best evaluations, and to discard meta-data of Evaluations which are not @@ -116,7 +117,7 @@ def __init__( sample: Optional[np.ndarray] = None, cache: str = "cache", ): - """ Create an EvaluationLibrary for in-memory record of evaluations. + """Create an EvaluationLibrary for in-memory record of evaluations. Parameters ---------- @@ -170,7 +171,7 @@ def determine_sample_indices( prediction_size: Optional[int] = None, stratify: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]] = None, ) -> None: - """ Set `self._sample` to an array for sampling predictions or `None`. + """Set `self._sample` to an array for sampling predictions or `None`. The sample indices can be class stratified if `stratify` is set. If `prediction_size` or `len(stratify)` is smaller than `n`, @@ -195,26 +196,23 @@ def determine_sample_indices( log.warning("New subsample not used for already stored evaluations.") n = self._sample_n if n is None else n - if n is not None: - if prediction_size is not None and n < prediction_size: - # Subsample is to be chosen uniformly random. 
- self._sample = np.random.choice( - range(prediction_size), size=n, replace=False - ) - elif stratify is not None and n < len(stratify): - splitter = StratifiedShuffleSplit(n_splits=1, train_size=n) - self._sample, _ = next( - splitter.split(np.zeros(len(stratify)), stratify) - ) - else: - # Specified sample size exceeds size of predictions - self._sample = None - else: + if n is None: # No n was provided here nor set on initialization self._sample = None + elif prediction_size is not None and n < prediction_size: + # Subsample is to be chosen uniformly random. + self._sample = np.random.choice( + range(prediction_size), size=n, replace=False + ) + elif stratify is not None and n < len(stratify): + splitter = StratifiedShuffleSplit(n_splits=1, train_size=n) + self._sample, _ = next(splitter.split(np.zeros(len(stratify)), stratify)) + else: + # Specified sample size exceeds size of predictions + self._sample = None - def _process_predictions(self, evaluation: Evaluation): - """ Downsample evaluation predictions if required. 
""" + def _process_predictions(self, evaluation: Evaluation) -> None: + """Downsample evaluation predictions if required.""" if self._sample_n == 0: evaluation._predictions = None if evaluation.predictions is None: @@ -231,7 +229,7 @@ def save_evaluation(self, evaluation: Evaluation) -> None: self._process_predictions(evaluation) if evaluation.error is not None: - evaluation._estimators, evaluation._predictions = None, None + evaluation._estimators, evaluation._predictions = [], None self.other_evaluations.append(evaluation) elif self._m is None or self._m > len(self.top_evaluations): evaluation.to_disk(self._cache) @@ -240,7 +238,7 @@ def save_evaluation(self, evaluation: Evaluation) -> None: removed = heapq.heappushpop(self.top_evaluations, evaluation) if removed == evaluation: # new evaluation is not in heap, big memory items may be discarded - removed._predictions, removed._estimators = None, None + removed._estimators, removed._predictions = [], None else: # new evaluation is now on the heap, remove old from disk evaluation.to_disk(self._cache) @@ -250,13 +248,13 @@ def save_evaluation(self, evaluation: Evaluation) -> None: self.lookup[self._lookup_key(evaluation)] = evaluation - def clear_cache(self): + def clear_cache(self) -> None: for file in os.listdir(self._cache): os.remove(os.path.join(self._cache, file)) os.rmdir(self._cache) def n_best(self, n: int = 5, with_pipelines=True) -> List[Evaluation]: - """ Return the best `n` pipelines. + """Return the best `n` pipelines. Slower if `n` exceeds `m` given on initialization. 
""" diff --git a/gama/utilities/export.py b/gama/utilities/export.py index c5072c8d..8c92e03e 100644 --- a/gama/utilities/export.py +++ b/gama/utilities/export.py @@ -1,21 +1,21 @@ import copy -from typing import Tuple, List, Set +from typing import Tuple, List, Set, Optional from sklearn.base import TransformerMixin from gama.genetic_programming.components import Individual def transformers_to_str(transformers: List[TransformerMixin]) -> List[str]: - """ Format a transformer for code export, removes any mapping. """ + """Format a transformer for code export, removes any mapping.""" copies = list(map(copy.copy, transformers)) for transformer in copies: if hasattr(transformer, "mapping"): - transformer.mapping = None # type: ignore # ignore no attr 'mapping' + transformer.mapping = None return list(map(str, copies)) def format_import(o: object) -> str: - """ Creates the import statement for `o`'s class. """ + """Creates the import statement for `o`'s class.""" if o.__module__.split(".")[-1].startswith("_"): module = ".".join(o.__module__.split(".")[:-1]) else: @@ -23,7 +23,7 @@ def format_import(o: object) -> str: return f"from {module} import {o.__class__.__name__}" -def format_pipeline(steps: List[Tuple[str, str]], name: str = "pipeline"): +def format_pipeline(steps: List[Tuple[str, str]], name: str = "pipeline") -> str: steps_str = ",\n".join([f"('{name}', {step})" for name, step in steps]) return f"{name} = Pipeline([{steps_str}])\n" @@ -31,7 +31,7 @@ def format_pipeline(steps: List[Tuple[str, str]], name: str = "pipeline"): def imports_and_steps_for_individual( individual: Individual, ) -> Tuple[Set[str], List[Tuple[str, str]]]: - """ Determine required imports and steps for the individual's pipeline. + """Determine required imports and steps for the individual's pipeline. 
Returns two lists: - one with import statements @@ -45,25 +45,23 @@ def imports_and_steps_for_individual( steps = [] for i, primitive_node in reversed(list(enumerate(individual.primitives))): steps.append((str(i), primitive_node.str_nonrecursive)) - for terminal in primitive_node._terminals: - if callable(terminal.value) and hasattr(terminal.value, "__name__"): - imports.append( - f"from {terminal.value.__module__} import {terminal.value.__name__}" # type: ignore # noqa: E501 - ) - + imports.extend( + f"from {terminal.value.__module__} import {terminal.value.__name__}" + for terminal in primitive_node._terminals + if callable(terminal.value) and hasattr(terminal.value, "__name__") + ) return set(imports), steps def individual_to_python( - individual: Individual, prepend_steps: List[Tuple[str, TransformerMixin]] = None + individual: Individual, + prepend_steps: Optional[List[Tuple[str, TransformerMixin]]] = None, ) -> str: - """ Generate code for the machine learning pipeline represented by `individual`. """ + """Generate code for the machine learning pipeline represented by `individual`.""" imports, steps = imports_and_steps_for_individual(individual) if prepend_steps is not None: steps = prepend_steps + steps imports = imports.union({format_import(step) for _, step in prepend_steps}) steps_str = ",\n".join([f"('{name}', {step})" for name, step in steps]) pipeline = f"Pipeline([{steps_str}])" - script = "\n".join(sorted(imports)) + "\n\n" + "pipeline = " + pipeline + "\n" - - return script + return "\n".join(sorted(imports)) + "\n\n" + "pipeline = " + pipeline + "\n" diff --git a/gama/utilities/generic/async_evaluator.py b/gama/utilities/generic/async_evaluator.py index 9b577afd..5470f0d6 100644 --- a/gama/utilities/generic/async_evaluator.py +++ b/gama/utilities/generic/async_evaluator.py @@ -14,6 +14,8 @@ Though that does not hinder the execution of the program, I don't want errors for expected behavior. 
""" + +import contextlib import datetime import gc import logging @@ -29,17 +31,12 @@ from psutil import NoSuchProcess -try: - import resource -except ModuleNotFoundError: - resource = None # type: ignore - log = logging.getLogger(__name__) class AsyncFuture: - """ Reference to a function call executed on a different process. """ + """Reference to a function call executed on a different process.""" def __init__(self, fn, *args, **kwargs): self.id = uuid.uuid4() @@ -51,7 +48,7 @@ def __init__(self, fn, *args, **kwargs): self.traceback = None def execute(self, extra_kwargs): - """ Execute the function call `fn(*args, **kwargs)` and record results. """ + """Execute the function call `fn(*args, **kwargs)` and record results.""" try: # Don't update self.kwargs, as it will be pickled back to the main process kwargs = {**self.kwargs, **extra_kwargs} @@ -62,7 +59,7 @@ def execute(self, extra_kwargs): class AsyncEvaluator: - """ Manages subprocesses on which arbitrary functions can be evaluated. + """Manages subprocesses on which arbitrary functions can be evaluated. The function and all its arguments must be picklable. Using the same AsyncEvaluator in two different contexts raises a `RuntimeError`. @@ -78,7 +75,7 @@ class AsyncEvaluator: def __init__( self, - n_workers: Optional[int] = None, + n_workers: int = 1, memory_limit_mb: Optional[int] = None, logfile: Optional[str] = None, wait_time_before_forced_shutdown: int = 10, @@ -86,9 +83,8 @@ def __init__( """ Parameters ---------- - n_workers : int, optional (default=None) + n_workers : int (default=1) Maximum number of subprocesses to run for parallel evaluations. - Defaults to `AsyncEvaluator.n_jobs`, using all cores unless overwritten. memory_limit_mb : int, optional (default=None) The maximum number of megabytes that this process and its subprocesses may use in total. If None, no limit is enforced. 
@@ -109,9 +105,17 @@ def __init__( self._logfile = logfile self._wait_time_before_forced_shutdown = wait_time_before_forced_shutdown + # queue.qsize() may raise an error on Unix-like, + # more accurate results may be obtained by using a multiprocessing.Value + # but it adds a point of failure and I hope to replace this async + # module soon, so we use this approximation. + # Since we assume all workers will take up a job, we start below 0: + self.job_queue_size = -n_workers + self._input: multiprocessing.Queue = multiprocessing.Queue() self._output: multiprocessing.Queue = multiprocessing.Queue() self._command: multiprocessing.Queue = multiprocessing.Queue() + pid = os.getpid() self._main_process = psutil.Process(pid) @@ -139,7 +143,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): for _ in self._processes: self._command.put("stop") - for i in range(self._wait_time_before_forced_shutdown + 1): + for _ in range(self._wait_time_before_forced_shutdown + 1): if self._command.empty(): break time.sleep(1) @@ -147,6 +151,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.clear_queue(self._input) self.clear_queue(self._output) self.clear_queue(self._command) + self.job_queue_size = -self._n_jobs # Even processes which 'stop' need to be 'waited', # otherwise they become zombie processes. @@ -154,19 +159,18 @@ def __exit__(self, exc_type, exc_val, exc_tb): try: self._stop_worker_process(self._processes[0]) except psutil.NoSuchProcess: - pass + self.job_queue_size -= 1 + self._processes.remove(self._processes[0]) return False - def clear_queue(self, queue: multiprocessing.Queue): - while not queue.empty(): - try: - queue.get(timeout=0.001) - except: - pass - queue.close() + def clear_queue(self, q: multiprocessing.Queue): + while not q.empty(): + with contextlib.suppress(queue.Empty): + q.get(timeout=0.001) + q.close() def submit(self, fn: Callable, *args, **kwargs) -> AsyncFuture: - """ Submit fn(*args, **kwargs) to be evaluated on a subprocess. 
+ """Submit fn(*args, **kwargs) to be evaluated on a subprocess. Parameters ---------- @@ -186,10 +190,11 @@ def submit(self, fn: Callable, *args, **kwargs) -> AsyncFuture: future = AsyncFuture(fn, *args, **kwargs) self.futures[future.id] = future self._input.put(future) + self.job_queue_size += 1 return future def wait_next(self, poll_time: float = 0.05) -> AsyncFuture: - """ Wait until an AsyncFuture has been completed and return it. + """Wait until an AsyncFuture has been completed and return it. Parameters @@ -209,13 +214,13 @@ def wait_next(self, poll_time: float = 0.05) -> AsyncFuture: """ if len(self.futures) == 0: raise RuntimeError("No Futures queued, must call `submit` first.") - while True: self._control_memory_usage() self._log_memory_usage() try: completed_future = self._output.get(block=False) + self.job_queue_size -= 1 except queue.Empty: time.sleep(poll_time) continue @@ -230,7 +235,7 @@ def wait_next(self, poll_time: float = 0.05) -> AsyncFuture: return match def _start_worker_process(self) -> psutil.Process: - """ Start a new worker node and add it to the process pool. """ + """Start a new worker node and add it to the process pool.""" mp_process = multiprocessing.Process( target=evaluator_daemon, args=(self._input, self._output, self._command, AsyncEvaluator.defaults), @@ -242,13 +247,14 @@ def _start_worker_process(self) -> psutil.Process: return subprocess def _stop_worker_process(self, process: psutil.Process): - """ Terminate a new worker node and remove it from the process pool. """ + """Terminate a new worker node and remove it from the process pool.""" process.terminate() process.wait(timeout=60) + self.job_queue_size -= 1 self._processes.remove(process) def _control_memory_usage(self, threshold=0.05): - """ Dynamically restarts or kills processes to adhere to memory constraints. 
""" + """Dynamically restarts or kills processes to adhere to memory constraints.""" if self._memory_limit_mb is None: return # If the memory usage of all processes (the main process, and the evaluation @@ -310,7 +316,7 @@ def _get_memory_usage(self): processes = [self._main_process] + self._processes for process in processes: try: - yield process, process.memory_info()[0] / (2 ** 20) + yield process, process.memory_info()[0] / (2**20) except NoSuchProcess: # can never be the main process anyway self._processes = [p for p in self._processes if p.pid != process.pid] @@ -323,7 +329,7 @@ def evaluator_daemon( command_queue: queue.Queue, default_parameters: Optional[Dict] = None, ): - """ Function for daemon subprocess that evaluates functions from AsyncFutures. + """Function for daemon subprocess that evaluates functions from AsyncFutures. Parameters ---------- @@ -342,12 +348,9 @@ def evaluator_daemon( """ try: while True: - try: + with contextlib.suppress(queue.Empty): command_queue.get(block=False) break - except queue.Empty: - pass - try: future = input_queue.get(block=False) future.execute(default_parameters) diff --git a/gama/utilities/generic/paretofront.py b/gama/utilities/generic/paretofront.py index 546eafd6..814cb5b1 100644 --- a/gama/utilities/generic/paretofront.py +++ b/gama/utilities/generic/paretofront.py @@ -1,9 +1,9 @@ -from collections import Sequence +from collections.abc import Sequence from typing import Tuple, List, Optional, Callable, Any class ParetoFront(Sequence): - """ A list of tuples in which no one tuple is dominated by another. 
""" + """A list of tuples in which no one tuple is dominated by another.""" def __init__( self, @@ -30,13 +30,10 @@ def __init__( self._iterator_index = 0 def _get_item_value(self, item): - if self._get_values_fn is not None: - return self._get_values_fn(item) - else: - return item + return self._get_values_fn(item) if self._get_values_fn is not None else item def update(self, new_item: Any): - """ Update the Pareto front with new_item if it qualifies. + """Update the Pareto front with new_item if it qualifies. Parameters ---------- @@ -83,7 +80,7 @@ def update(self, new_item: Any): return True def clear(self): - """ Removes all items from the Pareto front.""" + """Removes all items from the Pareto front.""" self._front = [] def __len__(self): diff --git a/gama/utilities/generic/stopwatch.py b/gama/utilities/generic/stopwatch.py index 69800b88..0154d2d3 100644 --- a/gama/utilities/generic/stopwatch.py +++ b/gama/utilities/generic/stopwatch.py @@ -2,7 +2,7 @@ class Stopwatch: - """ A context manager that keeps track of wall clock time spent. """ + """A context manager that keeps track of wall clock time spent.""" def __init__(self, timing_function=time.time): """ @@ -29,7 +29,7 @@ def __exit__(self, *args): @property def elapsed_time(self): - """ Time spent in seconds during with-statement (so far, if not yet exited). """ + """Time spent in seconds during with-statement (so far, if not yet exited).""" if self._is_running: return self._get_time() - self._start else: diff --git a/gama/utilities/generic/timekeeper.py b/gama/utilities/generic/timekeeper.py index 0ae09cc1..7253c8b2 100644 --- a/gama/utilities/generic/timekeeper.py +++ b/gama/utilities/generic/timekeeper.py @@ -14,21 +14,21 @@ class Activity(NamedTuple): @property def time_left(self) -> float: - """ Time left in seconds. + """Time left in seconds. Raises a TypeError if `time_limit` was not specified. 
""" return self.time_limit - self.stopwatch.elapsed_time def exceeded_limit(self, margin: float = 0.0) -> float: - """ True iff a limit was specified and it is exceeded by `margin` seconds. """ + """True iff a limit was specified and it is exceeded by `margin` seconds.""" if self.time_limit is not None: return self.time_limit - self.stopwatch.elapsed_time < -margin return False class TimeKeeper: - """ Simple object that helps keep track of time over multiple activities. """ + """Simple object that helps keep track of time over multiple activities.""" def __init__(self, total_time: Optional[int] = None): """ @@ -45,7 +45,7 @@ def __init__(self, total_time: Optional[int] = None): @property def total_time_remaining(self) -> float: - """ Return time remaining in seconds. """ + """Return time remaining in seconds.""" if self.total_time is not None: return self.total_time - sum( map(lambda a: a.stopwatch.elapsed_time, self.activities) @@ -56,7 +56,7 @@ def total_time_remaining(self) -> float: @property def current_activity_time_elapsed(self) -> float: - """ Return elapsed time in seconds of current activity. + """Return elapsed time in seconds of current activity. Raise RuntimeError if no current activity. """ @@ -67,7 +67,7 @@ def current_activity_time_elapsed(self) -> float: @property def current_activity_time_left(self) -> float: - """ Return time left in seconds of current activity. + """Return time left in seconds of current activity. Raise RuntimeError if no current activity. """ @@ -91,7 +91,7 @@ def start_activity( time_limit: Optional[int] = None, activity_meta: Optional[List[Any]] = None, ) -> Iterator[Stopwatch]: - """ Mark the start of a new activity and automatically time its duration. + """Mark the start of a new activity and automatically time its duration. TimeManager does not currently support nested activities. 
Parameters diff --git a/gama/utilities/metrics.py b/gama/utilities/metrics.py index 6afb0ade..2c4e21d8 100644 --- a/gama/utilities/metrics.py +++ b/gama/utilities/metrics.py @@ -19,18 +19,18 @@ } all_metrics = {*classification_metrics, *regression_metrics} -reversed_scorers = {v: k for k, v in SCORERS.items()} +reversed_scorers = {repr(v): k for k, v in SCORERS.items()} class MetricType(Enum): - """ Metric types supported by GAMA. """ + """Metric types supported by GAMA.""" CLASSIFICATION: int = 1 #: discrete target REGRESSION: int = 2 #: continuous target class Metric: - """ A thin layer around the `scorer` class of scikit-learn. """ + """A thin layer around the `scorer` class of scikit-learn.""" def __init__(self, scorer: Union[_BaseScorer, str]): if isinstance(scorer, str): @@ -40,7 +40,7 @@ def __init__(self, scorer: Union[_BaseScorer, str]): "Scorer was not a valid scorer or could not be converted to one." ) self.scorer = scorer - self.name = reversed_scorers[scorer] + self.name = reversed_scorers[repr(scorer)] self.requires_probabilities = ( isinstance(scorer, _ProbaScorer) or self.name == "roc_auc" ) @@ -56,10 +56,10 @@ def __init__(self, scorer: Union[_BaseScorer, str]): self.score = self.scorer._score_func - def __call__(self, *args, **kwargs): + def __call__(self, *args, **kwargs) -> float: return self.scorer(*args, **kwargs) - def maximizable_score(self, *args, **kwargs): + def maximizable_score(self, *args, **kwargs) -> float: return self.scorer._sign * self.score(*args, **kwargs) @@ -67,16 +67,17 @@ def scoring_to_metric( scoring: Union[str, Metric, Iterable[str], Iterable[Metric]] ) -> Tuple[Metric, ...]: if isinstance(scoring, str): - return tuple([Metric(scoring)]) + return (Metric(scoring),) if isinstance(scoring, Metric): - return tuple([scoring]) - - if isinstance(scoring, Iterable): - if all([isinstance(scorer, (Metric, str)) for scorer in scoring]): - converted_metrics = [ - scorer if isinstance(scorer, Metric) else Metric(scorer) - for scorer 
in scoring - ] - return tuple(converted_metrics) + return (scoring,) + + if isinstance(scoring, Iterable) and all( + isinstance(scorer, (Metric, str)) for scorer in scoring + ): + converted_metrics = [ + scorer if isinstance(scorer, Metric) else Metric(scorer) + for scorer in scoring + ] + return tuple(converted_metrics) raise TypeError("scoring must be str, Metric or Iterable (of str or Metric).") diff --git a/gama/utilities/preprocessing.py b/gama/utilities/preprocessing.py index a562928b..b2b66316 100644 --- a/gama/utilities/preprocessing.py +++ b/gama/utilities/preprocessing.py @@ -15,7 +15,7 @@ def select_categorical_columns( max_f: Optional[int] = None, ignore_nan: bool = True, ) -> Iterator[str]: - """ Find all categorical columns with at least `min_f` and at most `max_f` factors. + """Find all categorical columns with at least `min_f` and at most `max_f` factors. Parameters ---------- @@ -42,14 +42,16 @@ def select_categorical_columns( yield column -def basic_encoding(x: pd.DataFrame, is_classification: bool): - """ Perform 'basic' encoding of categorical features. +def basic_encoding( + x: pd.DataFrame, is_classification: bool +) -> Tuple[pd.DataFrame, TransformerMixin]: + """Perform 'basic' encoding of categorical features. - Specifically, perform: - - Ordinal encoding for features with 2 or fewer unique values. - - One hot encoding for features with at most 10 unique values. - - Ordinal encoding for features with 11+ unique values, if y is categorical. - """ + Specifically, perform: + - Ordinal encoding for features with 2 or fewer unique values. + - One hot encoding for features with at most 10 unique values. + - Ordinal encoding for features with 11+ unique values, if y is categorical. 
+ """ ord_features = list(select_categorical_columns(x, max_f=2)) if is_classification: ord_features.extend(select_categorical_columns(x, min_f=11)) @@ -57,7 +59,7 @@ def basic_encoding(x: pd.DataFrame, is_classification: bool): encoding_steps = [ ("ord-enc", ce.OrdinalEncoder(cols=ord_features, drop_invariant=True)), - ("oh-enc", ce.OneHotEncoder(cols=leq_10_features, handle_missing="ignore")), + ("oh-enc", ce.OneHotEncoder(cols=leq_10_features, handle_missing="value")), ] encoding_pipeline = Pipeline(encoding_steps) x_enc = encoding_pipeline.fit_transform(x, y=None) # Is this dangerous? @@ -67,7 +69,7 @@ def basic_encoding(x: pd.DataFrame, is_classification: bool): def basic_pipeline_extension( x: pd.DataFrame, is_classification: bool ) -> List[Tuple[str, TransformerMixin]]: - """ Define a TargetEncoder and SimpleImputer. + """Define a TargetEncoder and SimpleImputer. TargetEncoding is will encode categorical features with more than 10 unique values, if y is not categorical. SimpleImputer imputes with the median. diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index 560aa2a3..00000000 --- a/mypy.ini +++ /dev/null @@ -1,27 +0,0 @@ -[mypy] -# See # https://mypy.readthedocs.io/en/latest/config_file.html -python_version=3.6 - -show_column_numbers=True - -warn_redundant_casts=True -warn_unused_ignores=True -# warn_return_any=True -# warn_unreachable=True - -#disallow_incomplete_defs=True -#disallow_untyped_defs=True - -# Reports any config lines that are not recognized -warn_unused_configs=True - -# Probably want to turn back on later: -#no_implicit_optional -ignore_missing_imports=True - -# Following configuration are default, but made explicit: -warn_no_return=True - -[mypy-gama.visualization.*] -# May be integrated with the Dash app later. 
-ignore_errors = True diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0ee2289b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,100 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "gama" +description = "A package for automated machine learning based on scikit-learn." +readme = "README.md" +authors= [{name = "Pieter Gijsbers", email="p.gijsbers@tue.nl"}] +license = {text = "MIT"} +#keywords = .. +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", +] + +dynamic = ["version"] +requires-python = ">=3.9" +dependencies = [ + "numpy>=1.20.0", + "scipy>=1.0.0", + "scikit-learn>=1.1.0,<1.3", + "pandas>=1.0", + "stopit>=1.1.1", + "liac-arff>=2.2.2", + "category-encoders>=1.2.8", + "black==24.3.0", + "psutil", +] + +[project.optional-dependencies] +doc = ["sphinx", "sphinx_rtd_theme"] +test = [ + "pre-commit==3.2.2", + "pytest>=4.4.0", + "pytest-mock", + "pytest-xdist", + "codecov", + "pytest-cov", +] + +[urls] +github = "https://github.com/openml-labs/GAMA" +documentation = "https://openml-labs.github.io/gama/" +"bug tracker" = "https://github.com/openml-labs/gama/issues" + +[project.scripts] +gama = "gama.utilities.cli:main" + +[tool.setuptools.dynamic] +version = {attr = "gama.__version__.__version__"} + +[tool.setuptools.packages.find] +include = ["gama*"] # package names should match these glob patterns (["*"] by default) + +[tool.pytest.ini_options] +filterwarnings = [ + "error", + "ignore::sklearn.exceptions.ConvergenceWarning", + "ignore::RuntimeWarning", + # Block a warning coming from scikit-learn internals about scipy.mode + "ignore:.*mode.*:FutureWarning", + # We have a CRON job checking for deprecation/future warnings, + # but we dont fail on them by default as they should not interfere with most PRs. + # We still print to ensure new warnings are not introduced by the change. 
+ "default::PendingDeprecationWarning", + "default::DeprecationWarning", + "default::FutureWarning", + # We will (probably) get rid of category_encoders in 22.1+ + "ignore:::category_encoders.target_encoder", +] + +[tool.ruff] + +[tool.ruff.per-file-ignores] +"__init__.py" = ["F401"] + +[tool.mypy] +# See # https://mypy.readthedocs.io/en/latest/config_file.html +python_version=3.10 + +show_column_numbers = true + +warn_redundant_casts = true +warn_unused_ignores = true +# warn_return_any=True +# warn_unreachable=True + +#disallow_incomplete_defs=True +#disallow_untyped_defs=True + +# Reports any config lines that are not recognized +warn_unused_configs = true + +# Probably want to turn back on later: +#no_implicit_optional +ignore_missing_imports = true + +# Following configuration are default, but made explicit: +warn_no_return = true diff --git a/setup.py b/setup.py deleted file mode 100644 index bf218710..00000000 --- a/setup.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -import os - -from setuptools import setup, find_packages - -with open("gama/__version__.py", "r") as fh: - version = fh.readlines()[-1].split()[-1].strip("\"'") - -base = [ - "numpy>=1.14.0", - "scipy>=1.0.0", - "scikit-learn>=0.24.0,<0.25.0", - "pandas>=1.0,<1.1", - "stopit>=1.1.1", - "liac-arff>=2.2.2", - "category-encoders>=1.2.8", - "black==19.10b0", - "psutil", -] - -vis = [ - "dash==1.3", - "dash-daq==0.1.0", - "dash-bootstrap-components", - "visdcc", -] - -doc = ["sphinx", "sphinx_rtd_theme"] - -test = [ - "pre-commit==2.1.1", - "pytest>=4.4.0", - "pytest-mock", - "pytest-xdist<2.0.0", - "codecov", - "pytest-cov", -] - -# Black, Flake8 and Mypy will be installed through calling pre-commit install -dev = test + doc -all_ = test + doc + vis - -with open(os.path.join("README.md")) as fid: - README = fid.read() - -setup( - name="gama", - version=version, - description="A package for automated machine learning based on scikit-learn.", - long_description=README, - 
long_description_content_type="text/markdown", - author="Pieter Gijsbers", - author_email="p.gijsbers@tue.nl", - url="https://github.com/openml-labs/GAMA", - project_urls={ - "Bug Tracker": "https://github.com/openml-labs/gama/issues", - "Documentation": "https://openml-labs.github.io/gama/", - "Source Code": "https://github.com/openml-labs/gama", - }, - packages=find_packages(exclude=["tests", "tests.*"]), - install_requires=base, - extras_require={"vis": vis, "dev": dev, "all": all_,}, - python_requires=">=3.6.0", - entry_points={ - "console_scripts": [ - "gama=gama.utilities.cli:main", - "gamadash=gama.dashboard.app:main", - ] - }, -) diff --git a/tests/conftest.py b/tests/conftest.py index f5fe939e..eb3dc76e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,14 +7,14 @@ @pytest.fixture def pset(): - gc = GamaClassifier(config=clf_config, scoring="accuracy", store="nothing") + gc = GamaClassifier(search_space=clf_config, scoring="accuracy", store="nothing") yield gc._pset gc.cleanup("all") @pytest.fixture def opset(): - gc = GamaClassifier(config=clf_config, scoring="accuracy", store="nothing") + gc = GamaClassifier(search_space=clf_config, scoring="accuracy", store="nothing") yield gc._operator_set gc.cleanup("all") diff --git a/tests/system/test_gama.py b/tests/system/test_gama.py index 019ae222..93cedb35 100644 --- a/tests/system/test_gama.py +++ b/tests/system/test_gama.py @@ -8,7 +8,7 @@ def _gama_on_digits(gama): - X, y = load_digits(return_X_y=True) + X, y = load_digits(return_X_y=True, as_frame=True) X_train, X_test, y_train, y_test = train_test_split( X, y, stratify=y, random_state=0 ) @@ -23,7 +23,7 @@ def _gama_on_digits(gama): assert log_loss(y_test, y_proba) == gama.score(X_test, y_test) assert log_loss(y_test, y_proba) == gama.score(X_test, pd.Series(y_test)) assert log_loss(y_test, y_proba) == gama.score( - X_test, LabelEncoder().fit_transform(y_test.reshape(-1, 1)) + X_test, LabelEncoder().fit_transform(y_test) ) diff --git 
a/tests/system/test_gamaclassifier.py b/tests/system/test_gamaclassifier.py index d32f6a4d..b77d2b56 100644 --- a/tests/system/test_gamaclassifier.py +++ b/tests/system/test_gamaclassifier.py @@ -178,7 +178,7 @@ def _test_dataset_problem( def test_binary_classification_accuracy(): - """ Binary classification, accuracy, numpy data and ensemble code export """ + """Binary classification, accuracy, numpy data and ensemble code export""" gama = _test_dataset_problem(breast_cancer, "accuracy") x, y = breast_cancer["load"](return_X_y=True) @@ -193,47 +193,47 @@ def test_binary_classification_accuracy(): def test_binary_classification_accuracy_asha(): - """ Binary classification, accuracy, numpy data, ASHA search. """ + """Binary classification, accuracy, numpy data, ASHA search.""" _test_dataset_problem( breast_cancer, "accuracy", search=AsynchronousSuccessiveHalving(), max_time=60 ) def test_binary_classification_accuracy_random_search(): - """ Binary classification, accuracy, numpy data, random search. """ + """Binary classification, accuracy, numpy data, random search.""" _test_dataset_problem(breast_cancer, "accuracy", search=RandomSearch()) def test_binary_classification_logloss(): - """ Binary classification, log loss (probabilities), numpy data, ASHA search. """ + """Binary classification, log loss (probabilities), numpy data, ASHA search.""" _test_dataset_problem(breast_cancer, "neg_log_loss") def test_multiclass_classification_accuracy(): - """ Multiclass classification, accuracy, numpy data. """ + """Multiclass classification, accuracy, numpy data.""" _test_dataset_problem(wine, "accuracy") def test_multiclass_classification_logloss(): - """ Multiclass classification, log loss (probabilities), numpy data. """ + """Multiclass classification, log loss (probabilities), numpy data.""" _test_dataset_problem(wine, "neg_log_loss") def test_string_label_classification_accuracy(): - """ Binary classification, accuracy, target is str. 
""" + """Binary classification, accuracy, target is str.""" _test_dataset_problem(breast_cancer, "accuracy", y_type=str) def test_string_label_classification_log_loss(): - """ Binary classification, log loss (probabilities), target is str. """ + """Binary classification, log loss (probabilities), target is str.""" _test_dataset_problem(breast_cancer, "neg_log_loss", y_type=str) def test_missing_value_classification_arff(): - """ Binary classification, log loss (probabilities), arff data. """ + """Binary classification, log loss (probabilities), arff data.""" _test_dataset_problem(breast_cancer_missing, "neg_log_loss", arff=True) def test_missing_value_classification(): - """ Binary classification, log loss (probabilities), missing values. """ + """Binary classification, log loss (probabilities), missing values.""" _test_dataset_problem(breast_cancer_missing, "neg_log_loss", missing_values=True) diff --git a/tests/system/test_gamaregressor.py b/tests/system/test_gamaregressor.py index c65ee55e..985178c8 100644 --- a/tests/system/test_gamaregressor.py +++ b/tests/system/test_gamaregressor.py @@ -1,6 +1,6 @@ """ Contains full system tests for GamaRegressor """ import numpy as np -from sklearn.datasets import load_boston +from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error @@ -13,7 +13,7 @@ # While we could derive statistics dynamically, # we want to know if any changes ever happen, so we save them statically. -boston = dict(name="boston", load=load_boston, test_size=127, base_mse=81.790) +diabetes = dict(name="diabetes", load=load_diabetes, test_size=111, base_mse=4966) def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric): @@ -28,7 +28,7 @@ def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric): assert isinstance(predictions, np.ndarray), "predictions should be numpy arrays." 
assert (data["test_size"],) == predictions.shape, "should predict (N,) shape array." - # Majority classifier on this split achieves 0.6293706293706294 + # Predicting the mean will score roughly 4966 mse = mean_squared_error(y_test, predictions) print(data["name"], metric, "mse:", mse) assert ( @@ -54,13 +54,13 @@ def _test_dataset_problem(data, metric): def test_regression_mean_squared_error(): - """ GamaRegressor works on all-numeric data. """ - _test_dataset_problem(boston, "neg_mean_squared_error") + """GamaRegressor works on all-numeric data.""" + _test_dataset_problem(diabetes, "neg_mean_squared_error") def test_missing_value_regression(): - """ GamaRegressor works when missing values are present. """ - data = boston + """GamaRegressor works when missing values are present.""" + data = diabetes metric = "neg_mean_squared_error" X, y = data["load"](return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) @@ -68,6 +68,9 @@ def test_missing_value_regression(): X_test[1:100:2, 0] = X_test[2:100:5, 1] = float("NaN") gama = GamaRegressor( - random_state=0, max_total_time=TOTAL_TIME_S, scoring=metric, store="nothing", + random_state=0, + max_total_time=TOTAL_TIME_S, + scoring=metric, + store="nothing", ) _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric) diff --git a/tests/unit/test_auto_ensemble.py b/tests/unit/test_auto_ensemble.py index 67220e69..62f57856 100644 --- a/tests/unit/test_auto_ensemble.py +++ b/tests/unit/test_auto_ensemble.py @@ -1,6 +1,15 @@ from sklearn.svm import LinearSVC from sklearn.datasets import load_iris -from gama.postprocessing.ensemble import fit_and_weight +from sklearn.ensemble import VotingClassifier + +from gama.genetic_programming.compilers.scikitlearn import compile_individual +from gama.postprocessing.ensemble import ( + EnsemblePostProcessing, + fit_and_weight, + EnsembleClassifier, +) +from gama.utilities.evaluation_library import Evaluation, EvaluationLibrary +from 
gama.utilities.metrics import Metric def test_fit_and_weight(): @@ -14,3 +23,32 @@ def test_fit_and_weight(): assert 1 == w _, w = fit_and_weight((bad_estimator, x, y, 1)) assert 0 == w + + +def test_code_export_produces_working_code(GNB, ForestPipeline): + x, y = load_iris(return_X_y=True, as_frame=True) + + ensemble = EnsemblePostProcessing() + + ensemble._ensemble = EnsembleClassifier( + Metric("neg_log_loss"), + y, + evaluation_library=EvaluationLibrary(n=None), + ) + gnb = GNB + gnb._to_pipeline = compile_individual + fp = ForestPipeline + fp._to_pipeline = compile_individual + ensemble._ensemble._models = { + "a": (Evaluation(gnb), 1), + "b": (Evaluation(fp), 2), + } + ensemble._ensemble._metric = Metric("neg_log_loss") + + code = ensemble.to_code() + local = {} + exec(code, {}, local) + exported_ensemble = local["ensemble"] # should be defined in exported code + assert isinstance(exported_ensemble, VotingClassifier) + exported_ensemble.fit(x, y) + assert 0.9 < exported_ensemble.score(x, y) diff --git a/tests/unit/test_automl_gp.py b/tests/unit/test_automl_gp.py index 38743c32..bc6bdc5f 100644 --- a/tests/unit/test_automl_gp.py +++ b/tests/unit/test_automl_gp.py @@ -1,7 +1,3 @@ -from gama.genetic_programming.components import Fitness -from gama.genetic_programming.selection import eliminate_from_pareto - - def test_individual_length(GNB, ForestPipeline, LinearSVC): assert 1 == len(list(GNB.primitives)) assert 2 == len(list(ForestPipeline.primitives)) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index da4c94f0..04ae5b89 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -1,56 +1,42 @@ -import subprocess -import sys -from typing import List -import gama - +import pytest -def cli_command(file) -> List[str]: - return [sys.executable, "gama/utilities/cli.py", file, "-dry"] +import gama +from gama.utilities.cli import main -def test_classifier_invocation(): - command = cli_command("tests/data/breast_cancer_train.arff") - process = 
subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert 0 == process.returncode, process.stderr - assert "classification" in str(process.stdout) +def test_classifier_invocation(capfd): + main("tests/data/breast_cancer_train.arff -dry") + out, err = capfd.readouterr() + assert "classification" in out -def test_classifier_invocation_csv(): - command = cli_command("tests/data/openml_d_23380.csv") - command.extend("--target TR".split(" ")) - process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert 0 == process.returncode, process.stderr - assert "classification" in str(process.stdout) +def test_classifier_invocation_csv(capfd): + main("tests/data/openml_d_23380.csv --target TR -dry") + out, err = capfd.readouterr() + assert "classification" in out -def test_regressor_invocation(): - command = cli_command("tests/data/boston.arff") - process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert 0 == process.returncode, process.stderr - assert "regression" in str(process.stdout) +def test_regressor_invocation(capfd): + main("tests/data/boston.arff -dry") + out, err = capfd.readouterr() + assert "regression" in out -def test_complex_invocation(): - command = cli_command("tests/data/boston.arff") - command.extend("--target MEDV -py myfile.py -t 60 -v -n 4".split(" ")) - process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert 0 == process.returncode, process.stderr - assert "regression" in str(process.stdout) - assert gama.__version__ in str(process.stdout) - assert "n_jobs=4" in str(process.stdout) - assert "max_total_time=3600" in str(process.stdout) +def test_complex_invocation(capfd): + main("tests/data/boston.arff --target MEDV -py myfile.py -t 60 -v -n 4 -dry") + out, err = capfd.readouterr() + assert "regression" in out + assert gama.__version__ in out + assert "n_jobs=4" in out + assert "max_total_time=3600" in out def test_invalid_file(): - 
command = cli_command("invalid.file") - process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert 0 != process.returncode, "Invalid file should terminate with non-zero code" - assert "FileNotFoundError: invalid.file" in str(process.stderr) + with pytest.raises(FileNotFoundError): + main("invalid.file -dry") def test_invalid_argument(): - command = cli_command("tests/data/boston.arff") - command.append("-invalid") - process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert 0 != process.returncode, "Invalid arguments should cause non-zero exit code" - assert "unrecognized arguments: -invalid" in str(process.stderr) + with pytest.raises(SystemExit) as e: + main("tests/data/boston.arff -invalid") + assert 2 == e.value.code diff --git a/tests/unit/test_configuration_parser.py b/tests/unit/test_configuration_parser.py index df721e9f..5618be54 100644 --- a/tests/unit/test_configuration_parser.py +++ b/tests/unit/test_configuration_parser.py @@ -4,7 +4,7 @@ def test_merge_configuration(): - """ Test merging two simple configurations works as expected. """ + """Test merging two simple configurations works as expected.""" one = {"alpha": [0, 1], BernoulliNB: {"fit_prior": [True, False]}} two = {"alpha": [0, 2], GaussianNB: {"fit_prior": [True, False]}} diff --git a/tests/unit/test_data_formatting.py b/tests/unit/test_data_formatting.py index ef07ece7..ea72bb5c 100644 --- a/tests/unit/test_data_formatting.py +++ b/tests/unit/test_data_formatting.py @@ -5,7 +5,6 @@ from gama.utilities.preprocessing import select_categorical_columns import pandas as pd -import pytest from gama.data_formatting import format_x_y, format_y, series_looks_categorical @@ -58,7 +57,7 @@ def well_formatted_x_y(x, y, y_type): ) def test_format_x_y_missing_targets(self): - """ Samples with missing labels should be removed from training data. 
""" + """Samples with missing labels should be removed from training data.""" def well_formatted_x_y(x, y, y_type): assert isinstance(x, pd.DataFrame) diff --git a/tests/unit/test_data_loading.py b/tests/unit/test_data_loading.py index 83527020..aed32fc5 100644 --- a/tests/unit/test_data_loading.py +++ b/tests/unit/test_data_loading.py @@ -13,13 +13,13 @@ file_to_pandas, ) -NUMERIC_TYPES = [np.int, np.int32, np.int64, np.float] +NUMERIC_TYPES = [int, np.int32, np.int64, float] # https://www.openml.org/d/23380 METADATA_23380 = { "N": "INTEGER", - "TR": "{EL_500_20g/L,EL_500_4g/L,PP_333_20g/L,PP_333_4g/L,control,methanol_control}", - "TREE": "{D10,D13,D14,D16,D18,D19,D20,D21,D22,G10,G2,G20,G21,G24,G27,G28,G29,G4,G5,G6,G7,G8,G9,J1,J10,J12,J13,J15,J17,J19,J20,J25,J27,J29,J31,J6,J8,M10,M17,M20,M25,M33,M6,O20,O27,O28,O33,O3O,Q12,Q17,Q19,Q23,Q25,Q3,Q34,Q4,Q5}", + "TR": "{EL_500_20g/L,EL_500_4g/L,PP_333_20g/L,PP_333_4g/L,control,methanol_control}", # noqa: E501 + "TREE": "{D10,D13,D14,D16,D18,D19,D20,D21,D22,G10,G2,G20,G21,G24,G27,G28,G29,G4,G5,G6,G7,G8,G9,J1,J10,J12,J13,J15,J17,J19,J20,J25,J27,J29,J31,J6,J8,M10,M17,M20,M25,M33,M6,O20,O27,O28,O33,O3O,Q12,Q17,Q19,Q23,Q25,Q3,Q34,Q4,Q5}", # noqa: E501 "BR": "{A,B,C,D,E,F,G,H,I,J}", "TL": "REAL", "IN": "INTEGER", @@ -43,7 +43,7 @@ def _test_df_d23380(df): def _test_x_y_d23380(x, y): - """ Test if types are as expected from https://www.openml.org/d/23380 """ + """Test if types are as expected from https://www.openml.org/d/23380""" assert isinstance(x, pd.DataFrame) assert (2796, 34) == x.shape assert 68100 == x.isnull().sum().sum() diff --git a/tests/unit/test_ea_crossover.py b/tests/unit/test_ea_crossover.py index 22120ca6..e949339c 100644 --- a/tests/unit/test_ea_crossover.py +++ b/tests/unit/test_ea_crossover.py @@ -9,7 +9,7 @@ def test_shared_terminals(SS_BNB, RS_MNB, GNB): - """ Test shared terminals are found, if they exist. 
""" + """Test shared terminals are found, if they exist.""" assert 0 == len(list(_shared_terminals(SS_BNB, SS_BNB, value_match="different"))) assert 2 == len(list(_shared_terminals(SS_BNB, SS_BNB, value_match="equal"))) assert 2 == len(list(_shared_terminals(SS_BNB, SS_BNB, value_match="all"))) @@ -22,7 +22,7 @@ def test_shared_terminals(SS_BNB, RS_MNB, GNB): def test_crossover_primitives(SS_BNB, RS_MNB): - """ Two individuals of at least length 2 produce two new ones with crossover. """ + """Two individuals of at least length 2 produce two new ones with crossover.""" ind1_copy, ind2_copy = SS_BNB.copy_as_new(), RS_MNB.copy_as_new() # Cross-over is in-place @@ -35,7 +35,7 @@ def test_crossover_primitives(SS_BNB, RS_MNB): def test_crossover_terminal(SS_BNB, RS_MNB): - """ Two individuals with shared Terminals produce two new ones with crossover. """ + """Two individuals with shared Terminals produce two new ones with crossover.""" ind1_copy, ind2_copy = SS_BNB.copy_as_new(), RS_MNB.copy_as_new() # Cross-over is in-place crossover_terminals(SS_BNB, RS_MNB) @@ -47,7 +47,7 @@ def test_crossover_terminal(SS_BNB, RS_MNB): def test_crossover(SS_BNB, RS_MNB): - """ Two eligible individuals should produce two new individuals with crossover. """ + """Two eligible individuals should produce two new individuals with crossover.""" ind1_copy, ind2_copy = SS_BNB.copy_as_new(), RS_MNB.copy_as_new() # Cross-over is in-place random_crossover(SS_BNB, RS_MNB) @@ -58,7 +58,7 @@ def test_crossover(SS_BNB, RS_MNB): def test_crossover_max_length_exceeded(SS_RBS_SS_BNB, RS_MNB): - """ Raise ValueError if either provided individual exceeds `max_length`. 
""" + """Raise ValueError if either provided individual exceeds `max_length`.""" with pytest.raises(ValueError) as _: random_crossover(SS_RBS_SS_BNB, RS_MNB, max_length=2) @@ -67,7 +67,7 @@ def test_crossover_max_length_exceeded(SS_RBS_SS_BNB, RS_MNB): def test_crossover_max_length(SS_RBS_SS_BNB): - """ Setting `max_length` affects only maximum produced length. """ + """Setting `max_length` affects only maximum produced length.""" primitives_in_parent = len(SS_RBS_SS_BNB.primitives) produced_lengths = [] for _ in range(60): # guarantees all length pipelines are produced with prob >0.999 diff --git a/tests/unit/test_ea_metrics.py b/tests/unit/test_ea_metrics.py index e251f171..6fd0b3ba 100644 --- a/tests/unit/test_ea_metrics.py +++ b/tests/unit/test_ea_metrics.py @@ -7,7 +7,7 @@ def _test_metric(metric, y_true, y_pred, max_score, prediction_score): - """ Metric is calculated directly with different input formats. """ + """Metric is calculated directly with different input formats.""" def as_1d_array(list_): return np.asarray(list_).reshape(-1, 1) @@ -55,7 +55,7 @@ def test_logloss_numeric(): log_loss_metric = Metric("neg_log_loss") y_true = np.asarray([1, 0, 0, 0, 1]) y_1_mistake_ohe = np.asarray([[0, 1], [0, 1], [1, 0], [1, 0], [0, 1]]) - one_mistake_logloss = -6.907755278982137 + one_mistake_logloss = -7.20873067782343 _test_metric( log_loss_metric, y_true, diff --git a/tests/unit/test_ea_mutation.py b/tests/unit/test_ea_mutation.py index d55e2a4b..5440bfd8 100644 --- a/tests/unit/test_ea_mutation.py +++ b/tests/unit/test_ea_mutation.py @@ -15,14 +15,17 @@ def test_mut_replace_terminal(ForestPipeline, pset): - """ Tests if mut_replace_terminal replaces exactly one terminal. 
""" + """Tests if mut_replace_terminal replaces exactly one terminal.""" _test_mutation( - ForestPipeline, mut_replace_terminal, _mut_replace_terminal_is_applied, pset, + ForestPipeline, + mut_replace_terminal, + _mut_replace_terminal_is_applied, + pset, ) def test_mut_replace_terminal_none_available(GNB, pset): - """ mut_replace_terminal raises an exception if no valid mutation is possible. """ + """mut_replace_terminal raises an exception if no valid mutation is possible.""" with pytest.raises(ValueError) as error: mut_replace_terminal(GNB, pset) @@ -30,26 +33,26 @@ def test_mut_replace_terminal_none_available(GNB, pset): def test_mut_replace_primitive_len_1(LinearSVC, pset): - """ mut_replace_primitive replaces exactly one primitive. """ + """mut_replace_primitive replaces exactly one primitive.""" _test_mutation( LinearSVC, mut_replace_primitive, _mut_replace_primitive_is_applied, pset ) def test_mut_replace_primitive_len_2(ForestPipeline, pset): - """ mut_replace_primitive replaces exactly one primitive. """ + """mut_replace_primitive replaces exactly one primitive.""" _test_mutation( ForestPipeline, mut_replace_primitive, _mut_replace_primitive_is_applied, pset ) def test_mut_insert(ForestPipeline, pset): - """ mut_insert inserts at least one primitive. """ + """mut_insert inserts at least one primitive.""" _test_mutation(ForestPipeline, mut_insert, _mut_insert_is_applied, pset) def test_random_valid_mutation_with_all(ForestPipeline, pset): - """ Test if a valid mutation is applied at random. + """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. Because of the random nature, we repeat this enough times to ensure @@ -76,7 +79,7 @@ def test_random_valid_mutation_with_all(ForestPipeline, pset): def test_random_valid_mutation_without_shrink(LinearSVC, pset): - """ Test if a valid mutation is applied at random. + """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. 
Because of the random nature, we repeat this enough times to ensure @@ -101,7 +104,7 @@ def test_random_valid_mutation_without_shrink(LinearSVC, pset): def test_random_valid_mutation_without_terminal(GNB, pset): - """ Test if a valid mutation is applied at random. + """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. Because of the random nature, we repeat this enough times to ensure @@ -125,7 +128,7 @@ def test_random_valid_mutation_without_terminal(GNB, pset): def test_random_valid_mutation_without_insert(ForestPipeline, pset): - """ Test if a valid mutation is applied at random. + """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. Because of the random nature, we repeat this enough times to ensure @@ -158,7 +161,7 @@ def _min_trials(n_mutations: int, max_error_rate: float = 0.0001): def _mut_shrink_is_applied(original, mutated): - """ Checks if mutation was caused by `mut_shrink`. + """Checks if mutation was caused by `mut_shrink`. :param original: the pre-mutation individual :param mutated: the post-mutation individual @@ -175,7 +178,7 @@ def _mut_shrink_is_applied(original, mutated): def _mut_insert_is_applied(original, mutated): - """ Checks if mutation was caused by `mut_insert`. + """Checks if mutation was caused by `mut_insert`. :param original: the pre-mutation individual :param mutated: the post-mutation individual @@ -193,7 +196,7 @@ def _mut_insert_is_applied(original, mutated): def _mut_replace_terminal_is_applied(original, mutated): - """ Checks if mutation was caused by `gama.ea.mutation.mut_replace_terminal`. + """Checks if mutation was caused by `gama.ea.mutation.mut_replace_terminal`. 
:param original: the pre-mutation individual :param mutated: the post-mutation individual @@ -218,7 +221,7 @@ def _mut_replace_terminal_is_applied(original, mutated): def _mut_replace_primitive_is_applied(original, mutated): - """ Checks if mutation was caused by `gama.ea.mutation.mut_replace_primitive`. + """Checks if mutation was caused by `gama.ea.mutation.mut_replace_primitive`. :param original: the pre-mutation individual :param mutated: the post-mutation individual @@ -243,7 +246,7 @@ def _mut_replace_primitive_is_applied(original, mutated): def _test_mutation(individual: Individual, mutation, mutation_check, pset): - """ Test if an individual mutated by `mutation` passes `mutation_check` and compiles. + """Test if an individual mutated by `mutation` passes `mutation_check` and compiles. :param individual: The individual to be mutated. :param mutation: function: ind -> (ind,). Should mutate the individual diff --git a/tests/unit/test_evaluation_library.py b/tests/unit/test_evaluation_library.py index a599ea5c..f55aff8f 100644 --- a/tests/unit/test_evaluation_library.py +++ b/tests/unit/test_evaluation_library.py @@ -13,7 +13,9 @@ def _short_name(): def _mock_evaluation( individual: Individual, - predictions: Optional[Union[np.ndarray, pd.DataFrame, pd.Series]] = np.zeros(30,), + predictions: Optional[Union[np.ndarray, pd.DataFrame, pd.Series]] = np.zeros( + 30, + ), score: Optional[Tuple[float, ...]] = None, estimators: List[object] = None, start_time: int = 0, @@ -60,7 +62,7 @@ def test_evaluation_convert_predictions_from_dataframe_to_nparray(GNB): def test_evaluation_library_max_number_evaluations(GNB): - """ `max_number_of_evaluations` restricts the size of `top_evaluations`. 
""" + """`max_number_of_evaluations` restricts the size of `top_evaluations`.""" lib200 = EvaluationLibrary(m=200, sample=None, cache=_short_name()) lib_unlimited = EvaluationLibrary(m=None, sample=None, cache=_short_name()) @@ -91,7 +93,7 @@ def test_evaluation_library_max_number_evaluations(GNB): def test_evaluation_library_n_best(GNB): - """ Test `n_best` normal usage. """ + """Test `n_best` normal usage.""" lib = EvaluationLibrary(m=None, sample=None, cache=_short_name()) try: @@ -120,7 +122,7 @@ def test_evaluation_library_n_best(GNB): def _test_subsample(sample, predictions, subsample, individual): - """ Test the `predictions` correctly get sampled to `subsample`. """ + """Test the `predictions` correctly get sampled to `subsample`.""" lib = EvaluationLibrary(sample=sample, cache=_short_name()) try: @@ -137,7 +139,7 @@ def _test_subsample(sample, predictions, subsample, individual): def test_evaluation_library_sample_np2d_prediction(GNB): - """ `prediction_sample` set with np.ndarray samples predictions with ndim=2. """ + """`prediction_sample` set with np.ndarray samples predictions with ndim=2.""" probabilities = np.random.random(size=(30, 5)) _test_subsample( sample=np.asarray([0, 1, 3]), @@ -148,7 +150,7 @@ def test_evaluation_library_sample_np2d_prediction(GNB): def test_evaluation_library_sample_pd2d_prediction(GNB): - """ `prediction_sample` set with np.ndarray samples pd.DataFrame predictions. """ + """`prediction_sample` set with np.ndarray samples pd.DataFrame predictions.""" probabilities = pd.DataFrame(np.random.random(size=(30, 5))) _test_subsample( sample=np.asarray([0, 1, 3]), @@ -159,7 +161,7 @@ def test_evaluation_library_sample_pd2d_prediction(GNB): def test_evaluation_library_sample_np1d_prediction(GNB): - """ `prediction_sample` set with np.ndarray samples predictions with ndim=1. 
""" + """`prediction_sample` set with np.ndarray samples predictions with ndim=1.""" probabilities = np.random.random(size=(30,)) _test_subsample( sample=np.asarray([0, 1, 3]), @@ -170,7 +172,7 @@ def test_evaluation_library_sample_np1d_prediction(GNB): def test_evaluation_library_sample_pd1d_prediction(GNB): - """ `prediction_sample` set with np.ndarray samples pd.Series predictions. """ + """`prediction_sample` set with np.ndarray samples pd.Series predictions.""" probabilities = pd.Series(np.random.random(size=(30,))) _test_subsample( sample=np.asarray([0, 1, 3]), diff --git a/tests/unit/test_gama.py b/tests/unit/test_gama.py index 8d021aa4..9009136c 100644 --- a/tests/unit/test_gama.py +++ b/tests/unit/test_gama.py @@ -1,8 +1,18 @@ import pytest + import gama +def test_output_directory_must_be_empty(tmp_path): + with open(tmp_path / "remove.txt", "w") as fh: + fh.write("Created for GAMA unit test.") + + with pytest.raises(ValueError) as e: + gama.GamaClassifier(output_directory=tmp_path) + assert "`output_directory`" in str(e.value) + + def test_reproducible_initialization(): g1 = gama.GamaClassifier(random_state=1, store="nothing") pop1 = [g1._operator_set.individual() for _ in range(10)] diff --git a/tests/unit/test_logging_gamareport.py b/tests/unit/test_logging_gamareport.py index 08b4c40e..e02a059e 100644 --- a/tests/unit/test_logging_gamareport.py +++ b/tests/unit/test_logging_gamareport.py @@ -1,8 +1,9 @@ +import pytest from gama.logging.GamaReport import GamaReport def test_gamareport_from_log(): - """ GamaReport can be constructed from a log that recorded RandomSearch. """ + """GamaReport can be constructed from a log that recorded RandomSearch.""" # We refer to a static log, this makes it independent of other unit tests, # but it also makes it independent of the actual changes in gama logging. 
# Cons: @@ -12,7 +13,7 @@ def test_gamareport_from_log(): # + backwards compatibility test for GamaReport # Perhaps we can/should link to the log file used in the documentation. log_dir = "tests/data/RandomSearch" - report = GamaReport(log_dir) + report = GamaReport(log_dir, strict=False) assert report.name == "RandomSearch" assert "RandomSearch" == report.search_method assert 3 == len(report.phases) @@ -24,10 +25,18 @@ def test_gamareport_from_log(): ) +def test_gamareport_from_log_strict_but_outdated(): + """GamaReport raises ValueError for an outdated log when strict=True.""" + # See caveat of test_gamareport_from_log + log_dir = "tests/data/RandomSearch" + with pytest.raises(ValueError): + GamaReport(log_dir, strict=True) + + def test_gamareport_asha_from_log(): - """ GamaReport can be constructed from a log that recorded ASHA. """ + """GamaReport can be constructed from a log that recorded ASHA.""" log_dir = "tests/data/ASHA" - report = GamaReport(log_dir) + report = GamaReport(log_dir, strict=False) assert report.name == "ASHA" assert "AsynchronousSuccessiveHalving" == report.search_method assert 3 == len(report.phases) @@ -40,9 +49,9 @@ def test_gamareport_asha_from_log(): def test_gamareport_asyncEA_from_log(): - """ GamaReport can be constructed from a log that recorded AsyncEA. """ + """GamaReport can be constructed from a log that recorded AsyncEA.""" log_dir = "tests/data/AsyncEA" - report = GamaReport(log_dir) + report = GamaReport(log_dir, strict=False) assert report.name == "AsyncEA" assert "AsyncEA" == report.search_method assert 3 == len(report.phases) diff --git a/tests/unit/test_nsga2.py b/tests/unit/test_nsga2.py index 53f16f37..baa139ed 100644 --- a/tests/unit/test_nsga2.py +++ b/tests/unit/test_nsga2.py @@ -7,7 +7,8 @@ def _tuples_to_NSGAMeta(tuples: List[Tuple]) -> List[NSGAMeta]: - """ Converts a list of tuples to NSGAMeta objects. 
""" + """Converts a list of tuples to NSGAMeta objects.""" + # Can't declare it directly in a loop as it does not create a new scope. def fetch_value(i): return lambda x: x[i] diff --git a/tests/unit/test_postprocessing.py b/tests/unit/test_postprocessing.py index 5fbbbd04..f444bb3d 100644 --- a/tests/unit/test_postprocessing.py +++ b/tests/unit/test_postprocessing.py @@ -7,7 +7,7 @@ def test_no_post_processing(): - """ Test that NoPostProcessing does nothing and no model is returned. """ + """Test that NoPostProcessing does nothing and no model is returned.""" postprocessing = NoPostProcessing() model = postprocessing.post_process() assert pytest.approx(0.0) == postprocessing.time_fraction diff --git a/tests/unit/test_scikitlearn.py b/tests/unit/test_scikitlearn.py index f7f3bd7b..6577b77c 100644 --- a/tests/unit/test_scikitlearn.py +++ b/tests/unit/test_scikitlearn.py @@ -5,7 +5,7 @@ compile_individual, evaluate_pipeline, ) -from gama.utilities.metrics import Metric, scoring_to_metric +from gama.utilities.metrics import scoring_to_metric def test_evaluate_individual(SS_BNB): @@ -18,7 +18,9 @@ def fake_evaluate_pipeline(pipeline, *args, **kwargs): return None, (1.0,), [], None evaluation = evaluate_individual( - SS_BNB, evaluate_pipeline=fake_evaluate_pipeline, add_length_to_score=True, + SS_BNB, + evaluate_pipeline=fake_evaluate_pipeline, + add_length_to_score=True, ) individual = evaluation.individual assert individual == SS_BNB @@ -49,7 +51,11 @@ def test_evaluate_pipeline(SS_BNB): x, y = pd.DataFrame(x), pd.Series(y) prediction, scores, estimators, errors = evaluate_pipeline( - SS_BNB.pipeline, x, y, timeout=60, metrics=scoring_to_metric("accuracy"), + SS_BNB.pipeline, + x, + y, + timeout=60, + metrics=scoring_to_metric("accuracy"), ) assert 1 == len(scores) assert errors is None diff --git a/tests/unit/test_utilities_generic_paretofront.py b/tests/unit/test_utilities_generic_paretofront.py index a000f181..eeacf134 100644 --- 
a/tests/unit/test_utilities_generic_paretofront.py +++ b/tests/unit/test_utilities_generic_paretofront.py @@ -2,7 +2,7 @@ def test_pareto_initialization_empty(): - """ Test initialization of empty front. """ + """Test initialization of empty front.""" pf = ParetoFront() assert list(pf) == [] @@ -11,7 +11,7 @@ def test_pareto_initialization_empty(): def test_pareto_initialization_pareto_front(): - """ Initialization with only Pareto front elements. """ + """Initialization with only Pareto front elements.""" list_ = [(1, 2, 3), (3, 2, 1), (0, 5, 0)] pf = ParetoFront(list_) @@ -21,7 +21,7 @@ def test_pareto_initialization_pareto_front(): def test_pareto_initialization_with_inferiors(): - """" Initialization containing elements that should not be in the Pareto front. """ + """Initialization containing elements that should not be in the Pareto front.""" list_ = [(1, 2), (4, 3), (4, 5), (5, 4)] pf = ParetoFront(list_) @@ -31,7 +31,7 @@ def test_pareto_initialization_with_inferiors(): def test_pareto_initialization_with_duplicates(): - """ Initialization with duplicate elements. """ + """Initialization with duplicate elements.""" list_ = [(1, 2), (3, 1), (1, 2)] pf = ParetoFront(list_) @@ -41,7 +41,7 @@ def test_pareto_initialization_with_duplicates(): def test_pareto_update_unique(): - """ Creating Pareto front by updating one by one. """ + """Creating Pareto front by updating one by one.""" list_ = [(1, 2, 3), (3, 2, 1), (0, 5, 0)] pf = ParetoFront() @@ -51,7 +51,7 @@ def test_pareto_update_unique(): def test_pareto_front_clear(): - """ Calling `clear` empties the Pareto front. """ + """Calling `clear` empties the Pareto front.""" pf = ParetoFront([(1, 2), (2, 1)]) assert list(pf) == [(1, 2), (2, 1)] @@ -60,7 +60,7 @@ def test_pareto_front_clear(): def test_pareto_front_custom_function(): - """ Test construction of Pareto front with custom object and value function. 
""" + """Test construction of Pareto front with custom object and value function.""" class A: def __init__(self, v1, v2): diff --git a/tests/unit/test_utilities_generic_stopwatch.py b/tests/unit/test_utilities_generic_stopwatch.py index 84f50795..e1fa3af7 100644 --- a/tests/unit/test_utilities_generic_stopwatch.py +++ b/tests/unit/test_utilities_generic_stopwatch.py @@ -3,17 +3,17 @@ from gama.utilities.generic.stopwatch import Stopwatch -ROUND_ERROR = 0.02 +ROUND_ERROR = 0.5 def test_stopwatch_initialization_zero(): - """ Test that elapsed time is 0 if stopwatch is not started yet. """ + """Test that elapsed time is 0 if stopwatch is not started yet.""" sw = Stopwatch() assert pytest.approx(0, abs=ROUND_ERROR) == sw.elapsed_time def test_stopwatch_elapsed_time_while_running(): - """ Tests that elapsed_time increments as expected while running. """ + """Tests that elapsed_time increments as expected while running.""" with Stopwatch() as sw: assert pytest.approx(0, abs=ROUND_ERROR) == sw.elapsed_time time.sleep(1) @@ -21,7 +21,7 @@ def test_stopwatch_elapsed_time_while_running(): def test_stopwatch_elapsed_time_after_running(): - """ Tests that time elapsed is stored after exiting the context. """ + """Tests that time elapsed is stored after exiting the context.""" with Stopwatch() as sw: time.sleep(1) time.sleep(1) diff --git a/tests/unit/test_utilities_generic_timekeeper.py b/tests/unit/test_utilities_generic_timekeeper.py index fcf466f5..feb10c38 100644 --- a/tests/unit/test_utilities_generic_timekeeper.py +++ b/tests/unit/test_utilities_generic_timekeeper.py @@ -4,18 +4,18 @@ def _time_approx(seconds: int): - return pytest.approx(seconds, abs=0.03) + return pytest.approx(seconds, abs=0.5) def test_timekeeper_total_time_remaning_error_if_total_time_zero(): - """ Ensure `total_time_remaining` is unavailable if `total_time` is not set. 
""" + """Ensure `total_time_remaining` is unavailable if `total_time` is not set.""" timekeeper = TimeKeeper() with pytest.raises(RuntimeError): _ = timekeeper.total_time_remaining def test_timekeeper_stopwatch_normal_behavior(): - """ Normal stopwatch functionality for stopwatch returned by context manager. """ + """Normal stopwatch functionality for stopwatch returned by context manager.""" timekeeper = TimeKeeper() with timekeeper.start_activity("test activity", time_limit=3) as sw: assert _time_approx(0) == sw.elapsed_time @@ -44,7 +44,7 @@ def test_timekeeper_stopwatch_normal_behavior(): def test_timekeeper_total_remaining_time(): - """ Ensure total remaining time is correct across activities. """ + """Ensure total remaining time is correct across activities.""" total_time = 10 timekeeper = TimeKeeper(total_time=total_time) assert timekeeper.total_time_remaining == total_time