Skip to content

Commit

Permalink
benchmarks updates (#273)
Browse files Browse the repository at this point in the history
* Switch to mamba

* Switch to mamba

* fix/

* Add mamba to env

* reduce

* fix?

* fix skipping

* Speedup?

* Remove custom bench env yaml

* [skip-ci] fix bench env

* [skip-ci]fix?

* [skip-ci] again

* Revert "[skip-ci] again"

This reverts commit b440a4e.

* Revert "[skip-ci]fix?"

This reverts commit f045a64.

* Revert "[skip-ci] fix bench env"

This reverts commit 2b5add3.

* Revert "Remove custom bench env yaml"

This reverts commit 518ff1a.

* add back custom bench env

* fix reduce bare with 2D arrays

* try avoiding env file again

* small cleanups

* try again
  • Loading branch information
dcherian authored Oct 11, 2023
1 parent 0e1b0d8 commit 9dd126c
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 124 deletions.
20 changes: 6 additions & 14 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,13 @@ jobs:
- name: Set up conda environment
uses: mamba-org/setup-micromamba@v1
with:
environment-file: ci/environment.yml
environment-name: flox-tests
environment-name: flox-bench
create-args: >-
python=3.10
asv
mamba
init-shell: bash
cache-environment: true
# create-args: |
# python="${{ matrix.python-version }}"

# - name: Setup some dependencies
# shell: bash -l {0}
# run: |
# pip install asv
# sudo apt-get update -y

- name: Run benchmarks
shell: bash -l {0}
Expand All @@ -47,14 +42,11 @@ jobs:
ASV_FACTOR: 1.5
ASV_SKIP_SLOW: 1
run: |
set -x
# set -x
# ID this runner
asv machine --yes
echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
# Use mamba for env creation
# export CONDA_EXE=$(which mamba)
export CONDA_EXE=$(which conda)
# Run benchmarks for current commit against base
ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR"
asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
Expand Down
75 changes: 5 additions & 70 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,8 @@
// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["main"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
"dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
"install_timeout": 600,
Expand All @@ -55,63 +42,11 @@

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": ["3.9"],

// The list of conda channel names to be searched for benchmark
// dependency packages in the specified order
"conda_channels": ["conda-forge", "nodefaults"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
// PyPi, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
"matrix": {
"numbagg": [""],
"numpy_groupies": [""],
"numpy": [""],
"pandas": [""],
"dask-core": [""],
"xarray": [""],
},

// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],
// "pythons": ["3.9"],

"environment_type": "mamba",
"conda_channels": ["conda-forge"],
"conda_environment_file": "../ci/benchmark.yml",

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
Expand Down
80 changes: 40 additions & 40 deletions asv_bench/benchmarks/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@
import flox.aggregations

N = 3000
funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "var", "count", "all"]
funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
engines = ["flox", "numpy", "numbagg"]
expected_groups = {
"None": None,
"RangeIndex": pd.RangeIndex(5),
"bins": pd.IntervalIndex.from_breaks([1, 2, 4]),
}
expected_names = tuple(expected_groups)

NUMBAGG_FUNCS = ["nansum", "nanmean", "nanmax", "count", "all"]

numbagg_skip = [
(func, expected_names[0], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS
] + [(func, expected_names[1], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS]
# Parameter combinations that asv should skip for the "numbagg" engine:
# numbagg only implements the reductions listed in NUMBAGG_FUNCS, so every
# other func is skipped for each expected_groups flavour.
numbagg_skip = []
for name in expected_names:
    # BUG FIX: use the loop variable `name`, not `expected_names[0]`.
    # The original emitted skip-tuples for the first expected_groups name on
    # every iteration, leaving the other flavours unskipped and producing
    # duplicate entries.
    numbagg_skip.extend(
        (func, name, "numbagg") for func in funcs if func not in NUMBAGG_FUNCS
    )


def setup_jit():
Expand All @@ -42,7 +42,7 @@ class ChunkReduce:
"""Time the core reduction function."""

min_run_count = 5
warmup_time = 1
warmup_time = 0.5

def setup(self, *args, **kwargs):
raise NotImplementedError
Expand All @@ -59,18 +59,6 @@ def time_reduce(self, func, expected_name, engine):
expected_groups=expected_groups[expected_name],
)

@parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
def time_reduce_bare(self, func, engine):
    """Time the low-level aggregation kernel directly, bypassing groupby setup."""
    # Bind instance state to locals, then hand everything to the kernel in one call.
    labels, array = self.labels, self.array
    flox.aggregations.generic_aggregate(
        labels, array, axis=-1, size=5, func=func, engine=engine, fill_value=0
    )

@skip_for_params(numbagg_skip)
@parameterize({"func": funcs, "expected_name": expected_names, "engine": engines})
def peakmem_reduce(self, func, expected_name, engine):
Expand All @@ -92,13 +80,18 @@ def setup(self, *args, **kwargs):
if "numbagg" in args:
setup_jit()


class ChunkReduce1DUnsorted(ChunkReduce):
    """1D reduction benchmark where the 5 group labels are randomly shuffled."""

    def setup(self, *args, **kwargs):
        # 5 equal-sized groups, then permute so labels arrive unsorted.
        shuffled = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
        self.array = np.ones((N,))
        self.labels = shuffled
        self.axis = -1
        setup_jit()
@parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
def time_reduce_bare(self, func, engine):
    """Time generic_aggregate directly, without the full groupby machinery.

    TODO: migrate to the other test cases, but we'll have to setup labels
    appropriately ;(
    """
    kwargs = dict(axis=self.axis, func=func, engine=engine, fill_value=0)
    flox.aggregations.generic_aggregate(self.labels, self.array, **kwargs)


class ChunkReduce2D(ChunkReduce):
Expand All @@ -109,14 +102,6 @@ def setup(self, *args, **kwargs):
setup_jit()


class ChunkReduce2DUnsorted(ChunkReduce):
    """2D reduction over the last axis with randomly permuted group labels."""

    def setup(self, *args, **kwargs):
        # N//5 groups of 5 along the reduced axis, permuted to be unsorted.
        repeated = np.repeat(np.arange(N // 5), repeats=5)
        self.array = np.ones((N, N))
        self.labels = np.random.permutation(repeated)
        self.axis = -1
        setup_jit()


class ChunkReduce2DAllAxes(ChunkReduce):
def setup(self, *args, **kwargs):
self.array = np.ones((N, N))
Expand All @@ -125,9 +110,24 @@ def setup(self, *args, **kwargs):
setup_jit()


class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
    """2D reduction over all axes (axis=None) with randomly permuted labels."""

    def setup(self, *args, **kwargs):
        # Same label layout as the 2D case, but reduce over the whole array.
        repeated = np.repeat(np.arange(N // 5), repeats=5)
        self.array = np.ones((N, N))
        self.labels = np.random.permutation(repeated)
        self.axis = None
        setup_jit()
# class ChunkReduce2DUnsorted(ChunkReduce):
# def setup(self, *args, **kwargs):
# self.array = np.ones((N, N))
# self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
# self.axis = -1
# setup_jit()

# class ChunkReduce1DUnsorted(ChunkReduce):
# def setup(self, *args, **kwargs):
# self.array = np.ones((N,))
# self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
# self.axis = -1
# setup_jit()


# class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
# def setup(self, *args, **kwargs):
# self.array = np.ones((N, N))
# self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
# self.axis = None
# setup_jit()
15 changes: 15 additions & 0 deletions ci/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Conda environment for the flox benchmark suite.
# Referenced by asv via "conda_environment_file": "../ci/benchmark.yml"
# in asv_bench/asv.conf.json; the CI workflow also creates an env with
# the matching name "flox-bench".
name: flox-bench
channels:
  - conda-forge
dependencies:
  - asv
  - cachey
  - dask-core
  - numpy>=1.20
  - mamba  # asv's "environment_type" is "mamba", so mamba must be available
  - pip
  - python=3.10
  - xarray
  - numpy_groupies>=0.9.19
  - numbagg>=0.3
  - wheel

0 comments on commit 9dd126c

Please sign in to comment.