diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 5571d27af..d8c39cdfd 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -24,18 +24,13 @@ jobs:
       - name: Set up conda environment
         uses: mamba-org/setup-micromamba@v1
         with:
-          environment-file: ci/environment.yml
-          environment-name: flox-tests
+          environment-name: flox-bench
+          create-args: >-
+            python=3.10
+            asv
+            mamba
           init-shell: bash
           cache-environment: true
-          # create-args: |
-          #   python="${{ matrix.python-version }}"
-
-      # - name: Setup some dependencies
-      #   shell: bash -l {0}
-      #   run: |
-      #     pip install asv
-      #     sudo apt-get update

       - name: Run benchmarks
         shell: bash -l {0}
@@ -47,14 +42,11 @@ jobs:
           ASV_FACTOR: 1.5
           ASV_SKIP_SLOW: 1
         run: |
-          set -x
+          # set -x
           # ID this runner
           asv machine --yes
           echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
           echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
-          # Use mamba for env creation
-          # export CONDA_EXE=$(which mamba)
-          export CONDA_EXE=$(which conda)
           # Run benchmarks for current commit against base
           ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR"
           asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index cab53382a..cfcbdf230 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -31,21 +31,8 @@
     // List of branches to benchmark. If not provided, defaults to "master"
     // (for git) or "default" (for mercurial).
     "branches": ["main"], // for git
-    // "branches": ["default"],    // for mercurial
-
-    // The DVCS being used. If not set, it will be automatically
-    // determined from "repo" by looking at the protocol in the URL
-    // (if remote), or by looking for special directories, such as
-    // ".git" (if local).
     "dvcs": "git",

-    // The tool to use to create environments. May be "conda",
-    // "virtualenv" or other value depending on the plugins in use.
-    // If missing or the empty string, the tool will be automatically
-    // determined by looking for tools on the PATH environment
-    // variable.
-    "environment_type": "conda",
-
     // timeout in seconds for installing any dependencies in environment
     // defaults to 10 min
     "install_timeout": 600,
@@ -55,63 +42,11 @@

     // The Pythons you'd like to test against. If not provided, defaults
     // to the current version of Python used to run `asv`.
-    "pythons": ["3.9"],
-
-    // The list of conda channel names to be searched for benchmark
-    // dependency packages in the specified order
-    "conda_channels": ["conda-forge", "nodefaults"],
-
-    // The matrix of dependencies to test. Each key is the name of a
-    // package (in PyPI) and the values are version numbers. An empty
-    // list or empty string indicates to just test against the default
-    // (latest) version. null indicates that the package is to not be
-    // installed. If the package to be tested is only available from
-    // PyPi, and the 'environment_type' is conda, then you can preface
-    // the package name by 'pip+', and the package will be installed via
-    // pip (with all the conda available packages installed first,
-    // followed by the pip installed packages).
-    //
-    "matrix": {
-        "numbagg": [""],
-        "numpy_groupies": [""],
-        "numpy": [""],
-        "pandas": [""],
-        "dask-core": [""],
-        "xarray": [""],
-    },
-
-    // Combinations of libraries/python versions can be excluded/included
-    // from the set to test. Each entry is a dictionary containing additional
-    // key-value pairs to include/exclude.
-    //
-    // An exclude entry excludes entries where all values match. The
-    // values are regexps that should match the whole string.
-    //
-    // An include entry adds an environment. Only the packages listed
-    // are installed. The 'python' key is required. The exclude rules
-    // do not apply to includes.
-    //
-    // In addition to package names, the following keys are available:
-    //
-    // - python
-    //     Python version, as in the *pythons* variable above.
-    // - environment_type
-    //     Environment type, as above.
-    // - sys_platform
-    //     Platform, as in sys.platform. Possible values for the common
-    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
-    //
-    // "exclude": [
-    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
-    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
-    // ],
-    //
-    // "include": [
-    //     // additional env for python2.7
-    //     {"python": "2.7", "numpy": "1.8"},
-    //     // additional env if run on windows+conda
-    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
-    // ],
+    // "pythons": ["3.9"],
+
+    "environment_type": "mamba",
+    "conda_channels": ["conda-forge"],
+    "conda_environment_file": "../ci/benchmark.yml",

     // The directory (relative to the current directory) that benchmarks are
     // stored in. If not provided, defaults to "benchmarks"
diff --git a/asv_bench/benchmarks/reduce.py b/asv_bench/benchmarks/reduce.py
index add77c182..326b73566 100644
--- a/asv_bench/benchmarks/reduce.py
+++ b/asv_bench/benchmarks/reduce.py
@@ -6,20 +6,20 @@ import flox.aggregations


 N = 3000
-funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "var", "count", "all"]
+funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
 engines = ["flox", "numpy", "numbagg"]
 expected_groups = {
     "None": None,
-    "RangeIndex": pd.RangeIndex(5),
     "bins": pd.IntervalIndex.from_breaks([1, 2, 4]),
 }
 expected_names = tuple(expected_groups)

 NUMBAGG_FUNCS = ["nansum", "nanmean", "nanmax", "count", "all"]
-
-numbagg_skip = [
-    (func, expected_names[0], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS
-] + [(func, expected_names[1], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS]
+numbagg_skip = []
+for name in expected_names:
+    numbagg_skip.extend(
+        list((func, name, "numbagg") for func in funcs if func not in NUMBAGG_FUNCS)
+    )


 def setup_jit():
@@ -42,7 +42,7 @@ class ChunkReduce:
     """Time the core reduction function."""

     min_run_count = 5
-    warmup_time = 1
+    warmup_time = 0.5

     def setup(self, *args, **kwargs):
         raise NotImplementedError
@@ -59,18 +59,6 @@ def time_reduce(self, func, expected_name, engine):
             expected_groups=expected_groups[expected_name],
         )

-    @parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
-    def time_reduce_bare(self, func, engine):
-        flox.aggregations.generic_aggregate(
-            self.labels,
-            self.array,
-            axis=-1,
-            size=5,
-            func=func,
-            engine=engine,
-            fill_value=0,
-        )
-
     @skip_for_params(numbagg_skip)
     @parameterize({"func": funcs, "expected_name": expected_names, "engine": engines})
     def peakmem_reduce(self, func, expected_name, engine):
@@ -92,13 +80,18 @@ def setup(self, *args, **kwargs):
         if "numbagg" in args:
             setup_jit()

-
-class ChunkReduce1DUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N,))
-        self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
-        self.axis = -1
-        setup_jit()
+    @parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
+    def time_reduce_bare(self, func, engine):
+        # TODO: migrate to the other test cases, but we'll have to setup labels
+        # appropriately ;(
+        flox.aggregations.generic_aggregate(
+            self.labels,
+            self.array,
+            axis=self.axis,
+            func=func,
+            engine=engine,
+            fill_value=0,
+        )


 class ChunkReduce2D(ChunkReduce):
@@ -109,14 +102,6 @@ def setup(self, *args, **kwargs):
         setup_jit()


-class ChunkReduce2DUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N, N))
-        self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
-        self.axis = -1
-        setup_jit()
-
-
 class ChunkReduce2DAllAxes(ChunkReduce):
     def setup(self, *args, **kwargs):
         self.array = np.ones((N, N))
@@ -125,9 +110,24 @@ def setup(self, *args, **kwargs):
         setup_jit()


-class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N, N))
-        self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
-        self.axis = None
-        setup_jit()
+# class ChunkReduce2DUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N, N))
+#         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
+#         self.axis = -1
+#         setup_jit()
+
+# class ChunkReduce1DUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N,))
+#         self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
+#         self.axis = -1
+#         setup_jit()
+
+
+# class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N, N))
+#         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
+#         self.axis = None
+#         setup_jit()
diff --git a/ci/benchmark.yml b/ci/benchmark.yml
new file mode 100644
index 000000000..e4987000b
--- /dev/null
+++ b/ci/benchmark.yml
@@ -0,0 +1,15 @@
+name: flox-bench
+channels:
+  - conda-forge
+dependencies:
+  - asv
+  - cachey
+  - dask-core
+  - numpy>=1.20
+  - mamba
+  - pip
+  - python=3.10
+  - xarray
+  - numpy_groupies>=0.9.19
+  - numbagg>=0.3
+  - wheel
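
A rough way to sanity-check this configuration locally is to replay the workflow's asv invocation from asv_bench/. This is only a sketch, not part of the patch: it assumes asv and mamba are already installed, and the main/HEAD refs stand in for the PR base and head SHAs that the workflow passes to asv continuous.

    # run from the repository root; mirrors the "Run benchmarks" step above
    cd asv_bench
    asv machine --yes
    asv continuous --split --show-stderr --factor 1.5 main HEAD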