diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 5571d27af..d8c39cdfd 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -24,18 +24,13 @@ jobs:
       - name: Set up conda environment
         uses: mamba-org/setup-micromamba@v1
         with:
-          environment-file: ci/environment.yml
-          environment-name: flox-tests
+          environment-name: flox-bench
+          create-args: >-
+            python=3.10
+            asv
+            mamba
           init-shell: bash
           cache-environment: true
-          # create-args: |
-          #   python="${{ matrix.python-version }}"
-
-      # - name: Setup some dependencies
-      #   shell: bash -l {0}
-      #   run: |
-      #     pip install asv
-      #     sudo apt-get update

       - name: Run benchmarks
         shell: bash -l {0}
@@ -47,14 +42,11 @@ jobs:
           ASV_FACTOR: 1.5
           ASV_SKIP_SLOW: 1
         run: |
-          set -x
+          # set -x
           # ID this runner
           asv machine --yes
           echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
           echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
-          # Use mamba for env creation
-          # export CONDA_EXE=$(which mamba)
-          export CONDA_EXE=$(which conda)
           # Run benchmarks for current commit against base
           ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR"
           asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index cab53382a..cfcbdf230 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -31,21 +31,8 @@
     // List of branches to benchmark. If not provided, defaults to "master"
     // (for git) or "default" (for mercurial).
     "branches": ["main"], // for git
-    // "branches": ["default"],    // for mercurial
-
-    // The DVCS being used. If not set, it will be automatically
-    // determined from "repo" by looking at the protocol in the URL
-    // (if remote), or by looking for special directories, such as
-    // ".git" (if local).
     "dvcs": "git",

-    // The tool to use to create environments. May be "conda",
-    // "virtualenv" or other value depending on the plugins in use.
-    // If missing or the empty string, the tool will be automatically
-    // determined by looking for tools on the PATH environment
-    // variable.
-    "environment_type": "conda",
-
     // timeout in seconds for installing any dependencies in environment
     // defaults to 10 min
     "install_timeout": 600,
@@ -55,63 +42,11 @@

     // The Pythons you'd like to test against. If not provided, defaults
     // to the current version of Python used to run `asv`.
-    "pythons": ["3.9"],
-
-    // The list of conda channel names to be searched for benchmark
-    // dependency packages in the specified order
-    "conda_channels": ["conda-forge", "nodefaults"],
-
-    // The matrix of dependencies to test. Each key is the name of a
-    // package (in PyPI) and the values are version numbers. An empty
-    // list or empty string indicates to just test against the default
-    // (latest) version. null indicates that the package is to not be
-    // installed. If the package to be tested is only available from
-    // PyPi, and the 'environment_type' is conda, then you can preface
-    // the package name by 'pip+', and the package will be installed via
-    // pip (with all the conda available packages installed first,
-    // followed by the pip installed packages).
-    //
-    "matrix": {
-        "numbagg": [""],
-        "numpy_groupies": [""],
-        "numpy": [""],
-        "pandas": [""],
-        "dask-core": [""],
-        "xarray": [""],
-    },
-
-    // Combinations of libraries/python versions can be excluded/included
-    // from the set to test. Each entry is a dictionary containing additional
-    // key-value pairs to include/exclude.
-    //
-    // An exclude entry excludes entries where all values match. The
-    // values are regexps that should match the whole string.
-    //
-    // An include entry adds an environment. Only the packages listed
-    // are installed. The 'python' key is required. The exclude rules
-    // do not apply to includes.
-    //
-    // In addition to package names, the following keys are available:
-    //
-    // - python
-    //     Python version, as in the *pythons* variable above.
-    // - environment_type
-    //     Environment type, as above.
-    // - sys_platform
-    //     Platform, as in sys.platform. Possible values for the common
-    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
-    //
-    // "exclude": [
-    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
-    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
-    // ],
-    //
-    // "include": [
-    //     // additional env for python2.7
-    //     {"python": "2.7", "numpy": "1.8"},
-    //     // additional env if run on windows+conda
-    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
-    // ],
+    // "pythons": ["3.9"],
+
+    "environment_type": "mamba",
+    "conda_channels": ["conda-forge"],
+    "conda_environment_file": "../ci/benchmark.yml",

     // The directory (relative to the current directory) that benchmarks are
     // stored in. If not provided, defaults to "benchmarks"
diff --git a/asv_bench/benchmarks/reduce.py b/asv_bench/benchmarks/reduce.py
index add77c182..326b73566 100644
--- a/asv_bench/benchmarks/reduce.py
+++ b/asv_bench/benchmarks/reduce.py
@@ -6,20 +6,20 @@ import flox.aggregations


 N = 3000
-funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "var", "count", "all"]
+funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
 engines = ["flox", "numpy", "numbagg"]
 expected_groups = {
     "None": None,
-    "RangeIndex": pd.RangeIndex(5),
     "bins": pd.IntervalIndex.from_breaks([1, 2, 4]),
 }
 expected_names = tuple(expected_groups)

 NUMBAGG_FUNCS = ["nansum", "nanmean", "nanmax", "count", "all"]
-
-numbagg_skip = [
-    (func, expected_names[0], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS
-] + [(func, expected_names[1], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS]
+numbagg_skip = []
+for name in expected_names:
+    numbagg_skip.extend(
+        list((func, name, "numbagg") for func in funcs if func not in NUMBAGG_FUNCS)
+    )


 def setup_jit():
@@ -42,7 +42,7 @@ class ChunkReduce:
     """Time the core reduction function."""

     min_run_count = 5
-    warmup_time = 1
+    warmup_time = 0.5

     def setup(self, *args, **kwargs):
         raise NotImplementedError
@@ -59,18 +59,6 @@ def time_reduce(self, func, expected_name, engine):
             expected_groups=expected_groups[expected_name],
         )

-    @parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
-    def time_reduce_bare(self, func, engine):
-        flox.aggregations.generic_aggregate(
-            self.labels,
-            self.array,
-            axis=-1,
-            size=5,
-            func=func,
-            engine=engine,
-            fill_value=0,
-        )
-
     @skip_for_params(numbagg_skip)
     @parameterize({"func": funcs, "expected_name": expected_names, "engine": engines})
     def peakmem_reduce(self, func, expected_name, engine):
@@ -92,13 +80,18 @@ def setup(self, *args, **kwargs):
         if "numbagg" in args:
             setup_jit()

-
-class ChunkReduce1DUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N,))
-        self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
-        self.axis = -1
-        setup_jit()
+    @parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
+    def time_reduce_bare(self, func, engine):
+        # TODO: migrate to the other test cases, but we'll have to setup labels
+        # appropriately ;(
+        flox.aggregations.generic_aggregate(
+            self.labels,
+            self.array,
+            axis=self.axis,
+            func=func,
+            engine=engine,
+            fill_value=0,
+        )


 class ChunkReduce2D(ChunkReduce):
@@ -109,14 +102,6 @@ def setup(self, *args, **kwargs):
         setup_jit()


-class ChunkReduce2DUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N, N))
-        self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
-        self.axis = -1
-        setup_jit()
-
-
 class ChunkReduce2DAllAxes(ChunkReduce):
     def setup(self, *args, **kwargs):
         self.array = np.ones((N, N))
@@ -125,9 +110,24 @@ def setup(self, *args, **kwargs):
         setup_jit()


-class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N, N))
-        self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
-        self.axis = None
-        setup_jit()
+# class ChunkReduce2DUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N, N))
+#         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
+#         self.axis = -1
+#         setup_jit()
+
+# class ChunkReduce1DUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N,))
+#         self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
+#         self.axis = -1
+#         setup_jit()
+
+
+# class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N, N))
+#         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
+#         self.axis = None
+#         setup_jit()
diff --git a/ci/benchmark.yml b/ci/benchmark.yml
new file mode 100644
index 000000000..e4987000b
--- /dev/null
+++ b/ci/benchmark.yml
@@ -0,0 +1,15 @@
+name: flox-bench
+channels:
+  - conda-forge
+dependencies:
+  - asv
+  - cachey
+  - dask-core
+  - numpy>=1.20
+  - mamba
+  - pip
+  - python=3.10
+  - xarray
+  - numpy_groupies>=0.9.19
+  - numbagg>=0.3
+  - wheel
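
A rough way to sanity-check this configuration locally is to replay the workflow's asv invocation from asv_bench/. This is only a sketch, not part of the patch: it assumes asv and mamba are already installed, and the main/HEAD refs stand in for the PR base and head SHAs that the workflow passes to asv continuous.

    # run from the repository root; mirrors the "Run benchmarks" step above
    cd asv_bench
    asv machine --yes
    asv continuous --split --show-stderr --factor 1.5 main HEAD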