From 59f9d99e24a5af9507c172db660309e7ad00a400 Mon Sep 17 00:00:00 2001
From: Keisuke Fujii
Date: Mon, 11 Dec 2017 17:44:08 +0900
Subject: [PATCH 1/6] Start adding benchmarking for basic operations.

---
 asv_bench/benchmarks/__init__.py  |  9 ++++
 asv_bench/benchmarks/basic_ops.py | 76 +++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 asv_bench/benchmarks/basic_ops.py

diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
index 21ee86e28e3..a6917195afb 100644
--- a/asv_bench/benchmarks/__init__.py
+++ b/asv_bench/benchmarks/__init__.py
@@ -29,3 +29,12 @@ def randn(shape, frac_nan=None, chunks=None):
         x.flat[inds] = np.nan
 
     return x
+
+
+def randint(low, high=None, size=None, frac_minus=None):
+    x = np.random.randint(low, high, size)
+    if frac_minus is not None:
+        inds = random.sample(range(x.size), int(x.size * frac_minus))
+        x.flat[inds] = np.nan
+
+    return x
diff --git a/asv_bench/benchmarks/basic_ops.py b/asv_bench/benchmarks/basic_ops.py
new file mode 100644
index 00000000000..fc8bef2172c
--- /dev/null
+++ b/asv_bench/benchmarks/basic_ops.py
@@ -0,0 +1,76 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import pandas as pd
+
+try:
+    import dask
+    import dask.multiprocessing
+except ImportError:
+    pass
+
+import xarray as xr
+
+from . import randn, randint, requires_dask
+
+
+class Indexing(object):
+    def setup(self):
+        self.nx = 300
+        self.ny = 200
+        self.nt = 100
+        var1 = randn((self.nx, self.ny), frac_nan=0.1)
+        var2 = randn((self.nx, self.nt))
+        var3 = randn(self.nt)
+        self.x = np.arange(self.nx)
+        self.y = np.linspace(0, 1, self.ny)
+        self.t = pd.date_range('1970-01-01', periods=self.nt, freq='D')
+        self.x_coords = np.linspace(0, 1, self.nx)
+        self.ds = xr.Dataset({'var1': (('x', 'y'), var1),
+                              'var2': (('x', 't'), var2),
+                              'var3': (('t', ), var3)},
+                             coords={'x': self.x, 'y': self.y, 't': self.t,
+                                     'x_coords': ('x', self.x_coords)})
+
+        self.outer_indexes = [
+            (randint(0, self.nx, 400), ),
+            (randint(0, self.nx, 500), randint(0, self.ny, 400))]
+
+    def time_outer_indexing(self):
+        for ind in self.outer_indexes:
+            ind_x = xr.DataArray(ind[-1], dims='y',
+                                 coords={'y': self.x[ind[0]]})
+            self.ds['var1'][(ind_x,) + ind[1:]]
+
+    def time_outer_assignment(self):
+        inds = self.outer_indexes()
+        for ind in inds:
+            self.ds['var1'][ind] = xr.DataArray(np.ones(400), dims='y')
+
+    def time_vectorized_indexing(self):
+        inds = [(xr.DataArray(randint(0, self.nx, self.ny), dims=['y']), ),
+                (xr.DataArray(randint(0, self.nx, 500), dims=['a']),
+                 xr.DataArray(randint(0, self.ny, 500), dims=['a'])),
+                (xr.DataArray(randint(0, self.ny, 500).reshape(25, 20)),
+                 xr.DataArray(randint(0, self.ny, 500).reshape(25, 20)))]
+        for ind in inds:
+            self.ds['var1'][ind]
+
+    def time_vectorized_indexing_coords(self):
+        ind = randint(0, self.nx, self.ny)
+        inds = [(xr.DataArray(ind, dims=['y'], coords={'y': self.y}), ),
+                (xr.DataArray(randint(0, self.nx, 500), dims=['a'],
+                              coords={'a': np.linspace(0, 1, 500)}),
+                 xr.DataArray(randint(0, self.ny, 500), dims=['a'],
+                              coords={'a': np.linspace(0, 1, 500)})),
+                (xr.DataArray(randint(0, self.ny, 500).reshape(25, 20),
+                              dims=['a', 'b'],
+                              coords={'a': np.arange(25), 'b': np.arange(20)}),
+                 xr.DataArray(randint(0, self.ny, 500).reshape(25, 20),
+                              dims=['a', 'b'],
+                              coords={'a': np.arange(25), 'b': np.arange(20)}))
+                ]
+        for ind in inds:
+            self.ds['var1'][ind]
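For reference, the two helpers in benchmarks/__init__.py are what every benchmark module in this series draws its input data from. A rough, self-contained illustration of their behaviour (assumes it is run from inside asv_bench/ with numpy installed; the import path is an assumption, not part of the patch):

    # randn draws normal data and poisons a fraction of entries with NaN;
    # randint draws integer indices (frac_minus marks a fraction of them).
    from benchmarks import randn, randint

    arr = randn((4, 5), frac_nan=0.1)   # 4 x 5 normal draws, ~10% NaN
    idx = randint(0, 100, size=10)      # 10 random integers in [0, 100)
    print(arr.shape, idx.dtype)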
From 336e4887659ed4f9e751aa44ce1336b582dd40d0 Mon Sep 17 00:00:00 2001
From: Keisuke Fujii
Date: Mon, 22 Jan 2018 11:23:19 +0900
Subject: [PATCH 2/6] Update indexing.py

---
 asv_bench/benchmarks/indexing.py | 102 +++++++++++++------------------
 1 file changed, 44 insertions(+), 58 deletions(-)

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index fc8bef2172c..a3272c9c26a 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -16,61 +16,47 @@
 from . import randn, randint, requires_dask
 
 
-class Indexing(object):
-    def setup(self):
-        self.nx = 300
-        self.ny = 200
-        self.nt = 100
-        var1 = randn((self.nx, self.ny), frac_nan=0.1)
-        var2 = randn((self.nx, self.nt))
-        var3 = randn(self.nt)
-        self.x = np.arange(self.nx)
-        self.y = np.linspace(0, 1, self.ny)
-        self.t = pd.date_range('1970-01-01', periods=self.nt, freq='D')
-        self.x_coords = np.linspace(0, 1, self.nx)
-        self.ds = xr.Dataset({'var1': (('x', 'y'), var1),
-                              'var2': (('x', 't'), var2),
-                              'var3': (('t', ), var3)},
-                             coords={'x': self.x, 'y': self.y, 't': self.t,
-                                     'x_coords': ('x', self.x_coords)})
-
-        self.outer_indexes = [
-            (randint(0, self.nx, 400), ),
-            (randint(0, self.nx, 500), randint(0, self.ny, 400))]
-
-    def time_outer_indexing(self):
-        for ind in self.outer_indexes:
-            ind_x = xr.DataArray(ind[-1], dims='y',
-                                 coords={'y': self.x[ind[0]]})
-            self.ds['var1'][(ind_x,) + ind[1:]]
-
-    def time_outer_assignment(self):
-        inds = self.outer_indexes()
-        for ind in inds:
-            self.ds['var1'][ind] = xr.DataArray(np.ones(400), dims='y')
-
-    def time_vectorized_indexing(self):
-        inds = [(xr.DataArray(randint(0, self.nx, self.ny), dims=['y']), ),
-                (xr.DataArray(randint(0, self.nx, 500), dims=['a']),
-                 xr.DataArray(randint(0, self.ny, 500), dims=['a'])),
-                (xr.DataArray(randint(0, self.ny, 500).reshape(25, 20)),
-                 xr.DataArray(randint(0, self.ny, 500).reshape(25, 20)))]
-        for ind in inds:
-            self.ds['var1'][ind]
-
-    def time_vectorized_indexing_coords(self):
-        ind = randint(0, self.nx, self.ny)
-        inds = [(xr.DataArray(ind, dims=['y'], coords={'y': self.y}), ),
-                (xr.DataArray(randint(0, self.nx, 500), dims=['a'],
-                              coords={'a': np.linspace(0, 1, 500)}),
-                 xr.DataArray(randint(0, self.ny, 500), dims=['a'],
-                              coords={'a': np.linspace(0, 1, 500)})),
-                (xr.DataArray(randint(0, self.ny, 500).reshape(25, 20),
-                              dims=['a', 'b'],
-                              coords={'a': np.arange(25), 'b': np.arange(20)}),
-                 xr.DataArray(randint(0, self.ny, 500).reshape(25, 20),
-                              dims=['a', 'b'],
-                              coords={'a': np.arange(25), 'b': np.arange(20)}))
-                ]
-        for ind in inds:
-            self.ds['var1'][ind]
+nx = 300
+ny = 200
+nt = 100
+ds = xr.Dataset({'var1': (('x', 'y'), randn((nx, ny), frac_nan=0.1)),
+                 'var2': (('x', 't'), randn((nx, nt))),
+                 'var3': (('t', ), randn(nt))},
+                coords={'x': np.arange(nx),
+                        'y': np.linspace(0, 1, ny),
+                        't': pd.date_range('1970-01-01', periods=nt, freq='D'),
+                        'x_coords': ('x', np.linspace(1.1, 2.1, nx))})
+
+
+vectorized_indexes = [
+    {'x': xr.DataArray(randint(0, nx, 400), dims='a')},
+    {'x': xr.DataArray(randint(0, nx, 400), dims='a'),
+     'y': xr.DataArray(randint(0, ny, 400), dims='a')},
+    {'x': xr.DataArray(randint(0, nx, 400).reshape(4, 100), dims=['a', 'b']),
+     'y': xr.DataArray(randint(0, ny, 400).reshape(4, 100), dims=['a', 'b']),
+     't': xr.DataArray(randint(0, nt, 400).reshape(4, 100), dims=['a', 'b'])},
+]
+
+
+def time_basic_indexing(index):
+    ds.isel(index)
+
+
+time_basic_indexing.param_names = ['index']
+time_basic_indexing.params = [
+    {'x': slice(0, 3)},
+    {'x': 0, 'y': slice(0, None, 3)},
+    {'x': slice(3, -3, 3), 'y': 1, 't': slice(None, -3, 3)},
+]
+
+
+def time_outer_indexing(index):
+    ds.isel(index)
+
+
+time_outer_indexing.param_names = ['index']
+time_outer_indexing.params = [
+    {'x': randint(0, nx, 400)},
+    {'x': randint(0, nx, 500), 'y': randint(0, ny, 400)},
+    {'x': randint(0, nx, 100), 'y': 1, 't': randint(0, nt, 400)},
+]
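The time_*.params / param_names attributes above follow asv's attribute-based parameterisation: asv builds one benchmark per entry in params and passes that entry as the function argument. A minimal, hypothetical example of the mechanism (the names here are made up, not from the patch):

    import numpy as np
    import xarray as xr

    def time_mean(n):
        # asv calls this once per value in params, passing n
        xr.DataArray(np.arange(n)).mean()

    time_mean.param_names = ['n']
    time_mean.params = [10, 10000]

Whether dict-valued params such as {'x': slice(0, 3)} display well in asv's result tables is a separate question; the later commits in this series switch to short string keys, which sidesteps it.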
From dbe2864ade3afe7628d243e54d76ade1570e74bd Mon Sep 17 00:00:00 2001
From: Keisuke Fujii
Date: Tue, 23 Jan 2018 09:25:38 +0900
Subject: [PATCH 3/6] Benchmark for indexing.

---
 .gitignore                       |   3 +
 asv_bench/benchmarks/indexing.py | 108 +++++++++++++++++++++++--------
 2 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index f53aca4faed..490eb49f9d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,9 @@ nosetests.xml
 .cache
 .ropeproject/
 
+# asv environments
+.asv
+
 # Translations
 *.mo
 
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index a3272c9c26a..eec89ba9ce2 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -4,21 +4,14 @@
 
 import numpy as np
 import pandas as pd
-
-try:
-    import dask
-    import dask.multiprocessing
-except ImportError:
-    pass
-
 import xarray as xr
 
-from . import randn, randint, requires_dask
+from . import randn, randint
 
 
-nx = 300
-ny = 200
-nt = 100
+nx = 3000
+ny = 2000
+nt = 1000
 ds = xr.Dataset({'var1': (('x', 'y'), randn((nx, ny), frac_nan=0.1)),
                  'var2': (('x', 't'), randn((nx, nt))),
                  'var3': (('t', ), randn(nt))},
                 coords={'x': np.arange(nx),
                         'y': np.linspace(0, 1, ny),
@@ -27,6 +20,53 @@
                         't': pd.date_range('1970-01-01', periods=nt, freq='D'),
                         'x_coords': ('x', np.linspace(1.1, 2.1, nx))})
+basic_indexes = [
+    {'x': slice(0, 3)},
+    {'x': 0, 'y': slice(None, None, 3)},
+    {'x': slice(3, -3, 3), 'y': 1, 't': slice(None, -3, 3)}
+]
+
+basic_assignment_values = [
+    xr.DataArray(randn((3, ny), frac_nan=0.1), dims=['x', 'y']),
+    xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1), dims=['y']),
+    xr.DataArray(randn(int((nx - 6) / 3), frac_nan=0.1), dims=['x']),
+]
+
+
+def time_indexing_basic():
+    for ind in basic_indexes:
+        ds.isel(**ind)
+
+
+def time_assignment_basic():
+    tmp = ds.copy(deep=True)
+    for ind, val in zip(basic_indexes, basic_assignment_values):
+        tmp['var1'][ind.get('x', slice(None)), ind.get('y', slice(None))] = val
+
+
+outer_indexes = [
+    {'x': randint(0, nx, 400)},
+    {'x': randint(0, nx, 500), 'y': randint(0, ny, 400)},
+    {'x': randint(0, nx, 100), 'y': 1, 't': randint(0, nt, 400)},
+]
+
+outer_assignment_values = [
+    xr.DataArray(randn((400, ny), frac_nan=0.1), dims=['x', 'y']),
+    xr.DataArray(randn((500, 400), frac_nan=0.1), dims=['x', 'y']),
+    xr.DataArray(randn(100, frac_nan=0.1), dims=['x']),
+]
+
+
+def time_indexing_outer():
+    for ind in outer_indexes:
+        ds.isel(**ind)
+
+
+def time_assignment_outer():
+    tmp = ds.copy(deep=True)
+    for ind, val in zip(outer_indexes, outer_assignment_values):
+        tmp['var1'][ind.get('x', slice(None)), ind.get('y', slice(None))] = val
 
 
 vectorized_indexes = [
     {'x': xr.DataArray(randint(0, nx, 400), dims='a')},
     {'x': xr.DataArray(randint(0, nx, 400), dims='a'),
      'y': xr.DataArray(randint(0, ny, 400), dims='a')},
     {'x': xr.DataArray(randint(0, nx, 400).reshape(4, 100), dims=['a', 'b']),
      'y': xr.DataArray(randint(0, ny, 400).reshape(4, 100), dims=['a', 'b']),
@@ -37,26 +77,40 @@
     't': xr.DataArray(randint(0, nt, 400).reshape(4, 100), dims=['a', 'b'])},
 ]
 
+vectorized_assignment_values = [
+    xr.DataArray(randn((400, 2000)), dims=['a', 'y'],
+                 coords={'a': randn(400)}),
+    xr.DataArray(randn(400), dims=['a', ], coords={'a': randn(400)}),
+    xr.DataArray(randn((4, 100)), dims=['a', 'b'],
+                 coords={'a': randn(4), 'b': randn(100)}),
+]
 
-def time_basic_indexing(index):
-    ds.isel(index)
 
+def time_indexing_vectorized():
+    for ind in vectorized_indexes:
+        ds.isel(**ind)
 
-time_basic_indexing.param_names = ['index']
-time_basic_indexing.params = [
-    {'x': slice(0, 3)},
-    {'x': 0, 'y': slice(0, None, 3)},
-    {'x': slice(3, -3, 3), 'y': 1, 't': slice(None, -3, 3)},
-]
 
+def time_assignment_vectorized():
+    tmp = ds.copy(deep=True)
+    for ind, val in zip(vectorized_indexes, vectorized_assignment_values):
+        tmp['var1'][ind.get('x', slice(None)), ind.get('y', slice(None))] = val
 
-def time_outer_indexing(index):
-    ds.isel(index)
 
+try:
+    ds_dask = ds.chunk({'x': 100, 'y': 50, 't': 50})
 
-time_outer_indexing.param_names = ['index']
-time_outer_indexing.params = [
-    {'x': randint(0, nx, 400)},
-    {'x': randint(0, nx, 500), 'y': randint(0, ny, 400)},
-    {'x': randint(0, nx, 100), 'y': 1, 't': randint(0, nt, 400)},
-]
+    def time_indexing_basic_dask():
+        for ind in basic_indexes:
+            ds_dask.isel(**ind)
+
+    def time_indexing_outer_dask():
+        for ind in outer_indexes:
+            ds_dask.isel(**ind)
+
+    def time_indexing_vectorized_dask():
+        for ind in vectorized_indexes:
+            ds_dask.isel(**ind)
+
+except ImportError:
+    pass
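The two families being timed differ in how xarray interprets the indexers. A small, self-contained illustration with toy sizes (not taken from the benchmark itself):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({'var1': (('x', 'y'), np.arange(12).reshape(3, 4))})

    # Outer indexing: plain integer arrays select the cross product.
    outer = ds.isel(x=[0, 2], y=[1, 3])

    # Vectorized (pointwise) indexing: both indexers share the dim 'a',
    # so the points (0, 1) and (2, 3) are picked along a new dim 'a'.
    pointwise = ds.isel(x=xr.DataArray([0, 2], dims='a'),
                        y=xr.DataArray([1, 3], dims='a'))

    print(outer['var1'].shape)      # (2, 2)
    print(pointwise['var1'].shape)  # (2,)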
From c03b0024035d34d1eb0f7bd6ea3b63220dc125d5 Mon Sep 17 00:00:00 2001
From: Keisuke Fujii
Date: Tue, 23 Jan 2018 10:30:36 +0900
Subject: [PATCH 4/6] Make it a class.

---
 asv_bench/benchmarks/indexing.py | 206 +++++++++++++++++--------------
 1 file changed, 112 insertions(+), 94 deletions(-)

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index eec89ba9ce2..55d5f83816e 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+from functools import partial
 
 import numpy as np
 import pandas as pd
@@ -12,105 +13,122 @@
 nx = 3000
 ny = 2000
 nt = 1000
-ds = xr.Dataset({'var1': (('x', 'y'), randn((nx, ny), frac_nan=0.1)),
-                 'var2': (('x', 't'), randn((nx, nt))),
-                 'var3': (('t', ), randn(nt))},
-                coords={'x': np.arange(nx),
-                        'y': np.linspace(0, 1, ny),
-                        't': pd.date_range('1970-01-01', periods=nt, freq='D'),
-                        'x_coords': ('x', np.linspace(1.1, 2.1, nx))})
-basic_indexes = [
-    {'x': slice(0, 3)},
-    {'x': 0, 'y': slice(None, None, 3)},
-    {'x': slice(3, -3, 3), 'y': 1, 't': slice(None, -3, 3)}
-]
-
-basic_assignment_values = [
-    xr.DataArray(randn((3, ny), frac_nan=0.1), dims=['x', 'y']),
-    xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1), dims=['y']),
-    xr.DataArray(randn(int((nx - 6) / 3), frac_nan=0.1), dims=['x']),
-]
-
-
-def time_indexing_basic():
-    for ind in basic_indexes:
-        ds.isel(**ind)
-
-
-def time_assignment_basic():
-    tmp = ds.copy(deep=True)
-    for ind, val in zip(basic_indexes, basic_assignment_values):
-        tmp['var1'][ind.get('x', slice(None)), ind.get('y', slice(None))] = val
-
-
-outer_indexes = [
-    {'x': randint(0, nx, 400)},
-    {'x': randint(0, nx, 500), 'y': randint(0, ny, 400)},
-    {'x': randint(0, nx, 100), 'y': 1, 't': randint(0, nt, 400)},
-]
-
-outer_assignment_values = [
-    xr.DataArray(randn((400, ny), frac_nan=0.1), dims=['x', 'y']),
-    xr.DataArray(randn((500, 400), frac_nan=0.1), dims=['x', 'y']),
-    xr.DataArray(randn(100, frac_nan=0.1), dims=['x']),
-]
-
-
-def time_indexing_outer():
-    for ind in outer_indexes:
-        ds.isel(**ind)
-
-
-def time_assignment_outer():
-    tmp = ds.copy(deep=True)
-    for ind, val in zip(outer_indexes, outer_assignment_values):
-        tmp['var1'][ind.get('x', slice(None)), ind.get('y', slice(None))] = val
-
-
-vectorized_indexes = [
-    {'x': xr.DataArray(randint(0, nx, 400), dims='a')},
-    {'x': xr.DataArray(randint(0, nx, 400), dims='a'),
-     'y': xr.DataArray(randint(0, ny, 400), dims='a')},
-    {'x': xr.DataArray(randint(0, nx, 400).reshape(4, 100), dims=['a', 'b']),
-     'y': xr.DataArray(randint(0, ny, 400).reshape(4, 100), dims=['a', 'b']),
-     't': xr.DataArray(randint(0, nt, 400).reshape(4, 100), dims=['a', 'b'])},
-]
-
-vectorized_assignment_values = [
-    xr.DataArray(randn((400, 2000)), dims=['a', 'y'],
-                 coords={'a': randn(400)}),
-    xr.DataArray(randn(400), dims=['a', ], coords={'a': randn(400)}),
-    xr.DataArray(randn((4, 100)), dims=['a', 'b'],
-                 coords={'a': randn(4), 'b': randn(100)}),
-]
-
-
-def time_indexing_vectorized():
-    for ind in vectorized_indexes:
-        ds.isel(**ind)
-
-
-def time_assignment_vectorized():
-    tmp = ds.copy(deep=True)
-    for ind, val in zip(vectorized_indexes, vectorized_assignment_values):
-        tmp['var1'][ind.get('x', slice(None)), ind.get('y', slice(None))] = val
+basic_indexes = {
+    '1slice': {'x': slice(0, 3)},
+    '1slice-1scalar': {'x': 0, 'y': slice(None, None, 3)},
+    '2slicess-1scalar': {'x': slice(3, -3, 3), 'y': 1, 't': slice(None, -3, 3)}
+}
+
+basic_assignment_values = {
+    '1slice': xr.DataArray(randn((3, ny), frac_nan=0.1), dims=['x', 'y']),
+    '1slice-1scalar': xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1),
+                                   dims=['y']),
+    '2slicess-1scalar': xr.DataArray(randn(int((nx - 6) / 3), frac_nan=0.1),
+                                     dims=['x'])
+}
+
+outer_indexes = {
+    '1d': {'x': randint(0, nx, 400)},
+    '2d': {'x': randint(0, nx, 500), 'y': randint(0, ny, 400)},
+    '2d-1scalar': {'x': randint(0, nx, 100), 'y': 1, 't': randint(0, nt, 400)}
+}
+
+outer_assignment_values = {
+    '1d': xr.DataArray(randn((400, ny), frac_nan=0.1), dims=['x', 'y']),
+    '2d': xr.DataArray(randn((500, 400), frac_nan=0.1), dims=['x', 'y']),
+    '2d-1scalar': xr.DataArray(randn(100, frac_nan=0.1), dims=['x'])
+}
+
+vectorized_indexes = {
+    '1-1d': {'x': xr.DataArray(randint(0, nx, 400), dims='a')},
+    '2-1d': {'x': xr.DataArray(randint(0, nx, 400), dims='a'),
+             'y': xr.DataArray(randint(0, ny, 400), dims='a')},
+    '3-2d': {'x': xr.DataArray(randint(0, nx, 400).reshape(4, 100),
+                               dims=['a', 'b']),
+             'y': xr.DataArray(randint(0, ny, 400).reshape(4, 100),
+                               dims=['a', 'b']),
+             't': xr.DataArray(randint(0, nt, 400).reshape(4, 100),
+                               dims=['a', 'b'])},
+}
+
+vectorized_assignment_values = {
+    '1-1d': xr.DataArray(randn((400, 2000)), dims=['a', 'y'],
+                         coords={'a': randn(400)}),
+    '2-1d': xr.DataArray(randn(400), dims=['a', ], coords={'a': randn(400)}),
+    '3-2d': xr.DataArray(randn((4, 100)), dims=['a', 'b'],
+                         coords={'a': randn(4), 'b': randn(100)})
+}
+
+
+class Base(object):
+    def setup(self, key):
+        self.ds = xr.Dataset(
+            {'var1': (('x', 'y'), randn((nx, ny), frac_nan=0.1)),
+             'var2': (('x', 't'), randn((nx, nt))),
+             'var3': (('t', ), randn(nt))},
+            coords={'x': np.arange(nx),
+                    'y': np.linspace(0, 1, ny),
+                    't': pd.date_range('1970-01-01', periods=nt, freq='D'),
+                    'x_coords': ('x', np.linspace(1.1, 2.1, nx))})
+
+
+class Indexing(Base):
+    def time_indexing_basic(self, key):
+        self.ds.isel(**basic_indexes[key]).load()
+
+    time_indexing_basic.param_names = ['key']
+    time_indexing_basic.params = [list(basic_indexes.keys())]
+
+    def time_indexing_outer(self, key):
+        self.ds.isel(**outer_indexes[key]).load()
+
+    time_indexing_outer.param_names = ['key']
+    time_indexing_outer.params = [list(outer_indexes.keys())]
+
+    def time_indexing_vectorized(self, key):
+        self.ds.isel(**vectorized_indexes[key]).load()
+
+    time_indexing_vectorized.param_names = ['key']
+    time_indexing_vectorized.params = [list(vectorized_indexes.keys())]
+
+
+class Assignment(Base):
+    def time_assignment_basic(self, key):
+        ind = basic_indexes[key]
+        val = basic_assignment_values[key]
+        self.ds['var1'][ind.get('x', slice(None)),
+                        ind.get('y', slice(None))] = val
+
+    time_assignment_basic.param_names = ['key']
+    time_assignment_basic.params = [list(basic_indexes.keys())]
+
+    def time_assignment_outer(self, key):
+        ind = outer_indexes[key]
+        val = outer_assignment_values[key]
+        self.ds['var1'][ind.get('x', slice(None)),
+                        ind.get('y', slice(None))] = val
+
+    time_assignment_outer.param_names = ['key']
+    time_assignment_outer.params = [list(outer_indexes.keys())]
+
+    def time_assignment_vectorized(self, key):
+        ind = vectorized_indexes[key]
+        val = vectorized_assignment_values[key]
+        self.ds['var1'][ind.get('x', slice(None)),
+                        ind.get('y', slice(None))] = val
+
+    time_assignment_vectorized.param_names = ['key']
+    time_assignment_vectorized.params = [list(vectorized_indexes.keys())]
 
 
 try:
-    ds_dask = ds.chunk({'x': 100, 'y': 50, 't': 50})
-
-    def time_indexing_basic_dask():
-        for ind in basic_indexes:
-            ds_dask.isel(**ind)
-
-    def time_indexing_outer_dask():
-        for ind in outer_indexes:
-            ds_dask.isel(**ind)
+    import dask
 
-    def time_indexing_vectorized_dask():
-        for ind in vectorized_indexes:
-            ds_dask.isel(**ind)
+    class IndexingDask(Indexing):
+        def setUp(self):
+            super(IndexingDask, self).setup(self)
+            self.ds = self.ds.chunk({'x': 100, 'y': 50, 't': 50})
 
 except ImportError:
     pass
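The class-based layout follows asv's usual pattern, where setup receives the same parameters as the timed method; here the params/param_names attributes are attached to each method rather than to the class. For comparison, a small hypothetical example of the class-level variant (names made up, not from the patch):

    class Suite(object):
        params = ['small', 'large']
        param_names = ['size']

        def setup(self, size):
            # called before every timed run, with the same argument
            self.n = 10 if size == 'small' else 10000

        def time_sum(self, size):
            sum(range(self.n))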
From 755f507d608ab3b7afb1f1e939b73a643db2683f Mon Sep 17 00:00:00 2001
From: Keisuke Fujii
Date: Tue, 23 Jan 2018 10:37:36 +0900
Subject: [PATCH 5/6] Add key to setup

---
 asv_bench/benchmarks/indexing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index 55d5f83816e..87e62e12038 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -126,7 +126,7 @@ def time_assignment_vectorized(self, key):
     import dask
 
     class IndexingDask(Indexing):
-        def setUp(self):
+        def setUp(self, key):
             super(IndexingDask, self).setup(self)
             self.ds = self.ds.chunk({'x': 100, 'y': 50, 't': 50})
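Even with the extra key argument, the dask subclass is still not exercised correctly: asv only runs a hook literally named setup, so the camelCase setUp is never called, and super().setup(self) passes the instance where the parameter key is expected. A sketch of the intended override, which is essentially what the next commit does:

    class IndexingDask(Indexing):
        def setup(self, key):
            # build the in-memory dataset, then re-chunk it with dask
            super(IndexingDask, self).setup(key)
            self.ds = self.ds.chunk({'x': 100, 'y': 50, 't': 50})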
From 57da6e5c3da679ca7528ec29b570122d20e0727e Mon Sep 17 00:00:00 2001
From: Keisuke Fujii
Date: Tue, 23 Jan 2018 12:04:06 +0900
Subject: [PATCH 6/6] Use seed in randn and randint. Fix bug in dask benchmark

---
 asv_bench/benchmarks/__init__.py | 20 +++++++++++---------
 asv_bench/benchmarks/indexing.py | 18 ++++++------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
index a6917195afb..e2f49e6ab48 100644
--- a/asv_bench/benchmarks/__init__.py
+++ b/asv_bench/benchmarks/__init__.py
@@ -6,7 +6,6 @@
 
 import numpy as np
 
-np.random.seed(10)
 _counter = itertools.count()
 
 
@@ -17,24 +16,27 @@ def requires_dask():
     raise NotImplementedError
 
 
-def randn(shape, frac_nan=None, chunks=None):
+def randn(shape, frac_nan=None, chunks=None, seed=0):
+    rng = np.random.RandomState(seed)
     if chunks is None:
-        x = np.random.standard_normal(shape)
+        x = rng.standard_normal(shape)
     else:
         import dask.array as da
-        x = da.random.standard_normal(shape, chunks=chunks)
+        rng = da.random.RandomState(seed)
+        x = rng.standard_normal(shape, chunks=chunks)
 
     if frac_nan is not None:
-        inds = random.sample(range(x.size), int(x.size * frac_nan))
+        inds = rng.choice(range(x.size), int(x.size * frac_nan))
         x.flat[inds] = np.nan
 
     return x
 
 
-def randint(low, high=None, size=None, frac_minus=None):
-    x = np.random.randint(low, high, size)
+def randint(low, high=None, size=None, frac_minus=None, seed=0):
+    rng = np.random.RandomState(seed)
+    x = rng.randint(low, high, size)
     if frac_minus is not None:
-        inds = random.sample(range(x.size), int(x.size * frac_minus))
-        x.flat[inds] = np.nan
+        inds = rng.choice(range(x.size), int(x.size * frac_minus))
+        x.flat[inds] = -1
 
     return x
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index 87e62e12038..e9a85115a49 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -1,13 +1,12 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-from functools import partial
 
 import numpy as np
 import pandas as pd
 import xarray as xr
 
-from . import randn, randint
+from . import randn, randint, requires_dask
 
 
 nx = 3000
@@ -122,13 +121,8 @@ def time_assignment_vectorized(self, key):
     time_assignment_vectorized.params = [list(vectorized_indexes.keys())]
 
 
-try:
-    import dask
-
-    class IndexingDask(Indexing):
-        def setUp(self, key):
-            super(IndexingDask, self).setup(self)
-            self.ds = self.ds.chunk({'x': 100, 'y': 50, 't': 50})
-
-except ImportError:
-    pass
+class IndexingDask(Indexing):
+    def setup(self, key):
+        requires_dask()
+        super(IndexingDask, self).setup(key)
+        self.ds = self.ds.chunk({'x': 100, 'y': 50, 't': 50})
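With the seed threaded through both helpers, every call reconstructs the same RandomState, so the arrays fed to the benchmarks are identical from run to run. A quick sanity sketch of that property (assumed usage, not part of the patch; the import path presumes it is run from inside asv_bench/):

    from benchmarks import randint

    a = randint(0, 10, size=5, frac_minus=0.2, seed=0)
    b = randint(0, 10, size=5, frac_minus=0.2, seed=0)
    assert (a == b).all()   # same seed, same data -> stable timings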