Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add windows CI #151

Merged
merged 33 commits into from
Nov 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f9c3b0c
Add windows CI
Illviljan Sep 23, 2022
303149e
Update ci.yaml
Illviljan Sep 23, 2022
66561ba
Update ci.yaml
Illviljan Sep 23, 2022
78bced3
Make arg input the same as shown in pytest
Illviljan Sep 24, 2022
1d93178
Add dtype check
Illviljan Sep 24, 2022
d13e475
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 24, 2022
11df4b4
have expected and actual results on the same side
Illviljan Sep 24, 2022
be224b8
use np.intp for count expected
Illviljan Sep 24, 2022
11af758
Merge remote-tracking branch 'upstream/main' into add_windows_ci
Illviljan Oct 9, 2022
c7d809f
Merge remote-tracking branch 'upstream/main' into add_windows_ci
Illviljan Oct 11, 2022
f993b31
[revert] minimize test
dcherian Oct 11, 2022
7559cce
specify dtypes
dcherian Oct 11, 2022
411db8a
more fixers
dcherian Oct 11, 2022
a9c7cae
more.
dcherian Oct 11, 2022
bed84c2
Fix groupby_reduce
dcherian Oct 12, 2022
232cf15
[revert] only wiindows tests
dcherian Oct 12, 2022
7ecb267
more fixes?
dcherian Oct 12, 2022
0df2daf
more fixes.
dcherian Oct 12, 2022
3a86969
more fix
dcherian Oct 12, 2022
f63e3d3
Last fix?
dcherian Oct 12, 2022
09c1297
Update .github/workflows/ci.yaml
dcherian Oct 12, 2022
b0e156d
Merge remote-tracking branch 'upstream/main' into add_windows_ci
Illviljan Oct 14, 2022
5db76c8
Merge remote-tracking branch 'upstream/main' into add_windows_ci
Illviljan Nov 2, 2022
3b79f6e
revert
dcherian Nov 4, 2022
5cd7bcb
Better fix
dcherian Nov 5, 2022
38438a2
Revert "revert"
dcherian Nov 5, 2022
d60a965
better comment.
dcherian Nov 5, 2022
0c733c4
clean up test
dcherian Nov 5, 2022
d5eb5d9
Revert "Revert "revert""
dcherian Nov 5, 2022
f287ed6
xfail labels dtype test
dcherian Nov 5, 2022
e842829
Revert "[revert] only wiindows tests"
dcherian Nov 5, 2022
85475ca
Revert "[revert] minimize test"
dcherian Nov 5, 2022
8867eb9
fix bad revert
dcherian Nov 5, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
os: ["ubuntu-latest", "windows-latest"]
python-version: ["3.8", "3.10"]
steps:
- uses: actions/checkout@v3
Expand All @@ -43,8 +43,7 @@ jobs:
python="${{ matrix.python-version }}"
- name: Install flox
run: |
python -m pip install -e .
conda list
python -m pip install --no-deps -e .
- name: Run Tests
run: |
pytest -n auto --cov=./ --cov-report=xml
Expand Down
5 changes: 4 additions & 1 deletion flox/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,10 @@ def dask_groupby_agg(
aggregate=partial(aggregate, expected_groups=index, reindex=True),
)
)
groups_.append(cohort)
# This is done because pandas promotes to 64-bit types when an Index is created
# So we use the index to generate the return value for consistency with "map-reduce"
# This is important on Windows
groups_.append(index.values)

reduced = dask.array.concatenate(reduced_, axis=-1)
groups = (np.concatenate(groups_),)
Expand Down
87 changes: 48 additions & 39 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ def test_groupby_reduce(
by = da.from_array(by, chunks=(3,) if by.ndim == 1 else (1, 3))

if func == "mean" or func == "nanmean":
expected_result = np.array(expected, dtype=float)
expected_result = np.array(expected, dtype=np.float64)
elif func == "sum":
expected_result = np.array(expected, dtype=dtype)
elif func == "count":
expected_result = np.array(expected, dtype=int)
expected_result = np.array(expected, dtype=np.int64)

result, groups, = groupby_reduce(
array,
Expand All @@ -149,7 +149,9 @@ def test_groupby_reduce(
fill_value=123,
engine=engine,
)
g_dtype = by.dtype if expected_groups is None else np.asarray(expected_groups).dtype
# we use pd.Index(expected_groups).to_numpy() which is always int64
# for the values in these tests
g_dtype = by.dtype if expected_groups is None else np.int64

assert_equal(groups, np.array([0, 1, 2], g_dtype))
assert_equal(expected_result, result)
Expand Down Expand Up @@ -274,7 +276,7 @@ def test_groupby_reduce_count():
array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
labels = np.array(["a", "b", "b", "b", "c", "c", "c"])
result, _ = groupby_reduce(array, labels, func="count")
assert_equal(result, [1, 1, 2])
assert_equal(result, np.array([1, 1, 2], dtype=np.int64))


def test_func_is_aggregation():
Expand Down Expand Up @@ -383,53 +385,52 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp
kwargs["expected_groups"] = [0, 2, 1]
with raise_if_dask_computes():
actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False)
assert_equal(groups, [0, 2, 1])
assert_equal(groups, np.array([0, 2, 1], dtype=np.intp))
assert_equal(expected, actual[..., [0, 2, 1]])

kwargs["expected_groups"] = [0, 2, 1]
with raise_if_dask_computes():
actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True)
assert_equal(groups, [0, 1, 2])
assert_equal(groups, np.array([0, 1, 2], np.intp))
assert_equal(expected, actual)


def test_numpy_reduce_axis_subset(engine):
# TODO: add NaNs
by = labels2d
array = np.ones_like(by)
array = np.ones_like(by, dtype=np.int64)
kwargs = dict(func="count", engine=engine, fill_value=0)
result, _ = groupby_reduce(array, by, **kwargs, axis=1)
assert_equal(result, [[2, 3], [2, 3]])
assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

by = np.broadcast_to(labels2d, (3, *labels2d.shape))
array = np.ones_like(by)
result, _ = groupby_reduce(array, by, **kwargs, axis=1)
subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]])
subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
assert_equal(result, expected)

result, _ = groupby_reduce(array, by, **kwargs, axis=2)
subarr = np.array([[2, 3], [2, 3]])
subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
assert_equal(result, expected)

result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
expected = np.array([[4, 6], [4, 6], [4, 6]])
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
assert_equal(result, expected)

result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
assert_equal(result, expected)

result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
expected = np.array([[4, 6], [4, 6], [4, 6]])
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
assert_equal(result, expected)


@requires_dask
def test_dask_reduce_axis_subset():

by = labels2d
array = np.ones_like(by)
array = np.ones_like(by, dtype=np.int64)
with raise_if_dask_computes():
result, _ = groupby_reduce(
da.from_array(array, chunks=(2, 3)),
Expand All @@ -438,11 +439,11 @@ def test_dask_reduce_axis_subset():
axis=1,
expected_groups=[0, 2],
)
assert_equal(result, [[2, 3], [2, 3]])
assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

by = np.broadcast_to(labels2d, (3, *labels2d.shape))
array = np.ones_like(by)
subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]])
subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
with raise_if_dask_computes():
result, _ = groupby_reduce(
Expand All @@ -455,7 +456,7 @@ def test_dask_reduce_axis_subset():
)
assert_equal(result, expected)

subarr = np.array([[2, 3], [2, 3]])
subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
with raise_if_dask_computes():
result, _ = groupby_reduce(
Expand Down Expand Up @@ -663,7 +664,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
engine=engine,
method=method,
)
expected = np.array([3, 1, 0])
expected = np.array([3, 1, 0], dtype=np.int64)
for left, right in zip(groups, pd.IntervalIndex.from_arrays([1, 2, 4], [2, 4, 5]).to_numpy()):
assert left == right
assert_equal(actual, expected)
Expand Down Expand Up @@ -780,15 +781,23 @@ def test_dtype_preservation(dtype, func, engine):


@requires_dask
@pytest.mark.parametrize("method", ["split-reduce", "map-reduce", "cohorts"])
def test_cohorts(method):
repeats = [4, 4, 12, 2, 3, 4]
labels = np.repeat(np.arange(6), repeats)
array = dask.array.from_array(labels, chunks=(4, 8, 4, 9, 4))
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
@pytest.mark.parametrize(
"labels_dtype", [pytest.param(np.int32, marks=pytest.mark.xfail), np.int64]
)
@pytest.mark.parametrize("method", ["map-reduce", "cohorts"])
def test_cohorts_map_reduce_consistent_dtypes(method, dtype, labels_dtype):
repeats = np.array([4, 4, 12, 2, 3, 4], dtype=np.int32)
labels = np.repeat(np.arange(6, dtype=labels_dtype), repeats)
array = dask.array.from_array(labels.astype(dtype), chunks=(4, 8, 4, 9, 4))

actual, actual_groups = groupby_reduce(array, labels, func="count", method=method)
assert_equal(actual_groups, np.arange(6))
assert_equal(actual, repeats)
assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
assert_equal(actual, repeats.astype(np.int64))

actual, actual_groups = groupby_reduce(array, labels, func="sum", method=method)
assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
assert_equal(actual, np.array([0, 4, 24, 6, 12, 20], dtype))


@requires_dask
Expand All @@ -800,7 +809,7 @@ def test_cohorts_nd_by(func, method, axis, engine):
o2 = dask.array.ones((2, 3), chunks=-1)

array = dask.array.block([[o, 2 * o], [3 * o2, 4 * o2]])
by = array.compute().astype(int)
by = array.compute().astype(np.int64)
by[0, 1] = 30
by[2, 1] = 40
by[0, 4] = 31
Expand All @@ -825,9 +834,9 @@ def test_cohorts_nd_by(func, method, axis, engine):

actual, groups = groupby_reduce(array, by, sort=False, **kwargs)
if method == "map-reduce":
assert_equal(groups, [1, 30, 2, 31, 3, 4, 40])
assert_equal(groups, np.array([1, 30, 2, 31, 3, 4, 40], dtype=np.int64))
else:
assert_equal(groups, [1, 30, 2, 31, 3, 40, 4])
assert_equal(groups, np.array([1, 30, 2, 31, 3, 40, 4], dtype=np.int64))
reindexed = reindex_(actual, groups, pd.Index(sorted_groups))
assert_equal(reindexed, expected)

Expand Down Expand Up @@ -950,7 +959,7 @@ def test_factorize_values_outside_bins():
fastpath=True,
)
actual = vals[0]
expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]])
expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]], np.int64)
assert_equal(expected, actual)


Expand All @@ -967,7 +976,7 @@ def test_multiple_groupers() -> None:
reindex=True,
func="count",
)
expected = np.eye(5, 5, dtype=int)
expected = np.eye(5, 5, dtype=np.int64)
assert_equal(expected, actual)


Expand All @@ -979,38 +988,38 @@ def test_factorize_reindex_sorting_strings():
)

expected = factorize_(**kwargs, reindex=True, sort=True)[0]
assert_equal(expected, [0, 1, 4, 2])
assert_equal(expected, np.array([0, 1, 4, 2], dtype=np.int64))

expected = factorize_(**kwargs, reindex=True, sort=False)[0]
assert_equal(expected, [0, 3, 4, 1])
assert_equal(expected, np.array([0, 3, 4, 1], dtype=np.int64))

expected = factorize_(**kwargs, reindex=False, sort=False)[0]
assert_equal(expected, [0, 1, 2, 3])
assert_equal(expected, np.array([0, 1, 2, 3], dtype=np.int64))

expected = factorize_(**kwargs, reindex=False, sort=True)[0]
assert_equal(expected, [0, 1, 3, 2])
assert_equal(expected, np.array([0, 1, 3, 2], dtype=np.int64))


def test_factorize_reindex_sorting_ints():
kwargs = dict(
by=(np.array([-10, 1, 10, 2, 3, 5]),),
axis=-1,
expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
)

expected = factorize_(**kwargs, reindex=True, sort=True)[0]
assert_equal(expected, [6, 1, 6, 2, 3, 5])
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

expected = factorize_(**kwargs, reindex=True, sort=False)[0]
assert_equal(expected, [6, 1, 6, 2, 3, 5])
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

kwargs["expected_groups"] = (np.arange(5, -1, -1),)

expected = factorize_(**kwargs, reindex=True, sort=True)[0]
assert_equal(expected, [6, 1, 6, 2, 3, 5])
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

expected = factorize_(**kwargs, reindex=True, sort=False)[0]
assert_equal(expected, [6, 4, 6, 3, 2, 0])
assert_equal(expected, np.array([6, 4, 6, 3, 2, 0], dtype=np.int64))


@requires_dask
Expand Down