From 8c2de998f92f8add315e67bfaa1d331d8e21f011 Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Fri, 23 Feb 2024 01:11:54 +0000 Subject: [PATCH 1/4] transpose matmul categorical bit reproducibility --- src/tabmat/ext/cat_split_helpers-tmpl.cpp | 26 ++++++++++++++--------- tests/test_reproducibility.py | 24 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 tests/test_reproducibility.py diff --git a/src/tabmat/ext/cat_split_helpers-tmpl.cpp b/src/tabmat/ext/cat_split_helpers-tmpl.cpp index c40f851c..0ef80be6 100644 --- a/src/tabmat/ext/cat_split_helpers-tmpl.cpp +++ b/src/tabmat/ext/cat_split_helpers-tmpl.cpp @@ -1,5 +1,5 @@ #include - +#include <%def name="transpose_matvec(dropfirst)"> template @@ -10,24 +10,30 @@ void _transpose_matvec_${dropfirst}( F* res, Int res_size ) { - #pragma omp parallel + int num_threads = omp_get_max_threads(); + std::vector all_res(num_threads * res_size, 0.0); + #pragma omp parallel shared(all_res) { - std::vector restemp(res_size, 0.0); - #pragma omp for + int tid = omp_get_thread_num(); + F* res_slice = &all_res[tid * res_size]; + #pragma omp for for (Py_ssize_t i = 0; i < n_rows; i++) { % if dropfirst == 'all_rows_drop_first': Py_ssize_t col_idx = indices[i] - 1; if (col_idx != -1) { - restemp[col_idx] += other[i]; + res_slice[col_idx] += other[i]; } % else: - restemp[indices[i]] += other[i]; + res_slice[indices[i]] += other[i]; % endif } - for (Py_ssize_t i = 0; i < res_size; i++) { - # pragma omp atomic - res[i] += restemp[i]; - } + #pragma omp for + for (Py_ssize_t i = 0; i < res_size; ++i) { + for (int tid = 0; tid < num_threads; ++tid) { + #pragma omp atomic + res[i] += all_res[tid * res_size + i]; + } + } } } diff --git a/tests/test_reproducibility.py b/tests/test_reproducibility.py new file mode 100644 index 00000000..4dc06ee0 --- /dev/null +++ b/tests/test_reproducibility.py @@ -0,0 +1,24 @@ +import numpy as np +import pandas as pd +import pytest + +import tabmat as tm + +N = 100 +K = 5 + + +@pytest.fixture +def df(): + rng = np.random.default_rng(1234) + return pd.DataFrame( + pd.Categorical(rng.integers(low=0, high=K - 1, size=N), categories=range(K)) + ) + + +@pytest.mark.parametrize("cat_threshold", [K, K + 1]) +def test_mat_transpose_vec(df, cat_threshold): + rng = np.random.default_rng(1234) + vec = rng.normal(size=N) + mat = tm.from_pandas(df, cat_threshold=cat_threshold) + np.testing.assert_equal(mat.transpose_matvec(vec), mat.transpose_matvec(vec)) From d4d6a17c580d92bcbdb8406556190a574291ef60 Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Fri, 23 Feb 2024 10:53:27 +0000 Subject: [PATCH 2/4] Remove reproducibility test --- tests/test_reproducibility.py | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 tests/test_reproducibility.py diff --git a/tests/test_reproducibility.py b/tests/test_reproducibility.py deleted file mode 100644 index 4dc06ee0..00000000 --- a/tests/test_reproducibility.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -import tabmat as tm - -N = 100 -K = 5 - - -@pytest.fixture -def df(): - rng = np.random.default_rng(1234) - return pd.DataFrame( - pd.Categorical(rng.integers(low=0, high=K - 1, size=N), categories=range(K)) - ) - - -@pytest.mark.parametrize("cat_threshold", [K, K + 1]) -def test_mat_transpose_vec(df, cat_threshold): - rng = np.random.default_rng(1234) - vec = rng.normal(size=N) - mat = tm.from_pandas(df, cat_threshold=cat_threshold) - np.testing.assert_equal(mat.transpose_matvec(vec), mat.transpose_matvec(vec)) From e3df4cb601a4a975eb31284cd1a413f0045f494c Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Fri, 23 Feb 2024 11:03:43 +0000 Subject: [PATCH 3/4] Drop redundant atomic --- src/tabmat/ext/cat_split_helpers-tmpl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tabmat/ext/cat_split_helpers-tmpl.cpp b/src/tabmat/ext/cat_split_helpers-tmpl.cpp index 0ef80be6..d70960b5 100644 --- a/src/tabmat/ext/cat_split_helpers-tmpl.cpp +++ b/src/tabmat/ext/cat_split_helpers-tmpl.cpp @@ -30,7 +30,6 @@ void _transpose_matvec_${dropfirst}( #pragma omp for for (Py_ssize_t i = 0; i < res_size; ++i) { for (int tid = 0; tid < num_threads; ++tid) { - #pragma omp atomic res[i] += all_res[tid * res_size + i]; } } From 4a0c4ad1d4c772f3295934b8e2d64fc0bc476b4e Mon Sep 17 00:00:00 2001 From: Marc-Antoine Schmidt Date: Wed, 28 Feb 2024 17:07:56 -0500 Subject: [PATCH 4/4] changelog --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3d6e251c..048b9190 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,10 +14,10 @@ Unreleased - Added cython compiler directive legacy_implicit_noexcept = True to fix performance regression with cython 3. - **Other changes:** - Refactored the pre-commit hooks to use ruff. +- Refactored CategoricalMatrix's transpose_matvec to be deterministic when using OpenMP. 3.1.13 - 2023-10-17 -------------------