Skip to content

Commit

Permalink
Enable third party library integration tests in CI with cudf.pandas (
Browse files Browse the repository at this point in the history
…#17936)

This PR enables the third-party library integration tests to run in CI with `cudf.pandas` enabled.

Fixes: rapidsai/cuml#6301

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Mike Sarahan (https://github.com/msarahan)
  - Matthew Murray (https://github.com/Matt711)

URL: #17936
  • Loading branch information
galipremsagar authored Feb 10, 2025
1 parent 428dc18 commit 218d67d
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 12 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ jobs:
- pandas-tests
- pandas-tests-diff
- telemetry-setup
- third-party-integration-tests-cudf-pandas
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: always()
Expand Down Expand Up @@ -321,6 +322,19 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
third-party-integration-tests-cudf-pandas:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@nvks-runners
with:
build_type: pull-request
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-l4-latest-1"
container_image: "rapidsai/ci-conda:latest"
run_script: |
ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
pandas-tests:
# run the Pandas unit tests using PR branch
needs: [wheel-build-cudf, changed-files]
Expand Down
32 changes: 24 additions & 8 deletions ci/cudf_pandas_scripts/third-party-integration/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,38 @@ main() {
LIBS=${LIBS#[}
LIBS=${LIBS%]}

if [ "$RAPIDS_BUILD_TYPE" == "pull-request" ]; then
rapids-logger "Downloading artifacts from this pr jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
fi

ANY_FAILURES=0

for lib in ${LIBS//,/ }; do
lib=$(echo "$lib" | tr -d '""')
echo "Running tests for library $lib"

CUDA_VERSION=$(if [ "$lib" = "tensorflow" ]; then echo "11.8"; else echo "${RAPIDS_CUDA_VERSION%.*}"; fi)

. /opt/conda/etc/profile.d/conda.sh

rapids-logger "Generate Python testing dependencies"
rapids-dependency-file-generator \
--config "$dependencies_yaml" \
--output conda \
--file-key "test_${lib}" \
--matrix "cuda=${CUDA_VERSION};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
# Check the value of RAPIDS_BUILD_TYPE
if [ "$RAPIDS_BUILD_TYPE" == "pull-request" ]; then
rapids-logger "Generate Python testing dependencies"
rapids-dependency-file-generator \
--config "$dependencies_yaml" \
--output conda \
--file-key "test_${lib}" \
--matrix "cuda=${CUDA_VERSION};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
--prepend-channel "${CPP_CHANNEL}" \
--prepend-channel "${PYTHON_CHANNEL}" | tee env.yaml
else
rapids-logger "Generate Python testing dependencies"
rapids-dependency-file-generator \
--config "$dependencies_yaml" \
--output conda \
--file-key "test_${lib}" \
--matrix "cuda=${CUDA_VERSION};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
fi

rapids-mamba-retry env create --yes -f env.yaml -n test

Expand Down
82 changes: 80 additions & 2 deletions python/cudf/cudf/pandas/_wrappers/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1741,6 +1741,11 @@ def _unpickle_obj(pickled_args):
_original_DataFrame_init = cudf.DataFrame.__init__
_original_Index_init = cudf.Index.__init__
_original_IndexMeta_call = cudf.core.index.IndexMeta.__call__
_original_from_pandas = cudf.from_pandas
_original_DataFrame_from_pandas = cudf.DataFrame.from_pandas
_original_Series_from_pandas = cudf.Series.from_pandas
_original_Index_from_pandas = cudf.BaseIndex.from_pandas
_original_MultiIndex_from_pandas = cudf.MultiIndex.from_pandas


def wrap_init(original_init):
Expand Down Expand Up @@ -1776,8 +1781,69 @@ def wrapped_call(cls, data, *args, **kwargs):
return wrapped_call


def wrap_from_pandas(original_call):
    """Wrap ``cudf.from_pandas`` so fast/slow proxy objects are unwrapped.

    With ``cudf.pandas`` active, callers may pass a proxy object to
    ``from_pandas``; the underlying GPU-backed object is then returned
    directly rather than being converted again.
    """

    @functools.wraps(original_call)
    def wrapped_from_pandas(obj, *args, **kwargs):
        # Non-proxy inputs take the normal conversion path untouched.
        if not is_proxy_object(obj):
            return original_call(obj, *args, **kwargs)
        # Proxy input: hand back the GPU object it already wraps.
        return obj.as_gpu_object()

    return wrapped_from_pandas


def wrap_from_pandas_dataframe(original_call):
    """Wrap ``cudf.DataFrame.from_pandas`` to short-circuit proxy inputs.

    A proxy argument is unwrapped first; if the unwrapped value is already
    a ``cudf.DataFrame`` it is returned as-is, otherwise conversion
    proceeds through the original ``from_pandas``.
    """

    @functools.wraps(original_call)
    def wrapped_from_pandas_dataframe(dataframe, *args, **kwargs):
        if not is_proxy_object(dataframe):
            return original_call(dataframe, *args, **kwargs)
        unwrapped = dataframe.as_gpu_object()
        # Already on the GPU — nothing left to convert.
        if isinstance(unwrapped, cudf.DataFrame):
            return unwrapped
        return original_call(unwrapped, *args, **kwargs)

    return wrapped_from_pandas_dataframe


def wrap_from_pandas_series(original_call):
    """Wrap ``cudf.Series.from_pandas`` to short-circuit proxy inputs.

    A proxy argument is unwrapped first; if the unwrapped value is already
    a ``cudf.Series`` it is returned directly, otherwise it is passed on to
    the original ``from_pandas``.
    """

    @functools.wraps(original_call)
    def wrapped_from_pandas_series(s, *args, **kwargs):
        if not is_proxy_object(s):
            return original_call(s, *args, **kwargs)
        gpu_obj = s.as_gpu_object()
        # A GPU series needs no further conversion.
        return (
            gpu_obj
            if isinstance(gpu_obj, cudf.Series)
            else original_call(gpu_obj, *args, **kwargs)
        )

    return wrapped_from_pandas_series


def wrap_from_pandas_index(original_call):
    """Wrap ``cudf.BaseIndex.from_pandas`` to short-circuit proxy inputs.

    A proxy argument is unwrapped first; if the unwrapped value is already
    a cudf index (``cudf.core.index.BaseIndex``) it is returned directly,
    otherwise it flows through the original ``from_pandas``.
    """

    @functools.wraps(original_call)
    def wrapped_from_pandas_index(index, *args, **kwargs):
        if not is_proxy_object(index):
            return original_call(index, *args, **kwargs)
        unwrapped = index.as_gpu_object()
        # A GPU-backed index is passed through unchanged.
        if isinstance(unwrapped, cudf.core.index.BaseIndex):
            return unwrapped
        return original_call(unwrapped, *args, **kwargs)

    return wrapped_from_pandas_index


def wrap_from_pandas_multiindex(original_call):
    """Wrap ``cudf.MultiIndex.from_pandas`` to short-circuit proxy inputs.

    A proxy argument is unwrapped first; if the unwrapped value is already
    a ``cudf.MultiIndex`` it is returned directly, otherwise conversion
    proceeds through the original ``from_pandas``.
    """

    @functools.wraps(original_call)
    def wrapped_from_pandas_multiindex(multiindex, *args, **kwargs):
        if not is_proxy_object(multiindex):
            return original_call(multiindex, *args, **kwargs)
        unwrapped = multiindex.as_gpu_object()
        # Already a GPU multi-index — return it untouched.
        if isinstance(unwrapped, cudf.MultiIndex):
            return unwrapped
        return original_call(unwrapped, *args, **kwargs)

    return wrapped_from_pandas_multiindex


@functools.wraps(_original_DataFrame_init)
def DataFrame_init_(self, data, index=None, columns=None, *args, **kwargs):
def DataFrame_init_(
self, data=None, index=None, columns=None, *args, **kwargs
):
data_is_proxy = is_proxy_object(data)

if data_is_proxy:
Expand Down Expand Up @@ -1811,7 +1877,19 @@ def initial_setup():
cudf.Index.__init__ = wrap_init(_original_Index_init)
cudf.DataFrame.__init__ = DataFrame_init_
cudf.core.index.IndexMeta.__call__ = wrap_call(_original_IndexMeta_call)

cudf.from_pandas = wrap_from_pandas(_original_from_pandas)
cudf.DataFrame.from_pandas = wrap_from_pandas_dataframe(
_original_DataFrame_from_pandas
)
cudf.Series.from_pandas = wrap_from_pandas_series(
_original_Series_from_pandas
)
cudf.BaseIndex.from_pandas = wrap_from_pandas_index(
_original_Index_from_pandas
)
cudf.MultiIndex.from_pandas = wrap_from_pandas_multiindex(
_original_MultiIndex_from_pandas
)
cudf.set_option("mode.pandas_compatible", True)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ dependencies:
packages:
- pip
- pip:
- ibis-framework[pandas]
- ibis-framework[pandas]<10.0.0
test_hvplot:
common:
- output_types: conda
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -30,13 +30,15 @@ def dask_client():
yield dask_client


@pytest.mark.skip(reason="TODO: Fix these stumpy tests to work with dask")
def test_1d_distributed(dask_client):
rng = np.random.default_rng(seed=42)
ts = pd.Series(rng.random(100))
m = 10
return stumpy.stumped(dask_client, ts, m)


@pytest.mark.skip(reason="TODO: Fix these stumpy tests to work with dask")
def test_multidimensional_distributed_timeseries(dask_client):
rng = np.random.default_rng(seed=42)
# Each row represents data from a different dimension while each column represents
Expand Down

0 comments on commit 218d67d

Please sign in to comment.