diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json similarity index 91% rename from .devcontainer/cuda12.5-conda/devcontainer.json rename to .devcontainer/cuda12.8-conda/devcontainer.json index 3ed6fa9c37b..ad36130c6a0 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -3,7 +3,7 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "conda", "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { @@ -20,7 +20,7 @@ "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -29,7 +29,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 88% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index fe402024e29..a2955b81a60 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,20 +3,20 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { - "version": "12.5", + "version": "12.8", 
"installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, @@ -28,7 +28,7 @@ "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -36,7 +36,7 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e48f2e11acd..8e3134b896e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -195,7 +195,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' node_type: cpu32 build_command: | sccache -z; diff --git a/README.md b/README.md index 
2a3400df1a7..45deb518aed 100644 --- a/README.md +++ b/README.md @@ -51,14 +51,13 @@ The [cugraph-docs repository](https://github.com/rapidsai/cugraph-docs) contains ## Table of contents - Installation - - [Getting cuGraph Packages](./docs/cugraph/source/installation/getting_cugraph.md) - - [Building from Source](./docs/cugraph/source/installation/source_build.md) - - [Contributing to cuGraph](./readme_pages/CONTRIBUTING.md) + - [Getting cuGraph Packages](https://docs.rapids.ai/api/cugraph/stable/installation/getting_cugraph/) + - [Building from Source](https://docs.rapids.ai/api/cugraph/stable/installation/source_build/) + - [Contributing to cuGraph](https://docs.rapids.ai/contributing/) - General - - [Latest News](./readme_pages/news.md) - - [Current list of algorithms](./docs/cugraph/source/graph_support/algorithms.md) - - [Blogs and Presentation](./docs/cugraph/source/tutorials/cugraph_blogs.rst) - - [Performance](./readme_pages/performance/performance.md) + - [Latest News](https://docs.rapids.ai/api/cugraph/nightly/) + - [Current list of algorithms](https://docs.rapids.ai/api/cugraph/stable/graph_support/algorithms/) + - [Blogs and Presentation](https://docs.rapids.ai/api/cugraph/nightly/tutorials/cugraph_blogs/) - Packages - [cuGraph Python](./readme_pages/cugraph_python.md) - [Property Graph](./readme_pages/property_graph.md) @@ -69,11 +68,14 @@ The [cugraph-docs repository](https://github.com/rapidsai/cugraph-docs) contains - [cugraph-service](./readme_pages/cugraph_service.md) - API Docs - Python - - [Python Nightly](https://docs.rapids.ai/api/cugraph/nightly/) - - [Python Stable](https://docs.rapids.ai/api/cugraph/stable/) + - [Python Nightly](https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph/) + - [Python Stable](https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph/) + - C + - [C Nightly](https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph_c/) + - [C Stable](https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph_c/) - C++ - - 
[C++ Nightly](https://docs.rapids.ai/api/libcugraph/nightly/) - - [C++ Stable](https://docs.rapids.ai/api/libcugraph/stable/) + - [C++ Nightly](https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph_cpp/) + - [C++ Stable](https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph_cpp/) (Will be available when 25.02 is released) - References - [RAPIDS](https://rapids.ai/) - [ARROW](https://arrow.apache.org/) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml similarity index 96% rename from conda/environments/all_cuda-125_arch-x86_64.yaml rename to conda/environments/all_cuda-128_arch-x86_64.yaml index 83126df9194..22c5f594a42 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.6.2,<13.0a0 -- cuda-version=12.5 +- cuda-version=12.8 - cudf==25.2.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler @@ -77,4 +77,4 @@ dependencies: - torchmetrics - ucx-py==0.42.*,>=0.0.0a0 - wheel -name: all_cuda-125_arch-x86_64 +name: all_cuda-128_arch-x86_64 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f1dda42ad9d..0987757df22 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -548,6 +548,7 @@ add_library(cugraph_c src/c_api/allgather.cpp src/c_api/decompress_to_edgelist.cpp src/c_api/edgelist.cpp + src/c_api/renumber_arbitrary_edgelist.cu ) add_library(cugraph::cugraph_c ALIAS cugraph_c) diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 964b2f2c8d6..4f9022888bb 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -463,6 +463,27 @@ cugraph_error_code_t cugraph_decompress_to_edgelist(const cugraph_resource_handl cugraph_edgelist_t** result, cugraph_error_t** error); +/** + * @brief Renumber arbitrary edgelist + * + * This function is designed to assist renumbering graph vertices in the case where + * the global vertex id list exceeds the GPU memory. Renumbering is done in-place in the + * supplied @p srcs and @p dsts parameters. + * + * @param [in] handle Handle for accessing resources + * @param [in] renumber_map Host array with the renumber map + * @param [in,out] srcs Device array of src vertices to renumber + * @param [in,out] dsts Device array of dst vertices to renumber + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + */ +cugraph_error_code_t cugraph_renumber_arbitrary_edgelist( + const cugraph_resource_handle_t* handle, + const cugraph_type_erased_host_array_view_t* renumber_map, + cugraph_type_erased_device_array_view_t* srcs, + cugraph_type_erased_device_array_view_t* dsts, + cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/renumber_arbitrary_edgelist.cu b/cpp/src/c_api/renumber_arbitrary_edgelist.cu new file mode 100644 index 00000000000..1535d6689c6 --- /dev/null +++ b/cpp/src/c_api/renumber_arbitrary_edgelist.cu @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_api/resource_handle.hpp" +#include "c_api/utils.hpp" + +#include +#include + +#include +#include + +#include +#include + +namespace { + +template +cugraph_error_code_t renumber_arbitrary_edgelist( + raft::handle_t const& handle, + cugraph::c_api::cugraph_type_erased_host_array_view_t const* renumber_map, + cugraph::c_api::cugraph_type_erased_device_array_view_t* srcs, + cugraph::c_api::cugraph_type_erased_device_array_view_t* dsts) +{ + rmm::device_uvector vertices(2 * srcs->size_, handle.get_stream()); + + thrust::copy_n( + handle.get_thrust_policy(), srcs->as_type(), srcs->size_, vertices.data()); + thrust::copy_n(handle.get_thrust_policy(), + dsts->as_type(), + dsts->size_, + vertices.data() + srcs->size_); + + thrust::sort(handle.get_thrust_policy(), vertices.begin(), vertices.end()); + vertices.resize( + thrust::distance(vertices.begin(), + thrust::unique(handle.get_thrust_policy(), vertices.begin(), vertices.end())), + handle.get_stream()); + + vertices.shrink_to_fit(handle.get_stream()); + rmm::device_uvector ids(vertices.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + ids.begin(), + ids.end(), + cugraph::invalid_vertex_id::value); + + raft::device_span vertices_span{vertices.data(), vertices.size()}; + raft::device_span ids_span{ids.data(), ids.size()}; + + // Read chunk of renumber_map in a loop, updating base offset to compute vertex id + // FIXME: Compute this as a function of free memory? Or some value that keeps a + // particular GPU saturated? 
+ size_t chunk_size = size_t{1} << 20; + + rmm::device_uvector renumber_chunk(chunk_size, handle.get_stream()); + + for (size_t chunk_base_offset = 0; chunk_base_offset < renumber_map->size_; + chunk_base_offset += chunk_size) { + size_t size = std::min(chunk_size, renumber_map->size_ - chunk_base_offset); + if (size < chunk_size) renumber_chunk.resize(size, handle.get_stream()); + + raft::update_device(renumber_chunk.data(), + renumber_map->as_type() + chunk_base_offset, + size, + handle.get_stream()); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(renumber_chunk.size()), + [chunk_base_offset, + renumber_chunk_span = + raft::device_span{renumber_chunk.data(), renumber_chunk.size()}, + vertices_span, + ids_span] __device__(size_t idx) { + auto pos = thrust::lower_bound( + thrust::seq, vertices_span.begin(), vertices_span.end(), renumber_chunk_span[idx]); + if ((pos != vertices_span.end()) && (*pos == renumber_chunk_span[idx])) { + ids_span[thrust::distance(vertices_span.begin(), pos)] = + static_cast(chunk_base_offset + idx); + } + }); + } + + CUGRAPH_EXPECTS(thrust::count(handle.get_thrust_policy(), + ids.begin(), + ids.end(), + cugraph::invalid_vertex_id::value) == 0, + "some vertices were not renumbered"); + + thrust::transform( + handle.get_thrust_policy(), + srcs->as_type(), + srcs->as_type() + srcs->size_, + srcs->as_type(), + [vertices_span, ids_span] __device__(vertex_t v) { + return ids_span[thrust::distance( + vertices_span.begin(), + thrust::lower_bound(thrust::seq, vertices_span.begin(), vertices_span.end(), v))]; + }); + + thrust::transform( + handle.get_thrust_policy(), + dsts->as_type(), + dsts->as_type() + srcs->size_, + dsts->as_type(), + [vertices_span, ids_span] __device__(vertex_t v) { + return ids_span[thrust::distance( + vertices_span.begin(), + thrust::lower_bound(thrust::seq, vertices_span.begin(), vertices_span.end(), v))]; + }); + + return CUGRAPH_SUCCESS; +} + +} 
// namespace + +extern "C" cugraph_error_code_t cugraph_renumber_arbitrary_edgelist( + const cugraph_resource_handle_t* handle, + const cugraph_type_erased_host_array_view_t* renumber_map, + cugraph_type_erased_device_array_view_t* srcs, + cugraph_type_erased_device_array_view_t* dsts, + cugraph_error_t** error) +{ + cugraph::c_api::cugraph_type_erased_host_array_view_t const* h_renumber_map = + reinterpret_cast(renumber_map); + cugraph::c_api::cugraph_type_erased_device_array_view_t* d_srcs = + reinterpret_cast(srcs); + cugraph::c_api::cugraph_type_erased_device_array_view_t* d_dsts = + reinterpret_cast(dsts); + + CAPI_EXPECTS(h_renumber_map->type_ == d_srcs->type_, + CUGRAPH_INVALID_INPUT, + "type of renumber map and src vertices must match", + *error); + + CAPI_EXPECTS(h_renumber_map->type_ == d_dsts->type_, + CUGRAPH_INVALID_INPUT, + "type of renumber map and dst vertices must match", + *error); + + CAPI_EXPECTS( + d_srcs->size_ == d_dsts->size_, CUGRAPH_INVALID_INPUT, "src and dst sizes must match", *error); + + *error = nullptr; + + try { + switch (h_renumber_map->type_) { + case cugraph_data_type_id_t::INT32: { + return renumber_arbitrary_edgelist( + *reinterpret_cast(handle)->handle_, + h_renumber_map, + d_srcs, + d_dsts); + } break; + case cugraph_data_type_id_t::INT64: { + return renumber_arbitrary_edgelist( + *reinterpret_cast(handle)->handle_, + h_renumber_map, + d_srcs, + d_dsts); + } break; + default: { + std::stringstream ss; + ss << "ERROR: Unsupported data type enum:" << static_cast(h_renumber_map->type_); + *error = + reinterpret_cast(new cugraph::c_api::cugraph_error_t{ss.str().c_str()}); + return CUGRAPH_INVALID_INPUT; + } + } + } catch (std::exception const& ex) { + *error = reinterpret_cast<::cugraph_error_t*>(new cugraph::c_api::cugraph_error_t{ex.what()}); + return CUGRAPH_UNKNOWN_ERROR; + } + + return CUGRAPH_SUCCESS; +} diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index bd497b9c58c..25a1cf63f4d 
100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -19,6 +19,7 @@ #include "prims/extract_transform_e.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -32,6 +33,7 @@ #include #include +#include #include #include #include @@ -43,6 +45,59 @@ namespace cugraph { +template +struct extract_weak_edges { + edge_t k{}; + __device__ cuda::std::optional> operator()( + vertex_t src, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_t count) const + { + // No need to process edges with count == 0 + return ((count < k - 2) && (count != 0)) + ? cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; + } +}; + +template +struct is_k_or_greater_t { + edge_t k{}; + __device__ bool operator()(edge_t core_number) const { return core_number >= edge_t{k}; } +}; + +template +struct extract_triangles_endpoints { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + auto endpoints = thrust::make_tuple(weak_srcs[chunk_start + idx], // p + weak_dsts[chunk_start + idx], // q + intersection_indices[i] // r + ); + + auto p = weak_srcs[chunk_start + idx]; + auto q = weak_dsts[chunk_start + idx]; + auto r = intersection_indices[i]; + // Re-order the endpoints such that p < q < r in order to identify duplicate triangles + // which will cause overcompensation. 
comparing the vertex IDs is cheaper than comparing the + // degrees (d(p) < d(q) < d(r)) which will be done once in the latter stage to retrieve the + // direction of the edges once the triplet dependency is broken. + if (p > q) cuda::std::swap(p, q); + if (p > r) cuda::std::swap(p, r); + if (q > r) cuda::std::swap(q, r); + + return thrust::make_tuple(p, q, r); + } +}; + namespace { template @@ -60,39 +115,48 @@ struct exclude_self_loop_t { } }; -template -struct extract_low_to_high_degree_weighted_edges_t { - __device__ cuda::std::optional> operator()( - vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const - { - return (src_out_degree < dst_out_degree) - ? cuda::std::optional>{thrust::make_tuple( - src, dst, wgt)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? cuda::std::optional< - thrust::tuple>{thrust::make_tuple( - src, dst, wgt)} - : cuda::std::nullopt); - } -}; - template -struct extract_low_to_high_degree_edges_t { - __device__ cuda::std::optional> operator()( +struct extract_low_to_high_degree_edges_from_endpoints_t { + raft::device_span srcs{}; + raft::device_span dsts{}; + raft::device_span count{}; + __device__ cuda::std::optional> operator()( vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, cuda::std::nullopt_t) const { - return (src_out_degree < dst_out_degree) - ? cuda::std::optional>{thrust::make_tuple(src, dst)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? 
cuda::std::optional>{thrust::make_tuple(src, - dst)} - : cuda::std::nullopt); + // FIXME: Not the most efficient way because the entire edgelist is scan just to find + // the direction of the edges + auto itr = thrust::lower_bound(thrust::seq, + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), + thrust::make_tuple(src, dst)); + + if ((itr != thrust::make_zip_iterator(srcs.end(), dsts.end())) && + (*itr == thrust::make_tuple(src, dst))) { + auto idx = thrust::distance(thrust::make_zip_iterator(srcs.begin(), dsts.begin()), itr); + + if (src_out_degree < dst_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if (dst_out_degree < src_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } else { + if ((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if ((src_out_degree == dst_out_degree) && + (src > dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } + } + } else { + return cuda::std::nullopt; + } } }; @@ -119,246 +183,543 @@ k_truss(raft::handle_t const& handle, // nothing to do } - std::optional> modified_graph{std::nullopt}; - std::optional> modified_graph_view{std::nullopt}; - std::optional> renumber_map{std::nullopt}; - std::optional, weight_t>> - edge_weight{std::nullopt}; - std::optional> wgts{std::nullopt}; - - if (graph_view.count_self_loops(handle) > edge_t{0}) { - auto [srcs, dsts] = extract_transform_e(handle, - graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - exclude_self_loop_t{}); - - if constexpr (multi_gpu) { - std::tie( - srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - 
detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + // 2. Exclude self-loops and edges that do not belong to (k-1)-core - std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + auto cur_graph_view = graph_view; + auto unmasked_cur_graph_view = cur_graph_view; - modified_graph_view = (*modified_graph).view(); - } + if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); } + // mask for self-loops and edges not part of k-1 core + cugraph::edge_property_t undirected_mask(handle); + { + // 2.1 Exclude self-loops - // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + if (cur_graph_view.count_self_loops(handle) > edge_t{0}) { + // 2.1. Exclude self-loops - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? 
std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; - - rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), - handle.get_stream()); - core_number( - handle, cur_graph_view, core_numbers.data(), k_core_degree_type_t::OUT, size_t{2}, size_t{2}); - - raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - - auto [srcs, dsts, wgts] = k_core(handle, - cur_graph_view, - edge_weight_view, - k - 1, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + cugraph::edge_property_t self_loop_edge_mask(handle, + cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, self_loop_edge_mask.mutable_view(), false); - std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( + transform_e( handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto, auto, auto) { return src != dst; }, + self_loop_edge_mask.mutable_view()); + + undirected_mask = std::move(self_loop_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); + } - modified_graph_view = (*modified_graph).view(); + // 2.2 Find (k-1)-core and exclude edges that do not 
belong to (k-1)-core + { + rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), + handle.get_stream()); + core_number(handle, + cur_graph_view, + core_numbers.data(), + k_core_degree_type_t::OUT, + size_t{2}, + size_t{2}); + + edge_src_property_t edge_src_in_k_minus_1_cores( + handle, cur_graph_view); + edge_dst_property_t edge_dst_in_k_minus_1_cores( + handle, cur_graph_view); + auto in_k_minus_1_core_first = + thrust::make_transform_iterator(core_numbers.begin(), is_k_or_greater_t{k - 1}); + rmm::device_uvector in_k_minus_1_core_flags(core_numbers.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + in_k_minus_1_core_first, + in_k_minus_1_core_first + core_numbers.size(), + in_k_minus_1_core_flags.begin()); + update_edge_src_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_src_in_k_minus_1_cores.mutable_view()); + update_edge_dst_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_dst_in_k_minus_1_cores.mutable_view()); + + cugraph::edge_property_t in_k_minus_1_core_edge_mask( + handle, cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, in_k_minus_1_core_edge_mask.mutable_view(), false); + + transform_e( + handle, + cur_graph_view, + edge_src_in_k_minus_1_cores.view(), + edge_dst_in_k_minus_1_cores.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto, auto, auto src_in_k_minus_1_core, auto dst_in_k_minus_1_core, auto) { + return src_in_k_minus_1_core && dst_in_k_minus_1_core; + }, + in_k_minus_1_core_edge_mask.mutable_view()); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); + undirected_mask = std::move(in_k_minus_1_core_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); } - - 
renumber_map = std::move(tmp_renumber_map); } // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; + edge_src_property_t edge_src_out_degrees(handle, + cur_graph_view); + edge_dst_property_t edge_dst_out_degrees(handle, + cur_graph_view); + cugraph::edge_property_t, bool> dodg_mask( + handle, cur_graph_view); + { auto out_degrees = cur_graph_view.compute_out_degrees(handle); - edge_src_property_t edge_src_out_degrees(handle, - cur_graph_view); - edge_dst_property_t edge_dst_out_degrees(handle, - cur_graph_view); update_edge_src_property( handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); update_edge_dst_property( handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - edge_weight_view = - edge_weight ? 
std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - if (edge_weight_view) { - std::tie(srcs, dsts, wgts) = extract_transform_e( - handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - *edge_weight_view, - extract_low_to_high_degree_weighted_edges_t{}); - } else { - std::tie(srcs, dsts) = - extract_transform_e(handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - edge_dummy_property_t{}.view(), - extract_low_to_high_degree_edges_t{}); - } - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } - - std::optional> tmp_renumber_map{std::nullopt}; - - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); - - modified_graph_view = (*modified_graph).view(); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); - } - renumber_map = std::move(tmp_renumber_map); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, dodg_mask.mutable_view(), bool{false}); + + cugraph::transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_out_degree, auto dst_out_degree, auto) { + return (src_out_degree < dst_out_degree) ? 
true + : ((src_out_degree == dst_out_degree) && + (src < dst) /* tie-breaking using vertex ID */) + ? true + : false; + }, + dodg_mask.mutable_view(), + do_expensive_check); + + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(dodg_mask.view()); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + // Mask self loops and edges not being part of k-1 core + auto weak_edges_mask = std::move(undirected_mask); - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view, false); - cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); + cugraph::edge_bucket_t edgelist_weak(handle); + cugraph::edge_bucket_t edges_to_decrement_count(handle); + size_t prev_chunk_size = 0; // FIXME: Add support for chunking while (true) { - // FIXME: This approach is very expensive when invalidating only few edges per iteration - // and should be address. 
- auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view); + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, weak_edgelist_last); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( + handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, do_expensive_check); + + // This array stores (p, q, r) which are endpoints for the triangles with weak edges + + auto triangles_endpoints = + allocate_dataframe_buffer>( + intersection_indices.size(), handle.get_stream()); + + // Extract endpoints for triangles with weak edges + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + extract_triangles_endpoints{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto unique_triangle_end = 
thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto num_unique_triangles = thrust::distance( // Triangles are represented by their endpoints + get_dataframe_buffer_begin(triangles_endpoints), + unique_triangle_end); + + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // Shuffle the edges with respect to the undirected graph view to the GPU + // owning edge (p, q). 
Remember that the triplet (p, q, r) is ordered based on the + // vertex ID and not the degree so (p, q) might not be an edge in the DODG but is + // surely an edge in the undirected graph + std::tie(triangles_endpoints, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + unique_triangle_end = thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + num_unique_triangles = + thrust::distance(get_dataframe_buffer_begin(triangles_endpoints), unique_triangle_end); + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + } + + auto edgelist_to_update_count = allocate_dataframe_buffer>( + 3 * num_unique_triangles, handle.get_stream()); + + // The order no longer matters since duplicated triangles have been removed + // Flatten the endpoints to a list of egdes. 
+ thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [num_unique_triangles, + triangles_endpoints = + get_dataframe_buffer_begin(triangles_endpoints)] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_endpoints + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } + + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<1>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + return thrust::make_tuple(src, dst); + }); + + if constexpr (multi_gpu) { + std::tie(std::get<0>(edgelist_to_update_count), + std::get<1>(edgelist_to_update_count), + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(edgelist_to_update_count)), + std::move(std::get<1>(edgelist_to_update_count)), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector 
decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count) = + extract_transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + extract_low_to_high_degree_edges_from_endpoints_t{ + raft::device_span(std::get<0>(vertex_pair_buffer_unique).data(), + std::get<0>(vertex_pair_buffer_unique).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_unique).data(), + std::get<1>(vertex_pair_buffer_unique).size()), + raft::device_span(decrease_count.data(), decrease_count.size())}); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + std::forward_as_tuple(std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count), + std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + 
thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin(), + decrease_count.begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end(), + decrease_count.end()), + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count.begin()); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Update count of weak edges from the DODG view + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [edge_buffer_first = + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span( + decrease_count.data(), decrease_count.size())] __device__(auto src, + auto dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + edge_t count) { + auto itr_pair = 
thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + count -= decrease_count[idx_pair]; + + return count; + }, + edge_triangle_counts.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); + + edgelist_weak.insert( + weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); - // Mask all the edges that have k - 2 count + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); cugraph::transform_e( handle, cur_graph_view, + edgelist_weak, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [k] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { - return count >= k - 2; + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; }, - edge_mask.mutable_view(), - false); + weak_edges_mask.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + // shuffle the edges if multi_gpu + if constexpr (multi_gpu) { + std::tie(weak_edgelist_dsts, + weak_edgelist_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(weak_edgelist_dsts), + std::move(weak_edgelist_srcs), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + 
thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); + + edgelist_weak.insert( + weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); - cur_graph_view.attach_edge_mask(edge_mask.view()); + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + do_expensive_check); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); } + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + do_expensive_check); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; @@ -367,11 +728,10 @@ k_truss(raft::handle_t const& handle, decompress_to_edgelist( handle, cur_graph_view, - edge_weight_view ? 
std::make_optional(*edge_weight_view) : std::nullopt, + edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size()))); + std::optional>{std::nullopt}); std::tie(edgelist_srcs, edgelist_dsts, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6800b9c4769..f819142076d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -893,6 +893,8 @@ ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c) ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c) ConfigureCTest(CAPI_LEIDEN_TEST c_api/leiden_test.c) ConfigureCTest(CAPI_ECG_TEST c_api/ecg_test.c) +ConfigureCTest(CAPI_RENUMBER_ARBITRARY_EDGELIST_TEST c_api/renumber_arbitrary_edgelist_test.c) + ############################################################################# # Skipping due to CUDA 12.2 failure that traces back to RAFT # # TODO: Uncomment this once the issue is fixed. # diff --git a/cpp/tests/c_api/renumber_arbitrary_edgelist_test.c b/cpp/tests/c_api/renumber_arbitrary_edgelist_test.c new file mode 100644 index 00000000000..6528ae55589 --- /dev/null +++ b/cpp/tests/c_api/renumber_arbitrary_edgelist_test.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_test_utils.h" /* RUN_TEST */ +#include "cugraph_c/array.h" + +#include +#include + +#include + +typedef int32_t vertex_t; + +int generic_renumber_arbitrary_edgelist_test(vertex_t* h_src, + vertex_t* h_dst, + vertex_t* h_renumber_map, + size_t num_edges, + size_t renumber_map_size) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + cugraph_type_erased_device_array_t* srcs; + cugraph_type_erased_device_array_t* dsts; + cugraph_type_erased_device_array_view_t* srcs_view; + cugraph_type_erased_device_array_view_t* dsts_view; + cugraph_type_erased_host_array_view_t* renumber_map_view; + + ret_code = cugraph_type_erased_device_array_create(p_handle, num_edges, INT32, &srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "srcs create failed."); + + ret_code = cugraph_type_erased_device_array_create(p_handle, num_edges, INT32, &dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dsts create failed."); + + srcs_view = cugraph_type_erased_device_array_view(srcs); + dsts_view = cugraph_type_erased_device_array_view(dsts); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, srcs_view, (byte_t*)h_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, dsts_view, (byte_t*)h_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed."); + + renumber_map_view = + cugraph_type_erased_host_array_view_create(h_renumber_map, renumber_map_size, INT32); + + ret_code = cugraph_renumber_arbitrary_edgelist( + p_handle, renumber_map_view, srcs_view, dsts_view, 
&ret_error); + + vertex_t h_renumbered_srcs[num_edges]; + vertex_t h_renumbered_dsts[num_edges]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_renumbered_srcs, srcs_view, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_renumbered_dsts, dsts_view, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_edges) && (test_ret_value == 0); ++i) { + vertex_t renumbered_src = -1; + vertex_t renumbered_dst = -1; + + for (size_t j = 0; (j < renumber_map_size) && ((renumbered_src < 0) || (renumbered_dst < 0)); + ++j) { + if (h_src[i] == h_renumber_map[j]) renumbered_src = (vertex_t)j; + if (h_dst[i] == h_renumber_map[j]) renumbered_dst = (vertex_t)j; + } + + TEST_ASSERT(test_ret_value, h_renumbered_srcs[i] == renumbered_src, "src results don't match"); + TEST_ASSERT(test_ret_value, h_renumbered_dsts[i] == renumbered_dst, "dst results don't match"); + } + + cugraph_type_erased_device_array_free(dsts); + cugraph_type_erased_device_array_free(srcs); + cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_renumbering() +{ + size_t num_edges = 8; + size_t renumber_map_size = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_renumber_map[] = {5, 3, 1, 2, 4, 0}; + + return generic_renumber_arbitrary_edgelist_test( + h_src, h_dst, h_renumber_map, num_edges, renumber_map_size); +} + +int main(int argc, char** argv) +{ + int result = 0; + result |= RUN_TEST(test_renumbering); + return result; +} diff --git a/dependencies.yaml b/dependencies.yaml index 02fa03cff70..5419be9beda 100755 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: [conda] matrix: - cuda: ["11.8", "12.5"] + cuda: 
["11.8", "12.8"] arch: [x86_64] includes: - checks @@ -279,6 +279,10 @@ dependencies: cuda: "12.5" packages: - cuda-version=12.5 + - matrix: + cuda: "12.8" + packages: + - cuda-version=12.8 cuda: specific: - output_types: [conda] @@ -300,7 +304,7 @@ dependencies: - cuda-nvtx common_build: common: - - output_types: [conda, pyproject] + - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - ninja diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index dfe3b085fdf..060d4ee1e99 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -72,7 +72,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["cugraph"] diff --git a/python/libcugraph/pyproject.toml b/python/libcugraph/pyproject.toml index a6191e28000..9d85bfa5dac 100644 --- a/python/libcugraph/pyproject.toml +++ b/python/libcugraph/pyproject.toml @@ -53,14 +53,14 @@ select = [ ] # detect when package size grows significantly -max_allowed_size_compressed = '1.2G' +max_allowed_size_compressed = '1.4G' [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["libcugraph"] wheel.install-dir = "libcugraph" diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml index ac124e1fd5f..3c50a79bfa3 100644 --- a/python/pylibcugraph/pyproject.toml +++ b/python/pylibcugraph/pyproject.toml @@ -57,7 +57,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true 
wheel.packages = ["pylibcugraph"] diff --git a/readme_pages/performance/performance.md b/readme_pages/performance/performance.md deleted file mode 100644 index 159b2334704..00000000000 --- a/readme_pages/performance/performance.md +++ /dev/null @@ -1,7 +0,0 @@ - - - - -We are working on a new nightly benchmarking system that will produce performance numbers. -This is a splash page for where the performance numbers will be posted in early 2023.