diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json similarity index 91% rename from .devcontainer/cuda12.5-conda/devcontainer.json rename to .devcontainer/cuda12.8-conda/devcontainer.json index 3ed6fa9c37b..ad36130c6a0 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -3,7 +3,7 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "conda", "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { @@ -20,7 +20,7 @@ "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -29,7 +29,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 88% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index fe402024e29..a2955b81a60 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,20 +3,20 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { - "version": "12.5", + "version": "12.8", 
"installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, @@ -28,7 +28,7 @@ "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -36,7 +36,7 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e48f2e11acd..8e3134b896e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -195,7 +195,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' node_type: cpu32 build_command: | sccache -z; diff --git a/README.md b/README.md index 
2a3400df1a7..45deb518aed 100644 --- a/README.md +++ b/README.md @@ -51,14 +51,13 @@ The [cugraph-docs repository](https://github.com/rapidsai/cugraph-docs) contains ## Table of contents - Installation - - [Getting cuGraph Packages](./docs/cugraph/source/installation/getting_cugraph.md) - - [Building from Source](./docs/cugraph/source/installation/source_build.md) - - [Contributing to cuGraph](./readme_pages/CONTRIBUTING.md) + - [Getting cuGraph Packages](https://docs.rapids.ai/api/cugraph/stable/installation/getting_cugraph/) + - [Building from Source](https://docs.rapids.ai/api/cugraph/stable/installation/source_build/) + - [Contributing to cuGraph](https://docs.rapids.ai/contributing/) - General - - [Latest News](./readme_pages/news.md) - - [Current list of algorithms](./docs/cugraph/source/graph_support/algorithms.md) - - [Blogs and Presentation](./docs/cugraph/source/tutorials/cugraph_blogs.rst) - - [Performance](./readme_pages/performance/performance.md) + - [Latest News](https://docs.rapids.ai/api/cugraph/nightly/) + - [Current list of algorithms](https://docs.rapids.ai/api/cugraph/stable/graph_support/algorithms/) + - [Blogs and Presentation](https://docs.rapids.ai/api/cugraph/nightly/tutorials/cugraph_blogs/) - Packages - [cuGraph Python](./readme_pages/cugraph_python.md) - [Property Graph](./readme_pages/property_graph.md) @@ -69,11 +68,14 @@ The [cugraph-docs repository](https://github.com/rapidsai/cugraph-docs) contains - [cugraph-service](./readme_pages/cugraph_service.md) - API Docs - Python - - [Python Nightly](https://docs.rapids.ai/api/cugraph/nightly/) - - [Python Stable](https://docs.rapids.ai/api/cugraph/stable/) + - [Python Nightly](https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph/) + - [Python Stable](https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph/) + - C + - [C Nightly](https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph_c/) + - [C Stable](https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph_c/) - C++ - - 
[C++ Nightly](https://docs.rapids.ai/api/libcugraph/nightly/) - - [C++ Stable](https://docs.rapids.ai/api/libcugraph/stable/) + - [C++ Nightly](https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph_cpp/) + - [C++ Stable](https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph_cpp/) (Will be available when 25.02 is released) - References - [RAPIDS](https://rapids.ai/) - [ARROW](https://arrow.apache.org/) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml similarity index 96% rename from conda/environments/all_cuda-125_arch-x86_64.yaml rename to conda/environments/all_cuda-128_arch-x86_64.yaml index 83126df9194..22c5f594a42 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.6.2,<13.0a0 -- cuda-version=12.5 +- cuda-version=12.8 - cudf==25.2.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler @@ -77,4 +77,4 @@ dependencies: - torchmetrics - ucx-py==0.42.*,>=0.0.0a0 - wheel -name: all_cuda-125_arch-x86_64 +name: all_cuda-128_arch-x86_64 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f1dda42ad9d..0987757df22 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -548,6 +548,7 @@ add_library(cugraph_c src/c_api/allgather.cpp src/c_api/decompress_to_edgelist.cpp src/c_api/edgelist.cpp + src/c_api/renumber_arbitrary_edgelist.cu ) add_library(cugraph::cugraph_c ALIAS cugraph_c) diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 964b2f2c8d6..4f9022888bb 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -463,6 +463,27 @@ cugraph_error_code_t cugraph_decompress_to_edgelist(const cugraph_resource_handl cugraph_edgelist_t** result, cugraph_error_t** error); +/** + * @brief Renumber arbitrary edgelist + * + * This function is designed to assist renumbering graph vertices in the case where + * the global vertex id list exceeds the GPU memory. Renumbering is done in-place in the + * supplied @p srcs and @p dsts parameters. + * + * @param [in] handle Handle for accessing resources + * @param [in] renumber_map Host array with the renumber map + * @param [in,out] srcs Device array of src vertices to renumber + * @param [in,out] dsts Device array of dst vertices to renumber + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + */ +cugraph_error_code_t cugraph_renumber_arbitrary_edgelist( + const cugraph_resource_handle_t* handle, + const cugraph_type_erased_host_array_view_t* renumber_map, + cugraph_type_erased_device_array_view_t* srcs, + cugraph_type_erased_device_array_view_t* dsts, + cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/renumber_arbitrary_edgelist.cu b/cpp/src/c_api/renumber_arbitrary_edgelist.cu new file mode 100644 index 00000000000..1535d6689c6 --- /dev/null +++ b/cpp/src/c_api/renumber_arbitrary_edgelist.cu @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_api/resource_handle.hpp" +#include "c_api/utils.hpp" + +#include +#include + +#include +#include + +#include +#include + +namespace { + +template +cugraph_error_code_t renumber_arbitrary_edgelist( + raft::handle_t const& handle, + cugraph::c_api::cugraph_type_erased_host_array_view_t const* renumber_map, + cugraph::c_api::cugraph_type_erased_device_array_view_t* srcs, + cugraph::c_api::cugraph_type_erased_device_array_view_t* dsts) +{ + rmm::device_uvector vertices(2 * srcs->size_, handle.get_stream()); + + thrust::copy_n( + handle.get_thrust_policy(), srcs->as_type(), srcs->size_, vertices.data()); + thrust::copy_n(handle.get_thrust_policy(), + dsts->as_type(), + dsts->size_, + vertices.data() + srcs->size_); + + thrust::sort(handle.get_thrust_policy(), vertices.begin(), vertices.end()); + vertices.resize( + thrust::distance(vertices.begin(), + thrust::unique(handle.get_thrust_policy(), vertices.begin(), vertices.end())), + handle.get_stream()); + + vertices.shrink_to_fit(handle.get_stream()); + rmm::device_uvector ids(vertices.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + ids.begin(), + ids.end(), + cugraph::invalid_vertex_id::value); + + raft::device_span vertices_span{vertices.data(), vertices.size()}; + raft::device_span ids_span{ids.data(), ids.size()}; + + // Read chunk of renumber_map in a loop, updating base offset to compute vertex id + // FIXME: Compute this as a function of free memory? Or some value that keeps a + // particular GPU saturated? 
+ size_t chunk_size = size_t{1} << 20; + + rmm::device_uvector renumber_chunk(chunk_size, handle.get_stream()); + + for (size_t chunk_base_offset = 0; chunk_base_offset < renumber_map->size_; + chunk_base_offset += chunk_size) { + size_t size = std::min(chunk_size, renumber_map->size_ - chunk_base_offset); + if (size < chunk_size) renumber_chunk.resize(size, handle.get_stream()); + + raft::update_device(renumber_chunk.data(), + renumber_map->as_type() + chunk_base_offset, + size, + handle.get_stream()); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(renumber_chunk.size()), + [chunk_base_offset, + renumber_chunk_span = + raft::device_span{renumber_chunk.data(), renumber_chunk.size()}, + vertices_span, + ids_span] __device__(size_t idx) { + auto pos = thrust::lower_bound( + thrust::seq, vertices_span.begin(), vertices_span.end(), renumber_chunk_span[idx]); + if ((pos != vertices_span.end()) && (*pos == renumber_chunk_span[idx])) { + ids_span[thrust::distance(vertices_span.begin(), pos)] = + static_cast(chunk_base_offset + idx); + } + }); + } + + CUGRAPH_EXPECTS(thrust::count(handle.get_thrust_policy(), + ids.begin(), + ids.end(), + cugraph::invalid_vertex_id::value) == 0, + "some vertices were not renumbered"); + + thrust::transform( + handle.get_thrust_policy(), + srcs->as_type(), + srcs->as_type() + srcs->size_, + srcs->as_type(), + [vertices_span, ids_span] __device__(vertex_t v) { + return ids_span[thrust::distance( + vertices_span.begin(), + thrust::lower_bound(thrust::seq, vertices_span.begin(), vertices_span.end(), v))]; + }); + + thrust::transform( + handle.get_thrust_policy(), + dsts->as_type(), + dsts->as_type() + srcs->size_, + dsts->as_type(), + [vertices_span, ids_span] __device__(vertex_t v) { + return ids_span[thrust::distance( + vertices_span.begin(), + thrust::lower_bound(thrust::seq, vertices_span.begin(), vertices_span.end(), v))]; + }); + + return CUGRAPH_SUCCESS; +} + +} 
// namespace + +extern "C" cugraph_error_code_t cugraph_renumber_arbitrary_edgelist( + const cugraph_resource_handle_t* handle, + const cugraph_type_erased_host_array_view_t* renumber_map, + cugraph_type_erased_device_array_view_t* srcs, + cugraph_type_erased_device_array_view_t* dsts, + cugraph_error_t** error) +{ + cugraph::c_api::cugraph_type_erased_host_array_view_t const* h_renumber_map = + reinterpret_cast(renumber_map); + cugraph::c_api::cugraph_type_erased_device_array_view_t* d_srcs = + reinterpret_cast(srcs); + cugraph::c_api::cugraph_type_erased_device_array_view_t* d_dsts = + reinterpret_cast(dsts); + + CAPI_EXPECTS(h_renumber_map->type_ == d_srcs->type_, + CUGRAPH_INVALID_INPUT, + "type of renumber map and src vertices must match", + *error); + + CAPI_EXPECTS(h_renumber_map->type_ == d_dsts->type_, + CUGRAPH_INVALID_INPUT, + "type of renumber map and dst vertices must match", + *error); + + CAPI_EXPECTS( + d_srcs->size_ == d_dsts->size_, CUGRAPH_INVALID_INPUT, "src and dst sizes must match", *error); + + *error = nullptr; + + try { + switch (h_renumber_map->type_) { + case cugraph_data_type_id_t::INT32: { + return renumber_arbitrary_edgelist( + *reinterpret_cast(handle)->handle_, + h_renumber_map, + d_srcs, + d_dsts); + } break; + case cugraph_data_type_id_t::INT64: { + return renumber_arbitrary_edgelist( + *reinterpret_cast(handle)->handle_, + h_renumber_map, + d_srcs, + d_dsts); + } break; + default: { + std::stringstream ss; + ss << "ERROR: Unsupported data type enum:" << static_cast(h_renumber_map->type_); + *error = + reinterpret_cast(new cugraph::c_api::cugraph_error_t{ss.str().c_str()}); + return CUGRAPH_INVALID_INPUT; + } + } + } catch (std::exception const& ex) { + *error = reinterpret_cast<::cugraph_error_t*>(new cugraph::c_api::cugraph_error_t{ex.what()}); + return CUGRAPH_UNKNOWN_ERROR; + } + + return CUGRAPH_SUCCESS; +} diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index bd497b9c58c..25a1cf63f4d 
100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -19,6 +19,7 @@ #include "prims/extract_transform_e.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -32,6 +33,7 @@ #include #include +#include #include #include #include @@ -43,6 +45,59 @@ namespace cugraph { +template +struct extract_weak_edges { + edge_t k{}; + __device__ cuda::std::optional> operator()( + vertex_t src, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_t count) const + { + // No need to process edges with count == 0 + return ((count < k - 2) && (count != 0)) + ? cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; + } +}; + +template +struct is_k_or_greater_t { + edge_t k{}; + __device__ bool operator()(edge_t core_number) const { return core_number >= edge_t{k}; } +}; + +template +struct extract_triangles_endpoints { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + auto endpoints = thrust::make_tuple(weak_srcs[chunk_start + idx], // p + weak_dsts[chunk_start + idx], // q + intersection_indices[i] // r + ); + + auto p = weak_srcs[chunk_start + idx]; + auto q = weak_dsts[chunk_start + idx]; + auto r = intersection_indices[i]; + // Re-order the endpoints such that p < q < r in order to identify duplicate triangles + // which will cause overcompensation. 
comparing the vertex IDs is cheaper than comparing the + // degrees (d(p) < d(q) < d(r)) which will be done once in the latter stage to retrieve the + // direction of the edges once the triplet dependency is broken. + if (p > q) cuda::std::swap(p, q); + if (p > r) cuda::std::swap(p, r); + if (q > r) cuda::std::swap(q, r); + + return thrust::make_tuple(p, q, r); + } +}; + namespace { template @@ -60,39 +115,48 @@ struct exclude_self_loop_t { } }; -template -struct extract_low_to_high_degree_weighted_edges_t { - __device__ cuda::std::optional> operator()( - vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const - { - return (src_out_degree < dst_out_degree) - ? cuda::std::optional>{thrust::make_tuple( - src, dst, wgt)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? cuda::std::optional< - thrust::tuple>{thrust::make_tuple( - src, dst, wgt)} - : cuda::std::nullopt); - } -}; - template -struct extract_low_to_high_degree_edges_t { - __device__ cuda::std::optional> operator()( +struct extract_low_to_high_degree_edges_from_endpoints_t { + raft::device_span srcs{}; + raft::device_span dsts{}; + raft::device_span count{}; + __device__ cuda::std::optional> operator()( vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, cuda::std::nullopt_t) const { - return (src_out_degree < dst_out_degree) - ? cuda::std::optional>{thrust::make_tuple(src, dst)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? 
cuda::std::optional>{thrust::make_tuple(src, - dst)} - : cuda::std::nullopt); + // FIXME: Not the most efficient way because the entire edgelist is scan just to find + // the direction of the edges + auto itr = thrust::lower_bound(thrust::seq, + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), + thrust::make_tuple(src, dst)); + + if ((itr != thrust::make_zip_iterator(srcs.end(), dsts.end())) && + (*itr == thrust::make_tuple(src, dst))) { + auto idx = thrust::distance(thrust::make_zip_iterator(srcs.begin(), dsts.begin()), itr); + + if (src_out_degree < dst_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if (dst_out_degree < src_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } else { + if ((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if ((src_out_degree == dst_out_degree) && + (src > dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } + } + } else { + return cuda::std::nullopt; + } } }; @@ -119,246 +183,543 @@ k_truss(raft::handle_t const& handle, // nothing to do } - std::optional> modified_graph{std::nullopt}; - std::optional> modified_graph_view{std::nullopt}; - std::optional> renumber_map{std::nullopt}; - std::optional, weight_t>> - edge_weight{std::nullopt}; - std::optional> wgts{std::nullopt}; - - if (graph_view.count_self_loops(handle) > edge_t{0}) { - auto [srcs, dsts] = extract_transform_e(handle, - graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - exclude_self_loop_t{}); - - if constexpr (multi_gpu) { - std::tie( - srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - 
detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + // 2. Exclude self-loops and edges that do not belong to (k-1)-core - std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + auto cur_graph_view = graph_view; + auto unmasked_cur_graph_view = cur_graph_view; - modified_graph_view = (*modified_graph).view(); - } + if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); } + // mask for self-loops and edges not part of k-1 core + cugraph::edge_property_t undirected_mask(handle); + { + // 2.1 Exclude self-loops - // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + if (cur_graph_view.count_self_loops(handle) > edge_t{0}) { + // 2.1. Exclude self-loops - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? 
std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; - - rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), - handle.get_stream()); - core_number( - handle, cur_graph_view, core_numbers.data(), k_core_degree_type_t::OUT, size_t{2}, size_t{2}); - - raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - - auto [srcs, dsts, wgts] = k_core(handle, - cur_graph_view, - edge_weight_view, - k - 1, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + cugraph::edge_property_t self_loop_edge_mask(handle, + cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, self_loop_edge_mask.mutable_view(), false); - std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( + transform_e( handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto, auto, auto) { return src != dst; }, + self_loop_edge_mask.mutable_view()); + + undirected_mask = std::move(self_loop_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); + } - modified_graph_view = (*modified_graph).view(); + // 2.2 Find (k-1)-core and exclude edges that do not 
belong to (k-1)-core + { + rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), + handle.get_stream()); + core_number(handle, + cur_graph_view, + core_numbers.data(), + k_core_degree_type_t::OUT, + size_t{2}, + size_t{2}); + + edge_src_property_t edge_src_in_k_minus_1_cores( + handle, cur_graph_view); + edge_dst_property_t edge_dst_in_k_minus_1_cores( + handle, cur_graph_view); + auto in_k_minus_1_core_first = + thrust::make_transform_iterator(core_numbers.begin(), is_k_or_greater_t{k - 1}); + rmm::device_uvector in_k_minus_1_core_flags(core_numbers.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + in_k_minus_1_core_first, + in_k_minus_1_core_first + core_numbers.size(), + in_k_minus_1_core_flags.begin()); + update_edge_src_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_src_in_k_minus_1_cores.mutable_view()); + update_edge_dst_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_dst_in_k_minus_1_cores.mutable_view()); + + cugraph::edge_property_t in_k_minus_1_core_edge_mask( + handle, cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, in_k_minus_1_core_edge_mask.mutable_view(), false); + + transform_e( + handle, + cur_graph_view, + edge_src_in_k_minus_1_cores.view(), + edge_dst_in_k_minus_1_cores.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto, auto, auto src_in_k_minus_1_core, auto dst_in_k_minus_1_core, auto) { + return src_in_k_minus_1_core && dst_in_k_minus_1_core; + }, + in_k_minus_1_core_edge_mask.mutable_view()); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); + undirected_mask = std::move(in_k_minus_1_core_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); } - - 
renumber_map = std::move(tmp_renumber_map); } // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; + edge_src_property_t edge_src_out_degrees(handle, + cur_graph_view); + edge_dst_property_t edge_dst_out_degrees(handle, + cur_graph_view); + cugraph::edge_property_t, bool> dodg_mask( + handle, cur_graph_view); + { auto out_degrees = cur_graph_view.compute_out_degrees(handle); - edge_src_property_t edge_src_out_degrees(handle, - cur_graph_view); - edge_dst_property_t edge_dst_out_degrees(handle, - cur_graph_view); update_edge_src_property( handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); update_edge_dst_property( handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - edge_weight_view = - edge_weight ? 
std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - if (edge_weight_view) { - std::tie(srcs, dsts, wgts) = extract_transform_e( - handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - *edge_weight_view, - extract_low_to_high_degree_weighted_edges_t{}); - } else { - std::tie(srcs, dsts) = - extract_transform_e(handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - edge_dummy_property_t{}.view(), - extract_low_to_high_degree_edges_t{}); - } - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } - - std::optional> tmp_renumber_map{std::nullopt}; - - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); - - modified_graph_view = (*modified_graph).view(); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); - } - renumber_map = std::move(tmp_renumber_map); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, dodg_mask.mutable_view(), bool{false}); + + cugraph::transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_out_degree, auto dst_out_degree, auto) { + return (src_out_degree < dst_out_degree) ? 
true + : ((src_out_degree == dst_out_degree) && + (src < dst) /* tie-breaking using vertex ID */) + ? true + : false; + }, + dodg_mask.mutable_view(), + do_expensive_check); + + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(dodg_mask.view()); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + // Mask self loops and edges not being part of k-1 core + auto weak_edges_mask = std::move(undirected_mask); - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view, false); - cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); + cugraph::edge_bucket_t edgelist_weak(handle); + cugraph::edge_bucket_t edges_to_decrement_count(handle); + size_t prev_chunk_size = 0; // FIXME: Add support for chunking while (true) { - // FIXME: This approach is very expensive when invalidating only few edges per iteration - // and should be address. 
- auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view); + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, weak_edgelist_last); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( + handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, do_expensive_check); + + // This array stores (p, q, r) which are endpoints for the triangles with weak edges + + auto triangles_endpoints = + allocate_dataframe_buffer>( + intersection_indices.size(), handle.get_stream()); + + // Extract endpoints for triangles with weak edges + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + extract_triangles_endpoints{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto unique_triangle_end = 
thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto num_unique_triangles = thrust::distance( // Triangles are represented by their endpoints + get_dataframe_buffer_begin(triangles_endpoints), + unique_triangle_end); + + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // Shuffle the edges with respect to the undirected graph view to the GPU + // owning edge (p, q). 
Remember that the triplet (p, q, r) is ordered based on the + // vertex ID and not the degree so (p, q) might not be an edge in the DODG but is + // surely an edge in the undirected graph + std::tie(triangles_endpoints, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + unique_triangle_end = thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + num_unique_triangles = + thrust::distance(get_dataframe_buffer_begin(triangles_endpoints), unique_triangle_end); + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + } + + auto edgelist_to_update_count = allocate_dataframe_buffer>( + 3 * num_unique_triangles, handle.get_stream()); + + // The order no longer matters since duplicated triangles have been removed + // Flatten the endpoints to a list of egdes. 
+ thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [num_unique_triangles, + triangles_endpoints = + get_dataframe_buffer_begin(triangles_endpoints)] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_endpoints + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } + + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<1>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + return thrust::make_tuple(src, dst); + }); + + if constexpr (multi_gpu) { + std::tie(std::get<0>(edgelist_to_update_count), + std::get<1>(edgelist_to_update_count), + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(edgelist_to_update_count)), + std::move(std::get<1>(edgelist_to_update_count)), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector 
decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count) = + extract_transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + extract_low_to_high_degree_edges_from_endpoints_t{ + raft::device_span(std::get<0>(vertex_pair_buffer_unique).data(), + std::get<0>(vertex_pair_buffer_unique).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_unique).data(), + std::get<1>(vertex_pair_buffer_unique).size()), + raft::device_span(decrease_count.data(), decrease_count.size())}); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + std::forward_as_tuple(std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count), + std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + 
thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin(), + decrease_count.begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end(), + decrease_count.end()), + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count.begin()); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Update count of weak edges from the DODG view + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [edge_buffer_first = + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span( + decrease_count.data(), decrease_count.size())] __device__(auto src, + auto dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + edge_t count) { + auto itr_pair = 
thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + count -= decrease_count[idx_pair]; + + return count; + }, + edge_triangle_counts.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); + + edgelist_weak.insert( + weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); - // Mask all the edges that have k - 2 count + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); cugraph::transform_e( handle, cur_graph_view, + edgelist_weak, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [k] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { - return count >= k - 2; + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; }, - edge_mask.mutable_view(), - false); + weak_edges_mask.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + // shuffle the edges if multi_gpu + if constexpr (multi_gpu) { + std::tie(weak_edgelist_dsts, + weak_edgelist_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(weak_edgelist_dsts), + std::move(weak_edgelist_srcs), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + 
thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); + + edgelist_weak.insert( + weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); - cur_graph_view.attach_edge_mask(edge_mask.view()); + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + do_expensive_check); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); } + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + do_expensive_check); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; @@ -367,11 +728,10 @@ k_truss(raft::handle_t const& handle, decompress_to_edgelist( handle, cur_graph_view, - edge_weight_view ? 
std::make_optional(*edge_weight_view) : std::nullopt, + edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size()))); + std::optional>{std::nullopt}); std::tie(edgelist_srcs, edgelist_dsts, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6800b9c4769..f819142076d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -893,6 +893,8 @@ ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c) ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c) ConfigureCTest(CAPI_LEIDEN_TEST c_api/leiden_test.c) ConfigureCTest(CAPI_ECG_TEST c_api/ecg_test.c) +ConfigureCTest(CAPI_RENUMBER_ARBITRARY_EDGELIST_TEST c_api/renumber_arbitrary_edgelist_test.c) + ############################################################################# # Skipping due to CUDA 12.2 failure that traces back to RAFT # # TODO: Uncomment this once the issue is fixed. # diff --git a/cpp/tests/c_api/renumber_arbitrary_edgelist_test.c b/cpp/tests/c_api/renumber_arbitrary_edgelist_test.c new file mode 100644 index 00000000000..6528ae55589 --- /dev/null +++ b/cpp/tests/c_api/renumber_arbitrary_edgelist_test.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_test_utils.h" /* RUN_TEST */ +#include "cugraph_c/array.h" + +#include +#include + +#include + +typedef int32_t vertex_t; + +int generic_renumber_arbitrary_edgelist_test(vertex_t* h_src, + vertex_t* h_dst, + vertex_t* h_renumber_map, + size_t num_edges, + size_t renumber_map_size) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + cugraph_type_erased_device_array_t* srcs; + cugraph_type_erased_device_array_t* dsts; + cugraph_type_erased_device_array_view_t* srcs_view; + cugraph_type_erased_device_array_view_t* dsts_view; + cugraph_type_erased_host_array_view_t* renumber_map_view; + + ret_code = cugraph_type_erased_device_array_create(p_handle, num_edges, INT32, &srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "srcs create failed."); + + ret_code = cugraph_type_erased_device_array_create(p_handle, num_edges, INT32, &dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dsts create failed."); + + srcs_view = cugraph_type_erased_device_array_view(srcs); + dsts_view = cugraph_type_erased_device_array_view(dsts); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, srcs_view, (byte_t*)h_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, dsts_view, (byte_t*)h_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed."); + + renumber_map_view = + cugraph_type_erased_host_array_view_create(h_renumber_map, renumber_map_size, INT32); + + ret_code = cugraph_renumber_arbitrary_edgelist( + p_handle, renumber_map_view, srcs_view, dsts_view, 
&ret_error); + + vertex_t h_renumbered_srcs[num_edges]; + vertex_t h_renumbered_dsts[num_edges]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_renumbered_srcs, srcs_view, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_renumbered_dsts, dsts_view, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_edges) && (test_ret_value == 0); ++i) { + vertex_t renumbered_src = -1; + vertex_t renumbered_dst = -1; + + for (size_t j = 0; (j < renumber_map_size) && ((renumbered_src < 0) || (renumbered_dst < 0)); + ++j) { + if (h_src[i] == h_renumber_map[j]) renumbered_src = (vertex_t)j; + if (h_dst[i] == h_renumber_map[j]) renumbered_dst = (vertex_t)j; + } + + TEST_ASSERT(test_ret_value, h_renumbered_srcs[i] == renumbered_src, "src results don't match"); + TEST_ASSERT(test_ret_value, h_renumbered_dsts[i] == renumbered_dst, "dst results don't match"); + } + + cugraph_type_erased_device_array_free(dsts); + cugraph_type_erased_device_array_free(srcs); + cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_renumbering() +{ + size_t num_edges = 8; + size_t renumber_map_size = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_renumber_map[] = {5, 3, 1, 2, 4, 0}; + + return generic_renumber_arbitrary_edgelist_test( + h_src, h_dst, h_renumber_map, num_edges, renumber_map_size); +} + +int main(int argc, char** argv) +{ + int result = 0; + result |= RUN_TEST(test_renumbering); + return result; +} diff --git a/dependencies.yaml b/dependencies.yaml index 02fa03cff70..5419be9beda 100755 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: [conda] matrix: - cuda: ["11.8", "12.5"] + cuda: 
["11.8", "12.8"] arch: [x86_64] includes: - checks @@ -279,6 +279,10 @@ dependencies: cuda: "12.5" packages: - cuda-version=12.5 + - matrix: + cuda: "12.8" + packages: + - cuda-version=12.8 cuda: specific: - output_types: [conda] @@ -300,7 +304,7 @@ dependencies: - cuda-nvtx common_build: common: - - output_types: [conda, pyproject] + - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - ninja diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index dfe3b085fdf..060d4ee1e99 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -72,7 +72,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["cugraph"] diff --git a/python/libcugraph/pyproject.toml b/python/libcugraph/pyproject.toml index a6191e28000..9d85bfa5dac 100644 --- a/python/libcugraph/pyproject.toml +++ b/python/libcugraph/pyproject.toml @@ -53,14 +53,14 @@ select = [ ] # detect when package size grows significantly -max_allowed_size_compressed = '1.2G' +max_allowed_size_compressed = '1.4G' [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["libcugraph"] wheel.install-dir = "libcugraph" diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml index ac124e1fd5f..3c50a79bfa3 100644 --- a/python/pylibcugraph/pyproject.toml +++ b/python/pylibcugraph/pyproject.toml @@ -57,7 +57,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true 
wheel.packages = ["pylibcugraph"] diff --git a/readme_pages/performance/performance.md b/readme_pages/performance/performance.md deleted file mode 100644 index 159b2334704..00000000000 --- a/readme_pages/performance/performance.md +++ /dev/null @@ -1,7 +0,0 @@ - - - - -We are working on a new nightly benchmarking system that will produce performance numbers. -This is a splash page for where the performance numbers will be posted in early 2023.