From e6e09260b487bc6ec05fafc9cfb22878f9f3fd45 Mon Sep 17 00:00:00 2001
From: Ajit Mistry <55892788+ajit283@users.noreply.github.com>
Date: Fri, 31 Jan 2025 07:50:03 +0100
Subject: [PATCH] Expose `extend()` in C API (#276)

Add functionality to the C API for adding additional vectors to an index after build

Authors:
  - Ajit Mistry (https://github.com/ajit283)
  - Corey J. Nolet (https://github.com/cjnolet)
  - Ben Frederickson (https://github.com/benfred)
  - Micka (https://github.com/lowener)

Approvers:
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/cuvs/pull/276
---
 cpp/include/cuvs/neighbors/cagra.h |  69 +++++++++
 cpp/src/neighbors/cagra_c.cpp      |  72 +++++++++
 cpp/tests/neighbors/ann_cagra_c.cu | 230 +++++++++++++++++++++++++++++
 3 files changed, 371 insertions(+)

diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h
index 3e17f1d0f..207f3f21b 100644
--- a/cpp/include/cuvs/neighbors/cagra.h
+++ b/cpp/include/cuvs/neighbors/cagra.h
@@ -141,6 +141,45 @@ cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t* param
  */
 cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params);
 
+/**
+ * @}
+ */
+
+/**
+ * @defgroup cagra_c_extend_params C API for CUDA ANN Graph-based nearest neighbor search
+ * @{
+ */
+/**
+ * @brief Supplemental parameters to extend CAGRA Index
+ *
+ */
+struct cuvsCagraExtendParams {
+  /** The additional dataset is divided into chunks and added to the graph. This is the knob to
+   * adjust the tradeoff between the recall and operation throughput. Large chunk sizes can result
+   * in high throughput, but use more working memory (O(max_chunk_size*degree^2)). This can also
+   * degrade recall because no edges are added between the nodes in the same chunk. Auto select when
+   * 0. */
+  uint32_t max_chunk_size;
+};
+
+typedef struct cuvsCagraExtendParams* cuvsCagraExtendParams_t;
+
+/**
+ * @brief Allocate CAGRA Extend params and populate them with default values
+ *
+ * @param[in] params cuvsCagraExtendParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params);
+
+/**
+ * @brief De-allocate CAGRA Extend params
+ *
+ * @param[in] params
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params);
+
 /**
  * @}
  */
@@ -340,6 +379,36 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res,
  * @}
  */
 
+/**
+ * @defgroup cagra_c_extend_params C API for CUDA ANN Graph-based nearest neighbor search
+ * @{
+ */
+
+/**
+ * @brief Extend a CAGRA index with a `DLManagedTensor` which has underlying
+ * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
+ * or `kDLCPU`. Also, acceptable underlying types are:
+ * 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ * 2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ * 3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] params cuvsCagraExtendParams_t used to extend CAGRA index
+ * @param[in] additional_dataset DLManagedTensor* additional dataset
+ * @param[in,out] index cuvsCagraIndex_t CAGRA index
+ * @param[out] return_dataset DLManagedTensor* extended dataset
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraExtend(cuvsResources_t res,
+                            cuvsCagraExtendParams_t params,
+                            DLManagedTensor* additional_dataset,
+                            cuvsCagraIndex_t index,
+                            DLManagedTensor* return_dataset);
+
+/**
+ * @}
+ */
+
 /**
  * @defgroup cagra_c_index_search C API for CUDA ANN Graph-based nearest neighbor search
  * @{
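
The new declarations above are meant to slot between the existing build and search calls. A minimal usage sketch, assuming `res` and `index` come from cuvsResourcesCreate/cuvsCagraBuild and that the two DLManagedTensor descriptors (whose names here are illustrative) are prepared as in the test added below; error checking omitted:

    cuvsCagraExtendParams_t extend_params;
    cuvsCagraExtendParamsCreate(&extend_params);
    extend_params->max_chunk_size = 0; /* 0 = let the library auto-select the chunk size */

    /* adds the rows of additional_dataset_tensor to the built index and writes
       the concatenated dataset into return_dataset_tensor's memory */
    cuvsCagraExtend(res, extend_params, &additional_dataset_tensor, index, &return_dataset_tensor);

    cuvsCagraExtendParamsDestroy(extend_params);
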
diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp
index 20cbae7d0..333fffb42 100644
--- a/cpp/src/neighbors/cagra_c.cpp
+++ b/cpp/src/neighbors/cagra_c.cpp
@@ -87,6 +87,43 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor*
   return index;
 }
 
+template <typename T>
+void _extend(cuvsResources_t res,
+             cuvsCagraExtendParams params,
+             cuvsCagraIndex index,
+             DLManagedTensor* additional_dataset_tensor,
+             DLManagedTensor* return_tensor)
+{
+  auto dataset = additional_dataset_tensor->dl_tensor;
+  auto return_dl_tensor = return_tensor->dl_tensor;
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::cagra::index<T, uint32_t>*>(index.addr);
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+
+  // TODO: use C struct here (see issue #487)
+  auto extend_params = cuvs::neighbors::cagra::extend_params();
+  extend_params.max_chunk_size = params.max_chunk_size;
+
+  if (cuvs::core::is_dlpack_device_compatible(dataset) &&
+      cuvs::core::is_dlpack_device_compatible(return_dl_tensor)) {
+    using mdspan_type = raft::device_matrix_view<T const, int64_t, raft::row_major>;
+    using mdspan_return_type = raft::device_matrix_view<T, int64_t, raft::row_major>;
+    auto mds = cuvs::core::from_dlpack<mdspan_type>(additional_dataset_tensor);
+    auto return_mds = cuvs::core::from_dlpack<mdspan_return_type>(return_tensor);
+    cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr, return_mds);
+  } else if (cuvs::core::is_dlpack_host_compatible(dataset) &&
+             cuvs::core::is_dlpack_host_compatible(return_dl_tensor)) {
+    using mdspan_type = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+    using mdspan_return_type = raft::device_matrix_view<T, int64_t, raft::row_major>;
+    auto mds = cuvs::core::from_dlpack<mdspan_type>(additional_dataset_tensor);
+    auto return_mds = cuvs::core::from_dlpack<mdspan_return_type>(return_tensor);
+    cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr, return_mds);
+  } else {
+    RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+              dataset.dtype.code,
+              dataset.dtype.bits);
+  }
+}
+
 template <typename T>
 void _search(cuvsResources_t res,
              cuvsCagraSearchParams params,
@@ -229,6 +266,30 @@ extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res,
   });
 }
 
+extern "C" cuvsError_t cuvsCagraExtend(cuvsResources_t res,
+                                       cuvsCagraExtendParams_t params,
+                                       DLManagedTensor* additional_dataset_tensor,
+                                       cuvsCagraIndex_t index_c_ptr,
+                                       DLManagedTensor* return_dataset_tensor)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto dataset = additional_dataset_tensor->dl_tensor;
+    auto index = *index_c_ptr;
+
+    if ((dataset.dtype.code == kDLFloat) && (dataset.dtype.bits == 32)) {
+      _extend<float>(res, *params, index, additional_dataset_tensor, return_dataset_tensor);
+    } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) {
+      _extend<int8_t>(res, *params, index, additional_dataset_tensor, return_dataset_tensor);
+    } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) {
+      _extend<uint8_t>(res, *params, index, additional_dataset_tensor, return_dataset_tensor);
+    } else {
+      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+                dataset.dtype.code,
+                dataset.dtype.bits);
+    }
+  });
+}
+
 extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
                                        cuvsCagraSearchParams_t params,
                                        cuvsCagraIndex_t index_c_ptr,
@@ -309,6 +370,17 @@ extern "C" cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionPar
   return cuvs::core::translate_exceptions([=] { delete params; });
 }
 
+extern "C" cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsCagraExtendParams{.max_chunk_size = 0}; });
+}
+
+extern "C" cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
 extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params)
 {
   return cuvs::core::translate_exceptions([=] {
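
To get a feel for the max_chunk_size knob documented in the header, the working-memory bound O(max_chunk_size * degree^2) can be tabulated directly. A back-of-envelope sketch; the degree value below is an assumed, typical CAGRA graph degree and the entry counts are order-of-magnitude only, neither is fixed by this patch:

    /* illustrates growth of the documented O(max_chunk_size * degree^2)
       working-memory bound; all numbers are illustrative only */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      const uint64_t degree = 64; /* assumed graph degree */
      for (uint64_t chunk = 256; chunk <= 4096; chunk *= 4) {
        printf("max_chunk_size=%llu -> ~%llu working-memory entries\n",
               (unsigned long long)chunk,
               (unsigned long long)(chunk * degree * degree));
      }
      return 0;
    }

Larger chunks raise throughput but, as the comment on cuvsCagraExtendParams notes, no edges are added between nodes of the same chunk, so recall can degrade; max_chunk_size = 0 leaves the choice to the library.
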
RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", + dataset.dtype.code, + dataset.dtype.bits); + } + }); +} + extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, cuvsCagraSearchParams_t params, cuvsCagraIndex_t index_c_ptr, @@ -309,6 +370,17 @@ extern "C" cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionPar return cuvs::core::translate_exceptions([=] { delete params; }); } +extern "C" cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params) +{ + return cuvs::core::translate_exceptions( + [=] { *params = new cuvsCagraExtendParams{.max_chunk_size = 0}; }); +} + +extern "C" cuvsError_t cuvsCagraExtendParamsDestroy(cuvsCagraExtendParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params) { return cuvs::core::translate_exceptions([=] { diff --git a/cpp/tests/neighbors/ann_cagra_c.cu b/cpp/tests/neighbors/ann_cagra_c.cu index 7315813cc..9e0890c34 100644 --- a/cpp/tests/neighbors/ann_cagra_c.cu +++ b/cpp/tests/neighbors/ann_cagra_c.cu @@ -15,7 +15,9 @@ */ #include "../test_utils.cuh" +#include #include +#include #include #include @@ -23,8 +25,19 @@ #include #include + +#include +#include +#include +#include +#include +#include +#include +#include #include +#include + float dataset[4][2] = {{0.74021935, 0.9209938}, {0.03902049, 0.9689629}, {0.92514056, 0.4463501}, @@ -137,6 +150,223 @@ TEST(CagraC, BuildSearch) cuvsResourcesDestroy(res); } +TEST(CagraC, BuildExtendSearch) +{ + // create cuvsResources_t + cuvsResources_t res; + cuvsResourcesCreate(&res); + cudaStream_t stream; + cuvsStreamGet(res, &stream); + + raft::resources handle; + + const int32_t dimensions = 16; + // main_data_size needs to be >= 128 (see issue #486) + const int32_t main_data_size = 1024; + const int32_t additional_data_size = 64; + const int32_t num_queries = 4; + + // create random data for datasets and queries + rmm::device_uvector random_data_d( + (main_data_size + additional_data_size + num_queries) * dimensions, stream); + rmm::device_uvector random_labels_d( + (main_data_size + additional_data_size + num_queries) * dimensions, stream); + raft::random::make_blobs(random_data_d.data(), + random_labels_d.data(), + main_data_size + additional_data_size + num_queries, + dimensions, + 10, + stream); + + // create dataset DLTensor + rmm::device_uvector main_d(main_data_size * dimensions, stream); + rmm::device_uvector main_labels_d(main_data_size, stream); + raft::copy(main_d.data(), random_data_d.data(), main_data_size * dimensions, stream); + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = main_d.data(); + dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {main_data_size, dimensions}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = nullptr; + + // create additional dataset DLTensor + rmm::device_uvector additional_d(additional_data_size * dimensions, stream); + raft::copy(additional_d.data(), + random_data_d.data() + main_d.size(), + additional_data_size * dimensions, + stream); + DLManagedTensor additional_dataset_tensor; + additional_dataset_tensor.dl_tensor.data = additional_d.data(); + additional_dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + 
+  additional_dataset_tensor.dl_tensor.ndim = 2;
+  additional_dataset_tensor.dl_tensor.dtype.code = kDLFloat;
+  additional_dataset_tensor.dl_tensor.dtype.bits = 32;
+  additional_dataset_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t additional_dataset_shape[2] = {additional_data_size, dimensions};
+  additional_dataset_tensor.dl_tensor.shape = additional_dataset_shape;
+  additional_dataset_tensor.dl_tensor.strides = nullptr;
+
+  // create tensor that points to the extended dataset
+  rmm::device_uvector<float> extend_return_d((additional_data_size + main_data_size) * dimensions,
+                                             stream);
+  DLManagedTensor additional_dataset_return_tensor;
+  additional_dataset_return_tensor.dl_tensor.data = extend_return_d.data();
+  additional_dataset_return_tensor.dl_tensor.device.device_type = kDLCUDA;
+  additional_dataset_return_tensor.dl_tensor.ndim = 2;
+  additional_dataset_return_tensor.dl_tensor.dtype.code = kDLFloat;
+  additional_dataset_return_tensor.dl_tensor.dtype.bits = 32;
+  additional_dataset_return_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t additional_return_dataset_shape[2] = {additional_data_size + main_data_size, dimensions};
+  additional_dataset_return_tensor.dl_tensor.shape = additional_return_dataset_shape;
+  additional_dataset_return_tensor.dl_tensor.strides = nullptr;
+
+  // create index
+  cuvsCagraIndex_t index;
+  cuvsCagraIndexCreate(&index);
+
+  // build index
+  cuvsCagraIndexParams_t build_params;
+  cuvsCagraIndexParamsCreate(&build_params);
+  cuvsCagraBuild(res, build_params, &dataset_tensor, index);
+
+  cuvsStreamSync(res);
+
+  // extend index
+  cuvsCagraExtendParams_t extend_params;
+  cuvsCagraExtendParamsCreate(&extend_params);
+  cuvsCagraExtend(
+    res, extend_params, &additional_dataset_tensor, index, &additional_dataset_return_tensor);
+
+  // create queries DLTensor
+  rmm::device_uvector<float> queries_d(num_queries * dimensions, stream);
+  raft::copy(queries_d.data(),
+             random_data_d.data() + (main_data_size + additional_data_size) * dimensions,
+             num_queries * dimensions,
+             stream);
+  DLManagedTensor queries_tensor;
+  queries_tensor.dl_tensor.data = queries_d.data();
+  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
+  queries_tensor.dl_tensor.ndim = 2;
+  queries_tensor.dl_tensor.dtype.code = kDLFloat;
+  queries_tensor.dl_tensor.dtype.bits = 32;
+  queries_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t queries_shape[2] = {4, dimensions};
+  queries_tensor.dl_tensor.shape = queries_shape;
+  queries_tensor.dl_tensor.strides = nullptr;
+
+  // create pairwise distance matrix for dataset and queries
+  auto pairwise_distance_dataset_input =
+    raft::make_device_matrix<float>(handle, main_data_size + additional_data_size, dimensions);
+
+  raft::copy(pairwise_distance_dataset_input.data_handle(), main_d.data(), main_d.size(), stream);
+  raft::copy(pairwise_distance_dataset_input.data_handle() + main_d.size(),
+             additional_d.data(),
+             additional_d.size(),
+             stream);
+
+  auto pairwise_distance_queries_input =
+    raft::make_device_matrix<float>(handle, num_queries, dimensions);
+
+  raft::copy(pairwise_distance_queries_input.data_handle(),
+             (float*)queries_d.data(),
+             num_queries * dimensions,
+             stream);
+
+  auto pairwise_distances = raft::make_device_matrix<float>(
+    handle, num_queries, (main_data_size + additional_data_size));
+  auto metric = cuvs::distance::DistanceType::L2Expanded;
+
+  cuvs::distance::pairwise_distance(handle,
+                                    pairwise_distance_queries_input.view(),
+                                    pairwise_distance_dataset_input.view(),
+                                    pairwise_distances.view(),
+                                    metric);
+
+  auto min_cols =
+    raft::make_device_vector<uint32_t>(handle, pairwise_distances.extent(0));
+
+  auto distances_const_view = raft::make_device_matrix_view<const float>(
+    pairwise_distances.data_handle(), pairwise_distances.extent(0), pairwise_distances.extent(1));
+
+  raft::matrix::argmin(handle, distances_const_view, min_cols.view());
+
+  float min_cols_distances[num_queries];
+
+  for (uint32_t i = 0; i < min_cols.extent(0); i++) {
+    uint32_t mc = min_cols(i);
+    min_cols_distances[i] = pairwise_distances(i, mc);
+  }
+
+  // create neighbors DLTensor
+  rmm::device_uvector<uint32_t> neighbors_d(4, stream);
+
+  DLManagedTensor neighbors_tensor;
+  neighbors_tensor.dl_tensor.data = neighbors_d.data();
+  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
+  neighbors_tensor.dl_tensor.ndim = 2;
+  neighbors_tensor.dl_tensor.dtype.code = kDLUInt;
+  neighbors_tensor.dl_tensor.dtype.bits = 32;
+  neighbors_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t neighbors_shape[2] = {num_queries, 1};
+  neighbors_tensor.dl_tensor.shape = neighbors_shape;
+  neighbors_tensor.dl_tensor.strides = nullptr;
+
+  // create distances DLTensor
+  rmm::device_uvector<float> distances_d(4, stream);
+
+  distances_d.resize(4, stream);
+
+  DLManagedTensor distances_tensor;
+  distances_tensor.dl_tensor.data = distances_d.data();
+  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
+  distances_tensor.dl_tensor.ndim = 2;
+  distances_tensor.dl_tensor.dtype.code = kDLFloat;
+  distances_tensor.dl_tensor.dtype.bits = 32;
+  distances_tensor.dl_tensor.dtype.lanes = 1;
+  int64_t distances_shape[2] = {num_queries, 1};
+  distances_tensor.dl_tensor.shape = distances_shape;
+  distances_tensor.dl_tensor.strides = nullptr;
+
+  cuvsFilter filter;
+  filter.type = NO_FILTER;
+  filter.addr = (uintptr_t)NULL;
+
+  // search index
+  cuvsCagraSearchParams_t search_params;
+  cuvsCagraSearchParamsCreate(&search_params);
+  cuvsCagraSearch(
+    res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter);
+
+  // make sure that extend_return_d points to the extended dataset
+  ASSERT_TRUE(cuvs::devArrMatch(
+    main_d.data(), extend_return_d.data(), main_d.size(), cuvs::Compare<float>()));
+
+  ASSERT_TRUE(cuvs::devArrMatch(additional_d.data(),
+                                extend_return_d.data() + main_d.size(),
+                                additional_d.size(),
+                                cuvs::Compare<float>()));
+
+  // check neighbors
+  ASSERT_TRUE(cuvs::devArrMatch(
+    min_cols.data_handle(), neighbors_d.data(), 4, cuvs::Compare<uint32_t>()));
+
+  // check distances
+  ASSERT_TRUE(cuvs::devArrMatchHost(
+    min_cols_distances, distances_d.data(), 4, cuvs::CompareApprox<float>(0.001f)));
+
+  // de-allocate index and res
+  cuvsCagraSearchParamsDestroy(search_params);
+  cuvsCagraExtendParamsDestroy(extend_params);
+  cuvsCagraIndexParamsDestroy(build_params);
+  cuvsCagraIndexDestroy(index);
+  cuvsResourcesDestroy(res);
+}
+
 TEST(CagraC, BuildSearchFiltered)
 {
   // create cuvsResources_t