From 8d6b9e91afda16cbcf76df47251346152926374b Mon Sep 17 00:00:00 2001 From: David Kaczynski Date: Fri, 28 Apr 2023 13:49:50 -0400 Subject: [PATCH 1/6] Force error on warnings and add casts to test directory --- tests/CMakeLists.txt | 1 + tests/build_stitched_index.cpp | 6 ++-- tests/range_search_disk_index.cpp | 16 ++++----- tests/search_disk_index.cpp | 12 +++---- tests/search_memory_index.cpp | 16 ++++----- tests/test_insert_deletes_consolidate.cpp | 10 +++--- tests/test_streaming_scenario.cpp | 4 +-- tests/utils/CMakeLists.txt | 2 ++ tests/utils/calculate_recall.cpp | 3 +- tests/utils/compute_groundtruth.cpp | 16 ++++----- .../utils/compute_groundtruth_for_filters.cpp | 33 +++++++++---------- tests/utils/float_bin_to_int8.cpp | 4 +-- tests/utils/generate_pq.cpp | 14 ++++---- tests/utils/generate_synthetic_labels.cpp | 23 ++++++------- tests/utils/int8_to_float_scale.cpp | 4 +-- tests/utils/partition_data.cpp | 2 +- tests/utils/partition_with_ram_budget.cpp | 2 +- tests/utils/rand_data_gen.cpp | 10 +++--- tests/utils/stats_label_data.cpp | 4 +-- tests/utils/vector_analysis.cpp | 10 +++--- 20 files changed, 97 insertions(+), 95 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6aa5532ef..c223e0496 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,6 +2,7 @@ # Licensed under the MIT license. 
set(CMAKE_CXX_STANDARD 14) +set(CMAKE_COMPILE_WARNING_AS_ERROR ON) add_executable(build_memory_index build_memory_index.cpp) target_link_libraries(build_memory_index ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) diff --git a/tests/build_stitched_index.cpp b/tests/build_stitched_index.cpp index 770a11f83..4c1941a9d 100644 --- a/tests/build_stitched_index.cpp +++ b/tests/build_stitched_index.cpp @@ -173,7 +173,7 @@ void save_full_index(path final_index_path_prefix, path input_data_path, uint64_ size_t bytes_written = METADATA; for (uint32_t node_point = 0; node_point < stitched_graph.size(); node_point++) { - uint32_t current_node_num_neighbors = stitched_graph[node_point].size(); + uint32_t current_node_num_neighbors = (uint32_t)stitched_graph[node_point].size(); std::vector current_node_neighbors = stitched_graph[node_point]; stitched_graph_writer.write((char *)&current_node_num_neighbors, sizeof(uint32_t)); bytes_written += sizeof(uint32_t); @@ -226,7 +226,7 @@ stitch_indices_return_values stitch_label_indices( std::tie(curr_label_index, curr_label_index_size) = diskann::load_label_index(curr_label_index_path, labels_to_number_of_points[lbl]); - curr_label_entry_point = random(0, curr_label_index.size()); + curr_label_entry_point = (uint32_t)random(0, curr_label_index.size()); label_entry_points[lbl] = label_id_to_orig_id_map[lbl][curr_label_entry_point]; for (uint32_t node_point = 0; node_point < curr_label_index.size(); node_point++) @@ -344,7 +344,7 @@ int main(int argc, char **argv) // 3. 
for each label, make a separate data file tsl::robin_map> label_id_to_orig_id_map; - uint32_t total_number_of_points = point_ids_to_labels.size(); + uint32_t total_number_of_points = (uint32_t)point_ids_to_labels.size(); #ifndef _WINDOWS if (data_type == "uint8") diff --git a/tests/range_search_disk_index.cpp b/tests/range_search_disk_index.cpp index a67dac378..59f53780f 100644 --- a/tests/range_search_disk_index.cpp +++ b/tests/range_search_disk_index.cpp @@ -184,7 +184,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++) { - uint64_t L = Lvec[test_id]; + uint32_t L = Lvec[test_id]; if (beamwidth <= 0) { @@ -211,7 +211,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre query_result_ids[test_id][i].reserve(res_count); query_result_ids[test_id][i].resize(res_count); for (uint32_t idx = 0; idx < res_count; idx++) - query_result_ids[test_id][i][idx] = indices[idx]; + query_result_ids[test_id][i][idx] = (uint32_t)indices[idx]; } auto e = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = e - s; @@ -226,21 +226,21 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre auto mean_ios = diskann::get_mean_stats(stats, query_num, [](const diskann::QueryStats &stats) { return stats.n_ios; }); - float mean_cpuus = diskann::get_mean_stats( + double mean_cpuus = diskann::get_mean_stats( stats, query_num, [](const diskann::QueryStats &stats) { return stats.cpu_us; }); - float recall = 0; - float ratio_of_sums = 0; + double recall = 0; + double ratio_of_sums = 0; if (calc_recall_flag) { - recall = diskann::calculate_range_search_recall(query_num, groundtruth_ids, query_result_ids[test_id]); + recall = diskann::calculate_range_search_recall((uint32_t)query_num, groundtruth_ids, query_result_ids[test_id]); uint32_t total_true_positive = 0; uint32_t total_positive = 0; for (uint32_t i = 0; i < query_num; 
i++) { - total_true_positive += query_result_ids[test_id][i].size(); - total_positive += groundtruth_ids[i].size(); + total_true_positive += (uint32_t)query_result_ids[test_id][i].size(); + total_positive += (uint32_t)groundtruth_ids[i].size(); } ratio_of_sums = (1.0 * total_true_positive) / (1.0 * total_positive); diff --git a/tests/search_disk_index.cpp b/tests/search_disk_index.cpp index 02d734c74..1108da97e 100644 --- a/tests/search_disk_index.cpp +++ b/tests/search_disk_index.cpp @@ -194,11 +194,11 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre uint32_t optimized_beamwidth = 2; - float best_recall = 0.0; + double best_recall = 0.0; for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++) { - uint64_t L = Lvec[test_id]; + uint32_t L = Lvec[test_id]; if (L < recall_at) { @@ -252,7 +252,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre } auto e = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = e - s; - float qps = (1.0 * query_num) / (1.0 * diff.count()); + double qps = (1.0 * query_num) / (1.0 * diff.count()); diskann::convert_types(query_result_ids_64.data(), query_result_ids[test_id].data(), query_num, recall_at); @@ -269,11 +269,11 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre auto mean_cpuus = diskann::get_mean_stats(stats, query_num, [](const diskann::QueryStats &stats) { return stats.cpu_us; }); - float recall = 0; + double recall = 0; if (calc_recall_flag) { - recall = diskann::calculate_recall(query_num, gt_ids, gt_dists, gt_dim, query_result_ids[test_id].data(), - recall_at, recall_at); + recall = diskann::calculate_recall((uint32_t)query_num, gt_ids, gt_dists, (uint32_t)gt_dim, + query_result_ids[test_id].data(), recall_at, recall_at); best_recall = std::max(recall, best_recall); } diff --git a/tests/search_memory_index.cpp b/tests/search_memory_index.cpp index 02c96db24..bd5c867a0 100644 --- 
a/tests/search_memory_index.cpp +++ b/tests/search_memory_index.cpp @@ -125,11 +125,11 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path, query_result_tags.resize(recall_at * query_num); } - float best_recall = 0.0; + double best_recall = 0.0; for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++) { - uint64_t L = Lvec[test_id]; + uint32_t L = Lvec[test_id]; if (L < recall_at) { diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; @@ -185,28 +185,28 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path, } auto qe = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = qe - qs; - latency_stats[i] = diff.count() * 1000000; + latency_stats[i] = (float)(diff.count() * 1000000); } std::chrono::duration diff = std::chrono::high_resolution_clock::now() - s; - float displayed_qps = static_cast(query_num) / diff.count(); + double displayed_qps = query_num / diff.count(); if (show_qps_per_thread) displayed_qps /= num_threads; - std::vector recalls; + std::vector recalls; if (calc_recall_flag) { recalls.reserve(recalls_to_print); for (uint32_t curr_recall = first_recall; curr_recall <= recall_at; curr_recall++) { - recalls.push_back(diskann::calculate_recall(query_num, gt_ids, gt_dists, gt_dim, + recalls.push_back(diskann::calculate_recall((uint32_t)query_num, gt_ids, gt_dists, (uint32_t)gt_dim, query_result_ids[test_id].data(), recall_at, curr_recall)); } } std::sort(latency_stats.begin(), latency_stats.end()); - float mean_latency = + double mean_latency = std::accumulate(latency_stats.begin(), latency_stats.end(), 0.0) / static_cast(query_num); float avg_cmps = (float)std::accumulate(cmp_stats.begin(), cmp_stats.end(), 0) / (float)query_num; @@ -222,7 +222,7 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path, << std::setw(20) << (float)mean_latency << std::setw(15) << (float)latency_stats[(uint64_t)(0.999 
* query_num)]; } - for (float recall : recalls) + for (double recall : recalls) { std::cout << std::setw(12) << recall; best_recall = std::max(recall, best_recall); diff --git a/tests/test_insert_deletes_consolidate.cpp b/tests/test_insert_deletes_consolidate.cpp index 844c4abd2..ebfd7cabe 100644 --- a/tests/test_insert_deletes_consolidate.cpp +++ b/tests/test_insert_deletes_consolidate.cpp @@ -90,8 +90,8 @@ std::string get_save_filename(const std::string &save_path, size_t points_to_ski } template -void insert_till_next_checkpoint(diskann::Index &index, size_t start, size_t end, size_t thread_count, T *data, - size_t aligned_dim) +void insert_till_next_checkpoint(diskann::Index &index, size_t start, size_t end, int32_t thread_count, + T *data, size_t aligned_dim) { diskann::Timer insert_timer; @@ -115,7 +115,7 @@ void delete_from_beginning(diskann::Index &index, diskann::IndexWritePa << "Lazy deleting points " << points_to_skip << " to " << points_to_skip + points_to_delete_from_beginning << "... "; for (size_t i = points_to_skip; i < points_to_skip + points_to_delete_from_beginning; ++i) - index.lazy_delete(i + 1); // Since tags are data location + 1 + index.lazy_delete(static_cast(i + 1)); // Since tags are data location + 1 std::cout << "done." 
<< std::endl; auto report = index.consolidate_deletes(delete_params); @@ -230,7 +230,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con if (concurrent) { - int sub_threads = (thread_count + 1) / 2; + int32_t sub_threads = (thread_count + 1) / 2; bool delete_launched = false; std::future delete_task; @@ -279,7 +279,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl; load_aligned_bin_part(data_path, data, start, end - start); - insert_till_next_checkpoint(index, start, end, thread_count, data, aligned_dim); + insert_till_next_checkpoint(index, start, end, (int32_t)thread_count, data, aligned_dim); if (checkpoints_per_snapshot > 0 && --num_checkpoints_till_snapshot == 0) { diff --git a/tests/test_streaming_scenario.cpp b/tests/test_streaming_scenario.cpp index 463e862c9..3281a0573 100644 --- a/tests/test_streaming_scenario.cpp +++ b/tests/test_streaming_scenario.cpp @@ -91,7 +91,7 @@ void insert_next_batch(diskann::Index &index, size_t start, siz std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl; size_t num_failed = 0; -#pragma omp parallel for num_threads(insert_threads) schedule(dynamic) reduction(+ : num_failed) +#pragma omp parallel for num_threads((int32_t)insert_threads) schedule(dynamic) reduction(+ : num_failed) for (int64_t j = start; j < (int64_t)end; j++) { if (index.insert_point(&data[(j - start) * aligned_dim], 1 + static_cast(j)) != 0) @@ -121,7 +121,7 @@ void delete_and_consolidate(diskann::Index &index, diskann::Ind { std::cout << std::endl << "Lazy deleting points " << start << " to " << end << "... "; for (size_t i = start; i < end; ++i) - index.lazy_delete(1 + i); + index.lazy_delete(static_cast(1 + i)); std::cout << "lazy delete done." 
<< std::endl; auto report = index.consolidate_deletes(delete_params); diff --git a/tests/utils/CMakeLists.txt b/tests/utils/CMakeLists.txt index df205cf90..fadac1292 100644 --- a/tests/utils/CMakeLists.txt +++ b/tests/utils/CMakeLists.txt @@ -2,6 +2,8 @@ # Licensed under the MIT license. set(CMAKE_CXX_STANDARD 14) +set(CMAKE_COMPILE_WARNING_AS_ERROR ON) + add_executable(fvecs_to_bin fvecs_to_bin.cpp) diff --git a/tests/utils/calculate_recall.cpp b/tests/utils/calculate_recall.cpp index a45eb19d7..282994e45 100644 --- a/tests/utils/calculate_recall.cpp +++ b/tests/utils/calculate_recall.cpp @@ -47,7 +47,8 @@ int main(int argc, char **argv) return -1; } std::cout << "Calculating recall@" << recall_at << std::endl; - float recall_val = diskann::calculate_recall(points_num, gold_std, gs_dist, dim_gs, our_results, dim_or, recall_at); + double recall_val = diskann::calculate_recall((uint32_t)points_num, gold_std, gs_dist, (uint32_t)dim_gs, our_results, + (uint32_t)dim_or, (uint32_t)recall_at); // double avg_recall = (recall*1.0)/(points_num*1.0); std::cout << "Avg. recall@" << recall_at << " is " << recall_val << "\n"; diff --git a/tests/utils/compute_groundtruth.cpp b/tests/utils/compute_groundtruth.cpp index 991f29ff6..5ddbde45e 100644 --- a/tests/utils/compute_groundtruth.cpp +++ b/tests/utils/compute_groundtruth.cpp @@ -70,13 +70,13 @@ inline bool custom_dist(const std::pair &a, const std::pair dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace((int32_t)p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); } @@ -257,7 +257,8 @@ template inline int get_num_parts(const char *filename) reader.read((char *)&ndims_i32, sizeof(int)); std::cout << "#pts = " << npts_i32 << ", #dims = " << ndims_i32 << std::endl; reader.close(); - int num_parts = (npts_i32 % PARTSIZE) == 0 ? 
npts_i32 / PARTSIZE : std::floor(npts_i32 / PARTSIZE) + 1; + uint32_t num_parts = + (npts_i32 % PARTSIZE) == 0 ? npts_i32 / PARTSIZE : (uint32_t)std::floor(npts_i32 / PARTSIZE) + 1; std::cout << "Number of parts: " << num_parts << std::endl; return num_parts; } @@ -351,8 +352,7 @@ std::vector>> processUnfilteredParts(cons int *closest_points_part = new int[nqueries * k]; float *dist_closest_points_part = new float[nqueries * k]; - uint32_t part_k; - part_k = k < npoints ? k : npoints; + auto part_k = k < npoints ? k : npoints; exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints, base_data, nqueries, query_data, metric); diff --git a/tests/utils/compute_groundtruth_for_filters.cpp b/tests/utils/compute_groundtruth_for_filters.cpp index eb962257d..336784704 100644 --- a/tests/utils/compute_groundtruth_for_filters.cpp +++ b/tests/utils/compute_groundtruth_for_filters.cpp @@ -71,13 +71,13 @@ inline bool custom_dist(const std::pair &a, const std::pair dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace((int32_t)p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); } @@ -258,7 +258,7 @@ template inline int get_num_parts(const char *filename) reader.read((char *)&ndims_i32, sizeof(int)); std::cout << "#pts = " << npts_i32 << ", #dims = " << ndims_i32 << std::endl; reader.close(); - int num_parts = (npts_i32 % PARTSIZE) == 0 ? npts_i32 / PARTSIZE : std::floor(npts_i32 / PARTSIZE) + 1; + int num_parts = (npts_i32 % PARTSIZE) == 0 ? 
npts_i32 / PARTSIZE : (uint32_t)std::floor(npts_i32 / PARTSIZE) + 1; std::cout << "Number of parts: " << num_parts << std::endl; return num_parts; } @@ -445,8 +445,7 @@ std::vector>> processUnfilteredParts(cons int *closest_points_part = new int[nqueries * k]; float *dist_closest_points_part = new float[nqueries * k]; - uint32_t part_k; - part_k = k < npoints ? k : npoints; + auto part_k = k < npoints ? k : npoints; exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints, base_data, nqueries, query_data, metric); @@ -496,8 +495,7 @@ std::vector>> processFilteredParts( int *closest_points_part = new int[nqueries * k]; float *dist_closest_points_part = new float[nqueries * k]; - uint32_t part_k; - part_k = k < npoints_filt ? k : npoints_filt; + auto part_k = k < npoints_filt ? k : npoints_filt; if (npoints_filt > 0) { exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints_filt, base_data, nqueries, @@ -530,9 +528,8 @@ int aux_main(const std::string &base_file, const std::string &label_file, const const std::string &gt_file, size_t k, const std::string &universal_label, const diskann::Metric &metric, const std::string &filter_label, const std::string &tags_file = std::string("")) { - size_t npoints, nqueries, dim, npoints_filt; + size_t npoints, nqueries, dim; - float *base_data; float *query_data; load_bin_as_float(query_file.c_str(), query_data, nqueries, dim, 0); @@ -877,13 +874,13 @@ int main(int argc, char **argv) std::vector> final_gt_ids; std::vector> final_gt_dists; - int query_num = 0; + uint32_t query_num = 0; for (const auto &lbl : all_labels) { query_num += labels_to_number_of_queries[lbl]; } - for (int i = 0; i < query_num; i++) + for (uint32_t i = 0; i < query_num; i++) { final_gt_ids.push_back(std::vector(K)); final_gt_dists.push_back(std::vector(K)); @@ -894,9 +891,9 @@ int main(int argc, char **argv) std::string filtered_gt_file = gt_file + "_" + lbl; load_truthset(filtered_gt_file, gt_ids, gt_dists, gt_num, 
gt_dim); - for (int i = 0; i < labels_to_number_of_queries[lbl]; i++) + for (uint32_t i = 0; i < labels_to_number_of_queries[lbl]; i++) { - int orig_query_id = label_query_id_to_orig_id[lbl][i]; + uint32_t orig_query_id = label_query_id_to_orig_id[lbl][i]; for (uint64_t j = 0; j < K; j++) { final_gt_ids[orig_query_id][j] = label_id_to_orig_id[lbl][gt_ids[i * K + j]]; @@ -908,9 +905,9 @@ int main(int argc, char **argv) int32_t *closest_points = new int32_t[query_num * K]; float *dist_closest_points = new float[query_num * K]; - for (int i = 0; i < query_num; i++) + for (uint32_t i = 0; i < query_num; i++) { - for (int j = 0; j < K; j++) + for (uint32_t j = 0; j < K; j++) { closest_points[i * K + j] = final_gt_ids[i][j]; dist_closest_points[i * K + j] = final_gt_dists[i][j]; diff --git a/tests/utils/float_bin_to_int8.cpp b/tests/utils/float_bin_to_int8.cpp index a7632a6cf..1982005af 100644 --- a/tests/utils/float_bin_to_int8.cpp +++ b/tests/utils/float_bin_to_int8.cpp @@ -42,8 +42,8 @@ int main(int argc, char **argv) std::ofstream writer(argv[2], std::ios::binary); auto read_buf = new float[blk_size * ndims]; auto write_buf = new int8_t[blk_size * ndims]; - float bias = atof(argv[3]); - float scale = atof(argv[4]); + float bias = (float)atof(argv[3]); + float scale = (float)atof(argv[4]); writer.write((char *)(&npts_u32), sizeof(uint32_t)); writer.write((char *)(&ndims_u32), sizeof(uint32_t)); diff --git a/tests/utils/generate_pq.cpp b/tests/utils/generate_pq.cpp index 761983129..a881b1104 100644 --- a/tests/utils/generate_pq.cpp +++ b/tests/utils/generate_pq.cpp @@ -22,16 +22,16 @@ bool generate_pq(const std::string &data_path, const std::string &index_prefix_p if (opq) { - diskann::generate_opq_pivots(train_data, train_size, train_dim, num_pq_centers, num_pq_chunks, pq_pivots_path, - true); + diskann::generate_opq_pivots(train_data, train_size, (uint32_t)train_dim, (uint32_t)num_pq_centers, + (uint32_t)num_pq_chunks, pq_pivots_path, true); } else { - 
diskann::generate_pq_pivots(train_data, train_size, train_dim, num_pq_centers, num_pq_chunks, - KMEANS_ITERS_FOR_PQ, pq_pivots_path); + diskann::generate_pq_pivots(train_data, train_size, (uint32_t)train_dim, (uint32_t)num_pq_centers, + (uint32_t)num_pq_chunks, KMEANS_ITERS_FOR_PQ, pq_pivots_path); } - diskann::generate_pq_data_from_pivots(data_path, num_pq_centers, num_pq_chunks, pq_pivots_path, - pq_compressed_vectors_path, true); + diskann::generate_pq_data_from_pivots(data_path, (uint32_t)num_pq_centers, (uint32_t)num_pq_chunks, + pq_pivots_path, pq_compressed_vectors_path, true); delete[] train_data; @@ -55,7 +55,7 @@ int main(int argc, char **argv) const std::string index_prefix_path(argv[3]); const size_t num_pq_centers = 256; const size_t num_pq_chunks = (size_t)atoi(argv[4]); - const float sampling_rate = atof(argv[5]); + const float sampling_rate = (float)atof(argv[5]); const bool opq = atoi(argv[6]) == 0 ? false : true; if (std::string(argv[1]) == std::string("float")) diff --git a/tests/utils/generate_synthetic_labels.cpp b/tests/utils/generate_synthetic_labels.cpp index 3de2130fb..d9dd22c34 100644 --- a/tests/utils/generate_synthetic_labels.cpp +++ b/tests/utils/generate_synthetic_labels.cpp @@ -12,19 +12,19 @@ namespace po = boost::program_options; class ZipfDistribution { public: - ZipfDistribution(int num_points, int num_labels) + ZipfDistribution(uint64_t num_points, uint32_t num_labels) : num_labels(num_labels), num_points(num_points), uniform_zero_to_one(std::uniform_real_distribution<>(0.0, 1.0)) { } - std::unordered_map createDistributionMap() + std::unordered_map createDistributionMap() { - std::unordered_map map; - int primary_label_freq = ceil(num_points * distribution_factor); - for (int i{1}; i < num_labels + 1; i++) + std::unordered_map map; + uint32_t primary_label_freq = (uint32_t)ceil(num_points * distribution_factor); + for (uint32_t i{1}; i < num_labels + 1; i++) { - map[i] = ceil(primary_label_freq / i); + map[i] = 
(uint32_t)ceil(primary_label_freq / i); } return map; } @@ -32,7 +32,7 @@ class ZipfDistribution int writeDistribution(std::ofstream &outfile) { auto distribution_map = createDistributionMap(); - for (int i{0}; i < num_points; i++) + for (uint32_t i{0}; i < num_points; i++) { bool label_written = false; for (auto it = distribution_map.cbegin(), next_it = it; it != distribution_map.cend(); it = next_it) @@ -80,8 +80,8 @@ class ZipfDistribution } private: - int num_labels; - const int num_points; + const uint32_t num_labels; + const uint64_t num_points; const double distribution_factor = 0.7; std::knuth_b rand_engine; const std::uniform_real_distribution uniform_zero_to_one; @@ -90,7 +90,8 @@ class ZipfDistribution int main(int argc, char **argv) { std::string output_file, distribution_type; - size_t num_labels, num_points; + uint32_t num_labels; + uint64_t num_points; try { @@ -100,7 +101,7 @@ int main(int argc, char **argv) desc.add_options()("output_file,O", po::value(&output_file)->required(), "Filename for saving the label file"); desc.add_options()("num_points,N", po::value(&num_points)->required(), "Number of points in dataset"); - desc.add_options()("num_labels,L", po::value(&num_labels)->required(), + desc.add_options()("num_labels,L", po::value(&num_labels)->required(), "Number of unique labels, up to 5000"); desc.add_options()("distribution_type,DT", po::value(&distribution_type)->default_value("random"), "Distribution function for labels defaults " diff --git a/tests/utils/int8_to_float_scale.cpp b/tests/utils/int8_to_float_scale.cpp index 2de1a3a56..19fbc6c43 100644 --- a/tests/utils/int8_to_float_scale.cpp +++ b/tests/utils/int8_to_float_scale.cpp @@ -42,8 +42,8 @@ int main(int argc, char **argv) std::ofstream writer(argv[2], std::ios::binary); auto read_buf = new int8_t[blk_size * ndims]; auto write_buf = new float[blk_size * ndims]; - float bias = atof(argv[3]); - float scale = atof(argv[4]); + float bias = (float)atof(argv[3]); + float scale = 
(float)atof(argv[4]); writer.write((char *)(&npts_u32), sizeof(uint32_t)); writer.write((char *)(&ndims_u32), sizeof(uint32_t)); diff --git a/tests/utils/partition_data.cpp b/tests/utils/partition_data.cpp index 2c505315c..2520f3f4a 100644 --- a/tests/utils/partition_data.cpp +++ b/tests/utils/partition_data.cpp @@ -23,7 +23,7 @@ int main(int argc, char **argv) const std::string data_path(argv[2]); const std::string prefix_path(argv[3]); - const float sampling_rate = atof(argv[4]); + const float sampling_rate = (float)atof(argv[4]); const size_t num_partitions = (size_t)std::atoi(argv[5]); const size_t max_reps = 15; const size_t k_index = (size_t)std::atoi(argv[6]); diff --git a/tests/utils/partition_with_ram_budget.cpp b/tests/utils/partition_with_ram_budget.cpp index 3c546801a..937b68d2c 100644 --- a/tests/utils/partition_with_ram_budget.cpp +++ b/tests/utils/partition_with_ram_budget.cpp @@ -23,7 +23,7 @@ int main(int argc, char **argv) const std::string data_path(argv[2]); const std::string prefix_path(argv[3]); - const float sampling_rate = atof(argv[4]); + const float sampling_rate = (float)atof(argv[4]); const double ram_budget = (double)std::atof(argv[5]); const size_t graph_degree = (size_t)std::atoi(argv[6]); const size_t k_index = (size_t)std::atoi(argv[7]); diff --git a/tests/utils/rand_data_gen.cpp b/tests/utils/rand_data_gen.cpp index ea2e67478..a6f9305c8 100644 --- a/tests/utils/rand_data_gen.cpp +++ b/tests/utils/rand_data_gen.cpp @@ -23,7 +23,7 @@ int block_write_float(std::ofstream &writer, size_t ndims, size_t npts, float no { float sum = 0; for (size_t d = 0; d < ndims; ++d) - vec[d] = normal_rand(gen); + vec[d] = (float)normal_rand(gen); for (size_t d = 0; d < ndims; ++d) sum += vec[d] * vec[d]; for (size_t d = 0; d < ndims; ++d) @@ -49,7 +49,7 @@ int block_write_int8(std::ofstream &writer, size_t ndims, size_t npts, float nor { float sum = 0; for (size_t d = 0; d < ndims; ++d) - vec[d] = normal_rand(gen); + vec[d] = (float)normal_rand(gen); 
for (size_t d = 0; d < ndims; ++d) sum += vec[d] * vec[d]; for (size_t d = 0; d < ndims; ++d) @@ -57,7 +57,7 @@ int block_write_int8(std::ofstream &writer, size_t ndims, size_t npts, float nor for (size_t d = 0; d < ndims; ++d) { - vec_T[d] = std::round(vec[d]); + vec_T[d] = (int8_t)std::round(vec[d]); } writer.write((char *)vec_T, ndims * sizeof(int8_t)); @@ -81,7 +81,7 @@ int block_write_uint8(std::ofstream &writer, size_t ndims, size_t npts, float no { float sum = 0; for (size_t d = 0; d < ndims; ++d) - vec[d] = normal_rand(gen); + vec[d] = (float)normal_rand(gen); for (size_t d = 0; d < ndims; ++d) sum += vec[d] * vec[d]; for (size_t d = 0; d < ndims; ++d) @@ -89,7 +89,7 @@ int block_write_uint8(std::ofstream &writer, size_t ndims, size_t npts, float no for (size_t d = 0; d < ndims; ++d) { - vec_T[d] = 128 + std::round(vec[d]); + vec_T[d] = 128 + (int8_t)std::round(vec[d]); } writer.write((char *)vec_T, ndims * sizeof(uint8_t)); diff --git a/tests/utils/stats_label_data.cpp b/tests/utils/stats_label_data.cpp index 129d5bcb2..3342672ff 100644 --- a/tests/utils/stats_label_data.cpp +++ b/tests/utils/stats_label_data.cpp @@ -105,8 +105,8 @@ void stats_analysis(const std::string labels_file, std::string univeral_label, u std::cout << "Third common label " << "\t" << label_count_vec[label_count_vec.size() - 3].first << " with count=" << label_count_vec[label_count_vec.size() - 3].second << std::endl; - avg_labels_per_pt = (sum) / (float)point_cnt; - mean_label_size = (sum) / label_counts.size(); + avg_labels_per_pt = sum / (float)point_cnt; + mean_label_size = sum / (float)label_counts.size(); std::cout << "Total number of points = " << point_cnt << ", number of labels = " << label_counts.size() << std::endl; std::cout << "Average number of labels per point = " << avg_labels_per_pt << std::endl; diff --git a/tests/utils/vector_analysis.cpp b/tests/utils/vector_analysis.cpp index 5e4cb9bf4..009df6d05 100644 --- a/tests/utils/vector_analysis.cpp +++ 
b/tests/utils/vector_analysis.cpp @@ -36,7 +36,7 @@ template int analyze_norm(std::string base_file) } std::sort(norms.begin(), norms.end()); for (int p = 0; p < 100; p += 5) - std::cout << "percentile " << p << ": " << norms[std::floor((p / 100.0) * npts)] << std::endl; + std::cout << "percentile " << p << ": " << norms[(uint64_t)(std::floor((p / 100.0) * npts))] << std::endl; std::cout << "percentile 100" << ": " << norms[npts - 1] << std::endl; delete[] data; @@ -58,7 +58,7 @@ template int normalize_base(std::string base_file, std::string out_ pt_norm += data[i * ndims + d] * data[i * ndims + d]; pt_norm = std::sqrt(pt_norm); for (size_t d = 0; d < ndims; d++) - data[i * ndims + d] = data[i * ndims + d] / pt_norm; + data[i * ndims + d] = static_cast(data[i * ndims + d] / pt_norm); } diskann::save_bin(out_file, data, npts, ndims); delete[] data; @@ -92,11 +92,11 @@ template int augment_base(std::string base_file, std::string out_fi { for (size_t j = 0; j < ndims; j++) { - new_data[i * newdims + j] = data[i * ndims + j] / max_norm; + new_data[i * newdims + j] = static_cast(data[i * ndims + j] / max_norm); } float diff = 1 - (norms[i] / (max_norm * max_norm)); diff = diff <= 0 ? 0 : std::sqrt(diff); - new_data[i * newdims + ndims] = diff; + new_data[i * newdims + ndims] = static_cast(diff); if (diff <= 0) { std::cout << i << " has large max norm, investigate if needed. 
diff = " << diff << std::endl; @@ -106,7 +106,7 @@ template int augment_base(std::string base_file, std::string out_fi { for (size_t j = 0; j < ndims; j++) { - new_data[i * newdims + j] = data[i * ndims + j] / std::sqrt(norms[i]); + new_data[i * newdims + j] = static_cast(data[i * ndims + j] / std::sqrt(norms[i])); } new_data[i * newdims + ndims] = 0; } From 87f263f61d4e4d5acccf68428a8a16e99a982102 Mon Sep 17 00:00:00 2001 From: David Kaczynski Date: Fri, 28 Apr 2023 13:54:48 -0400 Subject: [PATCH 2/6] Fix clang formatting --- tests/range_search_disk_index.cpp | 3 ++- tests/utils/calculate_recall.cpp | 4 ++-- tests/utils/compute_groundtruth.cpp | 3 ++- tests/utils/compute_groundtruth_for_filters.cpp | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/range_search_disk_index.cpp b/tests/range_search_disk_index.cpp index 59f53780f..33a7283a7 100644 --- a/tests/range_search_disk_index.cpp +++ b/tests/range_search_disk_index.cpp @@ -233,7 +233,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre double ratio_of_sums = 0; if (calc_recall_flag) { - recall = diskann::calculate_range_search_recall((uint32_t)query_num, groundtruth_ids, query_result_ids[test_id]); + recall = + diskann::calculate_range_search_recall((uint32_t)query_num, groundtruth_ids, query_result_ids[test_id]); uint32_t total_true_positive = 0; uint32_t total_positive = 0; diff --git a/tests/utils/calculate_recall.cpp b/tests/utils/calculate_recall.cpp index 282994e45..dc76252cc 100644 --- a/tests/utils/calculate_recall.cpp +++ b/tests/utils/calculate_recall.cpp @@ -47,8 +47,8 @@ int main(int argc, char **argv) return -1; } std::cout << "Calculating recall@" << recall_at << std::endl; - double recall_val = diskann::calculate_recall((uint32_t)points_num, gold_std, gs_dist, (uint32_t)dim_gs, our_results, - (uint32_t)dim_or, (uint32_t)recall_at); + double recall_val = diskann::calculate_recall((uint32_t)points_num, gold_std, gs_dist, (uint32_t)dim_gs, + 
our_results, (uint32_t)dim_or, (uint32_t)recall_at); // double avg_recall = (recall*1.0)/(points_num*1.0); std::cout << "Avg. recall@" << recall_at << " is " << recall_val << "\n"; diff --git a/tests/utils/compute_groundtruth.cpp b/tests/utils/compute_groundtruth.cpp index 5ddbde45e..d0c72ae36 100644 --- a/tests/utils/compute_groundtruth.cpp +++ b/tests/utils/compute_groundtruth.cpp @@ -218,7 +218,8 @@ void exact_knn(const size_t dim, const size_t k, for (size_t p = k; p < npoints; p++) { if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace((int32_t)p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace((int32_t)p, + dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); } diff --git a/tests/utils/compute_groundtruth_for_filters.cpp b/tests/utils/compute_groundtruth_for_filters.cpp index 336784704..48ac064db 100644 --- a/tests/utils/compute_groundtruth_for_filters.cpp +++ b/tests/utils/compute_groundtruth_for_filters.cpp @@ -219,7 +219,8 @@ void exact_knn(const size_t dim, const size_t k, for (size_t p = k; p < npoints; p++) { if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace((int32_t)p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace((int32_t)p, + dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); } From a0dc2abdf85a13660577165d2552eece028df709 Mon Sep 17 00:00:00 2001 From: David Kaczynski Date: Mon, 1 May 2023 12:11:40 -0400 Subject: [PATCH 3/6] Add missing initializations --- tests/utils/compute_groundtruth.cpp | 2 +- tests/utils/compute_groundtruth_for_filters.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/utils/compute_groundtruth.cpp b/tests/utils/compute_groundtruth.cpp 
index d0c72ae36..8f8415f1d 100644 --- a/tests/utils/compute_groundtruth.cpp +++ b/tests/utils/compute_groundtruth.cpp @@ -342,7 +342,7 @@ std::vector>> processUnfilteredParts(cons const diskann::Metric &metric, std::vector &location_to_tag) { - float *base_data; + float *base_data = nullptr; int num_parts = get_num_parts(base_file.c_str()); std::vector>> res(nqueries); for (int p = 0; p < num_parts; p++) diff --git a/tests/utils/compute_groundtruth_for_filters.cpp b/tests/utils/compute_groundtruth_for_filters.cpp index 48ac064db..f053b7534 100644 --- a/tests/utils/compute_groundtruth_for_filters.cpp +++ b/tests/utils/compute_groundtruth_for_filters.cpp @@ -435,7 +435,7 @@ std::vector>> processUnfilteredParts(cons const diskann::Metric &metric, std::vector &location_to_tag) { - float *base_data; + float *base_data = nullptr; int num_parts = get_num_parts(base_file.c_str()); std::vector>> res(nqueries); for (int p = 0; p < num_parts; p++) @@ -477,8 +477,8 @@ std::vector>> processFilteredParts( const std::string &universal_label, size_t &nqueries, size_t &npoints, size_t &dim, size_t &k, float *query_data, const diskann::Metric &metric, std::vector &location_to_tag) { - size_t npoints_filt; - float *base_data; + size_t npoints_filt = 0; + float *base_data = nullptr; std::vector>> res(nqueries); int num_parts = get_num_parts(base_file.c_str()); @@ -531,7 +531,7 @@ int aux_main(const std::string &base_file, const std::string &label_file, const { size_t npoints, nqueries, dim; - float *query_data; + float *query_data = nullptr; load_bin_as_float(query_file.c_str(), query_data, nqueries, dim, 0); if (nqueries > PARTSIZE) From 2092abcfbc5b84581e57dbb16e018650d1e9c0d0 Mon Sep 17 00:00:00 2001 From: David Kaczynski Date: Wed, 3 May 2023 14:04:06 -0400 Subject: [PATCH 4/6] Use size_t for index of point IDs --- tests/utils/compute_groundtruth.cpp | 10 +++++----- tests/utils/compute_groundtruth_for_filters.cpp | 14 +++++++------- 2 files changed, 12 insertions(+), 12 
deletions(-) diff --git a/tests/utils/compute_groundtruth.cpp b/tests/utils/compute_groundtruth.cpp index d0c72ae36..b7f1f1ec2 100644 --- a/tests/utils/compute_groundtruth.cpp +++ b/tests/utils/compute_groundtruth.cpp @@ -45,7 +45,7 @@ template T div_round_up(const T numerator, const T denominator) return (numerator % denominator == 0) ? (numerator / denominator) : 1 + (numerator / denominator); } -using pairIF = std::pair; +using pairIF = std::pair; struct cmpmaxstruct { bool operator()(const pairIF &l, const pairIF &r) @@ -124,7 +124,7 @@ void inner_prod_to_points(const size_t dim, } void exact_knn(const size_t dim, const size_t k, - int *const closest_points, // k * num_queries preallocated, col + size_t *const closest_points, // k * num_queries preallocated, col // major, queries columns float *const dist_closest_points, // k * num_queries // preallocated, Dist to @@ -214,11 +214,11 @@ void exact_knn(const size_t dim, const size_t k, { maxPQIFCS point_dist; for (size_t p = 0; p < k; p++) - point_dist.emplace((int32_t)p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); for (size_t p = k; p < npoints; p++) { if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace((int32_t)p, + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); @@ -350,7 +350,7 @@ std::vector>> processUnfilteredParts(cons size_t start_id = p * PARTSIZE; load_bin_as_float(base_file.c_str(), base_data, npoints, dim, p); - int *closest_points_part = new int[nqueries * k]; + size_t *closest_points_part = new size_t[nqueries * k]; float *dist_closest_points_part = new float[nqueries * k]; auto part_k = k < npoints ? 
k : npoints; diff --git a/tests/utils/compute_groundtruth_for_filters.cpp b/tests/utils/compute_groundtruth_for_filters.cpp index 48ac064db..d59cb4f66 100644 --- a/tests/utils/compute_groundtruth_for_filters.cpp +++ b/tests/utils/compute_groundtruth_for_filters.cpp @@ -46,7 +46,7 @@ template T div_round_up(const T numerator, const T denominator) return (numerator % denominator == 0) ? (numerator / denominator) : 1 + (numerator / denominator); } -using pairIF = std::pair; +using pairIF = std::pair; struct cmpmaxstruct { bool operator()(const pairIF &l, const pairIF &r) @@ -125,7 +125,7 @@ void inner_prod_to_points(const size_t dim, } void exact_knn(const size_t dim, const size_t k, - int *const closest_points, // k * num_queries preallocated, col + size_t *const closest_points, // k * num_queries preallocated, col // major, queries columns float *const dist_closest_points, // k * num_queries // preallocated, Dist to @@ -214,12 +214,12 @@ void exact_knn(const size_t dim, const size_t k, for (long long q = q_b; q < q_e; q++) { maxPQIFCS point_dist; - for (uint64_t p = 0; p < k; p++) - point_dist.emplace((int32_t)p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + for (size_t p = 0; p < k; p++) + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); for (size_t p = k; p < npoints; p++) { if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace((int32_t)p, + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); @@ -443,7 +443,7 @@ std::vector>> processUnfilteredParts(cons size_t start_id = p * PARTSIZE; load_bin_as_float(base_file.c_str(), base_data, npoints, dim, p); - int *closest_points_part = new int[nqueries * k]; + size_t *closest_points_part = new size_t[nqueries * k]; float *dist_closest_points_part = new float[nqueries * k]; auto part_k = k < npoints 
? k : npoints; @@ -493,7 +493,7 @@ std::vector>> processFilteredParts( if (filter_label != "") rev_map = load_filtered_bin_as_float(base_file.c_str(), base_data, npoints, dim, p, label_file.c_str(), filter_label, universal_label, npoints_filt, pts_to_labels); - int *closest_points_part = new int[nqueries * k]; + size_t *closest_points_part = new size_t[nqueries * k]; float *dist_closest_points_part = new float[nqueries * k]; auto part_k = k < npoints_filt ? k : npoints_filt; From ba97cbdca9328c2941a91217335cf6b2f1fc38cb Mon Sep 17 00:00:00 2001 From: David Kaczynski Date: Wed, 3 May 2023 14:08:12 -0400 Subject: [PATCH 5/6] Fix clang format --- tests/utils/compute_groundtruth.cpp | 5 ++--- tests/utils/compute_groundtruth_for_filters.cpp | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/utils/compute_groundtruth.cpp b/tests/utils/compute_groundtruth.cpp index c3c2d3368..f33a26b84 100644 --- a/tests/utils/compute_groundtruth.cpp +++ b/tests/utils/compute_groundtruth.cpp @@ -124,7 +124,7 @@ void inner_prod_to_points(const size_t dim, } void exact_knn(const size_t dim, const size_t k, - size_t *const closest_points, // k * num_queries preallocated, col + size_t *const closest_points, // k * num_queries preallocated, col // major, queries columns float *const dist_closest_points, // k * num_queries // preallocated, Dist to @@ -218,8 +218,7 @@ void exact_knn(const size_t dim, const size_t k, for (size_t p = k; p < npoints; p++) { if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace(p, - dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); } diff --git a/tests/utils/compute_groundtruth_for_filters.cpp b/tests/utils/compute_groundtruth_for_filters.cpp index dc9f3fdb3..5be7135e1 100644 --- 
a/tests/utils/compute_groundtruth_for_filters.cpp +++ b/tests/utils/compute_groundtruth_for_filters.cpp @@ -125,7 +125,7 @@ void inner_prod_to_points(const size_t dim, } void exact_knn(const size_t dim, const size_t k, - size_t *const closest_points, // k * num_queries preallocated, col + size_t *const closest_points, // k * num_queries preallocated, col // major, queries columns float *const dist_closest_points, // k * num_queries // preallocated, Dist to @@ -219,8 +219,7 @@ void exact_knn(const size_t dim, const size_t k, for (size_t p = k; p < npoints; p++) { if (point_dist.top().second > dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]) - point_dist.emplace(p, - dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); + point_dist.emplace(p, dist_matrix[(ptrdiff_t)p + (ptrdiff_t)(q - q_b) * (ptrdiff_t)npoints]); if (point_dist.size() > k) point_dist.pop(); } From 424e33052b04efd417f1b52c3a2f0967e0c5e175 Mon Sep 17 00:00:00 2001 From: David Kaczynski Date: Wed, 3 May 2023 16:27:46 -0400 Subject: [PATCH 6/6] Refactor iterator and conditions for printing labels --- tests/utils/generate_synthetic_labels.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/utils/generate_synthetic_labels.cpp b/tests/utils/generate_synthetic_labels.cpp index d9dd22c34..6741760cb 100644 --- a/tests/utils/generate_synthetic_labels.cpp +++ b/tests/utils/generate_synthetic_labels.cpp @@ -35,11 +35,10 @@ class ZipfDistribution for (uint32_t i{0}; i < num_points; i++) { bool label_written = false; - for (auto it = distribution_map.cbegin(), next_it = it; it != distribution_map.cend(); it = next_it) + for (auto it = distribution_map.cbegin(); it != distribution_map.cend(); it++) { - next_it++; auto label_selection_probability = std::bernoulli_distribution(distribution_factor / (double)it->first); - if (label_selection_probability(rand_engine)) + if (label_selection_probability(rand_engine) && distribution_map[it->first] > 
0) { if (label_written) { @@ -49,10 +48,6 @@ class ZipfDistribution label_written = true; // remove label from map if we have used all labels distribution_map[it->first] -= 1; - if (distribution_map[it->first] == 0) - { - distribution_map.erase(it); - } } } if (!label_written)