Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Force error on warnings and add casts to test directory #342

Merged
merged 7 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the MIT license.

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)

add_executable(build_memory_index build_memory_index.cpp)
target_link_libraries(build_memory_index ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options)
Expand Down
6 changes: 3 additions & 3 deletions tests/build_stitched_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ void save_full_index(path final_index_path_prefix, path input_data_path, uint64_
size_t bytes_written = METADATA;
for (uint32_t node_point = 0; node_point < stitched_graph.size(); node_point++)
{
uint32_t current_node_num_neighbors = stitched_graph[node_point].size();
uint32_t current_node_num_neighbors = (uint32_t)stitched_graph[node_point].size();
std::vector<uint32_t> current_node_neighbors = stitched_graph[node_point];
stitched_graph_writer.write((char *)&current_node_num_neighbors, sizeof(uint32_t));
bytes_written += sizeof(uint32_t);
Expand Down Expand Up @@ -226,7 +226,7 @@ stitch_indices_return_values stitch_label_indices(

std::tie(curr_label_index, curr_label_index_size) =
diskann::load_label_index(curr_label_index_path, labels_to_number_of_points[lbl]);
curr_label_entry_point = random(0, curr_label_index.size());
curr_label_entry_point = (uint32_t)random(0, curr_label_index.size());
label_entry_points[lbl] = label_id_to_orig_id_map[lbl][curr_label_entry_point];

for (uint32_t node_point = 0; node_point < curr_label_index.size(); node_point++)
Expand Down Expand Up @@ -344,7 +344,7 @@ int main(int argc, char **argv)

// 3. for each label, make a separate data file
tsl::robin_map<std::string, std::vector<uint32_t>> label_id_to_orig_id_map;
uint32_t total_number_of_points = point_ids_to_labels.size();
uint32_t total_number_of_points = (uint32_t)point_ids_to_labels.size();

#ifndef _WINDOWS
if (data_type == "uint8")
Expand Down
17 changes: 9 additions & 8 deletions tests/range_search_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre

for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++)
{
uint64_t L = Lvec[test_id];
uint32_t L = Lvec[test_id];

if (beamwidth <= 0)
{
Expand All @@ -211,7 +211,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
query_result_ids[test_id][i].reserve(res_count);
query_result_ids[test_id][i].resize(res_count);
for (uint32_t idx = 0; idx < res_count; idx++)
query_result_ids[test_id][i][idx] = indices[idx];
query_result_ids[test_id][i][idx] = (uint32_t)indices[idx];
}
auto e = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = e - s;
Expand All @@ -226,21 +226,22 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
auto mean_ios = diskann::get_mean_stats<uint32_t>(stats, query_num,
[](const diskann::QueryStats &stats) { return stats.n_ios; });

float mean_cpuus = diskann::get_mean_stats<float>(
double mean_cpuus = diskann::get_mean_stats<float>(
rakri marked this conversation as resolved.
Show resolved Hide resolved
stats, query_num, [](const diskann::QueryStats &stats) { return stats.cpu_us; });

float recall = 0;
float ratio_of_sums = 0;
double recall = 0;
double ratio_of_sums = 0;
if (calc_recall_flag)
{
recall = diskann::calculate_range_search_recall(query_num, groundtruth_ids, query_result_ids[test_id]);
recall =
diskann::calculate_range_search_recall((uint32_t)query_num, groundtruth_ids, query_result_ids[test_id]);

uint32_t total_true_positive = 0;
uint32_t total_positive = 0;
for (uint32_t i = 0; i < query_num; i++)
{
total_true_positive += query_result_ids[test_id][i].size();
total_positive += groundtruth_ids[i].size();
total_true_positive += (uint32_t)query_result_ids[test_id][i].size();
total_positive += (uint32_t)groundtruth_ids[i].size();
}

ratio_of_sums = (1.0 * total_true_positive) / (1.0 * total_positive);
Expand Down
12 changes: 6 additions & 6 deletions tests/search_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,11 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre

uint32_t optimized_beamwidth = 2;

float best_recall = 0.0;
double best_recall = 0.0;

for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++)
{
uint64_t L = Lvec[test_id];
uint32_t L = Lvec[test_id];

if (L < recall_at)
{
Expand Down Expand Up @@ -252,7 +252,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
}
auto e = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = e - s;
float qps = (1.0 * query_num) / (1.0 * diff.count());
double qps = (1.0 * query_num) / (1.0 * diff.count());

diskann::convert_types<uint64_t, uint32_t>(query_result_ids_64.data(), query_result_ids[test_id].data(),
query_num, recall_at);
Expand All @@ -269,11 +269,11 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
auto mean_cpuus = diskann::get_mean_stats<float>(stats, query_num,
[](const diskann::QueryStats &stats) { return stats.cpu_us; });

float recall = 0;
double recall = 0;
if (calc_recall_flag)
{
recall = diskann::calculate_recall(query_num, gt_ids, gt_dists, gt_dim, query_result_ids[test_id].data(),
recall_at, recall_at);
recall = diskann::calculate_recall((uint32_t)query_num, gt_ids, gt_dists, (uint32_t)gt_dim,
query_result_ids[test_id].data(), recall_at, recall_at);
best_recall = std::max(recall, best_recall);
}

Expand Down
16 changes: 8 additions & 8 deletions tests/search_memory_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path,
query_result_tags.resize(recall_at * query_num);
}

float best_recall = 0.0;
double best_recall = 0.0;

for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++)
{
uint64_t L = Lvec[test_id];
uint32_t L = Lvec[test_id];
if (L < recall_at)
{
diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl;
Expand Down Expand Up @@ -185,28 +185,28 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path,
}
auto qe = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = qe - qs;
latency_stats[i] = diff.count() * 1000000;
latency_stats[i] = (float)(diff.count() * 1000000);
}
std::chrono::duration<double> diff = std::chrono::high_resolution_clock::now() - s;

float displayed_qps = static_cast<float>(query_num) / diff.count();
double displayed_qps = query_num / diff.count();

if (show_qps_per_thread)
displayed_qps /= num_threads;

std::vector<float> recalls;
std::vector<double> recalls;
if (calc_recall_flag)
{
recalls.reserve(recalls_to_print);
for (uint32_t curr_recall = first_recall; curr_recall <= recall_at; curr_recall++)
{
recalls.push_back(diskann::calculate_recall(query_num, gt_ids, gt_dists, gt_dim,
recalls.push_back(diskann::calculate_recall((uint32_t)query_num, gt_ids, gt_dists, (uint32_t)gt_dim,
query_result_ids[test_id].data(), recall_at, curr_recall));
}
}

std::sort(latency_stats.begin(), latency_stats.end());
float mean_latency =
double mean_latency =
std::accumulate(latency_stats.begin(), latency_stats.end(), 0.0) / static_cast<float>(query_num);

float avg_cmps = (float)std::accumulate(cmp_stats.begin(), cmp_stats.end(), 0) / (float)query_num;
Expand All @@ -222,7 +222,7 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path,
<< std::setw(20) << (float)mean_latency << std::setw(15)
<< (float)latency_stats[(uint64_t)(0.999 * query_num)];
}
for (float recall : recalls)
for (double recall : recalls)
{
std::cout << std::setw(12) << recall;
best_recall = std::max(recall, best_recall);
Expand Down
10 changes: 5 additions & 5 deletions tests/test_insert_deletes_consolidate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ std::string get_save_filename(const std::string &save_path, size_t points_to_ski
}

template <typename T, typename TagT>
void insert_till_next_checkpoint(diskann::Index<T, TagT> &index, size_t start, size_t end, size_t thread_count, T *data,
size_t aligned_dim)
void insert_till_next_checkpoint(diskann::Index<T, TagT> &index, size_t start, size_t end, int32_t thread_count,
T *data, size_t aligned_dim)
{
diskann::Timer insert_timer;

Expand All @@ -115,7 +115,7 @@ void delete_from_beginning(diskann::Index<T, TagT> &index, diskann::IndexWritePa
<< "Lazy deleting points " << points_to_skip << " to "
<< points_to_skip + points_to_delete_from_beginning << "... ";
for (size_t i = points_to_skip; i < points_to_skip + points_to_delete_from_beginning; ++i)
index.lazy_delete(i + 1); // Since tags are data location + 1
index.lazy_delete(static_cast<TagT>(i + 1)); // Since tags are data location + 1
std::cout << "done." << std::endl;

auto report = index.consolidate_deletes(delete_params);
Expand Down Expand Up @@ -230,7 +230,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con

if (concurrent)
{
int sub_threads = (thread_count + 1) / 2;
int32_t sub_threads = (thread_count + 1) / 2;
bool delete_launched = false;
std::future<void> delete_task;

Expand Down Expand Up @@ -279,7 +279,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con
std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl;

load_aligned_bin_part(data_path, data, start, end - start);
insert_till_next_checkpoint(index, start, end, thread_count, data, aligned_dim);
insert_till_next_checkpoint(index, start, end, (int32_t)thread_count, data, aligned_dim);

if (checkpoints_per_snapshot > 0 && --num_checkpoints_till_snapshot == 0)
{
Expand Down
4 changes: 2 additions & 2 deletions tests/test_streaming_scenario.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ void insert_next_batch(diskann::Index<T, TagT, LabelT> &index, size_t start, siz
std::cout << std::endl << "Inserting from " << start << " to " << end << std::endl;

size_t num_failed = 0;
#pragma omp parallel for num_threads(insert_threads) schedule(dynamic) reduction(+ : num_failed)
#pragma omp parallel for num_threads((int32_t)insert_threads) schedule(dynamic) reduction(+ : num_failed)
rakri marked this conversation as resolved.
Show resolved Hide resolved
for (int64_t j = start; j < (int64_t)end; j++)
{
if (index.insert_point(&data[(j - start) * aligned_dim], 1 + static_cast<TagT>(j)) != 0)
Expand Down Expand Up @@ -121,7 +121,7 @@ void delete_and_consolidate(diskann::Index<T, TagT, LabelT> &index, diskann::Ind
{
std::cout << std::endl << "Lazy deleting points " << start << " to " << end << "... ";
for (size_t i = start; i < end; ++i)
index.lazy_delete(1 + i);
index.lazy_delete(static_cast<TagT>(1 + i));
std::cout << "lazy delete done." << std::endl;

auto report = index.consolidate_deletes(delete_params);
Expand Down
2 changes: 2 additions & 0 deletions tests/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Licensed under the MIT license.

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)


add_executable(fvecs_to_bin fvecs_to_bin.cpp)

Expand Down
3 changes: 2 additions & 1 deletion tests/utils/calculate_recall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ int main(int argc, char **argv)
return -1;
}
std::cout << "Calculating recall@" << recall_at << std::endl;
float recall_val = diskann::calculate_recall(points_num, gold_std, gs_dist, dim_gs, our_results, dim_or, recall_at);
double recall_val = diskann::calculate_recall((uint32_t)points_num, gold_std, gs_dist, (uint32_t)dim_gs,
our_results, (uint32_t)dim_or, (uint32_t)recall_at);

// double avg_recall = (recall*1.0)/(points_num*1.0);
std::cout << "Avg. recall@" << recall_at << " is " << recall_val << "\n";
Expand Down
20 changes: 10 additions & 10 deletions tests/utils/compute_groundtruth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ template <class T> T div_round_up(const T numerator, const T denominator)
return (numerator % denominator == 0) ? (numerator / denominator) : 1 + (numerator / denominator);
}

using pairIF = std::pair<int, float>;
using pairIF = std::pair<size_t, float>;
struct cmpmaxstruct
{
bool operator()(const pairIF &l, const pairIF &r)
Expand All @@ -70,13 +70,13 @@ inline bool custom_dist(const std::pair<uint32_t, float> &a, const std::pair<uin
return a.second < b.second;
}

void compute_l2sq(float *const points_l2sq, const float *const matrix, const int64_t num_points, const int dim)
void compute_l2sq(float *const points_l2sq, const float *const matrix, const int64_t num_points, const uint64_t dim)
{
assert(points_l2sq != NULL);
#pragma omp parallel for schedule(static, 65536)
for (int64_t d = 0; d < num_points; ++d)
points_l2sq[d] =
cblas_sdot(dim, matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1, matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1);
points_l2sq[d] = cblas_sdot((int64_t)dim, matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1,
matrix + (ptrdiff_t)d * (ptrdiff_t)dim, 1);
}

void distsq_to_points(const size_t dim,
Expand Down Expand Up @@ -124,7 +124,7 @@ void inner_prod_to_points(const size_t dim,
}

void exact_knn(const size_t dim, const size_t k,
int *const closest_points, // k * num_queries preallocated, col
size_t *const closest_points, // k * num_queries preallocated, col
// major, queries columns
float *const dist_closest_points, // k * num_queries
// preallocated, Dist to
Expand Down Expand Up @@ -257,7 +257,8 @@ template <typename T> inline int get_num_parts(const char *filename)
reader.read((char *)&ndims_i32, sizeof(int));
std::cout << "#pts = " << npts_i32 << ", #dims = " << ndims_i32 << std::endl;
reader.close();
int num_parts = (npts_i32 % PARTSIZE) == 0 ? npts_i32 / PARTSIZE : std::floor(npts_i32 / PARTSIZE) + 1;
uint32_t num_parts =
(npts_i32 % PARTSIZE) == 0 ? npts_i32 / PARTSIZE : (uint32_t)std::floor(npts_i32 / PARTSIZE) + 1;
std::cout << "Number of parts: " << num_parts << std::endl;
return num_parts;
}
Expand Down Expand Up @@ -340,19 +341,18 @@ std::vector<std::vector<std::pair<uint32_t, float>>> processUnfilteredParts(cons
const diskann::Metric &metric,
std::vector<uint32_t> &location_to_tag)
{
float *base_data;
float *base_data = nullptr;
int num_parts = get_num_parts<T>(base_file.c_str());
std::vector<std::vector<std::pair<uint32_t, float>>> res(nqueries);
for (int p = 0; p < num_parts; p++)
{
size_t start_id = p * PARTSIZE;
load_bin_as_float<T>(base_file.c_str(), base_data, npoints, dim, p);

int *closest_points_part = new int[nqueries * k];
size_t *closest_points_part = new size_t[nqueries * k];
float *dist_closest_points_part = new float[nqueries * k];

uint32_t part_k;
part_k = k < npoints ? k : npoints;
auto part_k = k < npoints ? k : npoints;
exact_knn(dim, part_k, closest_points_part, dist_closest_points_part, npoints, base_data, nqueries, query_data,
metric);

Expand Down
Loading