From 9042595614c0f3b5e72f61090538abdb6510af14 Mon Sep 17 00:00:00 2001 From: Dax Pryce Date: Wed, 12 Jul 2023 13:40:36 -0700 Subject: [PATCH] Formatting fixes --- apps/build_memory_index.cpp | 2 +- apps/range_search_disk_index.cpp | 14 ++++---- apps/search_disk_index.cpp | 14 ++++---- apps/test_streaming_scenario.cpp | 8 ++--- include/cached_io.h | 2 +- include/utils.h | 28 +++++++-------- src/disk_utils.cpp | 48 ++++++++++++------------- src/distance.cpp | 30 ++++++++-------- src/index.cpp | 55 ++++++++++++++--------------- src/math_utils.cpp | 16 ++++----- src/partition.cpp | 18 +++++----- src/pq.cpp | 34 +++++++++--------- src/pq_flash_index.cpp | 32 ++++++++--------- src/utils.cpp | 14 ++++---- src/windows_aligned_file_reader.cpp | 4 +-- 15 files changed, 155 insertions(+), 164 deletions(-) diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index 13d08da3d..1646a8d27 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ -155,7 +155,7 @@ int main(int argc, char **argv) try { std::cout << "Starting index build with R: " << R << " Lbuild: " << L << " alpha: " << alpha - << " #threads: " << num_threads << std::endl; + << " #threads: " << num_threads << std::endl; size_t data_num, data_dim; diskann::get_bin_metadata(data_path, data_num, data_dim); diff --git a/apps/range_search_disk_index.cpp b/apps/range_search_disk_index.cpp index 71d4b5518..cf477af47 100644 --- a/apps/range_search_disk_index.cpp +++ b/apps/range_search_disk_index.cpp @@ -165,8 +165,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre std::string recall_string = "Recall@rng=" + std::to_string(search_range); std::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) - << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) - << "CPU (s)"; + << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) + << "CPU (s)"; if (calc_recall_flag) { std::cout << std::setw(16) << recall_string << std::endl; @@ -174,8 +174,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre else std::cout << std::endl; std::cout << "===============================================================" - "===========================================" - << std::endl; + "===========================================" + << std::endl; std::vector>> query_result_ids(Lvec.size()); @@ -247,9 +247,9 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre ratio_of_sums = (1.0 * total_true_positive) / (1.0 * total_positive); } - std::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps - << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios - << std::setw(16) << mean_cpuus; + std::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps << std::setw(16) + << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios << std::setw(16) + << mean_cpuus; if (calc_recall_flag) { std::cout << std::setw(16) << recall << "," << ratio_of_sums << std::endl; diff --git a/apps/search_disk_index.cpp b/apps/search_disk_index.cpp index 0671b1f4e..35d4d3308 100644 --- a/apps/search_disk_index.cpp +++ b/apps/search_disk_index.cpp @@ -177,8 +177,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre std::string recall_string = "Recall@" + std::to_string(recall_at); std::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) - << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) - << "CPU (s)"; + << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) + << "CPU (s)"; if (calc_recall_flag) { std::cout << std::setw(16) << recall_string << std::endl; @@ -186,8 +186,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre else std::cout << std::endl; std::cout << "===============================================================" - "=======================================================" - << std::endl; + "=======================================================" + << std::endl; std::vector> query_result_ids(Lvec.size()); std::vector> query_result_dists(Lvec.size()); @@ -277,9 +277,9 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre best_recall = std::max(recall, best_recall); } - std::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps - << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios - << std::setw(16) << mean_cpuus; + std::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps << std::setw(16) + << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios << std::setw(16) + << mean_cpuus; if (calc_recall_flag) { std::cout << std::setw(16) << recall << std::endl; diff --git a/apps/test_streaming_scenario.cpp b/apps/test_streaming_scenario.cpp index 89ac46d66..701cd4081 100644 --- a/apps/test_streaming_scenario.cpp +++ b/apps/test_streaming_scenario.cpp @@ -133,13 +133,13 @@ void delete_and_consolidate(diskann::AbstractIndex &index, diskann::IndexWritePa if (report._status == diskann::consolidation_report::status_code::LOCK_FAIL) { std::cerr << "Unable to acquire consolidate delete lock after " - << "deleting points " << start << " to " << end << ". Will retry in " << wait_time - << "seconds." << std::endl; + << "deleting points " << start << " to " << end << ". Will retry in " << wait_time + << "seconds." << std::endl; } else if (report._status == diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR) { std::cerr << "Inconsistent counts in data structure. " - << "Will retry in " << wait_time << "seconds." << std::endl; + << "Will retry in " << wait_time << "seconds." << std::endl; } else { @@ -197,7 +197,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con diskann::get_bin_metadata(data_path, num_points, dim); std::cout << "metadata: file " << data_path << " has " << num_points << " points in " << dim << " dims" - << std::endl; + << std::endl; aligned_dim = ROUND_UP(dim, 8); auto index_config = diskann::IndexConfigBuilder() diff --git a/include/cached_io.h b/include/cached_io.h index 7347f8ce1..462f5d59c 100644 --- a/include/cached_io.h +++ b/include/cached_io.h @@ -43,7 +43,7 @@ class cached_ifstream cache_buf = new char[cacheSize]; reader.read(cache_buf, cacheSize); std::cout << "Opened: " << filename.c_str() << ", size: " << fsize << ", cache_size: " << cacheSize - << std::endl; + << std::endl; } catch (std::system_error &e) { diff --git a/include/utils.h b/include/utils.h index 3fef59776..23cc4d007 100644 --- a/include/utils.h +++ b/include/utils.h @@ -139,10 +139,10 @@ inline int delete_file(const std::string &fileName) if (rc != 0) { std::cerr << "Could not delete file: " << fileName - << " even though it exists. This might indicate a permissions " - "issue. " - "If you see this message, please contact the diskann team." - << std::endl; + << " even though it exists. This might indicate a permissions " + "issue. " + "If you see this message, please contact the diskann team." + << std::endl; } return rc; } @@ -249,8 +249,8 @@ inline void realloc_aligned(void **ptr, size_t size, size_t align) *ptr = ::_aligned_realloc(*ptr, size, align); #else std::cerr << "No aligned realloc on GCC. Must malloc and mem_align, " - "left it out for now." - << std::endl; + "left it out for now." + << std::endl; #endif if (*ptr == nullptr) report_memory_allocation_failure(); @@ -701,8 +701,7 @@ inline size_t save_bin(const std::string &filename, T *data, size_t npts, size_t size_t bytes_written = npts * ndims * sizeof(T) + 2 * sizeof(uint32_t); writer.write((char *)&npts_i32, sizeof(int)); writer.write((char *)&ndims_i32, sizeof(int)); - std::cout << "bin: #pts = " << npts << ", #dims = " << ndims << ", size = " << bytes_written << "B" - << std::endl; + std::cout << "bin: #pts = " << npts << ", #dims = " << ndims << ", size = " << bytes_written << "B" << std::endl; writer.write((char *)data, npts * ndims * sizeof(T)); writer.close(); @@ -743,7 +742,7 @@ inline void load_aligned_bin_impl(std::basic_istream &reader, size_t actua } rounded_dim = ROUND_UP(dim, 8); std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << ", aligned_dim = " << rounded_dim << "... " - << std::flush; + << std::flush; size_t allocSize = npts * rounded_dim * sizeof(T); std::cout << "allocating aligned memory of " << allocSize << " bytes... " << std::flush; alloc_aligned(((void **)&data), allocSize, 8 * sizeof(T)); @@ -939,7 +938,7 @@ inline void copy_aligned_data_from_file(const char *bin_file, T *&data, size_t & if (data == nullptr) { std::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." - << std::endl; + << std::endl; throw diskann::ANNException("Null pointer passed to copy_aligned_data_from_file function", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -1020,7 +1019,7 @@ inline bool validate_index_file_size(std::ifstream &in) if (actual_file_size != expected_file_size) { std::cerr << "Index file size error. Expected size (metadata): " << expected_file_size - << ", actual file size : " << actual_file_size << "." << std::endl; + << ", actual file size : " << actual_file_size << "." << std::endl; return false; } return true; @@ -1189,10 +1188,9 @@ inline void printProcessMemory(const char *message) PROCESS_MEMORY_COUNTERS counters; HANDLE h = GetCurrentProcess(); GetProcessMemoryInfo(h, &counters, sizeof(counters)); - std::cout << message - << " [Peaking Working Set size: " << counters.PeakWorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) - << "GB Working set size: " << counters.WorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) - << "GB Private bytes " << counters.PagefileUsage * 1.0 / (1024 * 1024 * 1024) << "GB]" << std::endl; + std::cout << message << " [Peaking Working Set size: " << counters.PeakWorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) + << "GB Working set size: " << counters.WorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) + << "GB Private bytes " << counters.PagefileUsage * 1.0 / (1024 * 1024 * 1024) << "GB]" << std::endl; } #else diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 604d00fda..0dd89ba3f 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -112,8 +112,8 @@ size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, else { std::cout << "Compress ratio: " << compress_ratio << " #new pq_chunks: " << chunks_by_cr - << " is either zero or greater than num_pq_chunks: " << num_pq_chunks - << ". num_pq_chunks is unchanged. " << std::endl; + << " is either zero or greater than num_pq_chunks: " << num_pq_chunks + << ". num_pq_chunks is unchanged. " << std::endl; } } else @@ -127,7 +127,7 @@ size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, num_pq_chunks = num_pq_chunks > MAX_PQ_CHUNKS ? MAX_PQ_CHUNKS : num_pq_chunks; std::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." - << std::endl; + << std::endl; return num_pq_chunks; } @@ -135,8 +135,8 @@ template T *generateRandomWarmup(uint64_t warmup_num, uint64_t warm { T *warmup = nullptr; warmup_num = 100000; - std::cout << "Generating random warmup file with dim " << warmup_dim << " and aligned dim " - << warmup_aligned_dim << std::flush; + std::cout << "Generating random warmup file with dim " << warmup_dim << " and aligned dim " << warmup_aligned_dim + << std::flush; diskann::alloc_aligned(((void **)&warmup), warmup_num * warmup_aligned_dim * sizeof(T), 8 * sizeof(T)); std::memset(warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); std::random_device rd; @@ -165,8 +165,8 @@ T *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, u { diskann::load_aligned_bin(files, cache_warmup_file, warmup, warmup_num, file_dim, file_aligned_dim); std::cout << "In the warmup file: " << cache_warmup_file << " File dim: " << file_dim - << " File aligned dim: " << file_aligned_dim << " Expected dim: " << warmup_dim - << " Expected aligned dim: " << warmup_aligned_dim << std::endl; + << " File aligned dim: " << file_aligned_dim << " Expected dim: " << warmup_dim + << " Expected aligned dim: " << warmup_aligned_dim << std::endl; if (file_dim != warmup_dim || file_aligned_dim != warmup_aligned_dim) { @@ -535,7 +535,7 @@ void breakup_dense_points(const std::string data_file, const std::string labels_ } } std::cout << "fraction of dense points with >= " << density << " labels = " << (float)dense_pts / (float)npts - << std::endl; + << std::endl; if (labels_per_point.size() != 0) { @@ -625,8 +625,8 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr // TODO: Make this honest when there is filter support if (full_index_ram < ram_budget * 1024 * 1024 * 1024) { - std::cout << "Full index fits in RAM budget, should consume at most " - << full_index_ram / (1024 * 1024 * 1024) << "GiBs, so building in one shot" << std::endl; + std::cout << "Full index fits in RAM budget, should consume at most " << full_index_ram / (1024 * 1024 * 1024) + << "GiBs, so building in one shot" << std::endl; diskann::IndexWriteParameters paras = diskann::IndexWriteParametersBuilder(L, R) .with_filter_list_size(Lf) @@ -1037,18 +1037,18 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const if (param_list.size() < 5 || param_list.size() > 9) { std::cout << "Correct usage of parameters is R (max degree)\n" - "L (indexing list size, better if >= R)\n" - "B (RAM limit of final index in GB)\n" - "M (memory limit while indexing)\n" - "T (number of threads for indexing)\n" - "B' (PQ bytes for disk index: optional parameter for " - "very large dimensional data)\n" - "reorder (set true to include full precision in data file" - ": optional paramter, use only when using disk PQ\n" - "build_PQ_byte (number of PQ bytes for inde build; set 0 to use " - "full precision vectors)\n" - "QD Quantized Dimension to overwrite the derived dim from B " - << std::endl; + "L (indexing list size, better if >= R)\n" + "B (RAM limit of final index in GB)\n" + "M (memory limit while indexing)\n" + "T (number of threads for indexing)\n" + "B' (PQ bytes for disk index: optional parameter for " + "very large dimensional data)\n" + "reorder (set true to include full precision in data file" + ": optional paramter, use only when using disk PQ\n" + "build_PQ_byte (number of PQ bytes for inde build; set 0 to use " + "full precision vectors)\n" + "QD Quantized Dimension to overwrite the derived dim from B " + << std::endl; return -1; } @@ -1164,7 +1164,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const } std::cout << "Starting index build: R=" << R << " L=" << L << " Query RAM budget: " << final_index_ram_limit - << " Indexing ram budget: " << indexing_ram_budget << " T: " << num_threads << std::endl; + << " Indexing ram budget: " << indexing_ram_budget << " T: " << num_threads << std::endl; auto s = std::chrono::high_resolution_clock::now(); @@ -1216,7 +1216,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const } std::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." - << std::endl; + << std::endl; generate_quantized_data(data_file_to_use, pq_pivots_path, pq_compressed_vectors_path, compareMetric, p_val, num_pq_chunks, use_opq, codebook_prefix); diff --git a/src/distance.cpp b/src/distance.cpp index 6ab8d70d9..ad7db8dea 100644 --- a/src/distance.cpp +++ b/src/distance.cpp @@ -627,15 +627,15 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) else if (m == diskann::Metric::INNER_PRODUCT) { std::cout << "Inner product: Using AVX2 implementation " - "AVXDistanceInnerProductFloat" - << std::endl; + "AVXDistanceInnerProductFloat" + << std::endl; return new diskann::AVXDistanceInnerProductFloat(); } else if (m == diskann::Metric::FAST_L2) { std::cout << "Fast_L2: Using AVX2 implementation with norm " - "memoization DistanceFastL2" - << std::endl; + "memoization DistanceFastL2" + << std::endl; return new diskann::DistanceFastL2(); } else @@ -666,16 +666,16 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) else { std::cout << "Older CPU. Using slow distance computation " - "SlowDistanceL2Int." - << std::endl; + "SlowDistanceL2Int." + << std::endl; return new diskann::SlowDistanceL2(); } } else if (m == diskann::Metric::COSINE) { std::cout << "Using either AVX or AVX2 for Cosine similarity " - "DistanceCosineInt8." - << std::endl; + "DistanceCosineInt8." + << std::endl; return new diskann::DistanceCosineInt8(); } else @@ -693,19 +693,19 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) { #ifdef _WINDOWS std::cout << "WARNING: AVX/AVX2 distance function not defined for Uint8. " - "Using " - "slow version. " - "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." - << std::endl; + "Using " + "slow version. " + "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." + << std::endl; #endif return new diskann::DistanceL2UInt8(); } else if (m == diskann::Metric::COSINE) { std::cout << "AVX/AVX2 distance function not defined for Uint8. Using " - "slow version SlowDistanceCosineUint8() " - "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." - << std::endl; + "slow version SlowDistanceCosineUint8() " + "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." + << std::endl; return new diskann::SlowDistanceCosineUInt8(); } else diff --git a/src/index.cpp b/src/index.cpp index 6e7a91b86..082917bf7 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -113,8 +113,8 @@ Index::Index(Metric m, const size_t dim, const size_t max_point this->_distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); this->_normalize_vecs = true; std::cout << "Normalizing vectors and using L2 for cosine " - "AVXNormalizedCosineDistanceFloat()." - << std::endl; + "AVXNormalizedCosineDistanceFloat()." + << std::endl; } else { @@ -395,8 +395,8 @@ void Index::save(const char *filename, bool compact_before_save else { std::cout << "Save index in a single file currently not supported. " - "Not saving the index." - << std::endl; + "Not saving the index." + << std::endl; } // If frozen points were temporarily compacted to _nd, move back to @@ -584,8 +584,8 @@ void Index::load(const char *filename, uint32_t num_threads, ui else { std::cout << "Single index file saving/loading support not yet " - "enabled. Not loading the index." - << std::endl; + "enabled. Not loading the index." + << std::endl; return; } @@ -655,9 +655,8 @@ void Index::load(const char *filename, uint32_t num_threads, ui reposition_frozen_point_to_end(); std::cout << "Num frozen points:" << _num_frozen_pts << " _nd: " << _nd << " _start: " << _start - << " size(_location_to_tag): " << _location_to_tag.size() - << " size(_tag_to_location):" << _tag_to_location.size() << " Max points: " << _max_points - << std::endl; + << " size(_location_to_tag): " << _location_to_tag.size() + << " size(_tag_to_location):" << _tag_to_location.size() << " Max points: " << _max_points << std::endl; // For incremental index, _query_scratch is initialized in the constructor. // For the bulk index, the params required to initialize _query_scratch @@ -728,8 +727,8 @@ size_t Index::load_graph(std::string filename, size_t expected_ #endif std::cout << "From graph header, expected_file_size: " << expected_file_size - << ", _max_observed_degree: " << _max_observed_degree << ", _start: " << _start - << ", file_frozen_pts: " << file_frozen_pts << std::endl; + << ", _max_observed_degree: " << _max_observed_degree << ", _start: " << _start + << ", file_frozen_pts: " << file_frozen_pts << std::endl; if (file_frozen_pts != _num_frozen_pts) { @@ -763,8 +762,8 @@ size_t Index::load_graph(std::string filename, size_t expected_ if (_max_points < expected_max_points) { std::cout << "Number of points in data: " << expected_max_points - << " is greater than max_points: " << _max_points - << " Setting max points to: " << expected_max_points << std::endl; + << " is greater than max_points: " << _max_points << " Setting max points to: " << expected_max_points + << std::endl; _final_graph.resize(expected_max_points + _num_frozen_pts); _max_points = expected_max_points; } @@ -823,8 +822,8 @@ size_t Index::load_graph(std::string filename, size_t expected_ } #endif - std::cout << "done. Index has " << nodes_read << " nodes and " << cc << " out-edges, _start is set to " - << _start << std::endl; + std::cout << "done. Index has " << nodes_read << " nodes and " << cc << " out-edges, _start is set to " << _start + << std::endl; return nodes_read; } @@ -1483,7 +1482,7 @@ void Index::link(const IndexWriteParameters ¶meters) if (node_ctr % 100000 == 0) { std::cout << "\r" << (100.0 * node_ctr) / (visit_order.size()) << "% of index build completed." - << std::flush; + << std::flush; } } @@ -1586,9 +1585,8 @@ void Index::prune_all_neighbors(const uint32_t max_degree, cons min = max; if (_nd > 0) { - std::cout << "Index built with degree: max:" << max - << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) << " min:" << min - << " count(deg<2):" << cnt << std::endl; + std::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) + << " min:" << min << " count(deg<2):" << cnt << std::endl; } } @@ -1712,7 +1710,7 @@ void Index::build_with_data_populated(const IndexWriteParameter cnt++; } std::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) - << " min:" << min << " count(deg<2):" << cnt << std::endl; + << " min:" << min << " count(deg<2):" << cnt << std::endl; _max_observed_degree = std::max((uint32_t)max, _max_observed_degree); _has_built = true; @@ -2154,7 +2152,7 @@ std::pair Index::search(const T *query, con if (L > scratch->get_L()) { std::cout << "Attempting to expand query scratch_space. Was created " - << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; + << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; scratch->resize_for_new_L(L); std::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; } @@ -2241,7 +2239,7 @@ std::pair Index::search_with_filters(const if (L > scratch->get_L()) { std::cout << "Attempting to expand query scratch_space. Was created " - << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; + << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; scratch->resize_for_new_L(L); std::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; } @@ -2257,8 +2255,7 @@ std::pair Index::search_with_filters(const } else { - std::cout << "No filtered medoid found. exitting " - << std::endl; // RKNOTE: If universal label found start there + std::cout << "No filtered medoid found. exitting " << std::endl; // RKNOTE: If universal label found start there throw diskann::ANNException("No filtered medoid found. exitting ", -1); } filter_vec.emplace_back(filter_label); @@ -2335,7 +2332,7 @@ size_t Index::search_with_tags(const T *query, const uint64_t K if (L > scratch->get_L()) { std::cout << "Attempting to expand query scratch_space. Was created " - << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; + << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; scratch->resize_for_new_L(L); std::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; } @@ -2546,7 +2543,7 @@ consolidation_report Index::consolidate_deletes(const IndexWrit if (_location_to_tag.size() + _delete_set->size() != _nd) { std::cerr << "Error: _location_to_tag.size (" << _location_to_tag.size() << ") + _delete_set->size (" - << _delete_set->size() << ") != _nd(" << _nd << ") "; + << _delete_set->size() << ") != _nd(" << _nd << ") "; return consolidation_report(diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR, 0, 0, 0, 0, 0, 0, 0); } @@ -2702,7 +2699,7 @@ template void Index void Index_data_compacted << std::endl; std::cout << "---------------------------------------------------------" - "------------" - << std::endl; + "------------" + << std::endl; } template void Index::count_nodes_at_bfs_levels() diff --git a/src/math_utils.cpp b/src/math_utils.cpp index b7b2e53aa..36b943dde 100644 --- a/src/math_utils.cpp +++ b/src/math_utils.cpp @@ -208,8 +208,8 @@ void compute_closest_centers(float *data, size_t num_points, size_t dim, float * void process_residuals(float *data_load, size_t num_points, size_t dim, float *cur_pivot_data, size_t num_centers, uint32_t *closest_centers, bool to_subtract) { - std::cout << "Processing residuals of " << num_points << " points in " << dim << " dimensions using " - << num_centers << " centers " << std::endl; + std::cout << "Processing residuals of " << num_points << " points in " << dim << " dimensions using " << num_centers + << " centers " << std::endl; #pragma omp parallel for schedule(static, 8192) for (int64_t n_iter = 0; n_iter < (int64_t)num_points; n_iter++) { @@ -339,8 +339,8 @@ float run_lloyds(float *data, size_t num_points, size_t dim, float *centers, con if (((i != 0) && ((old_residual - residual) / residual) < 0.00001) || (residual < std::numeric_limits::epsilon())) { - std::cout << "Residuals unchanged: " << old_residual << " becomes " << residual - << ". Early termination." << std::endl; + std::cout << "Residuals unchanged: " << old_residual << " becomes " << residual << ". Early termination." + << std::endl; break; } } @@ -381,10 +381,10 @@ void kmeanspp_selecting_pivots(float *data, size_t num_points, size_t dim, float if (num_points > 1 << 23) { std::cout << "ERROR: n_pts " << num_points - << " currently not supported for k-means++, maximum is " - "8388608. Falling back to random pivot " - "selection." - << std::endl; + << " currently not supported for k-means++, maximum is " + "8388608. Falling back to random pivot " + "selection." + << std::endl; selecting_pivots(data, num_points, dim, pivot_data, num_centers); return; } diff --git a/src/partition.cpp b/src/partition.cpp index 8afd784c1..d2bda0eba 100644 --- a/src/partition.cpp +++ b/src/partition.cpp @@ -50,8 +50,7 @@ void gen_random_slice(const std::string base_file, const std::string output_pref base_reader.read((char *)&npts_u32, sizeof(uint32_t)); base_reader.read((char *)&nd_u32, sizeof(uint32_t)); - std::cout << "Loading base " << base_file << ". #points: " << npts_u32 << ". #dim: " << nd_u32 << "." - << std::endl; + std::cout << "Loading base " << base_file << ". #points: " << npts_u32 << ". #dim: " << nd_u32 << "." << std::endl; sample_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); sample_writer.write((char *)&nd_u32, sizeof(uint32_t)); sample_id_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); @@ -80,7 +79,7 @@ void gen_random_slice(const std::string base_file, const std::string output_pref sample_writer.close(); sample_id_writer.close(); std::cout << "Wrote " << num_sampled_pts_u32 << " points to sample file: " << output_prefix + "_data.bin" - << std::endl; + << std::endl; } // streams data from the file, and samples each vector with probability p_val @@ -317,8 +316,8 @@ int shard_data_into_clusters(const std::string data_file, float *pivots, const s shard_idmap_writer[i].close(); } - std::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " - << total_count << " points across " << num_centers << " shards " << std::endl; + std::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " << total_count + << " points across " << num_centers << " shards " << std::endl; return 0; } @@ -401,8 +400,8 @@ int shard_data_into_clusters_only_ids(const std::string data_file, float *pivots shard_idmap_writer[i].close(); } - std::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " - << total_count << " points across " << num_centers << " shards " << std::endl; + std::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " << total_count + << " points across " << num_centers << " shards " << std::endl; return 0; } @@ -581,9 +580,8 @@ int partition_with_ram_budget(const std::string data_file, const double sampling if (cur_shard_ram_estimate > max_ram_usage) max_ram_usage = cur_shard_ram_estimate; } - std::cout << "With " << num_parts - << " parts, max estimated RAM usage: " << max_ram_usage / (1024 * 1024 * 1024) - << "GB, budget given is " << ram_budget << std::endl; + std::cout << "With " << num_parts << " parts, max estimated RAM usage: " << max_ram_usage / (1024 * 1024 * 1024) + << "GB, budget given is " << ram_budget << std::endl; if (max_ram_usage > 1024 * 1024 * 1024 * ram_budget) { fit_in_ram = false; diff --git a/src/pq.cpp b/src/pq.cpp index e08fe3a25..1641e4ed5 100644 --- a/src/pq.cpp +++ b/src/pq.cpp @@ -58,8 +58,8 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if (nr != 4 && nr != 5) { std::cout << "Error reading pq_pivots file " << pq_table_file - << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting " << 4 - << " or " << 5; + << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting " << 4 << " or " + << 5; throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -67,13 +67,13 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if (nr == 4) { std::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] - << " " << file_offset_data[3] << std::endl; + << " " << file_offset_data[3] << std::endl; } else if (nr == 5) { use_old_filetype = true; std::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] - << " " << file_offset_data[3] << file_offset_data[4] << std::endl; + << " " << file_offset_data[3] << file_offset_data[4] << std::endl; } else { @@ -90,7 +90,7 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if ((nr != NUM_PQ_CENTROIDS)) { std::cout << "Error reading pq_pivots file " << pq_table_file << ". file_num_centers = " << nr - << " but expecting " << NUM_PQ_CENTROIDS << " centers"; + << " but expecting " << NUM_PQ_CENTROIDS << " centers"; throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -106,7 +106,7 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if ((nr != this->ndims) || (nc != 1)) { std::cerr << "Error reading centroids from pq_pivots file " << pq_table_file << ". file_dim = " << nr - << ", file_cols = " << nc << " but expecting " << this->ndims << " entries in 1 dimension."; + << ", file_cols = " << nc << " but expecting " << this->ndims << " entries in 1 dimension."; throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -125,13 +125,13 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if (nc != 1 || (nr != num_chunks + 1 && num_chunks != 0)) { std::cerr << "Error loading chunk offsets file. numc: " << nc << " (should be 1). numr: " << nr - << " (should be " << num_chunks + 1 << " or 0 if we need to infer)" << std::endl; + << " (should be " << num_chunks + 1 << " or 0 if we need to infer)" << std::endl; throw diskann::ANNException("Error loading chunk offsets file", -1, __FUNCSIG__, __FILE__, __LINE__); } this->n_chunks = nr - 1; std::cout << "Loaded PQ Pivots: #ctrs: " << NUM_PQ_CENTROIDS << ", #dims: " << this->ndims - << ", #chunks: " << this->n_chunks << std::endl; + << ", #chunks: " << this->n_chunks << std::endl; if (file_exists(rotmat_file)) { @@ -460,7 +460,7 @@ int generate_pq_pivots(const float *const passed_train_data, size_t num_train, u std::unique_ptr closest_center = std::make_unique(num_train); std::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " - << chunk_offsets[i + 1] << ")" << std::endl; + << chunk_offsets[i + 1] << ")" << std::endl; #pragma omp parallel for schedule(static, 65536) for (int64_t j = 0; j < (int64_t)num_train; j++) @@ -492,7 +492,7 @@ int generate_pq_pivots(const float *const passed_train_data, size_t num_train, u diskann::save_bin(pq_pivots_path.c_str(), cumul_bytes.data(), cumul_bytes.size(), 1, 0); std::cout << "Saved pq pivot data to " << pq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] - << "B." << std::endl; + << "B." << std::endl; return 0; } @@ -624,7 +624,7 @@ int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32 std::unique_ptr closest_center = std::make_unique(num_train); std::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " - << chunk_offsets[i + 1] << ")" << std::endl; + << chunk_offsets[i + 1] << ")" << std::endl; #pragma omp parallel for schedule(static, 65536) for (int64_t j = 0; j < (int64_t)num_train; j++) @@ -700,7 +700,7 @@ int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32 diskann::save_bin(opq_pivots_path.c_str(), cumul_bytes.data(), cumul_bytes.size(), 1, 0); std::cout << "Saved opq pivot data to " << opq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] - << "B." << std::endl; + << "B." << std::endl; std::string rotmat_path = opq_pivots_path + "_rotation_matrix.bin"; diskann::save_bin(rotmat_path.c_str(), rotmat_tr.get(), dim, dim); @@ -749,7 +749,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if (nr != 4) { std::cout << "Error reading pq_pivots file " << pq_pivots_path - << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting 4."; + << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting 4."; throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -759,8 +759,8 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if ((nr != num_centers) || (nc != dim)) { std::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_num_centers = " << nr - << ", file_dim = " << nc << " but expecting " << num_centers << " centers in " << dim - << " dimensions."; + << ", file_dim = " << nc << " but expecting " << num_centers << " centers in " << dim + << " dimensions."; throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -770,7 +770,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if ((nr != dim) || (nc != 1)) { std::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_dim = " << nr - << ", file_cols = " << nc << " but expecting " << dim << " entries in 1 dimension."; + << ", file_cols = " << nc << " but expecting " << dim << " entries in 1 dimension."; throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -780,7 +780,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if (nr != (uint64_t)num_pq_chunks + 1 || nc != 1) { std::cout << "Error reading pq_pivots file at chunk offsets; file has nr=" << nr << ",nc=" << nc - << ", expecting nr=" << num_pq_chunks + 1 << ", nc=1." << std::endl; + << ", expecting nr=" << num_pq_chunks + 1 << ", nc=1." << std::endl; throw diskann::ANNException("Error reading pq_pivots file at chunk offsets.", -1, __FUNCSIG__, __FILE__, __LINE__); } diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index 55546ca4a..3fc03f379 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -48,15 +48,15 @@ PQFlashIndex::PQFlashIndex(std::shared_ptr &fileRe if (std::is_floating_point::value) { std::cout << "Cosine metric chosen for (normalized) float data." - "Changing distance to L2 to boost accuracy." - << std::endl; + "Changing distance to L2 to boost accuracy." + << std::endl; metric = diskann::Metric::L2; } else { std::cerr << "WARNING: Cannot normalize integral data types." - << " This may result in erroneous results or poor recall." - << " Consider using L2 distance with integral data types." << std::endl; + << " This may result in erroneous results or poor recall." + << " Consider using L2 distance with integral data types." << std::endl; } } @@ -302,7 +302,7 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: if (num_nodes_to_cache > tenp_nodes) { std::cout << "Reducing nodes to cache from: " << num_nodes_to_cache << " to: " << tenp_nodes - << "(10 percent of total nodes:" << this->num_points << ")" << std::endl; + << "(10 percent of total nodes:" << this->num_points << ")" << std::endl; num_nodes_to_cache = tenp_nodes == 0 ? 1 : tenp_nodes; } std::cout << "Caching " << num_nodes_to_cache << "..." << std::endl; @@ -422,8 +422,8 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: } } - std::cout << ". #nodes: " << node_set.size() - prev_node_set_size - << ", #nodes thus far: " << node_set.size() << std::endl; + std::cout << ". #nodes: " << node_set.size() - prev_node_set_size << ", #nodes thus far: " << node_set.size() + << std::endl; prev_node_set_size = node_set.size(); lvl++; } @@ -439,7 +439,7 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: std::cout << "Level: " << lvl << std::flush; std::cout << ". #nodes: " << node_list.size() - prev_node_set_size << ", #nodes thus far: " << node_list.size() - << std::endl; + << std::endl; std::cout << "done" << std::endl; } @@ -595,7 +595,7 @@ void PQFlashIndex::get_label_file_metadata(std::string map_file, uint } std::cout << "Labels file metadata: num_points: " << num_pts << ", #total_labels: " << num_total_labels - << std::endl; + << std::endl; infile.close(); } @@ -856,7 +856,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons #endif std::cout << "Loaded PQ centroids and in-memory compressed vectors. #points: " << num_points - << " #dim: " << data_dim << " #aligned_dim: " << aligned_dim << " #chunks: " << n_chunks << std::endl; + << " #dim: " << data_dim << " #aligned_dim: " << aligned_dim << " #chunks: " << n_chunks << std::endl; if (n_chunks > MAX_PQ_CHUNKS) { @@ -884,7 +884,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons disk_bytes_per_point = disk_pq_n_chunks * sizeof(uint8_t); // revising disk_bytes_per_point since DISK PQ is used. std::cout << "Disk index uses PQ data compressed down to " << disk_pq_n_chunks << " bytes per point." - << std::endl; + << std::endl; } // read index metadata @@ -918,8 +918,8 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons if (disk_nnodes != num_points) { std::cout << "Mismatch in #points for compressed data file and disk " - "index file: " - << disk_nnodes << " vs " << num_points << std::endl; + "index file: " + << disk_nnodes << " vs " << num_points << std::endl; return -1; } @@ -947,7 +947,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons if (this->num_frozen_points == 1) { std::cout << " Detected frozen point in index at location " << this->frozen_location - << ". Will not output it at search time." << std::endl; + << ". Will not output it at search time." << std::endl; } READ_U64(index_metadata, this->reorder_data_exists); @@ -1012,8 +1012,8 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons { #endif std::cout << "Centroid data file not found. Using corresponding vectors " - "for the medoids " - << std::endl; + "for the medoids " + << std::endl; use_medoids_data_as_centroids(); } else diff --git a/src/utils.cpp b/src/utils.cpp index be031664d..6bab1dbbc 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -193,11 +193,10 @@ double calculate_recall(uint32_t num_queries, uint32_t *gold_std, float *gs_dist if ((active_points_count < recall_at && !active_tags.empty()) && !printed) { - std::cout << "Warning: Couldn't find enough closest neighbors " << active_points_count << "/" - << recall_at - << " from " - "truthset for query # " - << i << ". Will result in under-reported value of recall." << std::endl; + std::cout << "Warning: Couldn't find enough closest neighbors " << active_points_count << "/" << recall_at + << " from " + "truthset for query # " + << i << ". Will result in under-reported value of recall." << std::endl; printed = true; } if (gs_dist != nullptr) @@ -270,8 +269,7 @@ void get_bin_metadata(AlignedFileReader &reader, size_t &npts, size_t &ndim, siz { npts = buf[0]; ndim = buf[1]; - std::cout << "File has: " << npts << " points, " << ndim << " dimensions at offset: " << offset - << std::endl; + std::cout << "File has: " << npts << " points, " << ndim << " dimensions at offset: " << offset << std::endl; } else { @@ -334,7 +332,7 @@ void copy_aligned_data_from_file(AlignedFileReader &reader, T *&data, size_t &np if (data == nullptr) { std::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." - << std::endl; + << std::endl; throw diskann::ANNException("Null pointer passed to copy_aligned_data_from_file()", -1, __FUNCSIG__, __FILE__, __LINE__); } diff --git a/src/windows_aligned_file_reader.cpp b/src/windows_aligned_file_reader.cpp index 06646a207..1031dcbd0 100644 --- a/src/windows_aligned_file_reader.cpp +++ b/src/windows_aligned_file_reader.cpp @@ -163,8 +163,8 @@ void WindowsAlignedFileReader::read(std::vector &read_reqs, IOConte if (error != WAIT_TIMEOUT) { std::cerr << "GetQueuedCompletionStatus() failed " - "with error = " - << error << std::endl; + "with error = " + << error << std::endl; throw diskann::ANNException("GetQueuedCompletionStatus failed with error: ", error, __FUNCSIG__, __FILE__, __LINE__); }