From 30cc67b4d1baeb2414b46e73be5214d2579bc154 Mon Sep 17 00:00:00 2001 From: harsha vardhan simhadri Date: Sun, 13 Aug 2023 18:59:51 -0700 Subject: [PATCH] add CI test for 1536D rand vector on disk --- .github/actions/generate-random/action.yml | 9 +++++++++ .github/workflows/disk-pq.yml | 5 +++++ src/pq_flash_index.cpp | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/actions/generate-random/action.yml b/.github/actions/generate-random/action.yml index 75554773e..d1440a9cd 100644 --- a/.github/actions/generate-random/action.yml +++ b/.github/actions/generate-random/action.yml @@ -11,12 +11,18 @@ runs: dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_10K_norm1.0.bin -D 10 -N 10000 --norm 1.0 dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0 dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0 + + echo "Generating random 1536D float vectors for index" + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_10K_norm1.0.bin -D 1536 -N 10000 --norm 1.0 echo "Generating random vectors for query" dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_1K_norm1.0.bin -D 10 -N 1000 --norm 1.0 dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0 dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0 + echo "Generating random 1536D float vectors for query" + dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_1K_norm1.0.bin -D 1536 -N 1000 --norm 1.0 + echo "Computing ground truth for floats across l2, mips, and cosine distance functions" dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file 
data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100 dist/bin/compute_groundtruth --data_type float --dist_fn mips --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100 @@ -32,4 +38,7 @@ runs: dist/bin/compute_groundtruth --data_type uint8 --dist_fn mips --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/mips_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 dist/bin/compute_groundtruth --data_type uint8 --dist_fn cosine --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100 + echo "Computing ground truth for float 1536D in l2 distance functions" + dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_1536D_10K_norm1.0.bin --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_10K_norm1.0_1536D_1K_norm1.0_gt100 --K 100 + shell: bash diff --git a/.github/workflows/disk-pq.yml b/.github/workflows/disk-pq.yml index 35c662184..3fa1f35e2 100644 --- a/.github/workflows/disk-pq.yml +++ b/.github/workflows/disk-pq.yml @@ -34,6 +34,11 @@ jobs: run: | dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1 dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 + - name: build and search disk index (1536D, one shot graph build, L2, no 
diskPQ) (float) + if: success() || failure() + run: | + dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_1536D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_1536D_10K_norm1.0_diskfull_oneshot -R 16 -L 32 -B 0.003 -M 1 + dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_1536D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_10K_norm1.0_1536D_1K_norm1.0_gt100 --recall_at 5 -L 200 -W 2 --num_nodes_to_cache 100 -T 16 - name: build and search disk index (one shot graph build, L2, no diskPQ) (int8) if: success() || failure() run: | diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index b83572da5..2aa60d1e3 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -1252,7 +1252,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t char *sector_scratch = query_scratch->sector_scratch; uint64_t &sector_scratch_idx = query_scratch->sector_idx; const uint64_t num_sectors_per_node = - _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_disk_bytes_per_point, defaults::SECTOR_LEN); + _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_disk_bytes_per_point, defaults::SECTOR_LEN); // query <-> PQ chunk centers distances _pq_table.preprocess_query(query_rotated); // center the query and rotate if