Skip to content

Commit

Permalink
add CI test for 1536D rand vector on disk
Browse files (browse the repository at this point in the history)
  • Loading branch information
harsha-simhadri committed Aug 14, 2023
1 parent 2f8528d commit 30cc67b
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
9 changes: 9 additions & 0 deletions .github/actions/generate-random/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,18 @@ runs:
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_10K_norm1.0.bin -D 10 -N 10000 --norm 1.0
dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0
dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0
echo "Generating random 1536D float vectors for index"
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_10K_norm1.0.bin -D 1536 -N 10000 --norm 1.0
echo "Generating random vectors for query"
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_1K_norm1.0.bin -D 10 -N 1000 --norm 1.0
dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0
dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0
echo "Generating random 1536D float vectors for query"
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_1K_norm1.0.bin -D 1536 -N 1000 --norm 1.0
echo "Computing ground truth for floats across l2, mips, and cosine distance functions"
dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100
dist/bin/compute_groundtruth --data_type float --dist_fn mips --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100
Expand All @@ -32,4 +38,7 @@ runs:
dist/bin/compute_groundtruth --data_type uint8 --dist_fn mips --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/mips_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100
dist/bin/compute_groundtruth --data_type uint8 --dist_fn cosine --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100
echo "Computing ground truth for float 1536D in l2 distance functions"
dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_1536D_10K_norm1.0.bin --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_10K_norm1.0_1536D_1K_norm1.0_gt100 --K 100
shell: bash
5 changes: 5 additions & 0 deletions .github/workflows/disk-pq.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ jobs:
run: |
dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1
dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16
# CI step for the 1536-dimensional random float dataset: builds a disk index
# with the l2 distance function from data/rand_float_1536D_10K_norm1.0.bin,
# then searches it with data/rand_float_1536D_1K_norm1.0.bin against the
# matching l2 ground-truth file, passing --fail_if_recall_below 70.
# The `if: success() || failure()` condition lets this step run even when an
# earlier step in the job has failed.
# NOTE(review): -B 0.003 is larger than the 0.00003 used by the 10D step above;
# presumably raised for the larger vectors - confirm against build_disk_index docs.
- name: build and search disk index (1536D, one shot graph build, L2, no diskPQ) (float)
if: success() || failure()
run: |
dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_1536D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_1536D_10K_norm1.0_diskfull_oneshot -R 16 -L 32 -B 0.003 -M 1
dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_1536D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_10K_norm1.0_1536D_1K_norm1.0_gt100 --recall_at 5 -L 200 -W 2 --num_nodes_to_cache 100 -T 16
- name: build and search disk index (one shot graph build, L2, no diskPQ) (int8)
if: success() || failure()
run: |
Expand Down
2 changes: 1 addition & 1 deletion src/pq_flash_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1252,7 +1252,7 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
char *sector_scratch = query_scratch->sector_scratch;
uint64_t &sector_scratch_idx = query_scratch->sector_idx;
const uint64_t num_sectors_per_node =
_nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_disk_bytes_per_point, defaults::SECTOR_LEN);
_nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_disk_bytes_per_point, defaults::SECTOR_LEN);

// query <-> PQ chunk centers distances
_pq_table.preprocess_query(query_rotated); // center the query and rotate if
Expand Down

0 comments on commit 30cc67b

Please sign in to comment.