allow multi-sector layout for large vectors #417

Merged Aug 14, 2023 · 26 commits

Commits (changes below are shown from 18 of the 26 commits)
f234f73
make sector node an inline function
harsha-simhadri Aug 9, 2023
db6674e
convert offset_node macro to inline method
harsha-simhadri Aug 9, 2023
35c0f9a
rename member vars to start with underscore in pq_flash_index.h
harsha-simhadri Aug 9, 2023
537cec0
rename member vars to start with underscore in pq_flash_index.h
harsha-simhadri Aug 9, 2023
81c795b
added support in create_disk_index
harsha-simhadri Aug 10, 2023
4be0e12
add read sector util
harsha-simhadri Aug 10, 2023
64b0e8e
add read sector util
harsha-simhadri Aug 10, 2023
b24da69
load_cache_list now uses read_blocks util
harsha-simhadri Aug 10, 2023
2dd8282
allow nullptr for read_nodes
harsha-simhadri Aug 11, 2023
6122176
BFS cache generation uses util
harsha-simhadri Aug 11, 2023
e2559e2
clang format
harsha-simhadri Aug 11, 2023
721a888
fix aligned_free error in bfs traversal; move medoid coord load to re…
harsha-simhadri Aug 11, 2023
3b9838c
rename member vars of PQ Flash index to start with underscore
harsha-simhadri Aug 11, 2023
252704b
clang format
harsha-simhadri Aug 11, 2023
2f8528d
add num_sectors info to cache_beam_Search
harsha-simhadri Aug 14, 2023
30cc67b
add CI test for 1536D rand vector on disk
harsha-simhadri Aug 14, 2023
2b51ac6
increase build params in CI for 1536D test
harsha-simhadri Aug 14, 2023
52a9991
increase build params in CI for 1536D test
harsha-simhadri Aug 14, 2023
c4673d0
change test from 1536 to 1024
harsha-simhadri Aug 14, 2023
c3afe29
fix errors in num_Sector calculation
harsha-simhadri Aug 14, 2023
9571fed
add more tests and a seperate CI file for multi-sector
harsha-simhadri Aug 14, 2023
f9330b8
split high dim random data gen into seperate yml file
harsha-simhadri Aug 14, 2023
2745c1a
fix gt path
harsha-simhadri Aug 14, 2023
0075f79
fix gt path
harsha-simhadri Aug 14, 2023
0c51f5c
fix gt path
harsha-simhadri Aug 14, 2023
a3b404a
increase CI test search param
harsha-simhadri Aug 14, 2023
9 changes: 9 additions & 0 deletions .github/actions/generate-random/action.yml
@@ -11,12 +11,18 @@ runs:
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_10K_norm1.0.bin -D 10 -N 10000 --norm 1.0
dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0
dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_10K_norm50.0.bin -D 10 -N 10000 --norm 50.0

echo "Generating random 1536D float vectors for index"
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_10K_norm1.0.bin -D 1536 -N 10000 --norm 1.0

echo "Generating random vectors for query"
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_10D_1K_norm1.0.bin -D 10 -N 1000 --norm 1.0
dist/bin/rand_data_gen --data_type int8 --output_file data/rand_int8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0
dist/bin/rand_data_gen --data_type uint8 --output_file data/rand_uint8_10D_1K_norm50.0.bin -D 10 -N 1000 --norm 50.0

echo "Generating random 1536D float vectors for query"
dist/bin/rand_data_gen --data_type float --output_file data/rand_float_1536D_1K_norm1.0.bin -D 1536 -N 1000 --norm 1.0

echo "Computing ground truth for floats across l2, mips, and cosine distance functions"
dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100
dist/bin/compute_groundtruth --data_type float --dist_fn mips --base_file data/rand_float_10D_10K_norm1.0.bin --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --K 100
@@ -32,4 +38,7 @@ runs:
dist/bin/compute_groundtruth --data_type uint8 --dist_fn mips --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/mips_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100
dist/bin/compute_groundtruth --data_type uint8 --dist_fn cosine --base_file data/rand_uint8_10D_10K_norm50.0.bin --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 --K 100

echo "Computing ground truth for float 1536D in l2 distance functions"
dist/bin/compute_groundtruth --data_type float --dist_fn l2 --base_file data/rand_float_1536D_10K_norm1.0.bin --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_10K_norm1.0_1536D_1K_norm1.0_gt100 --K 100

shell: bash
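The .bin files produced here follow DiskANN's usual binary layout: two uint32 header fields (point count, then dimension) followed by row-major vector data. As a sanity check on the new 1536D file, a minimal sketch (the path and header layout are assumptions based on the repo's bin-format convention, not part of this diff):

```cpp
#include <cstdint>
#include <fstream>
#include <iostream>

int main()
{
    // Assumed layout: [npts : uint32][ndims : uint32][npts * ndims floats]
    std::ifstream in("data/rand_float_1536D_10K_norm1.0.bin", std::ios::binary);
    uint32_t npts = 0, ndims = 0;
    in.read(reinterpret_cast<char *>(&npts), sizeof(npts));
    in.read(reinterpret_cast<char *>(&ndims), sizeof(ndims));
    std::cout << npts << " points, " << ndims << " dims\n"; // expect: 10000 points, 1536 dims
    return 0;
}
```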
5 changes: 5 additions & 0 deletions .github/workflows/disk-pq.yml
@@ -34,6 +34,11 @@ jobs:
run: |
dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_10D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot -R 16 -L 32 -B 0.00003 -M 1
dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_10D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/l2_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16
- name: build and search disk index (1536D, one shot graph build, L2, no diskPQ) (float)
if: success() || failure()
run: |
dist/bin/build_disk_index --data_type float --dist_fn l2 --data_path data/rand_float_1536D_10K_norm1.0.bin --index_path_prefix data/disk_index_l2_rand_float_1536D_10K_norm1.0_diskfull_oneshot -R 30 -L 500 -B 0.003 -M 1
dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_float_1536D_10K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_float_1536D_1K_norm1.0.bin --gt_file data/l2_rand_float_1536D_10K_norm1.0_1536D_1K_norm1.0_gt100 --recall_at 5 -L 200 -W 2 --num_nodes_to_cache 100 -T 16
- name: build and search disk index (one shot graph build, L2, no diskPQ) (int8)
if: success() || failure()
run: |
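The new 1536D step is what exercises the multi-sector path: 1536 float coordinates alone take 6144 bytes, already larger than a 4096-byte sector, so nnodes_per_sector computes to 0 regardless of graph degree. A back-of-the-envelope check, assuming the graph reaches the full build degree R = 30 (a sketch; SECTOR_LEN = 4096 is DiskANN's default):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    const uint64_t SECTOR_LEN = 4096;
    const uint64_t ndims = 1536, R = 30;
    // max_node_len = coords + (degree + 1) uint32 slots, as in create_disk_layout
    const uint64_t max_node_len = ndims * sizeof(float) + (R + 1) * sizeof(uint32_t);
    std::cout << "max_node_len      = " << max_node_len << " B\n";                               // 6268
    std::cout << "nnodes_per_sector = " << SECTOR_LEN / max_node_len << "\n";                    // 0
    std::cout << "sectors per node  = " << (max_node_len + SECTOR_LEN - 1) / SECTOR_LEN << "\n"; // 2
    return 0;
}
```

So each of the 10000 nodes occupies two sectors, which the old single-node-per-sector layout could not represent.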
12 changes: 6 additions & 6 deletions apps/search_disk_index.cpp
@@ -118,13 +118,13 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
{
return res;
}
// cache bfs levels

std::vector<uint32_t> node_list;
diskann::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl;
//_pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list);
if (num_nodes_to_cache > 0)
_pFlashIndex->generate_cache_list_from_sample_queries(warmup_query_file, 15, 6, num_nodes_to_cache, num_threads,
node_list);
diskann::cout << "Caching " << num_nodes_to_cache << " nodes around medoid(s)" << std::endl;
_pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list);
// if (num_nodes_to_cache > 0)
// _pFlashIndex->generate_cache_list_from_sample_queries(warmup_query_file, 15, 6, num_nodes_to_cache,
// num_threads, node_list);
_pFlashIndex->load_cache_list(node_list);
node_list.clear();
node_list.shrink_to_fit();
Expand Down
118 changes: 75 additions & 43 deletions include/pq_flash_index.h
@@ -109,81 +109,113 @@ template <typename T, typename LabelT = uint32_t> class PQFlashIndex
DISKANN_DLLEXPORT void generate_random_labels(std::vector<LabelT> &labels, const uint32_t num_labels,
const uint32_t nthreads);

// index info
// sector # on disk where node_id is present within the graph part
DISKANN_DLLEXPORT uint64_t get_node_sector(uint64_t node_id);

// ptr to start of the node
DISKANN_DLLEXPORT char *offset_to_node(char *sector_buf, uint64_t node_id);

// returns region of `node_buf` containing [NNBRS][NBR_ID(uint32_t)]
DISKANN_DLLEXPORT uint32_t *offset_to_node_nhood(char *node_buf);

// returns region of `node_buf` containing [COORD(T)]
DISKANN_DLLEXPORT T *offset_to_node_coords(char *node_buf);

//
// node_ids: input list of node_ids to be read
// coord_buffers: pointers to pre-allocated buffers that coords need to be copied to. If null, don't copy.
// nbr_buffers: pre-allocated buffers to copy neighbors into
//
// returns a vector of bool, one per node_id: true if the read succeeded, else false
//
DISKANN_DLLEXPORT std::vector<bool> read_nodes(const std::vector<uint32_t> &node_ids,
std::vector<T *> &coord_buffers,
std::vector<std::pair<uint32_t, uint32_t *>> &nbr_buffers);

// index info for multi-node sectors
// nhood of node `i` is in sector: [i / nnodes_per_sector]
// offset in sector: [(i % nnodes_per_sector) * max_node_len]
// nnbrs of node `i`: *(unsigned*) (buf)
// nbrs of node `i`: ((unsigned*)buf) + 1

uint64_t max_node_len = 0, nnodes_per_sector = 0, max_degree = 0;
//
// index info for multi-sector nodes
// nhood of node `i` is in sector: [i * DIV_ROUND_UP(_max_node_len, SECTOR_LEN)]
// offset in sector: [0]
//
// Common info
// coords start at offset 0 of the node's record
// #nbrs of node `i`: *(unsigned*) (offset + disk_bytes_per_point)
// nbrs of node `i` : ((unsigned*) (offset + disk_bytes_per_point)) + 1

uint64_t _max_node_len = 0;
uint64_t _nnodes_per_sector = 0; // 0 for multi-sector nodes, >0 for multi-node sectors
uint64_t _max_degree = 0;

// Data used for searching with re-order vectors
uint64_t ndims_reorder_vecs = 0, reorder_data_start_sector = 0, nvecs_per_sector = 0;
uint64_t _ndims_reorder_vecs = 0;
uint64_t _reorder_data_start_sector = 0;
uint64_t _nvecs_per_sector = 0;

diskann::Metric metric = diskann::Metric::L2;

// used only for inner product search to re-scale the result value
// (due to the pre-processing of base during index build)
float max_base_norm = 0.0f;
float _max_base_norm = 0.0f;

// data info
uint64_t num_points = 0;
uint64_t num_frozen_points = 0;
uint64_t frozen_location = 0;
uint64_t data_dim = 0;
uint64_t disk_data_dim = 0; // will be different from data_dim only if we use
// PQ for disk data (very large dimensionality)
uint64_t aligned_dim = 0;
uint64_t disk_bytes_per_point = 0;

std::string disk_index_file;
std::vector<std::pair<uint32_t, uint32_t>> node_visit_counter;
uint64_t _num_points = 0;
uint64_t _num_frozen_points = 0;
uint64_t _frozen_location = 0;
uint64_t _data_dim = 0;
uint64_t _aligned_dim = 0;
uint64_t _disk_bytes_per_point = 0; // bytes per point on disk; differs from _data_dim * sizeof(T) when disk-PQ is used

std::string _disk_index_file;
std::vector<std::pair<uint32_t, uint32_t>> _node_visit_counter;

// PQ data
// n_chunks = # of chunks ndims is split into
// data: char * n_chunks
// _n_chunks = # of chunks ndims is split into
// data: char * _n_chunks
// chunk_size = chunk size of each dimension chunk
// pq_tables = float* [[2^8 * [chunk_size]] * n_chunks]
// pq_tables = float* [[2^8 * [chunk_size]] * _n_chunks]
uint8_t *data = nullptr;
uint64_t n_chunks;
FixedChunkPQTable pq_table;
uint64_t _n_chunks;
FixedChunkPQTable _pq_table;

// distance comparator
std::shared_ptr<Distance<T>> dist_cmp;
std::shared_ptr<Distance<float>> dist_cmp_float;
std::shared_ptr<Distance<T>> _dist_cmp;
std::shared_ptr<Distance<float>> _dist_cmp_float;

// for very large datasets: we use PQ even for the disk resident index
bool use_disk_index_pq = false;
uint64_t disk_pq_n_chunks = 0;
FixedChunkPQTable disk_pq_table;
bool _use_disk_index_pq = false;
uint64_t _disk_pq_n_chunks = 0;
FixedChunkPQTable _disk_pq_table;

// medoid/start info

// graph has one entry point by default,
// we can optionally have multiple starting points
uint32_t *medoids = nullptr;
uint32_t *_medoids = nullptr;
// defaults to 1
size_t num_medoids;
size_t _num_medoids;
// by default, it is empty. If there are multiple
// centroids, we pick the medoid corresponding to the
// closest centroid as the starting point of search
float *centroid_data = nullptr;
float *_centroid_data = nullptr;

// nhood_cache
unsigned *nhood_cache_buf = nullptr;
tsl::robin_map<uint32_t, std::pair<uint32_t, uint32_t *>> nhood_cache;
// nhood_cache; the uint32_t* values in _nhood_cache point into _nhood_cache_buf
unsigned *_nhood_cache_buf = nullptr;
tsl::robin_map<uint32_t, std::pair<uint32_t, uint32_t *>> _nhood_cache;

// coord_cache
T *coord_cache_buf = nullptr;
tsl::robin_map<uint32_t, T *> coord_cache;
// coord_cache; the T* values in _coord_cache point into _coord_cache_buf
T *_coord_cache_buf = nullptr;
tsl::robin_map<uint32_t, T *> _coord_cache;

// thread-specific scratch
ConcurrentQueue<SSDThreadData<T> *> thread_data;
uint64_t max_nthreads;
bool load_flag = false;
bool count_visited_nodes = false;
bool reorder_data_exists = false;
uint64_t reoreder_data_offset = 0;
ConcurrentQueue<SSDThreadData<T> *> _thread_data;
uint64_t _max_nthreads;
bool _load_flag = false;
bool _count_visited_nodes = false;
bool _reorder_data_exists = false;
uint64_t _reoreder_data_offset = 0;

// filter support
uint32_t *_pts_to_label_offsets = nullptr;
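Taken together, these comments pin down the disk addressing for both regimes. Below is a self-contained sketch of what the new get_node_sector / offset_to_node accessors plausibly compute; the free-function form and names are illustrative, and the `1 +` skipping the metadata sector is an assumption based on the "number of sectors (1 for meta data)" comment in disk_utils.cpp:

```cpp
#include <cstdint>

constexpr uint64_t SECTOR_LEN = 4096; // DiskANN's default sector size

inline uint64_t div_round_up(uint64_t x, uint64_t y) { return (x + y - 1) / y; }

// Sector on disk holding the start of node `node_id`'s record.
inline uint64_t node_sector(uint64_t node_id, uint64_t max_node_len, uint64_t nnodes_per_sector)
{
    return 1 + (nnodes_per_sector > 0
                    ? node_id / nnodes_per_sector                        // multi-node sectors
                    : node_id * div_round_up(max_node_len, SECTOR_LEN)); // multi-sector nodes
}

// Start of the node's record inside a buffer holding its first sector.
inline char *node_in_sector(char *sector_buf, uint64_t node_id, uint64_t max_node_len,
                            uint64_t nnodes_per_sector)
{
    uint64_t off = (nnodes_per_sector == 0) ? 0 // multi-sector nodes start on a sector boundary
                                            : (node_id % nnodes_per_sector) * max_node_len;
    return sector_buf + off;
}
```

With the 1536D example (max_node_len = 6268, nnodes_per_sector = 0), node 5 starts at sector 1 + 5 * 2 = 11.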
88 changes: 69 additions & 19 deletions src/disk_utils.cpp
@@ -895,20 +895,22 @@ void create_disk_layout(const std::string base_file, const std::string mem_index
if (vamana_frozen_num == 1)
vamana_frozen_loc = medoid;
max_node_len = (((uint64_t)width_u32 + 1) * sizeof(uint32_t)) + (ndims_64 * sizeof(T));
nnodes_per_sector = defaults::SECTOR_LEN / max_node_len;
nnodes_per_sector = defaults::SECTOR_LEN / max_node_len; // 0 if max_node_len > SECTOR_LEN

diskann::cout << "medoid: " << medoid << "B" << std::endl;
diskann::cout << "max_node_len: " << max_node_len << "B" << std::endl;
diskann::cout << "nnodes_per_sector: " << nnodes_per_sector << "B" << std::endl;

// defaults::SECTOR_LEN buffer for each sector
std::unique_ptr<char[]> sector_buf = std::make_unique<char[]>(defaults::SECTOR_LEN);
std::unique_ptr<char[]> multisector_buf = std::make_unique<char[]>(ROUND_UP(max_node_len, defaults::SECTOR_LEN));
std::unique_ptr<char[]> node_buf = std::make_unique<char[]>(max_node_len);
uint32_t &nnbrs = *(uint32_t *)(node_buf.get() + ndims_64 * sizeof(T));
uint32_t *nhood_buf = (uint32_t *)(node_buf.get() + (ndims_64 * sizeof(T)) + sizeof(uint32_t));

// number of graph sectors (one additional sector is reserved for metadata)
uint64_t n_sectors = ROUND_UP(npts_64, nnodes_per_sector) / nnodes_per_sector;
uint64_t n_sectors = nnodes_per_sector > 0 ? ROUND_UP(npts_64, nnodes_per_sector) / nnodes_per_sector
: npts_64 * DIV_ROUND_UP(max_node_len, defaults::SECTOR_LEN);
uint64_t n_reorder_sectors = 0;
uint64_t n_data_nodes_per_sector = 0;

@@ -941,15 +943,68 @@ void create_disk_layout(const std::string base_file, const std::string mem_index
std::unique_ptr<T[]> cur_node_coords = std::make_unique<T[]>(ndims_64);
diskann::cout << "# sectors: " << n_sectors << std::endl;
uint64_t cur_node_id = 0;
for (uint64_t sector = 0; sector < n_sectors; sector++)
{
if (sector % 100000 == 0)

if (nnodes_per_sector > 0)
{ // Write multiple nodes per sector
for (uint64_t sector = 0; sector < n_sectors; sector++)
{
diskann::cout << "Sector #" << sector << "written" << std::endl;
if (sector % 100000 == 0)
{
diskann::cout << "Sector #" << sector << "written" << std::endl;
}
memset(sector_buf.get(), 0, defaults::SECTOR_LEN);
for (uint64_t sector_node_id = 0; sector_node_id < nnodes_per_sector && cur_node_id < npts_64;
sector_node_id++)
{
memset(node_buf.get(), 0, max_node_len);
// read cur node's nnbrs
vamana_reader.read((char *)&nnbrs, sizeof(uint32_t));

// sanity checks on nnbrs
assert(nnbrs > 0);
assert(nnbrs <= width_u32);

// read node's nhood
vamana_reader.read((char *)nhood_buf, (std::min)(nnbrs, width_u32) * sizeof(uint32_t));
if (nnbrs > width_u32)
{
vamana_reader.seekg((nnbrs - width_u32) * sizeof(uint32_t), vamana_reader.cur);
}

// write coords of node first
// T *node_coords = data + ((uint64_t) ndims_64 * cur_node_id);
base_reader.read((char *)cur_node_coords.get(), sizeof(T) * ndims_64);
memcpy(node_buf.get(), cur_node_coords.get(), ndims_64 * sizeof(T));

// write nnbrs
*(uint32_t *)(node_buf.get() + ndims_64 * sizeof(T)) = (std::min)(nnbrs, width_u32);

// write nhood next
memcpy(node_buf.get() + ndims_64 * sizeof(T) + sizeof(uint32_t), nhood_buf,
(std::min)(nnbrs, width_u32) * sizeof(uint32_t));

// get offset into sector_buf
char *sector_node_buf = sector_buf.get() + (sector_node_id * max_node_len);

// copy node buf into sector_node_buf
memcpy(sector_node_buf, node_buf.get(), max_node_len);
cur_node_id++;
}
// flush sector to disk
diskann_writer.write(sector_buf.get(), defaults::SECTOR_LEN);
}
memset(sector_buf.get(), 0, defaults::SECTOR_LEN);
for (uint64_t sector_node_id = 0; sector_node_id < nnodes_per_sector && cur_node_id < npts_64; sector_node_id++)
}
else
{ // Write multi-sector nodes
uint64_t nsectors_per_node = DIV_ROUND_UP(max_node_len, defaults::SECTOR_LEN);
for (uint64_t i = 0; i < npts_64; i++)
{
if ((i * nsectors_per_node) % 100000 == 0)
{
diskann::cout << "Sector #" << i * nsectors_per_node << "written" << std::endl;
}
memset(multisector_buf.get(), 0, nsectors_per_node * defaults::SECTOR_LEN);

memset(node_buf.get(), 0, max_node_len);
// read cur node's nnbrs
vamana_reader.read((char *)&nnbrs, sizeof(uint32_t));
@@ -968,25 +1023,20 @@ void create_disk_layout(const std::string base_file, const std::string mem_index
// write coords of node first
// T *node_coords = data + ((uint64_t) ndims_64 * cur_node_id);
base_reader.read((char *)cur_node_coords.get(), sizeof(T) * ndims_64);
memcpy(node_buf.get(), cur_node_coords.get(), ndims_64 * sizeof(T));
memcpy(multisector_buf.get(), cur_node_coords.get(), ndims_64 * sizeof(T));

// write nnbrs
*(uint32_t *)(node_buf.get() + ndims_64 * sizeof(T)) = (std::min)(nnbrs, width_u32);
*(uint32_t *)(multisector_buf.get() + ndims_64 * sizeof(T)) = (std::min)(nnbrs, width_u32);

// write nhood next
memcpy(node_buf.get() + ndims_64 * sizeof(T) + sizeof(uint32_t), nhood_buf,
memcpy(multisector_buf.get() + ndims_64 * sizeof(T) + sizeof(uint32_t), nhood_buf,
(std::min)(nnbrs, width_u32) * sizeof(uint32_t));

// get offset into sector_buf
char *sector_node_buf = sector_buf.get() + (sector_node_id * max_node_len);

// copy node buf into sector_node_buf
memcpy(sector_node_buf, node_buf.get(), max_node_len);
cur_node_id++;
// flush sector to disk
diskann_writer.write(multisector_buf.get(), nsectors_per_node * defaults::SECTOR_LEN);
}
// flush sector to disk
diskann_writer.write(sector_buf.get(), defaults::SECTOR_LEN);
}

if (append_reorder_data)
{
diskann::cout << "Index written. Appending reorder data..." << std::endl;
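Both branches above write the same node record, [COORD(T)][NNBRS][NBR_ID...]; they differ only in whether the record is packed into a shared sector buffer or placed at offset 0 of a node-private multi-sector buffer. A minimal sketch of that shared serialization step (hypothetical helper with float coords; the real code inlines these memcpys):

```cpp
#include <cstdint>
#include <cstring>

// Writes one node record into `buf`: coords, then neighbor count, then neighbor ids.
// Returns the number of bytes written (at most max_node_len).
size_t pack_node(char *buf, const float *coords, uint64_t ndims,
                 const uint32_t *nbrs, uint32_t nnbrs)
{
    size_t off = 0;
    std::memcpy(buf + off, coords, ndims * sizeof(float)); // [COORD(T)]
    off += ndims * sizeof(float);
    std::memcpy(buf + off, &nnbrs, sizeof(uint32_t)); // [NNBRS]
    off += sizeof(uint32_t);
    std::memcpy(buf + off, nbrs, nnbrs * sizeof(uint32_t)); // [NBR_ID(uint32_t) * NNBRS]
    return off + nnbrs * sizeof(uint32_t);
}
```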