Skip to content

Commit 64bae92

Browse files
0ctopus13primeDooyong Kim
and
Dooyong Kim
authored
Introduced writing layer, getting rid of writing logic that uses an absolute path in the filesystem. (opensearch-project#2241)
Signed-off-by: Dooyong Kim <[email protected]> Co-authored-by: Dooyong Kim <[email protected]>
1 parent a029fa8 commit 64bae92

File tree

53 files changed

+3142
-1930
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3142
-1930
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1717
## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.18...2.x)
1818
### Features
1919
### Enhancements
20+
- Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
2021
### Bug Fixes
2122
### Infrastructure
2223
### Documentation

jni/cmake/init-nmslib.cmake

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if(NOT DEFINED APPLY_LIB_PATCHES OR "${APPLY_LIB_PATCHES}" STREQUAL true)
1919
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0001-Initialize-maxlevel-during-add-from-enterpoint-level.patch")
2020
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0002-Adds-ability-to-pass-ef-parameter-in-the-query-for-h.patch")
2121
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0003-Added-streaming-apis-for-vector-index-loading-in-Hnsw.patch")
22+
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0004-Added-a-new-save-apis-in-Hnsw-with-streaming-interfa.patch")
2223

2324
# Get patch id of the last commit
2425
execute_process(COMMAND sh -c "git --no-pager show HEAD | git patch-id --stable" OUTPUT_VARIABLE PATCH_ID_OUTPUT_FROM_COMMIT WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib)

jni/include/commons.h

+6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
* Modifications Copyright OpenSearch Contributors. See
99
* GitHub history for details.
1010
*/
11+
12+
#ifndef OPENSEARCH_KNN_COMMONS_H
13+
#define OPENSEARCH_KNN_COMMONS_H
14+
1115
#include "jni_util.h"
1216
#include <jni.h>
1317
namespace knn_jni {
@@ -99,3 +103,5 @@ namespace knn_jni {
99103
int getIntegerMethodParameter(JNIEnv *, knn_jni::JNIUtilInterface *, std::unordered_map<std::string, jobject>, std::string, int);
100104
}
101105
}
106+
107+
#endif

jni/include/faiss_index_service.h

+38-25
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616

1717
#include <jni.h>
1818
#include "faiss/MetricType.h"
19+
#include "faiss/impl/io.h"
1920
#include "jni_util.h"
2021
#include "faiss_methods.h"
22+
#include "faiss_stream_support.h"
2123
#include <memory>
2224

2325
namespace knn_jni {
@@ -30,7 +32,8 @@ namespace faiss_wrapper {
3032
*/
3133
class IndexService {
3234
public:
33-
IndexService(std::unique_ptr<FaissMethods> faissMethods);
35+
explicit IndexService(std::unique_ptr<FaissMethods> faissMethods);
36+
3437
/**
3538
* Initialize index
3639
*
@@ -45,6 +48,7 @@ class IndexService {
4548
* @return memory address of the native index object
4649
*/
4750
virtual jlong initIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, faiss::MetricType metric, std::string indexDescription, int dim, int numVectors, int threadCount, std::unordered_map<std::string, jobject> parameters);
51+
4852
/**
4953
* Add vectors to index
5054
*
@@ -55,29 +59,34 @@ class IndexService {
5559
* @param idMapAddress memory address of the native index object
5660
*/
5761
virtual void insertToIndex(int dim, int numIds, int threadCount, int64_t vectorsAddress, std::vector<int64_t> &ids, jlong idMapAddress);
62+
5863
/**
5964
* Write index to disk
6065
*
61-
* @param threadCount number of thread count to be used while adding data
62-
* @param indexPath path to write index
63-
* @param idMap memory address of the native index object
66+
* @param writer IOWriter implementation doing IO processing.
67+
* In most cases, it is expected to have underlying Lucene's IndexOuptut.
68+
* @param idMapAddress memory address of the native index object
6469
*/
65-
virtual void writeIndex(std::string indexPath, jlong idMapAddress);
70+
virtual void writeIndex(faiss::IOWriter* writer, jlong idMapAddress);
71+
6672
virtual ~IndexService() = default;
73+
6774
protected:
6875
virtual void allocIndex(faiss::Index * index, size_t dim, size_t numVectors);
76+
6977
std::unique_ptr<FaissMethods> faissMethods;
70-
};
78+
}; // class IndexService
7179

7280
/**
7381
* A class to provide operations on index
7482
* This class should evolve to have only cpp object but not jni object
7583
*/
76-
class BinaryIndexService : public IndexService {
84+
class BinaryIndexService final : public IndexService {
7785
public:
7886
//TODO Remove dependency on JNIUtilInterface and JNIEnv
7987
//TODO Reduce the number of parameters
80-
BinaryIndexService(std::unique_ptr<FaissMethods> faissMethods);
88+
explicit BinaryIndexService(std::unique_ptr<FaissMethods> faissMethods);
89+
8190
/**
8291
* Initialize index
8392
*
@@ -91,7 +100,8 @@ class BinaryIndexService : public IndexService {
91100
* @param parameters parameters to be applied to faiss index
92101
* @return memory address of the native index object
93102
*/
94-
virtual jlong initIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, faiss::MetricType metric, std::string indexDescription, int dim, int numVectors, int threadCount, std::unordered_map<std::string, jobject> parameters) override;
103+
jlong initIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, faiss::MetricType metric, std::string indexDescription, int dim, int numVectors, int threadCount, std::unordered_map<std::string, jobject> parameters) final;
104+
95105
/**
96106
* Add vectors to index
97107
*
@@ -106,7 +116,8 @@ class BinaryIndexService : public IndexService {
106116
* @param idMap a map of document id and vector id
107117
* @param parameters parameters to be applied to faiss index
108118
*/
109-
virtual void insertToIndex(int dim, int numIds, int threadCount, int64_t vectorsAddress, std::vector<int64_t> &ids, jlong idMapAddress) override;
119+
void insertToIndex(int dim, int numIds, int threadCount, int64_t vectorsAddress, std::vector<int64_t> &ids, jlong idMapAddress) final;
120+
110121
/**
111122
* Write index to disk
112123
*
@@ -119,23 +130,23 @@ class BinaryIndexService : public IndexService {
119130
* @param idMap a map of document id and vector id
120131
* @param parameters parameters to be applied to faiss index
121132
*/
122-
virtual void writeIndex(std::string indexPath, jlong idMapAddress) override;
123-
virtual ~BinaryIndexService() = default;
133+
void writeIndex(faiss::IOWriter* writer, jlong idMapAddress) final;
134+
124135
protected:
125-
virtual void allocIndex(faiss::Index * index, size_t dim, size_t numVectors) override;
126-
};
136+
void allocIndex(faiss::Index * index, size_t dim, size_t numVectors) final;
137+
}; // class BinaryIndexService
127138

128139
/**
129140
* A class to provide operations on index
130141
* This class should evolve to have only cpp object but not jni object
131142
*/
132-
class ByteIndexService : public IndexService {
143+
class ByteIndexService final : public IndexService {
133144
public:
134145
//TODO Remove dependency on JNIUtilInterface and JNIEnv
135146
//TODO Reduce the number of parameters
136-
ByteIndexService(std::unique_ptr<FaissMethods> faissMethods);
147+
explicit ByteIndexService(std::unique_ptr<FaissMethods> faissMethods);
137148

138-
/**
149+
/**
139150
* Initialize index
140151
*
141152
* @param jniUtil jni util
@@ -148,7 +159,8 @@ class ByteIndexService : public IndexService {
148159
* @param parameters parameters to be applied to faiss index
149160
* @return memory address of the native index object
150161
*/
151-
virtual jlong initIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, faiss::MetricType metric, std::string indexDescription, int dim, int numVectors, int threadCount, std::unordered_map<std::string, jobject> parameters) override;
162+
jlong initIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, faiss::MetricType metric, std::string indexDescription, int dim, int numVectors, int threadCount, std::unordered_map<std::string, jobject> parameters) final;
163+
152164
/**
153165
* Add vectors to index
154166
*
@@ -163,7 +175,8 @@ class ByteIndexService : public IndexService {
163175
* @param idMap a map of document id and vector id
164176
* @param parameters parameters to be applied to faiss index
165177
*/
166-
virtual void insertToIndex(int dim, int numIds, int threadCount, int64_t vectorsAddress, std::vector<int64_t> &ids, jlong idMapAddress) override;
178+
void insertToIndex(int dim, int numIds, int threadCount, int64_t vectorsAddress, std::vector<int64_t> &ids, jlong idMapAddress) final;
179+
167180
/**
168181
* Write index to disk
169182
*
@@ -176,14 +189,14 @@ class ByteIndexService : public IndexService {
176189
* @param idMap a map of document id and vector id
177190
* @param parameters parameters to be applied to faiss index
178191
*/
179-
virtual void writeIndex(std::string indexPath, jlong idMapAddress) override;
180-
virtual ~ByteIndexService() = default;
181-
protected:
182-
virtual void allocIndex(faiss::Index * index, size_t dim, size_t numVectors) override;
183-
};
192+
void writeIndex(faiss::IOWriter* writer, jlong idMapAddress) final;
193+
194+
protected:
195+
void allocIndex(faiss::Index * index, size_t dim, size_t numVectors) final;
196+
}; // class ByteIndexService
184197

185198
}
186199
}
187200

188201

189-
#endif //OPENSEARCH_KNN_FAISS_INDEX_SERVICE_H
202+
#endif //OPENSEARCH_KNN_FAISS_INDEX_SERVICE_H

jni/include/faiss_methods.h

+11-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef OPENSEARCH_KNN_FAISS_METHODS_H
1111
#define OPENSEARCH_KNN_FAISS_METHODS_H
1212

13+
#include "faiss/impl/io.h"
1314
#include "faiss/Index.h"
1415
#include "faiss/IndexBinary.h"
1516
#include "faiss/IndexIDMap.h"
@@ -26,14 +27,21 @@ namespace faiss_wrapper {
2627
class FaissMethods {
2728
public:
2829
FaissMethods() = default;
30+
2931
virtual faiss::Index* indexFactory(int d, const char* description, faiss::MetricType metric);
32+
3033
virtual faiss::IndexBinary* indexBinaryFactory(int d, const char* description);
34+
3135
virtual faiss::IndexIDMapTemplate<faiss::Index>* indexIdMap(faiss::Index* index);
36+
3237
virtual faiss::IndexIDMapTemplate<faiss::IndexBinary>* indexBinaryIdMap(faiss::IndexBinary* index);
33-
virtual void writeIndex(const faiss::Index* idx, const char* fname);
34-
virtual void writeIndexBinary(const faiss::IndexBinary* idx, const char* fname);
38+
39+
virtual void writeIndex(const faiss::Index* idx, faiss::IOWriter* writer);
40+
41+
virtual void writeIndexBinary(const faiss::IndexBinary* idx, faiss::IOWriter* writer);
42+
3543
virtual ~FaissMethods() = default;
36-
};
44+
}; // class FaissMethods
3745

3846
} //namespace faiss_wrapper
3947
} //namespace knn_jni

jni/include/faiss_stream_support.h

+36-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "faiss/impl/io.h"
1616
#include "jni_util.h"
1717
#include "native_engines_stream_support.h"
18+
#include "parameter_utils.h"
1819

1920
#include <jni.h>
2021
#include <stdexcept>
@@ -34,7 +35,7 @@ class FaissOpenSearchIOReader final : public faiss::IOReader {
3435
public:
3536
explicit FaissOpenSearchIOReader(NativeEngineIndexInputMediator *_mediator)
3637
: faiss::IOReader(),
37-
mediator(_mediator) {
38+
mediator(knn_jni::util::ParameterCheck::require_non_null(_mediator, "mediator")) {
3839
name = "FaissOpenSearchIOReader";
3940
}
4041

@@ -56,6 +57,40 @@ class FaissOpenSearchIOReader final : public faiss::IOReader {
5657
}; // class FaissOpenSearchIOReader
5758

5859

60+
/**
61+
* A glue component inheriting IOWriter to delegate IO processing down to the given
62+
* mediator. The mediator is expected to do write bytes via the provided Lucene's IndexOutput.
63+
*/
64+
class FaissOpenSearchIOWriter final : public faiss::IOWriter {
65+
public:
66+
explicit FaissOpenSearchIOWriter(NativeEngineIndexOutputMediator *_mediator)
67+
: faiss::IOWriter(),
68+
mediator(knn_jni::util::ParameterCheck::require_non_null(_mediator, "mediator")) {
69+
name = "FaissOpenSearchIOWriter";
70+
}
71+
72+
size_t operator()(const void *ptr, size_t size, size_t nitems) final {
73+
const auto writeBytes = size * nitems;
74+
if (writeBytes > 0) {
75+
mediator->writeBytes(reinterpret_cast<const uint8_t *>(ptr), writeBytes);
76+
}
77+
return nitems;
78+
}
79+
80+
// return a file number that can be memory-mapped
81+
int filedescriptor() final {
82+
throw std::runtime_error("filedescriptor() is not supported in FaissOpenSearchIOWriter.");
83+
}
84+
85+
void flush() {
86+
mediator->flush();
87+
}
88+
89+
private:
90+
NativeEngineIndexOutputMediator *mediator;
91+
}; // class FaissOpenSearchIOWriter
92+
93+
5994

6095
}
6196
}

jni/include/faiss_wrapper.h

+18-17
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "jni_util.h"
1616
#include "faiss_index_service.h"
17+
#include "faiss_stream_support.h"
1718
#include <jni.h>
1819

1920
namespace knn_jni {
@@ -22,25 +23,25 @@ namespace knn_jni {
2223

2324
void InsertToIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, jintArray idsJ, jlong vectorsAddressJ, jint dimJ, jlong indexAddr, jint threadCount, IndexService *indexService);
2425

25-
void WriteIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, jstring indexPathJ, jlong indexAddr, IndexService *indexService);
26+
void WriteIndex(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, jobject output, jlong indexAddr, IndexService *indexService);
2627

2728
// Create an index with ids and vectors. Instead of creating a new index, this function creates the index
2829
// based off of the template index passed in. The index is serialized to indexPathJ.
2930
void CreateIndexFromTemplate(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jintArray idsJ,
30-
jlong vectorsAddressJ, jint dimJ, jstring indexPathJ, jbyteArray templateIndexJ,
31+
jlong vectorsAddressJ, jint dimJ, jobject output, jbyteArray templateIndexJ,
3132
jobject parametersJ);
3233

3334
// Create an index with ids and vectors. Instead of creating a new index, this function creates the index
3435
// based off of the template index passed in. The index is serialized to indexPathJ.
3536
void CreateBinaryIndexFromTemplate(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jintArray idsJ,
36-
jlong vectorsAddressJ, jint dimJ, jstring indexPathJ, jbyteArray templateIndexJ,
37-
jobject parametersJ);
37+
jlong vectorsAddressJ, jint dimJ, jobject output, jbyteArray templateIndexJ,
38+
jobject parametersJ);
3839

3940
// Create a index with ids and byte vectors. Instead of creating a new index, this function creates the index
4041
// based off of the template index passed in. The index is serialized to indexPathJ.
4142
void CreateByteIndexFromTemplate(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jintArray idsJ,
42-
jlong vectorsAddressJ, jint dimJ, jstring indexPathJ, jbyteArray templateIndexJ,
43-
jobject parametersJ);
43+
jlong vectorsAddressJ, jint dimJ, jobject output, jbyteArray templateIndexJ,
44+
jobject parametersJ);
4445

4546
// Load an index from indexPathJ into memory.
4647
//
@@ -74,28 +75,28 @@ namespace knn_jni {
7475
// Sets the sharedIndexState for an index
7576
void SetSharedIndexState(jlong indexPointerJ, jlong shareIndexStatePointerJ);
7677

77-
/**
78+
/**
7879
* Execute a query against the index located in memory at indexPointerJ
79-
*
80+
*
8081
* Parameters:
8182
* methodParamsJ: introduces a map to have additional method parameters
82-
*
83+
*
8384
* Return an array of KNNQueryResults
84-
*/
85+
*/
8586
jobjectArray QueryIndex(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jlong indexPointerJ,
8687
jfloatArray queryVectorJ, jint kJ, jobject methodParamsJ, jintArray parentIdsJ);
8788

8889
/**
8990
* Execute a query against the index located in memory at indexPointerJ along with Filters
90-
*
91+
*
9192
* Parameters:
9293
* methodParamsJ: introduces a map to have additional method parameters
93-
*
94+
*
9495
* Return an array of KNNQueryResults
9596
*/
9697
jobjectArray QueryIndex_WithFilter(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jlong indexPointerJ,
97-
jfloatArray queryVectorJ, jint kJ, jobject methodParamsJ, jlongArray filterIdsJ,
98-
jint filterIdsTypeJ, jintArray parentIdsJ);
98+
jfloatArray queryVectorJ, jint kJ, jobject methodParamsJ, jlongArray filterIdsJ,
99+
jint filterIdsTypeJ, jintArray parentIdsJ);
99100

100101
// Execute a query against the binary index located in memory at indexPointerJ along with Filters
101102
//
@@ -124,14 +125,14 @@ namespace knn_jni {
124125
//
125126
// Return the serialized representation
126127
jbyteArray TrainBinaryIndex(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jobject parametersJ, jint dimension,
127-
jlong trainVectorsPointerJ);
128+
jlong trainVectorsPointerJ);
128129

129130
// Create an empty byte index defined by the values in the Java map, parametersJ. Train the index with
130131
// the byte vectors located at trainVectorsPointerJ.
131132
//
132133
// Return the serialized representation
133134
jbyteArray TrainByteIndex(knn_jni::JNIUtilInterface * jniUtil, JNIEnv * env, jobject parametersJ, jint dimension,
134-
jlong trainVectorsPointerJ);
135+
jlong trainVectorsPointerJ);
135136

136137
/*
137138
* Perform a range search with filter against the index located in memory at indexPointerJ.
@@ -163,7 +164,7 @@ namespace knn_jni {
163164
* @return an array of RangeQueryResults
164165
*/
165166
jobjectArray RangeSearch(knn_jni::JNIUtilInterface *jniUtil, JNIEnv *env, jlong indexPointerJ, jfloatArray queryVectorJ,
166-
jfloat radiusJ, jobject methodParamsJ, jint maxResultWindowJ, jintArray parentIdsJ);
167+
jfloat radiusJ, jobject methodParamsJ, jint maxResultWindowJ, jintArray parentIdsJ);
167168
}
168169
}
169170

0 commit comments

Comments
 (0)