Skip to content

Commit

Permalink
feat: add Vector DB config for Vertex RAG (Weaviate + FeatureStore)
Browse files Browse the repository at this point in the history
feat: add UpdateRagCorpus API for Vertex RAG
feat: add ApiKeyConfig field to ApiAuth
docs: A comment for field `vertex_prediction_endpoint` in message `.google.cloud.aiplatform.v1beta1.RagEmbeddingModelConfig` is changed
docs: A comment for field `distance` in message `.google.cloud.aiplatform.v1beta1.RagContexts` is changed

PiperOrigin-RevId: 669362408
  • Loading branch information
Google APIs authored and copybara-github committed Aug 30, 2024
1 parent 7310c46 commit 085ad08
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 1 deletion.
6 changes: 6 additions & 0 deletions google/cloud/aiplatform/v1beta1/api_auth.proto
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,10 @@ message ApiAuth {
}
];
}

// The auth config. As a oneof, at most one of its members can be set.
oneof auth_config {
// The API secret, supplied as an `ApiKeyConfig`.
ApiKeyConfig api_key_config = 1;
}
}
144 changes: 144 additions & 0 deletions google/cloud/aiplatform/v1beta1/vertex_rag_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/api_auth.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";
import "google/protobuf/timestamp.proto";

Expand Down Expand Up @@ -61,16 +62,147 @@ message RagEmbeddingModelConfig {
string model_version_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Configuration for sparse embedding generation.
message SparseEmbeddingConfig {
// Message for BM25 parameters.
message Bm25 {
// Optional. Use multilingual tokenizer if set to true.
bool multilingual = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. The parameter to control term frequency saturation. It
// determines the scaling between the matching term frequency and final
// score. k1 is in the range of [1.2, 3]. The default value is 1.2.
// Declared `optional` so that an unset value can be told apart from 0.
optional float k1 = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. The parameter to control document length normalization. It
// determines how much the document length affects the final score. b is
// in the range of [0, 1]. The default value is 0.75.
// Declared `optional` so that an unset value can be told apart from an
// explicit 0, which is inside the valid range.
optional float b = 3 [(google.api.field_behavior) = OPTIONAL];
}

// The model to use for sparse embedding generation.
oneof model {
// Use BM25 scoring algorithm.
Bm25 bm25 = 1;
}
}

// Config for hybrid search, which pairs a sparse embedding configuration
// with a dense embedding model endpoint.
message HybridSearchConfig {
// Optional. The configuration for sparse embedding generation. When this
// field is not set, the default behavior depends on the vector database
// choice on the RagCorpus.
SparseEmbeddingConfig sparse_embedding_config = 1
[(google.api.field_behavior) = OPTIONAL];

// Required. The Vertex AI Prediction Endpoint that hosts the embedding
// model for dense embedding generation.
VertexPredictionEndpoint dense_embedding_model_prediction_endpoint = 2
[(google.api.field_behavior) = REQUIRED];
}

// The model config to use. As a oneof, at most one of its members can be
// set.
oneof model_config {
// The Vertex AI Prediction Endpoint that either refers to a publisher model
// or an endpoint that is hosting a 1P fine-tuned text embedding model.
// Endpoints hosting non-1P fine-tuned text embedding models are
// currently not supported.
// This is used for dense vector search.
VertexPredictionEndpoint vertex_prediction_endpoint = 1;

// Configuration for hybrid search.
HybridSearchConfig hybrid_search_config = 2;
}
}

// Config for the Vector DB to use for RAG.
message RagVectorDbConfig {
// The config for the default RAG-managed Vector DB. This message has no
// fields.
message RagManagedDb {}

// The config for a Weaviate instance.
message Weaviate {
// Weaviate DB instance HTTP endpoint, e.g. 34.56.78.90:8080.
// Vertex RAG only supports HTTP connection to Weaviate.
// This value cannot be changed after it's set.
string http_endpoint = 1;

// The corresponding collection this corpus maps to.
// This value cannot be changed after it's set.
string collection_name = 2;
}

// The config for the Vertex Feature Store.
message VertexFeatureStore {
// The resource name of the FeatureView.
// Format:
// `projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}`
string feature_view_resource_name = 1;
}

// The config for the Vector DB. As a oneof, at most one of its members can
// be set.
oneof vector_db {
// The config for the RAG-managed Vector DB.
RagManagedDb rag_managed_db = 1;

// The config for the Weaviate instance.
Weaviate weaviate = 2;

// The config for the Vertex Feature Store.
VertexFeatureStore vertex_feature_store = 4;
}

// Authentication config for the chosen Vector DB.
ApiAuth api_auth = 5;
}

// RagFile status.
message FileStatus {
// RagFile state.
enum State {
// RagFile state is unspecified.
STATE_UNSPECIFIED = 0;

// RagFile resource has been created and indexed successfully.
ACTIVE = 1;

// RagFile resource is in a problematic state.
// See the `error_status` field for details.
ERROR = 2;
}

// Output only. RagFile state.
State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Populated only when the `state` field is ERROR.
string error_status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// RagCorpus status.
message CorpusStatus {
// RagCorpus life state.
enum State {
// This state is not supposed to happen.
UNKNOWN = 0;

// RagCorpus resource entry is initialized, but hasn't done validation.
INITIALIZED = 1;

// RagCorpus is provisioned successfully and is ready to serve.
ACTIVE = 2;

// RagCorpus is in a problematic situation.
// See the `error_status` field for details.
ERROR = 3;
}

// Output only. RagCorpus life state.
State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Populated only when the `state` field is ERROR.
string error_status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A RagCorpus is a RagFile container and a project can have multiple
// RagCorpora.
message RagCorpus {
Expand Down Expand Up @@ -98,13 +230,22 @@ message RagCorpus {
(google.api.field_behavior) = IMMUTABLE
];

// Optional. Immutable. The Vector DB config of the RagCorpus.
RagVectorDbConfig rag_vector_db_config = 7 [
(google.api.field_behavior) = OPTIONAL,
(google.api.field_behavior) = IMMUTABLE
];

// Output only. Timestamp when this RagCorpus was created.
google.protobuf.Timestamp create_time = 4
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Timestamp when this RagCorpus was last updated.
google.protobuf.Timestamp update_time = 5
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. RagCorpus state.
CorpusStatus corpus_status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A RagFile contains user data for chunking, embedding and indexing.
Expand Down Expand Up @@ -176,6 +317,9 @@ message RagFile {
// Output only. Timestamp when this RagFile was last updated.
google.protobuf.Timestamp update_time = 7
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. State of the RagFile.
FileStatus file_status = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specifies the size and overlap of chunks for RagFiles.
Expand Down
29 changes: 29 additions & 0 deletions google/cloud/aiplatform/v1beta1/vertex_rag_data_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ service VertexRagDataService {
};
}

// Updates a RagCorpus.
//
// Returns a long-running operation whose response type is `RagCorpus` and
// whose metadata type is `UpdateRagCorpusOperationMetadata`. The corpus to
// update is identified by `rag_corpus.name` in the request.
rpc UpdateRagCorpus(UpdateRagCorpusRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
patch: "/v1beta1/{rag_corpus.name=projects/*/locations/*/ragCorpora/*}"
body: "rag_corpus"
};
option (google.api.method_signature) = "rag_corpus";
option (google.longrunning.operation_info) = {
response_type: "RagCorpus"
metadata_type: "UpdateRagCorpusOperationMetadata"
};
}

// Gets a RagCorpus.
rpc GetRagCorpus(GetRagCorpusRequest) returns (RagCorpus) {
option (google.api.http) = {
Expand Down Expand Up @@ -248,6 +262,7 @@ message UploadRagFileRequest {
// Response message for
// [VertexRagDataService.UploadRagFile][google.cloud.aiplatform.v1beta1.VertexRagDataService.UploadRagFile].
message UploadRagFileResponse {
// The result of the upload.
oneof result {
// The RagFile that had been uploaded into the RagCorpus.
RagFile rag_file = 1;
Expand Down Expand Up @@ -363,6 +378,20 @@ message CreateRagCorpusOperationMetadata {
GenericOperationMetadata generic_metadata = 1;
}

// Request message for
// [VertexRagDataService.UpdateRagCorpus][google.cloud.aiplatform.v1beta1.VertexRagDataService.UpdateRagCorpus].
message UpdateRagCorpusRequest {
// Required. The RagCorpus which replaces the resource on the server.
// Its `name` field identifies the RagCorpus to update; the RPC's HTTP
// binding maps `rag_corpus.name` onto the request path.
RagCorpus rag_corpus = 1 [(google.api.field_behavior) = REQUIRED];
}

// Runtime operation information for
// [VertexRagDataService.UpdateRagCorpus][google.cloud.aiplatform.v1beta1.VertexRagDataService.UpdateRagCorpus].
// This is the metadata type of the long-running operation returned by that
// RPC.
message UpdateRagCorpusOperationMetadata {
// The operation generic information.
GenericOperationMetadata generic_metadata = 1;
}

// Runtime operation information for
// [VertexRagDataService.ImportRagFiles][google.cloud.aiplatform.v1beta1.VertexRagDataService.ImportRagFiles].
message ImportRagFilesOperationMetadata {
Expand Down
19 changes: 18 additions & 1 deletion google/cloud/aiplatform/v1beta1/vertex_rag_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ service VertexRagService {

// A query to retrieve relevant contexts.
message RagQuery {
// Configurations for hybrid search results ranking.
message Ranking {
// Optional. Alpha value controls the weight between dense and sparse vector
// search results. The range is [0, 1], where 0 means sparse vector search
// only and 1 means dense vector search only. The default value is 0.5,
// which balances sparse and dense vector search equally.
// Declared `optional` so that an unset value can be told apart from an
// explicit 0 (sparse only).
optional float alpha = 1 [(google.api.field_behavior) = OPTIONAL];
}

// The query to retrieve contexts.
// Currently only text query is supported.
oneof query {
Expand All @@ -57,6 +66,9 @@ message RagQuery {

// Optional. The number of contexts to retrieve.
int32 similarity_top_k = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. Configurations for hybrid search results ranking.
Ranking ranking = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for
Expand Down Expand Up @@ -132,8 +144,13 @@ message RagContexts {
// The text chunk.
string text = 2;

// The distance between the query vector and the context text vector.
// The distance between the query dense embedding vector and the context
// text vector.
double distance = 3;

// The distance between the query sparse embedding vector and the context
// text vector.
double sparse_distance = 4;
}

// All its contexts.
Expand Down

0 comments on commit 085ad08

Please sign in to comment.