feat: add Vector DB config for Vertex RAG (Weaviate + FeatureStore)

feat: add UpdateRagCorpus API for Vertex RAG
feat: add ApiKeyConfig field to ApiAuth
docs: A comment for field `vertex_prediction_endpoint` in message `.google.cloud.aiplatform.v1beta1.RagEmbeddingModelConfig` is changed
docs: A comment for field `distance` in message `.google.cloud.aiplatform.v1beta1.RagContexts` is changed

PiperOrigin-RevId: 669362408
googleapis · Aug 30, 2024 · 085ad08 · 085ad08
1 parent 7310c46
commit 085ad08
Show file tree

Hide file tree

Showing 4 changed files with 197 additions and 1 deletion.
diff --git a/google/cloud/aiplatform/v1beta1/api_auth.proto b/google/cloud/aiplatform/v1beta1/api_auth.proto
@@ -40,4 +40,10 @@ message ApiAuth {
       }
     ];
   }
+
+  // The auth config.
+  oneof auth_config {
+    // The API secret.
+    ApiKeyConfig api_key_config = 1;
+  }
 }
diff --git a/google/cloud/aiplatform/v1beta1/vertex_rag_data.proto b/google/cloud/aiplatform/v1beta1/vertex_rag_data.proto
@@ -18,6 +18,7 @@ package google.cloud.aiplatform.v1beta1;
 
 import "google/api/field_behavior.proto";
 import "google/api/resource.proto";
+import "google/cloud/aiplatform/v1beta1/api_auth.proto";
 import "google/cloud/aiplatform/v1beta1/io.proto";
 import "google/protobuf/timestamp.proto";
 
@@ -61,16 +62,147 @@ message RagEmbeddingModelConfig {
     string model_version_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
   }
 
+  // Configuration for sparse embedding generation.
+  message SparseEmbeddingConfig {
+    // Message for BM25 parameters.
+    message Bm25 {
+      // Optional. Use a multilingual tokenizer if set to true.
+      bool multilingual = 1 [(google.api.field_behavior) = OPTIONAL];
+
+      // Optional. The parameter to control term frequency saturation. It
+      // determines the scaling between the matching term frequency and final
+      // score. k1 is in the range of [1.2, 3]. The default value is 1.2.
+      optional float k1 = 2 [(google.api.field_behavior) = OPTIONAL];
+
+      // Optional. The parameter to control document length normalization. It
+      // determines how much the document length affects the final score. b is
+      // in the range of [0, 1]. The default value is 0.75.
+      optional float b = 3 [(google.api.field_behavior) = OPTIONAL];
+    }
+
+    // The model to use for sparse embedding generation.
+    oneof model {
+      // Use the BM25 scoring algorithm.
+      Bm25 bm25 = 1;
+    }
+  }
+
+  // Config for hybrid search.
+  message HybridSearchConfig {
+    // Optional. The configuration for sparse embedding generation. This field
+    // is optional; the default behavior depends on the vector database choice
+    // on the RagCorpus.
+    SparseEmbeddingConfig sparse_embedding_config = 1
+        [(google.api.field_behavior) = OPTIONAL];
+
+    // Required. The Vertex AI Prediction Endpoint that hosts the embedding
+    // model for dense embedding generation.
+    VertexPredictionEndpoint dense_embedding_model_prediction_endpoint = 2
+        [(google.api.field_behavior) = REQUIRED];
+  }
+
   // The model config to use.
   oneof model_config {
     // The Vertex AI Prediction Endpoint that either refers to a publisher model
     // or an endpoint that is hosting a 1P fine-tuned text embedding model.
     // Endpoints hosting non-1P fine-tuned text embedding models are
     // currently not supported.
+    // This is used for dense vector search.
     VertexPredictionEndpoint vertex_prediction_endpoint = 1;
+
+    // Configuration for hybrid search.
+    HybridSearchConfig hybrid_search_config = 2;
   }
 }
 
+// Config for the Vector DB to use for RAG.
+message RagVectorDbConfig {
+  // The config for the default RAG-managed Vector DB.
+  message RagManagedDb {}
+
+  // The config for Weaviate.
+  message Weaviate {
+    // Weaviate DB instance HTTP endpoint, e.g. 34.56.78.90:8080.
+    // Vertex RAG only supports HTTP connection to Weaviate.
+    // This value cannot be changed after it's set.
+    string http_endpoint = 1;
+
+    // The corresponding collection this corpus maps to.
+    // This value cannot be changed after it's set.
+    string collection_name = 2;
+  }
+
+  // The config for the Vertex Feature Store.
+  message VertexFeatureStore {
+    // The resource name of the FeatureView.
+    // Format:
+    // `projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}`
+    string feature_view_resource_name = 1;
+  }
+
+  // The config for the Vector DB.
+  oneof vector_db {
+    // The config for the RAG-managed Vector DB.
+    RagManagedDb rag_managed_db = 1;
+
+    // The config for Weaviate.
+    Weaviate weaviate = 2;
+
+    // The config for the Vertex Feature Store.
+    VertexFeatureStore vertex_feature_store = 4;
+  }
+
+  // Authentication config for the chosen Vector DB.
+  ApiAuth api_auth = 5;
+}
+
+// RagFile status.
+message FileStatus {
+  // RagFile state.
+  enum State {
+    // RagFile state is unspecified.
+    STATE_UNSPECIFIED = 0;
+
+    // RagFile resource has been created and indexed successfully.
+    ACTIVE = 1;
+
+    // RagFile resource is in a problematic state.
+    // See `error_status` field for details.
+    ERROR = 2;
+  }
+
+  // Output only. RagFile state.
+  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Only set when the `state` field is ERROR.
+  string error_status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// RagCorpus status.
+message CorpusStatus {
+  // RagCorpus life state.
+  enum State {
+    // This state is not supposed to happen.
+    UNKNOWN = 0;
+
+    // RagCorpus resource entry is initialized, but hasn't done validation.
+    INITIALIZED = 1;
+
+    // RagCorpus is provisioned successfully and is ready to serve.
+    ACTIVE = 2;
+
+    // RagCorpus is in a problematic situation.
+    // See `error_status` field for details.
+    ERROR = 3;
+  }
+
+  // Output only. RagCorpus life state.
+  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Only set when the `state` field is ERROR.
+  string error_status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
 // A RagCorpus is a RagFile container and a project can have multiple
 // RagCorpora.
 message RagCorpus {
@@ -98,13 +230,22 @@ message RagCorpus {
     (google.api.field_behavior) = IMMUTABLE
   ];
 
+  // Optional. Immutable. The Vector DB config of the RagCorpus.
+  RagVectorDbConfig rag_vector_db_config = 7 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
   // Output only. Timestamp when this RagCorpus was created.
   google.protobuf.Timestamp create_time = 4
       [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. Timestamp when this RagCorpus was last updated.
   google.protobuf.Timestamp update_time = 5
       [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. RagCorpus state.
+  CorpusStatus corpus_status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // A RagFile contains user data for chunking, embedding and indexing.
@@ -176,6 +317,9 @@ message RagFile {
   // Output only. Timestamp when this RagFile was last updated.
   google.protobuf.Timestamp update_time = 7
       [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. State of the RagFile.
+  FileStatus file_status = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Specifies the size and overlap of chunks for RagFiles.

diff --git a/google/cloud/aiplatform/v1beta1/vertex_rag_data_service.proto b/google/cloud/aiplatform/v1beta1/vertex_rag_data_service.proto
@@ -54,6 +54,20 @@ service VertexRagDataService {
     };
   }
 
+  // Updates a RagCorpus. Returns a long-running operation.
+  rpc UpdateRagCorpus(UpdateRagCorpusRequest)
+      returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      patch: "/v1beta1/{rag_corpus.name=projects/*/locations/*/ragCorpora/*}"
+      body: "rag_corpus"
+    };
+    option (google.api.method_signature) = "rag_corpus";
+    option (google.longrunning.operation_info) = {
+      response_type: "RagCorpus"
+      metadata_type: "UpdateRagCorpusOperationMetadata"
+    };
+  }
+
   // Gets a RagCorpus.
   rpc GetRagCorpus(GetRagCorpusRequest) returns (RagCorpus) {
     option (google.api.http) = {
@@ -248,6 +262,7 @@ message UploadRagFileRequest {
 // Response message for
 // [VertexRagDataService.UploadRagFile][google.cloud.aiplatform.v1beta1.VertexRagDataService.UploadRagFile].
 message UploadRagFileResponse {
+  // The result of the upload.
   oneof result {
     // The RagFile that had been uploaded into the RagCorpus.
     RagFile rag_file = 1;
@@ -363,6 +378,20 @@ message CreateRagCorpusOperationMetadata {
   GenericOperationMetadata generic_metadata = 1;
 }
 
+// Request message for
+// [VertexRagDataService.UpdateRagCorpus][google.cloud.aiplatform.v1beta1.VertexRagDataService.UpdateRagCorpus].
+message UpdateRagCorpusRequest {
+  // Required. The RagCorpus that replaces the resource on the server.
+  RagCorpus rag_corpus = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Runtime operation information for
+// [VertexRagDataService.UpdateRagCorpus][google.cloud.aiplatform.v1beta1.VertexRagDataService.UpdateRagCorpus].
+message UpdateRagCorpusOperationMetadata {
+  // The generic operation information.
+  GenericOperationMetadata generic_metadata = 1;
+}
+
 // Runtime operation information for
 // [VertexRagDataService.ImportRagFiles][google.cloud.aiplatform.v1beta1.VertexRagDataService.ImportRagFiles].
 message ImportRagFilesOperationMetadata {

diff --git a/google/cloud/aiplatform/v1beta1/vertex_rag_service.proto b/google/cloud/aiplatform/v1beta1/vertex_rag_service.proto
@@ -48,6 +48,15 @@ service VertexRagService {
 
 // A query to retrieve relevant contexts.
 message RagQuery {
+  // Configurations for hybrid search results ranking.
+  message Ranking {
+    // Optional. Alpha value controls the weight between dense and sparse vector
+    // search results. The range is [0, 1], where 0 means sparse vector search
+    // only and 1 means dense vector search only. The default value is 0.5 which
+    // balances sparse and dense vector search equally.
+    optional float alpha = 1 [(google.api.field_behavior) = OPTIONAL];
+  }
+
   // The query to retrieve contexts.
   // Currently only text query is supported.
   oneof query {
@@ -57,6 +66,9 @@ message RagQuery {
 
   // Optional. The number of contexts to retrieve.
   int32 similarity_top_k = 2 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Configurations for hybrid search results ranking.
+  Ranking ranking = 4 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Request message for
@@ -132,8 +144,13 @@ message RagContexts {
     // The text chunk.
     string text = 2;
 
-    // The distance between the query vector and the context text vector.
+    // The distance between the query dense embedding vector and the context
+    // text vector.
     double distance = 3;
+
+    // The distance between the query sparse embedding vector and the context
+    // text vector.
+    double sparse_distance = 4;
   }
 
   // All its contexts.