feat: Add support for dataproc BatchController service (#546)
PiperOrigin-RevId: 404333740
Source-Link: googleapis/googleapis@5088bd7
Source-Link: https://github.com/googleapis/googleapis-gen/commit/44b870783c1943771bcdec748ae4d5208c3d7f0e
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNDRiODcwNzgzYzE5NDM3NzFiY2RlYzc0OGFlNGQ1MjA4YzNkN2YwZSJ9
gcf-owl-bot[bot] authored Oct 19, 2021
1 parent 1fc5068 commit cfc0eeb
Showing 29 changed files with 18,208 additions and 8,792 deletions.
372 changes: 372 additions & 0 deletions protos/google/cloud/dataproc/v1/batches.proto
@@ -0,0 +1,372 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";

// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in a terminal
  // state, the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}
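
Not part of this diff, but for orientation: a minimal sketch of calling these RPCs through the generated Node.js client in this repository, assuming a hypothetical project, region, and Cloud Storage bucket.

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

async function main(): Promise<void> {
  // CreateBatch is a long-running operation: the RPC returns an Operation,
  // and operation.promise() resolves once the batch has been created.
  const [operation] = await client.createBatch({
    parent: 'projects/my-project/locations/us-central1', // hypothetical project/region
    batch: {
      sparkBatch: {
        mainClass: 'org.example.WordCount',            // hypothetical main class
        jarFileUris: ['gs://my-bucket/wordcount.jar'], // hypothetical jar
      },
    },
  });
  const [batch] = await operation.promise();
  console.log(`Created ${batch.name} in state ${batch.state}`);
}

main().catch(console.error);
```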

// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final
  // component of the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}
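
As a hedged illustration of the two ID fields above: the regex is one reading of the documented `batch_id` rule (the proto gives only a character-class hint), and a UUID comfortably fits the `request_id` constraints.

```ts
import {randomUUID} from 'node:crypto';

// One interpretation of the batch_id rule: 4-63 characters drawn from
// lowercase letters, digits, and hyphens.
function isValidBatchId(id: string): boolean {
  return /^[a-z0-9-]{4,63}$/.test(id);
}

const batchId = 'daily-report-0042'; // hypothetical ID
if (!isValidBatchId(batchId)) {
  throw new Error(`invalid batch_id: ${batchId}`);
}

// A UUID is 36 characters from [a-zA-Z0-9-], within the 40-character cap,
// and makes retries of the same logical CreateBatch request idempotent.
const requestId = randomUUID();
```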

// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The name of the batch to retrieve.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}

// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}

// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}
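
The `page_size`/`page_token`/`next_page_token` trio above is the standard list-pagination contract. With the generated Node.js client, token handling can be left to the auto-paginating iterator; a sketch with a hypothetical parent:

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

async function listAllBatches(parent: string): Promise<void> {
  // listBatchesAsync follows next_page_token internally until it is empty.
  for await (const batch of client.listBatchesAsync({parent, pageSize: 50})) {
    console.log(batch.name, batch.state);
  }
}

listAllBatches('projects/my-project/locations/us-central1').catch(console.error);
```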

// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The name of the batch resource to delete.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}
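
Since `DeleteBatch` fails with `FAILED_PRECONDITION` while a batch is still active, a caller may want to handle that status separately. A sketch, assuming the usual gRPC mapping of `FAILED_PRECONDITION` to status code 9 and a hypothetical batch name:

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

// Hypothetical fully qualified batch name.
const name = 'projects/my-project/locations/us-central1/batches/daily-report-0042';

async function deleteBatch(): Promise<void> {
  try {
    await client.deleteBatch({name});
    console.log(`Deleted ${name}`);
  } catch (err: unknown) {
    // gRPC status 9 = FAILED_PRECONDITION: the batch is still PENDING,
    // RUNNING, or CANCELLING, so it cannot be deleted yet.
    if ((err as {code?: number}).code === 9) {
      console.warn(`${name} is not in a terminal state yet`);
      return;
    }
    throw err;
  }
}

deleteBatch().catch(console.error);
```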

// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch has been created and is awaiting execution.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Universally Unique Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered its current state.
  google.protobuf.Timestamp state_time = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
}
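
The `State` enum implies three terminal states: `CANCELLED`, `SUCCEEDED`, and `FAILED`. A minimal polling sketch over `GetBatch` built on that reading; the poll interval is arbitrary, and the comparison assumes the client surfaces enum values as their string names.

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

// Terminal states per the State enum above.
const TERMINAL = new Set(['CANCELLED', 'SUCCEEDED', 'FAILED']);

async function waitForBatch(name: string, pollMs = 10_000): Promise<void> {
  for (;;) {
    const [batch] = await client.getBatch({name});
    console.log(`${name}: ${batch.state}`);
    if (TERMINAL.has(String(batch.state))) {
      if (String(batch.state) === 'FAILED') {
        // state_message carries the failure description when state is FAILED.
        console.error(batch.stateMessage);
      }
      return;
    }
    await new Promise(resolve => setTimeout(resolve, pollMs));
  }
}
```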

// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver. Must
  // be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
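
For illustration, a `pyspark_batch` fragment that exercises each field above; every `gs://` URI is a placeholder, and per the field docs, `args` carries job arguments only, never `--conf`.

```ts
// Hypothetical PySpark batch configuration (all URIs are placeholders).
const pysparkBatch = {
  mainPythonFileUri: 'gs://my-bucket/jobs/etl.py',
  args: ['--date=2021-10-19'],
  pythonFileUris: ['gs://my-bucket/libs/helpers.zip'],
  jarFileUris: ['gs://my-bucket/jars/gcs-connector.jar'],
  fileUris: ['gs://my-bucket/conf/app.yaml'],
  archiveUris: ['gs://my-bucket/envs/venv.tar.gz'],
};
```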

// A configuration for running an [Apache Spark](http://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains
    // the class must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
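
The `driver` oneof allows exactly one of the two fields to be set. Two sketches of the same intent, with hypothetical URIs:

```ts
// Option 1: point at a jar whose manifest declares the main class.
const byJar = {
  sparkBatch: {mainJarFileUri: 'gs://my-bucket/app.jar'},
};

// Option 2: name the main class and supply its jar via jar_file_uris.
const byClass = {
  sparkBatch: {
    mainClass: 'org.example.WordCount',
    jarFileUris: ['gs://my-bucket/app.jar'],
  },
};
```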

// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries as a batch workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}
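
Finally, a `spark_sql_batch` sketch showing `query_variables` substitution, equivalent to issuing `SET name="value";` before the script runs; the URI and variable names are hypothetical.

```ts
// The script can reference the variables as ${run_date} and ${source_table}.
const sparkSqlBatch = {
  queryFileUri: 'gs://my-bucket/sql/daily_rollup.sql',
  queryVariables: {run_date: '2021-10-19', source_table: 'events'},
  jarFileUris: ['gs://my-bucket/jars/udfs.jar'],
};
```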