feat: Add support for dataproc BatchController service (#546)
PiperOrigin-RevId: 404333740
Source-Link: googleapis/googleapis@5088bd7
Source-Link: https://github.com/googleapis/googleapis-gen/commit/44b870783c1943771bcdec748ae4d5208c3d7f0e
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNDRiODcwNzgzYzE5NDM3NzFiY2RlYzc0OGFlNGQ1MjA4YzNkN2YwZSJ9
gcf-owl-bot[bot] authored Oct 19, 2021
1 parent 1fc5068 commit cfc0eeb
Showing 29 changed files with 18,208 additions and 8,792 deletions.
372 changes: 372 additions & 0 deletions protos/google/cloud/dataproc/v1/batches.proto
@@ -0,0 +1,372 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";

// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in a terminal
  // state, the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}
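
Not part of this diff, but for orientation: a minimal sketch of calling these RPCs through the generated Node.js client in this repository, assuming a hypothetical project, region, and Cloud Storage bucket.

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

async function main(): Promise<void> {
  // CreateBatch is a long-running operation: the RPC returns an Operation,
  // and operation.promise() resolves once the batch has been created.
  const [operation] = await client.createBatch({
    parent: 'projects/my-project/locations/us-central1', // hypothetical project/region
    batch: {
      sparkBatch: {
        mainClass: 'org.example.WordCount',            // hypothetical main class
        jarFileUris: ['gs://my-bucket/wordcount.jar'], // hypothetical jar
      },
    },
  });
  const [batch] = await operation.promise();
  console.log(`Created ${batch.name} in state ${batch.state}`);
}

main().catch(console.error);
```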

// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final
  // component of the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}
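
As a hedged illustration of the two ID fields above: the regex is one reading of the documented `batch_id` rule (the proto gives only a character-class hint), and a UUID comfortably fits the `request_id` constraints.

```ts
import {randomUUID} from 'node:crypto';

// One interpretation of the batch_id rule: 4-63 characters drawn from
// lowercase letters, digits, and hyphens.
function isValidBatchId(id: string): boolean {
  return /^[a-z0-9-]{4,63}$/.test(id);
}

const batchId = 'daily-report-0042'; // hypothetical ID
if (!isValidBatchId(batchId)) {
  throw new Error(`invalid batch_id: ${batchId}`);
}

// A UUID is 36 characters from [a-zA-Z0-9-], within the 40-character cap,
// and makes retries of the same logical CreateBatch request idempotent.
const requestId = randomUUID();
```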

// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The name of the batch to retrieve.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}

// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}

// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}
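
The `page_size`/`page_token`/`next_page_token` trio above is the standard list-pagination contract. With the generated Node.js client, token handling can be left to the auto-paginating iterator; a sketch with a hypothetical parent:

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

async function listAllBatches(parent: string): Promise<void> {
  // listBatchesAsync follows next_page_token internally until it is empty.
  for await (const batch of client.listBatchesAsync({parent, pageSize: 50})) {
    console.log(batch.name, batch.state);
  }
}

listAllBatches('projects/my-project/locations/us-central1').catch(console.error);
```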

// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The name of the batch resource to delete.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}
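
Since `DeleteBatch` fails with `FAILED_PRECONDITION` while a batch is still active, a caller may want to handle that status separately. A sketch, assuming the usual gRPC mapping of `FAILED_PRECONDITION` to status code 9 and a hypothetical batch name:

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

// Hypothetical fully qualified batch name.
const name = 'projects/my-project/locations/us-central1/batches/daily-report-0042';

async function deleteBatch(): Promise<void> {
  try {
    await client.deleteBatch({name});
    console.log(`Deleted ${name}`);
  } catch (err: unknown) {
    // gRPC status 9 = FAILED_PRECONDITION: the batch is still PENDING,
    // RUNNING, or CANCELLING, so it cannot be deleted yet.
    if ((err as {code?: number}).code === 9) {
      console.warn(`${name} is not in a terminal state yet`);
      return;
    }
    throw err;
  }
}

deleteBatch().catch(console.error);
```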

// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch has been created and is awaiting execution.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Universally Unique Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered its current state.
  google.protobuf.Timestamp state_time = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
}
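
The `State` enum implies three terminal states: `CANCELLED`, `SUCCEEDED`, and `FAILED`. A minimal polling sketch over `GetBatch` built on that reading; the poll interval is arbitrary, and the comparison assumes the client surfaces enum values as their string names.

```ts
import {v1} from '@google-cloud/dataproc';

const client = new v1.BatchControllerClient();

// Terminal states per the State enum above.
const TERMINAL = new Set(['CANCELLED', 'SUCCEEDED', 'FAILED']);

async function waitForBatch(name: string, pollMs = 10_000): Promise<void> {
  for (;;) {
    const [batch] = await client.getBatch({name});
    console.log(`${name}: ${batch.state}`);
    if (TERMINAL.has(String(batch.state))) {
      if (String(batch.state) === 'FAILED') {
        // state_message carries the failure description when state is FAILED.
        console.error(batch.stateMessage);
      }
      return;
    }
    await new Promise(resolve => setTimeout(resolve, pollMs));
  }
}
```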

// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver. Must
  // be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
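
For illustration, a `pyspark_batch` fragment that exercises each field above; every `gs://` URI is a placeholder, and per the field docs, `args` carries job arguments only, never `--conf`.

```ts
// Hypothetical PySpark batch configuration (all URIs are placeholders).
const pysparkBatch = {
  mainPythonFileUri: 'gs://my-bucket/jobs/etl.py',
  args: ['--date=2021-10-19'],
  pythonFileUris: ['gs://my-bucket/libs/helpers.zip'],
  jarFileUris: ['gs://my-bucket/jars/gcs-connector.jar'],
  fileUris: ['gs://my-bucket/conf/app.yaml'],
  archiveUris: ['gs://my-bucket/envs/venv.tar.gz'],
};
```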

// A configuration for running an [Apache Spark](http://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains
    // the class must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
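
The `driver` oneof allows exactly one of the two fields to be set. Two sketches of the same intent, with hypothetical URIs:

```ts
// Option 1: point at a jar whose manifest declares the main class.
const byJar = {
  sparkBatch: {mainJarFileUri: 'gs://my-bucket/app.jar'},
};

// Option 2: name the main class and supply its jar via jar_file_uris.
const byClass = {
  sparkBatch: {
    mainClass: 'org.example.WordCount',
    jarFileUris: ['gs://my-bucket/app.jar'],
  },
};
```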

// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries as a batch workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}
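
Finally, a `spark_sql_batch` sketch showing `query_variables` substitution, equivalent to issuing `SET name="value";` before the script runs; the URI and variable names are hypothetical.

```ts
// The script can reference the variables as ${run_date} and ${source_table}.
const sparkSqlBatch = {
  queryFileUri: 'gs://my-bucket/sql/daily_rollup.sql',
  queryVariables: {run_date: '2021-10-19', source_table: 'events'},
  jarFileUris: ['gs://my-bucket/jars/udfs.jar'],
};
```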