feat: a bundle of new features for Dataproc V1 (#475)
* feat: update the Dataproc V1 API client library

This includes the following:
1. The new start and stop cluster methods (see the sketch after this list).
2. The ability to specify a metastore config in a cluster.
3. The ability to specify a (BETA) GKE cluster when creating a Dataproc cluster.
4. The ability to configure the behavior for private IPv6 cluster networking.
5. The ability to specify node affinity groups for clusters.
6. The ability to specify shielded VM configurations for clusters.
7. Support for service-account based secure multi-tenancy.
8. The ability to specify cluster labels for picking which cluster should run a job.
9. Components for DOCKER, DRUID, FLINK, HBASE, RANGER, and SOLR.
10. The ability to specify a DAG timeout for workflows.
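
A minimal sketch of the new stop and start cluster methods (item 1 above), using the Python client for illustration; the project, region, and cluster names are placeholders, and the snippet assumes a google-cloud-dataproc release that exposes these RPCs:

```python
from google.cloud import dataproc_v1

region = "us-central1"
# Dataproc is regional; point the client at the regional endpoint.
client = dataproc_v1.ClusterControllerClient(
    client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
)

target = {
    "project_id": "my-project",  # placeholder
    "region": region,
    "cluster_name": "my-cluster",  # placeholder
}

# Both RPCs return long-running operations that resolve to the Cluster.
client.stop_cluster(request=target).result()   # Shut the cluster VMs down.
client.start_cluster(request=target).result()  # Bring them back up later.
```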

Committer: @ojarjur
PiperOrigin-RevId: 367518225

Source-Link: googleapis/googleapis@439f098

Source-Link: https://github.com/googleapis/googleapis-gen/commit/6461b0e0f9c5b378abe577c2f061155086f777f7

* 🦉 Updates from OwlBot

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Jeffrey Rennie <[email protected]>
3 people authored Apr 26, 2021
1 parent 267ac65 commit 4803be4
Showing 13 changed files with 7,586 additions and 2,822 deletions.
272 changes: 253 additions & 19 deletions protos/google/cloud/dataproc/v1/clusters.proto

Large diffs are not rendered by default.
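
The clusters.proto diff is collapsed, but most of the cluster-side features listed above (metastore config, GKE clusters, private IPv6 networking, node groups, shielded VMs, secure multi-tenancy) land in that file. As a hedged illustration of two of them, here is a sketch using the Python client; the resource names are placeholders and the field names are assumptions inferred from the feature list, not taken from the rendered diff:

```python
from google.cloud import dataproc_v1

region = "us-central1"
client = dataproc_v1.ClusterControllerClient(
    client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
)

cluster = {
    "project_id": "my-project",  # placeholder
    "cluster_name": "my-cluster",  # placeholder
    "config": {
        # Feature 2: attach a Dataproc Metastore service (assumed field name).
        "metastore_config": {
            "dataproc_metastore_service": (
                "projects/my-project/locations/us-central1/services/my-metastore"
            ),
        },
        # Feature 6: shielded VM settings for cluster nodes (assumed field names).
        "gce_cluster_config": {
            "shielded_instance_config": {
                "enable_secure_boot": True,
                "enable_vtpm": True,
                "enable_integrity_monitoring": True,
            },
        },
    },
}

operation = client.create_cluster(
    request={"project_id": "my-project", "region": region, "cluster": cluster}
)
operation.result()  # create_cluster returns a long-running operation.
```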

70 changes: 47 additions & 23 deletions protos/google/cloud/dataproc/v1/jobs.proto
@@ -32,7 +32,8 @@ option java_package = "com.google.cloud.dataproc.v1";
 // The JobController provides methods to manage jobs.
 service JobController {
   option (google.api.default_host) = "dataproc.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
 
   // Submits a job to a cluster.
   rpc SubmitJob(SubmitJobRequest) returns (Job) {
@@ -44,7 +45,8 @@ service JobController {
   }
 
   // Submits job to a cluster.
-  rpc SubmitJobAsOperation(SubmitJobRequest) returns (google.longrunning.Operation) {
+  rpc SubmitJobAsOperation(SubmitJobRequest)
+      returns (google.longrunning.Operation) {
     option (google.api.http) = {
       post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation"
       body: "*"
@@ -286,9 +288,9 @@ message PySparkJob {
 
 // A list of queries to run on a cluster.
 message QueryList {
-  // Required. The queries to execute. You do not need to terminate a query
-  // with a semicolon. Multiple queries can be specified in one string
-  // by separating each with a semicolon. Here is an example of an Cloud
+  // Required. The queries to execute. You do not need to end a query expression
+  // with a semicolon. Multiple queries can be specified in one
+  // string by separating each with a semicolon. Here is an example of a
   // Dataproc API snippet that uses a QueryList to specify a HiveJob:
   //
   //     "hiveJob": {
@@ -323,7 +325,8 @@ message HiveJob {
 
   // Optional. Mapping of query variable names to values (equivalent to the
   // Hive command: `SET name="value";`).
-  map<string, string> script_variables = 4 [(google.api.field_behavior) = OPTIONAL];
+  map<string, string> script_variables = 4
+      [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. A mapping of property names and values, used to configure Hive.
   // Properties that conflict with values set by the Dataproc API may be
@@ -352,7 +355,8 @@ message SparkSqlJob {
 
   // Optional. Mapping of query variable names to values (equivalent to the
   // Spark SQL command: SET `name="value";`).
-  map<string, string> script_variables = 3 [(google.api.field_behavior) = OPTIONAL];
+  map<string, string> script_variables = 3
+      [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. A mapping of property names to values, used to configure
   // Spark SQL's SparkConf. Properties that conflict with values set by the
@@ -386,7 +390,8 @@ message PigJob {
 
   // Optional. Mapping of query variable names to values (equivalent to the Pig
   // command: `name=[value]`).
-  map<string, string> script_variables = 4 [(google.api.field_behavior) = OPTIONAL];
+  map<string, string> script_variables = 4
+      [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. A mapping of property names to values, used to configure Pig.
   // Properties that conflict with values set by the Dataproc API may be
@@ -479,6 +484,11 @@ message JobPlacement {
   // Output only. A cluster UUID generated by the Dataproc service when
   // the job is submitted.
   string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Optional. Cluster labels to identify a cluster where the job will be
+  // submitted.
+  map<string, string> cluster_labels = 3
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Dataproc job status.
@@ -557,7 +567,8 @@ message JobStatus {
   ];
 
   // Output only. The time when this state was entered.
-  google.protobuf.Timestamp state_start_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
+  google.protobuf.Timestamp state_start_time = 6
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. Additional state information, which includes
   // status reported by the agent.
@@ -566,8 +577,8 @@ message JobStatus {
 
 // Encapsulates the full scoping used to reference a job.
 message JobReference {
-  // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
-  // specified, must match the request project ID.
+  // Optional. The ID of the Google Cloud Platform project that the job belongs
+  // to. If specified, must match the request project ID.
   string project_id = 1 [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. The job ID, which must be unique within the project.
@@ -677,22 +688,26 @@ message Job {
   JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. The previous job status.
-  repeated JobStatus status_history = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
+  repeated JobStatus status_history = 13
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. The collection of YARN applications spun up by this job.
   //
   // **Beta** Feature: This report is available for testing purposes only. It
   // may be changed before final release.
-  repeated YarnApplication yarn_applications = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
+  repeated YarnApplication yarn_applications = 9
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. A URI pointing to the location of the stdout of the job's
   // driver program.
-  string driver_output_resource_uri = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
+  string driver_output_resource_uri = 17
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. If present, the location of miscellaneous control files
   // which may be used as part of job setup and handling. If not present,
   // control files may be placed in the same location as `driver_output_uri`.
-  string driver_control_files_uri = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
+  string driver_control_files_uri = 15
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Optional. The labels to associate with this job.
   // Label **keys** must contain 1 to 63 characters, and must conform to
@@ -711,8 +726,8 @@ message Job {
   // may be reused over time.
   string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];
 
-  // Output only. Indicates whether the job is completed. If the value is `false`,
-  // the job is still in progress. If `true`, the job is completed, and
+  // Output only. Indicates whether the job is completed. If the value is
+  // `false`, the job is still in progress. If `true`, the job is completed, and
   // `status.state` field will indicate if it was successful, failed,
   // or cancelled.
   bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY];
@@ -721,14 +736,19 @@ message Job {
 // Job scheduling options.
 message JobScheduling {
   // Optional. Maximum number of times per hour a driver may be restarted as
-  // a result of driver terminating with non-zero code before job is
+  // a result of driver exiting with non-zero code before job is
   // reported failed.
   //
   // A job may be reported as thrashing if driver exits with non-zero code
   // 4 times within 10 minute window.
   //
   // Maximum value is 10.
   int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Maximum number of times in total a driver may be restarted as a
+  // result of driver exiting with non-zero code before job is reported failed.
+  // Maximum value is 240.
+  int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // A request to submit a job.
@@ -744,8 +764,9 @@ message SubmitJobRequest {
   Job job = 2 [(google.api.field_behavior) = REQUIRED];
 
   // Optional. A unique id used to identify the request. If the server
-  // receives two [SubmitJobRequest][google.cloud.dataproc.v1.SubmitJobRequest] requests with the same
-  // id, then the second request will be ignored and the
+  // receives two
+  // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s
+  // with the same id, then the second request will be ignored and the
   // first [Job][google.cloud.dataproc.v1.Job] created and stored in the backend
   // is returned.
   //
@@ -769,7 +790,8 @@ message JobMetadata {
   string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
 
   // Output only. Job submission time.
-  google.protobuf.Timestamp start_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
+  google.protobuf.Timestamp start_time = 4
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // A request to get the resource representation for a job in a project.
@@ -822,7 +844,8 @@ message ListJobsRequest {
   // (default = match ALL jobs).
   //
   // If `filter` is provided, `jobStateMatcher` will be ignored.
-  JobStateMatcher job_state_matcher = 5 [(google.api.field_behavior) = OPTIONAL];
+  JobStateMatcher job_state_matcher = 5
+      [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. A filter constraining the jobs to list. Filters are
   // case-sensitive and have the following syntax:
@@ -862,7 +885,8 @@ message UpdateJobRequest {
   // <code>labels</code>, and the `PATCH` request body would specify the new
   // value. <strong>Note:</strong> Currently, <code>labels</code> is the only
   // field that can be updated.
-  google.protobuf.FieldMask update_mask = 5 [(google.api.field_behavior) = REQUIRED];
+  google.protobuf.FieldMask update_mask = 5
+      [(google.api.field_behavior) = REQUIRED];
 }
 
 // A list of jobs in a project.
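Two of the jobs.proto additions above combine naturally: JobPlacement.cluster_labels routes a job to any cluster carrying a matching label, and SubmitJobRequest's unique request id makes the submission idempotent. A minimal sketch with the Python client, assuming a google-cloud-dataproc release that includes the new field; the project, region, and label values are placeholders:

```python
import uuid

from google.cloud import dataproc_v1

region = "us-central1"
client = dataproc_v1.JobControllerClient(
    client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
)

job = {
    # New in this change: pick a target cluster by label, not by name.
    "placement": {"cluster_labels": {"environment": "staging"}},
    "hive_job": {"query_list": {"queries": ["SHOW TABLES;"]}},
}

response = client.submit_job(
    request={
        "project_id": "my-project",
        "region": region,
        "job": job,
        # Retrying with the same id returns the first Job instead of
        # creating a duplicate (see SubmitJobRequest above).
        "request_id": uuid.uuid4().hex,
    }
)
print(response.reference.job_id)
```
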
26 changes: 25 additions & 1 deletion protos/google/cloud/dataproc/v1/shared.proto
@@ -17,20 +17,38 @@ syntax = "proto3";
 package google.cloud.dataproc.v1;
 
 import "google/api/annotations.proto";
+import "google/api/field_behavior.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
 option java_multiple_files = true;
 option java_outer_classname = "SharedProto";
 option java_package = "com.google.cloud.dataproc.v1";
 
 // Cluster components that can be activated.
+// Next ID: 16.
 enum Component {
   // Unspecified component. Specifying this will cause Cluster creation to fail.
   COMPONENT_UNSPECIFIED = 0;
 
-  // The Anaconda python distribution.
+  // The Anaconda python distribution. The Anaconda component is not supported
+  // in the Dataproc
+  // <a
+  // href="/dataproc/docs/concepts/versioning/dataproc-release-2.0">2.0
+  // image</a>. The 2.0 image is pre-installed with Miniconda.
   ANACONDA = 5;
 
+  // Docker
+  DOCKER = 13;
+
+  // The Druid query engine. (alpha)
+  DRUID = 9;
+
+  // Flink
+  FLINK = 14;
+
+  // HBase. (beta)
+  HBASE = 11;
+
   // The Hive Web HCatalog (the REST service for accessing HCatalog).
   HIVE_WEBHCAT = 3;
 
@@ -40,6 +58,12 @@ enum Component {
   // The Presto query engine.
   PRESTO = 6;
 
+  // The Ranger service.
+  RANGER = 12;
+
+  // The Solr service.
+  SOLR = 10;
+
   // The Zeppelin notebook.
   ZEPPELIN = 4;
 
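The new Component values are requested at cluster-creation time through SoftwareConfig.optional_components. A short sketch with the Python client (placeholder names; note that DRUID is marked alpha and HBASE beta in the diff above):

```python
from google.cloud import dataproc_v1

region = "us-central1"
client = dataproc_v1.ClusterControllerClient(
    client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
)

cluster = {
    "project_id": "my-project",  # placeholder
    "cluster_name": "my-cluster",  # placeholder
    "config": {
        "software_config": {
            # Two of the components added by this change.
            "optional_components": [
                dataproc_v1.Component.DOCKER,
                dataproc_v1.Component.FLINK,
            ],
        },
    },
}

operation = client.create_cluster(
    request={"project_id": "my-project", "region": region, "cluster": cluster}
)
print(operation.result().cluster_name)  # Wait for the LRO to complete.
```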
