// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/field_behavior.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "SharedProto";
option java_package = "com.google.cloud.dataproc.v1";
// Runtime configuration for a workload.
message RuntimeConfig {
// Optional. Version of the batch runtime.
string version = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Custom container image for the job runtime environment. If
// not specified, a default container image will be used.
string container_image = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. A mapping of property names to values, which are used to configure workload
// execution.
map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
}
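
// A hedged textproto sketch of a RuntimeConfig; the container image path and
// the Spark property below are illustrative assumptions, not values defined
// in this file:
//
//   container_image: "gcr.io/my-project/custom-spark:latest"
//   properties { key: "spark.executor.memory" value: "4g" }
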
// Environment configuration for a workload.
message EnvironmentConfig {
// Optional. Execution configuration for a workload.
ExecutionConfig execution_config = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Peripherals configuration that the workload has access to.
PeripheralsConfig peripherals_config = 2 [(google.api.field_behavior) = OPTIONAL];
}
// Execution configuration for a workload.
message ExecutionConfig {
// Optional. Service account used to execute the workload.
string service_account = 2 [(google.api.field_behavior) = OPTIONAL];
// Network configuration for workload execution.
oneof network {
// Optional. Network URI to connect the workload to.
string network_uri = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. Subnetwork URI to connect the workload to.
string subnetwork_uri = 5 [(google.api.field_behavior) = OPTIONAL];
}
// Optional. Tags used for network traffic control.
repeated string network_tags = 6 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Cloud KMS key to use for encryption.
string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
}
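
// Because `network` is a oneof, an ExecutionConfig sets either `network_uri`
// or `subnetwork_uri`, never both. A hedged textproto sketch with assumed
// resource names:
//
//   service_account: "workload-sa@my-project.iam.gserviceaccount.com"
//   subnetwork_uri: "projects/my-project/regions/us-central1/subnetworks/default"
//   network_tags: "dataproc"
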
// Spark History Server configuration for the workload.
message SparkHistoryServerConfig {
// Optional. Resource name of an existing Dataproc Cluster to act as a Spark History
// Server for the workload.
//
// Example:
//
// * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
string dataproc_cluster = 1 [
(google.api.field_behavior) = OPTIONAL
];
}
// Auxiliary services configuration for a workload.
message PeripheralsConfig {
// Optional. Resource name of an existing Dataproc Metastore service.
//
// Example:
//
// * `projects/[project_id]/locations/[region]/services/[service_id]`
string metastore_service = 1 [
(google.api.field_behavior) = OPTIONAL
];
// Optional. The Spark History Server configuration for the workload.
SparkHistoryServerConfig spark_history_server_config = 2 [(google.api.field_behavior) = OPTIONAL];
}
// Runtime information about workload execution.
message RuntimeInfo {
// Output only. Map of remote access endpoints (such as web interfaces and APIs) to their
// URIs.
map<string, string> endpoints = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. A URI pointing to the location of the stdout and stderr of the workload.
string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. A URI pointing to the location of the diagnostics tarball.
string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// The cluster's GKE config.
message GkeClusterConfig {
// Optional. A target GKE cluster to deploy to. It must be in the same project and
// region as the Dataproc cluster (the GKE cluster can be zonal or regional).
// Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
string gke_cluster_target = 2 [
(google.api.field_behavior) = OPTIONAL
];
// Optional. GKE NodePools where workloads will be scheduled. At least one node pool
// must be assigned the 'default' role. Each role can be given to only a
// single NodePoolTarget. All NodePools must have the same location settings.
// If a NodePoolTarget is not specified, Dataproc constructs a default
// NodePoolTarget.
repeated GkeNodePoolTarget node_pool_target = 3 [(google.api.field_behavior) = OPTIONAL];
}
// The configuration for running the Dataproc cluster on Kubernetes.
message KubernetesClusterConfig {
// Optional. A namespace within the Kubernetes cluster to deploy into. If this namespace
// does not exist, it is created. If it exists, Dataproc
// verifies that another Dataproc VirtualCluster is not installed
// into it. If not specified, the name of the Dataproc Cluster is used.
string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];
oneof config {
// Required. The configuration for running the Dataproc cluster on GKE.
GkeClusterConfig gke_cluster_config = 2 [(google.api.field_behavior) = REQUIRED];
}
// Optional. The software configuration for this Dataproc cluster running on Kubernetes.
KubernetesSoftwareConfig kubernetes_software_config = 3 [(google.api.field_behavior) = OPTIONAL];
}
// The software configuration for this Dataproc cluster running on Kubernetes.
message KubernetesSoftwareConfig {
// The components that should be installed in this Dataproc cluster. The key
// must be a string from the KubernetesComponent enumeration. The value is
// the version of the software to be installed.
// At least one entry must be specified.
map<string, string> component_version = 1;
// The properties to set on daemon config files.
//
// Property keys are specified in `prefix:property` format, for example
// `spark:spark.kubernetes.container.image`. The following are supported
// prefixes and their mappings:
//
// * spark: `spark-defaults.conf`
//
// For more information, see [Cluster
// properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
map<string, string> properties = 2;
}
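
// A hedged textproto sketch of a KubernetesSoftwareConfig; the component key
// and version string are assumptions used only for illustration:
//
//   component_version { key: "SPARK" value: "3.1-dataproc-7" }
//   properties {
//     key: "spark:spark.kubernetes.container.image"
//     value: "gcr.io/my-project/spark:latest"
//   }
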
// GKE NodePools that Dataproc workloads run on.
message GkeNodePoolTarget {
// `Role` specifies whose tasks will run on the NodePool. The roles can be
// specific to workloads. Exactly one GkeNodePoolTarget within the
// VirtualCluster must have the 'default' role, which is used to run all
// workloads that are not associated with a NodePool.
enum Role {
// Role is unspecified.
ROLE_UNSPECIFIED = 0;
// Any roles that are not directly assigned to a NodePool run on the
// `default` role's NodePool.
DEFAULT = 1;
// Run controllers and webhooks.
CONTROLLER = 2;
// Run the Spark driver.
SPARK_DRIVER = 3;
// Run Spark executors.
SPARK_EXECUTOR = 4;
}
// Required. The target GKE NodePool.
// Format:
// 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
string node_pool = 1 [
(google.api.field_behavior) = REQUIRED
];
// Required. The types of roles for a GKE NodePool.
repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];
// Optional. The configuration for the GKE NodePool.
//
// If specified, Dataproc attempts to create a NodePool with the
// specified shape. If one with the same name already exists, it is
// verified against all specified fields. If a field differs, the
// virtual cluster creation will fail.
//
// If omitted, any NodePool with the specified name is used. If a
// NodePool with the specified name does not exist, Dataproc creates a
// NodePool with default values.
GkeNodePoolConfig node_pool_config = 3 [(google.api.field_behavior) = OPTIONAL];
}
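
// A hedged textproto sketch of a minimal GkeNodePoolTarget that pins the
// 'default' role to one NodePool (resource names are assumptions):
//
//   node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-default"
//   roles: DEFAULT
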
// The configuration of a GKE NodePool used by a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
message GkeNodePoolConfig {
// Parameters that describe cluster nodes.
message GkeNodeConfig {
// Optional. The name of a Compute Engine [machine
// type](https://cloud.google.com/compute/docs/machine-types).
string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Whether the nodes are created as [preemptible VM
// instances](https://cloud.google.com/compute/docs/instances/preemptible).
bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];
// Optional. The number of local SSD disks to attach to the node, which is limited by
// the maximum number of disks allowable per zone (see [Adding Local
// SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];
// Optional. A list of [hardware
// accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
// each node.
repeated GkeNodePoolAcceleratorConfig accelerators = 11 [(google.api.field_behavior) = OPTIONAL];
// Optional. [Minimum CPU
// platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
// to be used by this instance. The instance may be scheduled on the
// specified or a newer CPU platform. Specify the friendly names of CPU
// platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
}
// A GkeNodePoolAcceleratorConfig represents a hardware accelerator request
// for a NodePool.
message GkeNodePoolAcceleratorConfig {
// The number of accelerator cards exposed to an instance.
int64 accelerator_count = 1;
// The accelerator type resource name (see GPUs on Compute Engine).
string accelerator_type = 2;
}
// GkeNodePoolAutoscaling contains information the cluster autoscaler needs to
// adjust the size of the node pool to the current cluster usage.
message GkeNodePoolAutoscalingConfig {
// The minimum number of nodes in the NodePool. Must be >= 0 and <=
// max_node_count.
int32 min_node_count = 2;
// The maximum number of nodes in the NodePool. Must be >= min_node_count.
// **Note:** Quota must be sufficient to scale up the cluster.
int32 max_node_count = 3;
}
// Optional. The node pool configuration.
GkeNodeConfig config = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. The list of Compute Engine
// [zones](https://cloud.google.com/compute/docs/zones#available) where
// NodePool's nodes will be located.
//
// **Note:** Currently, only one zone may be specified.
//
// If a location is not specified during NodePool creation, Dataproc will
// choose a location.
repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];
// Optional. The autoscaler configuration for this NodePool. The autoscaler is enabled
// only when a valid configuration is present.
GkeNodePoolAutoscalingConfig autoscaling = 4 [(google.api.field_behavior) = OPTIONAL];
}
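
// A hedged textproto sketch of a GkeNodePoolConfig with autoscaling enabled;
// the machine type, zone, and node-count bounds are example values only:
//
//   config { machine_type: "n1-standard-4" }
//   locations: "us-central1-a"
//   autoscaling { min_node_count: 0 max_node_count: 10 }
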
// Cluster components that can be activated.
enum Component {
// Unspecified component. Specifying this will cause Cluster creation to fail.
COMPONENT_UNSPECIFIED = 0;
// The Anaconda python distribution. The Anaconda component is not supported
// in the Dataproc [2.0
// image](/dataproc/docs/concepts/versioning/dataproc-release-2.0). The 2.0
// image is pre-installed with Miniconda.
ANACONDA = 5;
// Docker
DOCKER = 13;
// The Druid query engine. (alpha)
DRUID = 9;
// Flink
FLINK = 14;
// HBase. (beta)
HBASE = 11;
// The Hive Web HCatalog (the REST service for accessing HCatalog).
HIVE_WEBHCAT = 3;
// The Jupyter Notebook.
JUPYTER = 1;
// The Presto query engine.
PRESTO = 6;
// The Ranger service.
RANGER = 12;
// The Solr service.
SOLR = 10;
// The Zeppelin notebook.
ZEPPELIN = 4;
// The Zookeeper service.
ZOOKEEPER = 8;
}
// Actions in response to failure of a resource associated with a cluster.
enum FailureAction {
// When FailureAction is unspecified, failure action defaults to NO_ACTION.
FAILURE_ACTION_UNSPECIFIED = 0;
// Take no action on failure to create a cluster resource. NO_ACTION is the
// default.
NO_ACTION = 1;
// Delete the failed cluster resource.
DELETE = 2;
}