summaryrefslogtreecommitdiff
path: root/third_party/googleapis/google/dataflow/v1beta3/streaming.proto
blob: 77577ca1b45797dc9e6e95b7c79656055a5257c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/dataflow/v1beta3;dataflow";
option java_multiple_files = true;
option java_outer_classname = "StreamingProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Global topology of the streaming Dataflow job, including all
// computations and their sharded locations.
message TopologyConfig {
  // The computations associated with a streaming Dataflow job.
  repeated ComputationTopology computations = 1;

  // The disks assigned to a streaming Dataflow job.
  repeated DataDiskAssignment data_disk_assignments = 2;

  // Maps user stage names to stable computation names.
  // Note: map iteration order is undefined on the wire.
  map<string, string> user_stage_to_computation_name_map = 3;

  // The size (in bits) of keys that will be assigned to source messages.
  int32 forwarding_key_bits = 4;

  // Version number for persistent state.
  int32 persistent_state_version = 5;
}

// Identifies a pubsub location to use for transferring data into or
// out of a streaming Dataflow job.
message PubsubLocation {
  // A pubsub topic, in the form of
  // "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
  string topic = 1;

  // A pubsub subscription, in the form of
  // "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
  string subscription = 2;

  // If set, contains a pubsub label from which to extract record timestamps.
  // If left empty, record timestamps will be generated upon arrival.
  string timestamp_label = 3;

  // If set, contains a pubsub label from which to extract record ids.
  // If left empty, record deduplication will be strictly best effort.
  string id_label = 4;

  // Indicates whether the pipeline allows late-arriving data.
  // NOTE(review): the field name suggests true means late data is *dropped*,
  // which is the opposite polarity of this comment — confirm the intended
  // semantics against server behavior before relying on either reading.
  bool drop_late_data = 5;

  // If set, specifies the pubsub subscription that will be used for tracking
  // custom time timestamps for watermark estimation.
  string tracking_subscription = 6;

  // If true, then the client has requested to get pubsub attributes.
  bool with_attributes = 7;
}

// Identifies the location of a streaming computation stage, for
// stage-to-stage communication.
message StreamingStageLocation {
  // Identifies the particular stream within the streaming Dataflow
  // job.
  string stream_id = 1;
}

// Identifies the location of a streaming side input.
message StreamingSideInputLocation {
  // Identifies the particular side input within the streaming Dataflow job.
  string tag = 1;

  // Identifies the state family where this side input is stored.
  string state_family = 2;
}

// Identifies the location of a custom source.
message CustomSourceLocation {
  // Whether this source is stateful.
  bool stateful = 1;
}

// Describes a stream of data, either as input to be processed or as
// output of a streaming Dataflow job.
message StreamLocation {
  // A specification of a stream's location. Exactly one of the variants
  // below is set; setting one clears the others.
  oneof location {
    // The stream is part of another computation within the current
    // streaming Dataflow job.
    StreamingStageLocation streaming_stage_location = 1;

    // The stream is a pubsub stream.
    PubsubLocation pubsub_location = 2;

    // The stream is a streaming side input.
    StreamingSideInputLocation side_input_location = 3;

    // The stream is a custom source.
    CustomSourceLocation custom_source_location = 4;
  }
}

// State family configuration.
message StateFamilyConfig {
  // The state family value.
  string state_family = 1;

  // If true, this family corresponds to a read operation.
  bool is_read = 2;
}

// All configuration data for a particular Computation.
//
// NOTE(review): field number 6 is unused. If a field was removed there,
// consider adding `reserved 6;` (and its name) to prevent accidental reuse —
// confirm against the API's change history first.
message ComputationTopology {
  // The system stage name.
  string system_stage_name = 1;

  // The ID of the computation.
  string computation_id = 5;

  // The key ranges processed by the computation.
  repeated KeyRangeLocation key_ranges = 2;

  // The inputs to the computation.
  repeated StreamLocation inputs = 3;

  // The outputs from the computation.
  repeated StreamLocation outputs = 4;

  // The state family values.
  repeated StateFamilyConfig state_families = 7;
}

// Location information for a specific key-range of a sharded computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeLocation {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The physical location of this range assignment to be used for
  // streaming computation cross-worker message delivery.
  string delivery_endpoint = 3;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 5;

  // DEPRECATED. The location of the persistent state for this range, as a
  // persistent directory in the worker local filesystem.
  // Retained (rather than deleted) so the field number stays occupied and
  // old clients that still send it remain wire-compatible.
  string deprecated_persistent_directory = 4 [deprecated = true];
}

// Describes mounted data disk.
message MountedDataDisk {
  // The name of the data disk.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 1;
}

// Data disk assignment for a given VM instance.
message DataDiskAssignment {
  // VM instance name the data disks mounted to, for example
  // "myproject-1014-104817-4c2-harness-0".
  string vm_instance = 1;

  // Mounted data disks. The order is important: a data disk's 0-based index
  // in this list defines which persistent directory the disk is mounted to,
  // for example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" },
  // { "myproject-1014-104817-4c2-harness-0-disk-1" }.
  repeated string data_disks = 2;
}

// Data disk assignment information for a specific key-range of a sharded
// computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeDataDiskAssignment {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 3;
}

// Describes full or partial data disk assignment information of the computation
// ranges.
message StreamingComputationRanges {
  // The ID of the computation.
  string computation_id = 1;

  // Data disk assignments for ranges from this computation.
  repeated KeyRangeDataDiskAssignment range_assignments = 2;
}

// Streaming appliance snapshot configuration.
message StreamingApplianceSnapshotConfig {
  // If set, indicates the snapshot id for the snapshot being performed.
  string snapshot_id = 1;

  // Indicates which endpoint is used to import appliance state.
  string import_state_endpoint = 2;
}