// path: root/third_party/googleapis/google/cloud/aiplatform/v1beta1/endpoint.proto
// blob: 0236e532f7cdc0c99a9f0ea5456a727e5d685199
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";
import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "EndpointProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// A resource into which Models are deployed. Once Models are deployed, the
// Endpoint is called to obtain predictions and explanations.
message Endpoint {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/Endpoint"
    pattern: "projects/{project}/locations/{location}/endpoints/{endpoint}"
  };

  // Output only. The resource name of the Endpoint.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The display name of the Endpoint.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // The description of the Endpoint.
  string description = 3;

  // Output only. The models deployed in this Endpoint.
  // To add or remove DeployedModels use
  // [EndpointService.DeployModel][google.cloud.aiplatform.v1beta1.EndpointService.DeployModel]
  // and
  // [EndpointService.UndeployModel][google.cloud.aiplatform.v1beta1.EndpointService.UndeployModel]
  // respectively.
  repeated DeployedModel deployed_models = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A map from a DeployedModel's ID to the percentage of this Endpoint's
  // traffic that should be forwarded to that DeployedModel.
  //
  // If a DeployedModel's ID is not listed in this map, then it receives no
  // traffic.
  //
  // The traffic percentage values must add up to 100, or the map must be empty
  // if the Endpoint is not to accept any traffic at the moment.
  map<string, int32> traffic_split = 5;

  // Used to perform consistent read-modify-write updates. If not set, a blind
  // "overwrite" update happens.
  string etag = 6;

  // The labels with user-defined metadata to organize your Endpoints.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 7;

  // Output only. Timestamp when this Endpoint was created.
  google.protobuf.Timestamp create_time = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this Endpoint was last updated.
  google.protobuf.Timestamp update_time = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Customer-managed encryption key spec for an Endpoint. If set, this
  // Endpoint and all sub-resources of this Endpoint will be secured by
  // this key.
  EncryptionSpec encryption_spec = 10;

  // The full name of the Google Compute Engine
  // [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
  // to which the Endpoint should be peered.
  //
  // Private services access must already be configured for the network. If left
  // unspecified, the Endpoint is not peered with any network.
  //
  // Only one of the fields,
  // [network][google.cloud.aiplatform.v1beta1.Endpoint.network] or
  // [enable_private_service_connect][google.cloud.aiplatform.v1beta1.Endpoint.enable_private_service_connect],
  // can be set.
  //
  // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
  // `projects/{project}/global/networks/{network}`.
  // Where `{project}` is a project number, as in `12345`, and `{network}` is
  // a network name.
  string network = 13 [(google.api.resource_reference) = {
                         type: "compute.googleapis.com/Network"
                       }];

  // Deprecated: If true, expose the Endpoint via private service connect.
  //
  // Only one of the fields,
  // [network][google.cloud.aiplatform.v1beta1.Endpoint.network] or
  // [enable_private_service_connect][google.cloud.aiplatform.v1beta1.Endpoint.enable_private_service_connect],
  // can be set.
  bool enable_private_service_connect = 17 [deprecated = true];

  // Output only. Resource name of the Model Monitoring job associated with
  // this Endpoint if monitoring is enabled by
  // [JobService.CreateModelDeploymentMonitoringJob][google.cloud.aiplatform.v1beta1.JobService.CreateModelDeploymentMonitoringJob].
  // Format:
  // `projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}`
  string model_deployment_monitoring_job = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/ModelDeploymentMonitoringJob"
    }
  ];

  // Configures the request-response logging for online prediction.
  PredictRequestResponseLoggingConfig predict_request_response_logging_config = 18;
}

// A deployment of a Model. Endpoints contain one or more DeployedModels.
message DeployedModel {
  // The prediction (for example, the machine) resources that the DeployedModel
  // uses. The user is billed for the resources (at least their minimal amount)
  // even if the DeployedModel receives no traffic.
  // Not all Models support all resources types. See
  // [Model.supported_deployment_resources_types][google.cloud.aiplatform.v1beta1.Model.supported_deployment_resources_types].
  oneof prediction_resources {
    // A description of resources that are dedicated to the DeployedModel, and
    // that need a higher degree of manual configuration.
    DedicatedResources dedicated_resources = 7;

    // A description of resources that are, to a large degree, decided by
    // Vertex AI, and that require only a modest additional configuration.
    AutomaticResources automatic_resources = 8;

    // The resource name of the shared DeploymentResourcePool to deploy on.
    // Format:
    // `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`
    string shared_resources = 17 [(google.api.resource_reference) = {
                                    type: "aiplatform.googleapis.com/DeploymentResourcePool"
                                  }];
  }

  // Immutable. The ID of the DeployedModel. If not provided upon deployment,
  // Vertex AI will generate a value for this ID.
  //
  // This value should be 1-10 characters, and valid characters are /[0-9]/.
  string id = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Required. The resource name of the Model that this is the deployment of.
  // Note that the Model may be in a different location than the
  // DeployedModel's Endpoint.
  //
  // The resource name may contain a version ID or version alias to specify the
  // version. If no version is specified, the default version will be deployed.
  string model = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the model that is deployed.
  string model_version_id = 18 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The display name of the DeployedModel. If not provided upon creation,
  // the Model's display_name is used.
  string display_name = 3;

  // Output only. Timestamp when the DeployedModel was created.
  google.protobuf.Timestamp create_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Explanation configuration for this DeployedModel.
  //
  // When deploying a Model using
  // [EndpointService.DeployModel][google.cloud.aiplatform.v1beta1.EndpointService.DeployModel],
  // this value overrides the value of
  // [Model.explanation_spec][google.cloud.aiplatform.v1beta1.Model.explanation_spec].
  // All fields of
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // are optional in the request. If a field of
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // is not populated, the value of the same field of
  // [Model.explanation_spec][google.cloud.aiplatform.v1beta1.Model.explanation_spec]
  // is inherited. If the corresponding
  // [Model.explanation_spec][google.cloud.aiplatform.v1beta1.Model.explanation_spec]
  // is not populated, all fields of the
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // will be used for the explanation configuration.
  ExplanationSpec explanation_spec = 9;

  // The service account that the DeployedModel's container runs as. Specify the
  // email address of the service account. If this service account is not
  // specified, the container runs as a service account that doesn't have access
  // to the resource project.
  //
  // Users deploying the Model must have the `iam.serviceAccounts.actAs`
  // permission on this service account.
  string service_account = 11;

  // If true, the container of the DeployedModel instances will send `stderr`
  // and `stdout` streams to Stackdriver Logging.
  //
  // Only supported for custom-trained Models and AutoML Tabular Models.
  bool enable_container_logging = 12;

  // If true, online prediction access logs are sent to Stackdriver Logging.
  // These logs are like standard server access logs, containing
  // information like timestamp and latency for each prediction request.
  //
  // Note that Stackdriver logs may incur a cost, especially if your project
  // receives prediction requests at a high queries per second rate (QPS).
  // Estimate your costs before enabling this option.
  bool enable_access_logging = 13;

  // Output only. Provides paths for users to send predict/explain/health
  // requests directly to the deployed model services running on Cloud via
  // private services access. This field is populated if
  // [network][google.cloud.aiplatform.v1beta1.Endpoint.network] is configured.
  PrivateEndpoints private_endpoints = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// PrivateEndpoints proto is used to provide paths for users to send
// requests privately.
// To send requests via private services access, use predict_http_uri,
// explain_http_uri or health_http_uri. To send requests via private service
// connect, use service_attachment.
message PrivateEndpoints {
  // Output only. HTTP(S) path to send prediction requests.
  string predict_http_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. HTTP(S) path to send explain requests.
  string explain_http_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. HTTP(S) path to send health check requests.
  string health_http_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the service attachment resource. Populated if
  // private service connect is enabled.
  string service_attachment = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Configuration for logging request-response to a BigQuery table.
message PredictRequestResponseLoggingConfig {
  // Whether logging is enabled.
  bool enabled = 1;

  // Percentage of requests to be logged, expressed as a fraction in the
  // range (0, 1].
  double sampling_rate = 2;

  // BigQuery table for logging.
  // If only a project is given, a new dataset will be created with name
  // `logging_<endpoint-display-name>_<endpoint-id>`, where
  // `<endpoint-display-name>` will be made BigQuery-dataset-name compatible
  // (e.g. most special characters will become underscores). If no table name
  // is given, a new table will be created with name
  // `request_response_logging`.
  BigQueryDestination bigquery_destination = 3;
}