summaryrefslogtreecommitdiff
path: root/third_party/googleapis/google/cloud/aiplatform/v1/dataset.proto
blob: 6c686986a2c7ea8cafd88e9d5fe927245276b24e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "DatasetProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A Dataset groups DataItems together with the Annotations made on them.
message Dataset {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/Dataset"
    pattern: "projects/{project}/locations/{location}/datasets/{dataset}"
  };

  // Output only. Resource name of this Dataset.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. User-defined name of the Dataset. It may be up to 128
  // characters long and may consist of any UTF-8 characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Description of the Dataset.
  string description = 16 [(google.api.field_behavior) = OPTIONAL];

  // Required. Points to a YAML file stored on Google Cloud Storage that
  // describes additional information about the Dataset.
  // The schema is defined as an OpenAPI 3.0.2 Schema Object.
  // Schema files that can be used here are found in
  // gs://google-cloud-aiplatform/schema/dataset/metadata/.
  string metadata_schema_uri = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Additional information about the Dataset.
  google.protobuf.Value metadata = 8 [(google.api.field_behavior) = REQUIRED];

  // Output only. Time at which this Dataset was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which this Dataset was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Used to perform consistent read-modify-write updates. When it is not
  // set, a blind "overwrite" update happens.
  string etag = 6;

  // Labels carrying user-defined metadata for organizing your Datasets.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints) and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // No more than 64 user labels can be associated with one Dataset (System
  // labels are excluded).
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  // System reserved label keys are prefixed with "aiplatform.googleapis.com/"
  // and are immutable. The following system labels exist for each Dataset:
  //
  // * "aiplatform.googleapis.com/dataset_metadata_schema": output only, its
  //   value is the
  //   [metadata_schema's][google.cloud.aiplatform.v1.Dataset.metadata_schema_uri]
  //   title.
  map<string, string> labels = 7;

  // Customer-managed encryption key spec for a Dataset. When set, this
  // Dataset and all of its sub-resources will be secured by this key.
  EncryptionSpec encryption_spec = 11;
}

// Describes where data is imported from when loading it into a Dataset, and
// which labels will be applied to the DataItems and the Annotations.
message ImportDataConfig {
  // The source of the input.
  oneof source {
    // Google Cloud Storage location of the input content.
    GcsSource gcs_source = 1;
  }

  // Labels that will be applied to newly imported DataItems.
  //
  // If a DataItem identical to one being imported already exists in the
  // Dataset, these labels will be appended to those of the already existing
  // one; if a label with an identical key was imported before, the old label
  // value will be overwritten.
  //
  // If two DataItems are identical within the same import data operation,
  // their labels will be combined, and if a key collision happens in this
  // case, one of the values will be picked randomly.
  //
  // Two DataItems are considered identical if their content bytes are
  // identical (e.g. image bytes or pdf bytes).
  //
  // These labels will be overridden by Annotation labels specified inside the
  // index file referenced by
  // [import_schema_uri][google.cloud.aiplatform.v1.ImportDataConfig.import_schema_uri],
  // e.g. jsonl file.
  map<string, string> data_item_labels = 2;

  // Required. Points to a YAML file stored on Google Cloud Storage that
  // describes the import format. Validation will be done against the schema.
  // The schema is defined as an [OpenAPI 3.0.2 Schema
  // Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject).
  string import_schema_uri = 4 [(google.api.field_behavior) = REQUIRED];
}

// Describes which part of the Dataset is to be exported, where the export is
// written, and how to export.
message ExportDataConfig {
  // The destination of the output.
  oneof destination {
    // Google Cloud Storage location where the output is to be written to.
    //
    // In the given directory a new directory will be created with name:
    // `export-data-<dataset-display-name>-<timestamp-of-export-call>` where
    // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
    // output will be written into that directory.
    //
    // Inside that directory, annotations with the same schema will be grouped
    // into sub directories which are named with the corresponding
    // annotations' schema title. Inside these sub directories, a schema.yaml
    // will be created to describe the output format.
    GcsDestination gcs_destination = 1;
  }

  // A filter on Annotations of the Dataset. Only Annotations that match this
  // filter and are on to-be-exported DataItems (specified by
  // [data_items_filter][]) will be exported. The filter syntax is the same as
  // in
  // [ListAnnotations][google.cloud.aiplatform.v1.DatasetService.ListAnnotations].
  string annotations_filter = 2;
}