Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ValueType.STRUCT to Feature #5381

Merged
merged 6 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions packages/google-cloud-aiplatform/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ syntax = "proto3";
package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1/openapi.proto";
import "google/cloud/aiplatform/v1/tool.proto";
import "google/protobuf/duration.proto";
import "google/type/date.proto";
Expand Down Expand Up @@ -168,6 +169,15 @@ message GenerationConfig {
// otherwise the behavior is undefined.
// This is a preview feature.
string response_mime_type = 13 [(google.api.field_behavior) = OPTIONAL];

// Optional. The `Schema` object allows the definition of input and output
// data types. These types can be objects, but also primitives and arrays.
// Represents a select subset of an [OpenAPI 3.0 schema
// object](https://spec.openapis.org/oas/v3.0.3#schema).
// If set, a compatible response_mime_type must also be set.
// Compatible mimetypes:
// `application/json`: Schema for JSON response.
optional Schema response_schema = 16 [(google.api.field_behavior) = OPTIONAL];
}

// Safety settings.
Expand Down Expand Up @@ -368,57 +378,12 @@ message Candidate {
[(google.api.field_behavior) = OUTPUT_ONLY];
}

// Segment of the content, identified by a Part index and a byte range within
// that Part.
message Segment {
  // Output only. Index of the Part object within its parent Content object.
  int32 part_index = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Inclusive start of the segment: a byte offset measured from
  // the start of the given Part, starting at zero.
  int32 start_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Exclusive end of the segment: a byte offset measured from
  // the start of the given Part, starting at zero.
  int32 end_index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Grounding attribution for a segment of the content.
message GroundingAttribution {
  // Attribution that comes from the web.
  message Web {
    // Output only. URI reference of the attribution.
    string uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Title of the attribution.
    string title = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The reference being attributed. `web` is the only member declared here.
  oneof reference {
    // Optional. Attribution from the web.
    Web web = 3 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The segment of the content that this attribution belongs to.
  Segment segment = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Confidence score of the attribution, ranging from
  // 0 to 1, where 1 is the most confident.
  optional float confidence_score = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];
}

// Metadata returned to client when grounding is enabled.
message GroundingMetadata {
// Optional. Web search queries for the follow-up web search.
repeated string web_search_queries = 1
[(google.api.field_behavior) = OPTIONAL];

// Optional. List of grounding attributions.
repeated GroundingAttribution grounding_attributions = 2
[(google.api.field_behavior) = OPTIONAL];

// Optional. Google search entry for the follow-up web searches.
optional SearchEntryPoint search_entry_point = 4
[(google.api.field_behavior) = OPTIONAL];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ message Dataset {
// is
// `projects/{project}/locations/{location}/metadataStores/{metadata_store}/artifacts/{artifact}`.
string metadata_artifact = 17 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. Reference to the public base model last used by the dataset. Only
// set for prompt datasets.
string model_reference = 18 [(google.api.field_behavior) = OPTIONAL];
}

// Describes the location from where we import data into a Dataset, together
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@ service DatasetService {
};
}

// Updates a DatasetVersion and returns a DatasetVersion.
rpc UpdateDatasetVersion(UpdateDatasetVersionRequest)
    returns (DatasetVersion) {
  // HTTP binding: PATCH on the DatasetVersion resource name taken from
  // `dataset_version.name`; the request body carries `dataset_version`.
  option (google.api.http) = {
    patch: "/v1/{dataset_version.name=projects/*/locations/*/datasets/*/datasetVersions/*}"
    body: "dataset_version"
  };
  option (google.api.method_signature) = "dataset_version,update_mask";
}

// Deletes a Dataset version.
rpc DeleteDatasetVersion(DeleteDatasetVersionRequest)
returns (google.longrunning.Operation) {
Expand Down Expand Up @@ -292,6 +302,21 @@ message UpdateDatasetRequest {
[(google.api.field_behavior) = REQUIRED];
}

// Request message for
// [DatasetService.UpdateDatasetVersion][google.cloud.aiplatform.v1.DatasetService.UpdateDatasetVersion].
message UpdateDatasetVersionRequest {
  // Required. The DatasetVersion that replaces the resource on the server.
  DatasetVersion dataset_version = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The update mask that applies to the resource. See
  // [google.protobuf.FieldMask][google.protobuf.FieldMask] for the
  // `FieldMask` definition.
  //
  // Updatable fields:
  //
  // * `display_name`
  google.protobuf.FieldMask update_mask = 2
      [(google.api.field_behavior) = REQUIRED];
}

// Request message for
// [DatasetService.ListDatasets][google.cloud.aiplatform.v1.DatasetService.ListDatasets].
message ListDatasetsRequest {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,8 @@ message DatasetVersion {
(google.api.field_behavior) = OUTPUT_ONLY,
(google.api.field_behavior) = REQUIRED
];

// Output only. Reference to the public base model last used by the dataset
// version. Only set for prompt dataset versions.
string model_reference = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/protobuf/timestamp.proto";

Expand Down Expand Up @@ -47,6 +48,29 @@ message DeploymentResourcePool {
DedicatedResources dedicated_resources = 2
[(google.api.field_behavior) = REQUIRED];

// Customer-managed encryption key spec for a DeploymentResourcePool. If set,
// this DeploymentResourcePool will be secured by this key. Endpoints and the
// DeploymentResourcePool they deploy in need to have the same EncryptionSpec.
EncryptionSpec encryption_spec = 5;

// The service account that the DeploymentResourcePool's container(s) run as.
// Specify the email address of the service account. If this service account
// is not specified, the container(s) run as a service account that doesn't
// have access to the resource project.
//
// Users deploying the Models to this DeploymentResourcePool must have the
// `iam.serviceAccounts.actAs` permission on this service account.
string service_account = 6;

// If the DeploymentResourcePool is deployed with custom-trained Models or
// AutoML Tabular Models, the container(s) of the DeploymentResourcePool will
// send `stderr` and `stdout` streams to Cloud Logging by default.
// Please note that the logs incur costs, which are subject to [Cloud Logging
// pricing](https://cloud.google.com/logging/pricing).
//
// Users can disable container logging by setting this flag to true.
bool disable_container_logging = 7;

// Output only. Timestamp when this DeploymentResourcePool was created.
google.protobuf.Timestamp create_time = 4
[(google.api.field_behavior) = OUTPUT_ONLY];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ message Feature {

// Used for Feature that is bytes.
BYTES = 13;

// Used for Feature that is struct.
STRUCT = 14;
}

// Immutable. Name of the Feature.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ message FeatureGroup {

oneof source {
// Indicates that features for this group come from BigQuery Table/View.
// By default treats the source as a sparse time series source, which is
// required to have an entity_id and a feature_timestamp column in the
// source.
// By default treats the source as a sparse time series source. The BigQuery
// source table or view must have at least one entity ID column and a column
// named `feature_timestamp`.
BigQuery big_query = 7;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
Expand Down Expand Up @@ -140,4 +141,8 @@ message FeatureOnlineStore {
// is different from common Vertex service endpoint.
DedicatedServingEndpoint dedicated_serving_endpoint = 10
[(google.api.field_behavior) = OPTIONAL];

// Optional. Customer-managed encryption key spec for data storage. If set,
// online store will be secured by this key.
EncryptionSpec encryption_spec = 13 [(google.api.field_behavior) = OPTIONAL];
}
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,30 @@ message FeatureValue {

// Bytes feature value.
bytes bytes_value = 13;

// A struct type feature value.
StructValue struct_value = 15;
}

// Metadata of feature value.
Metadata metadata = 14;
}

// Feature value of struct (or object) type.
message StructValue {
  // The list of field values that make up the struct.
  repeated StructFieldValue values = 1;
}

// A single field of a struct (or object) type feature value.
message StructFieldValue {
  // Name of this field in the struct feature.
  string name = 1;

  // Value of this field. Because a FeatureValue can itself carry a
  // `struct_value`, struct feature values can be nested.
  FeatureValue value = 2;
}

// Container for list of values.
message FeatureValueList {
// A list of feature values. All of them should be the same data type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,17 @@ message Index {

// A datapoint of Index.
message IndexDatapoint {
// Sparse-index feature embedding vector: an array of numbers together with
// the dimensions in which those values are located.
message SparseEmbedding {
  // Required. Embedding values of the sparse vector.
  repeated float values = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Indexes for the embedding values of the sparse vector.
  repeated int64 dimensions = 2 [(google.api.field_behavior) = REQUIRED];
}

// Restriction of a datapoint which describes its attributes (tokens) from
// each of several attribute categories (namespaces).
message Restriction {
Expand Down Expand Up @@ -209,10 +220,13 @@ message IndexDatapoint {
// Required. Unique identifier of the datapoint.
string datapoint_id = 1 [(google.api.field_behavior) = REQUIRED];

// Required. Feature embedding vector. An array of numbers with the length of
// [NearestNeighborSearchConfig.dimensions].
// Required. Feature embedding vector for dense index. An array of numbers
// with the length of [NearestNeighborSearchConfig.dimensions].
repeated float feature_vector = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Feature embedding vector for sparse index.
SparseEmbedding sparse_embedding = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. List of Restrict of the datapoint, used to perform "restricted
// searches" where boolean rules are used to filter the subset of the database
// eligible for matching. This uses categorical tokens. See:
Expand All @@ -232,9 +246,12 @@ message IndexDatapoint {

// Stats of the Index.
message IndexStats {
// Output only. The number of vectors in the Index.
// Output only. The number of dense vectors in the Index.
int64 vectors_count = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. The number of sparse vectors in the Index.
int64 sparse_vectors_count = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. The number of shards in the Index.
int32 shards_count = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
}
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,8 @@ message NearestNeighborSearchOperationMetadata {
// The embedding id is not valid.
INVALID_EMBEDDING_ID = 5;

// The size of the embedding vectors does not match with the specified
// dimension.
// The size of the dense embedding vectors does not match with the
// specified dimension.
EMBEDDING_SIZE_MISMATCH = 6;

// The `namespace` field is missing.
Expand All @@ -337,8 +337,17 @@ message NearestNeighborSearchOperationMetadata {
// File is not in UTF_8 format.
INVALID_ENCODING = 13;

// Error parsing sparse dimensions field.
INVALID_SPARSE_DIMENSIONS = 14;

// Token restrict value is invalid.
INVALID_TOKEN_VALUE = 15;

// Invalid sparse embedding.
INVALID_SPARSE_EMBEDDING = 16;

// Invalid dense embedding.
INVALID_EMBEDDING = 17;
}

// The error type of this record.
Expand Down Expand Up @@ -373,6 +382,12 @@ message NearestNeighborSearchOperationMetadata {
// invalid records that couldn't be parsed.
// Up to 50 partial errors will be reported.
repeated RecordError partial_errors = 4;

// Number of sparse records in this file that were successfully processed.
int64 valid_sparse_record_count = 5;

// Number of sparse records in this file we skipped due to validation errors.
int64 invalid_sparse_record_count = 6;
}

// The validation stats of the content (per file) to be inserted or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ message FindNeighborsRequest {
// A query to find a number of the nearest neighbors (most similar vectors)
// of a vector.
message Query {
// Parameters for the RRF algorithm that combines search results.
message RRF {
  // Required. Weight that balances dense results against sparse results.
  // For example, with an alpha of 0 only sparse results are returned, and
  // with an alpha of 1 only dense results are returned.
  float alpha = 1 [(google.api.field_behavior) = REQUIRED];
}

// Ranking options for the query. `rrf` is the only member declared here.
oneof ranking {
  // Optional. Represents the RRF algorithm that combines search results.
  RRF rrf = 6 [(google.api.field_behavior) = OPTIONAL];
}

// Required. The datapoint/vector whose nearest neighbors should be searched
// for.
IndexDatapoint datapoint = 1 [(google.api.field_behavior) = REQUIRED];
Expand Down Expand Up @@ -131,8 +144,11 @@ message FindNeighborsResponse {
// fields are populated.
IndexDatapoint datapoint = 1;

// The distance between the neighbor and the query vector.
// The distance between the neighbor and the dense embedding query.
double distance = 2;

// The distance between the neighbor and the query sparse_embedding.
double sparse_distance = 3;
}

// Nearest neighbors for one query.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ message MetadataStore {
int64 disk_utilization_bytes = 1;
}

// Settings for the Dataplex integration.
message DataplexConfig {
  // Optional. Whether Data Lineage synchronization is enabled for Vertex
  // Pipelines.
  bool enabled_pipelines_lineage = 1
      [(google.api.field_behavior) = OPTIONAL];
}

// Output only. The resource name of the MetadataStore instance.
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

Expand All @@ -64,4 +71,7 @@ message MetadataStore {

// Output only. State information of the MetadataStore.
MetadataStoreState state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. Dataplex integration settings.
DataplexConfig dataplex_config = 8 [(google.api.field_behavior) = OPTIONAL];
}
Loading
Loading