Skip to content

Commit

Permalink
feat(bigquery): add dataset/table collation (#7235)
Browse files Browse the repository at this point in the history
  • Loading branch information
alvarowolfx authored Jan 13, 2023
1 parent 1d165ff commit 9f7bbeb
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 0 deletions.
19 changes: 19 additions & 0 deletions bigquery/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ type DatasetMetadata struct {
// all newly created partitioned tables in the dataset.
DefaultPartitionExpiration time.Duration

// Defines the default collation specification of future tables
// created in the dataset. If a table is created in this dataset without
// table-level default collation, then the table inherits the dataset default
// collation, which is applied to the string fields that do not have explicit
// collation specified. A change to this field affects only tables created
// afterwards, and does not alter the existing tables.
// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
DefaultCollation string

// These fields are read-only.
CreationTime time.Time
LastModifiedTime time.Time // When the dataset or any of its tables were modified.
Expand Down Expand Up @@ -104,6 +113,10 @@ type DatasetMetadataToUpdate struct {
// in the dataset.
DefaultEncryptionConfig *EncryptionConfig

// Defines the default collation specification of future tables
// created in the dataset.
DefaultCollation optional.String

// The entire access list. It is not possible to replace individual entries.
Access []*AccessEntry

Expand Down Expand Up @@ -174,6 +187,7 @@ func (dm *DatasetMetadata) toBQ() (*bq.Dataset, error) {
ds.Location = dm.Location
ds.DefaultTableExpirationMs = int64(dm.DefaultTableExpiration / time.Millisecond)
ds.DefaultPartitionExpirationMs = int64(dm.DefaultPartitionExpiration / time.Millisecond)
ds.DefaultCollation = string(dm.DefaultCollation)
ds.Labels = dm.Labels
var err error
ds.Access, err = accessListToBQ(dm.Access)
Expand Down Expand Up @@ -259,6 +273,7 @@ func bqToDatasetMetadata(d *bq.Dataset, c *Client) (*DatasetMetadata, error) {
LastModifiedTime: unixMillisToTime(d.LastModifiedTime),
DefaultTableExpiration: time.Duration(d.DefaultTableExpirationMs) * time.Millisecond,
DefaultPartitionExpiration: time.Duration(d.DefaultPartitionExpirationMs) * time.Millisecond,
DefaultCollation: d.DefaultCollation,
DefaultEncryptionConfig: bqToEncryptionConfig(d.DefaultEncryptionConfiguration),
Description: d.Description,
Name: d.FriendlyName,
Expand Down Expand Up @@ -344,6 +359,10 @@ func (dm *DatasetMetadataToUpdate) toBQ() (*bq.Dataset, error) {
ds.DefaultPartitionExpirationMs = int64(dur / time.Millisecond)
}
}
if dm.DefaultCollation != nil {
ds.DefaultCollation = optional.ToString(dm.DefaultCollation)
forceSend("DefaultCollation")
}
if dm.DefaultEncryptionConfig != nil {
ds.DefaultEncryptionConfiguration = dm.DefaultEncryptionConfig.toBQ()
ds.DefaultEncryptionConfiguration.ForceSendFields = []string{"KmsKeyName"}
Expand Down
30 changes: 30 additions & 0 deletions bigquery/dataset_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,36 @@ func TestIntegration_DatasetUpdateDefaultPartitionExpiration(t *testing.T) {
}
}

// TestIntegration_DatasetUpdateDefaultCollation verifies that a dataset's
// DefaultCollation can be set through Dataset.Update, and that a later update
// which omits the field leaves the stored value unchanged.
func TestIntegration_DatasetUpdateDefaultCollation(t *testing.T) {
	if client == nil {
		t.Skip("Integration tests skipped")
	}
	ctx := context.Background()
	// Keep the metadata read before any mutation so the original collation
	// can be put back once the test finishes.
	orig, err := dataset.Metadata(ctx)
	if err != nil {
		t.Fatal(err)
	}
	// The dataset fixture is package-level and is used by other tests as well;
	// leaving a non-default collation on it would presumably leak into tables
	// they create afterwards, so restore the value observed at the start.
	defer func() {
		_, restoreErr := dataset.Update(ctx, DatasetMetadataToUpdate{
			DefaultCollation: orig.DefaultCollation,
		}, "")
		if restoreErr != nil {
			t.Errorf("restoring dataset default collation: %v", restoreErr)
		}
	}()
	caseInsensitiveCollation := "und:ci"
	// Set the default collation
	md, err := dataset.Update(ctx, DatasetMetadataToUpdate{
		DefaultCollation: caseInsensitiveCollation,
	}, "")
	if err != nil {
		t.Fatal(err)
	}
	if md.DefaultCollation != caseInsensitiveCollation {
		t.Fatalf("got %q, want und:ci", md.DefaultCollation)
	}
	// Omitting DefaultCollation doesn't change it.
	md, err = dataset.Update(ctx, DatasetMetadataToUpdate{Name: "xyz"}, "")
	if err != nil {
		t.Fatal(err)
	}
	if md.DefaultCollation != caseInsensitiveCollation {
		t.Fatalf("got %q, want und:ci", md.DefaultCollation)
	}
}

func TestIntegration_DatasetUpdateAccess(t *testing.T) {
if client == nil {
t.Skip("Integration tests skipped")
Expand Down
9 changes: 9 additions & 0 deletions bigquery/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ type FieldSchema struct {
// - Struct or array composed with the above allowed functions, for example:
// [CURRENT_DATE(), DATE '2020-01-01']"
DefaultValueExpression string

// Collation can be set only when the type of field is STRING.
// The following values are supported:
// - 'und:ci': undetermined locale, case insensitive.
// - '': empty string. Defaults to case-sensitive behavior.
// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
Collation string
}

func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
Expand All @@ -153,6 +160,7 @@ func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
Precision: fs.Precision,
Scale: fs.Scale,
DefaultValueExpression: fs.DefaultValueExpression,
Collation: string(fs.Collation),
}

if fs.Repeated {
Expand Down Expand Up @@ -212,6 +220,7 @@ func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
Precision: tfs.Precision,
Scale: tfs.Scale,
DefaultValueExpression: tfs.DefaultValueExpression,
Collation: tfs.Collation,
}

for _, f := range tfs.Fields {
Expand Down
28 changes: 28 additions & 0 deletions bigquery/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,34 @@ func TestSchemaConversion(t *testing.T) {
},
},
},
{
// collation values
bqSchema: &bq.TableSchema{
Fields: []*bq.TableFieldSchema{
{
Name: "name",
Type: "STRING",
Collation: "und:ci",
},
{
Name: "another_name",
Type: "STRING",
Collation: "",
},
}},
schema: Schema{
{
Name: "name",
Type: StringFieldType,
Collation: "und:ci",
},
{
Name: "another_name",
Type: StringFieldType,
Collation: "",
},
},
},
{
// policy tags
bqSchema: &bq.TableSchema{
Expand Down
21 changes: 21 additions & 0 deletions bigquery/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ type TableMetadata struct {
// ETag is the ETag obtained when reading metadata. Pass it to Table.Update to
// ensure that the metadata hasn't changed since it was read.
ETag string

// Defines the default collation specification of new STRING fields
// in the table. During table creation or update, if a STRING field is added
// to this table without explicit collation specified, then the field inherits
// the table default collation. A change to this field affects only fields
// added afterwards, and does not alter the existing fields.
// The following values are supported:
// - 'und:ci': undetermined locale, case insensitive.
// - '': empty string. Defaults to case-sensitive behavior.
// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
DefaultCollation string
}

// TableCreateDisposition specifies the circumstances under which destination table will be created.
Expand Down Expand Up @@ -663,6 +674,7 @@ func (tm *TableMetadata) toBQ() (*bq.Table, error) {
if tm.ETag != "" {
return nil, errors.New("cannot set ETag on create")
}
t.DefaultCollation = string(tm.DefaultCollation)
return t, nil
}

Expand Down Expand Up @@ -743,6 +755,7 @@ func bqToTableMetadata(t *bq.Table, c *Client) (*TableMetadata, error) {
CreationTime: unixMillisToTime(t.CreationTime),
LastModifiedTime: unixMillisToTime(int64(t.LastModifiedTime)),
ETag: t.Etag,
DefaultCollation: t.DefaultCollation,
EncryptionConfig: bqToEncryptionConfig(t.EncryptionConfiguration),
RequirePartitionFilter: t.RequirePartitionFilter,
SnapshotDefinition: bqToSnapshotDefinition(t.SnapshotDefinition, c),
Expand Down Expand Up @@ -924,6 +937,10 @@ func (tm *TableMetadataToUpdate) toBQ() (*bq.Table, error) {
t.View.UseLegacySql = optional.ToBool(tm.UseLegacySQL)
t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
}
if tm.DefaultCollation != nil {
t.DefaultCollation = optional.ToString(tm.DefaultCollation)
forceSend("DefaultCollation")
}
labels, forces, nulls := tm.update()
t.Labels = labels
t.ForceSendFields = append(t.ForceSendFields, forces...)
Expand Down Expand Up @@ -997,6 +1014,10 @@ type TableMetadataToUpdate struct {
// elimination when referenced in a query.
RequirePartitionFilter optional.Bool

// Defines the default collation specification of new STRING fields
// in the table.
DefaultCollation optional.String

labelUpdater
}

Expand Down
69 changes: 69 additions & 0 deletions bigquery/table_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,3 +592,72 @@ func TestIntegration_TableUseLegacySQL(t *testing.T) {
_ = view.Delete(ctx)
}
}

// TestIntegration_TableDefaultCollation covers DefaultCollation on both
// Table.Create and Table.Update, and checks the per-field Collation reported
// for STRING columns after each step.
func TestIntegration_TableDefaultCollation(t *testing.T) {
	if client == nil {
		t.Skip("Integration tests skipped")
	}
	const (
		caseInsensitiveCollation = "und:ci"
		caseSensitiveCollation   = ""
	)
	ctx := context.Background()
	table := dataset.Table(tableIDs.New())

	// Create the table with a case-insensitive default collation.
	if err := table.Create(ctx, &TableMetadata{
		Schema:           schema,
		DefaultCollation: caseInsensitiveCollation,
		ExpirationTime:   testTableExpiration,
	}); err != nil {
		t.Fatal(err)
	}
	defer table.Delete(ctx)

	md, err := table.Metadata(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if got := md.DefaultCollation; got != caseInsensitiveCollation {
		t.Fatalf("expected default collation to be %q, but found %q", caseInsensitiveCollation, got)
	}
	// Every STRING column should report the table's default collation.
	for _, field := range md.Schema {
		if field.Type == StringFieldType && field.Collation != caseInsensitiveCollation {
			t.Fatalf("expected all columns to have collation %q, but found %q on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
		}
	}

	// Switch the table default back to case-sensitive.
	md, err = table.Update(ctx, TableMetadataToUpdate{
		DefaultCollation: caseSensitiveCollation,
	}, "")
	if err != nil {
		t.Fatal(err)
	}
	if got := md.DefaultCollation; got != caseSensitiveCollation {
		t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, got)
	}

	// Append a column that carries its own, case-insensitive collation.
	updatedSchema := append(md.Schema, &FieldSchema{
		Name:      "another_name",
		Type:      StringFieldType,
		Collation: caseInsensitiveCollation,
	})
	md, err = table.Update(ctx, TableMetadataToUpdate{
		Schema: updatedSchema,
	}, "")
	if err != nil {
		t.Fatal(err)
	}
	if got := md.DefaultCollation; got != caseSensitiveCollation {
		t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, got)
	}
	// The existing columns and the new one should all be case-insensitive.
	for _, field := range md.Schema {
		if field.Type == StringFieldType && field.Collation != caseInsensitiveCollation {
			t.Fatalf("expected all columns to have collation %q, but found %q on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
		}
	}
}

0 comments on commit 9f7bbeb

Please sign in to comment.