Skip to content

Commit

Permalink
cty: Allow capsule types inside set values
Browse files Browse the repository at this point in the history
Previously cty would just panic if a caller tried to put values inside a
set whose element type is or contains a capsule type, because the set
rules lacked a hashing algorithm for those.

Now we have a hash implementation for capsule types. Ideally a capsule
type intended for use in sets should have a CapsuleOps that defines both
Equals and HashKey for the set implementation to rely on, but there is
also a less-efficient fallback behavior for types without HashKey (just
bucket them all together and scan) and a less-useful fallback behavior
for types without Equals (pointer equality of the encapsulated pointer,
as usual).

This now makes it safe to use capsule types in a system that also makes
use of sets of arbitrary types.
  • Loading branch information
apparentlymart committed Jun 3, 2022
1 parent 74095df commit b66d00a
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ This release contains some changes to some aspects of the API that are either le

Since type parameters are not supported by earlier versions of the Go compiler, callers must upgrade to Go 1.18 before using cty v1.11.0 or later.

## Other changes in this release

* `cty`: It's now possible to use capsule types in the elements of sets. Previously `cty` would panic if asked to construct a value of a set type whose element type either is or contains a capsule type, but there is now explicit support for storing encapsulated values in sets and optional (but recommended) support for a custom hashing function per type in order to improve performance for sets with a large number of elements.

# 1.10.0 (November 2, 2021)

* `cty`: The documented definition and comparison logic of `cty.Number` is now refined to acknowledge that its true range is limited only to values that have both a binary floating point and decimal representation, because `cty` values are primarily designed to traverse JSON serialization where numbers are always defined as decimal strings.
Expand Down
12 changes: 12 additions & 0 deletions cty/capsule_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ type CapsuleOps struct {
// pointer identity of the encapsulated value.
RawEquals func(a, b interface{}) bool

// HashKey provides a hashing function for values of the corresponding
// capsule type. If defined, cty will use the resulting hashes as part
// of the implementation of sets whose element type is or contains the
// corresponding capsule type.
//
// If a capsule type defines HashValue then the function _must_ return
// an equal hash value for any two values that would cause Equals or
// RawEquals to return true when given those values. If a given type
// does not uphold that assumption then sets including this type will
// not behave correctly.
HashKey func(v interface{}) string

// ConversionFrom can provide conversions from the corresponding type to
// some other type when values of the corresponding type are used with
// the "convert" package. (The main cty package does not use this operation.)
Expand Down
21 changes: 20 additions & 1 deletion cty/set_internals.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,25 @@ func appendSetHashBytes(val Value, buf *bytes.Buffer, marks ValueMarks) {
return
}

if val.ty.IsCapsuleType() {
buf.WriteRune('«')
ops := val.ty.CapsuleOps()
if ops != nil && ops.HashKey != nil {
key := ops.HashKey(val.EncapsulatedValue())
buf.WriteString(fmt.Sprintf("%q", key))
} else {
// If there isn't an explicit hash implementation then we'll
// just generate the same hash value for every value of this
// type, which is logically fine but less efficient for
// larger sets because we'll have to bucket all values
// together and scan over them with Equals to determine
// set membership.
buf.WriteRune('?')
}
buf.WriteRune('»')
return
}

// should never get down here
panic("unsupported type in set hash")
panic(fmt.Sprintf("unsupported type %#v in set hash", val.ty))
}
30 changes: 30 additions & 0 deletions cty/set_internals_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,30 @@ package cty
import (
"fmt"
"math/big"
"reflect"
"testing"

"github.com/zclconf/go-cty/cty/set"
)

func TestSetHashBytes(t *testing.T) {
type encapsulated struct {
name string
}
typeWithHash := CapsuleWithOps("with hash function", reflect.TypeOf(encapsulated{}), &CapsuleOps{
RawEquals: func(a, b interface{}) bool {
return a.(*encapsulated).name == b.(*encapsulated).name
},
HashKey: func(v interface{}) string {
return v.(*encapsulated).name
},
})
typeWithoutHash := CapsuleWithOps("without hash function", reflect.TypeOf(encapsulated{}), &CapsuleOps{
RawEquals: func(a, b interface{}) bool {
return a.(*encapsulated).name == b.(*encapsulated).name
},
})

tests := []struct {
value Value
want string
Expand Down Expand Up @@ -160,6 +178,18 @@ func TestSetHashBytes(t *testing.T) {
`<54;"ermintrude";>`,
NewValueMarks(1, 2),
},

// Encapsulated values
{
CapsuleVal(typeWithHash, &encapsulated{"boop"}),
`«"boop"»`, // we use the guillemets to differentiate this from a cty.String hash
nil,
},
{
CapsuleVal(typeWithoutHash, &encapsulated{"boop"}),
`«?»`, // we use the guillemets to differentiate a known value without a hash func from an unknown value
nil,
},
}

for _, test := range tests {
Expand Down
93 changes: 93 additions & 0 deletions cty/set_type_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package cty

import (
"reflect"
"sort"
"testing"

"github.com/google/go-cmp/cmp"
)

func TestSetOperations(t *testing.T) {
Expand Down Expand Up @@ -36,5 +40,94 @@ func TestSetOperations(t *testing.T) {
t.Errorf("missing element %q", wantStr)
}
}
}

func TestSetOfCapsuleType(t *testing.T) {
type capsuleTypeForSetTests struct {
name string
}

encapsulatedNames := func(vals []Value) []string {
if len(vals) == 0 {
return nil
}
ret := make([]string, len(vals))
for i, v := range vals {
ret[i] = v.EncapsulatedValue().(*capsuleTypeForSetTests).name
}
sort.Strings(ret)
return ret
}

typeWithHash := CapsuleWithOps("with hash function", reflect.TypeOf(capsuleTypeForSetTests{}), &CapsuleOps{
RawEquals: func(a, b interface{}) bool {
return a.(*capsuleTypeForSetTests).name == b.(*capsuleTypeForSetTests).name
},
HashKey: func(v interface{}) string {
return v.(*capsuleTypeForSetTests).name
},
})
typeWithoutHash := CapsuleWithOps("without hash function", reflect.TypeOf(capsuleTypeForSetTests{}), &CapsuleOps{
RawEquals: func(a, b interface{}) bool {
return a.(*capsuleTypeForSetTests).name == b.(*capsuleTypeForSetTests).name
},
})
typeWithoutEquals := Capsule("without hash function", reflect.TypeOf(capsuleTypeForSetTests{}))

t.Run("with hash", func(t *testing.T) {
// When we provide a hashing function the set implementation can
// optimize its internal storage by spreading values over multiple
// smaller buckets.
v := SetVal([]Value{
CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"a"}),
CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"b"}),
CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"a"}),
CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"c"}),
})
got := encapsulatedNames(v.AsValueSlice())
want := []string{"a", "b", "c"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("wrong element names\n%s", diff)
}
})
t.Run("without hash", func(t *testing.T) {
// When we don't provide a hashing function the outward behavior
// should still be identical but the internal storage won't be
// so efficient, due to everything living in one big bucket and
// so we have to scan over all values to test if a particular
// element is present.
v := SetVal([]Value{
CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"a"}),
CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"b"}),
CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"a"}),
CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"c"}),
})
got := encapsulatedNames(v.AsValueSlice())
want := []string{"a", "b", "c"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("wrong element names\n%s", diff)
}
})
t.Run("without equals", func(t *testing.T) {
// When we don't even have an equals function we can still store
// values in the set but we will use the default capsule type
// behavior of comparing by pointer equality. That means that
// the name field doesn't coalesce anymore, but two instances
// of this same d should.
d := &capsuleTypeForSetTests{"d"}
v := SetVal([]Value{
CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"a"}),
CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"b"}),
CapsuleVal(typeWithoutEquals, d),
CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"a"}),
CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"c"}),
CapsuleVal(typeWithoutEquals, d),
})
got := encapsulatedNames(v.AsValueSlice())
want := []string{"a", "a", "b", "c", "d"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("wrong element names\n%s", diff)
}
})

}

0 comments on commit b66d00a

Please sign in to comment.