From b66d00abfee0e4a1e730fd1e7e2147df61aa1e4b Mon Sep 17 00:00:00 2001
From: Martin Atkins <mart@degeneration.co.uk>
Date: Fri, 3 Jun 2022 16:00:52 -0700
Subject: [PATCH] cty: Allow capsule types inside set values

Previously cty would just panic if a caller tried to put values inside a
set whose element type is or contains a capsule type, because the set
rules lacked a hashing algorithm for those.

Now we have a hash implementation for capsule types. Ideally a capsule
type intended for use in sets should have a CapsuleOps that defines both
Equals and HashKey for the set implementation to rely on, but there is
also a less-efficient fallback behavior for types without HashKey (just
bucket them all together and scan) and a less-useful fallback behavior
for types without Equals (pointer equality of the encapsulated pointer,
as usual).

This now makes it safe to use capsule types in a system that also makes
use of sets of arbitrary types.
---
 CHANGELOG.md              |  4 ++
 cty/capsule_ops.go        | 12 +++++
 cty/set_internals.go      | 21 ++++++++-
 cty/set_internals_test.go | 30 +++++++++++++
 cty/set_type_test.go      | 93 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 935e9977..092f15a9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ This release contains some changes to some aspects of the API that are either le
 
     Since type parameters are not supported by earlier versions of the Go compiler, callers must upgrade to Go 1.18 before using cty v1.11.0 or later.
 
+## Other changes in this release
+
+* `cty`: It's now possible to use capsule types in the elements of sets. Previously `cty` would panic if asked to construct a value of a set type whose element type either is or contains a capsule type, but there is now explicit support for storing encapsulated values in sets and optional (but recommended) support for a custom hashing function per type in order to improve performance for sets with a large number of elements.
+
 # 1.10.0 (November 2, 2021)
 
 * `cty`: The documented definition and comparison logic of `cty.Number` is now refined to acknowledge that its true range is limited only to values that have both a binary floating point and decimal representation, because `cty` values are primarily designed to traverse JSON serialization where numbers are always defined as decimal strings.
diff --git a/cty/capsule_ops.go b/cty/capsule_ops.go
index 3ff6855e..102d26fa 100644
--- a/cty/capsule_ops.go
+++ b/cty/capsule_ops.go
@@ -49,6 +49,18 @@ type CapsuleOps struct {
 	// pointer identity of the encapsulated value.
 	RawEquals func(a, b interface{}) bool
 
+	// HashKey provides a hashing function for values of the corresponding
+	// capsule type. If defined, cty will use the resulting hashes as part
+	// of the implementation of sets whose element type is or contains the
+	// corresponding capsule type.
+	//
+	// If a capsule type defines HashValue then the function _must_ return
+	// an equal hash value for any two values that would cause Equals or
+	// RawEquals to return true when given those values. If a given type
+	// does not uphold that assumption then sets including this type will
+	// not behave correctly.
+	HashKey func(v interface{}) string
+
 	// ConversionFrom can provide conversions from the corresponding type to
 	// some other type when values of the corresponding type are used with
 	// the "convert" package. (The main cty package does not use this operation.)
diff --git a/cty/set_internals.go b/cty/set_internals.go
index 6faa288a..7b3d4250 100644
--- a/cty/set_internals.go
+++ b/cty/set_internals.go
@@ -254,6 +254,25 @@ func appendSetHashBytes(val Value, buf *bytes.Buffer, marks ValueMarks) {
 		return
 	}
 
+	if val.ty.IsCapsuleType() {
+		buf.WriteRune('«')
+		ops := val.ty.CapsuleOps()
+		if ops != nil && ops.HashKey != nil {
+			key := ops.HashKey(val.EncapsulatedValue())
+			buf.WriteString(fmt.Sprintf("%q", key))
+		} else {
+			// If there isn't an explicit hash implementation then we'll
+			// just generate the same hash value for every value of this
+			// type, which is logically fine but less efficient for
+			// larger sets because we'll have to bucket all values
+			// together and scan over them with Equals to determine
+			// set membership.
+			buf.WriteRune('?')
+		}
+		buf.WriteRune('»')
+		return
+	}
+
 	// should never get down here
-	panic("unsupported type in set hash")
+	panic(fmt.Sprintf("unsupported type %#v in set hash", val.ty))
 }
diff --git a/cty/set_internals_test.go b/cty/set_internals_test.go
index 95d848c6..99e22bdd 100644
--- a/cty/set_internals_test.go
+++ b/cty/set_internals_test.go
@@ -3,12 +3,30 @@ package cty
 import (
 	"fmt"
 	"math/big"
+	"reflect"
 	"testing"
 
 	"github.com/zclconf/go-cty/cty/set"
 )
 
 func TestSetHashBytes(t *testing.T) {
+	type encapsulated struct {
+		name string
+	}
+	typeWithHash := CapsuleWithOps("with hash function", reflect.TypeOf(encapsulated{}), &CapsuleOps{
+		RawEquals: func(a, b interface{}) bool {
+			return a.(*encapsulated).name == b.(*encapsulated).name
+		},
+		HashKey: func(v interface{}) string {
+			return v.(*encapsulated).name
+		},
+	})
+	typeWithoutHash := CapsuleWithOps("without hash function", reflect.TypeOf(encapsulated{}), &CapsuleOps{
+		RawEquals: func(a, b interface{}) bool {
+			return a.(*encapsulated).name == b.(*encapsulated).name
+		},
+	})
+
 	tests := []struct {
 		value     Value
 		want      string
@@ -160,6 +178,18 @@ func TestSetHashBytes(t *testing.T) {
 			`<54;"ermintrude";>`,
 			NewValueMarks(1, 2),
 		},
+
+		// Encapsulated values
+		{
+			CapsuleVal(typeWithHash, &encapsulated{"boop"}),
+			`«"boop"»`, // we use the guillemets to differentiate this from a cty.String hash
+			nil,
+		},
+		{
+			CapsuleVal(typeWithoutHash, &encapsulated{"boop"}),
+			`«?»`, // we use the guillemets to differentiate a known value without a hash func from an unknown value
+			nil,
+		},
 	}
 
 	for _, test := range tests {
diff --git a/cty/set_type_test.go b/cty/set_type_test.go
index 5c495864..3b4a8103 100644
--- a/cty/set_type_test.go
+++ b/cty/set_type_test.go
@@ -1,7 +1,11 @@
 package cty
 
 import (
+	"reflect"
+	"sort"
 	"testing"
+
+	"github.com/google/go-cmp/cmp"
 )
 
 func TestSetOperations(t *testing.T) {
@@ -36,5 +40,94 @@ func TestSetOperations(t *testing.T) {
 			t.Errorf("missing element %q", wantStr)
 		}
 	}
+}
+
+func TestSetOfCapsuleType(t *testing.T) {
+	type capsuleTypeForSetTests struct {
+		name string
+	}
+
+	encapsulatedNames := func(vals []Value) []string {
+		if len(vals) == 0 {
+			return nil
+		}
+		ret := make([]string, len(vals))
+		for i, v := range vals {
+			ret[i] = v.EncapsulatedValue().(*capsuleTypeForSetTests).name
+		}
+		sort.Strings(ret)
+		return ret
+	}
+
+	typeWithHash := CapsuleWithOps("with hash function", reflect.TypeOf(capsuleTypeForSetTests{}), &CapsuleOps{
+		RawEquals: func(a, b interface{}) bool {
+			return a.(*capsuleTypeForSetTests).name == b.(*capsuleTypeForSetTests).name
+		},
+		HashKey: func(v interface{}) string {
+			return v.(*capsuleTypeForSetTests).name
+		},
+	})
+	typeWithoutHash := CapsuleWithOps("without hash function", reflect.TypeOf(capsuleTypeForSetTests{}), &CapsuleOps{
+		RawEquals: func(a, b interface{}) bool {
+			return a.(*capsuleTypeForSetTests).name == b.(*capsuleTypeForSetTests).name
+		},
+	})
+	typeWithoutEquals := Capsule("without hash function", reflect.TypeOf(capsuleTypeForSetTests{}))
+
+	t.Run("with hash", func(t *testing.T) {
+		// When we provide a hashing function the set implementation can
+		// optimize its internal storage by spreading values over multiple
+		// smaller buckets.
+		v := SetVal([]Value{
+			CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"a"}),
+			CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"b"}),
+			CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"a"}),
+			CapsuleVal(typeWithHash, &capsuleTypeForSetTests{"c"}),
+		})
+		got := encapsulatedNames(v.AsValueSlice())
+		want := []string{"a", "b", "c"}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("wrong element names\n%s", diff)
+		}
+	})
+	t.Run("without hash", func(t *testing.T) {
+		// When we don't provide a hashing function the outward behavior
+		// should still be identical but the internal storage won't be
+		// so efficient, due to everything living in one big bucket and
+		// so we have to scan over all values to test if a particular
+		// element is present.
+		v := SetVal([]Value{
+			CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"a"}),
+			CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"b"}),
+			CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"a"}),
+			CapsuleVal(typeWithoutHash, &capsuleTypeForSetTests{"c"}),
+		})
+		got := encapsulatedNames(v.AsValueSlice())
+		want := []string{"a", "b", "c"}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("wrong element names\n%s", diff)
+		}
+	})
+	t.Run("without equals", func(t *testing.T) {
+		// When we don't even have an equals function we can still store
+		// values in the set but we will use the default capsule type
+		// behavior of comparing by pointer equality. That means that
+		// the name field doesn't coalesce anymore, but two instances
+		// of this same d should.
+		d := &capsuleTypeForSetTests{"d"}
+		v := SetVal([]Value{
+			CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"a"}),
+			CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"b"}),
+			CapsuleVal(typeWithoutEquals, d),
+			CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"a"}),
+			CapsuleVal(typeWithoutEquals, &capsuleTypeForSetTests{"c"}),
+			CapsuleVal(typeWithoutEquals, d),
+		})
+		got := encapsulatedNames(v.AsValueSlice())
+		want := []string{"a", "a", "b", "c", "d"}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("wrong element names\n%s", diff)
+		}
+	})
 
 }