From 8ca71171985f600acebd2f08a64dbb9e5975f891 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Thu, 9 Nov 2023 11:16:47 +0000 Subject: [PATCH 1/5] A really dirty rip-out of the subtree revisions This confirms that removing this will actually work in practice. This commit is a breaking change and can't be merged as-is. The lesson to be learned is that this is the minimum change that would be required to make this work if there were no existing clients using Trillian. --- storage/mysql/log_storage.go | 2 +- storage/mysql/schema/storage.sql | 3 +-- storage/mysql/tree_storage.go | 22 ++++++++-------------- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/storage/mysql/log_storage.go b/storage/mysql/log_storage.go index 1c3795d5c6..e4d6432fba 100644 --- a/storage/mysql/log_storage.go +++ b/storage/mysql/log_storage.go @@ -324,7 +324,7 @@ type logTreeTX struct { func (t *logTreeTX) GetMerkleNodes(ctx context.Context, ids []compact.NodeID) ([]tree.Node, error) { t.treeTX.mu.Lock() defer t.treeTX.mu.Unlock() - return t.subtreeCache.GetNodes(ids, t.getSubtreesAtRev(ctx, t.readRev)) + return t.subtreeCache.GetNodes(ids, t.getSubtreesFunc(ctx)) } func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime time.Time) ([]*trillian.LogLeaf, error) { diff --git a/storage/mysql/schema/storage.sql b/storage/mysql/schema/storage.sql index 8de7447ef2..6cd2ef5d12 100644 --- a/storage/mysql/schema/storage.sql +++ b/storage/mysql/schema/storage.sql @@ -40,10 +40,9 @@ CREATE TABLE IF NOT EXISTS Subtree( TreeId BIGINT NOT NULL, SubtreeId VARBINARY(255) NOT NULL, Nodes MEDIUMBLOB NOT NULL, - SubtreeRevision INTEGER NOT NULL, -- Key columns must be in ASC order in order to benefit from group-by/min-max -- optimization in MySQL. - PRIMARY KEY(TreeId, SubtreeId, SubtreeRevision), + PRIMARY KEY(TreeId, SubtreeId), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE ); diff --git a/storage/mysql/tree_storage.go b/storage/mysql/tree_storage.go index c9bae1b5a4..128d120c6f 100644 --- a/storage/mysql/tree_storage.go +++ b/storage/mysql/tree_storage.go @@ -34,7 +34,7 @@ import ( // These statements are fixed const ( - insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) ` + placeholderSQL + insertSubtreeMultiSQL = `REPLACE INTO Subtree(TreeId, SubtreeId, Nodes) ` + placeholderSQL insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) VALUES(?,?,?,?,?,?)` @@ -138,7 +138,7 @@ func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, num int) (*sql.St } func (m *mySQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?, ?)", "(?, ?, ?, ?)") + return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?)", "(?, ?, ?)") } func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { @@ -172,7 +172,7 @@ type treeTX struct { writeRevision int64 } -func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]byte) ([]*storagepb.SubtreeProto, error) { +func (t *treeTX) getSubtrees(ctx context.Context, ids [][]byte) ([]*storagepb.SubtreeProto, error) { klog.V(2).Infof("getSubtrees(len(ids)=%d)", len(ids)) klog.V(4).Infof("getSubtrees(") if len(ids) == 0 { @@ -190,7 +190,8 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by } }() - args := make([]interface{}, 0, len(ids)+3) + args := make([]interface{}, 0, len(ids)+1) + args = append(args, t.treeID) // populate args with ids. for _, id := range ids { @@ -198,10 +199,6 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by args = append(args, id) } - args = append(args, t.treeID) - args = append(args, treeRevision) - args = append(args, t.treeID) - rows, err := stx.QueryContext(ctx, args...) if err != nil { klog.Warningf("Failed to get merkle subtrees: %s", err) @@ -295,7 +292,6 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre args = append(args, t.treeID) args = append(args, s.Prefix) args = append(args, subtreeBytes) - args = append(args, t.writeRevision) } tmpl, err := t.ts.setSubtreeStmt(ctx, len(subtrees)) @@ -339,18 +335,16 @@ func checkResultOkAndRowCountIs(res sql.Result, err error, count int64) error { return nil } -// getSubtreesAtRev returns a GetSubtreesFunc which reads at the passed in rev. -func (t *treeTX) getSubtreesAtRev(ctx context.Context, rev int64) cache.GetSubtreesFunc { +func (t *treeTX) getSubtreesFunc(ctx context.Context) cache.GetSubtreesFunc { return func(ids [][]byte) ([]*storagepb.SubtreeProto, error) { - return t.getSubtrees(ctx, rev, ids) + return t.getSubtrees(ctx, ids) } } func (t *treeTX) SetMerkleNodes(ctx context.Context, nodes []tree.Node) error { t.mu.Lock() defer t.mu.Unlock() - rev := t.writeRevision - 1 - return t.subtreeCache.SetNodes(nodes, t.getSubtreesAtRev(ctx, rev)) + return t.subtreeCache.SetNodes(nodes, t.getSubtreesFunc(ctx)) } func (t *treeTX) Commit(ctx context.Context) error { From 13d5d31361ac5755aedfdb0223bb380c57019030 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Thu, 16 Nov 2023 15:38:15 +0000 Subject: [PATCH 2/5] Support both revisioned and unrevisioned subtrees The same schema is used for both revisioned and unrevisioned subtrees. The difference is that we always write a revision of 0 in the unrevisioned case, which still means that there will only be a single entry per subtree. The old PublicKey column has been used to store a settings object for newly created trees. If this settings object is found in this column, then it will be parsed and checked for a property indicating that this tree is revisionless. If the property is successfully confirmed to be revisionless then all writes to the subtree table will have a revision of 0, and all reads will skip the nested inner query that was causing slow queries. If the property cannot be confirmed to be revisionless (no settings persisted, or settings persisted but explictly say to use revisioned), then the functionality will continue in the old way. This preserves backwards compatibility, but makes it so that new trees will gain these features. For users with legacy trees that wish to take advantage of the smaller storage costs and faster queries of the new revisionless storage, the proposed migration mechanism is to use migrillian to clone the old tree to a new tree. If anyone is interested in doing this then I recommend speaking to us on Slack (https://join.slack.com/t/transparency-dev/shared_invite/zt-27pkqo21d-okUFhur7YZ0rFoJVIOPznQ). --- integration/admin/admin_integration_test.go | 11 +- storage/mysql/admin_storage.go | 70 +++++++-- storage/mysql/admin_storage_test.go | 124 ++++++++++++++- storage/mysql/log_storage.go | 2 +- storage/mysql/mysqlpb/gen.go | 18 +++ storage/mysql/mysqlpb/options.pb.go | 165 ++++++++++++++++++++ storage/mysql/mysqlpb/options.proto | 27 ++++ storage/mysql/schema/storage.sql | 7 +- storage/mysql/sql.go | 29 ++++ storage/mysql/storage_test.go | 132 +++++++++++----- storage/mysql/tree_storage.go | 73 +++++++-- storage/testonly/admin_storage_tester.go | 13 +- 12 files changed, 586 insertions(+), 85 deletions(-) create mode 100644 storage/mysql/mysqlpb/gen.go create mode 100644 storage/mysql/mysqlpb/options.pb.go create mode 100644 storage/mysql/mysqlpb/options.proto diff --git a/integration/admin/admin_integration_test.go b/integration/admin/admin_integration_test.go index ff24b9b304..2fdf434868 100644 --- a/integration/admin/admin_integration_test.go +++ b/integration/admin/admin_integration_test.go @@ -220,8 +220,9 @@ func TestAdminServer_UpdateTree(t *testing.T) { want.TreeId = tree.TreeId want.CreateTime = tree.CreateTime want.UpdateTime = tree.UpdateTime + want.StorageSettings = tree.StorageSettings if !proto.Equal(tree, want) { - diff := cmp.Diff(tree, want) + diff := cmp.Diff(tree, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-UpdateTree diff:\n%v", test.desc, diff) } } @@ -357,7 +358,7 @@ func TestAdminServer_DeleteTree(t *testing.T) { want.Deleted = true want.DeleteTime = deletedTree.DeleteTime if got := deletedTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-DeleteTree() diff (-got +want):\n%v", test.desc, diff) } @@ -366,7 +367,7 @@ func TestAdminServer_DeleteTree(t *testing.T) { t.Fatalf("%v: GetTree() returned err = %v", test.desc, err) } if got, want := storedTree, deletedTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-GetTree() diff (-got +want):\n%v", test.desc, diff) } } @@ -447,7 +448,7 @@ func TestAdminServer_UndeleteTree(t *testing.T) { continue } if got, want := undeletedTree, createdTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-UndeleteTree() diff (-got +want):\n%v", test.desc, diff) } @@ -456,7 +457,7 @@ func TestAdminServer_UndeleteTree(t *testing.T) { t.Fatalf("%v: GetTree() returned err = %v", test.desc, err) } if got, want := storedTree, createdTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-GetTree() diff (-got +want):\n%v", test.desc, diff) } } diff --git a/storage/mysql/admin_storage.go b/storage/mysql/admin_storage.go index 16ed7c911b..11c082c80d 100644 --- a/storage/mysql/admin_storage.go +++ b/storage/mysql/admin_storage.go @@ -15,17 +15,21 @@ package mysql import ( + "bytes" "context" "database/sql" + "encoding/gob" "fmt" "sync" "time" "github.com/google/trillian" "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" ) @@ -47,8 +51,8 @@ const ( Description, CreateTimeMillis, UpdateTimeMillis, - PrivateKey, - PublicKey, + PrivateKey, -- Unused + PublicKey, -- Used to store StorageSettings MaxRootDurationMillis, Deleted, DeleteTimeMillis @@ -62,7 +66,7 @@ const ( ) // NewAdminStorage returns a MySQL storage.AdminStorage implementation backed by DB. -func NewAdminStorage(db *sql.DB) storage.AdminStorage { +func NewAdminStorage(db *sql.DB) *mysqlAdminStorage { return &mysqlAdminStorage{db} } @@ -223,6 +227,39 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia } rootDuration := newTree.MaxRootDuration.AsDuration() + // When creating a new tree we automatically add StorageSettings to allow us to + // determine that this tree can support newer storage features. When reading + // trees that do not have this StorageSettings populated, it must be assumed that + // the tree was created with the oldest settings. + // The gist of this code is super simple: create a new StorageSettings with the most + // modern defaults if the created tree does not have one, and then create a struct that + // represents this to store in the DB. Unfortunately because this involves anypb, struct + // copies, marshalling, and proper error handling this turns into a scary amount of code. + if tree.StorageSettings != nil { + newTree.StorageSettings = proto.Clone(tree.StorageSettings).(*anypb.Any) + } else { + o := &mysqlpb.StorageOptions{ + SubtreeRevisions: false, // Default behaviour for new trees is to skip writing subtree revisions. + } + a, err := anypb.New(o) + if err != nil { + return nil, fmt.Errorf("failed to create new StorageOptions: %v", err) + } + newTree.StorageSettings = a + } + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(newTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + return nil, fmt.Errorf("failed to unmarshal StorageOptions: %v", err) + } + ss := storageSettings{ + Revisioned: o.SubtreeRevisions, + } + buff := &bytes.Buffer{} + enc := gob.NewEncoder(buff) + if err := enc.Encode(ss); err != nil { + return nil, fmt.Errorf("failed to encode storageSettings: %v", err) + } + insertTreeStmt, err := t.tx.PrepareContext( ctx, `INSERT INTO Trees( @@ -236,8 +273,8 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia Description, CreateTimeMillis, UpdateTimeMillis, - PrivateKey, - PublicKey, + PrivateKey, -- Unused + PublicKey, -- Used to store StorageSettings MaxRootDurationMillis) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) if err != nil { @@ -261,8 +298,8 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia newTree.Description, nowMillis, nowMillis, - []byte{}, // Unused, filling in for backward compatibility. - []byte{}, // Unused, filling in for backward compatibility. + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + buff.Bytes(), // Using the otherwise unused PublicKey for storing StorageSettings. rootDuration/time.Millisecond, ) if err != nil { @@ -419,8 +456,21 @@ func validateDeleted(ctx context.Context, tx *sql.Tx, treeID int64, wantDeleted } func validateStorageSettings(tree *trillian.Tree) error { - if tree.StorageSettings != nil { - return fmt.Errorf("storage_settings not supported, but got %v", tree.StorageSettings) + if tree.StorageSettings.MessageIs(&mysqlpb.StorageOptions{}) { + return nil } - return nil + if tree.StorageSettings == nil { + // No storage settings is OK, we'll just use the defaults for new trees + return nil + } + return fmt.Errorf("storage_settings must be nil or mysqlpb.StorageOptions, but got %v", tree.StorageSettings) +} + +// storageSettings allows us to persist storage settings to the DB. +// It is a tempting trap to use protos for this, but the way they encode +// makes it impossible to tell the difference between no value ever written +// and a value that was written with the default values for each field. +// Using an explicit struct and gob encoding allows us to tell the difference. +type storageSettings struct { + Revisioned bool } diff --git a/storage/mysql/admin_storage_test.go b/storage/mysql/admin_storage_test.go index 54d636d551..d1a7cfd81d 100644 --- a/storage/mysql/admin_storage_test.go +++ b/storage/mysql/admin_storage_test.go @@ -15,13 +15,16 @@ package mysql import ( + "bytes" "context" "database/sql" + "encoding/gob" "fmt" "testing" "github.com/google/trillian" "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" "github.com/google/trillian/storage/testonly" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -128,12 +131,16 @@ func TestAdminTX_TreeWithNulls(t *testing.T) { } } -func TestAdminTX_StorageSettingsNotSupported(t *testing.T) { +func TestAdminTX_StorageSettings(t *testing.T) { cleanTestDB(DB) s := NewAdminStorage(DB) ctx := context.Background() - settings, err := anypb.New(&trillian.Tree{}) + badSettings, err := anypb.New(&trillian.Tree{}) + if err != nil { + t.Fatalf("Error marshaling proto: %v", err) + } + goodSettings, err := anypb.New(&mysqlpb.StorageOptions{}) if err != nil { t.Fatalf("Error marshaling proto: %v", err) } @@ -142,16 +149,38 @@ func TestAdminTX_StorageSettingsNotSupported(t *testing.T) { desc string // fn attempts to either create or update a tree with a non-nil, valid Any proto // on Tree.StorageSettings. It's expected to return an error. - fn func(storage.AdminStorage) error + fn func(storage.AdminStorage) error + wantErr bool }{ { - desc: "CreateTree", + desc: "CreateTree Bad Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = badSettings + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: true, + }, + { + desc: "CreateTree nil Settings", fn: func(s storage.AdminStorage) error { tree := proto.Clone(testonly.LogTree).(*trillian.Tree) - tree.StorageSettings = settings + tree.StorageSettings = nil _, err := storage.CreateTree(ctx, s, tree) return err }, + wantErr: false, + }, + { + desc: "CreateTree StorageOptions Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = goodSettings + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: false, }, { desc: "UpdateTree", @@ -160,14 +189,93 @@ func TestAdminTX_StorageSettingsNotSupported(t *testing.T) { if err != nil { t.Fatalf("CreateTree() failed with err = %v", err) } - _, err = storage.UpdateTree(ctx, s, tree.TreeId, func(tree *trillian.Tree) { tree.StorageSettings = settings }) + _, err = storage.UpdateTree(ctx, s, tree.TreeId, func(tree *trillian.Tree) { tree.StorageSettings = badSettings }) return err }, + wantErr: true, }, } for _, test := range tests { - if err := test.fn(s); err == nil { - t.Errorf("%v: err = nil, want non-nil", test.desc) + if err := test.fn(s); (err != nil) != test.wantErr { + t.Errorf("err: %v, wantErr = %v", err, test.wantErr) + } + } +} + +// Test reading variants of trees that could have been created by old versions +// of Trillian to check we infer the correct storage options. +func TestAdminTX_GetTreeLegacies(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + serializedStorageSettings := func(revisioned bool) []byte { + ss := storageSettings{ + Revisioned: revisioned, + } + buff := &bytes.Buffer{} + enc := gob.NewEncoder(buff) + if err := enc.Encode(ss); err != nil { + t.Fatalf("failed to encode storageSettings: %v", err) + } + return buff.Bytes() + } + tests := []struct { + desc string + key []byte + wantRevisioned bool + }{ + { + desc: "No data", + key: []byte{}, + wantRevisioned: true, + }, + { + desc: "Public key", + key: []byte("trustmethatthisisapublickey"), + wantRevisioned: true, + }, + { + desc: "StorageOptions revisioned", + key: serializedStorageSettings(true), + wantRevisioned: true, + }, + { + desc: "StorageOptions revisionless", + key: serializedStorageSettings(false), + wantRevisioned: false, + }, + } + for _, tC := range tests { + // Create a tree with default settings, and then reach into the DB to override + // whatever was written into the persisted settings to align with the test case. + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatal(err) + } + // We are reaching really into the internals here, but it's the only way to set up + // archival state. Going through the Create/Update methods will change the storage + // options. + tx, err := s.db.BeginTx(ctx, nil /* opts */) + if err != nil { + t.Fatal(err) + } + if _, err := tx.Exec("UPDATE Trees SET PublicKey = ? WHERE TreeId = ?", tC.key, tree.TreeId); err != nil { + t.Fatal(err) + } + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + readTree, err := storage.GetTree(ctx, s, tree.TreeId) + if err != nil { + t.Fatal(err) + } + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(readTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + t.Fatal(err) + } + if got, want := o.SubtreeRevisions, tC.wantRevisioned; got != want { + t.Errorf("%s SubtreeRevisions: got %t, wanted %t", tC.desc, got, want) } } } diff --git a/storage/mysql/log_storage.go b/storage/mysql/log_storage.go index e4d6432fba..1c3795d5c6 100644 --- a/storage/mysql/log_storage.go +++ b/storage/mysql/log_storage.go @@ -324,7 +324,7 @@ type logTreeTX struct { func (t *logTreeTX) GetMerkleNodes(ctx context.Context, ids []compact.NodeID) ([]tree.Node, error) { t.treeTX.mu.Lock() defer t.treeTX.mu.Unlock() - return t.subtreeCache.GetNodes(ids, t.getSubtreesFunc(ctx)) + return t.subtreeCache.GetNodes(ids, t.getSubtreesAtRev(ctx, t.readRev)) } func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime time.Time) ([]*trillian.LogLeaf, error) { diff --git a/storage/mysql/mysqlpb/gen.go b/storage/mysql/mysqlpb/gen.go new file mode 100644 index 0000000000..875c14cbf4 --- /dev/null +++ b/storage/mysql/mysqlpb/gen.go @@ -0,0 +1,18 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mysqlpb contains protobuf definitions used by the mysql implementation. +package mysqlpb + +//go:generate protoc -I=. --go_out=paths=source_relative:. options.proto diff --git a/storage/mysql/mysqlpb/options.pb.go b/storage/mysql/mysqlpb/options.pb.go new file mode 100644 index 0000000000..2016a31838 --- /dev/null +++ b/storage/mysql/mysqlpb/options.pb.go @@ -0,0 +1,165 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.31.0 +// protoc v3.20.1 +// source: options.proto + +package mysqlpb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// StorageOptions contains configuration parameters for MySQL implementation +// of the storage backend. This is envisioned only to be used for changes that +// would be breaking, but need to support old behaviour for backwards compatibility. +type StorageOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // subtreeRevisions being explicitly set to false will skip writing subtree revisions. + // https://github.com/google/trillian/pull/3201 + SubtreeRevisions bool `protobuf:"varint,1,opt,name=subtreeRevisions,proto3" json:"subtreeRevisions,omitempty"` +} + +func (x *StorageOptions) Reset() { + *x = StorageOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_options_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StorageOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StorageOptions) ProtoMessage() {} + +func (x *StorageOptions) ProtoReflect() protoreflect.Message { + mi := &file_options_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StorageOptions.ProtoReflect.Descriptor instead. +func (*StorageOptions) Descriptor() ([]byte, []int) { + return file_options_proto_rawDescGZIP(), []int{0} +} + +func (x *StorageOptions) GetSubtreeRevisions() bool { + if x != nil { + return x.SubtreeRevisions + } + return false +} + +var File_options_proto protoreflect.FileDescriptor + +var file_options_proto_rawDesc = []byte{ + 0x0a, 0x0d, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x07, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x22, 0x3c, 0x0a, 0x0e, 0x53, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2a, 0x0a, 0x10, 0x73, 0x75, + 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, + 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x32, 0x5a, 0x30, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, + 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x74, 0x72, 0x69, 0x6c, + 0x6c, 0x69, 0x61, 0x6e, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2f, 0x6d, 0x79, 0x73, + 0x71, 0x6c, 0x2f, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_options_proto_rawDescOnce sync.Once + file_options_proto_rawDescData = file_options_proto_rawDesc +) + +func file_options_proto_rawDescGZIP() []byte { + file_options_proto_rawDescOnce.Do(func() { + file_options_proto_rawDescData = protoimpl.X.CompressGZIP(file_options_proto_rawDescData) + }) + return file_options_proto_rawDescData +} + +var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_options_proto_goTypes = []interface{}{ + (*StorageOptions)(nil), // 0: mysqlpb.StorageOptions +} +var file_options_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_options_proto_init() } +func file_options_proto_init() { + if File_options_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_options_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StorageOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_options_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_options_proto_goTypes, + DependencyIndexes: file_options_proto_depIdxs, + MessageInfos: file_options_proto_msgTypes, + }.Build() + File_options_proto = out.File + file_options_proto_rawDesc = nil + file_options_proto_goTypes = nil + file_options_proto_depIdxs = nil +} diff --git a/storage/mysql/mysqlpb/options.proto b/storage/mysql/mysqlpb/options.proto new file mode 100644 index 0000000000..2ebdc670ae --- /dev/null +++ b/storage/mysql/mysqlpb/options.proto @@ -0,0 +1,27 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +option go_package = "github.com/google/trillian/storage/mysql/mysqlpb"; + +package mysqlpb; + +// StorageOptions contains configuration parameters for MySQL implementation +// of the storage backend. This is envisioned only to be used for changes that +// would be breaking, but need to support old behaviour for backwards compatibility. +message StorageOptions { + // subtreeRevisions being explicitly set to false will skip writing subtree revisions. + // https://github.com/google/trillian/pull/3201 + bool subtreeRevisions = 1; +} diff --git a/storage/mysql/schema/storage.sql b/storage/mysql/schema/storage.sql index 6cd2ef5d12..0d571b24fa 100644 --- a/storage/mysql/schema/storage.sql +++ b/storage/mysql/schema/storage.sql @@ -18,8 +18,8 @@ CREATE TABLE IF NOT EXISTS Trees( CreateTimeMillis BIGINT NOT NULL, UpdateTimeMillis BIGINT NOT NULL, MaxRootDurationMillis BIGINT NOT NULL, - PrivateKey MEDIUMBLOB NOT NULL, - PublicKey MEDIUMBLOB NOT NULL, + PrivateKey MEDIUMBLOB NOT NULL, -- Unused. + PublicKey MEDIUMBLOB NOT NULL, -- This is now used to store settings. Deleted BOOLEAN, DeleteTimeMillis BIGINT, PRIMARY KEY(TreeId) @@ -40,9 +40,10 @@ CREATE TABLE IF NOT EXISTS Subtree( TreeId BIGINT NOT NULL, SubtreeId VARBINARY(255) NOT NULL, Nodes MEDIUMBLOB NOT NULL, + SubtreeRevision INTEGER NOT NULL, -- Key columns must be in ASC order in order to benefit from group-by/min-max -- optimization in MySQL. - PRIMARY KEY(TreeId, SubtreeId), + PRIMARY KEY(TreeId, SubtreeId, SubtreeRevision), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE ); diff --git a/storage/mysql/sql.go b/storage/mysql/sql.go index a929952727..a48e2a554f 100644 --- a/storage/mysql/sql.go +++ b/storage/mysql/sql.go @@ -15,11 +15,15 @@ package mysql import ( + "bytes" "database/sql" + "encoding/gob" "fmt" "time" "github.com/google/trillian" + "github.com/google/trillian/storage/mysql/mysqlpb" + "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -124,5 +128,30 @@ func readTree(r row) (*trillian.Tree, error) { } } + // We're going to try to interpret PublicKey as storageSettings, but it could be a + // public key from a really old tree, or an empty column from a tree created in the + // period between Trillian key material being removed and this column being used for + // storing settings. + buff := bytes.NewBuffer(publicKey) + dec := gob.NewDecoder(buff) + ss := &storageSettings{} + var o *mysqlpb.StorageOptions + if err := dec.Decode(ss); err != nil { + // If there are no storageSettings then this tree was created before settings + // were supported, and thus we have to populate the settings with the oldest + // settings for features. + o = &mysqlpb.StorageOptions{ + SubtreeRevisions: true, + } + } else { + o = &mysqlpb.StorageOptions{ + SubtreeRevisions: ss.Revisioned, + } + } + tree.StorageSettings, err = anypb.New(o) + if err != nil { + return nil, fmt.Errorf("failed to put StorageSettings into tree: %w", err) + } + return tree, nil } diff --git a/storage/mysql/storage_test.go b/storage/mysql/storage_test.go index 13479c1445..c08aa757f2 100644 --- a/storage/mysql/storage_test.go +++ b/storage/mysql/storage_test.go @@ -25,18 +25,47 @@ import ( "fmt" "os" "testing" + "time" "github.com/google/trillian" "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" "github.com/google/trillian/storage/testdb" storageto "github.com/google/trillian/storage/testonly" stree "github.com/google/trillian/storage/tree" "github.com/google/trillian/types" "github.com/transparency-dev/merkle/compact" "github.com/transparency-dev/merkle/rfc6962" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/durationpb" "k8s.io/klog/v2" ) +var ( + // LogTree is a valid, LOG-type trillian.Tree for tests. + // This tree is configured to write revisions for each subtree. + // This matches the legacy behaviour before revisions were removed. + RevisionedLogTree = &trillian.Tree{ + TreeState: trillian.TreeState_ACTIVE, + TreeType: trillian.TreeType_LOG, + DisplayName: "Llamas Log", + Description: "Registry of publicly-owned llamas", + MaxRootDuration: durationpb.New(0 * time.Millisecond), + StorageSettings: mustCreateRevisionedStorage(), + } +) + +func mustCreateRevisionedStorage() *anypb.Any { + o := &mysqlpb.StorageOptions{ + SubtreeRevisions: true, + } + a, err := anypb.New(o) + if err != nil { + panic(err) + } + return a +} + func TestNodeRoundTrip(t *testing.T) { nodes := createSomeNodes(256) nodeIDs := make([]compact.NodeID, len(nodes)) @@ -58,11 +87,11 @@ func TestNodeRoundTrip(t *testing.T) { {desc: "store-all-read-all", store: nodes, read: nodeIDs, want: nodes}, {desc: "store-all-read-none", store: nodes, read: nil, want: nil}, } { - t.Run(tc.desc, func(t *testing.T) { + testbody := func(treeDef *trillian.Tree) { ctx := context.Background() cleanTestDB(DB) as := NewAdminStorage(DB) - tree := mustCreateTree(ctx, t, as, storageto.LogTree) + tree := mustCreateTree(ctx, t, as, treeDef) s := NewLogStorage(DB, nil) const writeRev = int64(100) @@ -86,6 +115,12 @@ func TestNodeRoundTrip(t *testing.T) { } return nil }) + } + t.Run(tc.desc+"-norevisions", func(t *testing.T) { + testbody(storageto.LogTree) + }) + t.Run(tc.desc+"-revisions", func(t *testing.T) { + testbody(RevisionedLogTree) }) } } @@ -93,50 +128,67 @@ func TestNodeRoundTrip(t *testing.T) { // This test ensures that node writes cross subtree boundaries so this edge case in the subtree // cache gets exercised. Any tree size > 256 will do this. func TestLogNodeRoundTripMultiSubtree(t *testing.T) { - ctx := context.Background() - cleanTestDB(DB) - as := NewAdminStorage(DB) - tree := mustCreateTree(ctx, t, as, storageto.LogTree) - s := NewLogStorage(DB, nil) - - const writeRev = int64(100) - const size = 871 - nodesToStore, err := createLogNodesForTreeAtSize(t, size, writeRev) - if err != nil { - t.Fatalf("failed to create test tree: %v", err) - } - nodeIDsToRead := make([]compact.NodeID, len(nodesToStore)) - for i := range nodesToStore { - nodeIDsToRead[i] = nodesToStore[i].ID + testCases := []struct { + desc string + tree *trillian.Tree + }{ + { + desc: "Revisionless", + tree: storageto.LogTree, + }, + { + desc: "Revisions", + tree: RevisionedLogTree, + }, } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, tC.tree) + s := NewLogStorage(DB, nil) - { - runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { - forceWriteRevision(writeRev, tx) - if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { - t.Fatalf("Failed to store nodes: %s", err) + const writeRev = int64(100) + const size = 871 + nodesToStore, err := createLogNodesForTreeAtSize(t, size, writeRev) + if err != nil { + t.Fatalf("failed to create test tree: %v", err) + } + nodeIDsToRead := make([]compact.NodeID, len(nodesToStore)) + for i := range nodesToStore { + nodeIDsToRead[i] = nodesToStore[i].ID } - return storeLogRoot(ctx, tx, uint64(size), uint64(writeRev), []byte{1, 2, 3}) - }) - } - { - runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { - readNodes, err := tx.GetMerkleNodes(ctx, nodeIDsToRead) - if err != nil { - t.Fatalf("Failed to retrieve nodes: %s", err) + { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + forceWriteRevision(writeRev, tx) + if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { + t.Fatalf("Failed to store nodes: %s", err) + } + return storeLogRoot(ctx, tx, uint64(size), uint64(writeRev), []byte{1, 2, 3}) + }) } - if err := nodesAreEqual(readNodes, nodesToStore); err != nil { - missing, extra := diffNodes(readNodes, nodesToStore) - for _, n := range missing { - t.Errorf("Missing: %v", n.ID) - } - for _, n := range extra { - t.Errorf("Extra : %v", n.ID) - } - t.Fatalf("Read back different nodes from the ones stored: %s", err) + + { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + readNodes, err := tx.GetMerkleNodes(ctx, nodeIDsToRead) + if err != nil { + t.Fatalf("Failed to retrieve nodes: %s", err) + } + if err := nodesAreEqual(readNodes, nodesToStore); err != nil { + missing, extra := diffNodes(readNodes, nodesToStore) + for _, n := range missing { + t.Errorf("Missing: %v", n.ID) + } + for _, n := range extra { + t.Errorf("Extra : %v", n.ID) + } + t.Fatalf("Read back different nodes from the ones stored: %s", err) + } + return nil + }) } - return nil }) } } diff --git a/storage/mysql/tree_storage.go b/storage/mysql/tree_storage.go index 128d120c6f..7fb5ed8d0d 100644 --- a/storage/mysql/tree_storage.go +++ b/storage/mysql/tree_storage.go @@ -26,15 +26,17 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage/cache" + "github.com/google/trillian/storage/mysql/mysqlpb" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" "k8s.io/klog/v2" ) // These statements are fixed const ( - insertSubtreeMultiSQL = `REPLACE INTO Subtree(TreeId, SubtreeId, Nodes) ` + placeholderSQL + insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) ` + placeholderSQL + ` ON DUPLICATE KEY UPDATE Nodes=VALUES(Nodes)` insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) VALUES(?,?,?,?,?,?)` @@ -52,6 +54,12 @@ const ( AND Subtree.SubtreeRevision = x.MaxRevision AND Subtree.TreeId = x.TreeId AND Subtree.TreeId = ?` + + selectSubtreeSQLNoRev = ` + SELECT SubtreeId, Subtree.Nodes + FROM Subtree + WHERE Subtree.TreeId = ? + AND SubtreeId IN (` + placeholderSQL + `)` placeholderSQL = "" ) @@ -133,12 +141,16 @@ func (m *mySQLTreeStorage) getStmt(ctx context.Context, statement string, num in return s, nil } -func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") +func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, num int) (*sql.Stmt, error) { + if subtreeRevs { + return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") + } else { + return m.getStmt(ctx, selectSubtreeSQLNoRev, num, "?", "?") + } } func (m *mySQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?)", "(?, ?, ?)") + return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?, ?)", "(?, ?, ?, ?)") } func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { @@ -147,6 +159,12 @@ func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, klog.Warningf("Could not start tree TX: %s", err) return treeTX{}, err } + var subtreeRevisions bool + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(tree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + return treeTX{}, fmt.Errorf("failed to unmarshal StorageSettings: %v", err) + } + subtreeRevisions = o.SubtreeRevisions return treeTX{ tx: t, mu: &sync.Mutex{}, @@ -156,6 +174,7 @@ func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes: hashSizeBytes, subtreeCache: subtreeCache, writeRevision: -1, + subtreeRevs: subtreeRevisions, }, nil } @@ -170,16 +189,17 @@ type treeTX struct { hashSizeBytes int subtreeCache *cache.SubtreeCache writeRevision int64 + subtreeRevs bool } -func (t *treeTX) getSubtrees(ctx context.Context, ids [][]byte) ([]*storagepb.SubtreeProto, error) { +func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]byte) ([]*storagepb.SubtreeProto, error) { klog.V(2).Infof("getSubtrees(len(ids)=%d)", len(ids)) klog.V(4).Infof("getSubtrees(") if len(ids) == 0 { return nil, nil } - tmpl, err := t.ts.getSubtreeStmt(ctx, len(ids)) + tmpl, err := t.ts.getSubtreeStmt(ctx, t.subtreeRevs, len(ids)) if err != nil { return nil, err } @@ -190,13 +210,26 @@ func (t *treeTX) getSubtrees(ctx context.Context, ids [][]byte) ([]*storagepb.Su } }() - args := make([]interface{}, 0, len(ids)+1) - args = append(args, t.treeID) + var args []interface{} + if t.subtreeRevs { + args = make([]interface{}, 0, len(ids)+3) + // populate args with ids. + for _, id := range ids { + klog.V(4).Infof(" id: %x", id) + args = append(args, id) + } + args = append(args, t.treeID) + args = append(args, treeRevision) + args = append(args, t.treeID) + } else { + args = make([]interface{}, 0, len(ids)+1) + args = append(args, t.treeID) - // populate args with ids. - for _, id := range ids { - klog.V(4).Infof(" id: %x", id) - args = append(args, id) + // populate args with ids. + for _, id := range ids { + klog.V(4).Infof(" id: %x", id) + args = append(args, id) + } } rows, err := stx.QueryContext(ctx, args...) @@ -280,6 +313,13 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre // a really large number of subtrees to store. args := make([]interface{}, 0, len(subtrees)) + // If not using subtree revisions then default value of 0 is fine. There is no + // significance to this value, other than it cannot be NULL in the DB. + var subtreeRev int64 + if t.subtreeRevs { + // We're using subtree revisions, so ensure we write at the correct revision + subtreeRev = t.writeRevision + } for _, s := range subtrees { s := s if s.Prefix == nil { @@ -292,6 +332,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre args = append(args, t.treeID) args = append(args, s.Prefix) args = append(args, subtreeBytes) + args = append(args, subtreeRev) } tmpl, err := t.ts.setSubtreeStmt(ctx, len(subtrees)) @@ -335,16 +376,18 @@ func checkResultOkAndRowCountIs(res sql.Result, err error, count int64) error { return nil } -func (t *treeTX) getSubtreesFunc(ctx context.Context) cache.GetSubtreesFunc { +// getSubtreesAtRev returns a GetSubtreesFunc which reads at the passed in rev. +func (t *treeTX) getSubtreesAtRev(ctx context.Context, rev int64) cache.GetSubtreesFunc { return func(ids [][]byte) ([]*storagepb.SubtreeProto, error) { - return t.getSubtrees(ctx, ids) + return t.getSubtrees(ctx, rev, ids) } } func (t *treeTX) SetMerkleNodes(ctx context.Context, nodes []tree.Node) error { t.mu.Lock() defer t.mu.Unlock() - return t.subtreeCache.SetNodes(nodes, t.getSubtreesFunc(ctx)) + rev := t.writeRevision - 1 + return t.subtreeCache.SetNodes(nodes, t.getSubtreesAtRev(ctx, rev)) } func (t *treeTX) Commit(ctx context.Context) error { diff --git a/storage/testonly/admin_storage_tester.go b/storage/testonly/admin_storage_tester.go index aa734fe5e0..351a5252e5 100644 --- a/storage/testonly/admin_storage_tester.go +++ b/storage/testonly/admin_storage_tester.go @@ -328,8 +328,10 @@ func runListTreesTest(ctx context.Context, tx storage.ReadOnlyAdminTX, includeDe sort.Slice(want, func(i, j int) bool { return want[i].TreeId < want[j].TreeId }) for i, wantTree := range want { + // Ignore storage_settings changes (OK to vary between implementations) + wantTree.StorageSettings = got[i].StorageSettings if !proto.Equal(got[i], wantTree) { - return fmt.Errorf("post-ListTrees() diff (-got +want):\n%v", cmp.Diff(got, want)) + return fmt.Errorf("post-ListTrees() diff (-got +want):\n%v", cmp.Diff(got, want, cmp.Comparer(proto.Equal))) } } return nil @@ -362,8 +364,10 @@ func (tester *AdminStorageTester) TestSoftDeleteTree(t *testing.T) { wantTree := proto.Clone(test.tree).(*trillian.Tree) wantTree.Deleted = true wantTree.DeleteTime = deletedTree.DeleteTime + // Ignore storage_settings changes (OK to vary between implementations) + wantTree.StorageSettings = deletedTree.StorageSettings if got, want := deletedTree, wantTree; !proto.Equal(got, want) { - t.Errorf("%v: post-SoftDeleteTree diff (-got +want):\n%v", test.desc, cmp.Diff(got, want)) + t.Errorf("%v: post-SoftDeleteTree diff (-got +want):\n%v", test.desc, cmp.Diff(got, want, cmp.Comparer(proto.Equal))) } if err := assertStoredTree(ctx, s, deletedTree); err != nil { @@ -557,8 +561,11 @@ func assertStoredTree(ctx context.Context, s storage.AdminStorage, want *trillia if err != nil { return fmt.Errorf("GetTree() returned err = %v", err) } + + // Ignore storage_settings changes (OK to vary between implementations) + want.StorageSettings = got.StorageSettings if !proto.Equal(got, want) { - return fmt.Errorf("post-GetTree() diff (-got +want):\n%v", cmp.Diff(got, want)) + return fmt.Errorf("post-GetTree() diff (-got +want):\n%v", cmp.Diff(got, want, cmp.Comparer(proto.Equal))) } return nil } From 8027a3da5464dbceda0bd931953de6c76f9f12c4 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Mon, 11 Dec 2023 16:30:53 +0000 Subject: [PATCH 3/5] Added comment in UpdateTree warning future maintainers of a potential pitfall --- storage/mysql/admin_storage.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/storage/mysql/admin_storage.go b/storage/mysql/admin_storage.go index 11c082c80d..0b267f6732 100644 --- a/storage/mysql/admin_storage.go +++ b/storage/mysql/admin_storage.go @@ -393,7 +393,10 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( tree.Description, nowMillis, rootDuration/time.Millisecond, - []byte{}, // Unused, filling in for backward compatibility. + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + // PublicKey should not be updated with any storageSettings here without + // a lot of thought put into it. At the moment storageSettings are inferred + // when reading the tree, even if no value is stored in the database. tree.TreeId); err != nil { return nil, err } From 16ac32327cc91a885c8e540458df6c038c83a011 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Tue, 12 Dec 2023 12:00:45 +0000 Subject: [PATCH 4/5] Updated CHANGELOG It's a big one --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 032b0693b3..e08bd0e800 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,42 @@ * Bump CI version of MySQL from 5.7 to 8.0 * Bump golangci-lint from 1.51.1 to 1.55.1 (developers should update to this version) +### MySQL: Changes to Subtree Revisions + +Support for skipping subtree revisions to increase read performance and disk usage: added in #3201 + +TL;DR: existing trees will continue to be stored and queried as they were before, but new trees +created with the MySQL storage layer will be stored and queried in a way that uses less space +and allows for simpler and faster queries. No schema changes are required by log operators. + +The Trillian MySQL implementation stores the internal state of the log as Subtrees in the database. +These are essentially tiles as described by [tlog: Tiling a log](https://research.swtch.com/tlog). +Trees created with previous versions of Trillian stored a different revision of each Subtree when +the tree was updated. This is somewhat redundant for append-only logs because an earlier version +of a Subtree can always be derived from a later one by simply removing entries from the right of +the Subtree. PR #3201 removes this Subtree revision history, and updates Subtrees in place when +they are updated. + +Measurements from @n-canter show that revisionless storage saves around 75% storage costs for the +Subtree table, and queries over this table are more than 15% faster. + +The same schema is used for both revisioned and unrevisioned subtrees. The difference is that we +always write a revision of 0 in the unrevisioned case, which still means that there will only be +a single entry per subtree. + +Support is maintained for the old way of revisioning Subtrees in order to avoid breaking changes +to existing trees. There is no simple code change that would safely allow a previously revisioned +tree to start becoming a revisionless tree. This new revisionless Subtree feature is only available +for trees created with new versions of Trillian. + +Users with legacy revisioned trees that wish to take advantage of smaller storage costs and faster +queries of the new revisionless storage should come speak to us on +[transparency-dev Slack](https://join.slack.com/t/transparency-dev/shared_invite/zt-27pkqo21d-okUFhur7YZ0rFoJVIOPznQ). +The safest option we have available is to use [migrillian](https://github.com/google/certificate-transparency-go/tree/master/trillian/migrillian) to create a new copy of trees, but this will be quite a manual +process and will only work for CT logs. +Other migration options are conceivable and we're eager to work with the community to develop +and test tools for upgrading trees in place. + ## v1.5.3 * Recommended go version for development: 1.20 From 1ca893fe779395e180b1a8a054dbf61a3adaaf50 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Tue, 12 Dec 2023 12:20:01 +0000 Subject: [PATCH 5/5] clarify reduced disk usage --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e08bd0e800..569648944a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ### MySQL: Changes to Subtree Revisions -Support for skipping subtree revisions to increase read performance and disk usage: added in #3201 +Support for skipping subtree revisions to increase read performance and reduce disk usage: added in #3201 TL;DR: existing trees will continue to be stored and queried as they were before, but new trees created with the MySQL storage layer will be stored and queried in a way that uses less space