From 1d5601a4bda1262afb3136ca6b1a1c5966b0a39d Mon Sep 17 00:00:00 2001 From: Yimin Chen Date: Thu, 20 Jul 2023 18:55:41 -0700 Subject: [PATCH] Validate workflow task start time when complete (#4663) **What changed?** Add start time to workflow task token and validate it on close. **Why?** To reject concurrent speculative workflow task with same startedEventID. **How did you test it?** Integration tests. **Potential risks** No **Is hotfix candidate?** Yes --- api/token/v1/message.pb.go | 211 ++++-- .../server/api/token/v1/message.proto | 4 + service/frontend/workflow_handler.go | 1 + service/history/api/startworkflow/api.go | 3 +- .../history/workflowTaskHandlerCallbacks.go | 34 +- service/matching/matchingEngine.go | 5 +- tests/update_workflow_test.go | 690 ------------------ 7 files changed, 196 insertions(+), 752 deletions(-) diff --git a/api/token/v1/message.pb.go b/api/token/v1/message.pb.go index eb375013105..7b3d7f039e9 100644 --- a/api/token/v1/message.pb.go +++ b/api/token/v1/message.pb.go @@ -35,8 +35,12 @@ import ( math_bits "math/bits" reflect "reflect" strings "strings" + time "time" + _ "github.com/gogo/protobuf/gogoproto" proto "github.com/gogo/protobuf/proto" + _ "github.com/gogo/protobuf/types" + github_com_gogo_protobuf_types "github.com/gogo/protobuf/types" v11 "go.temporal.io/server/api/clock/v1" v1 "go.temporal.io/server/api/history/v1" ) @@ -45,6 +49,7 @@ import ( var _ = proto.Marshal var _ = fmt.Errorf var _ = math.Inf +var _ = time.Kitchen // This is a compile-time assertion to ensure that this generated file // is compatible with the proto package it is being compiled against. 
@@ -261,6 +266,8 @@ type Task struct { ActivityType string `protobuf:"bytes,8,opt,name=activity_type,json=activityType,proto3" json:"activity_type,omitempty"` Clock *v11.VectorClock `protobuf:"bytes,9,opt,name=clock,proto3" json:"clock,omitempty"` StartedEventId int64 `protobuf:"varint,10,opt,name=started_event_id,json=startedEventId,proto3" json:"started_event_id,omitempty"` + Version int64 `protobuf:"varint,11,opt,name=version,proto3" json:"version,omitempty"` + StartedTime *time.Time `protobuf:"bytes,12,opt,name=started_time,json=startedTime,proto3,stdtime" json:"started_time,omitempty"` } func (m *Task) Reset() { *m = Task{} } @@ -365,6 +372,20 @@ func (m *Task) GetStartedEventId() int64 { return 0 } +func (m *Task) GetVersion() int64 { + if m != nil { + return m.Version + } + return 0 +} + +func (m *Task) GetStartedTime() *time.Time { + if m != nil { + return m.StartedTime + } + return nil +} + type QueryTask struct { NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` TaskQueue string `protobuf:"bytes,2,opt,name=task_queue,json=taskQueue,proto3" json:"task_queue,omitempty"` @@ -436,52 +457,57 @@ func init() { } var fileDescriptor_020fff7d28118bec = []byte{ - // 715 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x94, 0x4f, 0x6f, 0xd3, 0x48, - 0x18, 0xc6, 0xe3, 0xfc, 0xf7, 0x9b, 0xb4, 0x9b, 0xb8, 0xea, 0x36, 0xaa, 0x76, 0xbd, 0x69, 0x76, - 0x0f, 0xd9, 0x52, 0x39, 0x14, 0x4e, 0x88, 0x03, 0x12, 0x15, 0x52, 0xd3, 0x5b, 0xad, 0x08, 0x24, - 0x24, 0x88, 0x5c, 0x7b, 0xd2, 0x8e, 0x92, 0xce, 0xb8, 0x33, 0x63, 0x97, 0xdc, 0xf8, 0x08, 0x7c, - 0x0c, 0xf8, 0x26, 0x1c, 0x7b, 0xec, 0x91, 0xa6, 0x17, 0x6e, 0xf4, 0x1b, 0x80, 0x66, 0xec, 0x49, - 0x42, 0x6b, 0x04, 0x07, 0x6e, 0x99, 0xe7, 0x7d, 0xe6, 0x9d, 0x79, 0x9f, 0x5f, 0x3c, 0xb0, 0x2d, - 0xd0, 0x69, 0x48, 0x99, 0x37, 0xe9, 0x71, 0xc4, 0x62, 0xc4, 0x7a, 0x5e, 0x88, 0x7b, 0x82, 0x8e, - 0x11, 0xe9, 0xc5, 
0xbb, 0xbd, 0x53, 0xc4, 0xb9, 0x77, 0x8c, 0x9c, 0x90, 0x51, 0x41, 0xad, 0xbf, - 0xb4, 0xd7, 0x49, 0xbc, 0x8e, 0x17, 0x62, 0x47, 0x79, 0x9d, 0x78, 0x77, 0x33, 0xb3, 0x93, 0x3f, - 0xa1, 0xfe, 0xf8, 0x4e, 0xa7, 0xcd, 0x9d, 0x2c, 0xef, 0x09, 0xe6, 0x82, 0xb2, 0xe9, 0x1d, 0x77, - 0xe7, 0x4b, 0x1e, 0xd6, 0xf6, 0x93, 0xe2, 0x1e, 0x25, 0x02, 0x93, 0xc8, 0x13, 0x98, 0x12, 0x6b, - 0x1d, 0xca, 0x2c, 0x22, 0x43, 0x1c, 0xb4, 0x8c, 0xb6, 0xd1, 0x35, 0xdd, 0x12, 0x8b, 0x48, 0x3f, - 0xb0, 0xfe, 0x83, 0xd5, 0x11, 0x66, 0x5c, 0x0c, 0x51, 0x8c, 0x88, 0x90, 0xe5, 0x7c, 0xdb, 0xe8, - 0x16, 0xdc, 0xba, 0x52, 0x9f, 0x49, 0xb1, 0x1f, 0x58, 0x1d, 0x58, 0x21, 0xe8, 0xcd, 0x92, 0xa9, - 0xa0, 0x4c, 0x35, 0x29, 0x6a, 0x8f, 0x03, 0x6b, 0x98, 0x0f, 0xcf, 0x29, 0x1b, 0x8f, 0x26, 0xf4, - 0x7c, 0xc8, 0x22, 0x42, 0x30, 0x39, 0x6e, 0x95, 0xda, 0x46, 0xb7, 0xea, 0x36, 0x31, 0x7f, 0x91, - 0x56, 0xdc, 0xa4, 0x60, 0xdd, 0x83, 0x66, 0x88, 0x18, 0xc7, 0x5c, 0x20, 0xe2, 0xa3, 0xa1, 0x8a, - 0xa6, 0x55, 0x6e, 0x1b, 0xdd, 0xba, 0xdb, 0x58, 0x2a, 0x0c, 0xa4, 0x6e, 0x9d, 0xc1, 0x86, 0x60, - 0x1e, 0xe1, 0x58, 0x9e, 0x3f, 0x3f, 0x43, 0x78, 0x7c, 0xdc, 0xaa, 0xb4, 0x8d, 0x6e, 0xed, 0xc1, - 0x23, 0x27, 0x2b, 0xef, 0x34, 0x25, 0x27, 0xde, 0x75, 0x06, 0x7a, 0xbb, 0xbe, 0xc7, 0xc0, 0xe3, - 0xe3, 0x3e, 0x19, 0x51, 0x77, 0x5d, 0x64, 0x95, 0xac, 0x2d, 0xa8, 0x1f, 0x31, 0x8f, 0xf8, 0x27, - 0xe9, 0xd5, 0xaa, 0xea, 0x6a, 0xb5, 0x44, 0x53, 0xb7, 0x3a, 0x28, 0x56, 0xcd, 0x06, 0x74, 0x3e, - 0x14, 0xe0, 0x4f, 0xd7, 0x3b, 0xcf, 0x0a, 0x7d, 0x0b, 0xea, 0xc4, 0x3b, 0x45, 0x3c, 0xf4, 0x7c, - 0x24, 0x63, 0x03, 0x15, 0x7d, 0x6d, 0xae, 0xf5, 0x03, 0xeb, 0x1f, 0xa8, 0xcd, 0xe7, 0x49, 0xd3, - 0x37, 0x5d, 0xd0, 0x52, 0x3f, 0x58, 0x02, 0x57, 0xb8, 0x05, 0x8e, 0x0b, 0x8f, 0x2d, 0x31, 0x29, - 0x26, 0xe0, 0x94, 0xba, 0x04, 0x65, 0xd9, 0x15, 0xcb, 0x5c, 0x29, 0x51, 0x50, 0x0a, 0x6e, 0x73, - 0x61, 0x7d, 0x9e, 0x14, 0xac, 0x36, 0xd4, 0x11, 0x09, 0x16, 0x3d, 0xcb, 0xca, 0x08, 0x88, 0x04, - 0xba, 0xe3, 0x36, 0x34, 0x17, 0x0e, 0xdd, 0xaf, 0xa2, 
0x6c, 0x7f, 0x68, 0x9b, 0xee, 0x96, 0x89, - 0xb8, 0xfa, 0x03, 0xc4, 0xaf, 0xa0, 0x99, 0xb6, 0x1b, 0x26, 0xd8, 0x30, 0xe2, 0x2d, 0x53, 0xc1, - 0xbd, 0xff, 0x33, 0xb8, 0xe9, 0x81, 0xfb, 0x7a, 0x9f, 0xdb, 0x88, 0x6f, 0x29, 0x07, 0xc5, 0xaa, - 0xd1, 0xc8, 0x77, 0xbe, 0xe6, 0xa1, 0xa8, 0xe9, 0x7e, 0x47, 0xc6, 0xf8, 0x7d, 0x64, 0x76, 0xc0, - 0xe2, 0xfe, 0x09, 0x0a, 0xa2, 0x09, 0x0a, 0x6e, 0xd3, 0x69, 0xcc, 0x2b, 0x3a, 0xcf, 0x16, 0x54, - 0x3c, 0x21, 0xc7, 0x13, 0x8a, 0x4a, 0xc9, 0xd5, 0x4b, 0x79, 0xbe, 0xe7, 0x0b, 0x1c, 0x63, 0x31, - 0xd5, 0x28, 0x4c, 0x17, 0xb4, 0xd4, 0x0f, 0xac, 0x7f, 0x61, 0x65, 0xf1, 0x29, 0x4c, 0x43, 0xa4, - 0x30, 0x98, 0x6e, 0x5d, 0x8b, 0x83, 0x69, 0x88, 0xa4, 0x69, 0xde, 0x45, 0x99, 0xaa, 0x89, 0x49, - 0x8b, 0xca, 0xf4, 0x04, 0x4a, 0xea, 0xf1, 0x49, 0xf3, 0xfe, 0x3f, 0x33, 0x6f, 0xe5, 0x48, 0xd2, - 0xf6, 0x05, 0x65, 0x7b, 0x72, 0xe9, 0x26, 0xfb, 0xac, 0x2e, 0x34, 0xd4, 0x9f, 0x69, 0x79, 0x62, - 0x50, 0x13, 0xaf, 0xa6, 0x7a, 0x3a, 0x6f, 0x67, 0x04, 0xe6, 0x61, 0x84, 0xd8, 0xf4, 0x57, 0x29, - 0xfc, 0x0d, 0x20, 0x3f, 0xf3, 0xe1, 0x59, 0x84, 0x22, 0x94, 0x42, 0x30, 0xa5, 0x72, 0x28, 0x05, - 0x6b, 0x03, 0x2a, 0xaa, 0x3c, 0x87, 0x50, 0x96, 0xcb, 0x7e, 0xf0, 0xf4, 0xf5, 0xc5, 0x95, 0x9d, - 0xbb, 0xbc, 0xb2, 0x73, 0x37, 0x57, 0xb6, 0xf1, 0x76, 0x66, 0x1b, 0xef, 0x67, 0xb6, 0xf1, 0x71, - 0x66, 0x1b, 0x17, 0x33, 0xdb, 0xf8, 0x34, 0xb3, 0x8d, 0xcf, 0x33, 0x3b, 0x77, 0x33, 0xb3, 0x8d, - 0x77, 0xd7, 0x76, 0xee, 0xe2, 0xda, 0xce, 0x5d, 0x5e, 0xdb, 0xb9, 0x97, 0xdd, 0x63, 0xba, 0x98, - 0x1d, 0xd3, 0xac, 0x77, 0xfe, 0xb1, 0xfa, 0x71, 0x54, 0x56, 0xcf, 0xed, 0xc3, 0x6f, 0x01, 0x00, - 0x00, 0xff, 0xff, 0x55, 0x89, 0x62, 0xf7, 0x14, 0x06, 0x00, 0x00, + // 795 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x55, 0x41, 0x8f, 0xdb, 0x44, + 0x14, 0xce, 0x6c, 0xb2, 0xd9, 0xf8, 0xc5, 0x2d, 0x89, 0xab, 0xd2, 0x68, 0x05, 0xde, 0x6c, 0xe0, + 0x10, 0x4a, 0x65, 0xb3, 0x70, 0x42, 0x1c, 0x90, 0xba, 0x42, 0x6a, 0x7a, 
0xab, 0x15, 0x81, 0x84, + 0x04, 0xd1, 0xac, 0x3d, 0xc9, 0x8e, 0x92, 0xcc, 0xb8, 0x33, 0x63, 0x2f, 0xb9, 0xf1, 0x13, 0xfa, + 0x33, 0xe0, 0x9f, 0x70, 0xdc, 0x63, 0x4f, 0xc0, 0x66, 0x2f, 0xdc, 0xe8, 0x4f, 0x40, 0x33, 0xf6, + 0x24, 0x66, 0x6b, 0x04, 0x07, 0x6e, 0x9e, 0xef, 0x7d, 0xef, 0xcd, 0x7b, 0xdf, 0x37, 0x79, 0x81, + 0xc7, 0x8a, 0xac, 0x53, 0x2e, 0xf0, 0x2a, 0x94, 0x44, 0xe4, 0x44, 0x84, 0x38, 0xa5, 0xa1, 0xe2, + 0x4b, 0xc2, 0xc2, 0xfc, 0x2c, 0x5c, 0x13, 0x29, 0xf1, 0x82, 0x04, 0xa9, 0xe0, 0x8a, 0x7b, 0xef, + 0x59, 0x6e, 0x50, 0x70, 0x03, 0x9c, 0xd2, 0xc0, 0x70, 0x83, 0xfc, 0xec, 0xf8, 0x64, 0xc1, 0xf9, + 0x62, 0x45, 0x42, 0xc3, 0xbd, 0xc8, 0xe6, 0xa1, 0xa2, 0x6b, 0x22, 0x15, 0x5e, 0xa7, 0x45, 0xfa, + 0xf1, 0x69, 0x42, 0x52, 0xc2, 0x12, 0xc2, 0x62, 0x4a, 0x64, 0xb8, 0xe0, 0x0b, 0x6e, 0x70, 0xf3, + 0x55, 0x52, 0x6a, 0xbb, 0x89, 0x57, 0x3c, 0x5e, 0xbe, 0xd5, 0xcd, 0xf1, 0x93, 0x3a, 0xee, 0x25, + 0x95, 0x8a, 0x8b, 0xcd, 0x5b, 0xec, 0xd1, 0x9f, 0x07, 0xf0, 0xe0, 0x59, 0x11, 0x3c, 0xe7, 0x4c, + 0x51, 0x96, 0x61, 0x45, 0x39, 0xf3, 0x1e, 0x42, 0x5b, 0x64, 0x6c, 0x46, 0x93, 0x01, 0x1a, 0xa2, + 0xb1, 0x13, 0x1d, 0x8a, 0x8c, 0x4d, 0x12, 0xef, 0x43, 0xb8, 0x3f, 0xa7, 0x42, 0xaa, 0x19, 0xc9, + 0x09, 0x53, 0x3a, 0x7c, 0x30, 0x44, 0xe3, 0x66, 0xe4, 0x1a, 0xf4, 0x2b, 0x0d, 0x4e, 0x12, 0x6f, + 0x04, 0xf7, 0x18, 0xf9, 0xa1, 0x42, 0x6a, 0x1a, 0x52, 0x57, 0x83, 0x96, 0x13, 0xc0, 0x03, 0x2a, + 0x67, 0x57, 0x5c, 0x2c, 0xe7, 0x2b, 0x7e, 0x35, 0x13, 0x19, 0x63, 0x94, 0x2d, 0x06, 0x87, 0x43, + 0x34, 0xee, 0x44, 0x7d, 0x2a, 0xbf, 0x29, 0x23, 0x51, 0x11, 0xf0, 0x3e, 0x86, 0x7e, 0x4a, 0x84, + 0xa4, 0x52, 0x11, 0x16, 0x93, 0x99, 0x91, 0x77, 0xd0, 0x1e, 0xa2, 0xb1, 0x1b, 0xf5, 0x2a, 0x81, + 0xa9, 0xc6, 0xbd, 0x97, 0xf0, 0x48, 0x09, 0xcc, 0x24, 0xd5, 0xf7, 0xef, 0xee, 0x50, 0x58, 0x2e, + 0x07, 0x47, 0x43, 0x34, 0xee, 0x7e, 0xfa, 0x79, 0x50, 0xe7, 0x59, 0xa9, 0x52, 0x90, 0x9f, 0x05, + 0x53, 0x9b, 0x6e, 0xfb, 0x98, 0x62, 0xb9, 0x9c, 0xb0, 0x39, 0x8f, 0x1e, 0xaa, 0xba, 0x90, 0x77, + 0x0a, 0xee, 
0x85, 0xc0, 0x2c, 0xbe, 0x2c, 0x5b, 0xeb, 0x98, 0xd6, 0xba, 0x05, 0x66, 0xba, 0x7a, + 0xde, 0xea, 0x38, 0x3d, 0x18, 0xfd, 0xdc, 0x84, 0x77, 0x23, 0x7c, 0x55, 0x27, 0xfa, 0x29, 0xb8, + 0x0c, 0xaf, 0x89, 0x4c, 0x71, 0x4c, 0xb4, 0x6c, 0x60, 0xa4, 0xef, 0xee, 0xb0, 0x49, 0xe2, 0x9d, + 0x40, 0x77, 0x37, 0x4f, 0xa9, 0xbe, 0x13, 0x81, 0x85, 0x26, 0x49, 0xc5, 0xb8, 0xe6, 0x1d, 0xe3, + 0xa4, 0xc2, 0xa2, 0xe2, 0x49, 0xab, 0x30, 0xce, 0xa0, 0x15, 0x53, 0xaa, 0xac, 0x5c, 0xeb, 0xca, + 0x99, 0x31, 0xa5, 0x19, 0xf5, 0xf7, 0xd4, 0xaf, 0x8b, 0x80, 0x37, 0x04, 0x97, 0xb0, 0x64, 0x5f, + 0xb3, 0x6d, 0x88, 0x40, 0x58, 0x62, 0x2b, 0x3e, 0x86, 0xfe, 0x9e, 0x61, 0xeb, 0x1d, 0x19, 0xda, + 0x3b, 0x96, 0x66, 0xab, 0xd5, 0x5a, 0xdc, 0xf9, 0x07, 0x8b, 0xbf, 0x83, 0x7e, 0x59, 0x6e, 0x56, + 0xd8, 0x46, 0x89, 0x1c, 0x38, 0xc6, 0xdc, 0x4f, 0xfe, 0xcd, 0xdc, 0xf2, 0xc2, 0x67, 0x36, 0x2f, + 0xea, 0xe5, 0x77, 0x90, 0xe7, 0xad, 0x0e, 0xea, 0x1d, 0x8c, 0x7e, 0x6d, 0x42, 0xcb, 0xba, 0xfb, + 0x37, 0x67, 0xd0, 0xff, 0xe7, 0xcc, 0x13, 0xf0, 0x64, 0x7c, 0x49, 0x92, 0x6c, 0x45, 0x92, 0xbb, + 0xee, 0xf4, 0x76, 0x11, 0xab, 0xe7, 0x00, 0x8e, 0xb0, 0xd2, 0xe3, 0x29, 0xe3, 0xca, 0x61, 0x64, + 0x8f, 0xfa, 0x7e, 0x1c, 0x2b, 0x9a, 0x53, 0xb5, 0xb1, 0x56, 0x38, 0x11, 0x58, 0x68, 0x92, 0x78, + 0x1f, 0xc0, 0xbd, 0xfd, 0x4f, 0x61, 0x93, 0x12, 0x63, 0x83, 0x13, 0xb9, 0x16, 0x9c, 0x6e, 0x52, + 0xa2, 0x49, 0xbb, 0x2a, 0x86, 0xd4, 0x29, 0x48, 0x16, 0x34, 0xa4, 0x2f, 0xe1, 0xd0, 0x2c, 0x9f, + 0x52, 0xef, 0x8f, 0x6a, 0xf5, 0x36, 0x8c, 0x42, 0xed, 0x58, 0x71, 0x71, 0xae, 0x8f, 0x51, 0x91, + 0xe7, 0x8d, 0xa1, 0x67, 0x1e, 0x53, 0x75, 0x62, 0x30, 0x13, 0xdf, 0x2f, 0xf1, 0xca, 0xbc, 0xf6, + 0xd5, 0x74, 0x0d, 0xc1, 0x1e, 0xbd, 0x73, 0x70, 0x6d, 0x0d, 0xbd, 0x51, 0x07, 0xae, 0xe9, 0xe5, + 0x38, 0x28, 0xd6, 0x6d, 0x60, 0xd7, 0x6d, 0x30, 0xb5, 0xeb, 0xf6, 0x69, 0xeb, 0xd5, 0x6f, 0x27, + 0x28, 0xea, 0x96, 0x59, 0x1a, 0x1f, 0xcd, 0xc1, 0x79, 0x91, 0x11, 0xb1, 0xf9, 0xaf, 0x26, 0xbf, + 0x0f, 0xa0, 0xb7, 0xc8, 0xec, 0x65, 0x46, 0x32, 
0x52, 0x7a, 0xec, 0x68, 0xe4, 0x85, 0x06, 0xbc, + 0x47, 0x70, 0x64, 0xc2, 0x3b, 0x8f, 0xdb, 0xfa, 0x38, 0x49, 0x9e, 0x7e, 0x7f, 0x7d, 0xe3, 0x37, + 0x5e, 0xdf, 0xf8, 0x8d, 0x37, 0x37, 0x3e, 0xfa, 0x71, 0xeb, 0xa3, 0x9f, 0xb6, 0x3e, 0xfa, 0x65, + 0xeb, 0xa3, 0xeb, 0xad, 0x8f, 0x7e, 0xdf, 0xfa, 0xe8, 0x8f, 0xad, 0xdf, 0x78, 0xb3, 0xf5, 0xd1, + 0xab, 0x5b, 0xbf, 0x71, 0x7d, 0xeb, 0x37, 0x5e, 0xdf, 0xfa, 0x8d, 0x6f, 0xc7, 0x0b, 0xbe, 0x97, + 0x96, 0xf2, 0xba, 0xbf, 0xa2, 0x2f, 0xcc, 0xc7, 0x45, 0xdb, 0x8c, 0xfb, 0xd9, 0x5f, 0x01, 0x00, + 0x00, 0xff, 0xff, 0x5f, 0x35, 0x79, 0x84, 0xb7, 0x06, 0x00, 0x00, } func (this *HistoryContinuation) Equal(that interface{}) bool { @@ -623,6 +649,16 @@ func (this *Task) Equal(that interface{}) bool { if this.StartedEventId != that1.StartedEventId { return false } + if this.Version != that1.Version { + return false + } + if that1.StartedTime == nil { + if this.StartedTime != nil { + return false + } + } else if !this.StartedTime.Equal(*that1.StartedTime) { + return false + } return true } func (this *QueryTask) Equal(that interface{}) bool { @@ -697,7 +733,7 @@ func (this *Task) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 14) + s := make([]string, 0, 16) s = append(s, "&token.Task{") s = append(s, "NamespaceId: "+fmt.Sprintf("%#v", this.NamespaceId)+",\n") s = append(s, "WorkflowId: "+fmt.Sprintf("%#v", this.WorkflowId)+",\n") @@ -711,6 +747,8 @@ func (this *Task) GoString() string { s = append(s, "Clock: "+fmt.Sprintf("%#v", this.Clock)+",\n") } s = append(s, "StartedEventId: "+fmt.Sprintf("%#v", this.StartedEventId)+",\n") + s = append(s, "Version: "+fmt.Sprintf("%#v", this.Version)+",\n") + s = append(s, "StartedTime: "+fmt.Sprintf("%#v", this.StartedTime)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -913,6 +951,21 @@ func (m *Task) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.StartedTime != nil { + n3, err3 := 
github_com_gogo_protobuf_types.StdTimeMarshalTo(*m.StartedTime, dAtA[i-github_com_gogo_protobuf_types.SizeOfStdTime(*m.StartedTime):]) + if err3 != nil { + return 0, err3 + } + i -= n3 + i = encodeVarintMessage(dAtA, i, uint64(n3)) + i-- + dAtA[i] = 0x62 + } + if m.Version != 0 { + i = encodeVarintMessage(dAtA, i, uint64(m.Version)) + i-- + dAtA[i] = 0x58 + } if m.StartedEventId != 0 { i = encodeVarintMessage(dAtA, i, uint64(m.StartedEventId)) i-- @@ -1158,6 +1211,13 @@ func (m *Task) Size() (n int) { if m.StartedEventId != 0 { n += 1 + sovMessage(uint64(m.StartedEventId)) } + if m.Version != 0 { + n += 1 + sovMessage(uint64(m.Version)) + } + if m.StartedTime != nil { + l = github_com_gogo_protobuf_types.SizeOfStdTime(*m.StartedTime) + n += 1 + l + sovMessage(uint64(l)) + } return n } @@ -1237,6 +1297,8 @@ func (this *Task) String() string { `ActivityType:` + fmt.Sprintf("%v", this.ActivityType) + `,`, `Clock:` + strings.Replace(fmt.Sprintf("%v", this.Clock), "VectorClock", "v11.VectorClock", 1) + `,`, `StartedEventId:` + fmt.Sprintf("%v", this.StartedEventId) + `,`, + `Version:` + fmt.Sprintf("%v", this.Version) + `,`, + `StartedTime:` + strings.Replace(fmt.Sprintf("%v", this.StartedTime), "Timestamp", "types.Timestamp", 1) + `,`, `}`, }, "") return s @@ -2117,6 +2179,61 @@ func (m *Task) Unmarshal(dAtA []byte) error { break } } + case 11: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Version", wireType) + } + m.Version = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMessage + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Version |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 12: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field StartedTime", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMessage + } + if iNdEx >= l { + return io.ErrUnexpectedEOF 
+ } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMessage + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMessage + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.StartedTime == nil { + m.StartedTime = new(time.Time) + } + if err := github_com_gogo_protobuf_types.StdTimeUnmarshal(m.StartedTime, dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipMessage(dAtA[iNdEx:]) diff --git a/proto/internal/temporal/server/api/token/v1/message.proto b/proto/internal/temporal/server/api/token/v1/message.proto index d67003daf38..c79ce280e1d 100644 --- a/proto/internal/temporal/server/api/token/v1/message.proto +++ b/proto/internal/temporal/server/api/token/v1/message.proto @@ -24,6 +24,8 @@ package temporal.server.api.token.v1; option go_package = "go.temporal.io/server/api/token/v1;token"; +import "google/protobuf/timestamp.proto"; +import "dependencies/gogoproto/gogo.proto"; import "temporal/server/api/clock/v1/message.proto"; import "temporal/server/api/history/v1/message.proto"; @@ -62,6 +64,8 @@ message Task { string activity_type = 8; temporal.server.api.clock.v1.VectorClock clock = 9; int64 started_event_id = 10; + int64 version = 11; + google.protobuf.Timestamp started_time = 12 [(gogoproto.stdtime) = true]; } message QueryTask { diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index c8d9c247a2e..270b8139a26 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -967,6 +967,7 @@ func (wh *WorkflowHandler) RespondWorkflowTaskCompleted( RunId: taskToken.GetRunId(), ScheduledEventId: histResp.StartedResponse.GetScheduledEventId(), StartedEventId: histResp.StartedResponse.GetStartedEventId(), + StartedTime: histResp.StartedResponse.GetStartedTime(), Attempt: 
histResp.StartedResponse.GetAttempt(), } token, err := wh.tokenSerializer.Serialize(taskToken) diff --git a/service/history/api/startworkflow/api.go b/service/history/api/startworkflow/api.go index 785dc64d06f..f8b899339fb 100644 --- a/service/history/api/startworkflow/api.go +++ b/service/history/api/startworkflow/api.go @@ -33,10 +33,10 @@ import ( historypb "go.temporal.io/api/history/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/server/api/historyservice/v1" tokenspb "go.temporal.io/server/api/token/v1" - "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/common" "go.temporal.io/server/common/definition" "go.temporal.io/server/common/metrics" @@ -542,6 +542,7 @@ func (s *Starter) generateResponse( RunId: runID, ScheduledEventId: workflowTaskInfo.ScheduledEventID, StartedEventId: workflowTaskInfo.StartedEventID, + StartedTime: workflowTaskInfo.StartedTime, Attempt: workflowTaskInfo.Attempt, Clock: clock, } diff --git a/service/history/workflowTaskHandlerCallbacks.go b/service/history/workflowTaskHandlerCallbacks.go index e6368d9c6ba..630a6ffe7c1 100644 --- a/service/history/workflowTaskHandlerCallbacks.go +++ b/service/history/workflowTaskHandlerCallbacks.go @@ -322,8 +322,13 @@ func (handler *workflowTaskHandlerCallbacksImpl) handleWorkflowTaskFailed( scheduledEventID := token.GetScheduledEventId() workflowTask := mutableState.GetWorkflowTaskByID(scheduledEventID) - if workflowTask == nil || workflowTask.Attempt != token.Attempt || workflowTask.StartedEventID == common.EmptyEventID || - (token.StartedEventId != common.EmptyEventID && token.StartedEventId != workflowTask.StartedEventID) { + if workflowTask == nil || + workflowTask.StartedEventID == common.EmptyEventID || + (token.StartedEventId != common.EmptyEventID && token.StartedEventId != workflowTask.StartedEventID) || + (token.StartedTime != nil && workflowTask.StartedTime != nil && !token.StartedTime.Equal(*workflowTask.StartedTime)) || 
+ workflowTask.Attempt != token.Attempt { + // We have not altered mutable state yet, so release it with nil to avoid clearing MS. + workflowContext.GetReleaseFn()(nil) return nil, serviceerror.NewNotFound("Workflow task not found.") } @@ -390,6 +395,21 @@ func (handler *workflowTaskHandlerCallbacksImpl) handleWorkflowTaskCompleted( if err != nil { return nil, err } + weContext := workflowContext.GetContext() + ms := workflowContext.GetMutableState() + + currentWorkflowTask := ms.GetWorkflowTaskByID(token.GetScheduledEventId()) + if !ms.IsWorkflowExecutionRunning() || + currentWorkflowTask == nil || + currentWorkflowTask.StartedEventID == common.EmptyEventID || + (token.StartedEventId != common.EmptyEventID && token.StartedEventId != currentWorkflowTask.StartedEventID) || + (token.StartedTime != nil && currentWorkflowTask.StartedTime != nil && !token.StartedTime.Equal(*currentWorkflowTask.StartedTime)) || + currentWorkflowTask.Attempt != token.Attempt { + // We have not altered mutable state yet, so release it with nil to avoid clearing MS. 
+ workflowContext.GetReleaseFn()(nil) + return nil, serviceerror.NewNotFound("Workflow task not found.") + } + defer func() { workflowContext.GetReleaseFn()(retError) }() var effects effect.Buffer @@ -407,16 +427,6 @@ func (handler *workflowTaskHandlerCallbacksImpl) handleWorkflowTaskCompleted( effects.Apply(ctx) }() - weContext := workflowContext.GetContext() - ms := workflowContext.GetMutableState() - - currentWorkflowTask := ms.GetWorkflowTaskByID(token.GetScheduledEventId()) - if !ms.IsWorkflowExecutionRunning() || currentWorkflowTask == nil || currentWorkflowTask.Attempt != token.Attempt || - currentWorkflowTask.StartedEventID == common.EmptyEventID || - (token.StartedEventId != common.EmptyEventID && token.StartedEventId != currentWorkflowTask.StartedEventID) { - return nil, serviceerror.NewNotFound("Workflow task not found.") - } - // It's an error if the workflow has used versioning in the past but this task has no versioning info. if ms.GetWorkerVersionStamp().GetUseVersioning() && !request.GetWorkerVersionStamp().GetUseVersioning() { return nil, serviceerror.NewInvalidArgument("Workflow using versioning must continue to use versioning.") diff --git a/service/matching/matchingEngine.go b/service/matching/matchingEngine.go index 49ff5b6d6bc..db9c8618fec 100644 --- a/service/matching/matchingEngine.go +++ b/service/matching/matchingEngine.go @@ -40,10 +40,10 @@ import ( "go.temporal.io/api/serviceerror" taskqueuepb "go.temporal.io/api/taskqueue/v1" "go.temporal.io/api/workflowservice/v1" - - enumsspb "go.temporal.io/server/api/enums/v1" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/api/matchingservice/v1" + + enumsspb "go.temporal.io/server/api/enums/v1" persistencespb "go.temporal.io/server/api/persistence/v1" replicationspb "go.temporal.io/server/api/replication/v1" taskqueuespb "go.temporal.io/server/api/taskqueue/v1" @@ -1301,6 +1301,7 @@ func (e *matchingEngineImpl) createPollWorkflowTaskQueueResponse( RunId: 
task.event.Data.GetRunId(), ScheduledEventId: historyResponse.GetScheduledEventId(), StartedEventId: historyResponse.GetStartedEventId(), + StartedTime: historyResponse.GetStartedTime(), Attempt: historyResponse.GetAttempt(), Clock: historyResponse.GetClock(), } diff --git a/tests/update_workflow_test.go b/tests/update_workflow_test.go index 22ad34b2fd6..3fae730d651 100644 --- a/tests/update_workflow_test.go +++ b/tests/update_workflow_test.go @@ -28,7 +28,6 @@ import ( "context" "errors" "fmt" - "strconv" "time" "github.com/stretchr/testify/assert" @@ -3919,692 +3918,3 @@ func (s *integrationSuite) TestUpdateWorkflow_CompletedSpeculativeWorkflowTask_D }) } } - -func (s *integrationSuite) TestUpdateWorkflow_StaleSpeculativeWorkflowTask_ReloadShard_DifferentStartedId_Rejected() { - /* - Test scenario: - An update triggered a speculative WFT and the task is dispatched to worker. - Shard reload, speculative WFT disappear from server. - Another update come, a second speculative WFT is scheduled but not dispatched yet. - An activity completed, it converts the 2nd speculative WFT into normal one. - The first speculative WFT respond back, server reject it because startedId mismatch. - The second speculative WFT respond back, server accpeted - */ - - tv := testvars.New(s.T().Name()) - tv = s.startWorkflow(tv) - - testCtx := NewContext() - wtHandlerCalls := 0 - wtHandler := func(execution *commonpb.WorkflowExecution, wt *commonpb.WorkflowType, previousStartedEventID, startedEventID int64, history *historypb.History) ([]*commandpb.Command, error) { - wtHandlerCalls++ - switch wtHandlerCalls { - case 1: - // Schedule activity. 
- return []*commandpb.Command{{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("5"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }}, nil - case 2: - return nil, nil - default: - s.Failf("wtHandler called too many times", "wtHandler shouldn't be called %d times", wtHandlerCalls) - return nil, nil - } - } - - atHandler := func(execution *commonpb.WorkflowExecution, activityType *commonpb.ActivityType, - activityID string, input *commonpb.Payloads, taskToken []byte) (*commonpb.Payloads, bool, error) { - return payloads.EncodeString(tv.String("activity-result")), false, nil - } - - poller := &TaskPoller{ - Engine: s.engine, - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - WorkflowTaskHandler: wtHandler, - ActivityTaskHandler: atHandler, - Logger: s.Logger, - T: s.T(), - } - - // First WFT, will schedule activity. Also force create a new WFT. - _, wt1Resp, err := poller.PollAndProcessWorkflowTaskWithAttemptAndRetryAndForceNewWorkflowTask(false, false, false, false, 1, 1, true, nil) - s.NoError(err) - - // Drain 2nd WFT (which is force created as requested) to make all events seem by SDK so following update can be speculative. 
- _, err = poller.HandlePartialWorkflowTask(wt1Resp.GetWorkflowTask(), false) - s.NoError(err) - s.EqualValues(0, wt1Resp.ResetHistoryEventId) - - // send update wf request, this will trigger speculative wft - go func() { - _, _ = s.sendUpdate(tv, "1") - }() - - // poll the speculative wft - wft1, err1 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err1) - s.NotNil(wft1) - s.True(len(wft1.TaskToken) > 0) // has valid task token - s.True(len(wft1.Messages) > 0) // has valid message - s.Equal(int64(10), wft1.StartedEventId) - s.Equal(int64(9), wft1.Messages[0].GetEventId()) - - // Get shardId so we can unload the shard later, this will clear mutable state and reload it. - ms, err := s.adminClient.DescribeMutableState(testCtx, &adminservice.DescribeMutableStateRequest{ - Namespace: s.namespace, - Execution: tv.WorkflowExecution(), - }) - s.NoError(err) - shardId, err := strconv.Atoi(ms.ShardId) - s.NoError(err) - - // unload the shard, this will make the speculative wft disappear. 
- _, err = s.adminClient.CloseShard(testCtx, &adminservice.CloseShardRequest{ - ShardId: int32(shardId), - }) - s.NoError(err) - - // send another update wf request (with SAME updateId), this will trigger a new speculative wft - go func() { - _, _ = s.sendUpdate(tv, "1") - }() - - // before handle the new speculative WFT, we handle the activity, this will convert the speculative wft to normal wft - err = poller.PollAndProcessActivityTask(false) - s.NoError(err) - - // poll the new wft (not speculative anymore) - wft2, err2 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err2) - s.NotNil(wft2) - s.True(len(wft2.TaskToken) > 0) // has valid task token - s.True(len(wft2.Messages) > 0) // has valid message - s.Equal(int64(12), wft2.StartedEventId) - s.Equal(int64(11), wft2.Messages[0].GetEventId()) - - // now try to complete 1st speculative wft, it should fail - commands := s.acceptUpdateCommands(tv, "1") - commands = append(commands, &commandpb.Command{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("13"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft1.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft1.Messages[0], "1"), - ReturnNewWorkflowTask: true, - }) - s.Error(err) // this should fail with NotFound - s.Contains(err.Error(), "Workflow task not found") - - // complete wft2 should succeed - commands = s.acceptUpdateCommands(tv, "1") - commands = append(commands, &commandpb.Command{ - CommandType: 
enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("15"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft2.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft2.Messages[0], "1"), - ReturnNewWorkflowTask: true, - }) - s.NoError(err) - - events := s.getHistory(s.namespace, tv.WorkflowExecution()) - s.EqualHistoryEvents(` - 1 WorkflowExecutionStarted - 2 WorkflowTaskScheduled - 3 WorkflowTaskStarted - 4 WorkflowTaskCompleted - 5 ActivityTaskScheduled - 6 WorkflowTaskScheduled - 7 WorkflowTaskStarted - 8 WorkflowTaskCompleted - 9 WorkflowTaskScheduled - 10 ActivityTaskStarted - 11 ActivityTaskCompleted - 12 WorkflowTaskStarted - 13 WorkflowTaskCompleted - 14 WorkflowExecutionUpdateAccepted {"AcceptedRequestSequencingEventId":11} - 15 ActivityTaskScheduled - `, events) -} - -func (s *integrationSuite) TestUpdateWorkflow_StaleSpeculativeWorkflowTask_ReloadShard_SameStartedId_SameUpdateId_Accepted() { - /* - Test scenario: - An update triggered a speculative WFT and the task is dispatched to worker. - Shard reload, speculative WFT disappear from server. - Another update come, a second speculative WFT is dispatched to worker with same scheduled_id/started_id and update_id. - The first speculative WFT respond back, server accept it. - The second speculative WFT respond back, server reject it because scheduled_id is not found. 
- */ - tv := testvars.New(s.T().Name()) - tv = s.startWorkflow(tv) - - testCtx := NewContext() - wtHandlerCalls := 0 - wtHandler := func(execution *commonpb.WorkflowExecution, wt *commonpb.WorkflowType, previousStartedEventID, startedEventID int64, history *historypb.History) ([]*commandpb.Command, error) { - wtHandlerCalls++ - switch wtHandlerCalls { - case 1: - // Schedule activity. - return []*commandpb.Command{{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("5"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }}, nil - case 2: - return nil, nil - default: - s.Failf("wtHandler called too many times", "wtHandler shouldn't be called %d times", wtHandlerCalls) - return nil, nil - } - } - - atHandler := func(execution *commonpb.WorkflowExecution, activityType *commonpb.ActivityType, - activityID string, input *commonpb.Payloads, taskToken []byte) (*commonpb.Payloads, bool, error) { - return payloads.EncodeString(tv.String("activity-result")), false, nil - } - - poller := &TaskPoller{ - Engine: s.engine, - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - WorkflowTaskHandler: wtHandler, - ActivityTaskHandler: atHandler, - Logger: s.Logger, - T: s.T(), - } - - // First WFT, will schedule activity. Also force create a new WFT. - _, wt1Resp, err := poller.PollAndProcessWorkflowTaskWithAttemptAndRetryAndForceNewWorkflowTask(false, false, false, false, 1, 1, true, nil) - s.NoError(err) - - // Drain 2nd WFT (which is force created as requested) to make all events seem by SDK so following update can be speculative. 
- _, err = poller.HandlePartialWorkflowTask(wt1Resp.GetWorkflowTask(), false) - s.NoError(err) - s.EqualValues(0, wt1Resp.ResetHistoryEventId) - - // send update wf request, this will trigger speculative wft - go func() { - _, _ = s.sendUpdate(tv, "1") - }() - - // poll the speculative wft - wft1, err1 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err1) - s.NotNil(wft1) - s.True(len(wft1.TaskToken) > 0) // has valid task token - s.True(len(wft1.Messages) > 0) // has valid message - s.Equal(int64(10), wft1.StartedEventId) - s.Equal(int64(9), wft1.Messages[0].GetEventId()) - - // Get shardId so we can unload the shard later - ms, err := s.adminClient.DescribeMutableState(testCtx, &adminservice.DescribeMutableStateRequest{ - Namespace: s.namespace, - Execution: tv.WorkflowExecution(), - }) - s.NoError(err) - shardId, err := strconv.Atoi(ms.ShardId) - s.NoError(err) - - // unload the shard, this will make last speculative wft disappear from server - _, err = s.adminClient.CloseShard(testCtx, &adminservice.CloseShardRequest{ - ShardId: int32(shardId), - }) - s.NoError(err) - - // send another update wf request (with SAME updateId), this will trigger a new speculative wft - go func() { - _, _ = s.sendUpdate(tv, "1") - }() - - // poll the new wft (not speculative anymore) - wft2, err2 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err2) - s.NotNil(wft2) - s.True(len(wft2.TaskToken) > 0) // has valid task token - s.True(len(wft2.Messages) > 0) // has valid message - s.Equal(int64(10), wft2.StartedEventId) - s.Equal(int64(9), wft2.Messages[0].GetEventId()) - - // now try to complete 1st speculative wft, it should succeed - commands := s.acceptUpdateCommands(tv, "1") - commands = append(commands, &commandpb.Command{ - CommandType: 
enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("13"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft1.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft1.Messages[0], "1"), - ReturnNewWorkflowTask: true, - }) - s.NoError(err) // Stale speculative WFT should be accepted because it has same scheduled_id / started_id and the accepted message is valid (same update_id) - - events := s.getHistory(s.namespace, tv.WorkflowExecution()) - s.EqualHistoryEvents(` - 1 WorkflowExecutionStarted - 2 WorkflowTaskScheduled - 3 WorkflowTaskStarted - 4 WorkflowTaskCompleted - 5 ActivityTaskScheduled - 6 WorkflowTaskScheduled - 7 WorkflowTaskStarted - 8 WorkflowTaskCompleted - 9 WorkflowTaskScheduled - 10 WorkflowTaskStarted - 11 WorkflowTaskCompleted - 12 WorkflowExecutionUpdateAccepted {"AcceptedRequestSequencingEventId":9} - 13 ActivityTaskScheduled - `, events) -} - -func (s *integrationSuite) TestUpdateWorkflow_StaleSpeculativeWorkflowTask_ClearMutableState_Accepted() { - /* - Test scenario: - An update triggered a speculative WFT and the task is dispatched to worker. - Mutable state cleared, speculative WFT disappear from server but update registry stays. - Another update come, a second speculative WFT is dispatched to worker with same scheduled_id/started_id but different update_id. - The first speculative WFT respond back, server accept it. - Server generates 3rd speculative WFT because there is still not started update. - The second speculative WFT respond back, server reject it because scheduled_id is not found. 
- Worker polls for the 3rd speculative WFT and respond back normally. - */ - - tv := testvars.New(s.T().Name()) - tv = s.startWorkflow(tv) - - testCtx := NewContext() - wtHandlerCalls := 0 - wtHandler := func(execution *commonpb.WorkflowExecution, wt *commonpb.WorkflowType, previousStartedEventID, startedEventID int64, history *historypb.History) ([]*commandpb.Command, error) { - wtHandlerCalls++ - switch wtHandlerCalls { - case 1: - // Schedule activity. - return []*commandpb.Command{{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("5"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }}, nil - case 2: - return nil, nil - default: - s.Failf("wtHandler called too many times", "wtHandler shouldn't be called %d times", wtHandlerCalls) - return nil, nil - } - } - - atHandler := func(execution *commonpb.WorkflowExecution, activityType *commonpb.ActivityType, - activityID string, input *commonpb.Payloads, taskToken []byte) (*commonpb.Payloads, bool, error) { - return payloads.EncodeString(tv.String("activity-result")), false, nil - } - - poller := &TaskPoller{ - Engine: s.engine, - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - WorkflowTaskHandler: wtHandler, - ActivityTaskHandler: atHandler, - Logger: s.Logger, - T: s.T(), - } - - // First WFT, will schedule activity. Also force create a new WFT. - _, wt1Resp, err := poller.PollAndProcessWorkflowTaskWithAttemptAndRetryAndForceNewWorkflowTask(false, false, false, false, 1, 1, true, nil) - s.NoError(err) - - // Drain 2nd WFT (which is force created as requested) to make all events seem by SDK so following update can be speculative. 
- _, err = poller.HandlePartialWorkflowTask(wt1Resp.GetWorkflowTask(), false) - s.NoError(err) - s.EqualValues(0, wt1Resp.ResetHistoryEventId) - - // send update wf request, this will trigger speculative wft - go func() { - _, _ = s.sendUpdate(tv, "1") - }() - - // poll the speculative wft - wft1, err1 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err1) - s.NotNil(wft1) - s.True(len(wft1.TaskToken) > 0) // has valid task token - s.True(len(wft1.Messages) > 0) // has valid message - s.Equal(int64(10), wft1.StartedEventId) - s.Equal(int64(9), wft1.Messages[0].GetEventId()) - - // DescribeMutableState will clear MS, cause the speculative to disappear but the registry for update 1 will stay. - _, err = s.adminClient.DescribeMutableState(testCtx, &adminservice.DescribeMutableStateRequest{ - Namespace: s.namespace, - Execution: tv.WorkflowExecution(), - }) - s.NoError(err) - - // send another update wf request (with DIFFERENT updateId), this will trigger a new speculative wft - go func() { - _, _ = s.sendUpdate(tv, "2") - }() - - // poll the new wft (it is still speculative) - wft2, err2 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err2) - s.NotNil(wft2) - s.True(len(wft2.TaskToken) > 0) // has valid task token - s.True(len(wft2.Messages) > 0) // has valid message - s.Equal(int64(10), wft2.StartedEventId) - s.Equal(int64(9), wft2.Messages[0].GetEventId()) - - // now try to complete 1st speculative wft, it should succeed. 
- commands := s.acceptUpdateCommands(tv, "1") - commands = append(commands, &commandpb.Command{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("13"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft1.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft1.Messages[0], "1"), - }) - s.NoError(err) - - // complete wft2 should fail, because the wft already completed. - commands = s.acceptUpdateCommands(tv, "2") - commands = append(commands, &commandpb.Command{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("15"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft2.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft2.Messages[0], "2"), - }) - s.Error(err) - s.Contains(err.Error(), "Workflow task not found") - - // polling it again should complete the other update - wft3, err3 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err3) - s.NotNil(wft3) - s.True(len(wft3.TaskToken) > 0) // has valid task token - s.True(len(wft3.Messages) > 0) // has valid message - 
s.Equal(int64(15), wft3.StartedEventId) - s.Equal(int64(14), wft3.Messages[0].GetEventId()) - - // complete wft2 should fail, because the wft already completed. - commands = s.acceptUpdateCommands(tv, "2") - commands = append(commands, &commandpb.Command{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("15"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft3.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft3.Messages[0], "2"), - }) - s.NoError(err) - - events := s.getHistory(s.namespace, tv.WorkflowExecution()) - s.EqualHistoryEvents(` - 1 WorkflowExecutionStarted - 2 WorkflowTaskScheduled - 3 WorkflowTaskStarted - 4 WorkflowTaskCompleted - 5 ActivityTaskScheduled - 6 WorkflowTaskScheduled - 7 WorkflowTaskStarted - 8 WorkflowTaskCompleted - 9 WorkflowTaskScheduled - 10 WorkflowTaskStarted - 11 WorkflowTaskCompleted - 12 WorkflowExecutionUpdateAccepted {"AcceptedRequestSequencingEventId":9} - 13 ActivityTaskScheduled - 14 WorkflowTaskScheduled - 15 WorkflowTaskStarted - 16 WorkflowTaskCompleted - 17 WorkflowExecutionUpdateAccepted {"AcceptedRequestSequencingEventId":14} - 18 ActivityTaskScheduled - `, events) -} - -func (s *integrationSuite) TestUpdateWorkflow_StaleSpeculativeWorkflowTask_SameStartedId_DifferentUpdateId_Rejected() { - /* - Test scenario: - An update triggered a speculative WFT and the task is dispatched to worker. - Shard reload, speculative WFT disappear from server and the update registry is gone. - Another update come (with different updateID), a second speculative WFT is dispatched to worker. 
- The first speculative WFT respond back, server reject it because update_id not found. - The second speculative WFT respond back, server reject it because the WFT is already failed by last respond. - */ - - tv := testvars.New(s.T().Name()) - tv = s.startWorkflow(tv) - - testCtx := NewContext() - wtHandlerCalls := 0 - wtHandler := func(execution *commonpb.WorkflowExecution, wt *commonpb.WorkflowType, previousStartedEventID, startedEventID int64, history *historypb.History) ([]*commandpb.Command, error) { - wtHandlerCalls++ - switch wtHandlerCalls { - case 1: - // Schedule activity. - return []*commandpb.Command{{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("5"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }}, nil - case 2: - return nil, nil - default: - s.Failf("wtHandler called too many times", "wtHandler shouldn't be called %d times", wtHandlerCalls) - return nil, nil - } - } - - atHandler := func(execution *commonpb.WorkflowExecution, activityType *commonpb.ActivityType, - activityID string, input *commonpb.Payloads, taskToken []byte) (*commonpb.Payloads, bool, error) { - return payloads.EncodeString(tv.String("activity-result")), false, nil - } - - poller := &TaskPoller{ - Engine: s.engine, - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - WorkflowTaskHandler: wtHandler, - ActivityTaskHandler: atHandler, - Logger: s.Logger, - T: s.T(), - } - - // First WFT, will schedule activity. Also force create a new WFT. 
- _, wt1Resp, err := poller.PollAndProcessWorkflowTaskWithAttemptAndRetryAndForceNewWorkflowTask(false, false, false, false, 1, 1, true, nil) - s.NoError(err) - - // Drain 2nd WFT (which is force created as requested) to make all events seem by SDK so following update can be speculative. - _, err = poller.HandlePartialWorkflowTask(wt1Resp.GetWorkflowTask(), false) - s.NoError(err) - s.EqualValues(0, wt1Resp.ResetHistoryEventId) - - // send update wf request, this will trigger speculative wft - go func() { - _, _ = s.sendUpdate(tv, "1") - }() - - // poll the speculative wft - wft1, err1 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err1) - s.NotNil(wft1) - s.True(len(wft1.TaskToken) > 0) // has valid task token - s.True(len(wft1.Messages) > 0) // has valid message - s.Equal(int64(10), wft1.StartedEventId) - s.Equal(int64(9), wft1.Messages[0].GetEventId()) - - // Get shardId so we can unload the shard - ms, err := s.adminClient.DescribeMutableState(testCtx, &adminservice.DescribeMutableStateRequest{ - Namespace: s.namespace, - Execution: tv.WorkflowExecution(), - }) - s.NoError(err) - shardId, err := strconv.Atoi(ms.ShardId) - s.NoError(err) - - // unload the shard, this will make last speculative wft disappear from server - _, err = s.adminClient.CloseShard(testCtx, &adminservice.CloseShardRequest{ - ShardId: int32(shardId), - }) - s.NoError(err) - - // send another update wf request (with DIFFERENT updateId), this will trigger a new speculative wft - go func() { - _, _ = s.sendUpdate(tv, "2") - }() - - // poll the new wft (it is still speculative) - wft2, err2 := s.engine.PollWorkflowTaskQueue(testCtx, &workflowservice.PollWorkflowTaskQueueRequest{ - Namespace: s.namespace, - TaskQueue: tv.TaskQueue(), - }) - s.NoError(err2) - s.NotNil(wft2) - s.True(len(wft2.TaskToken) > 0) // has valid task token - s.True(len(wft2.Messages) > 0) // has valid message 
- s.Equal(int64(10), wft2.StartedEventId) - s.Equal(int64(9), wft2.Messages[0].GetEventId()) - - // now try to complete 1st speculative wft, it should fail - commands := s.acceptUpdateCommands(tv, "1") - commands = append(commands, &commandpb.Command{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("13"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft1.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft1.Messages[0], "1"), - ReturnNewWorkflowTask: true, - }) - s.Error(err) // Fail because UpdateId is not_found. (If shard was not reload, and the update still exists in registry, then it could be accepted) - s.Contains(err.Error(), "BadUpdateWorkflowExecutionMessage") - s.Contains(err.Error(), "not found") - - // complete wft2 should also fail, because the previous attempt already mark the WFT as failed. 
- commands = s.acceptUpdateCommands(tv, "1") - commands = append(commands, &commandpb.Command{ - CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK, - Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{ - ActivityId: tv.ActivityID("15"), - ActivityType: tv.ActivityType(), - TaskQueue: tv.TaskQueue(), - ScheduleToCloseTimeout: tv.InfiniteTimeout(), - }}, - }) - _, err = s.engine.RespondWorkflowTaskCompleted(testCtx, &workflowservice.RespondWorkflowTaskCompletedRequest{ - Namespace: s.namespace, - TaskToken: wft2.TaskToken, - Commands: commands, - Messages: s.acceptUpdateMessages(tv, wft2.Messages[0], "1"), - ReturnNewWorkflowTask: true, - }) - s.Error(err) - s.Contains(err.Error(), "Workflow task not found") - - events := s.getHistory(s.namespace, tv.WorkflowExecution()) - s.EqualHistoryEvents(` - 1 WorkflowExecutionStarted - 2 WorkflowTaskScheduled - 3 WorkflowTaskStarted - 4 WorkflowTaskCompleted - 5 ActivityTaskScheduled - 6 WorkflowTaskScheduled - 7 WorkflowTaskStarted - 8 WorkflowTaskCompleted - 9 WorkflowTaskScheduled - 10 WorkflowTaskStarted - 11 WorkflowTaskFailed - `, events) -}