-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
Copy pathtxn.go
1020 lines (926 loc) · 35.8 KB
/
txn.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2015 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
//
// Author: Peter Mattis ([email protected])
package client
import (
"fmt"
"github.com/gogo/protobuf/proto"
"github.com/pkg/errors"
"golang.org/x/net/context"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)
// Txn is an in-progress distributed database transaction. A Txn is safe for
// concurrent use by multiple goroutines.
type Txn struct {
db *DB
// The following fields are not safe for concurrent modification.
// They should be set before operating on the transaction.
// commitTriggers are run upon successful commit.
commitTriggers []func()
// systemConfigTrigger is set to true when modifying keys from the SystemConfig
// span. This sets the SystemConfigTrigger on EndTransactionRequest.
systemConfigTrigger bool
// The txn has to be committed by this deadline. A nil value indicates no
// deadline.
deadline *hlc.Timestamp
// mu holds fields that need to be synchronized for concurrent request execution.
mu struct {
syncutil.Mutex
Proto roachpb.Transaction
// UserPriority is the transaction's priority.
UserPriority roachpb.UserPriority
// txnAnchorKey is the key at which to anchor the transaction record. If
// unset, the first key written in the transaction will be used.
txnAnchorKey roachpb.Key
// writingTxnRecord is set when the Txn is in the middle of writing
// its transaction record. It is used to assure that even in the presence
// of concurrent requests, only one sends a BeginTxnRequest.
writingTxnRecord bool
// see IsFinalized()
finalized bool
// previousIDs holds the set of all previous IDs that the Txn's Proto has had
// across transaction aborts. This allows us to determine if a given response
// was meant for any incarnation of this transaction.
previousIDs map[uuid.UUID]struct{}
// commandCount indicates how many requests have been sent through
// this transaction. Reset on retryable txn errors.
// TODO(andrei): This is broken for DistSQL, which doesn't account for the
// requests it uses the transaction for.
commandCount int
// onKVTxnAborted, if set, is called when a KV-level roachpb.Transaction has
// been aborted, but this client.Txn instance will continue by initializing
// another roachpb.Transaction.
//
// This happens in response to TransactionAbortedError's and is used to
// inform the TxnCoordSender that it should stop heartbeating the
// transaction record corresponding to prevTxn.
//
// TODO(andrei): Criss-crossing between layers like this and calling from
// client.Txn into TxnCoordSender is ugly; it was meant as a temporary hack
// until #10511 is addressed and TxnCoordSender is merged with client.Txn.
onKVTxnSwitched func(ctx context.Context, prevTxn roachpb.Transaction)
}
}
// NewTxn returns a new txn.
func NewTxn(db *DB) *Txn {
return NewTxnWithProto(db, roachpb.Transaction{})
}
// NewTxnWithProto returns a new txn with the provided Transaction proto.
// This allows a client.Txn to be created with an already initialized proto.
func NewTxnWithProto(db *DB, proto roachpb.Transaction) *Txn {
txn := &Txn{db: db}
txn.mu.Proto = proto
return txn
}
// CommandCount returns the count of commands executed through this txn.
// Retryable errors on the transaction will reset the count to 0.
func (txn *Txn) CommandCount() int {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.commandCount
}
// IsFinalized returns true if this Txn has been finalized and should therefore
// not be used for any more KV operations.
// A Txn is considered finalized if it successfully committed or if a rollback
// was attempted (successful or not).
// Note that Commit() always leaves the transaction finalized, since it attempts
// to rollback on error.
func (txn *Txn) IsFinalized() bool {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.finalized
}
// SetUserPriority sets the transaction's user priority. Transactions default to
// normal user priority. The user priority must be set before any operations are
// performed on the transaction.
func (txn *Txn) SetUserPriority(userPriority roachpb.UserPriority) error {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.UserPriority == userPriority {
return nil
}
if txn.mu.Proto.IsInitialized() {
return errors.Errorf("cannot change the user priority of a running transaction")
}
if userPriority < roachpb.MinUserPriority || userPriority > roachpb.MaxUserPriority {
return errors.Errorf("the given user priority %f is out of the allowed range [%f, %d]",
userPriority, roachpb.MinUserPriority, roachpb.MaxUserPriority)
}
txn.mu.UserPriority = userPriority
return nil
}
// InternalSetPriority sets the transaction priority. It is intended for
// internal (testing) use only.
func (txn *Txn) InternalSetPriority(priority int32) {
txn.mu.Lock()
// The negative user priority is translated on the server into a positive,
// non-randomized, priority for the transaction.
txn.mu.UserPriority = roachpb.UserPriority(-priority)
txn.mu.Unlock()
}
// UserPriority returns the transaction's user priority.
func (txn *Txn) UserPriority() roachpb.UserPriority {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.UserPriority
}
// SetDebugName sets the debug name associated with the transaction which will
// appear in log files and the web UI.
func (txn *Txn) SetDebugName(name string) {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.Proto.Name == name {
return
}
if txn.mu.Proto.IsInitialized() {
panic("cannot change the debug name of a running transaction")
}
txn.mu.Proto.Name = name
}
// DebugName returns the debug name associated with the transaction.
func (txn *Txn) DebugName() string {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.Proto.ID == nil {
return txn.mu.Proto.Name
}
return fmt.Sprintf("%s (id: %s)", txn.mu.Proto.Name, txn.mu.Proto.ID)
}
// SetIsolation sets the transaction's isolation type. Transactions default to
// serializable isolation. The isolation must be set before any operations are
// performed on the transaction.
func (txn *Txn) SetIsolation(isolation enginepb.IsolationType) error {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.Proto.Isolation == isolation {
return nil
}
if txn.mu.Proto.IsInitialized() {
return errors.Errorf("cannot change the isolation level of a running transaction")
}
txn.mu.Proto.Isolation = isolation
return nil
}
// Isolation returns the transaction's isolation type.
func (txn *Txn) Isolation() enginepb.IsolationType {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.Proto.Isolation
}
// OrigTimestamp returns the transaction's starting timestamp.
func (txn *Txn) OrigTimestamp() hlc.Timestamp {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.Proto.OrigTimestamp
}
// AnchorKey returns the transaction's anchor key. The caller should treat the
// returned byte slice as immutable.
func (txn *Txn) AnchorKey() []byte {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.Proto.Key
}
// SetTxnAnchorKey sets the key at which to anchor the transaction record. The
// transaction anchor key defaults to the first key written in a transaction.
func (txn *Txn) SetTxnAnchorKey(key roachpb.Key) error {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.Proto.Writing || txn.mu.writingTxnRecord {
return errors.Errorf("transaction anchor key already set")
}
txn.mu.txnAnchorKey = key
return nil
}
// SetSystemConfigTrigger sets the system db trigger to true on this transaction.
// This will impact the EndTransactionRequest.
//
// NOTE: The system db trigger will only execute correctly if the transaction
// record is located on the range that contains the system span. If a
// transaction is created which modifies both system *and* non-system data, it
// should be ensured that the transaction record itself is on the system span.
// This can be done by making sure a system key is the first key touched in the
// transaction.
func (txn *Txn) SetSystemConfigTrigger() error {
if !txn.systemConfigTrigger {
txn.systemConfigTrigger = true
// The system-config trigger must be run on the system-config range which
// means any transaction with the trigger set needs to be anchored to the
// system-config range.
return txn.SetTxnAnchorKey(keys.SystemConfigSpan.Key)
}
return nil
}
// Proto returns the transactions underlying protocol buffer. It is not thread-safe,
// only use if you know that no requests are executing concurrently.
//
// A thread-safe alternative would be to clone the Proto under lock and return
// this clone, but we currently have no situations where this is needed.
func (txn *Txn) Proto() *roachpb.Transaction {
return &txn.mu.Proto
}
// NewBatch creates and returns a new empty batch object for use with the Txn.
func (txn *Txn) NewBatch() *Batch {
return &Batch{txn: txn}
}
// Get retrieves the value for a key, returning the retrieved key/value or an
// error. It is not considered an error for the key to not exist.
//
// r, err := db.Get("a")
// // string(r.Key) == "a"
//
// key can be either a byte slice or a string.
func (txn *Txn) Get(ctx context.Context, key interface{}) (KeyValue, error) {
b := txn.NewBatch()
b.Get(key)
return getOneRow(txn.Run(ctx, b), b)
}
// GetProto retrieves the value for a key and decodes the result as a proto
// message. If the key doesn't exist, the proto will simply be reset.
//
// key can be either a byte slice or a string.
func (txn *Txn) GetProto(ctx context.Context, key interface{}, msg proto.Message) error {
r, err := txn.Get(ctx, key)
if err != nil {
return err
}
return r.ValueProto(msg)
}
// Put sets the value for a key
//
// key can be either a byte slice or a string. value can be any key type, a
// proto.Message or any Go primitive type (bool, int, etc).
func (txn *Txn) Put(ctx context.Context, key, value interface{}) error {
b := txn.NewBatch()
b.Put(key, value)
return getOneErr(txn.Run(ctx, b), b)
}
// CPut conditionally sets the value for a key if the existing value is equal
// to expValue. To conditionally set a value only if there is no existing entry
// pass nil for expValue. Note that this must be an interface{}(nil), not a
// typed nil value (e.g. []byte(nil)).
//
// Returns an error if the existing value is not equal to expValue.
//
// key can be either a byte slice or a string. value can be any key type, a
// proto.Message or any Go primitive type (bool, int, etc).
func (txn *Txn) CPut(ctx context.Context, key, value, expValue interface{}) error {
b := txn.NewBatch()
b.CPut(key, value, expValue)
return getOneErr(txn.Run(ctx, b), b)
}
// InitPut sets the first value for a key to value. An error is reported if a
// value already exists for the key and it's not equal to the value passed in.
//
// key can be either a byte slice or a string. value can be any key type, a
// proto.Message or any Go primitive type (bool, int, etc). It is illegal to
// set value to nil.
func (txn *Txn) InitPut(ctx context.Context, key, value interface{}) error {
b := txn.NewBatch()
b.InitPut(key, value)
return getOneErr(txn.Run(ctx, b), b)
}
// Inc increments the integer value at key. If the key does not exist it will
// be created with an initial value of 0 which will then be incremented. If the
// key exists but was set using Put or CPut an error will be returned.
//
// The returned Result will contain a single row and Result.Err will indicate
// success or failure.
//
// key can be either a byte slice or a string.
func (txn *Txn) Inc(ctx context.Context, key interface{}, value int64) (KeyValue, error) {
b := txn.NewBatch()
b.Inc(key, value)
return getOneRow(txn.Run(ctx, b), b)
}
func (txn *Txn) scan(
ctx context.Context, begin, end interface{}, maxRows int64, isReverse bool,
) ([]KeyValue, error) {
b := txn.NewBatch()
if maxRows > 0 {
b.Header.MaxSpanRequestKeys = maxRows
}
if !isReverse {
b.Scan(begin, end)
} else {
b.ReverseScan(begin, end)
}
r, err := getOneResult(txn.Run(ctx, b), b)
return r.Rows, err
}
// Scan retrieves the rows between begin (inclusive) and end (exclusive) in
// ascending order.
//
// The returned []KeyValue will contain up to maxRows elements (or all results
// when zero is supplied).
//
// key can be either a byte slice or a string.
func (txn *Txn) Scan(
ctx context.Context, begin, end interface{}, maxRows int64,
) ([]KeyValue, error) {
return txn.scan(ctx, begin, end, maxRows, false)
}
// ReverseScan retrieves the rows between begin (inclusive) and end (exclusive)
// in descending order.
//
// The returned []KeyValue will contain up to maxRows elements (or all results
// when zero is supplied).
//
// key can be either a byte slice or a string.
func (txn *Txn) ReverseScan(
ctx context.Context, begin, end interface{}, maxRows int64,
) ([]KeyValue, error) {
return txn.scan(ctx, begin, end, maxRows, true)
}
// Del deletes one or more keys.
//
// key can be either a byte slice or a string.
func (txn *Txn) Del(ctx context.Context, keys ...interface{}) error {
b := txn.NewBatch()
b.Del(keys...)
return getOneErr(txn.Run(ctx, b), b)
}
// DelRange deletes the rows between begin (inclusive) and end (exclusive).
//
// The returned Result will contain 0 rows and Result.Err will indicate success
// or failure.
//
// key can be either a byte slice or a string.
func (txn *Txn) DelRange(ctx context.Context, begin, end interface{}) error {
b := txn.NewBatch()
b.DelRange(begin, end, false)
return getOneErr(txn.Run(ctx, b), b)
}
// Run executes the operations queued up within a batch. Before executing any
// of the operations the batch is first checked to see if there were any errors
// during its construction (e.g. failure to marshal a proto message).
//
// The operations within a batch are run in parallel and the order is
// non-deterministic. It is an unspecified behavior to modify and retrieve the
// same key within a batch.
//
// Upon completion, Batch.Results will contain the results for each
// operation. The order of the results matches the order the operations were
// added to the batch.
func (txn *Txn) Run(ctx context.Context, b *Batch) error {
tracing.AnnotateTrace()
defer tracing.AnnotateTrace()
if err := b.prepare(); err != nil {
return err
}
return sendAndFill(ctx, txn.send, b)
}
func (txn *Txn) commit(ctx context.Context) error {
err := txn.sendEndTxnReq(ctx, true /* commit */, txn.deadline)
if err == nil {
for _, t := range txn.commitTriggers {
t()
}
}
return err
}
// CleanupOnError cleans up the transaction as a result of an error.
func (txn *Txn) CleanupOnError(ctx context.Context, err error) {
if err == nil {
log.Fatal(ctx, "CleanupOnError called with nil error")
}
// This may race with a concurrent EndTxnRequests. That's fine though because
// we're just trying to clean up and will happily log the failed Rollback error
// if someone beat us.
txn.mu.Lock()
isPending := txn.mu.Proto.Status == roachpb.PENDING
txn.mu.Unlock()
if isPending {
if replyErr := txn.Rollback(ctx); replyErr != nil {
log.Errorf(ctx, "failure aborting transaction: %s; abort caused by: %s", replyErr, err)
}
}
}
// Commit is the same as CommitOrCleanup but will not attempt to clean
// up on failure. This can be used when the caller is prepared to do proper
// cleanup.
func (txn *Txn) Commit(ctx context.Context) error {
return txn.commit(ctx)
}
// CommitInBatch executes the operations queued up within a batch and
// commits the transaction. Explicitly committing a transaction is
// optional, but more efficient than relying on the implicit commit
// performed when the transaction function returns without error.
// The batch must be created by this transaction.
// If the command completes successfully, the txn is considered finalized. On
// error, no attempt is made to clean up the (possibly still pending)
// transaction.
func (txn *Txn) CommitInBatch(ctx context.Context, b *Batch) error {
if txn != b.txn {
return errors.Errorf("a batch b can only be committed by b.txn")
}
b.AddRawRequest(endTxnReq(true /* commit */, txn.deadline, txn.systemConfigTrigger))
return txn.Run(ctx, b)
}
// CommitOrCleanup sends an EndTransactionRequest with Commit=true.
// If that fails, an attempt to rollback is made.
// txn should not be used to send any more commands after this call.
func (txn *Txn) CommitOrCleanup(ctx context.Context) error {
err := txn.commit(ctx)
if err != nil {
txn.CleanupOnError(ctx, err)
}
if !txn.IsFinalized() {
log.Fatal(ctx, "Commit() failed to move txn to a final state")
}
return err
}
// UpdateDeadlineMaybe sets the transactions deadline to the lower of the
// current one (if any) and the passed value.
func (txn *Txn) UpdateDeadlineMaybe(deadline hlc.Timestamp) bool {
if txn.deadline == nil || deadline.Less(*txn.deadline) {
txn.deadline = &deadline
return true
}
return false
}
// ResetDeadline resets the deadline.
func (txn *Txn) ResetDeadline() {
txn.deadline = nil
}
// GetDeadline returns the deadline. For testing.
func (txn *Txn) GetDeadline() *hlc.Timestamp {
return txn.deadline
}
// Rollback sends an EndTransactionRequest with Commit=false.
// txn is considered finalized and cannot be used to send any more commands.
func (txn *Txn) Rollback(ctx context.Context) error {
log.VEventf(ctx, 2, "rolling back transaction")
return txn.sendEndTxnReq(ctx, false /* commit */, nil)
}
// AddCommitTrigger adds a closure to be executed on successful commit
// of the transaction.
func (txn *Txn) AddCommitTrigger(trigger func()) {
txn.commitTriggers = append(txn.commitTriggers, trigger)
}
func (txn *Txn) sendEndTxnReq(ctx context.Context, commit bool, deadline *hlc.Timestamp) error {
var ba roachpb.BatchRequest
ba.Add(endTxnReq(commit, deadline, txn.systemConfigTrigger))
_, err := txn.send(ctx, ba)
return err.GoError()
}
func endTxnReq(commit bool, deadline *hlc.Timestamp, hasTrigger bool) roachpb.Request {
req := &roachpb.EndTransactionRequest{
Commit: commit,
Deadline: deadline,
}
if hasTrigger {
req.InternalCommitTrigger = &roachpb.InternalCommitTrigger{
ModifiedSpanTrigger: &roachpb.ModifiedSpanTrigger{
SystemConfigSpan: true,
},
}
}
return req
}
// TxnExecOptions controls how Exec() runs a transaction and the corresponding
// closure.
type TxnExecOptions struct {
// If set, the transaction is automatically aborted if the closure returns any
// error aside from recoverable internal errors, in which case the closure is
// retried. The retryable function should have no side effects which could
// cause problems in the event it must be run more than once.
// If not set, all errors cause the txn to be aborted.
AutoRetry bool
// If set, then the txn is automatically committed if no errors are
// encountered. If not set, committing or leaving open the txn is the
// responsibility of the client.
AutoCommit bool
// If set, an OrigTimestamp will be assigned to the transaction as early as
// possible, instead of when the first KV operation is performed. This allows
// users to guarantee that the transactions timestamp is a lower bound for any
// operation performed in Exec's closure.
//
// Useful for SQL txns for ensuring that the value returned by
// `cluster_logical_timestamp()` is consistent with the commit (serializable)
// ordering.
AssignTimestampImmediately bool
}
// AutoCommitError wraps a non-retryable error coming from auto-commit.
type AutoCommitError struct {
cause error
}
func (e *AutoCommitError) Error() string {
return e.cause.Error()
}
// Exec executes fn in the context of a distributed transaction.
// Execution is controlled by opt (see comments in TxnExecOptions).
//
// opt is passed to fn, and it's valid for fn to modify opt as it sees
// fit during each execution attempt.
//
// It's valid for txn to be nil (meaning the txn has already aborted) if fn
// can handle that. This is useful for continuing transactions that have been
// aborted because of an error in a previous batch of statements in the hope
// that a ROLLBACK will reset the state. Neither opt.AutoRetry not opt.AutoCommit
// can be set in this case.
//
// It is undefined to call Commit concurrently with any call to Exec. Since Exec
// with the AutoCommitflag is equivalent to an Exec possibly followed by a Commit,
// it must not be called concurrently with any other call to Exec or Commit.
//
// When this method returns, txn might be in any state; Exec does not attempt
// to clean up the transaction before returning an error. In case of
// TransactionAbortedError, txn is reset to a fresh transaction, ready to be
// used.
func (txn *Txn) Exec(
ctx context.Context, opt TxnExecOptions, fn func(context.Context, *Txn, *TxnExecOptions) error,
) (err error) {
// Run fn in a retry loop until we encounter a success or
// error condition this loop isn't capable of handling.
if txn == nil && (opt.AutoRetry || opt.AutoCommit) {
log.Fatal(ctx, "asked to retry or commit a txn that is already aborted")
}
for {
if txn != nil {
txn.mu.Lock()
// If we're looking at a brand new transaction, then communicate
// what should be used as initial timestamp.
if opt.AssignTimestampImmediately && !txn.mu.Proto.IsInitialized() {
// Control the KV timestamp, such that the value returned by
// `cluster_logical_timestamp()` is consistent with the commit
// (serializable) ordering.
txn.mu.Proto.OrigTimestamp = txn.db.clock.Now()
}
txn.mu.Unlock()
}
err = fn(ctx, txn, &opt)
if err == nil && opt.AutoCommit {
// Copy the status out of the Proto under lock. Making decisions on
// this later is not thread-safe, but the commutativity property of
// transactions assure that reasoning about the situation is straightforward.
txn.mu.Lock()
status := txn.Proto().Status
txn.mu.Unlock()
switch status {
case roachpb.ABORTED:
// TODO(andrei): Until 7881 is fixed.
log.Errorf(ctx, "#7881: no err but aborted txn proto. opt: %+v, txn: %+v",
opt, txn)
case roachpb.PENDING:
// fn succeeded, but didn't commit.
err = txn.Commit(ctx)
log.Eventf(ctx, "client.Txn did AutoCommit. err: %v\ntxn: %+v", err, txn.Proto())
if err != nil {
if _, retryable := err.(*roachpb.RetryableTxnError); !retryable {
// We can't retry, so let the caller know we tried to
// autocommit.
err = &AutoCommitError{cause: err}
}
}
}
}
retErr, retryable := err.(*roachpb.RetryableTxnError)
if retryable && !txn.IsRetryableErrMeantForTxn(retErr) {
// Make sure the txn record that err carries is for this txn.
// If it's not, we terminate the "retryable" character of the error. We
// might get a RetryableTxnError if the closure ran another transaction
// internally and let the error propagate upwards.
return errors.Wrap(retErr, "retryable error from another txn")
}
if !opt.AutoRetry || !retryable {
break
}
txn.commitTriggers = nil
log.VEventf(ctx, 2, "automatically retrying transaction: %s because of error: %s",
txn.DebugName(), err)
}
return err
}
// IsRetryableErrMeantForTxn returns true if err is a retryable
// error meant to restart this client transaction.
func (txn *Txn) IsRetryableErrMeantForTxn(err *roachpb.RetryableTxnError) bool {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.isRetryableErrMeantForTxnLocked(err)
}
func (txn *Txn) isRetryableErrMeantForTxnLocked(err *roachpb.RetryableTxnError) bool {
// Make sure the txn record that err carries is for this txn.
// First check if the error was meant for a previous incarnation
// of the transaction.
if err.TxnID != nil {
if _, ok := txn.mu.previousIDs[*err.TxnID]; ok {
return true
}
}
// If not, make sure it was meant for this transaction.
return roachpb.TxnIDEqual(err.TxnID, txn.mu.Proto.ID)
}
// send runs the specified calls synchronously in a single batch and
// returns any errors. If the transaction is read-only or has already
// been successfully committed or aborted, a potential trailing
// EndTransaction call is silently dropped, allowing the caller to
// always commit or clean-up explicitly even when that may not be
// required (or even erroneous). Returns (nil, nil) for an empty batch.
func (txn *Txn) send(
ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
// It doesn't make sense to use inconsistent reads in a transaction. However,
// we still need to accept it as a parameter for this to compile.
if ba.ReadConsistency != roachpb.CONSISTENT {
return nil, roachpb.NewErrorf("cannot use %s ReadConsistency in txn",
ba.ReadConsistency)
}
lastIndex := len(ba.Requests) - 1
if lastIndex < 0 {
return nil, nil
}
firstWriteIdx, pErr := firstWriteIndex(ba)
if pErr != nil {
return nil, pErr
}
haveTxnWrite := firstWriteIdx != -1
endTxnRequest, haveEndTxn := ba.Requests[lastIndex].GetInner().(*roachpb.EndTransactionRequest)
var needBeginTxn, elideEndTxn bool
lockedPrelude := func() *roachpb.Error {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.Proto.Status != roachpb.PENDING || txn.mu.finalized {
return roachpb.NewErrorf(
"attempting to use transaction with wrong status or finalized: %s %v",
txn.mu.Proto.Status, txn.mu.finalized)
}
// For testing purposes, txn.UserPriority can be a negative value (see
// roachpb.MakePriority).
if txn.mu.UserPriority != 0 {
ba.UserPriority = txn.mu.UserPriority
}
needBeginTxn = !(txn.mu.Proto.Writing || txn.mu.writingTxnRecord) && haveTxnWrite
needEndTxn := txn.mu.Proto.Writing || txn.mu.writingTxnRecord || haveTxnWrite
elideEndTxn = haveEndTxn && !needEndTxn
// If we're not yet writing in this txn, but intend to, insert a
// begin transaction request before the first write command and update
// transaction state accordingly.
if needBeginTxn {
// Set txn key based on the key of the first transactional write if
// not already set. If the transaction already has a key (we're in a
// restart), make sure we keep the anchor key the same.
if len(txn.mu.Proto.Key) == 0 {
txnAnchorKey := txn.mu.txnAnchorKey
if len(txnAnchorKey) == 0 {
txnAnchorKey = ba.Requests[0].GetInner().Header().Key
}
txn.mu.Proto.Key = txnAnchorKey
}
// Set the key in the begin transaction request to the txn's anchor key.
bt := &roachpb.BeginTransactionRequest{
Span: roachpb.Span{
Key: txn.mu.Proto.Key,
},
}
// Inject the new request before position firstWriteIdx, taking
// care to avoid unnecessary allocations.
oldRequests := ba.Requests
ba.Requests = make([]roachpb.RequestUnion, len(ba.Requests)+1)
copy(ba.Requests, oldRequests[:firstWriteIdx])
ba.Requests[firstWriteIdx].MustSetInner(bt)
copy(ba.Requests[firstWriteIdx+1:], oldRequests[firstWriteIdx:])
// We're going to be writing the transaction record by sending the
// begin transaction request.
txn.mu.writingTxnRecord = true
}
// Initialize an uninitialized (ID == nil) Transaction proto.
if !txn.mu.Proto.IsInitialized() {
// The initial timestamp may be communicated by a higher layer.
// If so, use that. Otherwise make up a new one.
timestamp := txn.mu.Proto.OrigTimestamp
if timestamp == (hlc.Timestamp{}) {
timestamp = txn.db.clock.Now()
}
newTxn := roachpb.NewTransaction(
txn.mu.Proto.Name,
txn.mu.Proto.Key,
ba.UserPriority,
txn.mu.Proto.Isolation,
timestamp,
txn.db.clock.MaxOffset().Nanoseconds(),
)
// Use existing priority as a minimum. This is used on transaction
// aborts to ratchet priority when creating successor transaction.
if newTxn.Priority < txn.mu.Proto.Priority {
newTxn.Priority = txn.mu.Proto.Priority
}
txn.mu.Proto = *newTxn
}
if elideEndTxn {
ba.Requests = ba.Requests[:lastIndex]
}
// Increment the statement count sent through this transaction.
txn.mu.commandCount += len(ba.Requests)
// Clone the Txn's Proto so that future modifications can be made without
// worrying about synchronization.
newTxn := txn.mu.Proto.Clone()
ba.Txn = &newTxn
return nil
}
if pErr := lockedPrelude(); pErr != nil {
return nil, pErr
}
// Send call through the DB.
br, pErr := txn.db.send(ctx, ba)
// Lock for the entire response postlude.
txn.mu.Lock()
defer txn.mu.Unlock()
// If we inserted a begin transaction request, remove it here. We also
// unset the flag writingTxnRecord flag in case another ever needs to
// be sent again (for instance, if we're aborted and need to restart).
if needBeginTxn {
if br != nil && br.Responses != nil {
br.Responses = append(br.Responses[:firstWriteIdx], br.Responses[firstWriteIdx+1:]...)
}
// Handle case where inserted begin txn confused an indexed error.
if pErr != nil && pErr.Index != nil {
idx := pErr.Index.Index
if idx == int32(firstWriteIdx) {
// An error was encountered on begin txn; disallow the indexing.
pErr.Index = nil
} else if idx > int32(firstWriteIdx) {
// An error was encountered after begin txn; decrement index.
pErr.SetErrorIndex(idx - 1)
}
}
txn.mu.writingTxnRecord = false
}
if haveEndTxn {
if pErr == nil || !endTxnRequest.Commit {
// Finalize the transaction if either we sent a successful commit
// EndTxnRequest, or sent a rollback EndTxnRequest (regardless of
// if it succeeded).
txn.mu.finalized = true
}
}
if pErr != nil {
if log.V(1) {
log.Infof(ctx, "failed batch: %s", pErr)
}
txn.updateStateOnErrLocked(ctx, pErr.GoError(), ba.UserPriority)
return nil, pErr
}
if br != nil {
if br.Error != nil {
panic(roachpb.ErrorUnexpectedlySet(txn.db.sender, br))
}
if len(br.CollectedSpans) != 0 {
if err := tracing.IngestRemoteSpans(ctx, br.CollectedSpans); err != nil {
return nil, roachpb.NewErrorf("error ingesting remote spans: %s", err)
}
}
// Only successful requests can carry an updated Txn in their response
// header. Some errors (e.g. a restart) have a Txn attached to them as
// well; these errors have been handled above.
txn.mu.Proto.Update(br.Txn)
}
if elideEndTxn {
// Check that read only transactions do not violate their deadline. This can NOT
// happen since the txn deadline is normally updated when it is about to expire
// or expired. We will just keep the code for safety (see TestReacquireLeaseOnRestart).
if endTxnRequest.Deadline != nil {
if endTxnRequest.Deadline.Less(txn.mu.Proto.Timestamp) {
return nil, roachpb.NewErrorWithTxn(roachpb.NewTransactionAbortedError(), &txn.mu.Proto)
}
}
// This normally happens on the server and sent back in response
// headers, but this transaction was optimized away. The caller may
// still inspect the transaction struct, so we manually update it
// here to emulate a true transaction.
if endTxnRequest.Commit {
txn.mu.Proto.Status = roachpb.COMMITTED
} else {
txn.mu.Proto.Status = roachpb.ABORTED
}
}
return br, nil
}
// firstWriteIndex returns the index of the first transactional write in the
// BatchRequest. Returns -1 if the batch has not intention to write. It also
// verifies that if an EndTransactionRequest is included, then it is the last
// request in the batch.
func firstWriteIndex(ba roachpb.BatchRequest) (int, *roachpb.Error) {
firstWriteIdx := -1
for i, ru := range ba.Requests {
args := ru.GetInner()
if i < len(ba.Requests)-1 /* if not last*/ {
if _, ok := args.(*roachpb.EndTransactionRequest); ok {
return -1, roachpb.NewErrorf("%s sent as non-terminal call", args.Method())
}
}
if roachpb.IsTransactionWrite(args) {
if firstWriteIdx == -1 {
firstWriteIdx = i
}
}
}
return firstWriteIdx, nil
}
// UpdateStateOnErr updates the Txn, and the Transaction proto inside it, in
// response to an error encountered when running a request through the txn. If
// the error is not a RetryableTxnError, then this is a no-op. For a retryable
// error, the Transaction proto is either initialized with the updated proto
// from the error, or a new Transaction proto is initialized.
//
// pri is the priority that should be used if the transaction needs to be
// restarted and we're giving the restarted transaction the chance to get a
// higher priority.
//
// TODO(andrei,nvanbenschoten): document whether calling this repeatedly within
// the same epoch of the txn is safe or not, and generally what protection we
// have for racing calls. We protect against calls for old IDs, but what about
// old epochs and/or the current epoch?
func (txn *Txn) UpdateStateOnErr(ctx context.Context, err error, pri roachpb.UserPriority) {
txn.mu.Lock()
txn.updateStateOnErrLocked(ctx, err, pri)
txn.mu.Unlock()
}
// updateStateOnErrLocked is like UpdateStateOnErr, but assumes that txn.mu is
// locked.
func (txn *Txn) updateStateOnErrLocked(ctx context.Context, err error, pri roachpb.UserPriority) {
log.Infof(context.TODO(), "client.Txn.updateStateOnErrLocked with err: %s", err)
retryErr, ok := err.(*roachpb.RetryableTxnError)
if !ok {
return
}
if !txn.isRetryableErrMeantForTxnLocked(retryErr) {
// If this happens, something is wrong; we've received an error that
// wasn't meant for this transaction. This is a sign that we either
// somehow ran another txn inside our txn and didn't properly terminate
// its error, or our transaction got a TransactionAbortedError (and the
// proto was reset), was retried, and then we still somehow managed to get
// an error meant for the previous incarnation of the transaction.
// Letting this wrong error slip here can cause us to retry the wrong
// transaction.
panic(fmt.Sprintf("Got a retryable error meant for a different transaction. "+
"txn.mu.Proto.ID: %v, pErr.ID: %v. err: %s",
txn.mu.Proto.ID, retryErr.TxnID, retryErr))
}
// Reset the statement count as this is a retryable txn error.
txn.mu.commandCount = 0
// Only update the proto if the retryable error is meant for the current
// incarnation of the transaction. In other words, only update it if
// retryErr.TxnID is not in txn.mu.previousIDs.
if roachpb.TxnIDEqual(retryErr.TxnID, txn.mu.Proto.ID) {
if retryErr.Transaction == nil {
// Transaction == nil means the cause was a TransactionAbortedError; we're
// going to initialized a new Transaction, and so have to save the old
// transaction ID so that concurrent requests or delayed responses that
// that throw errors know that these errors were sent to the correct
// transaction, even once the proto is reset.
if txn.mu.previousIDs == nil {
txn.mu.previousIDs = make(map[uuid.UUID]struct{})
}
txn.mu.previousIDs[*txn.mu.Proto.ID] = struct{}{}
}
newTxn := roachpb.PrepareTransactionForRetry(
retryErr, pri, txn.mu.Proto.Isolation, txn.mu.Proto.Name,
)
if !roachpb.TxnIDEqual(txn.mu.Proto.ID, newTxn.ID) &&
txn.mu.onKVTxnSwitched != nil {
txn.mu.onKVTxnSwitched(ctx, txn.mu.Proto)
}
// Overwrite the transaction proto with the one to be used for the next
// attempt.
txn.mu.Proto = newTxn
}
}
// PrepareForDetachedUpdates needs to be called on Txn's that are going to be