-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
Copy pathintent_reader_writer.go
282 lines (261 loc) · 9.94 KB
/
intent_reader_writer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package storage
import (
"context"
"sync"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)
// This file defines wrappers for Reader and Writer, and functions to do the
// wrapping, which depend on the configuration settings above.
// intentDemuxWriter implements 3 methods from the Writer interface:
// PutIntent, ClearIntent, ClearMVCCRangeAndIntents.
type intentDemuxWriter struct {
w Writer
// Must be non-nil if this intentDemuxWriter is used. We do the checking
// lazily when methods are called since the clients of intentDemuxWriter
// initialize it up-front, but don't know if they are being used by code
// that cares about intents (e.g. a temporary Engine used for disk-spilling
// during query execution will never read-write intents).
settings *cluster.Settings
cachedSettingsAreValid bool
clusterVersionIsRecentEnoughCached bool
disableSeparatedIntents bool
}
func wrapIntentWriter(
ctx context.Context,
w Writer,
settings *cluster.Settings,
isLongLived bool,
disableSeparatedIntents bool,
) intentDemuxWriter {
idw := intentDemuxWriter{w: w, settings: settings}
if !isLongLived && settings != nil {
// Cache the settings for performance.
idw.cachedSettingsAreValid = true
// Be resilient to the version not yet being initialized.
idw.clusterVersionIsRecentEnoughCached = !idw.settings.Version.ActiveVersionOrEmpty(ctx).Less(
clusterversion.ByKey(clusterversion.SeparatedIntents))
}
idw.disableSeparatedIntents = disableSeparatedIntents
return idw
}
// ClearIntent has the same behavior as Writer.ClearIntent. buf is used as
// scratch-space to avoid allocations -- its contents will be overwritten and
// not appended to, and a possibly different buf returned.
func (idw intentDemuxWriter) ClearIntent(
key roachpb.Key,
state PrecedingIntentState,
txnDidNotUpdateMeta bool,
txnUUID uuid.UUID,
buf []byte,
) (_ []byte, separatedIntentCountDelta int, _ error) {
if idw.settings == nil {
return nil, 0, errors.AssertionFailedf("intentDemuxWriter not configured with cluster.Setttings")
}
switch state {
case ExistingIntentInterleaved:
return buf, 0, idw.w.ClearUnversioned(key)
case ExistingIntentSeparated:
var engineKey EngineKey
engineKey, buf = LockTableKey{
Key: key,
Strength: lock.Exclusive,
TxnUUID: txnUUID[:],
}.ToEngineKey(buf)
if txnDidNotUpdateMeta {
return buf, -1, idw.w.SingleClearEngineKey(engineKey)
}
return buf, -1, idw.w.ClearEngineKey(engineKey)
default:
return buf, 0, errors.AssertionFailedf("ClearIntent: invalid preceding state %d", state)
}
}
// PutIntent has the same behavior as Writer.PutIntent. buf is used as
// scratch-space to avoid allocations -- its contents will be overwritten and
// not appended to, and a possibly different buf returned.
func (idw intentDemuxWriter) PutIntent(
ctx context.Context,
key roachpb.Key,
value []byte,
state PrecedingIntentState,
txnDidNotUpdateMeta bool,
txnUUID uuid.UUID,
buf []byte,
) (_ []byte, separatedIntentCountDelta int, _ error) {
if idw.settings == nil {
return nil, 0, errors.AssertionFailedf("intentDemuxWriter not configured with cluster.Setttings")
}
var writeSeparatedIntents bool
if idw.cachedSettingsAreValid {
// Fast-path
writeSeparatedIntents = idw.clusterVersionIsRecentEnoughCached && !idw.disableSeparatedIntents
} else {
// Slow-path, when doing writes on the Engine directly. This should not be
// performance sensitive code.
writeSeparatedIntents =
// Be resilient to the version not yet being initialized.
!idw.settings.Version.ActiveVersionOrEmpty(ctx).Less(
clusterversion.ByKey(clusterversion.SeparatedIntents)) &&
!idw.disableSeparatedIntents
}
var engineKey EngineKey
if state == ExistingIntentSeparated || writeSeparatedIntents {
engineKey, buf = LockTableKey{
Key: key,
Strength: lock.Exclusive,
TxnUUID: txnUUID[:],
}.ToEngineKey(buf)
}
if state == ExistingIntentSeparated && !writeSeparatedIntents {
// Switching this intent from separated to interleaved.
if txnDidNotUpdateMeta {
if err := idw.w.SingleClearEngineKey(engineKey); err != nil {
return buf, 0, err
}
} else {
if err := idw.w.ClearEngineKey(engineKey); err != nil {
return buf, 0, err
}
}
} else if state == ExistingIntentInterleaved && writeSeparatedIntents {
// Switching this intent from interleaved to separated.
if err := idw.w.ClearUnversioned(key); err != nil {
return buf, 0, err
}
}
// Else, staying separated or staying interleaved or there was no preceding
// intent, so don't need to explicitly clear.
if state == ExistingIntentSeparated {
separatedIntentCountDelta = -1
}
// Write intent
if writeSeparatedIntents {
separatedIntentCountDelta++
return buf, separatedIntentCountDelta, idw.w.PutEngineKey(engineKey, value)
}
return buf, separatedIntentCountDelta, idw.w.PutUnversioned(key, value)
}
// ClearMVCCRangeAndIntents has the same behavior as
// Writer.ClearMVCCRangeAndIntents. buf is used as scratch-space to avoid
// allocations -- its contents will be overwritten and not appended to, and a
// possibly different buf returned.
func (idw intentDemuxWriter) ClearMVCCRangeAndIntents(
start, end roachpb.Key, buf []byte,
) ([]byte, error) {
if idw.settings == nil {
return nil, errors.AssertionFailedf("intentDemuxWriter not configured with cluster.Setttings")
}
err := idw.w.ClearRawRange(start, end)
if err != nil {
return buf, err
}
lstart, buf := keys.LockTableSingleKey(start, buf)
lend, _ := keys.LockTableSingleKey(end, nil)
return buf, idw.w.ClearRawRange(lstart, lend)
}
func (idw intentDemuxWriter) safeToWriteSeparatedIntents(ctx context.Context) (bool, error) {
if idw.settings == nil {
return false,
errors.Errorf(
"intentDemuxWriter without cluster.Settings does not support SafeToWriteSeparatedIntents")
}
if idw.cachedSettingsAreValid {
return idw.clusterVersionIsRecentEnoughCached, nil
}
// Be resilient to the version not yet being initialized.
return !idw.settings.Version.ActiveVersionOrEmpty(ctx).Less(
clusterversion.ByKey(clusterversion.SeparatedIntents)), nil
}
// wrappableReader is used to implement a wrapped Reader. A wrapped Reader
// should be used and immediately discarded. It maintains no state of its own
// between calls.
// Why do we not keep the wrapped reader as a member in the caller? Because
// different methods on Reader can need different wrappings depending on what
// they want to observe.
//
// TODO(sumeer): for allocation optimization we could expose a scratch space
// struct that the caller keeps on behalf of the wrapped reader. But can only
// do such an optimization when know that the wrappableReader will be used
// with external synchronization that prevents preallocated buffers from being
// modified concurrently. pebbleBatch.{MVCCGet,MVCCGetProto} have MVCCKey
// serialization allocation optimizations which we can't do below. But those
// are probably not performance sensitive, since the performance sensitive
// code probably uses an MVCCIterator.
type wrappableReader interface {
Reader
rawGet(key []byte) (value []byte, err error)
}
// wrapReader wraps the provided reader, to return an implementation of MVCCIterator
// that supports MVCCKeyAndIntentsIterKind.
func wrapReader(r wrappableReader) *intentInterleavingReader {
iiReader := intentInterleavingReaderPool.Get().(*intentInterleavingReader)
*iiReader = intentInterleavingReader{wrappableReader: r}
return iiReader
}
type intentInterleavingReader struct {
wrappableReader
}
var _ Reader = &intentInterleavingReader{}
var intentInterleavingReaderPool = sync.Pool{
New: func() interface{} {
return &intentInterleavingReader{}
},
}
// Get implements the Reader interface.
func (imr *intentInterleavingReader) MVCCGet(key MVCCKey) ([]byte, error) {
val, err := imr.wrappableReader.rawGet(EncodeKey(key))
if val != nil || err != nil || !key.Timestamp.IsEmpty() {
return val, err
}
// The meta could be in the lock table. Constructing an Iterator for each
// Get is not efficient, but this function is deprecated and only used for
// tests, so we don't care.
ltKey, _ := keys.LockTableSingleKey(key.Key, nil)
iter := imr.wrappableReader.NewEngineIterator(IterOptions{Prefix: true, LowerBound: ltKey})
defer iter.Close()
valid, err := iter.SeekEngineKeyGE(EngineKey{Key: ltKey})
if !valid || err != nil {
return nil, err
}
val = iter.Value()
return val, nil
}
// MVCCGetProto implements the Reader interface.
func (imr *intentInterleavingReader) MVCCGetProto(
key MVCCKey, msg protoutil.Message,
) (ok bool, keyBytes, valBytes int64, err error) {
return pebbleGetProto(imr, key, msg)
}
// NewMVCCIterator implements the Reader interface. The
// intentInterleavingReader can be freed once this method returns.
func (imr *intentInterleavingReader) NewMVCCIterator(
iterKind MVCCIterKind, opts IterOptions,
) MVCCIterator {
if (!opts.MinTimestampHint.IsEmpty() || !opts.MaxTimestampHint.IsEmpty()) &&
iterKind == MVCCKeyAndIntentsIterKind {
panic("cannot ask for interleaved intents when specifying timestamp hints")
}
if iterKind == MVCCKeyIterKind {
return imr.wrappableReader.NewMVCCIterator(MVCCKeyIterKind, opts)
}
return newIntentInterleavingIterator(imr.wrappableReader, opts)
}
func (imr *intentInterleavingReader) Free() {
*imr = intentInterleavingReader{}
intentInterleavingReaderPool.Put(imr)
}