-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
Copy pathbulk_adder.go
121 lines (104 loc) · 4.85 KB
/
bulk_adder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package kvserverbase
import (
"context"
"fmt"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
)
// BulkAdderOptions is used to configure the behavior of a BulkAdder.
type BulkAdderOptions struct {
// Name is used in logging messages to identify this adder or the process on
// behalf of which it is adding data.
Name string
// MinBufferSize is the initial size of the BulkAdder buffer. It indicates the
// amount of memory we require to be able to buffer data before flushing for
// SST creation.
MinBufferSize int64
// BufferSize is the maximum size we can grow the BulkAdder buffer to.
MaxBufferSize func() int64
// SkipDuplicates configures handling of duplicate keys within a local sorted
// batch. When true if the same key/value pair is added more than once
// subsequent additions will be ignored instead of producing an error. If an
// attempt to add the same key has a different value, it is always an error.
// Once a batch is flushed – explicitly or automatically – local duplicate
// detection does not apply.
SkipDuplicates bool
// DisallowShadowingBelow controls whether shadowing of existing keys is
// permitted when the SSTables produced by this adder are ingested. See the
// comment on kvpb.AddSSTableRequest for more details. Note that if this is
// set to a non-empty timestamp, the older flag DisallowShadowing will be set
// on all requests as well, so callers should expect older nodes to handle any
// requests accordingly or check the MVCCAddSSTable version gate.
DisallowShadowingBelow hlc.Timestamp
// BatchTimestamp is the timestamp to use on AddSSTable requests (which can be
// different from the timestamp used to construct the adder which is what is
// actually applied to each key).
BatchTimestamp hlc.Timestamp
// WriteAtBatchTimestamp will rewrite the SST to use the batch timestamp, even
// if it gets pushed to a different timestamp on the server side. All SST MVCC
// timestamps must equal BatchTimestamp. See
// kvpb.AddSSTableRequest.SSTTimestampToRequestTimestamp.
WriteAtBatchTimestamp bool
// InitialSplitsIfUnordered specifies a number of splits to make before the
// first flush of the buffer if the contents of that buffer were unsorted.
// Being unsorted suggests the remaining input is likely unsorted as well and
// thus future flushes and flushes on other nodes will overlap, so we want to
// pre-split the target span now before we start filling it, using the keys in
// the first buffer to pick split points in the hope it is a representative
// sample of the overall input.
InitialSplitsIfUnordered int
// ImportEpoch specifies the ImportEpoch of the table the BulkAdder
// is ingesting data into as part of an IMPORT INTO job. If specified, the Bulk
// Adder's SSTBatcher will write the import epoch to each versioned value's
// metadata.
ImportEpoch uint32
}
// BulkAdderFactory describes a factory function for BulkAdders.
type BulkAdderFactory func(
ctx context.Context, db *kv.DB, timestamp hlc.Timestamp, opts BulkAdderOptions,
) (BulkAdder, error)
// BulkAdder describes a bulk-adding helper that can be used to add lots of KVs.
type BulkAdder interface {
// Add adds a KV pair to the adder's buffer, potentially flushing if needed.
Add(ctx context.Context, key roachpb.Key, value []byte) error
// Flush explicitly flushes anything remaining in the adder's buffer.
Flush(ctx context.Context) error
// IsEmpty returns whether or not this BulkAdder has data buffered.
IsEmpty() bool
// CurrentBufferFill returns how full the configured buffer is.
CurrentBufferFill() float32
// GetSummary returns a summary of rows/bytes/etc written by this batcher.
GetSummary() kvpb.BulkOpSummary
// Close closes the underlying buffers/writers.
Close(ctx context.Context)
// SetOnFlush sets a callback function called after flushing the buffer.
SetOnFlush(func(summary kvpb.BulkOpSummary))
}
// DuplicateKeyError represents a failed attempt to ingest the same key twice
// using a BulkAdder within the same batch.
type DuplicateKeyError struct {
Key roachpb.Key
Value []byte
}
func (d *DuplicateKeyError) Error() string {
return fmt.Sprintf("duplicate key: %s", d.Key)
}
// NewDuplicateKeyError constructs a DuplicateKeyError, copying its input.
func NewDuplicateKeyError(key roachpb.Key, value []byte) error {
ret := &DuplicateKeyError{
Key: key.Clone(),
}
ret.Value = append(ret.Value, value...)
return ret
}