-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
reduce.go
125 lines (107 loc) · 3.03 KB
/
reduce.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
* Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package bulk
import (
"bytes"
"sync/atomic"
"github.com/dgraph-io/badger"
"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/posting"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/dgraph/x"
)
// reducer consumes shuffle jobs from the input channel and writes the
// merged posting lists they contain into badger.
type reducer struct {
	*state // shared bulk-loader state: options, progress counters, writeTs.
	// input delivers shuffle jobs; run drains it until it is closed.
	input <-chan shuffleOutput
	// writesThr bounds the number of badger commits in flight at once.
	writesThr *x.Throttle
}
// run drains r.input, handing each shuffle job to its own goroutine.
// At most r.opt.NumGoroutines reduce jobs execute concurrently; run
// returns only after every reducer goroutine has finished and every
// pending badger write has been acknowledged.
func (r *reducer) run() {
	limiter := x.NewThrottle(r.opt.NumGoroutines)
	for out := range r.input {
		// Block here (not in the goroutine) so the gauges below only
		// count jobs that actually hold a concurrency slot.
		limiter.Start()
		NumReducers.Add(1)
		NumQueuedReduceJobs.Add(-1)
		go func(out shuffleOutput) {
			defer NumReducers.Add(-1)
			defer limiter.Done()
			r.reduce(out)
		}(out)
	}
	limiter.Wait()
	// Reduce goroutines are done; now wait for async commits to land.
	r.writesThr.Wait()
}
// reduce merges the map entries of a single shuffle job into posting
// lists and writes them to the job's badger DB at r.state.writeTs.
//
// job.mapEntries is assumed to be sorted by key: entries sharing a key
// are accumulated into one posting list, which is flushed whenever the
// key changes, and once more at the end for the final key.
func (r *reducer) reduce(job shuffleOutput) {
	var currentKey []byte
	var uids []uint64
	pl := new(pb.PostingList)
	// newTxn reserves a write slot and opens a managed transaction
	// pinned to the global write timestamp.
	newTxn := func() *badger.Txn {
		r.writesThr.Start()
		return job.db.NewTransactionAt(r.state.writeTs, true)
	}
	// commitTxn commits asynchronously; the callback releases the write
	// slot once badger has applied the transaction.
	commitTxn := func(txn *badger.Txn) {
		NumBadgerWrites.Add(1)
		x.Check(txn.CommitAt(r.state.writeTs, func(err error) {
			x.Check(err)
			NumBadgerWrites.Add(-1)
			r.writesThr.Done()
		}))
	}
	txn := newTxn()
	// outputPostingList writes the posting list accumulated for
	// currentKey and resets the accumulators for the next key.
	outputPostingList := func() {
		atomic.AddInt64(&r.prog.reduceKeyCount, 1)
		// For a UID-only posting list, the badger value is a delta packed UID
		// list. The UserMeta indicates to treat the value as a delta packed
		// list when the value is read by dgraph. For a value posting list,
		// the full pb.Posting type is used (which pb.y contains the
		// delta packed UID list).
		meta := posting.BitCompletePosting
		pl.Pack = codec.Encode(uids, 256)
		val, err := pl.Marshal()
		x.Check(err)
		// If this entry pushes the transaction over badger's size limit,
		// commit what we have and retry once in a fresh transaction.
		e := badger.NewEntry(currentKey, val).WithMeta(meta)
		err = txn.SetEntry(e)
		if err == badger.ErrTxnTooBig {
			commitTxn(txn)
			txn = newTxn()
			x.Check(txn.SetEntry(e)) // We are not checking ErrTxnTooBig second time.
		} else {
			x.Check(err)
		}
		uids = uids[:0]
		pl.Reset()
	}
	for _, mapEntry := range job.mapEntries {
		atomic.AddInt64(&r.prog.reduceEdgeCount, 1)
		// Key changed: flush the list built for the previous key.
		if currentKey != nil && !bytes.Equal(mapEntry.Key, currentKey) {
			outputPostingList()
		}
		currentKey = mapEntry.Key
		uid := mapEntry.Uid
		if mapEntry.Posting != nil {
			uid = mapEntry.Posting.Uid
		}
		// Entries are sorted, so duplicates of a UID arrive adjacently.
		if len(uids) > 0 && uids[len(uids)-1] == uid {
			continue
		}
		uids = append(uids, uid)
		if mapEntry.Posting != nil {
			pl.Postings = append(pl.Postings, mapEntry.Posting)
		}
	}
	// Flush the final posting list. Guard against an empty job: with no
	// entries currentKey stays nil, and a nil key would make badger's
	// SetEntry fail (empty key), crashing via x.Check.
	if currentKey != nil {
		outputPostingList()
	}
	commitTxn(txn)
}