From 70d473bc8975b2fe199309e05298e19cbf99b1f2 Mon Sep 17 00:00:00 2001 From: Manish R Jain Date: Tue, 6 Aug 2019 16:46:10 -0700 Subject: [PATCH] Use sync.Pool for MapEntries in bulk loader (#3763) This PR significantly reduces the allocated objects and space in the bulk loader. Cumulative allocation for addMapEntry went down from 7.35GB to 5.48GB, and for addIndexMapEntries from 15.67GB to 10.11GB. Both profiles were taken 30s into a bulk loader run. The other allocations (lex.(*Lexer).Emit and rdfChunker.Parse) line up closely between the two profiles, which confirms that the two runs are comparable. Current master: ``` Showing nodes accounting for 45.97GB, 82.78% of 55.53GB total Dropped 112 nodes (cum <= 0.28GB) Showing top 10 nodes out of 54 flat flat% sum% cum cum% 13.61GB 24.51% 24.51% 13.61GB 24.51% github.com/dgraph-io/dgraph/lex.(*Lexer).Emit 7.35GB 13.23% 37.74% 7.35GB 13.23% github.com/dgraph-io/dgraph/dgraph/cmd/bulk.(*mapper).addMapEntry 6.82GB 12.29% 50.02% 15.67GB 28.21% github.com/dgraph-io/dgraph/dgraph/cmd/bulk.(*mapper).addIndexMapEntries 3.62GB 6.51% 56.54% 24.84GB 44.74% github.com/dgraph-io/dgraph/chunker.rdfChunker.Parse 3.04GB 5.48% 62.02% 3.04GB 5.48% github.com/dgraph-io/dgraph/posting.NewPosting 2.95GB 5.32% 67.34% 2.95GB 5.32% bytes.makeSlice 2.80GB 5.04% 72.38% 2.80GB 5.04% github.com/dgraph-io/dgraph/gql.NQuad.createEdgePrototype 2.71GB 4.87% 77.25% 2.71GB 4.87% github.com/dgraph-io/dgraph/lex.NewLexer 1.62GB 2.92% 80.18% 1.62GB 2.92% strings.makeCutsetFunc 1.45GB 2.61% 82.78% 1.45GB 2.61% github.com/dgraph-io/dgraph/x.generateKey ``` This PR: ``` Showing nodes accounting for 39.94GB, 80.85% of 49.40GB total Dropped 148 nodes (cum <= 0.25GB) Showing top 10 nodes out of 54 flat flat% sum% cum cum% 13.47GB 27.26% 27.26% 13.47GB 27.26% github.com/dgraph-io/dgraph/lex.(*Lexer).Emit 4.40GB 8.90% 36.16% 10.11GB 20.47% github.com/dgraph-io/dgraph/dgraph/cmd/bulk.(*mapper).addIndexMapEntries 3.63GB 7.36% 43.52% 24.81GB 50.23% github.com/dgraph-io/dgraph/chunker.rdfChunker.Parse 3.47GB 7.03% 50.54% 3.47GB 7.03% 
github.com/dgraph-io/dgraph/dgraph/cmd/bulk.newMapper.func1 2.97GB 6.01% 56.55% 2.97GB 6.01% bytes.makeSlice 2.92GB 5.91% 62.46% 2.92GB 5.91% github.com/dgraph-io/dgraph/posting.NewPosting 2.76GB 5.59% 68.05% 2.76GB 5.59% github.com/dgraph-io/dgraph/lex.NewLexer 2.69GB 5.45% 73.51% 2.69GB 5.45% github.com/dgraph-io/dgraph/gql.NQuad.createEdgePrototype 2GB 4.05% 77.56% 5.48GB 11.09% github.com/dgraph-io/dgraph/dgraph/cmd/bulk.(*mapper).addMapEntry 1.63GB 3.29% 80.85% 1.63GB 3.29% strings.makeCutsetFunc ``` (cherry picked from commit 3e52a8fe64ab1d43a13194004a50f67fde81778b) --- dgraph/cmd/bulk/mapper.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dgraph/cmd/bulk/mapper.go b/dgraph/cmd/bulk/mapper.go index 6a5c0a74c48..0d822f290a8 100644 --- a/dgraph/cmd/bulk/mapper.go +++ b/dgraph/cmd/bulk/mapper.go @@ -48,6 +48,7 @@ import ( type mapper struct { *state shards []shardState // shard is based on predicate + mePool *sync.Pool } type shardState struct { @@ -62,6 +63,11 @@ func newMapper(st *state) *mapper { return &mapper{ state: st, shards: make([]shardState, st.opt.MapShards), + mePool: &sync.Pool{ + New: func() interface{} { + return &pb.MapEntry{} + }, + }, } } @@ -125,6 +131,7 @@ func (m *mapper) writeMapEntriesToFile(entries []*pb.MapEntry, encodedSize uint6 x.Check(err) _, err = w.Write(meBuf) x.Check(err) + m.mePool.Put(me) } } @@ -182,9 +189,9 @@ func (m *mapper) run(inputFormat chunker.InputFormat) { func (m *mapper) addMapEntry(key []byte, p *pb.Posting, shard int) { atomic.AddInt64(&m.prog.mapEdgeCount, 1) - me := &pb.MapEntry{ - Key: key, - } + me := m.mePool.Get().(*pb.MapEntry) + me.Key = key + if p.PostingType != pb.Posting_REF || len(p.Facets) > 0 { me.Posting = p } else {