Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
sort symbols in order of frequency rather than lexicographically
Browse files Browse the repository at this point in the history
  • Loading branch information
cstyan committed Mar 17, 2018
1 parent 195bc0d commit 508d576
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 46 deletions.
6 changes: 3 additions & 3 deletions block.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
type IndexWriter interface {
// AddSymbols registers all string symbols that are encountered in series
// and other indices.
AddSymbols(sym map[string]struct{}) error
AddSymbols(sym map[string]int) error

// AddSeries populates the index writer with a series and its offsets
// of chunks that the index can reference.
Expand All @@ -61,7 +61,7 @@ type IndexWriter interface {
type IndexReader interface {
// Symbols returns a set of string symbols that may occur in series' labels
// and indices.
Symbols() (map[string]struct{}, error)
Symbols() (map[string]int, error)

// LabelValues returns the possible label values.
LabelValues(names ...string) (index.StringTuples, error)
Expand Down Expand Up @@ -350,7 +350,7 @@ type blockIndexReader struct {
b *Block
}

// Symbols returns the symbol map of the wrapped index reader r.ir, passing the
// error through errors.Wrapf with the block's ULID as context.
// NOTE(review): this span is a diff hunk. The first signature line below is the
// removed map[string]struct{} revision, the second is its map[string]int
// replacement.
func (r blockIndexReader) Symbols() (map[string]struct{}, error) {
func (r blockIndexReader) Symbols() (map[string]int, error) {
s, err := r.ir.Symbols()
// NOTE(review): presumably errors.Wrapf yields nil for a nil err, so the
// success path returns a nil error; confirm against the errors package in use.
return s, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
}
Expand Down
4 changes: 2 additions & 2 deletions compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter) error {
var (
set ChunkSeriesSet
allSymbols = make(map[string]struct{}, 1<<16)
allSymbols = make(map[string]int, 1<<16)
closers = []io.Closer{}
)
defer func() { closeAll(closers...) }()
Expand Down Expand Up @@ -538,7 +538,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta,
return errors.Wrap(err, "read symbols")
}
for s := range symbols {
allSymbols[s] = struct{}{}
allSymbols[s] = symbols[s]
}

all, err := indexr.Postings(index.AllPostingsKey())
Expand Down
20 changes: 10 additions & 10 deletions head.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ type Head struct {
series *stripeSeries

symMtx sync.RWMutex
symbols map[string]struct{}
symbols map[string]int
values map[string]stringset // label names to possible values

postings *index.MemPostings // postings lists for terms
Expand Down Expand Up @@ -187,7 +187,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (
maxTime: math.MinInt64,
series: newStripeSeries(),
values: map[string]stringset{},
symbols: map[string]struct{}{},
symbols: make(map[string]int),
postings: index.NewUnorderedMemPostings(),
tombstones: memTombstones{},
}
Expand Down Expand Up @@ -623,12 +623,12 @@ func (h *Head) gc() {
h.postings.Delete(deleted)

// Rebuild symbols and label value indices from what is left in the postings terms.
symbols := make(map[string]struct{})
symbols := make(map[string]int)
values := make(map[string]stringset, len(h.values))

h.postings.Iter(func(t labels.Label, _ index.Postings) error {
symbols[t.Name] = struct{}{}
symbols[t.Value] = struct{}{}
symbols[t.Name]++
symbols[t.Value]++

ss, ok := values[t.Name]
if !ok {
Expand Down Expand Up @@ -771,14 +771,14 @@ func (h *headIndexReader) Close() error {
return nil
}

func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
func (h *headIndexReader) Symbols() (map[string]int, error) {
h.head.symMtx.RLock()
defer h.head.symMtx.RUnlock()

res := make(map[string]struct{}, len(h.head.symbols))
res := make(map[string]int, len(h.head.symbols))

for s := range h.head.symbols {
res[s] = struct{}{}
res[s] = 0
}
return res, nil
}
Expand Down Expand Up @@ -910,8 +910,8 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
}
valset.set(l.Value)

h.symbols[l.Name] = struct{}{}
h.symbols[l.Value] = struct{}{}
h.symbols[l.Name]++
h.symbols[l.Value]++
}

return s, true
Expand Down
12 changes: 6 additions & 6 deletions head_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,12 @@ func TestHead_Truncate(t *testing.T) {
testutil.Assert(t, postingsB2 == nil, "")
testutil.Assert(t, postingsC1 == nil, "")

testutil.Equals(t, map[string]struct{}{
"": struct{}{}, // from 'all' postings list
"a": struct{}{},
"b": struct{}{},
"1": struct{}{},
"2": struct{}{},
testutil.Equals(t, map[string]int{
"": 2, // from 'all' postings list
"a": 2,
"b": 1,
"1": 2,
"2": 1,
}, h.symbols)

testutil.Equals(t, map[string]stringset{
Expand Down
31 changes: 21 additions & 10 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ func (s indexWriterSeriesSlice) Less(i, j int) bool {
return labels.Compare(s[i].labels, s[j].labels) < 0
}

// symbolFrequencyPair couples a symbol with the number of times it was
// counted, so that symbols can be ordered by frequency.
type symbolFrequencyPair struct {
	symbol    string
	frequency int
}

// symbolFrequencylist implements sort.Interface over symbol/frequency pairs.
//
// Pairs are ordered by ascending frequency. Pairs with equal frequency are
// ordered by descending symbol, which makes the order total: the list is
// built from a map (random iteration order) and sort.Sort is not stable, so
// comparing on frequency alone would yield a different symbol order from run
// to run. The tie-break direction is chosen so that sorting through
// sort.Reverse gives "most frequent first, ties in ascending lexicographic
// order"; with all frequencies equal that is a plain lexicographic sort.
type symbolFrequencylist []symbolFrequencyPair

// Len returns the number of pairs in the list.
func (s symbolFrequencylist) Len() int { return len(s) }

// Swap exchanges the pairs at positions i and j.
func (s symbolFrequencylist) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether pair i sorts before pair j: lower frequency first,
// and for equal frequencies the lexicographically greater symbol first.
func (s symbolFrequencylist) Less(i, j int) bool {
	if s[i].frequency != s[j].frequency {
		return s[i].frequency < s[j].frequency
	}
	return s[i].symbol > s[j].symbol
}

type indexWriterStage uint8

const (
Expand Down Expand Up @@ -330,17 +341,17 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
return nil
}

func (w *Writer) AddSymbols(sym map[string]struct{}) error {
func (w *Writer) AddSymbols(sym map[string]int) error {
if err := w.ensureStage(idxStageSymbols); err != nil {
return err
}
// Generate the list of symbols, ordered by descending frequency, that we will store as the reference table.
symbols := make([]string, 0, len(sym))
symbols := make(symbolFrequencylist, 0, len(sym))

for s := range sym {
symbols = append(symbols, s)
for k, v := range sym {
symbols = append(symbols, symbolFrequencyPair{k, v})
}
sort.Strings(symbols)
sort.Sort(sort.Reverse(symbols))

const headerSize = 4

Expand All @@ -352,8 +363,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
w.symbols = make(map[string]uint32, len(symbols))

for index, s := range symbols {
w.symbols[s] = uint32(index)
w.buf2.putUvarintStr(s)
w.symbols[s.symbol] = uint32(index)
w.buf2.putUvarintStr(s.symbol)
}

w.buf1.putBE32int(w.buf2.len())
Expand Down Expand Up @@ -832,11 +843,11 @@ func (r *Reader) lookupSymbol(o uint32) (string, error) {
}

// Symbols returns the symbols that exist within the index as the keys of a
// newly allocated map. In the map[string]int revision every value is set to 0,
// so the result carries no frequency information. The error is always nil.
// NOTE(review): this span is a diff hunk. Each map[string]struct{} line is the
// removed revision and the map[string]int line after it is its replacement.
func (r *Reader) Symbols() (map[string]struct{}, error) {
res := make(map[string]struct{}, len(r.symbols))
func (r *Reader) Symbols() (map[string]int, error) {
res := make(map[string]int, len(r.symbols))

// The values of r.symbols are the symbol strings; its keys are ignored here.
for _, s := range r.symbols {
res[s] = struct{}{}
res[s] = 0
}
return res, nil
}
Expand Down
20 changes: 10 additions & 10 deletions index/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,13 @@ func TestIndexRW_Postings(t *testing.T) {
labels.FromStrings("a", "1", "b", "4"),
}

err = iw.AddSymbols(map[string]struct{}{
"a": struct{}{},
"b": struct{}{},
"1": struct{}{},
"2": struct{}{},
"3": struct{}{},
"4": struct{}{},
err = iw.AddSymbols(map[string]int{
"a": 1,
"b": 2,
"1": 1,
"2": 4,
"3": 5,
"4": 3,
})
testutil.Ok(t, err)

Expand Down Expand Up @@ -245,11 +245,11 @@ func TestPersistence_index_e2e(t *testing.T) {
// Sort labels as the index writer expects series in sorted order.
sort.Sort(labels.Slice(lbls))

symbols := map[string]struct{}{}
symbols := make(map[string]int)
for _, lset := range lbls {
for _, l := range lset {
symbols[l.Name] = struct{}{}
symbols[l.Value] = struct{}{}
symbols[l.Name] = 0
symbols[l.Value] = 0
}
}

Expand Down
10 changes: 5 additions & 5 deletions querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1367,20 +1367,20 @@ type mockIndex struct {
series map[uint64]series
labelIndex map[string][]string
postings map[labels.Label][]uint64
symbols map[string]struct{}
symbols map[string]int
}

// newMockIndex returns a mockIndex whose series, labelIndex, postings and
// symbols maps are all allocated and empty.
// NOTE(review): this span is a diff hunk. The two symbols lines are the removed
// map[string]struct{} revision followed by its map[string]int replacement.
func newMockIndex() mockIndex {
ix := mockIndex{
series: make(map[uint64]series),
labelIndex: make(map[string][]string),
postings: make(map[labels.Label][]uint64),
symbols: make(map[string]struct{}),
symbols: make(map[string]int),
}
return ix
}

// Symbols returns the mock's symbols map directly (not a copy) and a nil error.
// NOTE(review): this span is a diff hunk. The first signature line is the
// removed map[string]struct{} revision, the second is its map[string]int
// replacement.
func (m mockIndex) Symbols() (map[string]struct{}, error) {
func (m mockIndex) Symbols() (map[string]int, error) {
return m.symbols, nil
}

Expand All @@ -1389,8 +1389,8 @@ func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta)
return errors.Errorf("series with reference %d already added", ref)
}
for _, lbl := range l {
m.symbols[lbl.Name] = struct{}{}
m.symbols[lbl.Value] = struct{}{}
m.symbols[lbl.Name] = 0
m.symbols[lbl.Value] = 0
}

s := series{l: l}
Expand Down

0 comments on commit 508d576

Please sign in to comment.