Skip to content

Commit

Permalink
tidy up and use go benchmark
Browse files Browse the repository at this point in the history
Signed-off-by: Wangchong Zhou <[email protected]>
  • Loading branch information
fffonion committed Sep 28, 2018
1 parent 6d709d5 commit 6a6b96c
Show file tree
Hide file tree
Showing 5 changed files with 671 additions and 443 deletions.
4 changes: 2 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func main() {
statsdListenTCP = kingpin.Flag("statsd.listen-tcp", "The TCP address on which to receive statsd metric lines. \"\" disables it.").Default(":9125").String()
mappingConfig = kingpin.Flag("statsd.mapping-config", "Metric mapping configuration file name.").String()
readBuffer = kingpin.Flag("statsd.read-buffer", "Size (in bytes) of the operating system's transmit read buffer associated with the UDP connection. Please make sure the kernel parameters net.core.rmem_max is set to a value greater than the value specified.").Int()
dumpFSMPath = kingpin.Flag("statsd.dump-fsm", "The path to dump internal FSM generated for glob matching as Dot file.").Default("").String()
dumpFSMPath = kingpin.Flag("debug.dump-fsm", "The path to dump internal FSM generated for glob matching as Dot file.").Default("").String()
)

log.AddFlags(kingpin.CommandLine)
Expand Down Expand Up @@ -203,7 +203,7 @@ func main() {
if *dumpFSMPath != "" {
err := dumpFSM(mapper, *dumpFSMPath)
if err != nil {
log.Fatal("Error dumpping FSM:", err)
log.Fatal("Error dumping FSM:", err)
}
}
go watchConfig(*mappingConfig, mapper)
Expand Down
18 changes: 11 additions & 7 deletions pkg/mapper/fsm/formatter.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,15 @@ package fsm

import (
"fmt"
"regexp"
"strconv"
"strings"
)

var (
templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
)

type templateFormatter struct {
captureIndexes []int
captureCount int
Expand Down Expand Up @@ -57,12 +62,11 @@ func (formatter *templateFormatter) format(captures map[int]string) string {
if formatter.captureCount == 0 {
// no label substitution, keep as it is
return formatter.fmtString
} else {
indexes := formatter.captureIndexes
vargs := make([]interface{}, formatter.captureCount)
for i, idx := range indexes {
vargs[i] = captures[idx]
}
return fmt.Sprintf(formatter.fmtString, vargs...)
}
indexes := formatter.captureIndexes
vargs := make([]interface{}, formatter.captureCount)
for i, idx := range indexes {
vargs[i] = captures[idx]
}
return fmt.Sprintf(formatter.fmtString, vargs...)
}
238 changes: 101 additions & 137 deletions pkg/mapper/fsm/fsm.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,13 @@
package fsm

import (
"fmt"
"io"
"regexp"
"strings"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)

var (
templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
)

type mappingState struct {
transitions map[string]*mappingState
minRemainingLength int
Expand All @@ -48,26 +42,26 @@ type fsmBacktrackStackCursor struct {
}

type FSM struct {
root *mappingState
needsBacktracking bool
metricTypes []string
disableOrdering bool
statesCount int
root *mappingState
metricTypes []string
statesCount int
BacktrackingNeeded bool
OrderingDisabled bool
}

// NewFSM creates a new FSM instance
func NewFSM(metricTypes []string, maxPossibleTransitions int, disableOrdering bool) *FSM {
func NewFSM(metricTypes []string, maxPossibleTransitions int, orderingDisabled bool) *FSM {
fsm := FSM{}
root := &mappingState{}
root.transitions = make(map[string]*mappingState, 3)
root.transitions = make(map[string]*mappingState, len(metricTypes))

metricTypes = append(metricTypes, "")
for _, field := range metricTypes {
state := &mappingState{}
(*state).transitions = make(map[string]*mappingState, maxPossibleTransitions)
root.transitions[string(field)] = state
}
fsm.disableOrdering = disableOrdering
fsm.OrderingDisabled = orderingDisabled
fsm.metricTypes = metricTypes
fsm.statesCount = 0
fsm.root = root
Expand Down Expand Up @@ -136,127 +130,6 @@ func (f *FSM) AddState(match string, name string, labels prometheus.Labels, matc

}

// DumpFSM accepts a io.writer and write the current FSM into dot file format
func (f *FSM) DumpFSM(w io.Writer) {
idx := 0
states := make(map[int]*mappingState)
states[idx] = f.root

w.Write([]byte("digraph g {\n"))
w.Write([]byte("rankdir=LR\n")) // make it vertical
w.Write([]byte("node [ label=\"\",style=filled,fillcolor=white,shape=circle ]\n")) // remove label of node

for idx < len(states) {
for field, transition := range states[idx].transitions {
states[len(states)] = transition
w.Write([]byte(fmt.Sprintf("%d -> %d [label = \"%s\"];\n", idx, len(states)-1, field)))
if idx == 0 {
// color for metric types
w.Write([]byte(fmt.Sprintf("%d [color=\"#D6B656\",fillcolor=\"#FFF2CC\"];\n", len(states)-1)))
} else if transition.transitions == nil || len(transition.transitions) == 0 {
// color for end state
w.Write([]byte(fmt.Sprintf("%d [color=\"#82B366\",fillcolor=\"#D5E8D4\"];\n", len(states)-1)))
}
}
idx++
}
// color for start state
w.Write([]byte(fmt.Sprintf("0 [color=\"#a94442\",fillcolor=\"#f2dede\"];\n")))
w.Write([]byte("}"))
}

// TestIfNeedBacktracking test if backtrack is needed for current FSM
func (f *FSM) TestIfNeedBacktracking(mappings []string) bool {
needBacktrack := false
// A has * in rules there's other transisitions at the same state
// this makes A the cause of backtracking
ruleByLength := make(map[int][]string)
ruleREByLength := make(map[int][]*regexp.Regexp)

// first sort rules by length
for _, mapping := range mappings {
l := len(strings.Split(mapping, "."))
ruleByLength[l] = append(ruleByLength[l], mapping)

metricRe := strings.Replace(mapping, ".", "\\.", -1)
metricRe = strings.Replace(metricRe, "*", "([^.]*)", -1)
regex, err := regexp.Compile("^" + metricRe + "$")
if err != nil {
log.Warnf("invalid match %s. cannot compile regex in mapping: %v", mapping, err)
}
// put into array no matter there's error or not, we will skip later if regex is nil
ruleREByLength[l] = append(ruleREByLength[l], regex)
}

for l, rules := range ruleByLength {
if len(rules) == 1 {
continue
}
rulesRE := ruleREByLength[l]
for i1, r1 := range rules {
currentRuleNeedBacktrack := false
re1 := rulesRE[i1]
if re1 == nil || strings.Index(r1, "*") == -1 {
continue
}
// if a rule is A.B.C.*.E.*, is there a rule A.B.C.D.x.x or A.B.C.*.E.F? (x is any string or *)
for index := 0; index < len(r1); index++ {
if r1[index] != '*' {
continue
}
reStr := strings.Replace(r1[:index], ".", "\\.", -1)
reStr = strings.Replace(reStr, "*", "\\*", -1)
re := regexp.MustCompile("^" + reStr)
for i2, r2 := range rules {
if i2 == i1 {
continue
}
if len(re.FindStringSubmatchIndex(r2)) > 0 {
currentRuleNeedBacktrack = true
break
}
}
}

for i2, r2 := range rules {
if i2 != i1 && len(re1.FindStringSubmatchIndex(r2)) > 0 {
// log if we care about ordering and the superset occurs before
if !f.disableOrdering && i1 < i2 {
log.Warnf("match \"%s\" is a super set of match \"%s\" but in a lower order, "+
"the first will never be matched\n", r1, r2)
}
currentRuleNeedBacktrack = false
}
}
for i2, re2 := range rulesRE {
if i2 == i1 || re2 == nil {
continue
}
// if r1 is a subset of other rule, we don't need backtrack
// because either we turned on ordering
// or we disabled ordering and can't match it even with backtrack
if len(re2.FindStringSubmatchIndex(r1)) > 0 {
currentRuleNeedBacktrack = false
}
}

if currentRuleNeedBacktrack {
log.Warnf("backtracking required because of match \"%s\", "+
"matching performance may be degraded\n", r1)
needBacktrack = true
}
}
}

// backtracking will always be needed if ordering of rules is not disabled
// since transistions are stored in (unordered) map
// note: don't move this branch to the beginning of this function
// since we need logs for superset rules
f.needsBacktracking = !f.disableOrdering || needBacktrack

return f.needsBacktracking
}

// GetMapping implements a mapping algorithm for Glob pattern
func (f *FSM) GetMapping(statsdMetric string, statsdMetricType string) (interface{}, string, prometheus.Labels, bool) {
matchFields := strings.Split(statsdMetric, ".")
Expand Down Expand Up @@ -294,7 +167,7 @@ func (f *FSM) GetMapping(statsdMetric string, statsdMetricType string) (interfac
captures[captureIdx] = field
captureIdx++
}
} else if f.needsBacktracking {
} else if f.BacktrackingNeeded {
// if backtracking is needed, also check for alternative transition
altState, present := currentState.transitions["*"]
if !present || fieldsLeft > altState.maxRemainingLength || fieldsLeft < altState.minRemainingLength {
Expand All @@ -319,7 +192,7 @@ func (f *FSM) GetMapping(statsdMetric string, statsdMetricType string) (interfac

// do we reach a final state?
if state.result != nil && i == filedsCount-1 {
if f.disableOrdering {
if f.OrderingDisabled {
finalState = state
return formatLabels(finalState, captures)
} else if finalState == nil || finalState.resultPriority > state.resultPriority {
Expand Down Expand Up @@ -360,6 +233,97 @@ func (f *FSM) GetMapping(statsdMetric string, statsdMetricType string) (interfac
return formatLabels(finalState, captures)
}

// TestIfNeedBacktracking test if backtrack is needed for current mappings
func TestIfNeedBacktracking(mappings []string, orderingDisabled bool) bool {
backtrackingNeeded := false
// A has * in rules there's other transisitions at the same state
// this makes A the cause of backtracking
ruleByLength := make(map[int][]string)
ruleREByLength := make(map[int][]*regexp.Regexp)

// first sort rules by length
for _, mapping := range mappings {
l := len(strings.Split(mapping, "."))
ruleByLength[l] = append(ruleByLength[l], mapping)

metricRe := strings.Replace(mapping, ".", "\\.", -1)
metricRe = strings.Replace(metricRe, "*", "([^.]*)", -1)
regex, err := regexp.Compile("^" + metricRe + "$")
if err != nil {
log.Warnf("invalid match %s. cannot compile regex in mapping: %v", mapping, err)
}
// put into array no matter there's error or not, we will skip later if regex is nil
ruleREByLength[l] = append(ruleREByLength[l], regex)
}

for l, rules := range ruleByLength {
if len(rules) == 1 {
continue
}
rulesRE := ruleREByLength[l]
for i1, r1 := range rules {
currentRuleNeedBacktrack := false
re1 := rulesRE[i1]
if re1 == nil || strings.Index(r1, "*") == -1 {
continue
}
// if a rule is A.B.C.*.E.*, is there a rule A.B.C.D.x.x or A.B.C.*.E.F? (x is any string or *)
for index := 0; index < len(r1); index++ {
if r1[index] != '*' {
continue
}
reStr := strings.Replace(r1[:index], ".", "\\.", -1)
reStr = strings.Replace(reStr, "*", "\\*", -1)
re := regexp.MustCompile("^" + reStr)
for i2, r2 := range rules {
if i2 == i1 {
continue
}
if len(re.FindStringSubmatchIndex(r2)) > 0 {
currentRuleNeedBacktrack = true
break
}
}
}

for i2, r2 := range rules {
if i2 != i1 && len(re1.FindStringSubmatchIndex(r2)) > 0 {
// log if we care about ordering and the superset occurs before
if !orderingDisabled && i1 < i2 {
log.Warnf("match \"%s\" is a super set of match \"%s\" but in a lower order, "+
"the first will never be matched", r1, r2)
}
currentRuleNeedBacktrack = false
}
}
for i2, re2 := range rulesRE {
if i2 == i1 || re2 == nil {
continue
}
// if r1 is a subset of other rule, we don't need backtrack
// because either we turned on ordering
// or we disabled ordering and can't match it even with backtrack
if len(re2.FindStringSubmatchIndex(r1)) > 0 {
currentRuleNeedBacktrack = false
}
}

if currentRuleNeedBacktrack {
log.Warnf("backtracking required because of match \"%s\", "+
"matching performance may be degraded", r1)
backtrackingNeeded = true
}
}
}

// backtracking will always be needed if ordering of rules is not disabled
// since transistions are stored in (unordered) map
// note: don't move this branch to the beginning of this function
// since we need logs for superset rules

return !orderingDisabled || backtrackingNeeded
}

func formatLabels(finalState *mappingState, captures map[int]string) (interface{}, string, prometheus.Labels, bool) {
// format name and labels
if finalState != nil {
Expand Down
18 changes: 1 addition & 17 deletions pkg/mapper/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ var (
metricLineRE = regexp.MustCompile(`^(\*\.|` + statsdMetricRE + `\.)+(\*|` + statsdMetricRE + `)$`)
metricNameRE = regexp.MustCompile(`^([a-zA-Z_]|` + templateReplaceRE + `)([a-zA-Z0-9_]|` + templateReplaceRE + `)*$`)
labelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]+$`)

templateReplaceCaptureRE = regexp.MustCompile(`\$\{?([a-zA-Z0-9_\$]+)\}?`)
)

type mapperConfigDefaults struct {
Expand Down Expand Up @@ -81,20 +79,6 @@ var defaultQuantiles = []metricObjective{
{Quantile: 0.99, Error: 0.001},
}

func min(x, y int) int {
if x < y {
return x
}
return y
}

func max(x, y int) int {
if x > y {
return x
}
return y
}

func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
var n MetricMapper

Expand Down Expand Up @@ -191,7 +175,7 @@ func (m *MetricMapper) InitFromYAMLString(fileContents string) error {
mappings = append(mappings, mapping.Match)
}
}
n.FSM.TestIfNeedBacktracking(mappings)
n.FSM.BacktrackingNeeded = fsm.TestIfNeedBacktracking(mappings, n.FSM.OrderingDisabled)

m.FSM = n.FSM
m.doRegex = n.doRegex
Expand Down
Loading

0 comments on commit 6a6b96c

Please sign in to comment.