Skip to content

Commit

Permalink
kaizen: Increase throughput with flexible FA traversal (#332)
Browse files Browse the repository at this point in the history
* kaizen: Increase throughput with flexible FA traversal

Signed-off-by: Tim Bray <[email protected]>

* don't fail benchmark on alert

Signed-off-by: Tim Bray <[email protected]>

---------

Signed-off-by: Tim Bray <[email protected]>
  • Loading branch information
timbray authored Jul 12, 2024
1 parent c28897d commit daef7fd
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:

# Alert on regression
alert-threshold: "120%"
fail-on-alert: true
fail-on-alert: false
comment-on-alert: false

# Disable github pages, for now.
Expand Down
5 changes: 0 additions & 5 deletions match_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ func (m *matchSet) addXSingleThreaded(exes ...X) *matchSet {
return m
}

// contains reports whether x is present as a key in the match set.
func (m *matchSet) contains(x X) bool {
	_, present := m.set[x]
	return present
}

func (m *matchSet) matches() []X {
matches := make([]X, 0, len(m.set))
for x := range m.set {
Expand Down
5 changes: 5 additions & 0 deletions match_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ func TestAddXSingleThreaded(t *testing.T) {
}
}

// contains is a test helper that reports whether x is present as a key in the match set.
func (m *matchSet) contains(x X) bool {
	_, present := m.set[x]
	return present
}

func isSameMatches(matchSet *matchSet, exes ...X) bool {
if len(exes) == 0 && len(matchSet.matches()) == 0 {
return true
Expand Down
24 changes: 23 additions & 1 deletion nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,29 @@ func (tm *transmap) all() []*fieldMatcher {
return all
}

func traverseFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *bufpair) []*fieldMatcher {
// While some Quamina patterns require the use of NFAs, many (most?) don't. Although we're still using an
// NFA-capable data structure, we can traverse it deterministically if we know in advance that every
// combination of an faState with a byte will transition to at most one other faState.

// traverseDFA walks the automaton rooted at table one byte at a time, feeding it every
// byte of val followed by a final valueTerminator. The fieldTransitions of each state
// reached are appended to transitions, and the accumulated slice is returned. The walk
// stops at the first byte for which dStep reports no transition.
func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []*fieldMatcher {
	for _, utf8Byte := range val {
		state := table.dStep(utf8Byte)
		if state == nil {
			// Dead end: nothing further can match, keep what was collected so far.
			return transitions
		}
		transitions = append(transitions, state.fieldTransitions...)
		table = state.table
	}

	// All of val was consumed; take the closing step on the terminator byte.
	if final := table.dStep(valueTerminator); final != nil {
		transitions = append(transitions, final.fieldTransitions...)
	}
	return transitions
}

func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *bufpair) []*fieldMatcher {
currentStates := bufs.buf1
currentStates = append(currentStates, &faState{table: table})
nextStates := bufs.buf2
Expand Down
4 changes: 2 additions & 2 deletions shell_style_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ func TestMakeShellStyleFA(t *testing.T) {
var bufs bufpair
for _, should := range shouldsForPatterns[i] {
var transitions []*fieldMatcher
gotTrans := traverseFA(a, []byte(should), transitions, &bufs)
gotTrans := traverseNFA(a, []byte(should), transitions, &bufs)
if len(gotTrans) != 1 || gotTrans[0] != wanted {
t.Errorf("Failure for %s on %s", pattern, should)
}
}
for _, shouldNot := range shouldNotForPatterns[i] {
var transitions []*fieldMatcher
gotTrans := traverseFA(a, []byte(shouldNot), transitions, &bufs)
gotTrans := traverseNFA(a, []byte(shouldNot), transitions, &bufs)
if gotTrans != nil {
t.Errorf("bogus match for %s on %s", pattern, shouldNot)
}
Expand Down
16 changes: 16 additions & 0 deletions small_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ func (t *smallTable) step(utf8Byte byte, out *stepOut) {
panic("Malformed smallTable")
}

// dStep takes a step through an NFA in the case where it is known that the NFA in question
// is deterministic, i.e. each combination of an faState and a byte value transitions to at
// most one other faState.
//
// It returns the state reached on utf8Byte, or nil when the table has no transition for that
// byte. It panics with "Malformed smallTable" if no ceiling in the table exceeds utf8Byte.
func (t *smallTable) dStep(utf8Byte byte) *faState {
	for index, ceiling := range t.ceilings {
		if utf8Byte >= ceiling {
			continue
		}
		// The first ceiling above the byte selects the step that covers it.
		step := t.steps[index]
		if step == nil {
			return nil
		}
		// The automaton is deterministic, so only the first target state is consulted.
		return step.states[0]
	}
	panic("Malformed smallTable")
}

// makeSmallTable creates a pre-loaded small table, with all bytes not otherwise specified having the defaultStep
// value, and then a few other values with their indexes and values specified in the other two arguments. The
// goal is to reduce memory churn
Expand Down
18 changes: 15 additions & 3 deletions value_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type vmFields struct {
singletonMatch []byte
singletonTransition *fieldMatcher
hasQNumbers bool
isNondeterministic bool
}

func (m *valueMatcher) fields() *vmFields {
Expand Down Expand Up @@ -71,14 +72,22 @@ func (m *valueMatcher) transitionOn(eventField *Field, bufs *bufpair) []*fieldMa
case vmFields.startTable != nil:
// if there is a potential for a numeric match, try making a Q number from the event
if vmFields.hasQNumbers && eventField.IsQNumber {
qNumber, err := qNumFromBytes(val)
qNum, err := qNumFromBytes(val)
if err == nil {
return traverseFA(vmFields.startTable, qNumber, transitions, bufs)
if vmFields.isNondeterministic {
return traverseNFA(vmFields.startTable, qNum, transitions, bufs)
} else {
return traverseDFA(vmFields.startTable, qNum, transitions)
}
}
}

// if it doesn't work as a Q number for some reason, go ahead and compare the string values
return traverseFA(vmFields.startTable, val, transitions, bufs)
if vmFields.isNondeterministic {
return traverseNFA(vmFields.startTable, val, transitions, bufs)
} else {
return traverseDFA(vmFields.startTable, val, transitions)
}

default:
// no FA, no singleton, nothing to do, this probably can't happen because a flattener
Expand Down Expand Up @@ -108,6 +117,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
newFA, nextField = makeMultiAnythingButFA(val.list)
case shellStyleType:
newFA, nextField = makeShellStyleFA(valBytes, printer)
fields.isNondeterministic = true
case prefixType:
newFA, nextField = makePrefixFA(valBytes)
default:
Expand Down Expand Up @@ -150,6 +160,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
case shellStyleType:
newAutomaton, nextField := makeShellStyleFA(valBytes, printer)
fields.startTable = newAutomaton
fields.isNondeterministic = true
m.update(fields)
return nextField
case prefixType:
Expand Down Expand Up @@ -187,6 +198,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
newFA, nextField = makeMultiAnythingButFA(val.list)
case shellStyleType:
newFA, nextField = makeShellStyleFA(valBytes, printer)
fields.isNondeterministic = true
case prefixType:
newFA, nextField = makePrefixFA(valBytes)
default:
Expand Down

0 comments on commit daef7fd

Please sign in to comment.