diff --git a/benchmarks_test.go b/benchmarks_test.go
index e5c2c2e..658f7eb 100644
--- a/benchmarks_test.go
+++ b/benchmarks_test.go
@@ -86,8 +86,6 @@ func TestCRANLEIGH(t *testing.T) {
     }
 }
 
-// - restore when we've got multi-glob working
-/*
 func TestMySoftwareHatesMe(t *testing.T) {
     line := `{ "type": "Feature", "properties": { "STREET": "BELVEDERE" } }`
     m := newCoreMatcher()
@@ -97,7 +95,7 @@ func TestMySoftwareHatesMe(t *testing.T) {
     if m.addPattern("EEE", EEEpat) != nil {
         t.Error("Huh add?")
     }
-    matches, err := m.MatchesForEvent([]byte(line))
+    matches, _ := m.matchesForJSONEvent([]byte(line))
     if len(matches) != 1 || matches[0] != "EEE" {
         t.Error("Failed to match EEE")
     }
@@ -106,10 +104,7 @@ func TestMySoftwareHatesMe(t *testing.T) {
     _ = m.addPattern("B", Bpat)
     _ = m.addPattern("EEE", EEEpat)
 
-    matches, err = m.MatchesForEvent([]byte(line))
-    if err != nil {
-        t.Error("Huh? " + err.Error())
-    }
+    matches, _ = m.matchesForJSONEvent([]byte(line))
     if !containsX(matches, "B") {
         t.Error("no match for B")
     }
@@ -117,7 +112,6 @@ func TestMySoftwareHatesMe(t *testing.T) {
         t.Error("no match for EEE")
     }
 }
-*/
 
 // exercise shellstyle matching a little, is much faster than TestCityLots because it's only working wth one field
 func TestBigShellStyle(t *testing.T) {
@@ -131,12 +125,10 @@ func TestBigShellStyle(t *testing.T) {
         "V": 4322, "W": 4162, "X": 0, "Y": 721, "Z": 25,
     }
 
-    /* - restore when we've got multi-glob working
     funky := map[X]int{
         `{"properties": {"STREET":[ {"shellstyle": "N*P*"} ] } }`:    927,
         `{"properties": {"STREET":[ {"shellstyle": "*E*E*E*"} ] } }`: 1212,
     }
-    */
 
     for letter := range wanted {
         pat := fmt.Sprintf(`{"properties": {"STREET":[ {"shellstyle": "%s*"} ] } }`, letter)
@@ -146,14 +138,12 @@ func TestBigShellStyle(t *testing.T) {
         }
     }
 
-    /*
-        for funk := range funky {
-            err := m.addPattern(funk, funk.(string))
-            if err != nil {
-                t.Errorf("err on %s: %s", funk, err.Error())
-            }
+    for funk := range funky {
+        err := m.addPattern(funk, funk.(string))
+        if err != nil {
+            t.Errorf("err on %s: %s", funk, err.Error())
         }
-    */
+    }
 
     fmt.Println(matcherStats(m))
     lCounts := make(map[X]int)
@@ -187,14 +177,11 @@ func TestBigShellStyle(t *testing.T) {
             t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
         }
     }
-    /*
-        for k, wc := range funky {
-            if lCounts[k] != wc {
-                t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
-            }
+    for k, wc := range funky {
+        if lCounts[k] != wc {
+            t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
         }
-
-    */
+    }
 }
 
 // TestPatternAddition adds a whole lot of string-only rules as fast as possible The profiler says that the
diff --git a/cl2_test.go b/cl2_test.go
index 9d2a11a..10c83af 100644
--- a/cl2_test.go
+++ b/cl2_test.go
@@ -248,13 +248,16 @@ func newBenchmarker() *benchmarker {
 }
 
 func (bm *benchmarker) addRules(rules []string, wanted []int, report bool) {
+    cm := bm.q.matcher.(*coreMatcher)
     for i, rule := range rules {
         rname := fmt.Sprintf("r%d", i)
         _ = bm.q.AddPattern(rname, rule)
         bm.wanted[rname] = wanted[i]
     }
+    cm.analyze()
     if report {
-        fmt.Println(matcherStats(bm.q.matcher.(*coreMatcher)))
+        fmt.Println(matcherStats(cm))
+        fmt.Printf("MaxParallel: %d\n", cm.fields().nfaMeta.maxOutDegree)
     }
 }
 
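The addRules change above shows the intended calling sequence for the new analyze() step: add every pattern first, run analyze() once, then read the gathered metadata before matching starts. A minimal sketch of that flow, assuming it sits inside the package and uses fmt; the function name, pattern, and event literals here are invented for illustration, while addPattern, analyze, fields, nfaMeta, and matchesForJSONEvent are the package's own unexported API as exercised by this diff:

    func exampleAnalyzeFlow() {
        m := newCoreMatcher()
        _ = m.addPattern("r0", `{"properties": {"STREET": [ {"shellstyle": "B*"} ] } }`)
        _ = m.addPattern("r1", `{"properties": {"STREET": [ {"shellstyle": "*E*"} ] } }`)

        // analyze() walks every per-field NFA once and records the largest
        // out-degree it finds in nfaMeta; it doesn't change matching behavior.
        m.analyze()
        fmt.Println("max out-degree:", m.fields().nfaMeta.maxOutDegree)

        // matching works the same whether or not analyze() was called
        matches, _ := m.matchesForJSONEvent([]byte(`{"properties": {"STREET": "BELVEDERE"}}`))
        fmt.Println(matches)
    }
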
diff --git a/core_matcher.go b/core_matcher.go
index 1b2ed31..fa87f2a 100644
--- a/core_matcher.go
+++ b/core_matcher.go
@@ -34,6 +34,7 @@ type coreMatcher struct {
 type coreFields struct {
     state        *fieldMatcher
     segmentsTree *segmentsTree
+    nfaMeta      *nfaMetadata
 }
 
 func newCoreMatcher() *coreMatcher {
@@ -41,6 +42,7 @@
     m.updateable.Store(&coreFields{
         state:        newFieldMatcher(),
         segmentsTree: newSegmentsIndex(),
+        nfaMeta:      &nfaMetadata{},
     })
     return &m
 }
@@ -49,6 +51,21 @@ func (m *coreMatcher) fields() *coreFields {
     return m.updateable.Load().(*coreFields)
 }
 
+// analyze traverses all the different per-field NFAs and gathers metadata that can be
+// used to optimize traversal. At the moment, all that it gathers is the maximum outdegree
+// from any smallTable, where outdegree is the epsilon count plus the largest number of
+// targets jumped to from a single byte transition. It can be called at any time, but normally
+// you'd do this after you've added a bunch of patterns and are ready to start matching.
+func (m *coreMatcher) analyze() {
+    // only one thread can be updating at a time
+    m.lock.Lock()
+    defer m.lock.Unlock()
+
+    fields := m.fields()
+    fields.state.gatherMetadata(fields.nfaMeta)
+    m.updateable.Store(fields)
+}
+
 // addPattern - the patternBytes is a JSON text which must be an object. The X is what the matcher returns to indicate
 // that the provided pattern has been matched. In many applications it might be a string which is the pattern's name.
 func (m *coreMatcher) addPattern(x X, patternJSON string) error {
@@ -75,6 +92,7 @@ func (m *coreMatcher) addPatternWithPrinter(x X, patternJSON string, printer pri
     currentFields := m.fields()
     freshStart.segmentsTree = currentFields.segmentsTree.copy()
     freshStart.state = currentFields.state
+    freshStart.nfaMeta = currentFields.nfaMeta
 
     // Add paths to the segments tree index.
     for _, field := range patternFields {
@@ -173,20 +191,46 @@ func (m *coreMatcher) matchesForFields(fields []Field) ([]X, error) {
         sort.Sort(fieldsList(fields))
     }
     matches := newMatchSet()
-
-    // pre-allocate a pair of buffers that will be used several levels down the call stack for efficiently
-    // transversing NFAs
-    bufs := &bufpair{
-        buf1: make([]*faState, 0),
-        buf2: make([]*faState, 0),
-    }
+    cmFields := m.fields()
+
+    // nondeterministic states in this matcher's automata have a list of current states and
+    // transition to a list of next states. This requires memory shuffling, which we want to
+    // minimize at matching/traversal time. Whatever we do, we want to keep one pair of
+    // buffers around for an entire matchesForFields call; bufs is that.
+    // In theory, there should be significant savings to be had by pre-allocating those buffers,
+    // or managing a pool of them with sync.Pool, or some such. However, adding any straightforward
+    // pre-allocation causes massive slowdown on the mainstream cases such as EXACT_MATCH in
+    // TestRulerCl2(). My hypothesis is that the DFA-like processing there is so efficient that
+    // anything that does actual allocation is death.
+    // Thus was created the analyze() call, which traverses the whole coreMatcher tree and returns
+    // the maximum state outdegree in the nfaMeta data structure, then pre-allocates based on a
+    // quality estimate of what's going to be used. This did in fact produce an increase in performance,
+    // but that improvement was a small single-digit percentage, and things that made one of EXACT,
+    // ANYTHING_BUT, and SHELLSTYLE matches go faster made one of the others go slower.
+    // Complicating factor: even if there is some modest amount of garbage collection, the Go
+    // runtime seems to be very good at shuffling it off into another thread so that the actual
+    // pattern-matching throughput doesn't suffer much. That's true at least on my massively
+    // over-equipped M2 MBPro, but probably not on some miserable cloud event-handling worker.
+    // Conclusion: I dunno. I left the analyze() func in, but for now, don't use its results in
+    // production.
+    var bufs *bufpair = &bufpair{}
+    /*
+        if cmFields.nfaMeta.maxOutDegree < 2 {
+            bufs = &bufpair{}
+        } else {
+            bufferSize := cmFields.nfaMeta.maxOutDegree * 2
+            bufs = &bufpair{
+                buf1: make([]*faState, 0, bufferSize),
+                buf2: make([]*faState, 0, bufferSize),
+            }
+        }
+    */
 
     // for each of the fields, we'll try to match the automaton start state to that field - the tryToMatch
     // routine will, in the case that there's a match, call itself to see if subsequent fields after the
     // first matched will transition through the machine and eventually achieve a match
-    s := m.fields()
     for i := 0; i < len(fields); i++ {
-        tryToMatch(fields, i, s.state, matches, bufs)
+        tryToMatch(fields, i, cmFields.state, matches, bufs)
     }
     return matches.matches(), nil
 }
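The comment above floats sync.Pool as one of the unexplored options. Purely as an illustration of what that shape could look like — a hypothetical sketch inside the package, not something this diff adopts; bufPool, getBufs, and putBufs are made-up names, and the sync import would be needed — a pooled bufpair would be roughly:

    // Hypothetical, not what matchesForFields does today: reuse bufpairs across
    // calls via sync.Pool instead of allocating a fresh empty pair per call.
    var bufPool = sync.Pool{
        New: func() any { return &bufpair{} },
    }

    func getBufs() *bufpair { return bufPool.Get().(*bufpair) }

    func putBufs(b *bufpair) {
        b.buf1 = b.buf1[:0] // keep capacity, drop contents
        b.buf2 = b.buf2[:0]
        bufPool.Put(b)
    }

Whether that helps would hinge on the same tradeoff the comment describes: the pool's own synchronization and bookkeeping cost versus the allocation it avoids on the NFA-heavy paths.
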
diff --git a/field_matcher.go b/field_matcher.go
index 54d9716..3020a7c 100644
--- a/field_matcher.go
+++ b/field_matcher.go
@@ -35,6 +35,18 @@ func (m *fieldMatcher) update(fields *fmFields) {
     m.updateable.Store(fields)
 }
 
+func (m *fieldMatcher) gatherMetadata(meta *nfaMetadata) {
+    for _, vm := range m.fields().transitions {
+        vm.gatherMetadata(meta)
+    }
+    for _, fm := range m.fields().existsTrue {
+        fm.gatherMetadata(meta)
+    }
+    for _, fm := range m.fields().existsFalse {
+        fm.gatherMetadata(meta)
+    }
+}
+
 func (m *fieldMatcher) addMatch(x X) {
     current := m.fields()
     newFields := &fmFields{
diff --git a/nfa.go b/nfa.go
index c9417a4..00a1356 100644
--- a/nfa.go
+++ b/nfa.go
@@ -17,6 +17,10 @@ type faNext struct {
     states []*faState
 }
 
+type nfaMetadata struct {
+    maxOutDegree int
+}
+
 type transmap struct {
     set map[*fieldMatcher]bool
 }
@@ -99,15 +103,13 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, prin
         return combined
     }
 
-    newTable := newSmallTable()
-
     fieldTransitions := append(state1.fieldTransitions, state2.fieldTransitions...)
-    combined = &faState{table: newTable, fieldTransitions: fieldTransitions}
+    combined = &faState{table: newSmallTable(), fieldTransitions: fieldTransitions}
 
     pretty, ok := printer.(*prettyPrinter)
     if ok {
-        printer.labelTable(combined.table, fmt.Sprintf("%d∎%d", pretty.tableSerial(state1.table),
-            pretty.tableSerial(state2.table)))
+        printer.labelTable(combined.table, fmt.Sprintf("%d∎%d",
+            pretty.tableSerial(state1.table), pretty.tableSerial(state2.table)))
     }
 
     keyMemo[mKey] = combined
@@ -120,13 +122,13 @@
             switch {
             case next1 == next2:
                 uComb[i] = next1
-            case next1 != nil && next2 == nil:
-                uComb[i] = u1[i]
-            case next1 == nil && next2 != nil:
-                uComb[i] = u2[i]
-            case next1 != nil && next2 != nil:
+            case next2 == nil: // u1 must be non-nil
+                uComb[i] = next1
+            case next1 == nil: // u2 must be non-nil
+                uComb[i] = next2
+            default: // neither is nil, have to merge
                 if i > 0 && next1 == u1[i-1] && next2 == u2[i-1] {
-                    uComb[i] = uComb[i-1]
+                    uComb[i] = uComb[i-1] // dupe of previous step - this happens a lot
                 } else {
                     var comboNext []*faState
                     for _, nextStep1 := range next1.states {
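The rewritten switch above reads as a small truth table over the two per-byte targets being merged. As a standalone illustration only — mergeOneStep is a made-up helper, not part of nfa.go — the per-position decision amounts to:

    // mergeOneStep is an illustrative stand-in for one iteration of the loop in
    // mergeFAStates: given the two tables' targets for the same byte value, pick
    // (or build) the merged target.
    func mergeOneStep(next1, next2 *faNext, merge func(a, b *faNext) *faNext) *faNext {
        switch {
        case next1 == next2:
            return next1 // identical pointers, which covers the both-nil case
        case next2 == nil:
            return next1 // only the first table steps on this byte
        case next1 == nil:
            return next2 // only the second table steps on this byte
        default:
            return merge(next1, next2) // both step: their state lists must be merged
        }
    }
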
diff --git a/prettyprinter.go b/prettyprinter.go
index 211f3b4..be208f9 100644
--- a/prettyprinter.go
+++ b/prettyprinter.go
@@ -157,8 +157,6 @@ func (pp *prettyPrinter) nextString(n *faNext) string {
 func branchChar(b byte) string {
     switch b {
     // TODO: Figure out how to test commented-out cases
-    case 0:
-        return "∅"
     case valueTerminator:
         return "ℵ"
     default:
diff --git a/shell_style_test.go b/shell_style_test.go
index bc52b86..e7e632c 100644
--- a/shell_style_test.go
+++ b/shell_style_test.go
@@ -110,11 +110,11 @@ func TestWildCardRuler(t *testing.T) {
         t.Error("Missed on r2")
     }
     matches, _ = cm.matchesForJSONEvent([]byte("{\"b\" : \"dexeff\"}"))
-    if len(matches) != 2 || (!containsX(matches, "r2")) || !containsX(matches, "r3") {
+    if len(matches) != 2 || (!containsX(matches, "r2", "r3")) {
         t.Error("Missed on r2/r3")
     }
     matches, _ = cm.matchesForJSONEvent([]byte("{\"c\" : \"xyzzz\"}"))
-    if len(matches) != 2 || (!containsX(matches, "r4")) || !containsX(matches, "r5") {
+    if len(matches) != 2 || (!containsX(matches, "r4", "r5")) {
         t.Error("Missed on r4/r5")
     }
     matches, _ = cm.matchesForJSONEvent([]byte("{\"d\" : \"12345\"}"))
@@ -174,7 +174,12 @@ func TestShellStyleBuildTime(t *testing.T) {
             t.Error("AddP: " + err.Error())
         }
     }
-    fmt.Println(matcherStats(q.matcher.(*coreMatcher)))
+    cm := q.matcher.(*coreMatcher)
+
+    fmt.Println(matcherStats(cm))
+    cm.analyze()
+    fmt.Printf("MaxP: %d\n", cm.fields().nfaMeta.maxOutDegree)
+
     // make sure that all the words actually are matched
     before := time.Now()
     for _, word := range words {
diff --git a/small_table.go b/small_table.go
index 9b097c6..cda511f 100644
--- a/small_table.go
+++ b/small_table.go
@@ -100,6 +100,20 @@ func makeSmallTable(defaultStep *faNext, indices []byte, steps []*faNext) *small
     return &t
 }
 
+func (t *smallTable) gatherMetadata(meta *nfaMetadata) {
+    eps := len(t.epsilon)
+    for _, step := range t.steps {
+        if step != nil {
+            if (eps + len(step.states)) > meta.maxOutDegree {
+                meta.maxOutDegree = eps + len(step.states)
+            }
+            for _, state := range step.states {
+                state.table.gatherMetadata(meta)
+            }
+        }
+    }
+}
+
 // unpackedTable replicates the data in the smallTable ceilings and states arrays. It's quite hard to
 // update the list structure in a smallTable, but trivial in an unpackedTable. The idea is that to update
 // a smallTable you unpack it, update, then re-pack it. Not gonna be the most efficient thing so at some future point…
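To make the number gatherMetadata records concrete: for each table it takes the epsilon count plus the per-byte fan-out, and keeps the largest such sum seen anywhere in the automaton. A toy walk-through with invented figures (exampleOutDegree and the numbers are not from any real pattern set):

    // Toy numbers only: one smallTable with two epsilon transitions whose byte
    // steps fan out to 1, 3, and 2 target states respectively.
    func exampleOutDegree() int {
        eps := 2
        fanOuts := []int{1, 3, 2}
        maxOutDegree := 0
        for _, n := range fanOuts {
            if eps+n > maxOutDegree {
                maxOutDegree = eps + n // peaks at 2+3 = 5 for this table
            }
        }
        // analyze() tracks this maximum across every reachable table; the commented-out
        // code in matchesForFields would have sized buf1/buf2 at twice this number.
        return maxOutDegree
    }
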
diff --git a/value_matcher.go b/value_matcher.go
index bc1b1e8..5a94f2a 100644
--- a/value_matcher.go
+++ b/value_matcher.go
@@ -166,6 +166,13 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
     return nextField
 }
 
+func (m *valueMatcher) gatherMetadata(meta *nfaMetadata) {
+    start := m.fields().startTable
+    if start != nil {
+        start.gatherMetadata(meta)
+    }
+}
+
 // TODO: make these simple FA builders iterative not recursive, this will recurse as deep as the longest string match
 func makePrefixFA(val []byte) (*smallTable, *fieldMatcher) {