Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/github_actions/github/codeql-acti…
Browse files Browse the repository at this point in the history
…on-3.25.11
  • Loading branch information
timbray authored Jun 30, 2024
2 parents 7ee2ff2 + 0a8726c commit bd00af6
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 50 deletions.
35 changes: 11 additions & 24 deletions benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ func TestCRANLEIGH(t *testing.T) {
}
}

// - restore when we've got multi-glob working
/*
func TestMySoftwareHatesMe(t *testing.T) {
line := `{ "type": "Feature", "properties": { "STREET": "BELVEDERE" } }`
m := newCoreMatcher()
Expand All @@ -97,7 +95,7 @@ func TestMySoftwareHatesMe(t *testing.T) {
if m.addPattern("EEE", EEEpat) != nil {
t.Error("Huh add?")
}
matches, err := m.MatchesForEvent([]byte(line))
matches, _ := m.matchesForJSONEvent([]byte(line))
if len(matches) != 1 || matches[0] != "EEE" {
t.Error("Failed to match EEE")
}
Expand All @@ -106,18 +104,14 @@ func TestMySoftwareHatesMe(t *testing.T) {
_ = m.addPattern("B", Bpat)
_ = m.addPattern("EEE", EEEpat)

matches, err = m.MatchesForEvent([]byte(line))
if err != nil {
t.Error("Huh? " + err.Error())
}
matches, _ = m.matchesForJSONEvent([]byte(line))
if !containsX(matches, "B") {
t.Error("no match for B")
}
if !containsX(matches, "EEE") {
t.Error("no match for EEE")
}
}
*/

// exercise shellstyle matching a little, is much faster than TestCityLots because it's only working with one field
func TestBigShellStyle(t *testing.T) {
Expand All @@ -131,12 +125,10 @@ func TestBigShellStyle(t *testing.T) {
"V": 4322, "W": 4162, "X": 0, "Y": 721, "Z": 25,
}

/* - restore when we've got multi-glob working
funky := map[X]int{
`{"properties": {"STREET":[ {"shellstyle": "N*P*"} ] } }`: 927,
`{"properties": {"STREET":[ {"shellstyle": "*E*E*E*"} ] } }`: 1212,
}
*/

for letter := range wanted {
pat := fmt.Sprintf(`{"properties": {"STREET":[ {"shellstyle": "%s*"} ] } }`, letter)
Expand All @@ -146,14 +138,12 @@ func TestBigShellStyle(t *testing.T) {
}
}

/*
for funk := range funky {
err := m.addPattern(funk, funk.(string))
if err != nil {
t.Errorf("err on %s: %s", funk, err.Error())
}
for funk := range funky {
err := m.addPattern(funk, funk.(string))
if err != nil {
t.Errorf("err on %s: %s", funk, err.Error())
}
*/
}
fmt.Println(matcherStats(m))

lCounts := make(map[X]int)
Expand Down Expand Up @@ -187,14 +177,11 @@ func TestBigShellStyle(t *testing.T) {
t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
}
}
/*
for k, wc := range funky {
if lCounts[k] != wc {
t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
}
for k, wc := range funky {
if lCounts[k] != wc {
t.Errorf("for %s wanted %d got %d", k, wc, lCounts[k])
}
*/
}
}

// TestPatternAddition adds a whole lot of string-only rules as fast as possible The profiler says that the
Expand Down
5 changes: 4 additions & 1 deletion cl2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,16 @@ func newBenchmarker() *benchmarker {
}

func (bm *benchmarker) addRules(rules []string, wanted []int, report bool) {
cm := bm.q.matcher.(*coreMatcher)
for i, rule := range rules {
rname := fmt.Sprintf("r%d", i)
_ = bm.q.AddPattern(rname, rule)
bm.wanted[rname] = wanted[i]
}
cm.analyze()
if report {
fmt.Println(matcherStats(bm.q.matcher.(*coreMatcher)))
fmt.Println(matcherStats(cm))
fmt.Printf("MaxParallel: %d\n", cm.fields().nfaMeta.maxOutDegree)
}
}

Expand Down
62 changes: 53 additions & 9 deletions core_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@ type coreMatcher struct {
// coreFields groups the state that a coreMatcher swaps in atomically via
// updateable.Store: the root fieldMatcher, the index of field-path segments,
// and the NFA metadata gathered by analyze().
type coreFields struct {
	state        *fieldMatcher
	segmentsTree *segmentsTree
	// nfaMeta holds traversal statistics (currently just maxOutDegree)
	// filled in by analyze(); see matchesForFields for the discussion of
	// how (and whether) it is used at match time.
	nfaMeta *nfaMetadata
}

// newCoreMatcher builds an empty coreMatcher whose initial atomic snapshot
// holds a fresh fieldMatcher, an empty segments index, and zero-valued
// NFA metadata.
func newCoreMatcher() *coreMatcher {
	cm := new(coreMatcher)
	initial := &coreFields{
		state:        newFieldMatcher(),
		segmentsTree: newSegmentsIndex(),
		nfaMeta:      &nfaMetadata{},
	}
	cm.updateable.Store(initial)
	return cm
}
Expand All @@ -49,6 +51,21 @@ func (m *coreMatcher) fields() *coreFields {
return m.updateable.Load().(*coreFields)
}

// analyze walks all the per-field NFAs in this matcher and records metadata
// that can be used to optimize traversal. At present the only datum gathered
// is the maximum outdegree of any smallTable, where outdegree means the
// epsilon count plus the largest number of targets reachable on a single
// byte transition. It may be called at any time, but the intended usage is
// once, after a batch of patterns has been added and before matching starts.
func (m *coreMatcher) analyze() {
	// serialize with other updaters; matcher threads are unaffected
	m.lock.Lock()
	defer m.lock.Unlock()

	current := m.fields()
	current.state.gatherMetadata(current.nfaMeta)
	m.updateable.Store(current)
}

// addPattern - the patternBytes is a JSON text which must be an object. The X is what the matcher returns to indicate
// that the provided pattern has been matched. In many applications it might be a string which is the pattern's name.
func (m *coreMatcher) addPattern(x X, patternJSON string) error {
Expand All @@ -75,6 +92,7 @@ func (m *coreMatcher) addPatternWithPrinter(x X, patternJSON string, printer pri
currentFields := m.fields()
freshStart.segmentsTree = currentFields.segmentsTree.copy()
freshStart.state = currentFields.state
freshStart.nfaMeta = currentFields.nfaMeta

// Add paths to the segments tree index.
for _, field := range patternFields {
Expand Down Expand Up @@ -173,20 +191,46 @@ func (m *coreMatcher) matchesForFields(fields []Field) ([]X, error) {
sort.Sort(fieldsList(fields))
}
matches := newMatchSet()

// pre-allocate a pair of buffers that will be used several levels down the call stack for efficiently
// transversing NFAs
bufs := &bufpair{
buf1: make([]*faState, 0),
buf2: make([]*faState, 0),
}
cmFields := m.fields()

// nondeterministic states in this matcher's automata have a list of current states and
// transition to a list of next states. This requires memory shuffling, which we want to
// minimize at matching/traversal time. Whatever we do, we want to keep one pair of
// buffers around for an entire matchesForFields call, bufs is that.
// In theory, there should be significant savings to be had by pre-allocating those buffers,
// or managing a pool of them with sync.Pool, or some such. However, adding any straightforward
// pre-allocation causes massive slowdown on the mainstream cases such as EXACT_MATCH in
// TestRulerCl2(). My hypothesis is that the DFA-like processing there is so efficient that
// anything that does actual allocation is death.
// Thus was created the analyze() call, which traverses the whole coreMatcher tree and returns
// the maximum state outdegree in the nfaMeta data structure, then pre-allocates a quality
estimate of what's going to be used. This did in fact produce an increase in performance,
// but that improvement was a small single-digit percentage and things that made one of EXACT,
// ANYTHING_BUT, and SHELLSTYLE matches go faster made one of the others go slower.
// Complicating factor: even if there is some modest amount of garbage collection, the Go
// runtime seems to be very good at shuffling it off into another thread so that the actual
// pattern-matching throughput doesn't suffer much. That's true at least on my massively
// over-equipped M2 MBPro, but probably not on some miserable cloud event-handling worker.
// Conclusion: I dunno. I left the analyze() func in but for now, don't use its results in
// production.
var bufs *bufpair = &bufpair{}
/*
if cmFields.nfaMeta.maxOutDegree < 2 {
bufs = &bufpair{}
} else {
bufferSize := cmFields.nfaMeta.maxOutDegree * 2
bufs = &bufpair{
buf1: make([]*faState, 0, bufferSize),
buf2: make([]*faState, 0, bufferSize),
}
}
*/

// for each of the fields, we'll try to match the automaton start state to that field - the tryToMatch
// routine will, in the case that there's a match, call itself to see if subsequent fields after the
// first matched will transition through the machine and eventually achieve a match
s := m.fields()
for i := 0; i < len(fields); i++ {
tryToMatch(fields, i, s.state, matches, bufs)
tryToMatch(fields, i, cmFields.state, matches, bufs)
}
return matches.matches(), nil
}
Expand Down
12 changes: 12 additions & 0 deletions field_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,18 @@ func (m *fieldMatcher) update(fields *fmFields) {
m.updateable.Store(fields)
}

// gatherMetadata folds traversal statistics for every automaton reachable
// from this fieldMatcher into meta, recursing through the value-matcher
// transitions as well as the exists-true and exists-false branches.
func (m *fieldMatcher) gatherMetadata(meta *nfaMetadata) {
	// Load the atomic snapshot once so the three traversals below all see
	// a consistent view; the original loaded m.fields() separately for each
	// loop, so a concurrent addPattern could swap in new fields mid-walk.
	fields := m.fields()
	for _, vm := range fields.transitions {
		vm.gatherMetadata(meta)
	}
	for _, fm := range fields.existsTrue {
		fm.gatherMetadata(meta)
	}
	for _, fm := range fields.existsFalse {
		fm.gatherMetadata(meta)
	}
}

func (m *fieldMatcher) addMatch(x X) {
current := m.fields()
newFields := &fmFields{
Expand Down
24 changes: 13 additions & 11 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ type faNext struct {
states []*faState
}

// nfaMetadata records statistics gathered by coreMatcher.analyze() while
// traversing the matcher's automata.
type nfaMetadata struct {
	// maxOutDegree is the largest outdegree observed in any smallTable:
	// its epsilon count plus the largest number of states reachable from
	// a single byte transition.
	maxOutDegree int
}

type transmap struct {
set map[*fieldMatcher]bool
}
Expand Down Expand Up @@ -99,15 +103,13 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, prin
return combined
}

newTable := newSmallTable()

fieldTransitions := append(state1.fieldTransitions, state2.fieldTransitions...)
combined = &faState{table: newTable, fieldTransitions: fieldTransitions}
combined = &faState{table: newSmallTable(), fieldTransitions: fieldTransitions}

pretty, ok := printer.(*prettyPrinter)
if ok {
printer.labelTable(combined.table, fmt.Sprintf("%d∎%d", pretty.tableSerial(state1.table),
pretty.tableSerial(state2.table)))
printer.labelTable(combined.table, fmt.Sprintf("%d∎%d",
pretty.tableSerial(state1.table), pretty.tableSerial(state2.table)))
}

keyMemo[mKey] = combined
Expand All @@ -120,13 +122,13 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, prin
switch {
case next1 == next2:
uComb[i] = next1
case next1 != nil && next2 == nil:
uComb[i] = u1[i]
case next1 == nil && next2 != nil:
uComb[i] = u2[i]
case next1 != nil && next2 != nil:
case next2 == nil: // u1 must be non-nil
uComb[i] = next1
case next1 == nil: // u2 must be non-nil
uComb[i] = next2
default: // neither is nil, have to merge
if i > 0 && next1 == u1[i-1] && next2 == u2[i-1] {
uComb[i] = uComb[i-1]
uComb[i] = uComb[i-1] // dupe of previous step - this happens a lot
} else {
var comboNext []*faState
for _, nextStep1 := range next1.states {
Expand Down
2 changes: 0 additions & 2 deletions prettyprinter.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,6 @@ func (pp *prettyPrinter) nextString(n *faNext) string {
func branchChar(b byte) string {
switch b {
// TODO: Figure out how to test commented-out cases
case 0:
return "∅"
case valueTerminator:
return "ℵ"
default:
Expand Down
11 changes: 8 additions & 3 deletions shell_style_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ func TestWildCardRuler(t *testing.T) {
t.Error("Missed on r2")
}
matches, _ = cm.matchesForJSONEvent([]byte("{\"b\" : \"dexeff\"}"))
if len(matches) != 2 || (!containsX(matches, "r2")) || !containsX(matches, "r3") {
if len(matches) != 2 || (!containsX(matches, "r2", "r3")) {
t.Error("Missed on r2/r3")
}
matches, _ = cm.matchesForJSONEvent([]byte("{\"c\" : \"xyzzz\"}"))
if len(matches) != 2 || (!containsX(matches, "r4")) || !containsX(matches, "r5") {
if len(matches) != 2 || (!containsX(matches, "r4", "r5")) {
t.Error("Missed on r4/r5")
}
matches, _ = cm.matchesForJSONEvent([]byte("{\"d\" : \"12345\"}"))
Expand Down Expand Up @@ -174,7 +174,12 @@ func TestShellStyleBuildTime(t *testing.T) {
t.Error("AddP: " + err.Error())
}
}
fmt.Println(matcherStats(q.matcher.(*coreMatcher)))
cm := q.matcher.(*coreMatcher)

fmt.Println(matcherStats(cm))
cm.analyze()
fmt.Printf("MaxP: %d\n", cm.fields().nfaMeta.maxOutDegree)

// make sure that all the words actually are matched
before := time.Now()
for _, word := range words {
Expand Down
14 changes: 14 additions & 0 deletions small_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,20 @@ func makeSmallTable(defaultStep *faNext, indices []byte, steps []*faNext) *small
return &t
}

// gatherMetadata accumulates traversal statistics for this table and every
// table reachable from it into meta. A table's outdegree is its epsilon
// count plus the number of states reachable on a single byte transition;
// meta.maxOutDegree ends up holding the largest value seen anywhere.
func (t *smallTable) gatherMetadata(meta *nfaMetadata) {
	t.gatherMetadataVisit(meta, make(map[*smallTable]bool))
}

// gatherMetadataVisit does the real work of gatherMetadata. The visited set
// prevents re-measuring a table and, more importantly, prevents unbounded
// recursion should the automaton contain cycles.
func (t *smallTable) gatherMetadataVisit(meta *nfaMetadata, visited map[*smallTable]bool) {
	if visited[t] {
		return
	}
	visited[t] = true

	eps := len(t.epsilon)
	// an epsilon-only table (all steps nil) still has outdegree eps
	if eps > meta.maxOutDegree {
		meta.maxOutDegree = eps
	}
	for _, step := range t.steps {
		if step == nil {
			continue
		}
		if eps+len(step.states) > meta.maxOutDegree {
			meta.maxOutDegree = eps + len(step.states)
		}
		for _, state := range step.states {
			state.table.gatherMetadataVisit(meta, visited)
		}
	}
	// the previous version counted epsilon transitions but never followed
	// them, so tables reachable only via epsilon were never measured
	for _, state := range t.epsilon {
		state.table.gatherMetadataVisit(meta, visited)
	}
}

// unpackedTable replicates the data in the smallTable ceilings and states arrays. It's quite hard to
// update the list structure in a smallTable, but trivial in an unpackedTable. The idea is that to update
// a smallTable you unpack it, update, then re-pack it. Not gonna be the most efficient thing so at some future point…
Expand Down
7 changes: 7 additions & 0 deletions value_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
return nextField
}

// gatherMetadata forwards metadata collection to this valueMatcher's start
// table; a matcher whose automaton has not been built (nil startTable)
// contributes nothing.
func (m *valueMatcher) gatherMetadata(meta *nfaMetadata) {
	if table := m.fields().startTable; table != nil {
		table.gatherMetadata(meta)
	}
}

// TODO: make these simple FA builders iterative not recursive, this will recurse as deep as the longest string match

func makePrefixFA(val []byte) (*smallTable, *fieldMatcher) {
Expand Down

0 comments on commit bd00af6

Please sign in to comment.