From 36e31d717b8356b4612f18541c53d6777dcf0602 Mon Sep 17 00:00:00 2001
From: Tim Bray <tbray@textuality.com>
Date: Tue, 28 Jun 2022 12:05:56 -0700
Subject: [PATCH] chore: cleanup after NFA-DFA refactor

Closes: #67
Signed-off-by: Tim Bray <tbray@textuality.com>
---
 README.md           | 39 +++++++++++++++++++++----
 field_matcher.go    | 14 ++++-----
 flatten_json.go     | 13 ++++-----
 flattener.go        |  2 +-
 list_maker.go       | 10 +++----
 match_set.go        |  3 +-
 quamina.go          |  2 +-
 shell_style_test.go |  8 ++----
 small_table.go      | 69 ++-------------------------------------------
 value_matcher.go    | 24 +++++++++-------
 10 files changed, 75 insertions(+), 109 deletions(-)
diff --git a/README.md b/README.md
index f1c6c93..e2cdd9a 100644
--- a/README.md
+++ b/README.md
@@ -268,15 +268,37 @@ you’ve created a Quamina instance, whether through
 `New()` or `Copy()`, keep it around and run as many
 Events through it as is practical.
 
-
-### Performance
+### `AddPattern()` Performance
+
+In **most** cases, tens of thousands of Patterns per second can 
+be added to a Quamina instance; the in-memory data structure will
+become larger, but not unreasonably so. The amount of of
+available memory is the only significant limit to the 
+number of patterns an instance can carry.
+
+The exception is `shellstyle` Patterns. Adding many of these
+can rapidly lead to degradation in elapsed time and memory
+consumption, at a rate which is uneven but at worst 
+O(2<sup>N</sup>) in the number of patterns. A fuzz test
+which adds random 5-letter words with a `*` at a random
+location slows to a crawl after 30 or so `AddPattern()`
+calls, with the Quamina instance having many millions of
+states. Note that such instances, once built, can still
+match Events at high speeds.
+
+This is after some optimization. It is possible there is a
+bug such that automaton-building is unduly wasteful but it
+may remain the case that adding this flavor of Pattern is
+simply not something that can be done at large scale.
+
+### `MatchesForEvent()` Performance
 
 I used to say that the performance of
-`MatchesForEvent` was `O(1)` in the number of
+`MatchesForEvent` was O(1) in the number of
 Patterns. That’s probably a reasonable way to think
 about it, because it’s *almost* right. 
 
-To be correct, the performance is `O(N)` where `N` is 
+To be correct, the performance is O(N) where N is 
 the number of unique fields that appear in all the Patterns 
 that have been added to Quamina.  
 
@@ -315,9 +337,16 @@ is at most N, the number of fields left after discarding.
 
 Thus, adding a new Pattern that only
 mentions fields which are already mentioned in previous
-Patterns is effectively free i.e. `O(1)` in terms of run-time
+Patterns is effectively free i.e. O(1) in terms of run-time
 performance.
 
+### Further documentation
+
+There is a series of blog posts entitled
+[Quamina Diary](https://www.tbray.org/ongoing/What/Technology/Quamina%20Diary/)
+that provides a detailed discussion of the design decisions
+at a length unsuitable for in-code comments.
+
 ### Name
 
 From Wikipedia: Quamina Gladstone (1778 – 16 September
diff --git a/field_matcher.go b/field_matcher.go
index b86d025..9ada5d3 100644
--- a/field_matcher.go
+++ b/field_matcher.go
@@ -3,14 +3,14 @@ package quamina
 import "sync/atomic"
 
 // fieldMatcher represents a state in the matching automaton, which matches field names and dispatches to
-//  valueMatcher to complete matching of field values.  fieldMatcher has a map which is keyed by the
-//  field pathSegments values that can start transitions from this matcher; for each such field, there is a
-//  valueMatcher which, given the field's value, determines whether the automaton progresses to another fieldMatcher
+// valueMatcher to complete matching of field values.  fieldMatcher has a map which is keyed by the
+// field pathSegments values that can start transitions from this matcher; for each such field, there is a
+// valueMatcher which, given the field's value, determines whether the automaton progresses to another fieldMatcher
 // matches contains the X values that arrival at this state implies have matched
 // existsFalseFailures reports the condition that traversal has occurred by matching a field which is named in an
-//  exists:false pattern, and the named X's should be subtracted from the matches list being built up by a match project
+// exists:false pattern, and the named X's should be subtracted from the matches list being built up by a match project
 // the fields that hold state are segregated in updateable so they can be replaced atomically and make the matcher
-//  thread-safe.
+// thread-safe.
 type fieldMatcher struct {
 	updateable atomic.Value // always holds an *fmFields
 }
@@ -20,8 +20,8 @@ type fmFields struct {
 	existsFalseFailures *matchSet
 }
 
-// fields / update / addExistsFalseFailure / addMatch exist to insuleate callers from dealing with
-//  the atomic Load/Store business
+// fields / update / addExistsFalseFailure / addMatch exist to insulate callers from dealing with
+// the atomic Load/Store business
 func (m *fieldMatcher) fields() *fmFields {
 	return m.updateable.Load().(*fmFields)
 }
diff --git a/flatten_json.go b/flatten_json.go
index 40697c2..c19e2ce 100644
--- a/flatten_json.go
+++ b/flatten_json.go
@@ -7,14 +7,13 @@ import (
 )
 
 // flattenJSON is a custom non-general-purpose JSON parser whose object is to implement Flattener and produce a []Field
-//  list from a JSON object.  This could be done (and originally was) with the built-in encoding/json, but the
-//  performance was unsatisfactory (99% of time spent parsing events < 1% matching them). The profiler suggests
-//  that the performance issue was mostly due to excessive memory allocation.
+// list from a JSON object.  This could be done (and originally was) with the built-in encoding/json, but the
+// performance was unsatisfactory (99% of time spent parsing events < 1% matching them). The profiler suggests
+// that the performance issue was mostly due to excessive memory allocation.
 // If we assume that the event is immutable while we're working, then all the pieces of it that constitute
-//  the fields & values can be represented as []byte slices using a couple of offsets into the underlying event.
-//  There is an exception, namely strings that contain \-prefixed JSON escapes; since we want to work with the
-//  actual UTF-8 bytes, this requires re-writing such strings into memory we have to allocate.
-// TODO: There are gaps in the unit-test coverage, including nearly all the error conditions
+// the fields & values can be represented as []byte slices using a couple of offsets into the underlying event.
+// There is an exception, namely strings that contain \-prefixed JSON escapes; since we want to work with the
+// actual UTF-8 bytes, this requires re-writing such strings into memory we have to allocate.
 type flattenJSON struct {
 	event      []byte      // event being processed, treated as immutable
 	eventIndex int         // current byte index into the event
diff --git a/flattener.go b/flattener.go
index 3698f70..160110f 100644
--- a/flattener.go
+++ b/flattener.go
@@ -1,6 +1,6 @@
 package quamina
 
-// Flattener is interface which provides methods to turn a data structure into a list of path-names and
+// Flattener is an interface which provides methods to turn a data structure into a list of path-names and
 // values. The following example illustrates how it works for a JSON object:
 //  { "a": 1, "b": "two", "c": true", "d": nil, "e": { "e1": 2, "e2":, 3.02e-5} "f": [33, "x"]} }
 // should produce
diff --git a/list_maker.go b/list_maker.go
index 375d328..078c5fa 100644
--- a/list_maker.go
+++ b/list_maker.go
@@ -1,10 +1,10 @@
 package quamina
 
-// this needs to exist so that all all the lists containing a single step to X, or the triple step to X,Y,Z are the
-// same list, so that pack/unpack work properly. In a large majority of cases, there's only one step in the list, so
-// those are handled straightforwardly with a map. Otherwise, we laboriously look through all the lists for a match.
-// In Java I'd implement a hashCode() method and everything would be a hash, but I haven't learned yet what the Go
-// equivalent is.
+// this needs to exist so that all all the lists containing a single step to X are the same list, and similarly all
+// those containing the triple step to X,Y,Z are the same list, so that pack/unpack work properly. In a large majority
+// of cases, there's only one step in the list, so those are handled straightforwardly with a map. Otherwise, we
+// laboriously look through all the lists for a match. In Java I'd implement a hashCode() method and everything
+// would be a hash, but I haven't learned yet what the Go equivalent is.
 type dfaMemory struct {
 	singletons map[*nfaStep]*dfaStep
 	plurals    []perList
diff --git a/match_set.go b/match_set.go
index b320f3c..05fe38e 100644
--- a/match_set.go
+++ b/match_set.go
@@ -1,7 +1,7 @@
 package quamina
 
 // matchSet is what it says on the tin; implements a set semantic on matches, which are of type X. These could all
-//  be implemented as match[X]bool but this makes the calling code more readable.
+// be implemented as match[X]bool but this makes the calling code more readable.
 type matchSet struct {
 	set map[X]bool
 }
@@ -11,6 +11,7 @@ func newMatchSet() *matchSet {
 }
 
 func (m *matchSet) addX(exes ...X) *matchSet {
+	// for concurrency, can't update in place
 	newSet := make(map[X]bool, len(m.set)+1)
 	for k := range m.set {
 		newSet[k] = true
diff --git a/quamina.go b/quamina.go
index 49fed8e..1fd54b7 100644
--- a/quamina.go
+++ b/quamina.go
@@ -119,7 +119,7 @@ func (q *Quamina) Copy() *Quamina {
 
 // X is used in the AddPattern and MatchesForEvent APIs to identify the patterns that are added to
 // a Quamina instance and are reported by that instance as matching an event. Commonly, X is a string
-// used to name the event.
+// used to name the pattern.
 type X any
 
 // AddPattern - adds a pattern, identified by the x argument, to a Quamina instance.
diff --git a/shell_style_test.go b/shell_style_test.go
index def6674..40982dd 100644
--- a/shell_style_test.go
+++ b/shell_style_test.go
@@ -2,6 +2,7 @@ package quamina
 
 import (
 	"fmt"
+	"math/rand"
 	"strings"
 	"testing"
 )
@@ -84,8 +85,7 @@ func TestMakeShellStyleAutomaton(t *testing.T) {
 	}
 }
 
-/* To be used in profiling AddPattern for patterns which need NFAs
-func xTestShellStyleBuildTime(t *testing.T) {
+func TestShellStyleBuildTime(t *testing.T) {
 	words := readWWords(t)
 	starWords := make([]string, 0, len(words))
 	patterns := make([]string, 0, len(words))
@@ -107,7 +107,6 @@ func xTestShellStyleBuildTime(t *testing.T) {
 	}
 	fmt.Println(matcherStats(q.matcher.(*coreMatcher)))
 }
-*/
 
 func TestMixedPatterns(t *testing.T) {
 	// let's mix up some prefix, infix, suffix, and exact-match searches
@@ -123,9 +122,6 @@ func TestMixedPatterns(t *testing.T) {
 		`"ZOE"`:     19,
 		`"CRYSTAL"`: 6,
 	}
-	x1, _ := makeShellStyleAutomaton([]byte(`"*ST"`), nil)
-	x2, _ := makeShellStyleAutomaton([]byte(`"*TH"`), nil)
-	mergeNfas(x1, x2)
 
 	stringTemplate := `{"properties": { "STREET": [ XX ] } }`
 	shellTemplate := `{"properties": {"STREET":[ {"shellstyle": XX} ] } }`
diff --git a/small_table.go b/small_table.go
index 0c5c005..8fffc72 100644
--- a/small_table.go
+++ b/small_table.go
@@ -38,7 +38,7 @@ const valueTerminator byte = 0xf5
 //  but I imagine organizing it this way is a bit more memory-efficient.  Suppose we want to model a table where
 //  byte values 3 and 4 map to ss1 and byte 0x34 maps to ss2.  Then the smallTable would look like:
 //  ceilings:--|3|----|5|-|0x34|--|x35|-|byteCeiling|
-//  steps:---|nil|-|&ss1|--|ni|--|&ss2|---------|nil|
+//  steps:---|nil|-|&ss1|--|nil|-|&ss2|---------|nil|
 //  invariant: The last element of ceilings is always byteCeiling
 // The motivation is that we want to build a state machine on byte values to implement things like prefixes and
 // ranges of bytes.  This could be done simply with an array of size byteCeiling for each state in the machine,
@@ -133,7 +133,7 @@ func mergeOneDfaStep(step1, step2 *dfaStep, memoize map[dfaStepKey]*dfaStep) *df
 			uComb[i] = stepNew
 		case stepExisting != nil && stepNew != nil:
 			// there are considerable runs of the same value
-			if i > 1 && stepExisting == uExisting[i-1] && stepNew == uNew[i-1] {
+			if i > 0 && stepExisting == uExisting[i-1] && stepNew == uNew[i-1] {
 				uComb[i] = uComb[i-1]
 			} else {
 				uComb[i] = mergeOneDfaStep(stepExisting, stepNew, memoize)
@@ -148,7 +148,7 @@ func mergeOneDfaStep(step1, step2 *dfaStep, memoize map[dfaStepKey]*dfaStep) *df
 // transitions in the NFA because, as of the time of writing, none of the
 // pattern-matching required those transitions.  It is based on the algorithm
 // taught in the TU München course “Automata and Formal Languages”, lecturer
-// Prof. Dr.Ernst W. Mayr in 2014-15, in particular the examples appearing in
+// Prof. Dr. Ernst W. Mayr in 2014-15, in particular the examples appearing in
 // http://wwwmayr.informatik.tu-muenchen.de/lehre/2014WS/afs/2014-10-14.pdf
 // especially the slide in Example 11.
 //
@@ -208,69 +208,6 @@ func nfaStep2DfaStep(stepList *nfaStepList, memoize *dfaMemory) *dfaStep {
 	return dStep
 }
 
-type nfaStepKey struct {
-	step1 *nfaStep
-	step2 *nfaStep
-}
-
-func mergeNfas(nfa1, nfa2 *smallTable[*nfaStepList]) *smallTable[*nfaStepList] {
-	step1 := &nfaStep{table: nfa1}
-	step2 := &nfaStep{table: nfa2}
-	return mergeOneNfaStep(step1, step2, make(map[nfaStepKey]*nfaStep), newListMaker(), 0).table
-}
-
-func mergeOneNfaStep(step1, step2 *nfaStep, memoize map[nfaStepKey]*nfaStep, lister *listMaker, depth int) *nfaStep {
-	var combined *nfaStep
-	mKey := nfaStepKey{step1: step1, step2: step2}
-	combined, ok := memoize[mKey]
-	if ok {
-		return combined
-	}
-
-	newTable := newSmallTable[*nfaStepList]()
-	switch {
-	case step1.fieldTransitions == nil && step2.fieldTransitions == nil:
-		combined = &nfaStep{table: newTable}
-	case step1.fieldTransitions != nil && step2.fieldTransitions != nil:
-		transitions := append(step1.fieldTransitions, step2.fieldTransitions...)
-		combined = &nfaStep{table: newTable, fieldTransitions: transitions}
-	case step1.fieldTransitions != nil && step2.fieldTransitions == nil:
-		combined = &nfaStep{table: newTable, fieldTransitions: step1.fieldTransitions}
-	case step1.fieldTransitions == nil && step2.fieldTransitions != nil:
-		combined = &nfaStep{table: newTable, fieldTransitions: step2.fieldTransitions}
-	}
-	memoize[mKey] = combined
-
-	u1 := unpackTable(step1.table)
-	u2 := unpackTable(step2.table)
-	var uComb unpackedTable[*nfaStepList]
-	for i, list1 := range u1 {
-		list2 := u2[i]
-		switch {
-		case list1 == nil && list2 == nil:
-			uComb[i] = nil
-		case list1 != nil && list2 == nil:
-			uComb[i] = u1[i]
-		case list1 == nil && list2 != nil:
-			uComb[i] = u2[i]
-		case list1 != nil && list2 != nil:
-			var comboList []*nfaStep
-			for _, nextStep1 := range list1.steps {
-				for _, nextStep2 := range list2.steps {
-					merged := mergeOneNfaStep(nextStep1, nextStep2, memoize, lister, depth+1)
-					comboList = append(comboList, merged)
-				}
-			}
-			uComb[i] = lister.getList(comboList...)
-		}
-	}
-	combined.table.pack(&uComb)
-	return combined
-}
-
-// TODO: Clean up from here on down - too many funcs doing about the same thing, and also it seems that
-//  we never want to have more than one "range", which is the whole table.
-
 // makeSmallDfaTable creates a pre-loaded small table, with all bytes not otherwise specified having the defaultStep
 //  value, and then a few other values with their indexes and values specified in the other two arguments. The
 //  goal is to reduce memory churn
diff --git a/value_matcher.go b/value_matcher.go
index 4c716db..e43ab51 100644
--- a/value_matcher.go
+++ b/value_matcher.go
@@ -8,16 +8,21 @@ import (
 // valueMatcher represents a byte-driven automaton.  The table needs to be the
 // equivalent of a map[byte]nextState and is represented by smallTable. Some
 // patterns can be represented by a deterministic finite automaton (DFA) but
-// others, particularly with a regex failure, need to be represented by a
-// nondeterministic finite automaton (NFA). NFAs trump DFAs so if a valueMatcher
-// has one, it must be used in preference to other alternatives. In some cases
-// there is only one byte sequence forward from a state, i.e. a string-valued
-// field with only one string match. In this case, the DFA and NFA will b null
-// and the value being matched has to exactly equal the singletonMatch field; if
-// so, the singletonTransition is the return value. This is to avoid having a
-// long chain of smallTables each with only one entry.
+// others, particularly with a regex flavor, need to be represented by a
+// nondeterministic finite automaton (NFA).  NFAs are converted to DFAs for
+// simplicity and efficiency. The basic algorithm is to compute the automaton
+// for a pattern, convert it to a DFA if necessary, and merge with any
+// existing DFA.
+// In some (common) cases there is only one byte sequence forward from a state,
+// i.e. a string-valued field with only one string match. In this case, the DFA
+// will be null and the value being matched has to exactly equal the singletonMatch
+// field; if so, the singletonTransition is the return value. This is to avoid
+// having a long chain of smallTables each with only one entry.
+// To allow for concurrent access between one thread running AddPattern and many
+// others running MatchesForEvent, the valueMatcher payload is stored in an
+// atomic.Value
 type valueMatcher struct {
-	updateable atomic.Value
+	updateable atomic.Value // always contains *vmFields
 }
 type vmFields struct {
 	startDfa            *smallTable[*dfaStep]
@@ -83,7 +88,6 @@ func transitionDfa(table *smallTable[*dfaStep], val []byte, transitions []*field
 		}
 
 		transitions = append(transitions, step.fieldTransitions...)
-
 		table = step.table
 	}