From 36e31d717b8356b4612f18541c53d6777dcf0602 Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Tue, 28 Jun 2022 12:05:56 -0700 Subject: [PATCH] chore: cleanup after NFA-DFA refactor Closes: #67 Signed-off-by: Tim Bray --- README.md | 39 +++++++++++++++++++++---- field_matcher.go | 14 ++++----- flatten_json.go | 13 ++++----- flattener.go | 2 +- list_maker.go | 10 +++---- match_set.go | 3 +- quamina.go | 2 +- shell_style_test.go | 8 ++---- small_table.go | 69 ++------------------------------------------- value_matcher.go | 24 +++++++++------- 10 files changed, 75 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index f1c6c93..e2cdd9a 100644 --- a/README.md +++ b/README.md @@ -268,15 +268,37 @@ you’ve created a Quamina instance, whether through `New()` or `Copy()`, keep it around and run as many Events through it as is practical. - -### Performance +### `AddPattern()` Performance + +In **most** cases, tens of thousands of Patterns per second can +be added to a Quamina instance; the in-memory data structure will +become larger, but not unreasonably so. The amount of of +available memory is the only significant limit to the +number of patterns an instance can carry. + +The exception is `shellstyle` Patterns. Adding many of these +can rapidly lead to degradation in elapsed time and memory +consumption, at a rate which is uneven but at worst +O(2N) in the number of patterns. A fuzz test +which adds random 5-letter words with a `*` at a random +location slows to a crawl after 30 or so `AddPattern()` +calls, with the Quamina instance having many millions of +states. Note that such instances, once built, can still +match Events at high speeds. + +This is after some optimization. It is possible there is a +bug such that automaton-building is unduly wasteful but it +may remain the case that adding this flavor of Pattern is +simply not something that can be done at large scale. + +### `MatchesForEvent()` Performance I used to say that the performance of -`MatchesForEvent` was `O(1)` in the number of +`MatchesForEvent` was O(1) in the number of Patterns. That’s probably a reasonable way to think about it, because it’s *almost* right. -To be correct, the performance is `O(N)` where `N` is +To be correct, the performance is O(N) where N is the number of unique fields that appear in all the Patterns that have been added to Quamina. @@ -315,9 +337,16 @@ is at most N, the number of fields left after discarding. Thus, adding a new Pattern that only mentions fields which are already mentioned in previous -Patterns is effectively free i.e. `O(1)` in terms of run-time +Patterns is effectively free i.e. O(1) in terms of run-time performance. +### Further documentation + +There is a series of blog posts entitled +[Quamina Diary](https://www.tbray.org/ongoing/What/Technology/Quamina%20Diary/) +that provides a detailed discussion of the design decisions +at a length unsuitable for in-code comments. + ### Name From Wikipedia: Quamina Gladstone (1778 – 16 September diff --git a/field_matcher.go b/field_matcher.go index b86d025..9ada5d3 100644 --- a/field_matcher.go +++ b/field_matcher.go @@ -3,14 +3,14 @@ package quamina import "sync/atomic" // fieldMatcher represents a state in the matching automaton, which matches field names and dispatches to -// valueMatcher to complete matching of field values. fieldMatcher has a map which is keyed by the -// field pathSegments values that can start transitions from this matcher; for each such field, there is a -// valueMatcher which, given the field's value, determines whether the automaton progresses to another fieldMatcher +// valueMatcher to complete matching of field values. fieldMatcher has a map which is keyed by the +// field pathSegments values that can start transitions from this matcher; for each such field, there is a +// valueMatcher which, given the field's value, determines whether the automaton progresses to another fieldMatcher // matches contains the X values that arrival at this state implies have matched // existsFalseFailures reports the condition that traversal has occurred by matching a field which is named in an -// exists:false pattern, and the named X's should be subtracted from the matches list being built up by a match project +// exists:false pattern, and the named X's should be subtracted from the matches list being built up by a match project // the fields that hold state are segregated in updateable so they can be replaced atomically and make the matcher -// thread-safe. +// thread-safe. type fieldMatcher struct { updateable atomic.Value // always holds an *fmFields } @@ -20,8 +20,8 @@ type fmFields struct { existsFalseFailures *matchSet } -// fields / update / addExistsFalseFailure / addMatch exist to insuleate callers from dealing with -// the atomic Load/Store business +// fields / update / addExistsFalseFailure / addMatch exist to insulate callers from dealing with +// the atomic Load/Store business func (m *fieldMatcher) fields() *fmFields { return m.updateable.Load().(*fmFields) } diff --git a/flatten_json.go b/flatten_json.go index 40697c2..c19e2ce 100644 --- a/flatten_json.go +++ b/flatten_json.go @@ -7,14 +7,13 @@ import ( ) // flattenJSON is a custom non-general-purpose JSON parser whose object is to implement Flattener and produce a []Field -// list from a JSON object. This could be done (and originally was) with the built-in encoding/json, but the -// performance was unsatisfactory (99% of time spent parsing events < 1% matching them). The profiler suggests -// that the performance issue was mostly due to excessive memory allocation. +// list from a JSON object. This could be done (and originally was) with the built-in encoding/json, but the +// performance was unsatisfactory (99% of time spent parsing events < 1% matching them). The profiler suggests +// that the performance issue was mostly due to excessive memory allocation. // If we assume that the event is immutable while we're working, then all the pieces of it that constitute -// the fields & values can be represented as []byte slices using a couple of offsets into the underlying event. -// There is an exception, namely strings that contain \-prefixed JSON escapes; since we want to work with the -// actual UTF-8 bytes, this requires re-writing such strings into memory we have to allocate. -// TODO: There are gaps in the unit-test coverage, including nearly all the error conditions +// the fields & values can be represented as []byte slices using a couple of offsets into the underlying event. +// There is an exception, namely strings that contain \-prefixed JSON escapes; since we want to work with the +// actual UTF-8 bytes, this requires re-writing such strings into memory we have to allocate. type flattenJSON struct { event []byte // event being processed, treated as immutable eventIndex int // current byte index into the event diff --git a/flattener.go b/flattener.go index 3698f70..160110f 100644 --- a/flattener.go +++ b/flattener.go @@ -1,6 +1,6 @@ package quamina -// Flattener is interface which provides methods to turn a data structure into a list of path-names and +// Flattener is an interface which provides methods to turn a data structure into a list of path-names and // values. The following example illustrates how it works for a JSON object: // { "a": 1, "b": "two", "c": true", "d": nil, "e": { "e1": 2, "e2":, 3.02e-5} "f": [33, "x"]} } // should produce diff --git a/list_maker.go b/list_maker.go index 375d328..078c5fa 100644 --- a/list_maker.go +++ b/list_maker.go @@ -1,10 +1,10 @@ package quamina -// this needs to exist so that all all the lists containing a single step to X, or the triple step to X,Y,Z are the -// same list, so that pack/unpack work properly. In a large majority of cases, there's only one step in the list, so -// those are handled straightforwardly with a map. Otherwise, we laboriously look through all the lists for a match. -// In Java I'd implement a hashCode() method and everything would be a hash, but I haven't learned yet what the Go -// equivalent is. +// this needs to exist so that all all the lists containing a single step to X are the same list, and similarly all +// those containing the triple step to X,Y,Z are the same list, so that pack/unpack work properly. In a large majority +// of cases, there's only one step in the list, so those are handled straightforwardly with a map. Otherwise, we +// laboriously look through all the lists for a match. In Java I'd implement a hashCode() method and everything +// would be a hash, but I haven't learned yet what the Go equivalent is. type dfaMemory struct { singletons map[*nfaStep]*dfaStep plurals []perList diff --git a/match_set.go b/match_set.go index b320f3c..05fe38e 100644 --- a/match_set.go +++ b/match_set.go @@ -1,7 +1,7 @@ package quamina // matchSet is what it says on the tin; implements a set semantic on matches, which are of type X. These could all -// be implemented as match[X]bool but this makes the calling code more readable. +// be implemented as match[X]bool but this makes the calling code more readable. type matchSet struct { set map[X]bool } @@ -11,6 +11,7 @@ func newMatchSet() *matchSet { } func (m *matchSet) addX(exes ...X) *matchSet { + // for concurrency, can't update in place newSet := make(map[X]bool, len(m.set)+1) for k := range m.set { newSet[k] = true diff --git a/quamina.go b/quamina.go index 49fed8e..1fd54b7 100644 --- a/quamina.go +++ b/quamina.go @@ -119,7 +119,7 @@ func (q *Quamina) Copy() *Quamina { // X is used in the AddPattern and MatchesForEvent APIs to identify the patterns that are added to // a Quamina instance and are reported by that instance as matching an event. Commonly, X is a string -// used to name the event. +// used to name the pattern. type X any // AddPattern - adds a pattern, identified by the x argument, to a Quamina instance. diff --git a/shell_style_test.go b/shell_style_test.go index def6674..40982dd 100644 --- a/shell_style_test.go +++ b/shell_style_test.go @@ -2,6 +2,7 @@ package quamina import ( "fmt" + "math/rand" "strings" "testing" ) @@ -84,8 +85,7 @@ func TestMakeShellStyleAutomaton(t *testing.T) { } } -/* To be used in profiling AddPattern for patterns which need NFAs -func xTestShellStyleBuildTime(t *testing.T) { +func TestShellStyleBuildTime(t *testing.T) { words := readWWords(t) starWords := make([]string, 0, len(words)) patterns := make([]string, 0, len(words)) @@ -107,7 +107,6 @@ func xTestShellStyleBuildTime(t *testing.T) { } fmt.Println(matcherStats(q.matcher.(*coreMatcher))) } -*/ func TestMixedPatterns(t *testing.T) { // let's mix up some prefix, infix, suffix, and exact-match searches @@ -123,9 +122,6 @@ func TestMixedPatterns(t *testing.T) { `"ZOE"`: 19, `"CRYSTAL"`: 6, } - x1, _ := makeShellStyleAutomaton([]byte(`"*ST"`), nil) - x2, _ := makeShellStyleAutomaton([]byte(`"*TH"`), nil) - mergeNfas(x1, x2) stringTemplate := `{"properties": { "STREET": [ XX ] } }` shellTemplate := `{"properties": {"STREET":[ {"shellstyle": XX} ] } }` diff --git a/small_table.go b/small_table.go index 0c5c005..8fffc72 100644 --- a/small_table.go +++ b/small_table.go @@ -38,7 +38,7 @@ const valueTerminator byte = 0xf5 // but I imagine organizing it this way is a bit more memory-efficient. Suppose we want to model a table where // byte values 3 and 4 map to ss1 and byte 0x34 maps to ss2. Then the smallTable would look like: // ceilings:--|3|----|5|-|0x34|--|x35|-|byteCeiling| -// steps:---|nil|-|&ss1|--|ni|--|&ss2|---------|nil| +// steps:---|nil|-|&ss1|--|nil|-|&ss2|---------|nil| // invariant: The last element of ceilings is always byteCeiling // The motivation is that we want to build a state machine on byte values to implement things like prefixes and // ranges of bytes. This could be done simply with an array of size byteCeiling for each state in the machine, @@ -133,7 +133,7 @@ func mergeOneDfaStep(step1, step2 *dfaStep, memoize map[dfaStepKey]*dfaStep) *df uComb[i] = stepNew case stepExisting != nil && stepNew != nil: // there are considerable runs of the same value - if i > 1 && stepExisting == uExisting[i-1] && stepNew == uNew[i-1] { + if i > 0 && stepExisting == uExisting[i-1] && stepNew == uNew[i-1] { uComb[i] = uComb[i-1] } else { uComb[i] = mergeOneDfaStep(stepExisting, stepNew, memoize) @@ -148,7 +148,7 @@ func mergeOneDfaStep(step1, step2 *dfaStep, memoize map[dfaStepKey]*dfaStep) *df // transitions in the NFA because, as of the time of writing, none of the // pattern-matching required those transitions. It is based on the algorithm // taught in the TU München course “Automata and Formal Languages”, lecturer -// Prof. Dr.Ernst W. Mayr in 2014-15, in particular the examples appearing in +// Prof. Dr. Ernst W. Mayr in 2014-15, in particular the examples appearing in // http://wwwmayr.informatik.tu-muenchen.de/lehre/2014WS/afs/2014-10-14.pdf // especially the slide in Example 11. // @@ -208,69 +208,6 @@ func nfaStep2DfaStep(stepList *nfaStepList, memoize *dfaMemory) *dfaStep { return dStep } -type nfaStepKey struct { - step1 *nfaStep - step2 *nfaStep -} - -func mergeNfas(nfa1, nfa2 *smallTable[*nfaStepList]) *smallTable[*nfaStepList] { - step1 := &nfaStep{table: nfa1} - step2 := &nfaStep{table: nfa2} - return mergeOneNfaStep(step1, step2, make(map[nfaStepKey]*nfaStep), newListMaker(), 0).table -} - -func mergeOneNfaStep(step1, step2 *nfaStep, memoize map[nfaStepKey]*nfaStep, lister *listMaker, depth int) *nfaStep { - var combined *nfaStep - mKey := nfaStepKey{step1: step1, step2: step2} - combined, ok := memoize[mKey] - if ok { - return combined - } - - newTable := newSmallTable[*nfaStepList]() - switch { - case step1.fieldTransitions == nil && step2.fieldTransitions == nil: - combined = &nfaStep{table: newTable} - case step1.fieldTransitions != nil && step2.fieldTransitions != nil: - transitions := append(step1.fieldTransitions, step2.fieldTransitions...) - combined = &nfaStep{table: newTable, fieldTransitions: transitions} - case step1.fieldTransitions != nil && step2.fieldTransitions == nil: - combined = &nfaStep{table: newTable, fieldTransitions: step1.fieldTransitions} - case step1.fieldTransitions == nil && step2.fieldTransitions != nil: - combined = &nfaStep{table: newTable, fieldTransitions: step2.fieldTransitions} - } - memoize[mKey] = combined - - u1 := unpackTable(step1.table) - u2 := unpackTable(step2.table) - var uComb unpackedTable[*nfaStepList] - for i, list1 := range u1 { - list2 := u2[i] - switch { - case list1 == nil && list2 == nil: - uComb[i] = nil - case list1 != nil && list2 == nil: - uComb[i] = u1[i] - case list1 == nil && list2 != nil: - uComb[i] = u2[i] - case list1 != nil && list2 != nil: - var comboList []*nfaStep - for _, nextStep1 := range list1.steps { - for _, nextStep2 := range list2.steps { - merged := mergeOneNfaStep(nextStep1, nextStep2, memoize, lister, depth+1) - comboList = append(comboList, merged) - } - } - uComb[i] = lister.getList(comboList...) - } - } - combined.table.pack(&uComb) - return combined -} - -// TODO: Clean up from here on down - too many funcs doing about the same thing, and also it seems that -// we never want to have more than one "range", which is the whole table. - // makeSmallDfaTable creates a pre-loaded small table, with all bytes not otherwise specified having the defaultStep // value, and then a few other values with their indexes and values specified in the other two arguments. The // goal is to reduce memory churn diff --git a/value_matcher.go b/value_matcher.go index 4c716db..e43ab51 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -8,16 +8,21 @@ import ( // valueMatcher represents a byte-driven automaton. The table needs to be the // equivalent of a map[byte]nextState and is represented by smallTable. Some // patterns can be represented by a deterministic finite automaton (DFA) but -// others, particularly with a regex failure, need to be represented by a -// nondeterministic finite automaton (NFA). NFAs trump DFAs so if a valueMatcher -// has one, it must be used in preference to other alternatives. In some cases -// there is only one byte sequence forward from a state, i.e. a string-valued -// field with only one string match. In this case, the DFA and NFA will b null -// and the value being matched has to exactly equal the singletonMatch field; if -// so, the singletonTransition is the return value. This is to avoid having a -// long chain of smallTables each with only one entry. +// others, particularly with a regex flavor, need to be represented by a +// nondeterministic finite automaton (NFA). NFAs are converted to DFAs for +// simplicity and efficiency. The basic algorithm is to compute the automaton +// for a pattern, convert it to a DFA if necessary, and merge with any +// existing DFA. +// In some (common) cases there is only one byte sequence forward from a state, +// i.e. a string-valued field with only one string match. In this case, the DFA +// will be null and the value being matched has to exactly equal the singletonMatch +// field; if so, the singletonTransition is the return value. This is to avoid +// having a long chain of smallTables each with only one entry. +// To allow for concurrent access between one thread running AddPattern and many +// others running MatchesForEvent, the valueMatcher payload is stored in an +// atomic.Value type valueMatcher struct { - updateable atomic.Value + updateable atomic.Value // always contains *vmFields } type vmFields struct { startDfa *smallTable[*dfaStep] @@ -83,7 +88,6 @@ func transitionDfa(table *smallTable[*dfaStep], val []byte, transitions []*field } transitions = append(transitions, step.fieldTransitions...) - table = step.table }