From cbaf72026561bcb8fd13f783a2d22ca8b7e5b547 Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Fri, 10 Jun 2022 13:53:48 -0700 Subject: [PATCH] api: implement anything-but Closes: #61 Signed-off-by: Tim Bray --- PATTERNS.md | 27 ++++---- README.md | 11 +++- anything_but.go | 143 ++++++++++++++++++++++++++++++++++++++++ anything_but_test.go | 147 ++++++++++++++++++++++++++++++++++++++++++ arrays_test.go | 2 +- benchmarks_test.go | 2 +- concurrency_test.go | 4 +- core_matcher.go | 12 ++-- core_matcher_test.go | 30 +++++++-- pattern.go | 15 +++-- pattern_test.go | 20 +++--- quamina.go | 2 +- shell_style_test.go | 4 +- value_matcher.go | 71 +++++++++++++------- value_matcher_test.go | 14 ++-- 15 files changed, 426 insertions(+), 78 deletions(-) create mode 100644 anything_but.go create mode 100644 anything_but_test.go diff --git a/PATTERNS.md b/PATTERNS.md index d3295de..5dd8399 100644 --- a/PATTERNS.md +++ b/PATTERNS.md @@ -60,14 +60,22 @@ value **MUST** be `true` or `false`. Here are two Exists Patterns that would match the Events above: ```json {"alpha": {"beta": [ {"exists": true} ]}} -``` -```json {"alpha": {"gamma": [ {"exists": false} ]}} ``` If a Field in a Pattern contains an Exists Pattern, it **MUST NOT** contain any other values. +### Anything-But Pattern + +The Pattern Type of an Anything-But Pattern is +`anything-but` and its value **MUST** be an array +of strings. It will match a string value which +is not equal to any of the strings in the array. + +If a Field in a Pattern contains an Anything-But Pattern, +it **MUST NOT** contain any other values. 
+ ### Shellstyle Pattern The Pattern Type of a Shellstyle Pattern is `shellstyle` @@ -84,25 +92,20 @@ Consider the following Event: The following Shellstyle Patterns would match it: ```json {"img": [ {"shellstyle": "*.jpg"} ]} -``` -```json {"img": [ {"shellstyle": "https://example.com/*"} ]} -``` -```json {"img": [ {"shellstyle": "https://example.com/*.jpg"} ]} ``` - ## EventBridge Patterns Quamina’s Patterns are inspired by those offered by the AWS EventBridge service, as documented in [Amazon EventBridge event patterns](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns.html). -As of release 0.1.1, Quamina supports Exists patterns -but does not yet support AWS’s `numeric`, `prefix`, or -`anything-but` patterns. Note that a Shellstyle -Pattern with a trailing `*` is equivalent to an AWS `prefix` -pattern. +As of release 0.1.1, Quamina supports Exists and +Anything-But patterns but does not yet support AWS’s +`numeric` or `prefix` patterns. Note that a +Shellstyle Pattern with a trailing `*` is equivalent +to a `prefix` pattern. 
diff --git a/README.md b/README.md index 8cb8aec..ca5573d 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ The following Patterns would match it: } } ``` - + ```json { "Image": { @@ -100,7 +100,6 @@ The following Patterns would match it: } } ``` - ```json { "Image": { @@ -108,7 +107,13 @@ The following Patterns would match it: [ { "shellstyle": "http://www.example.com/*9943" } ] } } } - +``` +```json +{ + "Image": { + "Title": [ {"anything-but": ["Pikachu", "Eevee"] } ] + } +} ``` The structure of a Pattern, in terms of field names and nesting, must be the same as the structure of the Event diff --git a/anything_but.go b/anything_but.go new file mode 100644 index 0000000..034ac47 --- /dev/null +++ b/anything_but.go @@ -0,0 +1,143 @@ +package quamina + +import ( + "encoding/json" + "errors" + "fmt" + "io" +) + +func readAnythingButSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) { + t, err := pb.jd.Token() + if err != nil { + return + } + pathVals = valsIn + fieldCount := 0 + delim, ok := t.(json.Delim) + if (!ok) || delim != '[' { + err = errors.New("value for anything-but must be an array") + return + } + done := false + val := typedVal{vType: anythingButType} + for !done { + t, err = pb.jd.Token() + if err == io.EOF { + err = errors.New("anything-but list truncated") + return + } else if err != nil { + return + } + switch tt := t.(type) { + case json.Delim: + if tt == ']' { + done = true + } else { + err = fmt.Errorf("spurious %c in anything-but list", tt) + } + case string: + fieldCount++ + val.list = append(val.list, []byte(`"`+tt+`"`)) + default: + err = errors.New("malformed anything-but list") + done = true + } + } + if err != nil { + return + } + if fieldCount == 0 { + err = errors.New("empty list in 'anything-but' pattern") + return + } + pathVals = append(pathVals, val) + + // this has to be a '}' or you're going to get an err from the tokenizer, so no point looking at the value + _, err = pb.jd.Token() + return +} + 
+// makeMultiFieldAnythingButAutomaton exists to handle constructs such as +// +// {"x": [ {"anything-but": [ "a", "b" ] } ] } +// +// Making a succession of anything-but automata for each of "a" and "b" and then merging them turns out not +// to work because what the caller means is really an AND - everything that matches neither "a" nor "b". So +// in principle we could intersect automata, which is probably the right answer, but for the moment we can +// build like makeAnythingButAutomaton but do it for several vals in parallel +func makeMultiAnythingButAutomaton(vals [][]byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) { + var nextField *fieldMatcher + if useThisTransition != nil { + nextField = useThisTransition + } else { + nextField = newFieldMatcher() + } + ret, _ := oneMultiAnythingButStep(vals, 0, nextField), nextField + return ret, nextField +} + +// oneMultiAnythingButStep - spookeh +func oneMultiAnythingButStep(vals [][]byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] { + success := &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}} + var u unpackedTable[*dfaStep] + for i := range u { + u[i] = success + } + // for the char at position 'index' in each val + nextSteps := make(map[byte][][]byte) + lastSteps := make(map[byte]bool) + for _, val := range vals { + lastIndex := len(val) - 1 + switch { + case index < lastIndex: + utf8Byte := val[index] + step := nextSteps[utf8Byte] + nextSteps[utf8Byte] = append(step, val) + case index == lastIndex: + lastSteps[val[index]] = true + case index > lastIndex: + // no-op + } + } + + for utf8Byte, valList := range nextSteps { + u[utf8Byte] = &dfaStep{table: oneMultiAnythingButStep(valList, index+1, nextField)} + } + for utf8Byte := range lastSteps { + lastStep := &dfaStep{table: newSmallTable[*dfaStep]()} // note no transition + u[utf8Byte] = &dfaStep{table: makeSmallDfaTable(success, []byte{valueTerminator}, []*dfaStep{lastStep})} + } + 
table := newSmallTable[*dfaStep]() + table.pack(&u) + return table +} + +/* +// makeAnythingButAutomaton produces a DFA that matches anything but the byte sequence in val. +// For each byte in val with value Z, we produce a table that leads to a nextField match on all non-Z values, +// and to another such table for Z. After all the bytes have matched, a match on valueTerminator leads to +// an empty table with no field Transitions, all others to a nexField match +// [no longer used but retaining for now] +func makeAnythingButAutomaton(val []byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) { + var nextField *fieldMatcher + if useThisTransition != nil { + nextField = useThisTransition + } else { + nextField = newFieldMatcher() + } + return oneAnythingButStep(val, 0, nextField), nextField +} + +func oneAnythingButStep(val []byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] { + var nextStep *dfaStep + success := &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}} + if index == len(val)-1 { + lastStep := &dfaStep{table: newSmallTable[*dfaStep]()} // note no transition + nextStep = &dfaStep{table: makeSmallDfaTable(success, []byte{valueTerminator}, []*dfaStep{lastStep})} + } else { + nextStep = &dfaStep{table: oneAnythingButStep(val, index+1, nextField)} + } + return makeSmallDfaTable(success, []byte{val[index]}, []*dfaStep{nextStep}) +} +*/ diff --git a/anything_but_test.go b/anything_but_test.go new file mode 100644 index 0000000..aef519a --- /dev/null +++ b/anything_but_test.go @@ -0,0 +1,147 @@ +package quamina + +import ( + "strings" + "testing" +) + +func TestAnythingButMerging(t *testing.T) { + pFoo := `{"z": [ "foo" ]}` + pAbFoot := `{"z": [ {"anything-but": [ "foot"] } ]}` + q, _ := New() + var err error + + // can merge with DFA? 
+ err = q.AddPattern("pFoo", pFoo) + if err != nil { + t.Error("add pFoo") + } + err = q.AddPattern("pAbFoot", pAbFoot) + if err != nil { + t.Error("add pAbFoot: " + err.Error()) + } + var m []X + m, err = q.MatchesForEvent([]byte(`{"z": "foo"}`)) + if err != nil { + t.Error("m4E - foo: " + err.Error()) + } + if len(m) != 2 { + t.Errorf("len=%d?!?", len(m)) + } + m, err = q.MatchesForEvent([]byte(`{"z": "foot"}`)) + if err != nil { + t.Error("m4E - foo: " + err.Error()) + } + if len(m) != 0 { + t.Errorf("len=%d?!?", len(m)) + } + + // can merge with NFA? + pFooStar := `{"z": [ {"shellstyle": "foo*" } ]}` + q, _ = New() + err = q.AddPattern("pFooStar", pFooStar) + if err != nil { + t.Error("pFooStar: " + err.Error()) + } + err = q.AddPattern("pAbFoot", pAbFoot) + if err != nil { + t.Error("add pAbFoot: " + err.Error()) + } + m, err = q.MatchesForEvent([]byte(`{"z": "foo"}`)) + if err != nil { + t.Error("m4E: " + err.Error()) + } + if len(m) != 2 { + t.Errorf("len=%d?!?", len(m)) + } + m, err = q.MatchesForEvent([]byte(`{"z": "foot"}`)) + if err != nil { + t.Error("m4E: " + err.Error()) + } + if len(m) != 1 { + t.Errorf("len=%d?!?", len(m)) + } +} + +func TestAnythingButMatching(t *testing.T) { + q, _ := New() + // the idea is we're testing against all the 5-letter Wordle patterns, so we want a 4-letter prefix and + // suffix of an existing wordle, a 5-letter non-wordle, and a 6-letter where the wordle might match at the start + // and end. I tried to think of scenarios that would defeat the pretty-simple anything-but DFA but couldn't. 
+ problemWords := []string{ + `"bloo"`, + `"aper"`, + `"fnord"`, + `"doubts"`, + `"astern"`, + } + pws := strings.Join(problemWords, ",") + pattern := `{"a": [ {"anything-but": [ ` + pws + `] } ] }"` + err := q.AddPattern(pattern, pattern) + if err != nil { + t.Error("AP: " + err.Error()) + } + words := readWWords(t) + template := `{"a": "XX"}` + problemTemplate := `{"a": XX}` + for _, word := range problemWords { + event := strings.ReplaceAll(problemTemplate, "XX", word) + matches, err := q.MatchesForEvent([]byte(event)) + if err != nil { + t.Error("on problem word: " + err.Error()) + } + if len(matches) != 0 { + t.Error("Matched on : " + word) + } + } + for _, word := range words { + ws := string(word) + event := strings.ReplaceAll(template, "XX", ws) + matches, err := q.MatchesForEvent([]byte(event)) + if err != nil { + t.Error("m4E: " + err.Error()) + } + if len(matches) != 1 { + t.Errorf("missed on (len=%d): "+event, len(matches)) + } + } +} + +func TestParseAnythingButPattern(t *testing.T) { + goods := []string{ + `{"a": [ {"anything-but": [ "foo" ] } ] }`, + `{"a": [ {"anything-but": [ "bif", "x", "y", "a;sldkfjas;lkdfjs" ] } ] }`, + } + bads := []string{ + `{"a": [ {"anything-but": x } ] }`, + `{"a": [ {"anything-but": 1 } ] }`, + `{"a": [ {"anything-but": [ "a"`, + `{"a": [ {"anything-but": [ x ] } ] }`, + `{"a": [ {"anything-but": [ {"z": 1} ] } ] }`, + `{"a": [ {"anything-but": [ true ] } ] }`, + `{"a": [ {"anything-but": [ "foo" ] x`, + `{"a": [ {"anything-but": [ "foo" ] ] ] }`, + `{"a": [ {"anything-but": {"x":1} } ] }`, + `{"a": [ {"anything-but": "foo" } ] }`, + `{"a": [ 2, {"anything-but": [ "foo" ] } ] }`, + `{"a": [ {"anything-but": [ "foo" ] }, 2 ] }`, + `{"a": [ {"anything-but": [ ] } ] }`, + } + + for i, good := range goods { + fields, _, err := patternFromJSON([]byte(good)) + if err != nil { + t.Errorf("parse anything-but i=%d: "+err.Error(), i) + } + if len(fields[0].vals) != 1 { + t.Errorf("wanted11 fields got %d", len(fields)) + } + } + + 
for _, bad := range bads { + _, _, err := patternFromJSON([]byte(bad)) + if err == nil { + t.Errorf(`accepted anything-but "%s"`, bad) + } + } +} diff --git a/arrays_test.go b/arrays_test.go index ec6ec29..ad5d270 100644 --- a/arrays_test.go +++ b/arrays_test.go @@ -88,7 +88,7 @@ func TestArrayCorrectness(t *testing.T) { t.Error(err.Error()) } - matches, err := m.MatchesForJSONEvent([]byte(bands)) + matches, err := m.matchesForJSONEvent([]byte(bands)) if err != nil { t.Error(err.Error()) } diff --git a/benchmarks_test.go b/benchmarks_test.go index a3a9fcc..1666a09 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -69,7 +69,7 @@ func TestCRANLEIGH(t *testing.T) { lines := [][]byte{[]byte(jCranleigh), []byte(j108492)} for _, line := range lines { - mm, err := m.MatchesForJSONEvent(line) + mm, err := m.matchesForJSONEvent(line) if err != nil { t.Error("OOPS " + err.Error()) } diff --git a/concurrency_test.go b/concurrency_test.go index 8be2f79..da398ee 100644 --- a/concurrency_test.go +++ b/concurrency_test.go @@ -77,7 +77,7 @@ func TestConcurrency(t *testing.T) { ch := make(chan string, 1000) sent := 0 for _, line := range lines { - matches, err := m.MatchesForJSONEvent(line) + matches, err := m.matchesForJSONEvent(line) if err != nil { t.Error("Matches4JSON: " + err.Error()) } @@ -119,7 +119,7 @@ func TestConcurrency(t *testing.T) { event = fmt.Sprintf(`{"geometry": { "coordinates": [ %s ] } }`, val) } var matches []X - matches, err = m.MatchesForJSONEvent([]byte(event)) + matches, err = m.matchesForJSONEvent([]byte(event)) if err != nil { t.Error("after concur: " + err.Error()) } diff --git a/core_matcher.go b/core_matcher.go index 80aac1a..dd7e474 100644 --- a/core_matcher.go +++ b/core_matcher.go @@ -18,7 +18,7 @@ import ( "sync/atomic" ) -// coreMatcher uses a finite automaton to implement the MatchesForJSONEvent and MatchesForFields functions. +// coreMatcher uses a finite automaton to implement the matchesForJSONEvent and MatchesForFields functions. 
// state is the start of the automaton // namesUsed is a map of field names that are used in any of the patterns that this automaton encodes. Typically, // patterns only consider a subset of the fields in an incoming data object, and there is no reason to consider @@ -115,14 +115,16 @@ func (m *coreMatcher) addPattern(x X, patternJSON string) error { return err } -// DeletePattern not implemented by coreMatcher +// deletePattern not implemented by coreMatcher func (m *coreMatcher) deletePatterns(_ X) error { return errors.New("operation not supported") } -// MatchesForJSONEvent calls the flattener to pull the fields out of the event and -// hands over to MatchesForFields -func (m *coreMatcher) MatchesForJSONEvent(event []byte) ([]X, error) { +// matchesForJSONEvent calls the flattener to pull the fields out of the event and +// hands over to MatchesForFields +// This is a leftover from previous times, is only used by tests, but it's used by a *lot* +// so removing it would require a lot of tedious work +func (m *coreMatcher) matchesForJSONEvent(event []byte) ([]X, error) { fields, err := newJSONFlattener().Flatten(event, m) if err != nil { return nil, err diff --git a/core_matcher_test.go b/core_matcher_test.go index bbdacae..198afdb 100644 --- a/core_matcher_test.go +++ b/core_matcher_test.go @@ -24,7 +24,7 @@ func TestBasicMatching(t *testing.T) { } for _, should := range shouldMatch { var matches []X - matches, err = m.MatchesForJSONEvent([]byte(should)) + matches, err = m.matchesForJSONEvent([]byte(should)) if err != nil { t.Error(err.Error()) } @@ -34,7 +34,7 @@ func TestBasicMatching(t *testing.T) { } for _, shouldNot := range shouldNotMatch { var matches []X - matches, _ = m.MatchesForJSONEvent([]byte(shouldNot)) + matches, _ = m.matchesForJSONEvent([]byte(shouldNot)) if len(matches) != 0 { t.Error("Matched: " + shouldNot) } @@ -56,27 +56,27 @@ func TestExerciseMatching(t *testing.T) { "IDs": [116, 943, 234, 38793] } }` - shouldMatches := []string{ + 
patternsFromReadme := []string{ `{"Foo": [ { "exists": false } ] }"`, `{"Image": {"Width": [800]}}`, `{"Image": { "Animated": [ false], "Thumbnail": { "Height": [ 125 ] } } }}, "IDs": [943]}`, `{"Image": { "Title": [ { "exists": true } ] } }`, `{"Image": { "Width": [800], "Title": [ { "exists": true } ], "Animated": [ false ] } }`, `{"Image": { "Width": [800], "IDs": [ { "exists": true } ] } }`, - //`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://*.example.com/*" } ] } } }`, `{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "*9943" } ] } } }`, `{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*" } ] } } }`, `{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*9943" } ] } } }`, + `{"Image": { "Title": [ {"anything-but": ["Pikachu", "Eevee"] } ] } }`, } var err error - for i, should := range shouldMatches { + for i, should := range patternsFromReadme { m := newCoreMatcher() err = m.addPattern(fmt.Sprintf("should %d", i), should) if err != nil { t.Error("addPattern " + should + ": " + err.Error()) } - matches, err := m.MatchesForJSONEvent([]byte(j)) + matches, err := m.matchesForJSONEvent([]byte(j)) if err != nil { t.Error("M4J: " + err.Error()) } @@ -96,7 +96,7 @@ func TestExerciseMatching(t *testing.T) { if err != nil { t.Error("addPattern: " + shouldNot + ": " + err.Error()) } - matches, err := m.MatchesForJSONEvent([]byte(j)) + matches, err := m.matchesForJSONEvent([]byte(j)) if err != nil { t.Error("ShouldNot " + shouldNot + ": " + err.Error()) } @@ -104,6 +104,22 @@ func TestExerciseMatching(t *testing.T) { t.Error(shouldNot + " matched but shouldn't have") } } + // now add them all + m := newCoreMatcher() + for _, should := range patternsFromReadme { + err = m.addPattern(should, should) + if err != nil { + t.Error("add one of many: " + err.Error()) + } + } + matches, err := m.matchesForJSONEvent([]byte(j)) + if err != nil { + t.Error("m4J on all: " + err.Error()) + } + if len(matches) != 
len(patternsFromReadme) { + t.Errorf("on mix wanted %d got %d", len(patternsFromReadme), len(matches)) + } + fmt.Println(matcherStats(m)) } func TestSimpleaddPattern(t *testing.T) { diff --git a/pattern.go b/pattern.go index 23eeb40..cdcb732 100644 --- a/pattern.go +++ b/pattern.go @@ -18,11 +18,13 @@ const ( existsTrueType existsFalseType shellStyleType + anythingButType ) type typedVal struct { vType valType val string + list [][]byte } type patternField struct { path string @@ -158,17 +160,17 @@ func readPatternArray(pb *patternBuild) error { return fmt.Errorf("pattern malformed, illegal %v", tt) } case string: - pathVals = append(pathVals, typedVal{stringType, `"` + tt + `"`}) + pathVals = append(pathVals, typedVal{vType: stringType, val: `"` + tt + `"`}) case json.Number: - pathVals = append(pathVals, typedVal{numberType, tt.String()}) + pathVals = append(pathVals, typedVal{vType: numberType, val: tt.String()}) case bool: if tt { - pathVals = append(pathVals, typedVal{literalType, "true"}) + pathVals = append(pathVals, typedVal{vType: literalType, val: "true"}) } else { - pathVals = append(pathVals, typedVal{literalType, "false"}) + pathVals = append(pathVals, typedVal{vType: literalType, val: "false"}) } case nil: - pathVals = append(pathVals, typedVal{literalType, "null"}) + pathVals = append(pathVals, typedVal{vType: literalType, val: "null"}) } elementCount++ } @@ -184,6 +186,9 @@ func readSpecialPattern(pb *patternBuild, valsIn []typedVal) (pathVals []typedVa switch tt := t.(type) { case string: switch tt { + case "anything-but": + containsExclusive = tt + pathVals, err = readAnythingButSpecial(pb, pathVals) case "exists": containsExclusive = tt pathVals, err = readExistsSpecial(pb, pathVals) diff --git a/pattern_test.go b/pattern_test.go index 3ec46e8..b650cb5 100644 --- a/pattern_test.go +++ b/pattern_test.go @@ -41,22 +41,22 @@ func TestPatternFromJSON(t *testing.T) { `{"abc": [ 3, {"shellstyle":"a*b"} ] }`, `{"abc": [ {"shellstyle":"a*b"}, "foo" ] }`, 
 } - w1 := []*patternField{{path: "x", vals: []typedVal{{numberType, "2"}}}} + w1 := []*patternField{{path: "x", vals: []typedVal{{vType: numberType, val: "2"}}}} w2 := []*patternField{{path: "x", vals: []typedVal{ - {literalType, "null"}, - {literalType, "true"}, - {literalType, "false"}, - {stringType, `"hopp"`}, - {numberType, "3.072e-11"}, + {literalType, "null", nil}, + {literalType, "true", nil}, + {literalType, "false", nil}, + {stringType, `"hopp"`, nil}, + {numberType, "3.072e-11", nil}, }}} w3 := []*patternField{ {path: "x\na", vals: []typedVal{ - {numberType, "27"}, - {numberType, "28"}, + {numberType, "27", nil}, + {numberType, "28", nil}, }}, {path: "x\nb\nm", vals: []typedVal{ - {stringType, `"a"`}, - {stringType, `"b"`}, + {stringType, `"a"`, nil}, + {stringType, `"b"`, nil}, }}, } w4 := []*patternField{ diff --git a/quamina.go b/quamina.go index 56ee6b8..9be8eb0 100644 --- a/quamina.go +++ b/quamina.go @@ -131,7 +131,7 @@ func (q *Quamina) AddPattern(x X, patternJSON string) error { return q.matcher.addPattern(x, patternJSON) } -// DeletePattern removes pattnerns identified by the x argument from the Quamina insance; the effect +// DeletePatterns removes patterns identified by the x argument from the Quamina instance; the effect // is that return values from future calls to MatchesForEvent will not include this x value. 
func (q *Quamina) DeletePatterns(x X) error { return q.matcher.deletePatterns(x) diff --git a/shell_style_test.go b/shell_style_test.go index c0fac4b..cc5563d 100644 --- a/shell_style_test.go +++ b/shell_style_test.go @@ -20,7 +20,7 @@ func TestLongCase(t *testing.T) { } for _, should := range shoulds { event := fmt.Sprintf(`{"x": "%s"}`, should) - matches, err := m.MatchesForJSONEvent([]byte(event)) + matches, err := m.matchesForJSONEvent([]byte(event)) if err != nil { t.Error("m4j " + err.Error()) } @@ -164,7 +164,7 @@ func TestMixedPatterns(t *testing.T) { got := make(map[X]int) lines := getCityLotsLines(t) for _, line := range lines { - matches, err := m.MatchesForJSONEvent(line) + matches, err := m.matchesForJSONEvent(line) if err != nil { t.Error("Matches4JSON: " + err.Error()) } diff --git a/value_matcher.go b/value_matcher.go index 5295f9d..45997d0 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -155,18 +155,18 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { // there's already a table, thus an out-degree > 1 if fields.startDfa != nil || fields.startNfa != nil { - if val.vType == shellStyleType { - newNfa, nextField := makeShellStyleAutomaton(valBytes, nil) + switch val.vType { + case stringType, numberType, literalType: + newDfa, nextField := makeStringAutomaton(valBytes, nil) if fields.startNfa != nil { - fields.startNfa = mergeNfas(newNfa, fields.startNfa) + fields.startNfa = mergeNfas(fields.startNfa, dfa2Nfa(newDfa)) } else { - fields.startNfa = mergeNfas(newNfa, dfa2Nfa(fields.startDfa)) - fields.startDfa = nil + fields.startDfa = mergeDfas(fields.startDfa, newDfa) } m.update(fields) return nextField - } else { - newDfa, nextField := makeStringAutomaton(valBytes, nil) + case anythingButType: + newDfa, nextField := makeMultiAnythingButAutomaton(val.list, nil) if fields.startNfa != nil { fields.startNfa = mergeNfas(fields.startNfa, dfa2Nfa(newDfa)) } else { @@ -174,6 +174,18 @@ func (m *valueMatcher) addTransition(val 
typedVal) *fieldMatcher { } m.update(fields) return nextField + case shellStyleType: + newNfa, nextField := makeShellStyleAutomaton(valBytes, nil) + if fields.startNfa != nil { + fields.startNfa = mergeNfas(newNfa, fields.startNfa) + } else { + fields.startNfa = mergeNfas(newNfa, dfa2Nfa(fields.startDfa)) + fields.startDfa = nil + } + m.update(fields) + return nextField + default: + panic("unknown value type") } } @@ -182,38 +194,53 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher { // … unless this is completely virgin, in which case put in the singleton, // assuming it's just a string match if fields.singletonMatch == nil { - if val.vType == shellStyleType { - newAutomaton, nextField := makeShellStyleAutomaton(valBytes, nil) - fields.startNfa = newAutomaton - m.update(fields) - return nextField - } else { - // at the moment this works for everything that's not a shellStyle, - // but this may not always be true in future + switch val.vType { + case stringType, numberType, literalType: fields.singletonMatch = valBytes fields.singletonTransition = newFieldMatcher() m.update(fields) return fields.singletonTransition + case anythingButType: + newAutomaton, nextField := makeMultiAnythingButAutomaton(val.list, nil) + fields.startDfa = newAutomaton + m.update(fields) + return nextField + case shellStyleType: + newAutomaton, nextField := makeShellStyleAutomaton(valBytes, nil) + fields.startNfa = newAutomaton + m.update(fields) + return nextField + default: + panic("unknown value type") } } // singleton match is here and this value matches it - if (val.vType != shellStyleType) && bytes.Equal(fields.singletonMatch, valBytes) { - return fields.singletonTransition + if val.vType == stringType || val.vType == numberType || val.vType == literalType { + if bytes.Equal(fields.singletonMatch, valBytes) { + return fields.singletonTransition + } } // singleton is here, we don't match, so our outdegree becomes 2, so we have // to build an automaton with two values 
in it singletonAutomaton, _ := makeStringAutomaton(fields.singletonMatch, fields.singletonTransition) var nextField *fieldMatcher - if val.vType == shellStyleType { - var newNfa *smallTable[*nfaStepList] - newNfa, nextField = makeShellStyleAutomaton(valBytes, nil) - fields.startNfa = mergeNfas(newNfa, dfa2Nfa(singletonAutomaton)) - } else { + switch val.vType { + case stringType, numberType, literalType: var newDfa *smallTable[*dfaStep] newDfa, nextField = makeStringAutomaton(valBytes, nil) fields.startDfa = mergeDfas(singletonAutomaton, newDfa) + case anythingButType: + var newDfa *smallTable[*dfaStep] + newDfa, nextField = makeMultiAnythingButAutomaton(val.list, nil) + fields.startDfa = mergeDfas(singletonAutomaton, newDfa) + case shellStyleType: + var newNfa *smallTable[*nfaStepList] + newNfa, nextField = makeShellStyleAutomaton(valBytes, nil) + fields.startNfa = mergeNfas(newNfa, dfa2Nfa(singletonAutomaton)) + default: + panic("unknown val type") } // now table is ready for use, nuke singleton to signal threads to use it diff --git a/value_matcher_test.go b/value_matcher_test.go index 12fec20..92df739 100644 --- a/value_matcher_test.go +++ b/value_matcher_test.go @@ -145,7 +145,7 @@ func TestOverlappingValues(t *testing.T) { e1 := `{"x": 3, "a": "foo"}` e2 := `{"x": 3, "a": "football"}` e3 := `{"x": 3, "a": "footballer"}` - matches, err := m.MatchesForJSONEvent([]byte(e1)) + matches, err := m.matchesForJSONEvent([]byte(e1)) if err != nil { t.Error("Error on e1: " + err.Error()) } @@ -155,7 +155,7 @@ func TestOverlappingValues(t *testing.T) { t.Errorf("Failure on e1 - want %v got %v", wantP1, matches[0]) } - matches, err = m.MatchesForJSONEvent([]byte(e2)) + matches, err = m.matchesForJSONEvent([]byte(e2)) if err != nil { t.Error("Error on e2: " + err.Error()) } @@ -163,7 +163,7 @@ func TestOverlappingValues(t *testing.T) { t.Error("Failure on e2") } - matches, err = m.MatchesForJSONEvent([]byte(e3)) + matches, err = m.matchesForJSONEvent([]byte(e3)) if err != 
nil { t.Error("Error on e3: " + err.Error()) } @@ -205,7 +205,7 @@ func TestFuzzValueMatcher(t *testing.T) { eBase := `{"a": "999"}` for _, pName := range pNames { event := strings.ReplaceAll(eBase, "999", pName.(string)) - matches, err := m.MatchesForJSONEvent([]byte(event)) + matches, err := m.matchesForJSONEvent([]byte(event)) if err != nil { t.Errorf("m4J botch on %s: %s", event, err.Error()) } @@ -232,7 +232,7 @@ func TestFuzzValueMatcher(t *testing.T) { } shouldNot++ event := strings.ReplaceAll(eBase, "999", str) - matches, err := m.MatchesForJSONEvent([]byte(event)) + matches, err := m.matchesForJSONEvent([]byte(event)) if err != nil { t.Errorf("shouldNot botch on %s: %s", event, err.Error()) } @@ -269,7 +269,7 @@ func TestFuzzWithNumbers(t *testing.T) { eBase := `{"a": 999}` for _, pName := range pNames { event := strings.ReplaceAll(eBase, "999", pName.(string)) - matches, err := m.MatchesForJSONEvent([]byte(event)) + matches, err := m.matchesForJSONEvent([]byte(event)) if err != nil { t.Errorf("m4J botch on %s: %s", event, err.Error()) } @@ -295,7 +295,7 @@ func TestFuzzWithNumbers(t *testing.T) { event := strings.ReplaceAll(eBase, "999", ns) // breaks on 98463 // fmt.Println("Event: " + event) - matches, err := m.MatchesForJSONEvent([]byte(event)) + matches, err := m.matchesForJSONEvent([]byte(event)) if err != nil { t.Errorf("shouldNot botch on %s: %s", event, err.Error()) }