Skip to content

Commit

Permalink
api: implement anything-but
Browse files Browse the repository at this point in the history
Closes: #61
Signed-off-by: Tim Bray <[email protected]>
  • Loading branch information
timbray committed Jun 10, 2022
1 parent 14f1d31 commit cbaf720
Show file tree
Hide file tree
Showing 15 changed files with 426 additions and 78 deletions.
27 changes: 15 additions & 12 deletions PATTERNS.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,22 @@ value **MUST** be `true` or `false`. Here
are two Exists Patterns that would match the Events above:
```json
{"alpha": {"beta": [ {"exists": true} ]}}
```
```json
{"alpha": {"gamma": [ {"exists": false} ]}}
```

If a Field in a Pattern contains an Exists Pattern, it
**MUST NOT** contain any other values.

### Anything-But Pattern

The Pattern Type of an Anything-But Pattern is
`anything-but` and its value **MUST** be a non-empty
array of strings. It will match a string value that
is not equal to any of the strings in the array.

If a Field in a Pattern contains an Anything-But Pattern,
it **MUST NOT** contain any other values.

### Shellstyle Pattern

The Pattern Type of a Shellstyle Pattern is `shellstyle`
Expand All @@ -84,25 +92,20 @@ Consider the following Event:
The following Shellstyle Patterns would match it:
```json
{"img": [ {"shellstyle": "*.jpg"} ]}
```
```json
{"img": [ {"shellstyle": "https://example.com/*"} ]}
```
```json
{"img": [ {"shellstyle": "https://example.com/*.jpg"} ]}
```

## EventBridge Patterns

Quamina’s Patterns are inspired by those offered by
the AWS EventBridge service, as documented in
[Amazon EventBridge event patterns](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns.html).

As of release 0.1.1, Quamina supports Exists patterns
but does not yet support AWS’s `numeric`, `prefix`, or
`anything-but` patterns. Note that a Shellstyle
Pattern with a trailing `*` is equivalent to an AWS `prefix`
pattern.
As of release 0.1.1, Quamina supports Exists and
Anything-But patterns but does not yet support AWS’s
`numeric` or `prefix` patterns. Note that a
Shellstyle Pattern with a trailing `*` is equivalent
to a `prefix` pattern.



11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ The following Patterns would match it:
}
}
```

```json
{
"Image": {
Expand All @@ -100,15 +100,20 @@ The following Patterns would match it:
}
}
```

```json
{
"Image": {
"Thumbnail": { "Url":
[ { "shellstyle": "http://www.example.com/*9943" } ] }
}
}

```
```json
{
"Image": {
"Title": [ {"anything-but": ["Pikachu", "Eevee"] } ]
}
}
```
The structure of a Pattern, in terms of field names
and nesting, must be the same as the structure of the Event
Expand Down
143 changes: 143 additions & 0 deletions anything_but.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package quamina

import (
"encoding/json"
"errors"
"fmt"
"io"
)

// readAnythingButSpecial parses the value of an "anything-but" key, which must be a non-empty
// JSON array of strings, reading tokens from pb.jd. On success it returns valsIn with one
// typedVal of type anythingButType appended; that value's list holds each string from the
// array wrapped in double quotes (presumably to match the quoted form of string values in
// Events - confirm against the matcher). It also consumes the '}' that closes the
// {"anything-but": ...} object.
//
// An error is returned if the value is not an array, if the array is empty or truncated, if it
// contains anything other than strings, or if the object has anything after the array.
func readAnythingButSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) {
	t, err := pb.jd.Token()
	if err != nil {
		return nil, err
	}
	pathVals = valsIn
	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return pathVals, errors.New("value for anything-but must be an array")
	}

	val := typedVal{vType: anythingButType}
	for done := false; !done; {
		t, err = pb.jd.Token()
		if errors.Is(err, io.EOF) {
			return pathVals, errors.New("anything-but list truncated")
		}
		if err != nil {
			return pathVals, err
		}
		switch tt := t.(type) {
		case json.Delim:
			// Bail out right away on a nested array or object: if we kept looping, the next
			// Token call would overwrite the error and object keys would be taken for values.
			if tt != ']' {
				return pathVals, fmt.Errorf("spurious %c in anything-but list", tt)
			}
			done = true
		case string:
			val.list = append(val.list, []byte(`"`+tt+`"`))
		default:
			return pathVals, errors.New("malformed anything-but list")
		}
	}
	if len(val.list) == 0 {
		return pathVals, errors.New("empty list in 'anything-but' pattern")
	}
	pathVals = append(pathVals, val)

	// The enclosing object must end here. The tokenizer reports stray ']' and the like, but a
	// further key such as {"anything-but": [...], "x": 1} comes back as an ordinary string
	// token, so the token has to be checked explicitly.
	t, err = pb.jd.Token()
	if err != nil {
		return pathVals, err
	}
	if delim, ok := t.(json.Delim); !ok || delim != '}' {
		return pathVals, errors.New("trailing garbage in anything-but element")
	}
	return pathVals, nil
}

// makeMultiAnythingButAutomaton exists to handle constructs such as
//
//	{"x": [ {"anything-but": [ "a", "b" ] } ] }
//
// Making a succession of anything-but automata for each of "a" and "b" and then merging them turns out not
// to work because what the caller means is really an AND - everything that matches neither "a" nor "b". So
// in principle we could intersect automata, which is probably the right answer, but for the moment we
// build one automaton that handles all of the vals in parallel.
//
// If useThisTransition is non-nil it becomes the fieldMatcher that a successful match transitions to;
// otherwise a new one is created with newFieldMatcher. The function returns the automaton's start table
// together with that fieldMatcher.
func makeMultiAnythingButAutomaton(vals [][]byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) {
	nextField := useThisTransition
	if nextField == nil {
		nextField = newFieldMatcher()
	}
	return oneMultiAnythingButStep(vals, 0, nextField), nextField
}

// oneMultiAnythingButStep builds the table for byte position index of a multi-value anything-but
// automaton and recurses to build the tables for later positions.
//
// Every byte starts out mapped to a step whose fieldTransitions lead to nextField, i.e. a byte that
// none of the vals has at this position is a match. A byte that some val has at this position, with
// more bytes to follow, is remapped to a step holding the table for position index+1, built only from
// the vals sharing that byte. A byte that is the last byte of some val is remapped to a step whose
// table is built by makeSmallDfaTable from the match step, valueTerminator and a step with no field
// transitions (presumably: valueTerminator leads to the dead step, anything else to a match - confirm
// against makeSmallDfaTable).
//
// NOTE(review): the "last byte" assignments are applied after the "more bytes follow" assignments, so
// when the same byte is the last byte of one val and an interior byte of another at this position, the
// longer val's continuation is dropped. Whether that can happen with real inputs depends on what the
// callers put in vals - confirm.
func oneMultiAnythingButStep(vals [][]byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] {
	matched := &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}}

	// by default, whatever byte shows up here means the value is not one of vals
	var unpacked unpackedTable[*dfaStep]
	for b := range unpacked {
		unpacked[b] = matched
	}

	// sort the vals by what their byte at position 'index' means
	continuing := make(map[byte][][]byte) // byte -> vals that go on past this position
	ending := make(map[byte]bool)         // bytes that are the final byte of some val
	for _, v := range vals {
		final := len(v) - 1
		if index > final {
			// this val is shorter than the current position; nothing to record
			continue
		}
		b := v[index]
		if index == final {
			ending[b] = true
		} else {
			continuing[b] = append(continuing[b], v)
		}
	}

	for b, sharing := range continuing {
		deeper := oneMultiAnythingButStep(sharing, index+1, nextField)
		unpacked[b] = &dfaStep{table: deeper}
	}
	for b := range ending {
		dead := &dfaStep{table: newSmallTable[*dfaStep]()} // note no transition
		terminal := makeSmallDfaTable(matched, []byte{valueTerminator}, []*dfaStep{dead})
		unpacked[b] = &dfaStep{table: terminal}
	}

	packed := newSmallTable[*dfaStep]()
	packed.pack(&unpacked)
	return packed
}

/*
// makeAnythingButAutomaton produces a DFA that matches anything but the byte sequence in val.
// For each byte in val with value Z, we produce a table that leads to a nextField match on all non-Z values,
// and to another such table for Z. After all the bytes have matched, a match on valueTerminator leads to
// an empty table with no fieldTransitions; all other bytes lead to a nextField match
// [no longer used but retaining for now]
func makeAnythingButAutomaton(val []byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) {
var nextField *fieldMatcher
if useThisTransition != nil {
nextField = useThisTransition
} else {
nextField = newFieldMatcher()
}
return oneAnythingButStep(val, 0, nextField), nextField
}
func oneAnythingButStep(val []byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] {
var nextStep *dfaStep
success := &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}}
if index == len(val)-1 {
lastStep := &dfaStep{table: newSmallTable[*dfaStep]()} // note no transition
nextStep = &dfaStep{table: makeSmallDfaTable(success, []byte{valueTerminator}, []*dfaStep{lastStep})}
} else {
nextStep = &dfaStep{table: oneAnythingButStep(val, index+1, nextField)}
}
return makeSmallDfaTable(success, []byte{val[index]}, []*dfaStep{nextStep})
}
*/
147 changes: 147 additions & 0 deletions anything_but_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package quamina

import (
"strings"
"testing"
)

// TestAnythingButMerging adds an anything-but pattern to a matcher that already holds another
// pattern on the same field - first a plain string value, then a shellstyle pattern - and checks
// how many patterns match events whose value is or is not the excluded string.
func TestAnythingButMerging(t *testing.T) {
	pFoo := `{"z": [ "foo" ]}`
	pAbFoot := `{"z": [ {"anything-but": [ "foot"] } ]}`
	pFooStar := `{"z": [ {"shellstyle": "foo*" } ]}`

	q, err := New()
	if err != nil {
		t.Fatal("New: " + err.Error())
	}

	// expectMatches runs event through the current q and reports a failure, tagged with label,
	// unless exactly want patterns match. It reads q at call time, so it follows reassignment.
	expectMatches := func(label, event string, want int) {
		t.Helper()
		m, err := q.MatchesForEvent([]byte(event))
		if err != nil {
			t.Error(label + ": " + err.Error())
		}
		if len(m) != want {
			t.Errorf("%s: len=%d, wanted %d", label, len(m), want)
		}
	}

	// can merge with DFA?
	if err = q.AddPattern("pFoo", pFoo); err != nil {
		t.Error("add pFoo: " + err.Error())
	}
	if err = q.AddPattern("pAbFoot", pAbFoot); err != nil {
		t.Error("add pAbFoot: " + err.Error())
	}
	expectMatches("m4E - foo", `{"z": "foo"}`, 2)
	expectMatches("m4E - foot", `{"z": "foot"}`, 0)

	// can merge with NFA?
	q, err = New()
	if err != nil {
		t.Fatal("New: " + err.Error())
	}
	if err = q.AddPattern("pFooStar", pFooStar); err != nil {
		t.Error("pFooStar: " + err.Error())
	}
	if err = q.AddPattern("pAbFoot", pAbFoot); err != nil {
		t.Error("add pAbFoot: " + err.Error())
	}
	expectMatches("m4E (shellstyle) - foo", `{"z": "foo"}`, 2)
	expectMatches("m4E (shellstyle) - foot", `{"z": "foot"}`, 1)
}

// TestAnythingButMatching builds one anything-but pattern from a handful of "problem words" and
// checks that none of those words match it while every word returned by readWWords does.
func TestAnythingButMatching(t *testing.T) {
	q, err := New()
	if err != nil {
		t.Fatal("New: " + err.Error())
	}
	// the idea is we're testing against all the 5-letter Wordle patterns, so we want a 4-letter prefix and
	// suffix of an existing wordle, a 5-letter non-wordle, and a 6-letter where the wordle might match at the start
	// and end. I tried to think of scenarios that would defeat the pretty-simple anything-but DFA but couldn't.
	problemWords := []string{
		`"bloo"`,
		`"aper"`,
		`"fnord"`,
		`"doubts"`,
		`"astern"`,
	}
	pws := strings.Join(problemWords, ",")
	pattern := `{"a": [ {"anything-but": [ ` + pws + `] } ] }`
	// Without the pattern every check below would fail, so stop here rather than report each word.
	if err = q.AddPattern(pattern, pattern); err != nil {
		t.Fatal("AP: " + err.Error())
	}
	words := readWWords(t)
	template := `{"a": "XX"}`
	// the problem words already carry their own quotes
	problemTemplate := `{"a": XX}`
	for _, word := range problemWords {
		event := strings.ReplaceAll(problemTemplate, "XX", word)
		matches, err := q.MatchesForEvent([]byte(event))
		if err != nil {
			t.Error("on problem word: " + err.Error())
		}
		if len(matches) != 0 {
			t.Error("Matched on : " + word)
		}
	}
	for _, word := range words {
		event := strings.ReplaceAll(template, "XX", string(word))
		matches, err := q.MatchesForEvent([]byte(event))
		if err != nil {
			t.Error("m4E: " + err.Error())
		}
		if len(matches) != 1 {
			t.Errorf("missed on (len=%d): %s", len(matches), event)
		}
	}
}

// TestParseAnythingButPattern checks that patternFromJSON accepts well-formed anything-but
// patterns, producing a single value for the field, and rejects a range of malformed ones.
func TestParseAnythingButPattern(t *testing.T) {
	goods := []string{
		`{"a": [ {"anything-but": [ "foo" ] } ] }`,
		`{"a": [ {"anything-but": [ "bif", "x", "y", "a;sldkfjas;lkdfjs" ] } ] }`,
	}
	bads := []string{
		`{"a": [ {"anything-but": x } ] }`,
		`{"a": [ {"anything-but": 1 } ] }`,
		`{"a": [ {"anything-but": [ "a"`,
		`{"a": [ {"anything-but": [ x ] } ] }`,
		`{"a": [ {"anything-but": [ {"z": 1} ] } ] }`,
		`{"a": [ {"anything-but": [ true ] } ] }`,
		`{"a": [ {"anything-but": [ "foo" ] x`,
		`{"a": [ {"anything-but": [ "foo" ] ] ] }`,
		`{"a": [ {"anything-but": {"x":1} } ] }`,
		`{"a": [ {"anything-but": "foo" } ] }`,
		`{"a": [ 2, {"anything-but": [ "foo" ] } ] }`,
		`{"a": [ {"anything-but": [ "foo" ] }, 2 ] }`,
		`{"a": [ {"anything-but": [ ] } ] }`,
	}

	for i, good := range goods {
		fields, _, err := patternFromJSON([]byte(good))
		if err != nil {
			t.Errorf("parse anything-but i=%d: %s", i, err.Error())
			// fields cannot be trusted after a parse error, so don't index into it
			continue
		}
		if len(fields) == 0 {
			t.Errorf("parse anything-but i=%d: no fields returned", i)
			continue
		}
		if len(fields[0].vals) != 1 {
			t.Errorf("parse anything-but i=%d: wanted 1 val, got %d", i, len(fields[0].vals))
		}
	}

	for _, bad := range bads {
		_, _, err := patternFromJSON([]byte(bad))
		if err == nil {
			t.Errorf(`accepted anything-but "%s"`, bad)
		}
	}
}
2 changes: 1 addition & 1 deletion arrays_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func TestArrayCorrectness(t *testing.T) {
t.Error(err.Error())
}

matches, err := m.MatchesForJSONEvent([]byte(bands))
matches, err := m.matchesForJSONEvent([]byte(bands))
if err != nil {
t.Error(err.Error())
}
Expand Down
2 changes: 1 addition & 1 deletion benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func TestCRANLEIGH(t *testing.T) {
lines := [][]byte{[]byte(jCranleigh), []byte(j108492)}

for _, line := range lines {
mm, err := m.MatchesForJSONEvent(line)
mm, err := m.matchesForJSONEvent(line)
if err != nil {
t.Error("OOPS " + err.Error())
}
Expand Down
Loading

0 comments on commit cbaf720

Please sign in to comment.