-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes: #61 Signed-off-by: Tim Bray <[email protected]>
- Loading branch information
Showing
15 changed files
with
426 additions
and
78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
package quamina | ||
|
||
import ( | ||
"encoding/json" | ||
"errors" | ||
"fmt" | ||
"io" | ||
) | ||
|
||
func readAnythingButSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) { | ||
t, err := pb.jd.Token() | ||
if err != nil { | ||
return | ||
} | ||
pathVals = valsIn | ||
fieldCount := 0 | ||
delim, ok := t.(json.Delim) | ||
if (!ok) || delim != '[' { | ||
err = errors.New("value for anything-but must be an array") | ||
return | ||
} | ||
done := false | ||
val := typedVal{vType: anythingButType} | ||
for !done { | ||
t, err = pb.jd.Token() | ||
if err == io.EOF { | ||
err = errors.New("anything-but list truncated") | ||
return | ||
} else if err != nil { | ||
return | ||
} | ||
switch tt := t.(type) { | ||
case json.Delim: | ||
if tt == ']' { | ||
done = true | ||
} else { | ||
err = fmt.Errorf("spurious %c in anything-but list", tt) | ||
} | ||
case string: | ||
fieldCount++ | ||
val.list = append(val.list, []byte(`"`+tt+`"`)) | ||
default: | ||
err = errors.New("malformed anything-but list") | ||
done = true | ||
} | ||
} | ||
if err != nil { | ||
return | ||
} | ||
if fieldCount == 0 { | ||
err = errors.New("empty list in 'anything-but' pattern") | ||
return | ||
} | ||
pathVals = append(pathVals, val) | ||
|
||
// this has to be a '}' or you're going to get an err from the tokenizer, so no point looking at the value | ||
_, err = pb.jd.Token() | ||
return | ||
} | ||
|
||
// makeMultiFieldAnythingButAutomaton exists to handle constructs such as | ||
// | ||
// {"x": [ {"anything-but": [ "a", "b" ] } ] } | ||
// | ||
// Making a succession of anything-but automata for each of "a" and "b" and then merging them turns out not | ||
// to work because what the caller means is really an AND - everything that matches neither "a" nor "b". So | ||
// in principle we could intersect automata, which is probably the right answer, but for the moment we can | ||
// build like makeAnythingButAutomaton but do it for several vals in parallel | ||
func makeMultiAnythingButAutomaton(vals [][]byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) { | ||
var nextField *fieldMatcher | ||
if useThisTransition != nil { | ||
nextField = useThisTransition | ||
} else { | ||
nextField = newFieldMatcher() | ||
} | ||
ret, _ := oneMultiAnythingButStep(vals, 0, nextField), nextField | ||
return ret, nextField | ||
} | ||
|
||
// oneMultiAnythingButStep - spookeh | ||
func oneMultiAnythingButStep(vals [][]byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] { | ||
success := &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}} | ||
var u unpackedTable[*dfaStep] | ||
for i := range u { | ||
u[i] = success | ||
} | ||
// for the char at position 'index' in each val | ||
nextSteps := make(map[byte][][]byte) | ||
lastSteps := make(map[byte]bool) | ||
for _, val := range vals { | ||
lastIndex := len(val) - 1 | ||
switch { | ||
case index < lastIndex: | ||
utf8Byte := val[index] | ||
step := nextSteps[utf8Byte] | ||
nextSteps[utf8Byte] = append(step, val) | ||
case index == lastIndex: | ||
lastSteps[val[index]] = true | ||
case index > lastIndex: | ||
// no-op | ||
} | ||
} | ||
|
||
for utf8Byte, valList := range nextSteps { | ||
u[utf8Byte] = &dfaStep{table: oneMultiAnythingButStep(valList, index+1, nextField)} | ||
} | ||
for utf8Byte := range lastSteps { | ||
lastStep := &dfaStep{table: newSmallTable[*dfaStep]()} // note no transition | ||
u[utf8Byte] = &dfaStep{table: makeSmallDfaTable(success, []byte{valueTerminator}, []*dfaStep{lastStep})} | ||
} | ||
table := newSmallTable[*dfaStep]() | ||
table.pack(&u) | ||
return table | ||
} | ||
|
||
/* | ||
// makeAnythingButAutomaton produces a DFA that matches anything but the byte sequence in val. | ||
// For each byte in val with value Z, we produce a table that leads to a nextField match on all non-Z values, | ||
// and to another such table for Z. After all the bytes have matched, a match on valueTerminator leads to | ||
// an empty table with no field transitions, all others to a nextField match | ||
// [no longer used but retaining for now] | ||
func makeAnythingButAutomaton(val []byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) { | ||
var nextField *fieldMatcher | ||
if useThisTransition != nil { | ||
nextField = useThisTransition | ||
} else { | ||
nextField = newFieldMatcher() | ||
} | ||
return oneAnythingButStep(val, 0, nextField), nextField | ||
} | ||
func oneAnythingButStep(val []byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] { | ||
var nextStep *dfaStep | ||
success := &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}} | ||
if index == len(val)-1 { | ||
lastStep := &dfaStep{table: newSmallTable[*dfaStep]()} // note no transition | ||
nextStep = &dfaStep{table: makeSmallDfaTable(success, []byte{valueTerminator}, []*dfaStep{lastStep})} | ||
} else { | ||
nextStep = &dfaStep{table: oneAnythingButStep(val, index+1, nextField)} | ||
} | ||
return makeSmallDfaTable(success, []byte{val[index]}, []*dfaStep{nextStep}) | ||
} | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
package quamina | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestAnythingButMerging(t *testing.T) { | ||
pFoo := `{"z": [ "foo" ]}` | ||
pAbFoot := `{"z": [ {"anything-but": [ "foot"] } ]}` | ||
q, _ := New() | ||
var err error | ||
|
||
// can merge with DFA? | ||
err = q.AddPattern("pFoo", pFoo) | ||
if err != nil { | ||
t.Error("add pFoo") | ||
} | ||
err = q.AddPattern("pAbFoot", pAbFoot) | ||
if err != nil { | ||
t.Error("add pAbFoot: " + err.Error()) | ||
} | ||
var m []X | ||
m, err = q.MatchesForEvent([]byte(`{"z": "foo"}`)) | ||
if err != nil { | ||
t.Error("m4E - foo: " + err.Error()) | ||
} | ||
if len(m) != 2 { | ||
t.Errorf("len=%d?!?", len(m)) | ||
} | ||
m, err = q.MatchesForEvent([]byte(`{"z": "foot"}`)) | ||
if err != nil { | ||
t.Error("m4E - foo: " + err.Error()) | ||
} | ||
if len(m) != 0 { | ||
t.Errorf("len=%d?!?", len(m)) | ||
} | ||
|
||
// can merge with NFA? | ||
pFooStar := `{"z": [ {"shellstyle": "foo*" } ]}` | ||
q, _ = New() | ||
err = q.AddPattern("pFooStar", pFooStar) | ||
if err != nil { | ||
t.Error("pFooStar: " + err.Error()) | ||
} | ||
err = q.AddPattern("pAbFoot", pAbFoot) | ||
if err != nil { | ||
t.Error("add pAbFoot: " + err.Error()) | ||
} | ||
m, err = q.MatchesForEvent([]byte(`{"z": "foo"}`)) | ||
if err != nil { | ||
t.Error("m4E: " + err.Error()) | ||
} | ||
if len(m) != 2 { | ||
t.Errorf("len=%d?!?", len(m)) | ||
} | ||
m, err = q.MatchesForEvent([]byte(`{"z": "foot"}`)) | ||
if err != nil { | ||
t.Error("m4E: " + err.Error()) | ||
} | ||
if len(m) != 1 { | ||
t.Errorf("len=%d?!?", len(m)) | ||
} | ||
} | ||
|
||
func TestAnythingButMatching(t *testing.T) { | ||
q, _ := New() | ||
// the idea is we're testing against all the 5-letter Wordle patterns, so we want a 4-letter prefix and | ||
// suffix of an existing wordle, a 5-letter non-wordle, and a 6-letter where the wordle might match at the start | ||
// and end. I tried to think of scenarios that would defeat the pretty-simple anything-but DFA but couldn't. | ||
problemWords := []string{ | ||
`"bloo"`, | ||
`"aper"`, | ||
`"fnord"`, | ||
`"doubts"`, | ||
`"astern"`, | ||
} | ||
pws := strings.Join(problemWords, ",") | ||
pattern := `{"a": [ {"anything-but": [ ` + pws + `] } ] }"` | ||
err := q.AddPattern(pattern, pattern) | ||
if err != nil { | ||
t.Error("AP: " + err.Error()) | ||
} | ||
words := readWWords(t) | ||
template := `{"a": "XX"}` | ||
problemTemplate := `{"a": XX}` | ||
for _, word := range problemWords { | ||
event := strings.ReplaceAll(problemTemplate, "XX", word) | ||
matches, err := q.MatchesForEvent([]byte(event)) | ||
if err != nil { | ||
t.Error("on problem word: " + err.Error()) | ||
} | ||
if len(matches) != 0 { | ||
t.Error("Matched on : " + word) | ||
} | ||
} | ||
for _, word := range words { | ||
ws := string(word) | ||
event := strings.ReplaceAll(template, "XX", ws) | ||
matches, err := q.MatchesForEvent([]byte(event)) | ||
if err != nil { | ||
t.Error("m4E: " + err.Error()) | ||
} | ||
if len(matches) != 1 { | ||
t.Errorf("missed on (len=%d): "+event, len(matches)) | ||
} | ||
} | ||
} | ||
|
||
func TestParseAnythingButPattern(t *testing.T) { | ||
goods := []string{ | ||
`{"a": [ {"anything-but": [ "foo" ] } ] }`, | ||
`{"a": [ {"anything-but": [ "bif", "x", "y", "a;sldkfjas;lkdfjs" ] } ] }`, | ||
} | ||
bads := []string{ | ||
`{"a": [ {"anything-but": x } ] }`, | ||
`{"a": [ {"anything-but": 1 } ] }`, | ||
`{"a": [ {"anything-but": [ "a"`, | ||
`{"a": [ {"anything-but": [ x ] } ] }`, | ||
`{"a": [ {"anything-but": [ {"z": 1} ] } ] }`, | ||
`{"a": [ {"anything-but": [ true ] } ] }`, | ||
`{"a": [ {"anything-but": [ "foo" ] x`, | ||
`{"a": [ {"anything-but": [ "foo" ] ] ] }`, | ||
`{"a": [ {"anything-but": {"x":1} } ] }`, | ||
`{"a": [ {"anything-but": "foo" } ] }`, | ||
`{"a": [ 2, {"anything-but": [ "foo" ] } ] }`, | ||
`{"a": [ {"anything-but": [ "foo" ] }, 2 ] }`, | ||
`{"a": [ {"anything-but": [ ] } ] }`, | ||
} | ||
|
||
for i, good := range goods { | ||
fields, _, err := patternFromJSON([]byte(good)) | ||
if err != nil { | ||
t.Errorf("parse anything-but i=%d: "+err.Error(), i) | ||
} | ||
if len(fields[0].vals) != 1 { | ||
t.Errorf("wanted11 fields got %d", len(fields)) | ||
} | ||
} | ||
|
||
for _, bad := range bads { | ||
_, _, err := patternFromJSON([]byte(bad)) | ||
if err == nil { | ||
t.Errorf(`accepted anything-but "%s"`, bad) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.