Commit 888c8ce

Merge branch 'main' into dependabot/github_actions/actions/cache-3.2.6
timbray authored Feb 22, 2023
2 parents f7a14b4 + 51f5bbc commit 888c8ce
Showing 10 changed files with 183 additions and 10 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/codeql-analysis.yaml
@@ -43,7 +43,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@436dbd9100756e97f42f45da571adeebf8270723
uses: github/codeql-action/init@17573ee1cc1b9d061760f3a006fc4aac4f944fd5
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -54,7 +54,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@436dbd9100756e97f42f45da571adeebf8270723
uses: github/codeql-action/autobuild@17573ee1cc1b9d061760f3a006fc4aac4f944fd5

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -68,4 +68,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@436dbd9100756e97f42f45da571adeebf8270723
uses: github/codeql-action/analyze@17573ee1cc1b9d061760f3a006fc4aac4f944fd5
23 changes: 20 additions & 3 deletions PATTERNS.md
@@ -60,6 +60,23 @@ Thus, the following Pattern would match both JSON events above:
An **Extended Pattern** **MUST** be a JSON object containing
a single field whose name is known as the **Pattern Type**.

### Prefix Pattern

The Pattern Type of a Prefix Pattern is `prefix` and its value
**MUST** be a string.

The following event:

```json
{"a": "alpha"}
```

would be matched by this Prefix Pattern:

```json
{"a": [ { "prefix": "al" } ] }
```
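
As a rough illustration (an editor's sketch, not part of this change), the Prefix Pattern above can be exercised through Quamina's public API; this assumes the `New` / `AddPattern` / `MatchesForEvent` entry points and uses a hypothetical test name:

```go
package quamina

import "testing"

// TestPrefixPatternSketch is illustrative only: it adds the Prefix Pattern
// from the example above and checks that the sample event matches it.
func TestPrefixPatternSketch(t *testing.T) {
	q, err := New()
	if err != nil {
		t.Fatal(err)
	}
	if err := q.AddPattern("starts-with-al", `{"a": [ { "prefix": "al" } ] }`); err != nil {
		t.Fatal(err)
	}
	matches, err := q.MatchesForEvent([]byte(`{"a": "alpha"}`))
	if err != nil {
		t.Fatal(err)
	}
	if len(matches) != 1 || matches[0] != "starts-with-al" {
		t.Errorf("expected the prefix pattern to match, got %v", matches)
	}
}
```

An event whose `a` field were, say, `"beta"` would not match, since the value does not begin with `al`.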

### Exists Pattern

The Pattern Type of an Exists Pattern is `exists` and its
@@ -132,9 +149,9 @@ Consider the following Event:
```
The following Shellstyle Patterns would match it:
```json
{"img": [ {"shellstyle": "*.jpg"} ]}
{"img": [ {"shellstyle": "https://example.com/*"} ]}
{"img": [ {"shellstyle": "https://example.com/*.jpg"} ]}
{"img": [ {"shellstyle": "*.jpg"} ] }
{"img": [ {"shellstyle": "https://example.com/*"} ] }
{"img": [ {"shellstyle": "https://example.com/*.jpg"} ] }
```
## EventBridge Patterns

8 changes: 8 additions & 0 deletions README.md
@@ -113,6 +113,14 @@ The following Patterns would match it:
}
}
```
```json
{
"Image": {
"Thumbnail": {
"Url": [ "a", { "prefix": "https:" } ] }
}
}
```
The syntax and semantics of Patterns are fully specified
in [Patterns in Quamina](PATTERNS.md).

33 changes: 33 additions & 0 deletions cl2_test.go
@@ -78,6 +78,35 @@ func TestRulerCl2(t *testing.T) {
}
exactMatches := []int{1, 101, 35, 655, 1}

prefixRules := []string{
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"AC\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"BL\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"DR\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"FU\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"RH\" } ]\n" +
" }\n" +
"}",
}
prefixMatches := []int{24, 442, 38, 2387, 328}

anythingButRules := []string{
"{\n" +
" \"properties\": {\n" +
@@ -166,6 +195,10 @@ func TestRulerCl2(t *testing.T) {
bm.addRules(exactRules, exactMatches)
fmt.Printf("EXACT events/sec: %.1f\n", bm.run(t, lines))

bm = newBenchmarker()
bm.addRules(prefixRules, prefixMatches)
fmt.Printf("PREFIX events/sec: %.1f\n", bm.run(t, lines))

bm = newBenchmarker()
bm.addRules(anythingButRules, anythingButMatches)
fmt.Printf("ANYTHING-BUT events/sec: %.1f\n", bm.run(t, lines))
12 changes: 12 additions & 0 deletions core_matcher_test.go
@@ -165,9 +165,20 @@ func TestExerciseMatching(t *testing.T) {
`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*" } ] } } }`,
`{"Image": { "Thumbnail": { "Url": [ { "shellstyle": "https://www.example.com/*9943" } ] } } }`,
`{"Image": { "Title": [ {"anything-but": ["Pikachu", "Eevee"] } ] } }`,
`{"Image": { "Thumbnail": { "Url": [ { "prefix": "https:" } ] } } }`,
`{"Image": { "Thumbnail": { "Url": [ "a", { "prefix": "https:" } ] } } }`,
}

var err error
blankMatcher := newCoreMatcher()
empty, err := blankMatcher.matchesForJSONEvent([]byte(j))
if err != nil {
t.Error("blank: " + err.Error())
}
if len(empty) != 0 {
t.Error("matches on blank matcher")
}

for i, should := range patternsFromReadme {
m := newCoreMatcher()
err = m.addPattern(fmt.Sprintf("should %d", i), should)
@@ -187,6 +198,7 @@ func TestExerciseMatching(t *testing.T) {
`{"Image": { "Animated": [ { "exists": false } ] } }`,
`{"Image": { "NotThere": [ { "exists": true } ] } }`,
`{"Image": { "IDs": [ { "exists": false } ], "Animated": [ false ] } }`,
`{"Image": { "Thumbnail": { "Url": [ { "prefix": "http:" } ] } } }`,
}
for i, shouldNot := range shouldNotMatches {
m := newCoreMatcher()
26 changes: 26 additions & 0 deletions pattern.go
@@ -19,6 +19,7 @@ const (
existsFalseType
shellStyleType
anythingButType
prefixType
)

// typedVal represents the value of a field in a pattern, giving the value and the type of pattern.
@@ -196,12 +197,37 @@ func readSpecialPattern(pb *patternBuild, valsIn []typedVal) (pathVals []typedVa
pathVals, err = readExistsSpecial(pb, pathVals)
case "shellstyle":
pathVals, err = readShellStyleSpecial(pb, pathVals)
case "prefix":
pathVals, err = readPrefixSpecial(pb, pathVals)
default:
err = errors.New("unrecognized in special pattern: " + tt)
}
return
}

func readPrefixSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) {
t, err := pb.jd.Token()
if err != nil {
return
}
pathVals = valsIn

prefixString, ok := t.(string)
if !ok {
err = errors.New("value for 'prefix' must be a string")
return
}
val := typedVal{
vType: prefixType,
val: `"` + prefixString + `"`,
}
pathVals = append(pathVals, val)

// has to be } or tokenizer will throw error
_, err = pb.jd.Token()
return
}
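
For orientation, here is a small, self-contained sketch (not part of this diff) of the token stream that `readPrefixSpecial` consumes. It assumes `pb.jd` is an `encoding/json` `*Decoder` and that `readSpecialPattern` has already read the opening `{` and the `prefix` key before dispatching here:

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	jd := json.NewDecoder(strings.NewReader(`{"prefix": "al"}`))

	// These two tokens are consumed by the caller before readPrefixSpecial runs.
	jd.Token() // json.Delim('{')
	jd.Token() // "prefix" — the Pattern Type

	// First Token() inside readPrefixSpecial: the value, which must be a string.
	v, _ := jd.Token()
	prefix, ok := v.(string)
	fmt.Println(prefix, ok) // al true; a non-string here takes the error path

	// Second Token() inside readPrefixSpecial: expected to be the closing '}'.
	end, _ := jd.Token()
	fmt.Println(end) // }
}
```

The accepted string is stored wrapped in its own double quotes (`"` + prefixString + `"`) because flattened field values keep their JSON quoting; the automaton construction below relies on that.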

func readExistsSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedVal, err error) {
t, err := pb.jd.Token()
if err != nil {
5 changes: 5 additions & 0 deletions pattern_test.go
@@ -61,6 +61,11 @@ func TestPatternFromJSON(t *testing.T) {
`{"xxx": [ { "exists": false, "x": ["a", 3 ] }] }`,
`{"abc": [ {"shellstyle":15} ] }`,
`{"abc": [ {"shellstyle":"a**b"}, "foo" ] }`,
`{"abc": [ {"prefix":23}, "foo" ] }`,
`{"abc": [ {"prefix":["a", "b"]}, "foo" ] }`,
`{"abc": [ {"prefix": - }, "foo" ] }`,
`{"abc": [ {"prefix": - "a" }, "foo" ] }`,
`{"abc": [ {"prefix": "a" {, "foo" ] }`,
}
for _, b := range bads {
_, err := patternFromJSON([]byte(b))
3 changes: 1 addition & 2 deletions small_table.go
@@ -107,8 +107,7 @@ func mergeOneDfaStep(step1, step2 *dfaStep, memoize map[dfaStepKey]*dfaStep) *df
return combined
}

// TODO: this works, all the tests pass, but I'm not satisfied with it. My intuition is that you ought
// to be able to come out of this with just one *fieldMatcher
// TODO: this works, all the tests pass, but it should be possible to come out of this with just one *fieldMatcher
newTable := newSmallTable[*dfaStep]()
switch {
case step1.fieldTransitions == nil && step2.fieldTransitions == nil:
37 changes: 35 additions & 2 deletions value_matcher.go
@@ -70,7 +70,8 @@ func (m *valueMatcher) transitionOn(val []byte) []*fieldMatcher {
return transitionDfa(fields.startDfa, val, transitions)

default:
// no dfa, no singleton, nothing to do
// no dfa, no singleton, nothing to do, this probably can't happen because a flattener
// shouldn't preserve a field that hasn't appeared in a pattern
return transitions
}
}
@@ -116,6 +117,8 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher {
var newNfa *smallTable[*nfaStepList]
newNfa, nextField = makeShellStyleAutomaton(valBytes, nil)
newDfa = nfa2Dfa(newNfa)
case prefixType:
newDfa, nextField = makePrefixAutomaton(valBytes, nil)
default:
panic("unknown value type")
}
@@ -145,6 +148,11 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher {
fields.startDfa = nfa2Dfa(newAutomaton)
m.update(fields)
return nextField
case prefixType:
newAutomaton, nextField := makePrefixAutomaton(valBytes, nil)
fields.startDfa = newAutomaton
m.update(fields)
return nextField
default:
panic("unknown value type")
}
@@ -171,8 +179,10 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher {
var newNfa *smallTable[*nfaStepList]
newNfa, nextField = makeShellStyleAutomaton(valBytes, nil)
newDfa = nfa2Dfa(newNfa)
case prefixType:
newDfa, nextField = makePrefixAutomaton(valBytes, nil)
default:
panic("unknown val type")
panic("unknown value type")
}

// now table is ready for use, nuke singleton to signal threads to use it
@@ -183,6 +193,29 @@ func (m *valueMatcher) addTransition(val typedVal) *fieldMatcher {
return nextField
}

func makePrefixAutomaton(val []byte, useThisTransition *fieldMatcher) (*smallTable[*dfaStep], *fieldMatcher) {
var nextField *fieldMatcher

if useThisTransition != nil {
nextField = useThisTransition
} else {
nextField = newFieldMatcher()
}
return onePrefixStep(val, 0, nextField), nextField
}

func onePrefixStep(val []byte, index int, nextField *fieldMatcher) *smallTable[*dfaStep] {
var nextStep *dfaStep

// have to stop one short to skip the closing "
if index == len(val)-2 {
nextStep = &dfaStep{table: newSmallTable[*dfaStep](), fieldTransitions: []*fieldMatcher{nextField}}
} else {
nextStep = &dfaStep{table: onePrefixStep(val, index+1, nextField)}
}
return makeSmallDfaTable(nil, []byte{val[index]}, []*dfaStep{nextStep})
}
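
As a simplified, self-contained sketch (not part of this diff) of the same idea: a chain of byte-indexed steps accepts the opening quote plus the prefix bytes and then stops, so the closing quote and anything after the prefix are never examined. The map-based `step` type here is illustrative only; Quamina's real automaton uses `smallTable`/`dfaStep` as above.

```go
package main

import "fmt"

// step is a toy stand-in for dfaStep: a byte-indexed transition map plus an accept flag.
type step struct {
	next     map[byte]*step
	accepted bool
}

// buildPrefixChain builds one step per byte of `"` + prefix, with no trailing
// quote — mirroring how onePrefixStep stops recursing at len(val)-2.
func buildPrefixChain(prefix string) *step {
	quoted := `"` + prefix
	root := &step{next: map[byte]*step{}}
	cur := root
	for i := 0; i < len(quoted); i++ {
		nxt := &step{next: map[byte]*step{}}
		cur.next[quoted[i]] = nxt
		cur = nxt
	}
	cur.accepted = true
	return root
}

// matches walks a flattened (still-quoted) field value through the chain and
// reports whether the accepting step is ever reached.
func matches(root *step, quotedValue string) bool {
	cur := root
	for i := 0; i < len(quotedValue); i++ {
		if cur.accepted {
			return true
		}
		nxt, ok := cur.next[quotedValue[i]]
		if !ok {
			return false
		}
		cur = nxt
	}
	return cur.accepted
}

func main() {
	root := buildPrefixChain("al")
	fmt.Println(matches(root, `"alpha"`)) // true
	fmt.Println(matches(root, `"beta"`))  // false
}
```

The `index == len(val)-2` test above plays the same role as dropping the trailing quote here: `val` arrives wrapped in both quotes, the generated steps consume the opening quote and the prefix bytes, and because the closing quote is never required, any value that merely continues past the prefix still reaches the accepting step.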

// makeStringAutomaton creates a utf8-based automaton from a literal string
// using smallTables. Note the addition of a valueTerminator. The implementation
// is recursive because this allows the use of the makeSmallDfaTable call, which
40 changes: 40 additions & 0 deletions value_matcher_test.go
@@ -7,6 +7,46 @@ import (
"testing"
)

func TestInvalidValueTypes(t *testing.T) {
var before []typedVal
addInvalid(t, before)

before = append(before, typedVal{vType: stringType, val: "foo"})
addInvalid(t, before)

before = append(before, typedVal{vType: stringType, val: "bar"})
addInvalid(t, before)
}

func addInvalid(t *testing.T, before []typedVal) {
t.Helper()
defer func() {
if recover() == nil {
t.Errorf("TestAddInvalidTransition should have panicked")
}
}()

panicType := valType(999)

// empty value matcher
m := newValueMatcher()
invalidField := typedVal{
vType: panicType,
val: "one",
}
for _, addBefore := range before {
m.addTransition(addBefore)
}
m.addTransition(invalidField)
}

func TestNoOpTransition(t *testing.T) {
vm := newValueMatcher()
tr := vm.transitionOn([]byte("foo"))
if len(tr) != 0 {
t.Error("matched on empty valuematcher")
}
}

func TestAddTransition(t *testing.T) {
m := newValueMatcher()
v1 := typedVal{
