Merge pull request #1 from tudelft-cda-lab/38-test-cases-for-sage

test
tudelft-cda-lab · Sep 15, 2023 · d0655af · d0655af
2 parents 85d1508 + 879cc80
commit d0655af
Show file tree

Hide file tree

Showing 12 changed files with 1,003 additions and 28 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,138 @@
+# Runs SAGE from the main branch and from the pull-request branch on the same
+# alert datasets and runs the test scripts on the two sets of results.
+name: 'Run tests'
+
+on:
+  pull_request:
+    # In GitHub's filter patterns `*` does not match `/`, so pull requests that
+    # target a branch such as `feature/x` were not tested; `**` matches every branch.
+    branches:
+      - '**'
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      ################################################################
+      #################   INSTALL DEPENDENCIES  ######################
+      ################################################################
+
+      # GitHub-hosted runners execute steps as an unprivileged user, so apt needs
+      # sudo; `-y` answers the confirmation prompt, which would otherwise abort
+      # the install in a non-interactive job.
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y graphviz
+          pip install requests numpy matplotlib
+     
+      # Downloads the FlexFringe release asset for this runner with the GitHub CLI.
+      - name: Fetch flexfringe binary
+        env:
+          FF_REPO: "https://github.com/tudelft-cda-lab/FlexFringe"
+          # The GitHub CLI needs a token in GH_TOKEN when it runs inside a workflow.
+          GH_TOKEN: ${{ github.token }}
+          # The `inputs` context is not populated for pull_request events, so the
+          # former `inputs.platform` expression always expanded to an empty string.
+          # NOTE(review): assumes the Linux asset name contains "x64-linux" - confirm.
+          FF_PLATFORM: 'linux'
+        shell: bash
+        run: |
+          gh release download latest -R "$FF_REPO" -p "*x64-${FF_PLATFORM}*"
+
+      # Baseline: the main branch of tudelft-cda-lab/SAGE, checked out into SAGE-main/.
+      - name: Clone main branch of tudelft-cda-lab/SAGE
+        uses: actions/checkout@v4
+        with:
+          repository: tudelft-cda-lab/SAGE
+          ref: main
+          path: SAGE-main
+
+      # Candidate: no repository or ref is given, so the action's defaults decide
+      # what is checked out; the result is placed in SAGE-updated/.
+      - name: Clone PR branch of tudelft-cda-lab/SAGE
+        uses: actions/checkout@v4
+        with:
+          path: SAGE-updated
+
+
+      ################################################################
+      #################    RUN STYLE CHECK    ########################
+      ################################################################
+
+     #- name: autopep8
+     #  id: autopep8
+     #  uses: peter-evans/autopep8@v2
+     #  with:
+     #    args: --recursive --in-place --aggressive --aggressive .
+
+
+      ################################################################
+      #############    RUN BOTH VERSIONS OF SAGE     #################
+      ################################################################
+
+      # Produces the baseline ("orig-*") results from the main-branch checkout.
+      - name: Run SAGE on the main branch
+        shell: bash
+        working-directory: SAGE-main/
+        run: |
+          python sage.py alerts/cptc-2017/ orig-2017 --dataset cptc --keep-files
+          python sage.py alerts/cptc-2018/ orig-2018 --dataset cptc --keep-files
+          python sage.py alerts/ccdc/ orig-ccdc --dataset other --keep-files
+          # Copy the four outputs of every experiment into the workspace root
+          for exp in orig-2017 orig-2018 orig-ccdc; do
+            cp -R "${exp}.txt" "${exp}.txt.ff.final.json" "${exp}.txt.ff.finalsinks.json" "${exp}AGs/" ..
+          done
+
+      # Moves everything under SAGE-main/test-scripts/ into the workspace root.
+      - name: Copy the test file in the top directory
+        shell: bash
+        working-directory: SAGE-main/
+        run: |
+          mv test-scripts/* ..
+
+      # Produces the candidate ("updated-*") results from the pull-request checkout.
+      # The alert directories are passed exactly as in the baseline run (with a
+      # trailing slash) so that both versions of SAGE receive identical arguments.
+      - name: Run SAGE on the updated branch
+        shell: bash
+        run: |
+          cd SAGE-updated/
+          python sage.py alerts/cptc-2017/ updated-2017 --dataset cptc --keep-files
+          python sage.py alerts/cptc-2018/ updated-2018 --dataset cptc --keep-files
+          python sage.py alerts/ccdc/ updated-ccdc --dataset other --keep-files
+          # Copy the four outputs of every experiment into the workspace root
+          for exp in updated-2017 updated-2018 updated-ccdc; do
+            cp -R "${exp}.txt" "${exp}.txt.ff.final.json" "${exp}.txt.ff.finalsinks.json" "${exp}AGs/" ..
+          done
+
+      ################################################################
+      #################    RUN REGRESSION TESTS    ###################
+      ################################################################
+
+      # The regression steps are skipped when the pull request carries the
+      # `changes-ags` label. `github.event.label` is only set on labeled/unlabeled
+      # events, which the default pull_request activity types do not include, so the
+      # former condition was always true; the label list of the pull request itself
+      # is inspected instead.
+      - name: Run regression tests on CPTC-2017
+        if: ${{ !contains(github.event.pull_request.labels.*.name, 'changes-ags') }}
+        shell: bash
+        run: |
+          ./test-ags.sh orig-2017 updated-2017
+
+      - name: Run regression tests CPTC-2018
+        if: ${{ !contains(github.event.pull_request.labels.*.name, 'changes-ags') }}
+        shell: bash
+        run: |
+          ./test-ags.sh orig-2018 updated-2018
+
+      - name: Run regression tests CCDC-2018
+        if: ${{ !contains(github.event.pull_request.labels.*.name, 'changes-ags') }}
+        shell: bash
+        run: |
+          ./test-ags.sh orig-ccdc updated-ccdc
+
+
+      ################################################################
+      ###################    RUN SINKS TESTS    ######################
+      ################################################################
+
+      # NOTE(review): an earlier step moves test-scripts/* into the workspace root and
+      # the SAGE outputs are copied there as well, yet these steps run inside
+      # SAGE-updated/ - confirm that test-sinks.sh and its inputs are found from there.
+      - name: Run sinks tests on CPTC-2017
+        shell: bash
+        working-directory: SAGE-updated/
+        run: |
+          ./test-sinks.sh orig-2017 updated-2017
+
+      - name: Run sinks tests on CPTC-2018
+        shell: bash
+        working-directory: SAGE-updated/
+        run: |
+          ./test-sinks.sh orig-2018 updated-2018
+
+      - name: Run sinks tests on CCDC-2018
+        shell: bash
+        working-directory: SAGE-updated/
+        run: |
+          ./test-sinks.sh orig-ccdc updated-ccdc
+
+      ################################################################
+      ##################    RUN PYTHON TESTS    ######################
+      ################################################################
+
+      # Runs tests.py from the pull-request checkout.
+      - name: Run Python tests for episodes
+        working-directory: SAGE-updated/
+        run: |
+          python tests.py
+
diff --git a/episode_sequence_generation.py b/episode_sequence_generation.py
@@ -32,34 +32,6 @@ def _get_episodes(alert_seq, mcat, plot=False):
     dx = 0.1
     frequencies = [len(x) for x in alert_seq]
 
-    # TODO: move these test cases into a separate test file
-    # test case 1: normal sequence
-    #y = [11, 0, 0, 2, 5, 2, 2, 2, 4, 2, 0, 0, 8, 6, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 13, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 9, 2]
-    # test case 2: start is not detected
-    #y = [ 0, 2, 145, 0, 0, 1, 101, 45, 0, 1, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
-    # test case 2.5: start not detected (unfinished)
-    #y = [39, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 28, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 0, 1, 2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1]
-    # test case 3: last peak not detected (unfinished)
-    #y = [36, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 17, 0, 0, 0, 0, 0, 0, 33, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 5, 6, 1, 2, 2]
-    # test case 4: last peak undetected (finished)
-    #y = [1, 0, 0, 1, 3, 0, 1, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-    # test case 5: end peak is not detected
-    #y = [1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 2, 0]
-    # test case 6: end peak uncompleted again not detected:
-    #y = [8, 4, 0, 0, 0, 4, 0, 0, 5, 0, 0, 1, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2]
-    # test case 7: single peak not detected (conjoined)
-    #y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 207, 0, 53, 24, 0, 0, 0, 0, 0, 0, 0]
-    # test case 8: another single peak not detected
-    #y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-    # test case 9: single peak at the very end
-    #y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 294]
-    # test case 10: ramp up at end
-    #y = [0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 300, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 271, 272]
-    #print(y)
-    #y = [1, 0, 64, 2]
-    #y = [2, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 2, 3]
-    #y = [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
-
     if sum(frequencies) == 0:
         return []
     if len(frequencies) == 1:  # Artificially augmenting list for a single action to be picked up

diff --git a/test-scripts/compare-ag-dirs.sh b/test-scripts/compare-ag-dirs.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# Compares two directories with AGs (presumably, generated by original and modified SAGE algorithm respectively).
+# Prints how many AGs are present in each directory, how many AGs are present in both directories,
+#   and how many AGs are present only in the first and only in the second directories.
+# If at least one AG is present in only one of the directories, it is also printed.
+#
+# Exit code: 0 if both directories contain the same set of AGs, 1 otherwise (also on wrong usage).
+#
+# NB! This script is based on .dot files, which are by default deleted during the execution of SAGE.
+#      To prevent the deletion, use the --keep-files option when running SAGE.
+
+set -euo pipefail
+IFS=$'\n\t'
+
+umask 077
+
+function usage(){
+    echo "Usage: $0 path/to/AGs path/to/AGs"
+}
+
+# Lists the AGs found in directory $1, one per line and sorted: the .dot file names
+#   with everything up to "attack-graph-for-victim-" and the ".dot" suffix removed.
+function list_ags(){
+    find "$1" -type f -name '*.dot' -printf '%f\n' | sed 's/^.*attack-graph-for-victim-\(.*\)$/\1/' | sed 's/\.dot$//' | sort
+}
+
+# Prints the newline-separated list $1 and prints nothing at all when the list is empty.
+# (`echo ""` would emit one empty line, which `wc -l` counts and `comm` treats as an entry;
+#   `printf '%s'` also leaves backslashes in the names untouched, unlike `echo -e`.)
+function print_list(){
+    if [[ -n "$1" ]]; then
+        printf '%s\n' "$1"
+    fi
+}
+
+# Prints the number of entries in the newline-separated list $1 (0 for an empty list)
+function count_list(){
+    print_list "$1" | wc -l
+}
+
+# Check if exactly two arguments are provided
+[[ $# -ne 2 ]] && { usage >&2 ; exit 1; }
+
+# Check if all input directories exist (the paths are normalised to end with a single slash)
+original=$(printf '%s/\n' "$1" | tr -s '/')
+modified=$(printf '%s/\n' "$2" | tr -s '/')
+! [[ -d "$original" ]] && { echo "$0: directory $original does not exist" >&2 ; exit 1 ; }
+! [[ -d "$modified" ]] && { echo "$0: directory $modified does not exist" >&2 ; exit 1 ; }
+
+# Find the generated AGs for each input directory
+original_ags=$(list_ags "$original")
+modified_ags=$(list_ags "$modified")
+
+# Split the AGs into those found in both directories and those found in only one of them
+common_ags=$(comm -12 <(print_list "$original_ags") <(print_list "$modified_ags"))
+only_original_ags=$(comm -23 <(print_list "$original_ags") <(print_list "$modified_ags"))
+only_modified_ags=$(comm -13 <(print_list "$original_ags") <(print_list "$modified_ags"))
+
+# Print the number of generated attack graphs for each input directory
+echo "Total number of AGs generated by the original algorithm: $(count_list "$original_ags")"
+echo "Total number of AGs generated by the modified algorithm: $(count_list "$modified_ags")"
+
+# Print the number of AGs generated by both algorithms (print "$common_ags" if you want to see which AGs these are)
+echo "Number of AGs generated both by the original and the modified algorithms: $(count_list "$common_ags")"
+
+# Print the number of attack graphs generated only by the original algorithm (and the AGs if there is at least one)
+only_original=$(count_list "$only_original_ags")
+echo "Number of AGs generated only by the original algorithm: $only_original"
+if [[ $only_original -ne 0 ]]; then
+    echo "AGs generated only by the original algorithm: "
+    print_list "$only_original_ags"
+fi
+
+# Print the number of attack graphs generated only by the modified algorithm (and the AGs if there is at least one)
+only_modified=$(count_list "$only_modified_ags")
+echo "Number of AGs generated only by the modified algorithm: $only_modified"
+if [[ $only_modified -ne 0 ]]; then
+    echo "AGs generated only by the modified algorithm: "
+    print_list "$only_modified_ags"
+fi
+
+# Exit with 0 if the directories are the same in terms of the generated AGs, otherwise exit with 1
+if [[ "$only_original" -eq 0 ]] && [[ "$only_modified" -eq 0 ]]; then
+    exit 0
+else
+    exit 1
+fi
diff --git a/test-scripts/diff-ags.sh b/test-scripts/diff-ags.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Compares two AGs based on the nodes and edges that they have (presumably, AGs generated by the original and modified algorithms respectively).
+#
+# If two .dot files with AGs are provided, then they are compared with each other.
+#   - If running in quiet mode (i.e. option -q , inspired by the "quick diff"), the script:
+#       - Prints that the graphs are different if at least one node or edge has been found by only one of the algorithms (exit code 1)
+#       - Prints nothing if there are no nodes or edges found by only one of the algorithms (exit code 0)
+#   - If running in normal mode (i.e. with no options), then stats generated by `diff-nodes.sh` and `diff-edges.sh` are printed.
+#     Both reports are always printed; if either script fails, its exit code becomes the exit code of this script.
+# If two directories are provided, then every pair of corresponding AGs is compared against each other (recursively).
+#   - The AGs that are different are reported.
+#   - To count the number of different AGs in the input directories, pipe the output to `wc` command (e.g. `./diff-ags.sh orig-2017AGs/ merged-sinks-2017AGs/ | wc -l`).
+#   - NB! Only the AGs present in both directories are compared. The ones that are present only in one of the directories are ignored.
+#
+# Options may be given separately or combined (e.g. `-q -i` or `-qi`); exactly two positional arguments must follow them.
+#
+# Note: The comparison is purely based on the node and edges of the graphs (their names and labels, to be precise).
+#       See also comments for `diff-nodes.sh` and `diff-edges.sh`.
+#
+# NB! This script assumes that the directory with the AGs has the name ExpNameAGs/ and that the AG files also have ExpName in their name (i.e. the name has not been changed).
+#     To avoid potential errors, please only use alphanumeric characters in the name of the experiments (i.e. no whitespaces, enters etc.).
+#
+# NB! This script calls `./diff-nodes.sh`, `./diff-edges.sh` and `./diff-ags.sh` relative to the current directory.
+#
+# NB! This script is based on .dot files, which are by default deleted during the execution of SAGE.
+#      To prevent the deletion, use the --keep-files option when running SAGE.
+
+set -euo pipefail
+IFS=$'\n\t'
+
+umask 077
+
+usage() {
+    echo -e "Usage: $0 [-i] [-q] originalAG.dot modifiedAG.dot\n       $0 [-i] originalAGs/ modifiedAGs/\n
+        -i\tremove node IDs when comparing the attack graphs
+        -q\treport only when attack graphs differ (do not print differences; no effect when comparing directories with the attack graphs)"
+}
+
+# Parse the options
+mode="normal"
+keep_ids="true"
+while getopts "qi" option; do
+    case ${option} in
+        q ) mode="quiet" ;;
+        i ) keep_ids="false" ;;  # Sort of "insensitive [to IDs]"
+        \? ) { usage >&2; exit 1; } ;;
+    esac
+done
+
+# Drop the parsed options. OPTIND counts the consumed arguments (not the option letters),
+#   so combined options such as `-qi` leave the positional arguments in the right place.
+shift $((OPTIND - 1))
+
+# Exactly two positional arguments (two AGs or two directories) must remain
+[[ $# -ne 2 ]] && { usage >&2 ; exit 1 ; }
+original="$1"
+modified="$2"
+
+# Options for the ./diff-nodes.sh and ./diff-edges.sh scripts
+# (deliberately expanded unquoted below, so that an empty option disappears from the command line)
+opt_mode=""
+opt_keep_ids=""
+[[ "$mode" == "quiet" ]] && opt_mode="-q"
+[[ "$keep_ids" == "false" ]] && opt_keep_ids="-i"
+
+# Comparing two .dot files with AGs
+if [[ "${original##*.}" == "dot" ]] && [[ "${modified##*.}" == "dot" ]]; then
+    # Check that both files exist
+    ! [[ -f "$original" ]] && { echo "$0: file $original does not exist" >&2 ; exit 1 ; }
+    ! [[ -f "$modified" ]] && { echo "$0: file $modified does not exist" >&2 ; exit 1 ; }
+
+    # When running in quiet mode, report if the graphs are different or exit quietly if they are the same
+    if [[ "$mode" == "quiet" ]]; then
+        ./diff-nodes.sh $opt_mode $opt_keep_ids "$original" "$modified" 1> /dev/null && ./diff-edges.sh $opt_mode $opt_keep_ids "$original" "$modified" 1> /dev/null || { echo "Attack graphs $original and $modified are different" ; exit 1 ; }
+    # When running in normal mode, show the common nodes and edges, and nodes and edges that are present in only one of the graphs (and their counts)
+    else
+        # Remember a failure instead of letting `set -e` stop the script before the edges are printed
+        status=0
+        ./diff-nodes.sh $opt_mode $opt_keep_ids "$original" "$modified" || status=$?
+        echo "--------------------------------"
+        ./diff-edges.sh $opt_mode $opt_keep_ids "$original" "$modified" || status=$?
+        exit "$status"
+    fi
+# Comparing two directories with .dot files with AGs
+elif [[ -d "$original" ]] && [[ -d "$modified" ]]; then
+    opt_mode="-q"
+    # Normalise the paths so that they end with exactly one slash
+    original=$(echo "$original/" | tr -s '/')
+    modified=$(echo "$modified/" | tr -s '/')
+
+    # Resolve experiment names (i.e. remove the `AGs` part from the names of the directories).
+    # Only the last path component is used: the AG file names contain the experiment name
+    #   but not the path to the directory, so a leading path must not end up in the name.
+    exp_original=$(basename "$original")
+    exp_original="${exp_original%AGs}"
+    exp_modified=$(basename "$modified")
+    exp_modified="${exp_modified%AGs}"
+
+    # This is the prefix used in every AG file name (ExpName-prefix-victim-mcatmserv)
+    prefix="-attack-graph-for-victim-"
+
+    # Actually compare the corresponding AGs in the input directories 
+    # First, find the AGs that are present in both directories (compare the file names without the experiment name and the prefix)
+    comm -12 <(find "$original" -type f -name '*.dot' -printf '%f\n' | sed 's@'"${exp_original}\.txt${prefix}"'@@' | sort)\
+             <(find "$modified" -type f -name '*.dot' -printf '%f\n' | sed 's@'"${exp_modified}\.txt${prefix}"'@@' | sort) |
+             sed 's@^\(.*\)$@./diff-ags.sh '"$opt_mode $opt_keep_ids ${original}${exp_original}\.txt${prefix}"'\1 '"${modified}${exp_modified}\.txt${prefix}"'\1@' |     # Create the commands for the shell
+         sh |                                                                                                                                                            # Execute these commands 
+         sed 's/^.*'"$prefix"'\(.*\)\.dot.*$/\1/' | sed 's/^\([0-9.]\+\)-/\1|/' | sed 's/\([A-Z]\+\)/\1|/'  # This line just creates consistent name (victim|mcat|mserv), instead of the default message
+else
+   usage >&2
+   exit 1 
+fi
+
+