From b1831bb31c0466287a5a855debb03b57e99a8f29 Mon Sep 17 00:00:00 2001
From: Soren Ptak <ptaksoren@gmail.com>
Date: Tue, 15 Aug 2023 12:34:12 -0700
Subject: [PATCH] Swap to cspell for spelling action

---
 spellings/action.yml                       | 114 +++++++++++++---
 spellings/cspell.config.yaml               |  22 +++
 spellings/getFiles                         | 122 +++++++++++++++++
 spellings/tools/README.md                  |  30 ----
 spellings/tools/ablexicon                  |  88 ------------
 spellings/tools/extract-comments           |  41 ------
 spellings/tools/find-unknown-comment-words | 152 ---------------------
 7 files changed, 238 insertions(+), 331 deletions(-)
 create mode 100644 spellings/cspell.config.yaml
 create mode 100755 spellings/getFiles
 delete mode 100644 spellings/tools/README.md
 delete mode 100755 spellings/tools/ablexicon
 delete mode 100755 spellings/tools/extract-comments
 delete mode 100755 spellings/tools/find-unknown-comment-words

diff --git a/spellings/action.yml b/spellings/action.yml
index a4e7e291..fda2f100 100644
--- a/spellings/action.yml
+++ b/spellings/action.yml
@@ -2,28 +2,102 @@ name: 'spellings'
 description: 'CI spellings check'
 inputs:
   path:
-    description: 'Path to repository folder to check spellings in.'
+    description: 'Path to repository folder to run the spell check in.'
     required: false
     default: ./
+  exclude-files:
+    description: 'List of comma-separated files to exclude from the spell check. Eg file1,file2'
+    required: false
+    default: ''
+  exclude-dirs:
+    description: 'List of comma-separated directories to exclude from the spell check. Eg docs,build'
+    required: false
+    default: ''
+  include-extensions:
+    description: 'List of comma-separated file extensions to include in the spell check, in addition to c and h. Eg md,dox,build'
+    required: false
+    default: ''
+
 runs:
   using: "composite"
   steps:
-      - name: Install spell
-        run: |
-          sudo apt-get install spell
-          sudo apt-get install util-linux
-        shell: bash
-      - name: Check spelling
-        working-directory: ${{ inputs.path }}
-        run: |
-          PATH=$PATH:$GITHUB_ACTION_PATH/tools
-          for lexfile in `find ./ -name lexicon.txt`
-          do dir=${lexfile%/lexicon.txt}
-            echo $dir
-            find-unknown-comment-words --directory $dir
-            if [ $? -ne "0" ]
-            then
-              exit 1
-            fi
-          done
-        shell: bash
+    - env:
+        stepName: Install Spell
+        bashPass: \033[32;1mPASSED -
+        bashInfo: \033[33;1mINFO -
+        bashFail: \033[31;1mFAILED -
+        bashEnd:  \033[0m
+      name: ${{ env.stepName }}
+      id: spell-checker-setup
+      shell: bash
+      run: |
+        #  ${{ env.stepName }}
+        echo "::group::${{ env.stepName }}"
+        sudo apt-get install spell -y
+        sudo apt-get install util-linux -y
+        sudo apt-get install fd-find -y
+
+        # This is a 1000+ line log for the install
+        # Wrap it in an echo group
+        echo "::group::NPM Install"
+        sudo apt-get install npm -y
+        sudo npm install -g cspell
+        echo "::endgroup::"
+
+        echo "$GITHUB_ACTION_PATH" >> "$GITHUB_PATH"
+        export PATH="$PATH:$GITHUB_ACTION_PATH"
+
+        # cp has an error code if it's asked to copy to the same file
+        # Wrap it in a set +e in case the input path is the working directory
+        set +e
+        cp "$GITHUB_ACTION_PATH/cspell.config.yaml" "${{ inputs.path }}"
+        set -e
+        echo "::endgroup::"
+
+        # Print each required command, then actually run it: errexit is back on, so a missing tool fails the step here.
+        echo -e "${{ env.bashInfo }} which getFiles ${{ env.bashEnd }}" && which getFiles
+        echo -e "${{ env.bashInfo }} fdfind --version ${{ env.bashEnd }}" && fdfind --version
+        echo -e "${{ env.bashInfo }} cspell --version ${{ env.bashEnd }}" && cspell --version
+
+        # Only get to here if everything above passes
+        echo -e "${{ env.bashPass }} ${{ env.stepName }} ${{ env.bashEnd }}"
+
+    - env:
+        stepName: Check Spelling
+        bashPass: \033[32;1mPASSED -
+        bashInfo: \033[33;1mINFO -
+        bashFail: \033[31;1mFAILED -
+        bashEnd:  \033[0m
+      name: ${{ env.stepName }}
+      id: run-spell-checker
+      working-directory: ${{ inputs.path }}
+      shell: bash
+      run: |
+        #  ${{ env.stepName }}
+        echo "::group::${{ env.stepName }}"
+
+        # Make the getFiles helper that ships with this action callable
+        export PATH="$PATH:$GITHUB_ACTION_PATH"
+
+        # Collect the list of files to spell check
+        files=$(getFiles --exclude-dirs="${{ inputs.exclude-dirs }}" --exclude-files="${{ inputs.exclude-files }}" --include-extensions="${{ inputs.include-extensions}}")
+
+        # Turn errexit off while checking so one failing file does not end the loop
+        set +e
+        spellingFailed=0
+        for sourceFile in ${files[@]}; do
+          # Record the failure and carry on so every file gets reported
+          if ! cspell --language-id C --color -c cspell.config.yaml --show-suggestions --show-context "$sourceFile"; then
+            spellingFailed=1
+          fi
+        done
+        set -e
+
+        echo "::endgroup::"
+        if [ $spellingFailed -ne 0 ]; then
+          echo -e "${{ env.bashFail }} ${{ env.stepName }} ${{ env.bashEnd }}"
+          exit 1
+        fi
+        # Only reached when cspell passed on every file
+        echo -e "${{ env.bashPass }} ${{ env.stepName }} ${{ env.bashEnd }}"
+
diff --git a/spellings/cspell.config.yaml b/spellings/cspell.config.yaml
new file mode 100644
index 00000000..5b0c7270
--- /dev/null
+++ b/spellings/cspell.config.yaml
@@ -0,0 +1,22 @@
+---
+$schema: https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json
+version: '0.2'
+# Treat compounds of known words (e.g. stringLength) as correctly spelled
+allowCompoundWords: true
+useGitignore: true
+# Open question: should this be split into one dictionary per repository?
+# Unless a single dictionary proves to be too slow,
+# keeping just one is probably simpler.
+dictionaryDefinitions:
+  - name: freertos-words
+    path: '.cSpellWords.txt'
+    addWords: true
+dictionaries:
+  - freertos-words
+ignorePaths:
+  - 'node_modules'
+  - '.cSpellWords.txt'
+  - 'dependency'
+  - 'docs'
+  - 'ThirdParty'
+
diff --git a/spellings/getFiles b/spellings/getFiles
new file mode 100755
index 00000000..f43e06ac
--- /dev/null
+++ b/spellings/getFiles
@@ -0,0 +1,122 @@
+#!/bin/bash
+bashPass="\033[32;1mPASSED -"
+bashInfo="\033[33;1mINFO -"
+bashFail="\033[31;1mFAILED -"
+bashEnd="\033[0m"
+
+# Values collected from the command line; everything defaults to empty
+files=""
+file=""
+excludeDirs=""
+excludeFiles=""
+includeExtensions=""
+
+# Make the assumption that Mac users will be using FreeBSD getopt, so parse the arguments by hand
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    while [ $# -gt 0 ]; do
+    case "$1" in
+        -ed=* | --exclude-dirs=* )
+            [ -z "${1#*=}" ] || excludeDirs="-E $(echo "${1#*=}" | sed -E 's/,/ -E /g')"
+            shift
+            ;;
+        -ef=* | --exclude-files=* )
+            [ -z "${1#*=}" ] || excludeFiles="-E $(echo "${1#*=}" | sed -E 's/,/ -E /g')"
+            shift
+            ;;
+        -ie=* | --include-extensions=* )
+            [ -z "${1#*=}" ] || includeExtensions="-e $(echo "${1#*=}" | sed -E 's/,/ -e /g')"
+            shift
+            ;;
+        -h | --help )
+                echo -e "$bashInfo Find all .c and .h files with the Amazon copyright in them $bashEnd"
+                echo -e "$bashInfo It prints one matching file per line to stdout $bashEnd"
+                echo -e "$bashInfo This script can take in three optional arguments $bashEnd"
+                echo -e "$bashInfo -ef= | --exclude-files=:       A comma separated list of files to exclude $bashEnd"
+                echo -e "$bashInfo -ed= | --exclude-dirs=:        A comma separated list of directories to exclude $bashEnd"
+                echo -e "$bashInfo -ie= | --include-extensions=:  Any additional extensions to search for $bashEnd"
+                exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+        esac
+    done
+    # For FreeBSD (default MacOS shell) assume that you have fd
+    # By default only grab the c and h files. Only grab the files with amazon copyright
+    # Put all the files in an array. Uncomment the following line to see the command run.
+    # echo "fd -e c -e h "$excludeDirs" "$excludeFiles" "$includeExtensions" --exec grep -liE \"copyright (.*) 20[0-9]{2} amazon.com\" "
+    # Need this in a set +/- e so it doesn't return an error on files that fail the grep
+    set +e
+    files=$(fd -e c -e h $excludeDirs $excludeFiles $includeExtensions --exec grep -liE "copyright (.*) 20[0-9]{2} amazon.com")
+    set -e
+
+# Making the assumption that anybody else running this is a gnu getopt user
+else
+    VALID_ARGS=$(getopt -o h --long help,exclude-dirs:,exclude-files:,include-extensions: -- "$@") || exit 1
+    eval set -- "$VALID_ARGS"
+    while [ $# -gt 0 ]; do
+        case "$1" in
+            --exclude-dirs )
+                # $2 holds the argument passed after --exclude-dirs
+                # Use sed to replace the commas with the exclude flag
+                if [ -n "$2" ]; then
+                    excludeDirs="-E $(echo "$2" | sed -r 's/,/ -E /g' )"
+                fi
+                shift 2
+                ;;
+
+            --exclude-files )
+                # $2 holds the argument passed after --exclude-files
+                # Use sed to replace the commas with the exclude flag
+                if [ -n "$2" ]; then
+                    excludeFiles="-E $( echo "$2" | sed -r 's/,/ -E /g' )"
+                fi
+                shift 2
+                ;;
+
+            --include-extensions )
+                # $2 holds the argument passed after --include-extensions
+                # Use sed to replace the commas with the extension flag
+                if [ -n "$2" ]; then
+                    includeExtensions="-e $( echo "$2" | sed -r 's/,/ -e /g' )"
+                fi
+                shift 2
+                ;;
+
+            -h | --help )
+                echo -e "$bashInfo Find all .c and .h files with the Amazon copyright in them $bashEnd"
+                echo -e "$bashInfo It prints one matching file per line to stdout $bashEnd"
+                echo -e "$bashInfo This script can take in three optional arguments $bashEnd"
+                echo -e "$bashInfo --exclude-files:       A comma separated list of files to exclude $bashEnd"
+                echo -e "$bashInfo --exclude-dirs:        A comma separated list of directories to exclude $bashEnd"
+                echo -e "$bashInfo --include-extensions:  Any additional extensions to search for $bashEnd"
+                exit 0
+                ;;
+            -- ) shift; break ;;
+            * )
+                echo "Unknown option: $1"; exit 1
+                ;;
+            esac
+    done
+    # Make the assumption that gnu getopt means you're going to use fdfind vs fd
+    # By default only grab the c and h files. Only grab the files with amazon copyright
+    # Put all the files in an array. Uncomment the following line to see the command run.
+    # echo "fdfind -e c -e h "$excludeDirs" "$excludeFiles" "$includeExtensions" --exec grep -liE \"copyright (.*) 20[0-9]{2} amazon.com\" "
+    # Need this in a set +/- e so it doesn't return an error on files that fail the grep
+    set +e
+    files=$(fdfind -e c -e h $excludeDirs $excludeFiles $includeExtensions --exec grep -liE "copyright (.*) 20[0-9]{2} amazon.com" )
+    set -e
+fi
+
+# For future FreeBSD users:
+# By default if you try and capture this output like you see on the github workflows
+# By doing something like files=$(getFiles <ARGS>) it will look like it doesn't work
+# This is because on a Mac's default shell this will expand to a single variable
+# But on ubuntu's default shell it will expand to an array that can be iterated over
+# My recommendation would be to uncomment the fd command above and capture that
+for file in ${files[@]}; do
+    if [ -n "$file" ]; then
+        echo "$file"
+    fi
+done
diff --git a/spellings/tools/README.md b/spellings/tools/README.md
deleted file mode 100644
index b0bd89aa..00000000
--- a/spellings/tools/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Pre-requisites to running the spell check scripts
-
-1. In your GNU environment, install the *spell* and *getopt* programs. Use the following commands in Debian distributions, to install the packages (*getopt* is part of the `util-linux` package):
-   ```shell
-   apt-get install spell
-   apt-get install util-linux
-   ```
-
-1. Add the folder containing the **spellings/tools/ablexicon**, **spellings/tools/extract-comments**, and **spellings/tools/find-unknown-comment-words** scripts to your system's PATH.
-   ```shell
-   export PATH=<REPO_ROOT>/spellings/tools:$PATH
-   ```
-
-# How to create a lexicon.txt for a new library.
-
-1. Ensure there does not exist a file called "lexicon.txt" in your library's root directory. Run the following command to create a lexicon.txt for your library:
-   ```shell
-   find-unknown-comment-words -d /path/to/your/library/root > /path/to/your/library/root/lexicon.txt
-   ```
-
-1. Check the contents of */path/to/your/library/root/lexicon.txt* for any misspelled words. Fix them in your library's source code and delete them from the lexicon.txt.
-
-# How to run for changes to an existing library.
-
-1. If there exists a lexicon.txt in the library's root directory, run the following command:
-   ```shell
-   find-unknown-comment-words -d /path/to/your/library/root/lexicon.txt
-   ```
-
-1. Add any non-dictionary correctly spelled words to */path/to/your/library/root/lexicon.txt*. Fix any misspelled words in your code comment change.
diff --git a/spellings/tools/ablexicon b/spellings/tools/ablexicon
deleted file mode 100755
index de790a93..00000000
--- a/spellings/tools/ablexicon
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-#
-# ablexicon - Compare an input list of words against a dictionary and
-# optional lexicon.  If any words are in neither the dictionary nor the
-# lexicon, log them to stdout.
-#
-set -e
-set -f
-
-function usage () {
-    echo "Find occurrences of non-dictionary/lexicon words"
-    echo ""
-    echo "Usage:"
-    echo " ${0##*/} [options]"
-    echo ""
-    echo "Options:"
-    echo " -f, --file         source text (defaults to /dev/fd/0)"
-    echo " -l, --lexicon      lexicon file (one word per line)"
-    echo " -h, --help         display this help"
-    exit 1
-}
-
-#
-# Verify that required commands are present
-#
-REQUIRED=( "spell" "getopt" )
-for i in "${REQUIRED[@]}"
-do
-    command -v $i"" >/dev/null
-    if [ $? -ne "0" ]
-    then
-        echo "'"$i"' must be installed, exiting...">&2
-        exit 1
-    fi
-done
-
-GETOPT_OUT=`getopt -o hf:l: --long help,file:,lexicon: -n "${0##*/}" -- "$@"`
-if [ $? != 0 ]
-then
-    echo "Exiting..." >&2
-    exit 1
-fi
-
-eval set -- "$GETOPT_OUT"
-
-INFILE=/dev/fd/0
-LEXICON=/dev/null
-while true; do
-  case "$1" in
-    -h | --help ) usage $0 ;;
-    -f | --file ) INFILE="$2"; shift 2 ;;
-    -l | --lexicon ) LEXICON="$2"; shift 2 ;;
-    -- ) shift; break ;;
-    * ) break ;;
-  esac
-done
-
-if [ ! -f $INFILE"" ] && [ $INFILE"" != /dev/fd/0 ]
-then
-    echo "Invalid input file"
-    usage
-fi
-
-#
-# Search for all input words, sort them removing duplicate words in
-# the process and then find them in the dictionary.
-#
-for word in `cat $INFILE"" | sort -u | spell`
-do
-    #
-    # Search for each remaining word in the lexicon
-    #
-    if [ $LEXICON"" != /dev/null ]
-    then
-        if ! grep -w -q "$word" $LEXICON""
-        then
-            #
-            # The word is neither in the dictionary nor the lexicon, send
-            # it to stdout.
-            #
-            echo $word
-        fi
-    else
-        # if the lexicon is /dev/null i.e. it is not provided by the user,
-        # then we should just echo the word.
-        echo $word
-    fi
-done
diff --git a/spellings/tools/extract-comments b/spellings/tools/extract-comments
deleted file mode 100755
index f52cb303..00000000
--- a/spellings/tools/extract-comments
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-#
-# Extract comments from C/C++ files
-#
-set -e
-set -f
-
-function usage () {
-    echo "Extract comments from C/C++ files"
-    echo ""
-    echo "usage: "${0##*/}" file-list"
-    exit 1
-}
-
-if [ $# -lt 1 ]
-then
-	usage $0
-fi
-
-if [ $1 = "-h" ] || [ $1 == "--help" ]
-then
-    usage $0
-fi
-
-while test $# -gt 0
-do
-    if [ ! -f $1 ]
-    then
-        echo $0": '"$1"' is not a file." 2>/dev/null
-        exit 1
-    fi
-#
-# Extract all words from C/C++ language comments; add line
-# numbers to aid in searching.
-#
-# NOTE: This has some limitations.  For example, it prints
-# non-comment text at the beginning of a comment line.
-#
-    nl -ba $1 | awk '/\/\// {print $0}; /\/\*/ {comment=1}; {if(comment) print $0}; /\*\// {comment=0}'
-    shift
-done
diff --git a/spellings/tools/find-unknown-comment-words b/spellings/tools/find-unknown-comment-words
deleted file mode 100755
index 228cb65c..00000000
--- a/spellings/tools/find-unknown-comment-words
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/bin/bash
-#
-# Locate unknown words in C/C++ comments.  Uses "extract-comments"
-# and "ablexicon" scripts.
-#
-set -o nounset
-set -o pipefail
-set -o errexit
-set -f
-
-BLUE="\e[1;34m"
-GREEN="\e[1;32m"
-DEFAULTFG="\e[39m"
-
-function usage () {
-    echo "Find unknown words in C/C++ comments"
-    echo ""
-    echo "Usage:"
-    echo " ${0##*/} [options]"
-    echo ""
-    echo "Options:"
-    echo " -d, --directory    directory to scan (defaults to .)"
-    echo " -l, --lexicon      lexicon file (one word per line, default 'lexicon.txt')"
-    echo " -t, --terse        terse output only (enabled if no lexicon available)"
-    echo " -h, --help         display this help"
-    exit 1
-}
-
-#
-# Verify that required commands are present
-#
-REQUIRED=( "extract-comments" "ablexicon" "getopt" )
-for i in "${REQUIRED[@]}"
-do
-    command -v $i"" >/dev/null
-    if [ $? -ne "0" ]
-    then
-        echo "Can't find '"$i"' , exiting...">&2
-        exit 1
-    fi
-done
-
-GETOPT_OUT=`getopt -o htd:l: --long help,terse,directory:,lexicon: -n "${0##*/}" -- "$@"`
-if [ $? != 0 ]
-then
-    echo "Exiting..." >&2
-    exit 1
-fi
-
-eval set -- "$GETOPT_OUT"
-
-DIRNAME=/dev/fd/0
-LEXICON=
-STATUS=
-TERSE=
-while true; do
-  case "$1" in
-    -h | --help ) usage $0 ;;
-    -t | --terse ) TERSE=1; shift ;;
-    -d | --directory ) DIRNAME="$2"; shift 2 ;;
-    -l | --lexicon ) LEXICON="$2"; shift 2 ;;
-    -- ) shift; break ;;
-    * ) break ;;
-  esac
-done
-
-if [ ! -d $DIRNAME"" ]
-then
-    echo "Invalid directory: "$DIRNAME
-    usage
-fi
-
-if [ $LEXICON"" = "" ]
-then
-    if [ -f $DIRNAME/lexicon.txt ]
-    then
-        LEXICON=$DIRNAME/lexicon.txt
-    else
-        LEXICON=/dev/null
-        TERSE=1
-    fi
-fi
-
-TMPFILE=${0##*/}-$USER-$RANDOM
-
-unknowns=( "not-used" )     # get around empty array with nounset
-# Symlinks will be ignored in this spell check. `-type f` switch in `find` command will ignore symlinks.
-
-extract-comments  `find $DIRNAME \( -iname \*.[ch] -o -iname \*.dox \) -type f` |
-    tr [:upper:] [:lower:] |
-    grep -o -E '[a-zA-Z]+' |
-    ablexicon -l $LEXICON > $TMPFILE
-
-readarray -O 1 -t unknowns < $TMPFILE
-rm -f $TMPFILE
-
-for word in "${unknowns[@]}"
-do
-    if [ $word"" == "not-used" ]
-	then
-        continue
-	fi
-
-    if [ $TERSE"" != "" ]
-    then
-        echo $word
-        continue
-    fi
-
-    # Symlinks will be ignored in this spell check. `-type f` switch in `find` command will ignore symlinks.
-    for file in `find $DIRNAME \( -iname \*.[ch] -o -iname \*.dox \) -type f`
-    do
-        if [[ $file == *"third_party"*  || $file == *"CMock"* ]]
-        then
-            continue
-        fi
-        # Disable errexit here, extract-comments can return non-zero
-        set +e
-        #
-        # A little inefficient here; we will grep twice, once to detect
-        # the unknown word and another to print it with color highlighting.
-        # If there's a way to preserve ANSI color output with the first
-        # search and reuse it within the if statement (I gave up trying
-        # to find one after a few minutes), that would be nice.
-        #
-        extract-comments $file | grep -iw $word > /dev/null
-        if [ $? == "0" ]
-        then
-            if [ $STATUS"" != "1"  ]
-            then
-                echo -e $GREEN"############################################################################"$DEFAULTFG
-                echo -e $GREEN"#"$DEFAULTFG
-                echo -e $GREEN"#  Unknown word(s) found.  Please either correct the spelling or add them"$DEFAULTFG
-                echo -e $GREEN"#  to the lexicon file '"$LEXICON"'".$DEFAULTFG
-                echo -e $GREEN"#"$DEFAULTFG
-                echo -e $GREEN"############################################################################"$DEFAULTFG
-                STATUS=1  # Return non-zero status if any unidentified words are found
-            fi
-            echo ""
-            echo -e $BLUE$file$DEFAULTFG
-            echo ""
-            extract-comments $file | grep --color=always -iw $word | GREP_COLORS="mt=01;32" grep --color=always -E -e '^[ \t]*[0-9]+'
-        fi
-        # Re-enable errexit
-        set -o errexit
-    done
-done
-
-if [ $STATUS"" = "1" ]
-then
-    exit 1
-fi