From b1831bb31c0466287a5a855debb03b57e99a8f29 Mon Sep 17 00:00:00 2001 From: Soren Ptak Date: Tue, 15 Aug 2023 12:34:12 -0700 Subject: [PATCH] Swap to cspell for spelling action --- spellings/action.yml | 114 +++++++++++++--- spellings/cspell.config.yaml | 22 +++ spellings/getFiles | 122 +++++++++++++++++ spellings/tools/README.md | 30 ---- spellings/tools/ablexicon | 88 ------------ spellings/tools/extract-comments | 41 ------ spellings/tools/find-unknown-comment-words | 152 --------------------- 7 files changed, 238 insertions(+), 331 deletions(-) create mode 100644 spellings/cspell.config.yaml create mode 100755 spellings/getFiles delete mode 100644 spellings/tools/README.md delete mode 100755 spellings/tools/ablexicon delete mode 100755 spellings/tools/extract-comments delete mode 100755 spellings/tools/find-unknown-comment-words diff --git a/spellings/action.yml b/spellings/action.yml index a4e7e291..fda2f100 100644 --- a/spellings/action.yml +++ b/spellings/action.yml @@ -2,28 +2,102 @@ name: 'spellings' description: 'CI spellings check' inputs: path: - description: 'Path to repository folder to check spellings in.' + description: 'Path to repository folder to run the spell check in.' required: false default: ./ + exclude-files: + description: 'List of comma-separated files to exclude from the spell check. Eg file1,file2' + required: false + default: '' + exclude-dirs: + description: 'List of comma-separated directories to exclude from the spell check. Eg docs,build' + required: false + default: '' + include-extensions: + description: 'List of comma-separated file endings to include into the spell check. 
Eg md,dox,build' + required: false + default: '' + runs: using: "composite" steps: - - name: Install spell - run: | - sudo apt-get install spell - sudo apt-get install util-linux - shell: bash - - name: Check spelling - working-directory: ${{ inputs.path }} - run: | - PATH=$PATH:$GITHUB_ACTION_PATH/tools - for lexfile in `find ./ -name lexicon.txt` - do dir=${lexfile%/lexicon.txt} - echo $dir - find-unknown-comment-words --directory $dir - if [ $? -ne "0" ] - then - exit 1 - fi - done - shell: bash + - env: + stepName: Install Spell + bashPass: \033[32;1mPASSED - + bashInfo: \033[33;1mINFO - + bashFail: \033[31;1mFAILED - + bashEnd: \033[0m + name: ${{ env.stepName }} + id: spell-checker-setup + shell: bash + run: | + # ${{ env.stepName }} + echo "::group::${{ env.stepName }}" + sudo apt-get install spell -y + sudo apt-get install util-linux -y + sudo apt-get install fd-find -y + + # This is a 1000+ line log for the install + # Wrap it in an echo group + echo "::group::NPM Install" + sudo apt-get install npm -y + sudo npm install -g cspell + echo "::endgroup::" + + echo "$GITHUB_ACTION_PATH" >> "$GITHUB_PATH" + export PATH="$PATH:$GITHUB_ACTION_PATH" + + # cp has an error code if it's asked to copy to the same file + # Wrap it in a set +e in case the input path is the action's own directory + set +e + cp "$GITHUB_ACTION_PATH/cspell.config.yaml" "${{ inputs.path }}" + set -e + echo "::endgroup::" + + # Make sure we have all the commands we need. 
+ echo -e "${{ env.bashInfo }} which getFiles ${{ env.bashEnd }}" && which getFiles + echo -e "${{ env.bashInfo }} fdfind --version ${{ env.bashEnd }}" && fdfind --version + echo -e "${{ env.bashInfo }} cspell --version ${{ env.bashEnd }}" && cspell --version + + # Only get to here if every command above was found and ran successfully + echo -e "${{ env.bashPass }} ${{ env.stepName }} ${{ env.bashEnd }}" + + - env: + bashPass: \033[32;1mPASSED - + bashInfo: \033[33;1mINFO - + bashFail: \033[31;1mFAILED - + bashEnd: \033[0m + stepName: Check Spelling + name: ${{ env.stepName}} + id: run-spell-checker + working-directory: ${{ inputs.path }} + shell: bash + run: | + # ${{ env.stepName }} + echo "::group::${{ env.stepName }}" + + # Add helper script to path + export PATH="$PATH:$GITHUB_ACTION_PATH" + + # Get all files + files=$(getFiles --exclude-dirs="${{ inputs.exclude-dirs }}" --exclude-files="${{ inputs.exclude-files }}" --include-extensions="${{ inputs.include-extensions}}") + + # Wrap the check in set +e so it runs against all files + set +e + exitStatus=0 + for file in ${files[@]}; do + cspell --language-id C --color -c cspell.config.yaml --show-suggestions --show-context "$file" + if ! [ $? -eq 0 ]; then + exitStatus=1 + fi + done + set -e + + echo "::endgroup::" + if [ $exitStatus -eq 0 ]; then + echo -e "${{ env.bashPass }} ${{ env.stepName }} ${{ env.bashEnd }}" + else + echo -e "${{ env.bashFail }} ${{ env.stepName }} ${{ env.bashEnd }}" + exit 1 + fi + diff --git a/spellings/cspell.config.yaml b/spellings/cspell.config.yaml new file mode 100644 index 00000000..5b0c7270 --- /dev/null +++ b/spellings/cspell.config.yaml @@ -0,0 +1,22 @@ +--- +$schema: https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json +version: '0.2' +# Allows things like stringLength +allowCompoundWords: true +useGitignore: true +# Could split this up? And do a dictionary for each repo? +# But feel like if this isn't super slow +# That having just one single dictionary might be nicer? 
+dictionaryDefinitions: + - name: freertos-words + path: '.cSpellWords.txt' + addWords: true +dictionaries: + - freertos-words +ignorePaths: + - 'node_modules' + - '.cSpellWords.txt' + - 'dependency' + - 'docs' + - 'ThirdParty' + diff --git a/spellings/getFiles b/spellings/getFiles new file mode 100755 index 00000000..f43e06ac --- /dev/null +++ b/spellings/getFiles @@ -0,0 +1,122 @@ +#!/bin/bash +bashPass="\033[32;1mPASSED -" +bashInfo="\033[33;1mINFO -" +bashFail="\033[31;1mFAILED -" +bashEnd="\033[0m" + +# Defaults for the values filled in by the option parsing below +files="" +file="" +excludeDirs="" +excludeFiles="" +includeExtensions="" + +# Make the assumption that Mac users will be using FreeBSD Get Opt +if [[ "$OSTYPE" == "darwin"* ]]; then + while [ $# -gt 0 ]; do + case "$1" in + -ed=* | --exclude-dirs=* ) + [ -n "${1#*=}" ] && excludeDirs="-E $(echo "${1#*=}" | sed -E 's/,/ -E /g')" + shift + ;; + -ef=* | --exclude-files=* ) + [ -n "${1#*=}" ] && excludeFiles="-E $(echo "${1#*=}" | sed -E 's/,/ -E /g')" + shift + ;; + -ie=* | --include-extensions=* ) + [ -n "${1#*=}" ] && includeExtensions="-e $(echo "${1#*=}" | sed -E 's/,/ -e /g')" + shift + ;; + -h | --help ) + echo -e "$bashInfo Find all .c and .h files with the Amazon copyright in them $bashEnd" + echo -e "$bashInfo It prints the matching file names to stdout, one per line $bashEnd" + echo -e "$bashInfo This script can take in three optional arguments $bashEnd" + echo -e "$bashInfo -ef= | --exclude-files=: A comma separated list of files to exclude $bashEnd" + echo -e "$bashInfo -ed= | --exclude-dirs=: A comma separated list of directories to exclude $bashEnd" + echo -e "$bashInfo -ie= | --include-extensions=: Any additional extensions to search for $bashEnd" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + done + # For FreeBSD (default MacOS shell) assume that you have fd + # By default only grab the c and h files. Only grab the files with amazon copyright + # Put all the files in an array. Uncomment the following line to see the command run. 
+ # echo "fd -e c -e h "$excludeDirs" "$excludeFiles" "$includeExtensions" --exec grep -liE \"copyright (.*) 20[0-9]{2} amazon.com\" " + # Need this in a set +/- e so it doesn't return an error on files that fail the grep + set +e + files=$(fd -e c -e h $excludeDirs $excludeFiles $includeExtensions --exec grep -liE "copyright (.*) 20[0-9]{2} amazon.com") + set -e + +# Making the assumption that anybody else running this is a gnu getopt user +else + VALID_ARGS=$(getopt -o h --long help,exclude-dirs:,exclude-files:,include-extensions: -- "$@") || exit 1 + eval set -- "$VALID_ARGS" + while [ $# -gt 0 ]; do + case "$1" in + --exclude-dirs ) + # $2 holds the argument passed after --exclude-dirs + # Use sed to replace the commas with the exclude flag + if ! [ -z "$2" ]; then + excludeDirs="-E $(echo "$2" | sed -r 's/,/ -E /g' )" + fi + shift 2 + ;; + + --exclude-files ) + # $2 holds the argument passed after --exclude-files + # Use sed to replace the commas with the exclude flag + if ! [ -z "$2" ]; then + excludeFiles="-E $( echo "$2" | sed -r 's/,/ -E /g' )" + fi + shift 2 + ;; + + --include-extensions ) + # $2 holds the argument passed after --include-extensions + # Use sed to replace the commas with the extension flag + if ! 
[ -z "$2" ]; then + includeExtensions="-e $( echo "$2" | sed -r 's/,/ -e /g' )" + fi + shift 2 + ;; + + -h | --help ) + echo -e "$bashInfo Find all .c and .h files with the Amazon copyright in them $bashEnd" + echo -e "$bashInfo It prints the matching file names to stdout, one per line $bashEnd" + echo -e "$bashInfo This script can take in three optional arguments $bashEnd" + echo -e "$bashInfo --exclude-files: A comma separated list of files to exclude $bashEnd" + echo -e "$bashInfo --exclude-dirs: A comma separated list of directories to exclude $bashEnd" + echo -e "$bashInfo --include-extensions: Any additional extensions to search for $bashEnd" + exit 0 + ;; + -- ) + shift + break + ;; + esac + done + # Make the assumption that gnu getopt means you're going to use fdfind vs fd + # By default only grab the c and h files. Only grab the files with amazon copyright + # Put all the files in an array. Uncomment the following line to see the command run. + # echo "fdfind -e c -e h "$excludeDirs" "$excludeFiles" "$includeExtensions" --exec grep -liE \"copyright (.*) 20[0-9]{2} amazon.com\" " + # Need this in a set +/- e so it doesn't return an error on files that fail the grep + set +e + files=$(fdfind -e c -e h $excludeDirs $excludeFiles $includeExtensions --exec grep -liE "copyright (.*) 20[0-9]{2} amazon.com" ) + set -e +fi + +# For future FreeBSD users: +# By default if you try and capture this output like you see on the github workflows +# By doing something like files=$(getFiles ) it will look like it doesn't work +# This is because on a Mac's default shell this will expand to a single variable +# But on ubuntu's default shell it will expand to an array that can be iterated over +# My recommendation would be to uncomment the fd command above and capture that +for file in ${files[@]}; do + if ! 
[ -z "$file" ]; then + echo "$file" + fi +done diff --git a/spellings/tools/README.md b/spellings/tools/README.md deleted file mode 100644 index b0bd89aa..00000000 --- a/spellings/tools/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# Pre-requisites to running the spell check scripts - -1. In your GNU environment, install the *spell* and *getopt* programs. Use the following commands in Debian distributions, to install the packages (*getopt* is part of the `util-linux` package): - ```shell - apt-get install spell - apt-get install util-linux - ``` - -1. Add the folder containing the **spellings/tools/ablexicon**, **spellings/tools/extract-comments**, and **spellings/tools/find-unknown-comment-words** scripts to your system's PATH. - ```shell - export PATH=/spellings/tools:$PATH - ``` - -# How to create a lexicon.txt for a new library. - -1. Ensure there does not exist a file called "lexicon.txt" in your library's root directory. Run the following command to create a lexicon.txt for your library: - ```shell - find-unknown-comment-words -d /path/to/your/library/root > /path/to/your/library/root/lexicon.txt - ``` - -1. Check the contents of */path/to/your/library/root/lexicon.txt* for any misspelled words. Fix them in your library's source code and delete them from the lexicon.txt. - -# How to run for changes to an existing library. - -1. If there exists a lexicon.txt in the library's root directory, run the following command: - ```shell - find-unknown-comment-words -d /path/to/your/library/root/lexicon.txt - ``` - -1. Add any non-dictionary correctly spelled words to */path/to/your/library/root/lexicon.txt*. Fix any misspelled words in your code comment change. diff --git a/spellings/tools/ablexicon b/spellings/tools/ablexicon deleted file mode 100755 index de790a93..00000000 --- a/spellings/tools/ablexicon +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# -# ablexicon - Compare an input list of words against a dictionary and -# optional lexicon. 
If any words are in neither the dictionary nor the -# lexicon, log them to stdout. -# -set -e -set -f - -function usage () { - echo "Find occurrences of non-dictionary/lexicon words" - echo "" - echo "Usage:" - echo " ${0##*/} [options]" - echo "" - echo "Options:" - echo " -f, --file source text (defaults to /dev/fd/0)" - echo " -l, --lexicon lexicon file (one word per line)" - echo " -h, --help display this help" - exit 1 -} - -# -# Verify that required commands are present -# -REQUIRED=( "spell" "getopt" ) -for i in "${REQUIRED[@]}" -do - command -v $i"" >/dev/null - if [ $? -ne "0" ] - then - echo "'"$i"' must be installed, exiting...">&2 - exit 1 - fi -done - -GETOPT_OUT=`getopt -o hf:l: --long help,file:,lexicon: -n "${0##*/}" -- "$@"` -if [ $? != 0 ] -then - echo "Exiting..." >&2 - exit 1 -fi - -eval set -- "$GETOPT_OUT" - -INFILE=/dev/fd/0 -LEXICON=/dev/null -while true; do - case "$1" in - -h | --help ) usage $0 ;; - -f | --file ) INFILE="$2"; shift 2 ;; - -l | --lexicon ) LEXICON="$2"; shift 2 ;; - -- ) shift; break ;; - * ) break ;; - esac -done - -if [ ! -f $INFILE"" ] && [ $INFILE"" != /dev/fd/0 ] -then - echo "Invalid input file" - usage -fi - -# -# Search for all input words, sort them removing duplicate words in -# the process and then find them in the dictionary. -# -for word in `cat $INFILE"" | sort -u | spell` -do - # - # Search for each remaining word in the lexicon - # - if [ $LEXICON"" != /dev/null ] - then - if ! grep -w -q "$word" $LEXICON"" - then - # - # The word is neither in the dictionary nor the lexicon, send - # it to stdout. - # - echo $word - fi - else - # if the lexicon is /dev/null i.e. it is not provided by the user, - # then we should just echo the word. 
- echo $word - fi -done diff --git a/spellings/tools/extract-comments b/spellings/tools/extract-comments deleted file mode 100755 index f52cb303..00000000 --- a/spellings/tools/extract-comments +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# -# Extract comments from C/C++ files -# -set -e -set -f - -function usage () { - echo "Extract comments from C/C++ files" - echo "" - echo "usage: "${0##*/}" file-list" - exit 1 -} - -if [ $# -lt 1 ] -then - usage $0 -fi - -if [ $1 = "-h" ] || [ $1 == "--help" ] -then - usage $0 -fi - -while test $# -gt 0 -do - if [ ! -f $1 ] - then - echo $0": '"$1"' is not a file." 2>/dev/null - exit 1 - fi -# -# Extract all words from C/C++ language comments; add line -# numbers to aid in searching. -# -# NOTE: This has some limitations. For example, it prints -# non-comment text at the beginning of a comment line. -# - nl -ba $1 | awk '/\/\// {print $0}; /\/\*/ {comment=1}; {if(comment) print $0}; /\*\// {comment=0}' - shift -done diff --git a/spellings/tools/find-unknown-comment-words b/spellings/tools/find-unknown-comment-words deleted file mode 100755 index 228cb65c..00000000 --- a/spellings/tools/find-unknown-comment-words +++ /dev/null @@ -1,152 +0,0 @@ -#!/bin/bash -# -# Locate unknown words in C/C++ comments. Uses "extract-comments" -# and "ablexicon" scripts. 
-# -set -o nounset -set -o pipefail -set -o errexit -set -f - -BLUE="\e[1;34m" -GREEN="\e[1;32m" -DEFAULTFG="\e[39m" - -function usage () { - echo "Find unknown words in C/C++ comments" - echo "" - echo "Usage:" - echo " ${0##*/} [options]" - echo "" - echo "Options:" - echo " -d, --directory directory to scan (defaults to .)" - echo " -l, --lexicon lexicon file (one word per line, default 'lexicon.txt')" - echo " -t, --terse terse output only (enabled if no lexicon available)" - echo " -h, --help display this help" - exit 1 -} - -# -# Verify that required commands are present -# -REQUIRED=( "extract-comments" "ablexicon" "getopt" ) -for i in "${REQUIRED[@]}" -do - command -v $i"" >/dev/null - if [ $? -ne "0" ] - then - echo "Can't find '"$i"' , exiting...">&2 - exit 1 - fi -done - -GETOPT_OUT=`getopt -o htd:l: --long help,terse,directory:,lexicon: -n "${0##*/}" -- "$@"` -if [ $? != 0 ] -then - echo "Exiting..." >&2 - exit 1 -fi - -eval set -- "$GETOPT_OUT" - -DIRNAME=/dev/fd/0 -LEXICON= -STATUS= -TERSE= -while true; do - case "$1" in - -h | --help ) usage $0 ;; - -t | --terse ) TERSE=1; shift ;; - -d | --directory ) DIRNAME="$2"; shift 2 ;; - -l | --lexicon ) LEXICON="$2"; shift 2 ;; - -- ) shift; break ;; - * ) break ;; - esac -done - -if [ ! -d $DIRNAME"" ] -then - echo "Invalid directory: "$DIRNAME - usage -fi - -if [ $LEXICON"" = "" ] -then - if [ -f $DIRNAME/lexicon.txt ] - then - LEXICON=$DIRNAME/lexicon.txt - else - LEXICON=/dev/null - TERSE=1 - fi -fi - -TMPFILE=${0##*/}-$USER-$RANDOM - -unknowns=( "not-used" ) # get around empty array with nounset -# Symlinks will be ignored in this spell check. `-type f` switch in `find` command will ignore symlinks. 
- -extract-comments `find $DIRNAME \( -iname \*.[ch] -o -iname \*.dox \) -type f` | - tr [:upper:] [:lower:] | - grep -o -E '[a-zA-Z]+' | - ablexicon -l $LEXICON > $TMPFILE - -readarray -O 1 -t unknowns < $TMPFILE -rm -f $TMPFILE - -for word in "${unknowns[@]}" -do - if [ $word"" == "not-used" ] - then - continue - fi - - if [ $TERSE"" != "" ] - then - echo $word - continue - fi - - # Symlinks will be ignored in this spell check. `-type f` switch in `find` command will ignore symlinks. - for file in `find $DIRNAME \( -iname \*.[ch] -o -iname \*.dox \) -type f` - do - if [[ $file == *"third_party"* || $file == *"CMock"* ]] - then - continue - fi - # Disable errexit here, extract-comments can return non-zero - set +e - # - # A little inefficient here; we will grep twice, once to detect - # the unknown word and another to print it with color highlighting. - # If there's a way to preserve ANSI color output with the first - # search and reuse it within the if statement (I gave up trying - # to find one after a few minutes), that would be nice. - # - extract-comments $file | grep -iw $word > /dev/null - if [ $? == "0" ] - then - if [ $STATUS"" != "1" ] - then - echo -e $GREEN"############################################################################"$DEFAULTFG - echo -e $GREEN"#"$DEFAULTFG - echo -e $GREEN"# Unknown word(s) found. Please either correct the spelling or add them"$DEFAULTFG - echo -e $GREEN"# to the lexicon file '"$LEXICON"'".$DEFAULTFG - echo -e $GREEN"#"$DEFAULTFG - echo -e $GREEN"############################################################################"$DEFAULTFG - STATUS=1 # Return non-zero status if any unidentified words are found - fi - echo "" - echo -e $BLUE$file$DEFAULTFG - echo "" - extract-comments $file | grep --color=always -iw $word | GREP_COLORS="mt=01;32" grep --color=always -E -e '^[ \t]*[0-9]+' - fi - # Re-enable errexit - set -o errexit - done -done - -if [ $STATUS"" = "1" ] -then - exit 1 -fi