From 442020e4be63a895624a5295c141c13dea3cf68b Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Thu, 28 Nov 2024 21:59:06 -0500 Subject: [PATCH] Add check-spelling --- .github/actions/spelling/README.md | 2 +- .github/actions/spelling/candidate.patterns | 82 ++++++-- .github/actions/spelling/excludes.txt | 26 +-- .github/actions/spelling/expect.txt | 167 +--------------- .../actions/spelling/line_forbidden.patterns | 98 +++++++++- .github/actions/spelling/patterns.txt | 179 +++++++----------- .github/actions/spelling/reject.txt | 1 + .github/workflows/spelling3.yml | 40 ++-- 8 files changed, 271 insertions(+), 324 deletions(-) diff --git a/.github/actions/spelling/README.md b/.github/actions/spelling/README.md index 01bb8c2e1e0..1f699f3de3d 100644 --- a/.github/actions/spelling/README.md +++ b/.github/actions/spelling/README.md @@ -14,4 +14,4 @@ File | Purpose | Format | Info [advice.md](advice.md) | Supplement for GitHub comment when unrecognized words are found | GitHub Markdown | [advice](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-advice) Note: you can replace any of these files with a directory by the same name (minus the suffix) -and then include multiple files inside that directory (with that suffix) to merge multiple files. +and then include multiple files inside that directory (with that suffix) to merge multiple files together. diff --git a/.github/actions/spelling/candidate.patterns b/.github/actions/spelling/candidate.patterns index b159d41dc00..b5d5f40f8a6 100644 --- a/.github/actions/spelling/candidate.patterns +++ b/.github/actions/spelling/candidate.patterns @@ -218,7 +218,7 @@ accounts\.binance\.com/[a-z/]*oauth/authorize\?[-0-9a-zA-Z&%]* \bmedium\.com/@?[^/\s"]+/[-\w]+ # microsoft -\b(?:https?://|)(?:(?:download\.visualstudio|docs|msdn2?|research)\.microsoft|blogs\.msdn)\.com/[-_a-zA-Z0-9()=./%]* +\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]* # powerbi \bapp\.powerbi\.com/reportEmbed/[^"' ]* # vs devops @@ -405,7 +405,7 @@ ipfs://[0-9a-zA-Z]{3,} # Punycode \bxn--[-0-9a-z]+ # sha -sha\d+:[0-9]*[a-f]{3,}[0-9a-f]* +sha\d+:[0-9a-f]*?[a-f]{3,}[0-9a-f]* # sha-... -- uses a fancy capture (\\?['"]|")[0-9a-f]{40,}\g{-1} # hex runs @@ -425,6 +425,9 @@ sha\d+:[0-9]*[a-f]{3,}[0-9a-f]* # pki -----BEGIN.*-----END +# pki (base64) +LS0tLS1CRUdJT.* + # uuid: \b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b # hex digits including css/html color classes: @@ -446,11 +449,23 @@ integrity=(['"])(?:\s*sha\d+-[-a-zA-Z=;:/0-9+]{40,})+\g{-1} Name\[[^\]]+\]=.* # IServiceProvider / isAThing -(?:\b|_)(?:I|isA)(?=(?:[A-Z][a-z]{2,})+(?:[A-Z]|\b)) +(?:\b|_)(?:(?:ns|)I|isA)(?=(?:[A-Z][a-z]{2,})+(?:[A-Z\d]|\b)) # crypt (['"])\$2[ayb]\$.{56}\g{-1} +# apache/old crypt +(['"]|)\$+(?:apr|)1\$+.{8}\$+.{22}\g{-1} + +# sha1 hash +\{SHA\}[-a-zA-Z=;:/0-9+]{3,} + +# machine learning (?) +#\b(?i)ml(?=[a-z]{2,}) + +# python +\b(?i)py(?!gments|gmy|lon|ramid|ro|th)(?=[a-z]{2,}) + # scrypt / argon \$(?:scrypt|argon\d+[di]*)\$\S+ @@ -463,8 +478,17 @@ Name\[[^\]]+\]=.* # scala modules ("[^"]+"\s*%%?\s*){2,3}"[^"]+" +# container images +image: [-\w./:@]+ + +# Docker images +^\s*FROM\s+\S+:\S+(?:\s+AS\s+\S+|) + +# `docker images` REPOSITORY TAG IMAGE ID CREATED SIZE +\s*\S+/\S+\s+\S+\s+[0-9a-f]{8,}\s+\d+\s+(?:hour|day|week)s ago\s+[\d.]+[KMGT]B + # Intel intrinsics -_mm_\w+ +_mm_(?!dd)\w+ # Input to GitHub JSON content: (['"])[-a-zA-Z=;:/0-9+]*=\g{-1} @@ -473,15 +497,15 @@ content: (['"])[-a-zA-Z=;:/0-9+]*=\g{-1} # you'll want to remove the `(?=.*?")` suffix. # The `(?=.*?")` suffix should limit the false positives rate # printf -#%(?:(?:(?:hh?|ll?|[jzt])?[diuoxn]|l?[cs]|L?[fega]|p)(?=[a-z]{2,})|(?:X|L?[FEGA]|p)(?=[a-zA-Z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%)\b)(?=.*?['"]) +%(?:(?:(?:hh?|ll?|[jzt])?[diuoxn]|l?[cs]|L?[fega]|p)(?=[a-z]{2,})|(?:X|L?[FEGA])(?=[a-zA-Z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%)\b)(?=.*?['"]) # Alternative printf # %s -%(?:s(?=[a-z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%)\b)(?=.*?['"]) +%(?:s(?=[a-z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%[^s])\b)(?=.*?['"]) # Python string prefix / binary prefix # Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings -(?[-a-zA-Z=;:/0-9+]{3,}== 0.0.22) \\\w{2,}\{ @@ -629,8 +673,14 @@ TeX/AMS # eslint "varsIgnorePattern": ".+" +# nolint +nolint:\w+ + # Windows short paths -[/\\][^/\\]{5,6}~\d{1,2}[/\\] +[/\\][^/\\]{5,6}~\d{1,2}(?=[/\\]) + +# Windows Resources with accelerators +\b[A-Z]&[a-z]+\b(?!;) # cygwin paths /cygdrive/[a-zA-Z]/(?:Program Files(?: \(.*?\)| ?)(?:/[-+.~\\/()\w ]+)*|[-+.~\\/()\w])+ @@ -641,12 +691,18 @@ TeX/AMS # alternate printf markers if you run into latex and friends #(? Don't use `can not` when you mean `cannot`. The only time you're likely to see `can not` written as separate words is when the word `can` happens to precede some other phrase that happens to start with `not`. @@ -41,11 +48,26 @@ # - if you encounter such a case, add a pattern for that case to patterns.txt. \b[Cc]an not\b +# Do not use `(click) here` links +# For more information, see: +# * https://www.w3.org/QA/Tips/noClickHere +# * https://webaim.org/techniques/hypertext/link_text +# * https://granicus.com/blog/why-click-here-links-are-bad/ +# * https://heyoka.medium.com/dont-use-click-here-f32f445d1021 +(?:>|\[)(?:(?:click |)here|link|(?:read |)more)(?:) + +# Should be `equals` to `is equal to` +\bequals to\b + # Should be `GitHub` -(?]* -S[Hh][Aa]256: [0-9A-Fa-f]{64} -SHA256::ConvertToBytes\("[0-9A-Fa-f]{64}" -# data urls -data:[a-zA-Z=;,/0-9+-]+ -# uuid: -\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b -# hex digits including css/html color classes: -(?:[\\0][xX]|\\u|[uU]\+|#|\%23)[0-9a-fA-FgGrR]{2,}[uU]?[lL]{0,2}\b -"[0-9a-f]{32}" -"[0-9a-f]{64}" -# sha-1 -\b[0-9a-f]{40}\b + El proyecto .* diferentes + # Package family names and package full names \b[-_~.A-Za-z0-9]+_[a-z0-9]{13}\b + # Locales for name normalization \b\p{Lu}{2,3}(?:-(?:CANS|CYRL|LATN|MONG))?-\p{Lu}{2}(?![A-Z])(?:-VALENCIA)?\b + # Azure pipeline tasks - task: .* -# Slash-prefixed patterns -\\native(?![a-z]) -\\Release -/NPH(?![a-z]) -/td(?![a-z]) - -# .gitignore -^\[[\w/[\]*.]*$ - -# URLs -- Added here instead of allow.txt to facilitate wildcarding them as more are added -http://rfc3161.gtm.corp.microsoft.com/TSS/HttpTspServer - # schema regex "pattern": .*$ -# doc/ManifestSpecv1.0.md +# doc/.../manifest.md ^ShortDescription: Le nouveau.*$ # Ignore test patterns GetRestAPIBaseUri\(".*"\) == L".*" -# fabricbot.json -"(?:id|user)": "[-A-Za-z0-9_]*" - -# URL escaped characters -\%[0-9A-F]{2} - # some forms of `any more` are correct \battempt any more\b @@ -67,102 +36,92 @@ GetRestAPIBaseUri\(".*"\) == L".*" 9nblggh4nns1 # Automatically suggested patterns -# hit-count: 407 file-count: 78 + +# hit-count: 3076 file-count: 503 # IServiceProvider / isAThing -(?:\b|_)(?:[IT]|isA)(?=(?:[A-Z][a-z]{2,})+(?:[A-Z]|\b)) +(?:\b|_)(?:(?:ns|)I|isA|T)(?=(?:[A-Z][a-z]{2,})+(?:[A-Z\d]|\b)) -# hit-count: 13 file-count: 9 -# GitHub SHAs (markdown) -(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|) +# hit-count: 1396 file-count: 96 +# uuid: +\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b -# Compiler flags -(?:^|[\t ,"'`=(])-[D](?!ata|ebug|ependency|epth|esktop|estination|ev|irectory|ownload) -(?:^|[\t ,"'`=(])-[W](?!ait|arning|in|orking) -(?:^|[\t ,"'`=(])-[l](?!og) -(?:^|[\t ,"'`=(])-[f](?!eatures|ile|ind) - -T(?=h[a-z]{2,}) +# hit-count: 366 file-count: 148 +# hex runs +\b[0-9a-fA-F]{16,}\b -# Automatically suggested patterns -# hit-count: 30 file-count: 4 +# hit-count: 337 file-count: 135 +# microsoft +\b(?:https?://|)(?:(?:(?:apps|blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%?&:#]* + +# hit-count: 296 file-count: 23 # version suffix v# (?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_])) -# hit-count: 8 file-count: 4 -# libraries -\b(?i)lib(?!elous|erty|rar(?:i(?:an|es)|y))(?=[a-z]) - -# hit-count: 4 file-count: 4 -# Non-English -[a-zA-Z]*[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]* - -# hit-count: 3 file-count: 3 -# tar arguments -\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+ - -# hit-count: 3 file-count: 3 -# Python string prefix / binary prefix -# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings -(? + { + "cspell": "https://raw.githubusercontent.com/check-spelling/cspell-dicts/v20241114/dictionaries/" + } extra_dictionaries: | - cspell:cpp/src/compiler-msvc.txt - cspell:cpp/src/stdlib-c.txt - cspell:cpp/src/stdlib-cpp.txt + cspell:software-terms/softwareTerms.txt + cspell:cpp/stdlib-cpp.txt cspell:filetypes/filetypes.txt - cspell:fullstack/dict/fullstack.txt - cspell:powershell/dict/powershell.txt - cspell:software-terms/dict/softwareTerms.txt + cspell:cpp/stdlib-c.txt + cspell:php/php.txt + cspell:python/python/python-lib.txt + cspell:dotnet/dotnet.txt + cspell:golang/go.txt + cspell:cpp/compiler-msvc.txt + cspell:dart/dart.txt + cspell:html/html.txt + cspell:powershell/powershell.txt + cspell:aws/aws.txt + cspell:python/common/extra.txt + cspell:node/node.txt + cspell:npm/npm.txt + cspell:fullstack/fullstack.txt + cspell:java/java.txt cspell:csharp/csharp.txt - cspell:dotnet/dict/dotnet.txt - check_extra_dictionaries: '' + cspell:cpp/ecosystem.txt + cspell:typescript/typescript.txt + cspell:cpp/lang-keywords.txt comment-pr: name: Report (PR) @@ -124,7 +140,7 @@ jobs: if: (success() || failure()) && needs.spelling.outputs.followup && contains(github.event_name, 'pull_request') steps: - name: comment - uses: check-spelling/check-spelling@v0.0.22 + uses: check-spelling/check-spelling@v0.0.24 with: checkout: true task: ${{ needs.spelling.outputs.followup }}