Merge branch 'master' into feature/spellchecker
sarthakpati authored Oct 16, 2024
2 parents b98d76d + 301c188 commit fa6ae76
Showing 41 changed files with 1,658 additions and 74 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/python-test.yml
@@ -106,10 +106,17 @@ jobs:
       run: |
         pytest --cov=. --cov-report=xml --cov-append -k "update_version"
-      - name: Upload coverage
+      - name: Upload coverage to CodeCov
        if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change
        uses: codecov/codecov-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          file: ./coverage.xml
-         flags: unittests
+         flags: unittests
+
+      - name: Upload coverage to Codacy
+        if: github.ref == 'refs/heads/master' # only run when on master
+        uses: codacy/[email protected]
+        with:
+          project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
+          coverage-reports: ./coverage.xml
142 changes: 142 additions & 0 deletions GANDLF/cli/huggingface_hub_handler.py
@@ -0,0 +1,142 @@
from huggingface_hub import HfApi, snapshot_download, ModelCardData, ModelCard
from typing import List, Union
from GANDLF import version
from pathlib import Path
from GANDLF.utils import get_git_hash
import re


def validate_model_card(file_path: str):
    """
    Validate that the required fields in the model card are not null, empty, or set to 'REQUIRED_FOR_GANDLF'.
    The fields must contain valid alphabetic or alphanumeric values.

    Args:
        file_path (str): The path to the Markdown file to validate.

    Raises:
        AssertionError: If any required field is missing, empty, null, or contains 'REQUIRED_FOR_GANDLF'.
    """
    # Read the Markdown file
    path = Path(file_path)
    with path.open("r") as file:
        template_str = file.read()

    # Define required fields and their regex patterns to capture the values
    patterns = {
        "Developed by": re.compile(
            r'\*\*Developed by:\*\*\s*\{\{\s*developers\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
        "License": re.compile(
            r'\*\*License:\*\*\s*\{\{\s*license\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
        "Primary Organization": re.compile(
            r'\*\*Primary Organization:\*\*\s*\{\{\s*primary_organization\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
        "Commercial use policy": re.compile(
            r'\*\*Commercial use policy:\*\*\s*\{\{\s*commercial_use\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
    }

    # Iterate through the required fields and validate
    for field, pattern in patterns.items():
        match = pattern.search(template_str)

        # Ensure the field is present
        assert match, f"Field '{field}' is missing or not found in the file."

        extract_value = match.group(1)

        # Get the bracketed field value, e.g. the inner text of '[REQUIRED_FOR_GANDLF]'
        value = (
            re.search(r"\[([^\]]+)\]", extract_value).group(1)
            if re.search(r"\[([^\]]+)\]", extract_value)
            else None
        )

        # Ensure the field is not set to 'REQUIRED_FOR_GANDLF' or empty
        assert (
            value != "REQUIRED_FOR_GANDLF"
        ), f"The value for '{field}' is set to the default placeholder '[REQUIRED_FOR_GANDLF]'. It must be a valid value."
        assert value, f"The value for '{field}' is empty or null."

        # Ensure the value contains only alphabetic or alphanumeric characters
        assert re.match(
            r"^[a-zA-Z0-9]+$", value
        ), f"The value for '{field}' must be alphabetic or alphanumeric, but got: '{value}'"

    print(
        "All required fields are valid, non-empty, properly filled, and do not contain '[REQUIRED_FOR_GANDLF]'."
    )

    # Return the validated template string so it can be rendered into a model card
    return template_str


def push_to_model_hub(
    repo_id: str,
    folder_path: str,
    hf_template: str,
    path_in_repo: Union[str, None] = None,
    commit_message: Union[str, None] = None,
    commit_description: Union[str, None] = None,
    token: Union[str, None] = None,
    repo_type: Union[str, None] = None,
    revision: Union[str, None] = None,
    allow_patterns: Union[List[str], str, None] = None,
    ignore_patterns: Union[List[str], str, None] = None,
    delete_patterns: Union[List[str], str, None] = None,
):
    api = HfApi(token=token)

    try:
        repo_id = api.create_repo(repo_id).repo_id
    except Exception as e:
        print(f"Error: {e}")

    tags = ["v" + version]

    git_hash = get_git_hash()

    if not git_hash == "None":
        tags += [git_hash]

    readme_template = validate_model_card(hf_template)

    card_data = ModelCardData(library_name="GaNDLF", tags=tags)
    card = ModelCard.from_template(card_data, template_str=readme_template)

    card.save(Path(folder_path, "README.md"))

    api.upload_folder(
        repo_id=repo_id,
        folder_path=folder_path,
        repo_type="model",
        revision=revision,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        delete_patterns=delete_patterns,
    )
    print("Model successfully uploaded")


def download_from_hub(
    repo_id: str,
    revision: Union[str, None] = None,
    cache_dir: Union[str, None] = None,
    local_dir: Union[str, None] = None,
    force_download: bool = False,
    token: Union[str, None] = None,
):
    snapshot_download(
        repo_id=repo_id,
        revision=revision,
        cache_dir=cache_dir,
        local_dir=local_dir,
        force_download=force_download,
        token=token,
    )
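
Taken together, these helpers give GaNDLF a thin wrapper around the huggingface_hub client: validate_model_card gates uploads on a completed model card, push_to_model_hub renders the card and uploads the model folder, and download_from_hub pulls a snapshot. A minimal usage sketch follows; the repository id, paths, and token are hypothetical placeholders:

from GANDLF.cli.huggingface_hub_handler import push_to_model_hub, download_from_hub

# All values below are hypothetical placeholders.
push_to_model_hub(
    repo_id="my-org/my-gandlf-model",  # target repository on the Hub
    folder_path="./model_dir",  # trained model directory to upload
    hf_template="./hf_template.md",  # model card template to validate
    token="hf_xxx",  # a write-scoped Hugging Face token
)

download_from_hub(repo_id="my-org/my-gandlf-model", local_dir="./downloaded_model")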
11 changes: 10 additions & 1 deletion GANDLF/compute/inference_loop.py
@@ -89,7 +89,16 @@ def inference_loop(
         assert file_to_load != None, "The 'best_file' was not found"

         main_dict = torch.load(file_to_load, map_location=parameters["device"])
-        model.load_state_dict(main_dict["model_state_dict"])
+        state_dict = main_dict["model_state_dict"]
+        if parameters.get("differential_privacy"):
+            # this is required for torch==1.11 and for DP inference
+            new_state_dict = {}
+            for key, val in state_dict.items():
+                new_key = key.replace("_module.", "")
+                new_state_dict[new_key] = val  # strip the `_module.` prefix added by Opacus
+            state_dict = new_state_dict
+
+        model.load_state_dict(state_dict)
         parameters["previous_parameters"] = main_dict.get("parameters", None)
         model.eval()
     elif parameters["model"]["type"].lower() == "openvino":
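
For context, Opacus wraps the network in a GradSampleModule, so checkpoints saved during differentially private training carry a `_module.` prefix on every parameter name; the new branch above strips that prefix so the plain model can load the weights. A self-contained sketch of the same renaming on toy keys:

from collections import OrderedDict

# Toy checkpoint keys in the form Opacus saves them (illustrative only).
dp_state_dict = OrderedDict(
    [("_module.conv1.weight", 0.1), ("_module.conv1.bias", 0.2)]
)

plain_state_dict = {k.replace("_module.", ""): v for k, v in dp_state_dict.items()}
assert list(plain_state_dict) == ["conv1.weight", "conv1.bias"]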
68 changes: 68 additions & 0 deletions GANDLF/compute/training_loop.py
@@ -31,6 +31,10 @@
 from .forward_pass import validate_network
 from .generic import create_pytorch_objects

+from GANDLF.privacy.opacus.model_handling import empty_collate
+from GANDLF.privacy.opacus import handle_dynamic_batch_size, prep_for_opacus_training
+from opacus.utils.batch_memory_manager import wrap_data_loader
+
 # hides torchio citation request, see https://github.com/fepegar/torchio/issues/235
 os.environ["TORCHIO_HIDE_CITATION_PROMPT"] = "1"
@@ -91,6 +95,14 @@ def train_network(
     for batch_idx, (subject) in enumerate(
         tqdm(train_dataloader, desc="Looping over training data")
     ):
+        if params.get("differential_privacy"):
+            subject, params["batch_size"] = handle_dynamic_batch_size(
+                subject=subject, params=params
+            )
+            assert not isinstance(
+                model, torch.nn.DataParallel
+            ), "Differential privacy is not supported with DataParallel or DistributedDataParallel. Please use a single GPU or DDP with Opacus."
+
         optimizer.zero_grad()
         image = (  # 5D tensor: (B, C, H, W, D)
             torch.cat(
@@ -212,6 +224,23 @@ def train_network(
     return average_epoch_train_loss, average_epoch_train_metric


+def train_network_wrapper(model, train_dataloader, optimizer, params):
+    """
+    Wrapper function that handles the train_dataloader for the benign and DP cases and passes it on to train the network for a single epoch.
+    """
+
+    if params.get("differential_privacy"):
+        with train_dataloader as memory_safe_data_loader:
+            epoch_train_loss, epoch_train_metric = train_network(
+                model, memory_safe_data_loader, optimizer, params
+            )
+    else:
+        epoch_train_loss, epoch_train_metric = train_network(
+            model, train_dataloader, optimizer, params
+        )
+    return epoch_train_loss, epoch_train_metric
+
+
 def training_loop(
     training_data: pd.DataFrame,
     validation_data: pd.DataFrame,
@@ -368,6 +397,7 @@ def training_loop(
         logger_csv_filename=os.path.join(output_dir, "logs_validation.csv"),
         metrics=metrics_log,
         mode="valid",
+        add_epsilon=bool(params.get("differential_privacy")),
     )
     if testingDataDefined:
         test_logger = Logger(
@@ -392,6 +422,36 @@

     print("Using device:", device, flush=True)

+    if params.get("differential_privacy"):
+        print(
+            "Using Opacus to make training differentially private with respect to the training data."
+        )
+
+        model, optimizer, train_dataloader, privacy_engine = prep_for_opacus_training(
+            model=model,
+            optimizer=optimizer,
+            train_dataloader=train_dataloader,
+            params=params,
+        )
+
+        train_dataloader.collate_fn = empty_collate(train_dataloader.dataset[0])
+
+        # train_dataloader = BatchMemoryManager(
+        #     data_loader=train_dataloader,
+        #     max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
+        #     optimizer=optimizer,
+        # )
+        batch_size = params["batch_size"]
+        max_physical_batch_size = params["differential_privacy"].get(
+            "physical_batch_size"
+        )
+        if max_physical_batch_size and max_physical_batch_size != batch_size:
+            train_dataloader = wrap_data_loader(
+                data_loader=train_dataloader,
+                max_batch_size=max_physical_batch_size,
+                optimizer=optimizer,
+            )
+
     # Iterate for number of epochs
     for epoch in range(start_epoch, epochs):
         if params["track_memory_usage"]:
@@ -453,6 +513,14 @@

         patience += 1

+        # if training with differential privacy, print privacy epsilon
+        if params.get("differential_privacy"):
+            delta = params["differential_privacy"]["delta"]
+            this_epsilon = privacy_engine.get_epsilon(delta)
+            print(f"  Epoch Final Privacy: (ε = {this_epsilon:.2f}, δ = {delta})")
+            # save for logging
+            epoch_valid_metric["epsilon"] = this_epsilon
+
         # Write the losses to a logger
         train_logger.write(epoch, epoch_train_loss, epoch_train_metric)
         valid_logger.write(epoch, epoch_valid_loss, epoch_valid_metric)
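
The DP plumbing above has two moving parts: wrap_data_loader caps the physical batch size so a large logical batch does not exhaust memory (the optimizer still steps once per logical batch), and privacy_engine.get_epsilon(delta) reports the accumulated privacy budget each epoch. A minimal self-contained sketch of that pattern against the stock Opacus 1.x API; the model, data, and hyperparameters are illustrative, and prep_for_opacus_training is assumed to do the equivalent of make_private:

import torch
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import wrap_data_loader
from torch.utils.data import DataLoader, TensorDataset

# Illustrative model and data; all sizes are arbitrary.
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
dataset = TensorDataset(torch.randn(64, 4), torch.randint(0, 2, (64,)))
train_loader = DataLoader(dataset, batch_size=32)

privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=1.0,
    max_grad_norm=1.0,
)

# Forward/backward on at most 8 samples at a time; the optimizer only steps
# once a full logical batch has been accumulated, so the privacy accounting
# still reflects batch_size=32.
safe_loader = wrap_data_loader(
    data_loader=train_loader, max_batch_size=8, optimizer=optimizer
)
with safe_loader as loader:
    for x, y in loader:
        optimizer.zero_grad()
        torch.nn.functional.cross_entropy(model(x), y).backward()
        optimizer.step()

print("ε =", privacy_engine.get_epsilon(delta=1e-5))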
5 changes: 5 additions & 0 deletions GANDLF/config_manager.py
@@ -7,6 +7,7 @@

 from .utils import version_check
 from GANDLF.data.post_process import postprocessing_after_reverse_one_hot_encoding
+from GANDLF.privacy.opacus import parse_opacus_params

 from GANDLF.metrics import surface_distance_ids
 from importlib.metadata import version
@@ -710,6 +711,10 @@ def _parseConfig(
         temp_dict["type"] = params["optimizer"]
         params["optimizer"] = temp_dict

+    # initialize defaults for DP
+    if params.get("differential_privacy"):
+        params = parse_opacus_params(params, initialize_key)
+
     # initialize defaults for inference mechanism
     inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0}
     initialize_inference_mechanism = False
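
For reference, the DP keys read explicitly in the diffs above are delta (privacy accounting) and physical_batch_size (memory cap); parse_opacus_params presumably fills in the remaining Opacus defaults, which this diff does not show. A hypothetical configuration fragment, expressed as the parsed params dict:

# Hypothetical values; only these two keys are read explicitly above.
params["differential_privacy"] = {
    "delta": 1e-5,  # target δ in the (ε, δ) privacy guarantee
    "physical_batch_size": 8,  # per-step cap; the logical batch_size is unchanged
}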

1 comment on commit fa6ae76

@github-actions
Contributor

@check-spelling-bot Report

🔴 Please review

See the 📜 action log or 📝 job summary for details.

Unrecognized words (663)
Abhishek
Abousamra
acdfbac
acsconv
adadelta
adagrad
adamax
adamw
addcdiv
addcmul
addgroup
addoption
ademamix
agc
agni
Aimilia
aimiliag
albumentations
allclose
allcontributors
allsigned
amsgrad
Anirban
anomymizer
anonymization
applyaugs
apptainer
Aqubvel
arange
archs
arcname
argmax
argwhere
Arnout
arxiv
asarray
astype
atleast
augs
auroc
autobuild
autocast
autodetermined
auxilary
avgs
awaa
Babak
bacf
backprop
backpropagate
backpropagation
Baheti
Baid
Bakas
Bashyam
batchnorm
bdfc
beggining
bgr
Bhalerao
bibtex
biggeest
bincount
biomedicalimaging
Bjoern
blabla
brahim
brainage
Brox
btw
Buildx
BVd
BVpye
capsys
cbica
cbig
cca
CCE
cdc
cdir
cel
cencoder
centercrop
cff
Chitalia
christos
Chunrui
Chv
cla
classif
classitk
codacy
codecov
CODEOWNERS
codeql
codereview
codespace
Colab
colorconv
colorjitter
colorlog
commandline
configfile
configgenerator
convs
cooldown
cosineannealing
cosineannealinglr
cosinesimilarity
croporpad
cropzero
ctc
CUBLAS
cudnn
cycliclr
datacenter
dataframe
dataprep
datestampnow
Davatzikos
dcce
dclog
dcm
dcmtk
deac
deadsnakes
DEBIAN
deconv
deepresunet
Deepthi
deepunet
denseblock
denselayer
densenet
depthconv
Despina
despinak
devcontainer
dfu
dicelog
dicom
dicomanonymizer
digestpath
disaggregating
discussioncomment
distilbert
DLF
DNN
dockerfiles
dockerized
dockertag
doi
Dokania
dotnet
downsamp
downsample
downsampling
doxygen
dpn
dqbm
dropna
dtype
dynunet
edac
edt
edu
eep
efc
efficientnet
efficientnetb
effiency
EIuqemz
elementwise
embeddings
Emre
ener
entrypoints
Ethem
excepthook
exctype
exponentiallr
fcn
Fdocker
fepegar
figsize
filenaming
filereader
fillna
finetuned
flaim
fnull
frgfm
fromarray
fromiter
Fsegmentation
Fulcio
Fworkflows
gandlf
Gastounioti
gbp
gcam
gcampp
GDCM
gdown
gdp
gelu
geometricanirban
Getka
getoption
getsizeof
ggcam
ghc
gle
glx
Gonz
Gotkowski
gpu
Grenko
gridaggregator
gridsampler
groundtruth
Guley
gumbel
Haghighi
Hamamc
Hamamci
hausdorff
healper
heatmaps
hexops
hft
histo
histopath
holocron
hookwrapper
HOUNSFIELD
hpc
hstack
HTR
huggingface
hyperparameters
idxs
ihc
iloc
imagenet
imbalanced
imread
imsave
imshow
imwrite
indeces
inlinehilite
inp
inputdata
instancenorm
interp
ISBI
issn
itcr
iterrows
itk
itkimage
itksnap
jaccard
JAX
JBHI
JDu
JSTARS
Junwen
jupyter
Jupyterlab
kaiming
kakumarabhishek
Karargyris
Karkada
keepdim
keleshev
kenshohara
KFold
kickstart
kld
Kontos
ksel
kspace
Kullback
Kurc
labelsample
labelsampler
lambd
layerchange
Lbtnaq
ldir
leakyrelu
Leibler
levelname
levelno
libgl
libjpeg
libpython
libsm
libvips
libxext
lightresunet
lightunet
linalg
linenums
lineplot
linspace
linting
lly
logit
logpt
logsoftmax
logvar
longreprtext
lps
lrelu
LROn
lstsq
lucidrains
macenko
mainrun
makereport
mathews
matplotlib
matthews
maxpool
mbergman
mcc
mcclog
MCD
mcr
MCT
mde
mdmc
medatory
medcam
medmnist
medperf
medpy
Megh
mencoder
menze
metr
miccai
missingprediction
misspled
mkdocs
mlco
mlcommons
mlcube
mlcubedir
mlp
modeified
modelbase
modelcard
modeldir
modelio
monai
Mouchtaris
moveaxis
mpp
mps
mri
msa
mscadocs
msdnet
mse
msle
Mukhopadhyay
multiclass
multidim
multilabel
mytagname
nadam
nans
naveenk
ncbi
ncc
ndarray
ndexbio
ndim
ndimage
ndlf
nesterov
neuroimage
nfnets
nibabel
nicl
NIf
nifti
nih
nii
nlabel
nmae
nnf
nonroot
normtype
notsigned
novograd
nsd
nuitka
numel
numlay
nvidia
octicons
offis
OFWCPDRE
ohif
onefile
onlatest
onnx
openfl
openslide
opensource
openvino
opm
opset
Orhun
ossar
outconv
outputdir
outputfile
palletsprojects
Panchumarthy
pathmnist
pati
pbar
pchs
Pdocker
pearson
Phenomics
pkl
plt
pmwiki
pnas
Prashant
prcomment
predics
predmask
preds
probs
Prunner
prv
psnr
psutil
pth
PTk
pubmed
purelib
pwadry
pydantic
pydicom
pyinstaller
pymdownx
pypa
pyplot
pytorch
pyversion
qsub
qubvel
radam
Radeon
radiomics
radxtools
ramework
randomaffine
randomanisotropy
randombiasfield
randomblur
randomelasticdeformation
randomflip
randommotion
randomnoise
randomswap
rdp
reco
recoverconfig
reducelronplateau
reduceonplateau
reencoded
refering
refernce
Rekor
relativized
relu
rensen
Reparameterization
reparameterize
rescaler
residualunet
resnet
resunet
rgbatorgb
rgbtorgba
rigourous
Ritesh
rmsprop
rocm
rocmdocs
Ronneberger
rowvar
ruifrok
runnning
runtest
Saltz
samplewise
Sarthak
sarthakpati
savefig
sbakas
sbia
scikit
scipy
screenshots
scse
sdata
sdnet
seaborn
Seac
sebastianffx
securefederatedai
segmap
segmask
segmentor
Sens
sessionstart
setbiasranges
setcutoffrange
setsigmaranges
Sezgin
sge
Shahira
shubham
siddhesh
sigstore
silu
Simonyan
simpleitk
sitk
skimage
sklearn
slurm
smi
socio
Soham
Sotirios
sparseadam
spellchecker
spellckecker
Sprop
Spyridon
ssim
stackexchange
stainextract
stainlib
steplr
stepsize
sterr
subjectid
subommands
Sucessfully
sume
superfences
sustainability
swapaxes
Tahsin
tcia
tempconvs
tensorboard
tgz
thresholded
thresholding
Thu
tiatoolbox
tiffslide
timepoints
timm
tio
tioq
tiosd
TLDR
tmi
TOOLSDIRECTORY
torchaudio
torchinfo
torchio
torchmetrics
torchvision
towardsdatascience
TPAMI
tqdm
traininginference
transunet
triaged
tryfirst
tsaftaris
TUDA
tversky
uanced
uinc
Ujjwal
Umeton
unet
unetr
uniformsample
uniformsampler
unittests
unitwise
unsqueeze
upenn
Uploaing
Uploded
upsample
upsampled
upsampling
utm
uzh
vahadane
validing
valuetopredict
vgg
Vinayak
vios
visualstudiomagazine
vmem
voxel
VRAM
vtk
vvv
WACV
warmupcosineschedule
Wauplin
wcs
weightedsample
weightedsampler
whl
WORKDIR
wsi
wsl
xavier
xdim
XDl
XEI
xkq
xlabel
xlim
xnat
XResolution
XTools
yamlchecker
yamlvalidator
ydim
ylabel
YResolution
Yrv
Yuemeng
zarr
Zeroplanes
zicat
znorm
ZNormalization
Zou
Some files were automatically ignored 🙈

These sample patterns would exclude them:

^\Q__init__.py\E$
^\QGANDLF/data/patch_miner/__init__.py\E$
^\QGANDLF/data/patch_miner/opm/__init__.py\E$
^\QGANDLF/grad_clipping/__init__.py\E$
^\QGANDLF/models/seg_modules/__init__.py\E$
^\QGANDLF/privacy/__init__.py\E$
^\Qtesting/__init__.py\E$

You should consider adding them to:

.github/actions/spelling/excludes.txt

File matching is via Perl regular expressions.

To check these files, more of their words need to be in the dictionary than not. You can use patterns.txt to exclude portions, add items to the dictionary (e.g. by adding them to allow.txt), or fix typos.

To accept these unrecognized words as correct and update file exclusions, you could run the following commands

... in a clone of the [email protected]:mlcommons/GaNDLF.git repository
on the feature/spellchecker branch (ℹ️ how do I use this?):

curl -s -S -L 'https://raw.githubusercontent.com/check-spelling/check-spelling/main/apply.pl' |
perl - 'https://github.com/mlcommons/GaNDLF/actions/runs/11370631110/attempts/1'
Available 📚 dictionaries could cover words not in the 📘 dictionary
Dictionary                       Entries  Covers  Uniquely
cspell:java/src/java-terms.txt   920      1       1

Consider adding them (in .github/workflows/spellchecker.yml) for uses: check-spelling/check-spelling@main in its with:

      with:
        extra_dictionaries:
          cspell:java/src/java-terms.txt

To stop checking additional dictionaries, add (in .github/workflows/spellchecker.yml) for uses: check-spelling/check-spelling@main in its with:

check_extra_dictionaries: ''
Pattern suggestions ✂️ (31)

You could add these patterns to .github/actions/spelling/patterns.txt:

# Automatically suggested patterns
# hit-count: 711 file-count: 63
# machine learning (?)
\b(?i)ml(?=[a-z]{2,})

# hit-count: 570 file-count: 105
# https/http/file urls
(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/*%?=~_|!:,.;]+[-A-Za-z0-9+&@#/*%=~_|]

# hit-count: 146 file-count: 52
# GitHub SHAs (markdown)
(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|)

# hit-count: 130 file-count: 53
# python
\b(?i)py(?!gments|gmy|lon|ramid|ro|th)(?=[a-z]{2,})

# hit-count: 41 file-count: 29
# scala imports
^import (?:[\w.]|\{\w*?(?:,\s*(?:\w*|\*))+\})+

# hit-count: 26 file-count: 8
# libraries
(?:\b|_)lib(?:re(?=office)|)(?!era[lt]|ero|erty|rar(?:i(?:an|es)|y))(?=[a-z])

# hit-count: 24 file-count: 15
# Python string prefix / binary prefix
# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings
(?<!['"])\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)['"](?=[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})

# hit-count: 20 file-count: 11
# container images
image: [-\w./:@]+

# hit-count: 17 file-count: 12
# Compiler flags (Unix, Java/Scala)
# Use if you have things like `-Pdocker` and want to treat them as `docker`
(?:^|[\t ,>"'`=(])-(?:(?:J-|)[DPWXY]|[Llf])(?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})

# hit-count: 16 file-count: 8
# version suffix <word>v#
(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))

# hit-count: 15 file-count: 9
# GitHub actions
\buses:\s+[-\w.]+/[-\w./]+@[-\w.]+

# hit-count: 13 file-count: 11
# Compiler flags (Windows / PowerShell)
# This is a subset of the more general compiler flags pattern.
# It avoids matching `-Path` to prevent it from being treated as `ath`
(?:^|[\t ,"'`=(])-(?:[DPL](?=[A-Z]{2,})|[WXYlf](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,}))

# hit-count: 12 file-count: 1
# tar arguments
\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+

# hit-count: 10 file-count: 7
# hex runs
\b[0-9a-fA-F]{16,}\b

# hit-count: 6 file-count: 6
# mailto urls
mailto:[-a-zA-Z=;:/?%&0-9+@._]{3,}

# hit-count: 5 file-count: 3
# URL escaped characters
%[0-9A-F][A-F](?=[A-Za-z])

# hit-count: 4 file-count: 4
# Docker images
^\s*FROM\s+\S+:\S+(?:\s+AS\s+\S+|)

# hit-count: 4 file-count: 1
# shields.io
\bshields\.io/[-\w/%?=&.:+;,]*

# hit-count: 1 file-count: 1
# Google Forms
\bforms\.gle/\w+

# hit-count: 1 file-count: 1
# gist github
\bgist\.github\.com/[^/\s"]+/[0-9a-f]+

# hit-count: 1 file-count: 1
# git.io
\bgit\.io/[0-9a-zA-Z]+

# hit-count: 1 file-count: 1
# Contributor
\[[^\]]+\]\(https://github\.com/[^/\s"]+/?\)

# hit-count: 1 file-count: 1
# medium
\bmedium\.com/@?[^/\s"]+/[-\w]+

# hit-count: 1 file-count: 1
# vs devops
\bvisualstudio.com(?::443|)/[-\w/?=%&.]*

# hit-count: 1 file-count: 1
# stackexchange -- https://stackexchange.com/feeds/sites
\b(?:askubuntu|serverfault|stack(?:exchange|overflow)|superuser).com/(?:questions/\w+/[-\w]+|a/)

# hit-count: 1 file-count: 1
# Wikipedia
\ben\.wikipedia\.org/wiki/[-\w%.#]+

# hit-count: 1 file-count: 1
# sha-... -- uses a fancy capture
(\\?['"]|&quot;)[0-9a-f]{40,}\g{-1}

# hit-count: 1 file-count: 1
# uuid:
\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b

# hit-count: 1 file-count: 1
# Non-English
[a-zA-Z]*[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]*|[a-zA-Z]{3,}[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]|[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3,}

# hit-count: 1 file-count: 1
# in [email protected]+, printf markers aren't automatically consumed
# printf markers
(?<!\\)\\[nrt](?=[a-z]{2,})

# hit-count: 1 file-count: 1
# alternate printf markers if you run into latex and friends
(?<!\\)\\[nrt](?=[a-z]{2,})(?=.*['"`])

Errors (4)

See the 📜 action log or 📝 job summary for details.

Errors                 Count
ℹ️ binary-file         7
ℹ️ candidate-pattern   63
❌ check-file-path     310
❌ forbidden-pattern   13

See ❌ Event descriptions for more information.
