From b52c91b1c54ab6be1cc0ac0d68e17f42cb19a8ab Mon Sep 17 00:00:00 2001 From: Gaurav-Aggarwal-AWS <33462878+aggarg@users.noreply.github.com> Date: Thu, 16 Jun 2022 23:46:28 -0700 Subject: [PATCH] Print file names with external links (#44) File names were printed with internal links but not with external links, which made it harder to decide which files to update when the same link existed in multiple files. This commit prints file names with external links as well, just as with internal links. Signed-off-by: Gaurav Aggarwal --- link-verifier/verify-links.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/link-verifier/verify-links.py b/link-verifier/verify-links.py index 8bbc353e..32352b2d 100755 --- a/link-verifier/verify-links.py +++ b/link-verifier/verify-links.py @@ -12,6 +12,7 @@ from termcolor import cprint from multiprocessing import Pool import traceback +from collections import defaultdict MARKDOWN_SEARCH_TERM = r'\.md$' # Regex to find a URL @@ -314,6 +315,7 @@ def main(): broken_links = [] md_file_list = [] link_set = set() + link_to_files = defaultdict(set) exclude_dirs = [dir.lower() for dir in args.exclude_dirs] if args.exclude_dirs else [] if args.user_agent != None: @@ -353,6 +355,7 @@ def main(): urls = re.findall(URL_SEARCH_TERM, text) for url in urls: link_set.add(url[0]) + link_to_files[url[0]].add(f_path) # If allowlist file is passed, add those links to link_cache so that link check on those URLs can be bypassed. if args.allowlist is not None: @@ -393,10 +396,12 @@ def main(): is_broken, status_code = test_url(link) if is_broken: broken_links.append(link) - cprint(f'{status_code}\t{link}', 'red') + print("FILES:", link_to_files[link]) + cprint(f'\t{status_code}\t{link}', 'red') else: if args.verbose: - cprint(f'{status_code}\t{link}', 'green') + print("FILES:", link_to_files[link]) + cprint(f'\t{status_code}\t{link}', 'green') # Return code > 0 to return error. num_broken = len(broken_links)