From c8087ea6bd07bff1806ef959e84946ef7e116f2e Mon Sep 17 00:00:00 2001 From: Paul Bartell Date: Wed, 2 Jun 2021 11:16:57 -0700 Subject: [PATCH] Open link-verifier target files with encoding="utf8", errors='ignore' options Print each file name that is processed to stdout. --- link-verifier/verify-links.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/link-verifier/verify-links.py b/link-verifier/verify-links.py index e9d51d18..7c791827 100755 --- a/link-verifier/verify-links.py +++ b/link-verifier/verify-links.py @@ -335,7 +335,9 @@ def main(): dirs[:] = [dir for dir in dirs if dir.lower() not in exclude_dirs] for file in files: if any(file.endswith(file_type) for file_type in args.include_files): - with open(os.path.join(root, file), 'r') as f: + f_path = os.path.join(root, file) + print("Processing File: {}".format(f_path)) + with open(f_path, 'r', encoding="utf8", errors='ignore') as f: text = f.read() urls = re.findall(URL_SEARCH_TERM, text) for url in urls: