From 3168f1cb23fece4bc66b57a125460bb9ee3abce0 Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Fri, 2 Feb 2024 15:38:07 -0500
Subject: [PATCH] re-ran linting/formatting after rebase

---
 src/cleanup_whitespace.py | 90 +++++++-------
 src/factor_field.py | 57 +++++----
 src/generate_docs.py | 30 +++--
 src/generate_field_enum_csv.py | 4 +-
 src/generate_field_values_csv.py | 70 +++++------
 src/generate_field_yaml.py | 25 ++--
 src/generate_grid.py | 31 +++--
 src/generate_schema.py | 34 ++---
 src/ingest_validation_tools/check_factory.py | 76 ++++++------
 src/ingest_validation_tools/cli_utils.py | 5 +-
 .../directory_validator.py | 19 +--
 src/ingest_validation_tools/docs_utils.py | 23 ++--
 src/ingest_validation_tools/enums.py | 1 -
 src/ingest_validation_tools/error_report.py | 4 +-
 .../plugin_validator.py | 27 ++--
 src/ingest_validation_tools/schema_loader.py | 22 ++--
 .../table_validator.py | 30 ++---
 src/ingest_validation_tools/upload.py | 117 +++++------------
 .../validation_utils.py | 41 ++----
 .../yaml_include_loader.py | 28 ++---
 src/validate_tsv.py | 12 +-
 src/validate_upload.py | 22 ++--
 22 files changed, 306 insertions(+), 462 deletions(-)

diff --git a/src/cleanup_whitespace.py b/src/cleanup_whitespace.py
index b637d9d89..a2a4b19b2 100755
--- a/src/cleanup_whitespace.py
+++ b/src/cleanup_whitespace.py
@@ -1,34 +1,30 @@
 #!/usr/bin/env python3
+import argparse
 import csv
 import sys
-import argparse
 from pathlib import Path
 
 
 def main():
     parser = argparse.ArgumentParser(
-        description='''
+        description="""
 Use the "--tsv_in"/"--tsv_out" options
 to strip invisible characters from TSVs.
-'''
+"""
     )
     mutex = parser.add_mutually_exclusive_group(required=True)
     mutex.add_argument(
-        '--tsv_in',
-        type=Path,
-        metavar='INPUT',
-        help='TSV to strip padding whitespace from')
+        "--tsv_in", type=Path, metavar="INPUT", help="TSV to strip padding whitespace from"
+    )
     mutex.add_argument(
-        '--encoding_test',
+        "--encoding_test",
         type=str,
-        metavar='ENCODING',
-        help='Generate test TSV using this encoding')
+        metavar="ENCODING",
+        help="Generate test TSV using this encoding",
+    )
     parser.add_argument(
-        '--tsv_out',
-        type=Path,
-        metavar='OUTPUT',
-        help='Destination for clean TSV',
-        required=True)
+        "--tsv_out", type=Path, metavar="OUTPUT", help="Destination for clean TSV", required=True
+    )
 
     args = parser.parse_args()
     if args.encoding_test:
@@ -40,50 +36,52 @@ def main():
 
 def print_encoding_test(encoding, output_path):
     space_chars = [
-        '\u000b',  # vertical tab
-        '\u0020',  # normal space
+        "\u000b",  # vertical tab
+        "\u0020",  # normal space
     ]
-    if encoding != 'ascii':
+    if encoding != "ascii":
         space_chars += [
-            '\u00a0',  # non-breaking space
+            "\u00a0",  # non-breaking space
         ]
-    if encoding not in ['ascii', 'latin-1']:
+    if encoding not in ["ascii", "latin-1"]:
         space_chars += [
-            '\u2003',  # em space
-            '\u3000',  # idiographic space
+            "\u2003",  # em space
+            "\u3000",  # idiographic space
         ]
-    padding = ''.join(space_chars)
+    padding = "".join(space_chars)
 
-    with output_path.open(mode='w', encoding=encoding) as f:
+    with output_path.open(mode="w", encoding=encoding) as f:
         # Header:
         print(
-            'quoted', 'empty', 'padded',
-            '',  # Empty column header: should be cleaned up!
-            sep='\t', file=f
+            "quoted",
+            "empty",
+            "padded",
+            "",  # Empty column header: should be cleaned up!
+            sep="\t",
+            file=f,
         )
         # Body:
         print(
             f'"{padding}123{padding}"',
-            '',
-            f'{padding}123{padding}',
-            '', '',  # Two empty cells: should be cleaned up!
- sep='\t', file=f - ) - print( - '', '', '', '', # More empty cells: should be cleaned up! - sep='\t', file=f + "", + f"{padding}123{padding}", + "", + "", # Two empty cells: should be cleaned up! + sep="\t", + file=f, ) + print("", "", "", "", sep="\t", file=f) # More empty cells: should be cleaned up! # Trailing \n means there's a trailing empty line in the TSV to clean up. return 0 def print_clean_tsv(input_path, output_path): - dialect = 'excel-tab' - writer = csv.writer(output_path.open(mode='w', newline=''), dialect=dialect) + dialect = "excel-tab" + writer = csv.writer(output_path.open(mode="w", newline=""), dialect=dialect) - for encoding in ['utf-8', 'latin-1']: - warn(f'Trying to read {input_path} as {encoding}...') + for encoding in ["utf-8", "latin-1"]: + warn(f"Trying to read {input_path} as {encoding}...") try: # Read the file completely to determine if there are encoding problems, # rather than reading and writing line-by-line. @@ -91,10 +89,10 @@ def print_clean_tsv(input_path, output_path): clean_rows = clean(rows) for row in clean_rows: writer.writerow(row) - warn('Read succeeded') + warn("Read succeeded") return 0 except UnicodeDecodeError as e: - warn(f'Read failed: {e}') + warn(f"Read failed: {e}") continue return 1 @@ -108,7 +106,7 @@ def csv_to_rows(tsv_path, encoding=None, dialect=None): def clean(rows): - ''' + """ >>> clean([ ... [' x', 'y ', ''], ... ['', ' Hi! ', '', ''], @@ -117,7 +115,7 @@ def clean(rows): ... ]) [['x', 'y'], ['', 'Hi!']] - ''' + """ clean_rows = [] max_i = None for row in rows: @@ -126,16 +124,16 @@ def clean(rows): continue if max_i is None: max_i = last_non_empty_index(stripped_row) - clean_rows.append(stripped_row[:max_i + 1]) + clean_rows.append(stripped_row[: max_i + 1]) return clean_rows def last_non_empty_index(values): - ''' + """ >>> last_non_empty_index(['', '', '0', '', '']) 2 - ''' + """ return max(i for i, val in enumerate(values) if len(val)) diff --git a/src/factor_field.py b/src/factor_field.py index 83009be71..d3c5854f3 100755 --- a/src/factor_field.py +++ b/src/factor_field.py @@ -1,32 +1,33 @@ #!/usr/bin/env python3 -import sys import argparse -from pathlib import Path import fileinput +import sys from collections import defaultdict +from pathlib import Path def main(): - parser = argparse.ArgumentParser(description=''' + parser = argparse.ArgumentParser( + description=""" Factor out all variants of a given field. 
- ''') - parser.add_argument( - '--field', - metavar='NAME', - required=True) + """ + ) + parser.add_argument("--field", metavar="NAME", required=True) parser.add_argument( - '--input_dir', + "--input_dir", type=Path, - metavar='IN', - help='Directory to scan for instances of the field', - default='src/ingest_validation_tools/table-schemas/assays') + metavar="IN", + help="Directory to scan for instances of the field", + default="src/ingest_validation_tools/table-schemas/assays", + ) parser.add_argument( - '--output_dir', + "--output_dir", type=Path, - metavar='OUT', - help='Directory to write field extracts', - default='src/ingest_validation_tools/table-schemas/includes/fields') + metavar="OUT", + help="Directory to write field extracts", + default="src/ingest_validation_tools/table-schemas/includes/fields", + ) args = parser.parse_args() factor_field(args.field, args.input_dir, args.output_dir) @@ -46,18 +47,22 @@ def pull(field_name, input_dir): lines=lines, get_file_name=lambda: str(fileinput.filename()), field_name=field_name, - definitions=definitions + definitions=definitions, ) return definitions def push(field_name, definitions, output_dir): - options = [ - f"# {'; '.join(sorted(files))}\n{definition}" - for definition, files in definitions.items() - ] if len(definitions) > 1 else definitions.keys() + options = ( + [ + f"# {'; '.join(sorted(files))}\n{definition}" + for definition, files in definitions.items() + ] + if len(definitions) > 1 + else definitions.keys() + ) if options: - (output_dir / f'{field_name}.yaml').write_text('\n'.join(options)) + (output_dir / f"{field_name}.yaml").write_text("\n".join(options)) else: print(f"Check spelling of field name: '{field_name}'") sys.exit(1) @@ -93,18 +98,18 @@ def replace(lines, get_file_name, field_name, definitions): definition = None for line in lines: # This assumes the YAML has been cleaned up! 
- if f'name: {field_name}' in line: + if f"name: {field_name}" in line: inside = True - print(f'# include: ../includes/fields/{field_name}.yaml') + print(f"# include: ../includes/fields/{field_name}.yaml") definition = line continue - elif inside and line[0] not in ['-', '#']: + elif inside and line[0] not in ["-", "#"]: definition += line continue elif inside: definitions[definition].add(get_file_name()) inside = False - print(line, end='') + print(line, end="") if __name__ == "__main__": diff --git a/src/generate_docs.py b/src/generate_docs.py index 2f201fd33..86e1f9c57 100755 --- a/src/generate_docs.py +++ b/src/generate_docs.py @@ -2,29 +2,29 @@ import argparse import os -from pathlib import Path import sys -from yaml import dump as dump_yaml +from pathlib import Path from tableschema_to_template.create_xlsx import create_xlsx +from yaml import dump as dump_yaml +from ingest_validation_tools.cli_utils import dir_path +from ingest_validation_tools.docs_utils import ( + generate_readme_md, + generate_template_tsv, + get_tsv_name, + get_xlsx_name, +) from ingest_validation_tools.schema_loader import ( - dict_table_schema_versions, - get_table_schema, dict_directory_schema_versions, + dict_table_schema_versions, + enum_maps_to_lists, get_directory_schema, + get_fields_wo_headers, get_is_assay, - enum_maps_to_lists, get_pipeline_infos, - get_fields_wo_headers, -) -from ingest_validation_tools.docs_utils import ( - get_tsv_name, - get_xlsx_name, - generate_template_tsv, - generate_readme_md, + get_table_schema, ) -from ingest_validation_tools.cli_utils import dir_path def main(): @@ -158,9 +158,7 @@ def main(): ) max_schema["fields"] = get_fields_wo_headers(max_schema) if max_schema["fields"][0]["name"] != "is_cedar": - with open( - deprecated_path / get_tsv_name(args.type, is_assay=is_assay), "w" - ) as f: + with open(deprecated_path / get_tsv_name(args.type, is_assay=is_assay), "w") as f: f.write(generate_template_tsv(max_schema)) create_xlsx( max_schema, diff --git a/src/generate_field_enum_csv.py b/src/generate_field_enum_csv.py index 9ccdfa7f3..5df382d74 100755 --- a/src/generate_field_enum_csv.py +++ b/src/generate_field_enum_csv.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 +import argparse import sys from csv import DictWriter -import argparse from ingest_validation_tools.schema_loader import ( - list_table_schema_versions, get_table_schema, + list_table_schema_versions, ) diff --git a/src/generate_field_values_csv.py b/src/generate_field_values_csv.py index bc9cb89c9..c27f2fefd 100755 --- a/src/generate_field_values_csv.py +++ b/src/generate_field_values_csv.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -import sys -from csv import DictWriter import argparse import re +import sys +from csv import DictWriter import requests @@ -10,67 +10,61 @@ def main(): parser = argparse.ArgumentParser() - default_url = 'https://search.api.hubmapconsortium.org/portal/search' - parser.add_argument( - '--url', - default=default_url, - help=f'ES endpoint. Default: {default_url}') + default_url = "https://search.api.hubmapconsortium.org/portal/search" + parser.add_argument("--url", default=default_url, help=f"ES endpoint. Default: {default_url}") default_size = 20 parser.add_argument( - '--size', + "--size", type=int, default=default_size, - help=f'Number of records to pull. Default: {default_size}') + help=f"Number of records to pull. Default: {default_size}", + ) - default_type = 'Dataset' + default_type = "Dataset" parser.add_argument( - '--type', - default=default_type, - help=f'Entity type to query. 
Default: {default_type}') + "--type", default=default_type, help=f"Entity type to query. Default: {default_type}" + ) args = parser.parse_args() query = { - 'post_filter': {'term': {'entity_type.keyword': args.type}}, - 'size': args.size, - '_source': ['metadata.metadata' if args.type == 'Dataset' else 'metadata'] + "post_filter": {"term": {"entity_type.keyword": args.type}}, + "size": args.size, + "_source": ["metadata.metadata" if args.type == "Dataset" else "metadata"], } response = requests.post(args.url, json=query) - hits = response.json()['hits']['hits'] + hits = response.json()["hits"]["hits"] writer = DictWriter( - sys.stdout, - fieldnames=[ - 'uuid', - 'assay_type', - 'field', - 'value'], - extrasaction='ignore') + sys.stdout, fieldnames=["uuid", "assay_type", "field", "value"], extrasaction="ignore" + ) writer.writeheader() for hit in hits: - uuid = hit['_id'] + uuid = hit["_id"] - if 'metadata' not in hit['_source']: + if "metadata" not in hit["_source"]: continue - meta = hit['_source']['metadata'] + meta = hit["_source"]["metadata"] - if 'metadata' in meta: - meta = meta['metadata'] + if "metadata" in meta: + meta = meta["metadata"] for field, value in meta.items(): - if not re.search(r'[A-Za-z]', value): + if not re.search(r"[A-Za-z]", value): continue - writer.writerow({ - 'uuid': uuid, - 'assay_type': meta['assay_type'] if 'assay_type' in meta else 'Sample', - 'field': field, - 'value': value - }) + writer.writerow( + { + "uuid": uuid, + "assay_type": meta["assay_type"] if "assay_type" in meta else "Sample", + "field": field, + "value": value, + } + ) - assert len(hits) < args.size, f'Result truncated at {args.size}' + assert len(hits) < args.size, f"Result truncated at {args.size}" return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) # pragma: no cover diff --git a/src/generate_field_yaml.py b/src/generate_field_yaml.py index 8c874f9bd..2dce1642c 100755 --- a/src/generate_field_yaml.py +++ b/src/generate_field_yaml.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 +import argparse import sys + from yaml import dump as dump_yaml -import argparse from ingest_validation_tools.schema_loader import ( - list_table_schema_versions, - get_table_schema, get_is_assay, + get_table_schema, + list_table_schema_versions, ) @@ -47,9 +48,7 @@ def __init__(self): self.default_value = None def add(self, field, schema_name=None, schema=None): - name, attr_value = self._get_name_value( - field, schema_name=schema_name, schema=schema - ) + name, attr_value = self._get_name_value(field, schema_name=schema_name, schema=schema) if self._skip_field(name, attr_value): return if name in self.mapping and self.mapping[name] != attr_value: @@ -66,9 +65,7 @@ def _skip_field(self, name, attr_value): return False def _handle_collision(self, name, attr_value): - raise Exception( - f'{name} is inconsistent: "{self.mapping[name]}" != "{attr_value}"' - ) + raise Exception(f'{name} is inconsistent: "{self.mapping[name]}" != "{attr_value}"') def dump_yaml(self): return dump_yaml(self.mapping) @@ -170,14 +167,8 @@ class AssayMapper(AbstractSetValuedMapper): """ def _get_name_value(self, field, schema_name=None, schema=None): - assay_type_fields = [ - field for field in schema["fields"] if field["name"] == "assay_type" - ] - value = ( - assay_type_fields[0]["constraints"]["enum"] - if len(assay_type_fields) - else [] - ) + assay_type_fields = [field for field in schema["fields"] if field["name"] == "assay_type"] + value = assay_type_fields[0]["constraints"]["enum"] if 
len(assay_type_fields) else [] return field["name"], set(value) diff --git a/src/generate_grid.py b/src/generate_grid.py index 7966330ac..4b6de8dd6 100755 --- a/src/generate_grid.py +++ b/src/generate_grid.py @@ -1,25 +1,22 @@ #!/usr/bin/env python3 import argparse -from pathlib import Path import sys from datetime import datetime +from pathlib import Path -from yaml import safe_load import xlsxwriter +from yaml import safe_load def main(): parser = argparse.ArgumentParser() - parser.add_argument( - 'target', - type=Path, - help='Path for Excel file') + parser.add_argument("target", type=Path, help="Path for Excel file") args = parser.parse_args() - docs_path = Path(__file__).parent.parent / 'docs' - field_schemas = safe_load((docs_path / 'field-schemas.yaml').read_text()) - field_descriptions = safe_load((docs_path / 'field-descriptions.yaml').read_text()) + docs_path = Path(__file__).parent.parent / "docs" + field_schemas = safe_load((docs_path / "field-schemas.yaml").read_text()) + field_descriptions = safe_load((docs_path / "field-descriptions.yaml").read_text()) all_schemas = set() for schemas in field_schemas.values(): @@ -28,18 +25,20 @@ def main(): schema_cols = sorted(all_schemas) workbook = xlsxwriter.Workbook(args.target) - worksheet = workbook.add_worksheet('schemas + fields') - workbook.set_properties({ - # So regenerated Excel files will be binary identical: - 'created': datetime(2000, 1, 1) - }) + worksheet = workbook.add_worksheet("schemas + fields") + workbook.set_properties( + { + # So regenerated Excel files will be binary identical: + "created": datetime(2000, 1, 1) + } + ) # Set column widths: worksheet.set_column(0, 0, 40) worksheet.set_column(1, len(schema_cols), 2) # Format and write headers: - header_format = workbook.add_format({'rotation': 60}) + header_format = workbook.add_format({"rotation": 60}) worksheet.freeze_panes(1, 1) for col, schema in enumerate_from_1(schema_cols): worksheet.write(0, col, schema, header_format) @@ -50,7 +49,7 @@ def main(): worksheet.write_comment(row, 0, field_descriptions[field]) for col, schema in enumerate_from_1(schema_cols): if schema in field_schemas[field]: - worksheet.write(row, col, '✓') + worksheet.write(row, col, "✓") workbook.close() diff --git a/src/generate_schema.py b/src/generate_schema.py index 895d5f705..4adf2b458 100755 --- a/src/generate_schema.py +++ b/src/generate_schema.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 +import argparse import csv import sys -import argparse from yaml import dump as dump_yaml @@ -10,36 +10,26 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( - '--fields', - type=argparse.FileType('r'), - help='Two-column TSV: Field name and description') + "--fields", type=argparse.FileType("r"), help="Two-column TSV: Field name and description" + ) args = parser.parse_args() field_list = [] - for row in csv.reader(args.fields, dialect='excel-tab'): + for row in csv.reader(args.fields, dialect="excel-tab"): if len(row) == 2: - field_list.append({ - 'name': row[0], - 'description': row[1] - }) + field_list.append({"name": row[0], "description": row[1]}) field_list[0] = { # Rebuild dict, so 'heading' is first. 
- 'heading': 'Level 2', - **field_list[0] + "heading": "Level 2", + **field_list[0], } level_1_overrides = [ - { - 'name': name, - 'constraints': { - 'enum': ['TODO'] - } - } - for name in ['assay_category', 'assay_type', 'analyte_class'] + {"name": name, "constraints": {"enum": ["TODO"]}} + for name in ["assay_category", "assay_type", "analyte_class"] ] - print(dump_yaml({ - 'doc_url': 'TODO', - 'fields': level_1_overrides + field_list - }, sort_keys=False)) + print( + dump_yaml({"doc_url": "TODO", "fields": level_1_overrides + field_list}, sort_keys=False) + ) return 0 diff --git a/src/ingest_validation_tools/check_factory.py b/src/ingest_validation_tools/check_factory.py index 515de3ab3..fed99ace6 100644 --- a/src/ingest_validation_tools/check_factory.py +++ b/src/ingest_validation_tools/check_factory.py @@ -1,15 +1,14 @@ +import json import re -from string import Template from pathlib import Path +from string import Template from sys import stderr -import json -from typing import List, Callable, Dict, Any, Iterator +from typing import Any, Callable, Dict, Iterator, List import frictionless import requests - -cache_path = Path(__file__).parent / 'url-status-cache.json' +cache_path = Path(__file__).parent / "url-status-cache.json" ErrorIterator = Iterator[frictionless.errors.CellError] Row = Dict[str, Any] @@ -22,64 +21,65 @@ def make_checks(schema) -> List[Check]: factory.make_url_check(), factory.make_sequence_limit_check(), factory.make_units_check(), - factory.make_forbid_na_check() + factory.make_forbid_na_check(), ] -class _CheckFactory(): +class _CheckFactory: def __init__(self, schema): self.schema = schema self._prev_value_run_length = {} def _get_constrained_fields(self, constraint: str) -> Dict[str, List]: - c_c = 'custom_constraints' + c_c = "custom_constraints" return { - f['name']: f[c_c][constraint] for f in self.schema['fields'] + f["name"]: f[c_c][constraint] + for f in self.schema["fields"] if c_c in f and constraint in f[c_c] } def _check_url_status_cache(self, url: str) -> str: if not cache_path.exists(): - cache_path.write_text('{}') + cache_path.write_text("{}") url_status_cache = json.loads(cache_path.read_text()) if url not in url_status_cache: - print(f'Fetching un-cached url: {url}', file=stderr) + print(f"Fetching un-cached url: {url}", file=stderr) try: response = requests.get(url) url_status_cache[url] = response.status_code except Exception as e: url_status_cache[url] = str(e) - cache_path.write_text(json.dumps( - url_status_cache, - sort_keys=True, - indent=2 - )) + cache_path.write_text(json.dumps(url_status_cache, sort_keys=True, indent=2)) return url_status_cache[url] - def make_url_check(self, template=Template( - 'URL returned $status: "$url"')) -> Check: - url_constrained_fields = self._get_constrained_fields('url') + def make_url_check(self, template=Template('URL returned $status: "$url"')) -> Check: + url_constrained_fields = self._get_constrained_fields("url") def url_check(row): for k, v in row.items(): if v is None: continue if k in url_constrained_fields: - prefix = url_constrained_fields[k]['prefix'] - url = f'{prefix}{v}' + prefix = url_constrained_fields[k]["prefix"] + url = f"{prefix}{v}" status = self._check_url_status_cache(url) if status != 200: note = template.substitute(status=status, url=url) yield frictionless.errors.CellError.from_row(row, note=note, field_name=k) + return url_check - def make_sequence_limit_check(self, template=Template( - 'there is a run of $run_length sequential items: Limit is $limit. 
' - 'If correct, reorder rows.')) -> Check: - sequence_limit_fields = self._get_constrained_fields('sequence_limit') + def make_sequence_limit_check( + self, + template=Template( + "there is a run of $run_length sequential items: Limit is $limit. " + "If correct, reorder rows." + ), + ) -> Check: + sequence_limit_fields = self._get_constrained_fields("sequence_limit") def sequence_limit_check(row): - prefix_number_re = r'(?P.*?)(?P\d+)$' + prefix_number_re = r"(?P.*?)(?P\d+)$" for k, v in row.items(): # If the schema declares the field as datetime, # "v" will be a python object, and regexes will error. @@ -101,8 +101,8 @@ def sequence_limit_check(row): prev_value, run_length = self._prev_value_run_length[k] prev_match = re.search(prefix_number_re, prev_value) if ( - match.group('prefix') != prev_match.group('prefix') or - int(match.group('number')) != int(prev_match.group('number')) + 1 + match.group("prefix") != prev_match.group("prefix") + or int(match.group("number")) != int(prev_match.group("number")) + 1 ): self._prev_value_run_length[k] = (v, 1) continue @@ -111,16 +111,17 @@ def sequence_limit_check(row): self._prev_value_run_length[k] = (v, run_length) limit = sequence_limit_fields[k] - assert limit > 1, 'The lowest allowed limit is 2' + assert limit > 1, "The lowest allowed limit is 2" if run_length >= limit: note = template.substitute(run_length=run_length, limit=limit) yield frictionless.errors.CellError.from_row(row, note=note, field_name=k) return sequence_limit_check - def make_units_check(self, template=Template( - 'it requires a value when $units_for is filled')) -> Check: - units_constrained_fields = self._get_constrained_fields('units_for') + def make_units_check( + self, template=Template("it requires a value when $units_for is filled") + ) -> Check: + units_constrained_fields = self._get_constrained_fields("units_for") def units_check(row): for k, v in row.items(): @@ -129,19 +130,22 @@ def units_check(row): if (row[units_for] or row[units_for] == 0) and not row[k]: note = template.substitute(units_for=units_for) yield frictionless.errors.CellError.from_row(row, note=note, field_name=k) + return units_check - def make_forbid_na_check(self, template=Template( - '"N/A" fields should just be left empty')) -> Check: - forbid_na_constrained_fields = self._get_constrained_fields('forbid_na') + def make_forbid_na_check( + self, template=Template('"N/A" fields should just be left empty') + ) -> Check: + forbid_na_constrained_fields = self._get_constrained_fields("forbid_na") def forbid_na_check(row): for k, v in row.items(): if ( k in forbid_na_constrained_fields and isinstance(v, str) - and v.upper() in ['NA', 'N/A'] + and v.upper() in ["NA", "N/A"] ): note = template.substitute() yield frictionless.errors.CellError.from_row(row, note=note, field_name=k) + return forbid_na_check diff --git a/src/ingest_validation_tools/cli_utils.py b/src/ingest_validation_tools/cli_utils.py index 8aabfef58..8da1ee932 100644 --- a/src/ingest_validation_tools/cli_utils.py +++ b/src/ingest_validation_tools/cli_utils.py @@ -13,7 +13,4 @@ def dir_path(s): raise ShowUsageException(f'"{s}" is not a directory') -exit_codes = namedtuple( - 'ExitCode', - ['VALID', 'BUG', 'ERROR', 'INVALID'] -)(0, 1, 2, 3) +exit_codes = namedtuple("ExitCode", ["VALID", "BUG", "ERROR", "INVALID"])(0, 1, 2, 3) diff --git a/src/ingest_validation_tools/directory_validator.py b/src/ingest_validation_tools/directory_validator.py index fb89de93c..3e21606c3 100644 --- a/src/ingest_validation_tools/directory_validator.py +++ 
b/src/ingest_validation_tools/directory_validator.py @@ -1,8 +1,9 @@ import os import re from fnmatch import fnmatch -from typing import List, Dict, Tuple from pathlib import Path +from typing import Dict, List, Tuple + from ingest_validation_tools.yaml_include_loader import load_yaml @@ -40,9 +41,7 @@ def validate_directory( actual_paths += [f"{prefix}/"] # Otherwise this should be a branch directory else: - actual_paths += ( - [f"{prefix}/{name}" for name in file_names] if prefix else file_names - ) + actual_paths += [f"{prefix}/{name}" for name in file_names] if prefix else file_names """TODO: message_munger adds periods at the end of these messages which is very confusing for regex! Also human readability of required_patterns @@ -55,9 +54,7 @@ def validate_directory( assert isinstance(dependency_pattern, str) # Check to see whether there's a match matching_paths = [ - actual - for actual in actual_paths - if re.fullmatch(dependency_pattern, actual) + actual for actual in actual_paths if re.fullmatch(dependency_pattern, actual) ] # If there's a match, then we have to check that the dependent items are also captured # Let's also short-circuit and get failures out of the way @@ -87,9 +84,7 @@ def validate_directory( not_allowed_errors.extend( _get_not_allowed_errors(actual_paths, allowed_patterns, dataset_ignore_globs) ) - required_missing_errors.extend( - _get_missing_required_errors(actual_paths, required_patterns) - ) + required_missing_errors.extend(_get_missing_required_errors(actual_paths, required_patterns)) errors = {} if not_allowed_errors: @@ -113,9 +108,7 @@ def _get_not_allowed_errors( return not_allowed_errors -def _get_missing_required_errors( - paths: List[str], required_patterns: List[str] -) -> List[str]: +def _get_missing_required_errors(paths: List[str], required_patterns: List[str]) -> List[str]: return [ pattern for pattern in required_patterns diff --git a/src/ingest_validation_tools/docs_utils.py b/src/ingest_validation_tools/docs_utils.py index f12647cac..b84a79e36 100644 --- a/src/ingest_validation_tools/docs_utils.py +++ b/src/ingest_validation_tools/docs_utils.py @@ -1,8 +1,8 @@ +import html import re -from string import Template from pathlib import Path -import html -from typing import Dict, Any +from string import Template +from typing import Any, Dict from urllib.parse import urlencode import requests @@ -110,9 +110,7 @@ def _get_portal_names_md(assay_types): if portal_name is None: links.append(f"{assay_type} not in Portal") continue - query = urlencode( - {"mapped_data_types[0]": portal_name, "entity_type[0]": "Dataset"} - ) + query = urlencode({"mapped_data_types[0]": portal_name, "entity_type[0]": "Dataset"}) url = f"https://portal.hubmapconsortium.org/search?{query}" links.append(f"[{portal_name}]({url})") return f'In the portal: {" / ".join(links)}' @@ -179,10 +177,7 @@ def generate_readme_md( if ( is_deprecated or is_draft - or ( - is_cedar - and max_version_table_schema.get("fields", [])[0].get("example", "") == "" - ) + or (is_cedar and max_version_table_schema.get("fields", [])[0].get("example", "") == "") ): tsv_url = "" xlsx_url = "" @@ -581,9 +576,7 @@ def _make_dir_descriptions(dir_schemas, pipeline_infos): """ - pipeline_infos_md = " and ".join( - make_pipeline_link(info) for info in pipeline_infos - ) + pipeline_infos_md = " and ".join(make_pipeline_link(info) for info in pipeline_infos) pipeline_blurb = ( f"The HIVE will process each dataset with\n{pipeline_infos_md}.\n" if pipeline_infos @@ -610,9 +603,7 @@ def 
_make_dir_descriptions(dir_schemas, pipeline_infos): f"Version {v}" f'{" (use this one)" if current_version else ""}' f"\n" - + _make_dir_description( - schema["files"], schema.get("deprecated", False) - ) + + _make_dir_description(schema["files"], schema.get("deprecated", False)) + "\n\n" ) current_version = False diff --git a/src/ingest_validation_tools/enums.py b/src/ingest_validation_tools/enums.py index 166df1f80..cdd8bc175 100644 --- a/src/ingest_validation_tools/enums.py +++ b/src/ingest_validation_tools/enums.py @@ -1,6 +1,5 @@ from typing import Dict, List - """ >>> import requests >>> local_names = shared_enums['assay_type'] diff --git a/src/ingest_validation_tools/error_report.py b/src/ingest_validation_tools/error_report.py index 5dca8c807..3e87e2340 100644 --- a/src/ingest_validation_tools/error_report.py +++ b/src/ingest_validation_tools/error_report.py @@ -1,8 +1,8 @@ -from yaml import Dumper, dump from typing import List, Union -from ingest_validation_tools.message_munger import munge, recursive_munge +from yaml import Dumper, dump +from ingest_validation_tools.message_munger import munge, recursive_munge # Force dump not to use alias syntax. # https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml diff --git a/src/ingest_validation_tools/plugin_validator.py b/src/ingest_validation_tools/plugin_validator.py index 1caef3ea2..c435d52c1 100644 --- a/src/ingest_validation_tools/plugin_validator.py +++ b/src/ingest_validation_tools/plugin_validator.py @@ -1,8 +1,9 @@ +import inspect import sys from importlib import util -import inspect -from typing import List, Union, Tuple, Iterator, Type from pathlib import Path +from typing import Iterator, List, Tuple, Type, Union + from ingest_validation_tools.schema_loader import SchemaVersion PathOrStr = Union[str, Path] @@ -43,9 +44,7 @@ class Validator(object): """float: a rough measure of cost to run. Lower is better. """ - def __init__( - self, base_paths: List[Path], assay_type: str, contains: List = [], **kwargs - ): + def __init__(self, base_paths: List[Path], assay_type: str, contains: List = [], **kwargs): """ base_paths is expected to be a list of directories. These are the root paths of the directory trees to be validated. 
@@ -59,9 +58,7 @@ def __init__( elif isinstance(base_paths, str): self.paths = [Path(base_paths)] else: - raise Exception( - f"Validator init received base_paths arg as type {type(base_paths)}" - ) + raise Exception(f"Validator init received base_paths arg as type {type(base_paths)}") self.assay_type = assay_type self.contains = contains @@ -94,9 +91,7 @@ def run_plugin_validators_iter( for column_name in ["assay_type", "dataset_type"]: if column_name in sv.rows[0]: if any(row[column_name] != sv.dataset_type for row in sv.rows): - raise ValidatorError( - f"{metadata_path} contains more than one assay type" - ) + raise ValidatorError(f"{metadata_path} contains more than one assay type") data_paths = [] if all("data_path" in row for row in sv.rows): @@ -105,9 +100,7 @@ def run_plugin_validators_iter( if not data_path.is_absolute(): data_path = (Path(metadata_path).parent / data_path).resolve() if not data_path.is_dir(): - raise ValidatorError( - f"{data_path} should be the base directory of a dataset" - ) + raise ValidatorError(f"{data_path} should be the base directory of a dataset") data_paths.append(data_path) for k, v in validation_error_iter( data_paths, sv.dataset_type, plugin_dir, sv.contains, **kwargs @@ -142,11 +135,7 @@ def validation_class_iter(plugin_dir: PathOrStr) -> Iterator[Type[Validator]]: sys.modules[mod_nm] = mod spec.loader.exec_module(mod) # type: ignore for _, obj in inspect.getmembers(mod): - if ( - inspect.isclass(obj) - and obj != Validator - and issubclass(obj, Validator) - ): + if inspect.isclass(obj) and obj != Validator and issubclass(obj, Validator): sort_me.append((obj.cost, obj.description, obj)) sort_me.sort() for _, _, cls in sort_me: diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index dd3555db3..f95754ebc 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -1,15 +1,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from pathlib import Path +import re from collections import defaultdict from copy import deepcopy -import re -from typing import List, Dict, Set, Sequence, Optional, Union +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, List, Optional, Sequence, Set, Union -from ingest_validation_tools.yaml_include_loader import load_yaml from ingest_validation_tools.enums import shared_enums - +from ingest_validation_tools.yaml_include_loader import load_yaml _table_schemas_path = Path(__file__).parent / "table-schemas" _directory_schemas_path = Path(__file__).parent / "directory-schemas" @@ -87,9 +86,7 @@ def get_row_data(self): assay_type = self.rows[0].get("assay_type") dataset_type = self.rows[0].get("dataset_type") if assay_type is not None and dataset_type is not None: - raise PreflightError( - f"Found both assay_type and dataset_type for path {self.path}!" 
- ) + raise PreflightError(f"Found both assay_type and dataset_type for path {self.path}!") else: self.dataset_type = assay_type if assay_type else dataset_type @@ -334,9 +331,7 @@ def _validate_level_1_enum(field: dict) -> None: name = field["name"] if name in shared_enums: - optional = not field["constraints"].get( - "required", True - ) # Default: required = True + optional = not field["constraints"].get("required", True) # Default: required = True actual = set( field["constraints"].get( "enum", @@ -346,8 +341,7 @@ def _validate_level_1_enum(field: dict) -> None: ) allowed = set(shared_enums[name]) assert actual <= allowed, ( - f"Unexpected enums for {name}: {actual - allowed}\n" - f"Allowed: {sorted(allowed)}" + f"Unexpected enums for {name}: {actual - allowed}\n" f"Allowed: {sorted(allowed)}" ) diff --git a/src/ingest_validation_tools/table_validator.py b/src/ingest_validation_tools/table_validator.py index 2867bb07a..25659f0e2 100644 --- a/src/ingest_validation_tools/table_validator.py +++ b/src/ingest_validation_tools/table_validator.py @@ -1,7 +1,7 @@ import csv -from pathlib import Path -from typing import List, Optional, Dict, Union from enum import Enum +from pathlib import Path +from typing import Dict, List, Optional, Union import frictionless @@ -38,9 +38,7 @@ def get_table_errors( schema_fields_dict = {field["name"]: field for field in schema["fields"]} - return [ - _get_message(error, schema_fields_dict, report_type) for error in task["errors"] - ] + return [_get_message(error, schema_fields_dict, report_type) for error in task["errors"]] def _get_pre_flight_errors(tsv_path: Path, schema: dict) -> Optional[List[str]]: @@ -51,9 +49,7 @@ def _get_pre_flight_errors(tsv_path: Path, schema: dict) -> Optional[List[str]]: delimiter = dialect.delimiter expected_delimiter = "\t" if delimiter != expected_delimiter: - return [ - f"Delimiter is {repr(delimiter)}, rather than expected {repr(expected_delimiter)}" - ] + return [f"Delimiter is {repr(delimiter)}, rather than expected {repr(expected_delimiter)}"] # Re-reading the file is ugly, but creating a stream seems gratuitous. with tsv_path.open() as tsv_handle: @@ -75,9 +71,7 @@ def _get_pre_flight_errors(tsv_path: Path, schema: dict) -> Optional[List[str]]: for i_pair in enumerate(zip(fields, expected_fields)): i, (actual, expected) = i_pair if actual != expected: - errors.append( - f'In column {i+1}, found "{actual}", expected "{expected}"' - ) + errors.append(f'In column {i+1}, found "{actual}", expected "{expected}"') return errors return None @@ -119,27 +113,19 @@ def _get_message( if "code" in error and error["code"] == "missing-label": msg = "Bug: Should have been caught pre-flight. File an issue." return msg if return_str else get_json(msg) - if ( - "rowPosition" in error - and "fieldName" in error - and "cell" in error - and "note" in error - ): + if "rowPosition" in error and "fieldName" in error and "cell" in error and "note" in error: msg = ( f'On row {error["rowPosition"]}, column "{error["fieldName"]}", ' f'value "{error["cell"]}" fails because {error["note"]}' f'{f". 
Example: {example}" if example else example}' ) - return ( - msg - if return_str - else get_json(msg, error["rowPosition"], error["fieldName"]) - ) + return msg if return_str else get_json(msg, error["rowPosition"], error["fieldName"]) return error["message"] if __name__ == "__main__": import argparse + from yaml import safe_load parser = argparse.ArgumentParser("CLI just for testing") diff --git a/src/ingest_validation_tools/upload.py b/src/ingest_validation_tools/upload.py index 71612b8d0..a5a10b34f 100644 --- a/src/ingest_validation_tools/upload.py +++ b/src/ingest_validation_tools/upload.py @@ -1,13 +1,13 @@ from __future__ import annotations -from copy import copy -import logging +import logging import subprocess from collections import Counter, defaultdict +from copy import copy from datetime import datetime from fnmatch import fnmatch from pathlib import Path -from typing import Any, Dict, List, Optional, Union, DefaultDict +from typing import Any, DefaultDict, Dict, List, Optional, Union import requests @@ -80,9 +80,7 @@ def __init__( self.globus_token, self.directory_path, ) - for path in ( - tsv_paths if tsv_paths else directory_path.glob(f"*{TSV_SUFFIX}") - ) + for path in (tsv_paths if tsv_paths else directory_path.glob(f"*{TSV_SUFFIX}")) } self.effective_tsv_paths = { @@ -195,9 +193,7 @@ def validation_routine( @property def multi_parent(self) -> Optional[SchemaVersion]: - multi_assay_parents = [ - sv for sv in self.effective_tsv_paths.values() if sv.contains - ] + multi_assay_parents = [sv for sv in self.effective_tsv_paths.values() if sv.contains] if len(multi_assay_parents) == 0: return if len(multi_assay_parents) > 1: @@ -235,23 +231,14 @@ def _check_upload(self) -> dict: def _get_local_tsv_errors(self) -> Optional[Dict]: errors: DefaultDict[str, list] = defaultdict(list) - types_counter = Counter( - [v.schema_name for v in self.effective_tsv_paths.values()] - ) - repeated = [ - assay_type for assay_type, count in types_counter.items() if count > 1 - ] + types_counter = Counter([v.schema_name for v in self.effective_tsv_paths.values()]) + repeated = [assay_type for assay_type, count in types_counter.items() if count > 1] if repeated: raise ErrorDictException( - { - "Repeated": f"There is more than one TSV for this type: {', '.join(repeated)}" - } + {"Repeated": f"There is more than one TSV for this type: {', '.join(repeated)}"} ) for path, schema in self.effective_tsv_paths.items(): - if ( - "data_path" not in schema.rows[0] - or "contributors_path" not in schema.rows[0] - ): + if "data_path" not in schema.rows[0] or "contributors_path" not in schema.rows[0]: errors.update( { f"{path} (as {schema.table_schema})": [ @@ -288,9 +275,7 @@ def _get_directory_errors(self) -> dict: errors.update(dir_errors) return errors - def _get_multi_assay_dir_errors( - self, path: str, dataset_types: Dict - ) -> Optional[Dict]: + def _get_multi_assay_dir_errors(self, path: str, dataset_types: Dict) -> Optional[Dict]: parent = dataset_types.get("parent") # Validate against parent multi-assay type if data_path is in parent TSV if parent: @@ -342,15 +327,11 @@ def _validate( return {f"{tsv_path} (as {schema_version.table_schema})": e} if schema.get("deprecated") and not self.ignore_deprecation: - return { - "Schema version is deprecated": f"{schema_version.table_schema}" - } + return {"Schema version is deprecated": f"{schema_version.table_schema}"} local_errors = get_table_errors(tsv_path, schema, report_type) if local_errors: - local_validated[ - f"{tsv_path} (as 
{schema_version.table_schema})" - ] = local_errors + local_validated[f"{tsv_path} (as {schema_version.table_schema})"] = local_errors else: """ Passing offline=True will skip all API/URL validation; @@ -359,9 +340,7 @@ def _validate( manually (see tests-manual/README.md) """ if self.offline: - logging.info( - f"{tsv_path}: Offline validation selected, cannot reach API." - ) + logging.info(f"{tsv_path}: Offline validation selected, cannot reach API.") return errors else: url_errors = self._cedar_url_checks(tsv_path, schema_version) @@ -395,9 +374,7 @@ def _get_plugin_errors(self, **kwargs) -> dict: # if this is a multi-assay upload, check all files ONCE # using the parent metadata file as a manifest, skipping # non-parent dataset_types - if not self.multi_parent or ( - sv.dataset_type == self.multi_parent.dataset_type - ): + if not self.multi_parent or (sv.dataset_type == self.multi_parent.dataset_type): for k, v in run_plugin_validators_iter( metadata_path, sv, plugin_path, **kwargs ): @@ -420,8 +397,7 @@ def _api_validation( errors["Request Errors"] = response.json() elif response.json()["reporting"] and len(response.json()["reporting"]) > 0: errors["Validation Errors"] = [ - self._get_message(error, report_type) - for error in response.json()["reporting"] + self._get_message(error, report_type) for error in response.json()["reporting"] ] else: logging.info(f"No errors found during CEDAR validation for {tsv_path}!") @@ -467,9 +443,7 @@ def _check_multi_assay_children(self): else: for row in sv.rows: if row.get("data_path"): - self.multi_assay_data_paths[row["data_path"]][ - "components" - ].append(sv) + self.multi_assay_data_paths[row["data_path"]]["components"].append(sv) necessary.remove(sv.dataset_type.lower()) message = "" if necessary: @@ -497,9 +471,7 @@ def _check_data_paths_shared_with_parent(self): # removing from multi_data_paths to trigger error downstream if not related_svs.get("components") and not related_svs.get("parent"): continue - existing_components = { - sv.dataset_type.lower() for sv in related_svs["components"] - } + existing_components = {sv.dataset_type.lower() for sv in related_svs["components"]} # If all required components are not present, add to missing_components # to trigger error downstream diff = set(self.multi_parent.contains).difference(existing_components) @@ -546,13 +518,9 @@ def _cedar_url_checks(self, tsv_path: str, schema_version: SchemaVersion): schema_name = schema_version.schema_name if "sample" in schema_name: - constrained_fields[ - "sample_id" - ] = "https://entity.api.hubmapconsortium.org/entities/" + constrained_fields["sample_id"] = "https://entity.api.hubmapconsortium.org/entities/" elif "organ" in schema_name: - constrained_fields[ - "organ_id" - ] = "https://entity.api.hubmapconsortium.org/entities/" + constrained_fields["organ_id"] = "https://entity.api.hubmapconsortium.org/entities/" elif "contributors" in schema_name: constrained_fields["orcid_id"] = "https://pub.orcid.org/v3.0/" else: @@ -568,15 +536,11 @@ def _cedar_url_checks(self, tsv_path: str, schema_version: SchemaVersion): def _check_matching_urls(self, tsv_path: str, constrained_fields: dict): rows = read_rows(Path(tsv_path), "ascii") fields = rows[0].keys() - missing_fields = [ - k for k in constrained_fields.keys() if k not in fields - ].sort() + missing_fields = [k for k in constrained_fields.keys() if k not in fields].sort() if missing_fields: return {f"Missing fields: {sorted(missing_fields)}"} if not self.globus_token: - return { - "No token": "No token was received to 
check URL fields against Entity API." - } + return {"No token": "No token was received to check URL fields against Entity API."} url_errors = [] for i, row in enumerate(rows): check = {k: v for k, v in row.items() if k in constrained_fields} @@ -592,9 +556,7 @@ def _check_matching_urls(self, tsv_path: str, constrained_fields: dict): ) response.raise_for_status() except Exception as e: - url_errors.append( - f"Row {i+2}, field '{field}' with value '{value}': {e}" - ) + url_errors.append(f"Row {i+2}, field '{field}' with value '{value}': {e}") return url_errors def _get_message( @@ -621,12 +583,7 @@ def _get_message( example = error.get("repairSuggestion", "") return_str = report_type is ReportType.STR - if ( - "errorType" in error - and "column" in error - and "row" in error - and "value" in error - ): + if "errorType" in error and "column" in error and "row" in error and "value" in error: # This may need readability improvements msg = ( f'On row {error["row"]}, column "{error["column"]}", ' @@ -644,9 +601,7 @@ def _check_path( metadata_path: Union[str, Path], ) -> Optional[Dict]: if ref == "data": - errors = self._check_data_path( - schema_version, Path(metadata_path), path_value - ) + errors = self._check_data_path(schema_version, Path(metadata_path), path_value) else: errors = self._check_other_path(Path(metadata_path), path_value, ref) return errors @@ -687,14 +642,10 @@ def _check_data_path( dataset_ignore_globs=self.dataset_ignore_globs, ) if ref_errors: - errors[ - f"{str(metadata_path)}, column 'data_path', value '{path_value}'" - ] = ref_errors + errors[f"{str(metadata_path)}, column 'data_path', value '{path_value}'"] = ref_errors return errors - def _check_other_path( - self, metadata_path: Path, other_path_value: str, path_type: str - ): + def _check_other_path(self, metadata_path: Path, other_path_value: str, path_type: str): errors = {} other_path = self.directory_path / other_path_value try: @@ -705,9 +656,7 @@ def _check_other_path( self.directory_path, ) except Exception as e: - errors[ - f"{metadata_path}, column '{path_type}_path', value '{other_path_value}'" - ] = [e] + errors[f"{metadata_path}, column '{path_type}_path', value '{other_path_value}'"] = [e] return errors tsv_ref_errors = self.validation_routine(tsv_paths={str(other_path): schema}) # TSV located and read, errors found @@ -736,12 +685,8 @@ def __get_no_ref_errors(self) -> dict: and not any([fnmatch(path.name, glob) for glob in self.upload_ignore_globs]) } unreferenced_paths = non_metadata_paths - referenced_data_paths - unreferenced_dir_paths = [ - path for path in unreferenced_paths if Path(path).is_dir() - ] - unreferenced_file_paths = [ - path for path in unreferenced_paths if not Path(path).is_dir() - ] + unreferenced_dir_paths = [path for path in unreferenced_paths if Path(path).is_dir()] + unreferenced_file_paths = [path for path in unreferenced_paths if not Path(path).is_dir()] errors = {} if unreferenced_dir_paths: errors["Directories"] = unreferenced_dir_paths @@ -755,9 +700,7 @@ def __get_multi_ref_errors(self) -> dict: errors = {} data_references = self.__get_data_references() multi_references = [ - path - for path, value in self.multi_assay_data_paths.items() - if value.get("parent") + path for path, value in self.multi_assay_data_paths.items() if value.get("parent") ] for path, references in data_references.items(): if path not in multi_references: diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index 8fc1fab24..41c602a51 100644 --- 
a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -1,21 +1,21 @@ -from collections import defaultdict import json import logging +from collections import defaultdict from csv import DictReader from pathlib import Path, PurePath from typing import DefaultDict, Dict, List, Optional, Union import requests +from ingest_validation_tools.directory_validator import ( + DirectoryValidationErrors, + validate_directory, +) from ingest_validation_tools.schema_loader import ( PreflightError, SchemaVersion, get_directory_schema, ) -from ingest_validation_tools.directory_validator import ( - validate_directory, - DirectoryValidationErrors, -) from ingest_validation_tools.table_validator import ReportType from ingest_validation_tools.test_validation_utils import ( compare_mock_with_response, @@ -64,21 +64,15 @@ def get_schema_version( offline=offline, ) if not assay_type_data: - message = ( - f"Assay data not retrieved from assayclassifier endpoint for TSV {path}." - ) + message = f"Assay data not retrieved from assayclassifier endpoint for TSV {path}." if "assay_type" in rows[0]: message += f' Assay type: {rows[0].get("assay_type")}.' elif "dataset_type" in rows[0]: message += f' Dataset type: {rows[0].get("dataset_type")}.' if "channel_id" in rows[0]: - message += ( - ' Has "channel_id": Antibodies TSV found where metadata TSV expected.' - ) + message += ' Has "channel_id": Antibodies TSV found where metadata TSV expected.' elif "orcid_id" in rows[0]: - message += ( - ' Has "orcid_id": Contributors TSV found where metadata TSV expected.' - ) + message += ' Has "orcid_id": Contributors TSV found where metadata TSV expected.' else: message += f' Column headers in TSV: {", ".join(rows[0].keys())}' raise PreflightError(message) @@ -114,12 +108,8 @@ def get_other_schema_name(rows: List, path: str) -> Optional[str]: else: match = {key: field for key, value in other_types.items() if field in value} other_type.update(match) - if other_type and ( - "assay_name" in rows[0].keys() or "dataset_type" in rows[0].keys() - ): - raise PreflightError( - f"Metadata TSV contains invalid field: {list(other_type.values())}" - ) + if other_type and ("assay_name" in rows[0].keys() or "dataset_type" in rows[0].keys()): + raise PreflightError(f"Metadata TSV contains invalid field: {list(other_type.values())}") if len(other_type) == 1: return list(other_type.keys())[0] elif len(other_type) > 1: @@ -194,9 +184,7 @@ def get_data_dir_errors( if schema is None: return {"Undefined directory schema": dir_schema} - schema_warning_fields = [ - field for field in schema if field in ["deprecated", "draft"] - ] + schema_warning_fields = [field for field in schema if field in ["deprecated", "draft"]] schema_warning = ( {f"{schema_warning_fields[0].title()} directory schema": dir_schema} if schema_warning_fields @@ -204,9 +192,7 @@ def get_data_dir_errors( ) try: - validate_directory( - data_path, schema["files"], dataset_ignore_globs=dataset_ignore_globs - ) + validate_directory(data_path, schema["files"], dataset_ignore_globs=dataset_ignore_globs) except DirectoryValidationErrors as e: # If there are DirectoryValidationErrors and the schema is deprecated/draft... # schema deprecation/draft status is more important. 
@@ -259,8 +245,7 @@ def get_context_of_decode_error(e: UnicodeDecodeError) -> str: def get_other_names(): return [ - p.stem.split("-v")[0] - for p in (Path(__file__).parent / "table-schemas/others").iterdir() + p.stem.split("-v")[0] for p in (Path(__file__).parent / "table-schemas/others").iterdir() ] diff --git a/src/ingest_validation_tools/yaml_include_loader.py b/src/ingest_validation_tools/yaml_include_loader.py index dccd690ec..e2a2f1a74 100644 --- a/src/ingest_validation_tools/yaml_include_loader.py +++ b/src/ingest_validation_tools/yaml_include_loader.py @@ -24,31 +24,27 @@ def load_yaml(path: Path) -> dict: def _load_includes(path: Path, indent: int = 0) -> str: text = path.read_text() - if re.match(r'\s', text[0]): - raise Exception(f'Unexpected padding in the first column: {path}') - if re.search(r'\S.*#\s*include:', text): + if re.match(r"\s", text[0]): + raise Exception(f"Unexpected padding in the first column: {path}") + if re.search(r"\S.*#\s*include:", text): raise Exception(f'"# include:" is not alone on a line in: {path}') expanded_text = re.sub( - r'^([ \t]*)#\s*include:\s*(\S+)', + r"^([ \t]*)#\s*include:\s*(\S+)", _expand_match_generator(path.parent), text, - flags=re.MULTILINE + flags=re.MULTILINE, + ) + indent_string = " " * indent + indented_expanded_text = ( + indent_string + + re.sub(r"^", lambda match: indent_string, expanded_text, flags=re.MULTILINE).strip() ) - indent_string = ' ' * indent - indented_expanded_text = indent_string + re.sub( - r'^', - lambda match: indent_string, - expanded_text, - flags=re.MULTILINE - ).strip() return indented_expanded_text def _expand_match_generator(parent_dir: Path) -> Callable: def _expand_match(match): - expanded = _load_includes( - parent_dir / match.group(2), - indent=len(match.group(1)) - ) + expanded = _load_includes(parent_dir / match.group(2), indent=len(match.group(1))) return expanded + return _expand_match diff --git a/src/validate_tsv.py b/src/validate_tsv.py index bb0046e4e..22c3fff5c 100755 --- a/src/validate_tsv.py +++ b/src/validate_tsv.py @@ -1,18 +1,14 @@ #!/usr/bin/env python3 import argparse -from pathlib import Path -import sys import inspect +import sys +from pathlib import Path -from ingest_validation_tools.error_report import ErrorReport from ingest_validation_tools.cli_utils import ShowUsageException, exit_codes +from ingest_validation_tools.error_report import ErrorReport from ingest_validation_tools.schema_loader import PreflightError -from ingest_validation_tools.validation_utils import ( - get_tsv_errors, - get_schema_version, -) - +from ingest_validation_tools.validation_utils import get_schema_version, get_tsv_errors reminder = ( "REMINDER: Besides running validate_tsv.py, " diff --git a/src/validate_upload.py b/src/validate_upload.py index 4da6b7d2d..84307e220 100755 --- a/src/validate_upload.py +++ b/src/validate_upload.py @@ -1,22 +1,22 @@ #!/usr/bin/env python3 import argparse -import sys -from pathlib import Path import inspect +import sys from datetime import datetime +from pathlib import Path +from ingest_validation_tools.check_factory import cache_path +from ingest_validation_tools.cli_utils import ShowUsageException, dir_path, exit_codes from ingest_validation_tools.error_report import ErrorReport from ingest_validation_tools.upload import Upload -from ingest_validation_tools.cli_utils import ShowUsageException, exit_codes, dir_path -from ingest_validation_tools.check_factory import cache_path directory_schemas = sorted( { p.stem - for p in ( - Path(__file__).parent / 
"ingest_validation_tools" / "directory-schemas" - ).glob("*.yaml") + for p in (Path(__file__).parent / "ingest_validation_tools" / "directory-schemas").glob( + "*.yaml" + ) } ) @@ -105,9 +105,7 @@ def make_parser(): # Are there plugin validations? - parser.add_argument( - "--plugin_directory", action="store", help="Directory of plugin tests." - ) + parser.add_argument("--plugin_directory", action="store", help="Directory of plugin tests.") parser.add_argument( "--run_plugins", required=False, @@ -126,9 +124,7 @@ def make_parser(): error_report_methods = [ name for (name, _) in inspect.getmembers(ErrorReport) if name.startswith("as_") ] - parser.add_argument( - "--output", choices=error_report_methods, default="as_text_list" - ) + parser.add_argument("--output", choices=error_report_methods, default="as_text_list") parser.add_argument( "--add_notes",