Commit 3168f1c
re-ran linting/formatting after rebase
gesinaphillips committed Feb 2, 2024
1 parent 3f2022a commit 3168f1c
Showing 22 changed files with 306 additions and 462 deletions.
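Note: the hunks below are consistent with running black (quote normalization, magic trailing commas, collapsing short call sites) and isort (grouped, alphabetized imports) over the tree. Neither tool nor its configuration is named on this page, so treat the following as a sketch under that assumption; the line length is a guess based on the ~93-character collapsed argparse calls below, not a confirmed setting.

import black  # assumes the black package is installed

# Reproduce the characteristic rewrite seen in these hunks: single quotes
# become double quotes, and a short argument list collapses onto one line.
before = "parser.add_argument(\n    '--tsv_in',\n    type=Path,\n    metavar='INPUT')\n"
after = black.format_str(before, mode=black.Mode(line_length=99))
print(after)  # parser.add_argument("--tsv_in", type=Path, metavar="INPUT")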
90 changes: 44 additions & 46 deletions src/cleanup_whitespace.py
@@ -1,34 +1,30 @@
 #!/usr/bin/env python3
 
+import argparse
 import csv
 import sys
-import argparse
 from pathlib import Path
 
 
 def main():
     parser = argparse.ArgumentParser(
-        description='''
+        description="""
 Use the "--tsv_in"/"--tsv_out" options to strip invisible characters from TSVs.
-'''
+"""
     )
     mutex = parser.add_mutually_exclusive_group(required=True)
     mutex.add_argument(
-        '--tsv_in',
-        type=Path,
-        metavar='INPUT',
-        help='TSV to strip padding whitespace from')
+        "--tsv_in", type=Path, metavar="INPUT", help="TSV to strip padding whitespace from"
+    )
     mutex.add_argument(
-        '--encoding_test',
+        "--encoding_test",
         type=str,
-        metavar='ENCODING',
-        help='Generate test TSV using this encoding')
+        metavar="ENCODING",
+        help="Generate test TSV using this encoding",
+    )
     parser.add_argument(
-        '--tsv_out',
-        type=Path,
-        metavar='OUTPUT',
-        help='Destination for clean TSV',
-        required=True)
+        "--tsv_out", type=Path, metavar="OUTPUT", help="Destination for clean TSV", required=True
+    )
     args = parser.parse_args()
 
     if args.encoding_test:
@@ -40,61 +36,63 @@ def main():
 
 def print_encoding_test(encoding, output_path):
     space_chars = [
-        '\u000b',  # vertical tab
-        '\u0020',  # normal space
+        "\u000b",  # vertical tab
+        "\u0020",  # normal space
     ]
-    if encoding != 'ascii':
+    if encoding != "ascii":
         space_chars += [
-            '\u00a0',  # non-breaking space
+            "\u00a0",  # non-breaking space
         ]
-    if encoding not in ['ascii', 'latin-1']:
+    if encoding not in ["ascii", "latin-1"]:
         space_chars += [
-            '\u2003',  # em space
-            '\u3000',  # idiographic space
+            "\u2003",  # em space
+            "\u3000",  # idiographic space
         ]
-    padding = ''.join(space_chars)
+    padding = "".join(space_chars)
 
-    with output_path.open(mode='w', encoding=encoding) as f:
+    with output_path.open(mode="w", encoding=encoding) as f:
         # Header:
         print(
-            'quoted', 'empty', 'padded',
-            '',  # Empty column header: should be cleaned up!
-            sep='\t', file=f
+            "quoted",
+            "empty",
+            "padded",
+            "",  # Empty column header: should be cleaned up!
+            sep="\t",
+            file=f,
         )
 
         # Body:
         print(
             f'"{padding}123{padding}"',
-            '',
-            f'{padding}123{padding}',
-            '', '',  # Two empty cells: should be cleaned up!
-            sep='\t', file=f
-        )
-        print(
-            '', '', '', '',  # More empty cells: should be cleaned up!
-            sep='\t', file=f
+            "",
+            f"{padding}123{padding}",
+            "",
+            "",  # Two empty cells: should be cleaned up!
+            sep="\t",
+            file=f,
         )
+        print("", "", "", "", sep="\t", file=f)  # More empty cells: should be cleaned up!
         # Trailing \n means there's a trailing empty line in the TSV to clean up.
     return 0
 
 
 def print_clean_tsv(input_path, output_path):
-    dialect = 'excel-tab'
-    writer = csv.writer(output_path.open(mode='w', newline=''), dialect=dialect)
+    dialect = "excel-tab"
+    writer = csv.writer(output_path.open(mode="w", newline=""), dialect=dialect)
 
-    for encoding in ['utf-8', 'latin-1']:
-        warn(f'Trying to read {input_path} as {encoding}...')
+    for encoding in ["utf-8", "latin-1"]:
+        warn(f"Trying to read {input_path} as {encoding}...")
         try:
             # Read the file completely to determine if there are encoding problems,
             # rather than reading and writing line-by-line.
             rows = csv_to_rows(input_path, encoding=encoding, dialect=dialect)
             clean_rows = clean(rows)
             for row in clean_rows:
                 writer.writerow(row)
-            warn('Read succeeded')
+            warn("Read succeeded")
             return 0
         except UnicodeDecodeError as e:
-            warn(f'Read failed: {e}')
+            warn(f"Read failed: {e}")
             continue
     return 1
 
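The encoding gates in print_encoding_test above reflect what each codec can represent: ASCII has none of the exotic spaces, latin-1 adds only the non-breaking space, and the em and ideographic spaces need a Unicode encoding. A stdlib-only check of that claim:

for char, name in [("\u00a0", "non-breaking space"), ("\u2003", "em space")]:
    for encoding in ["ascii", "latin-1", "utf-8"]:
        try:
            char.encode(encoding)
            print(f"{name} is representable in {encoding}")
        except UnicodeEncodeError:
            print(f"{name} is NOT representable in {encoding}")
# non-breaking space: latin-1 and utf-8 only; em space: utf-8 only.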
@@ -108,7 +106,7 @@ def csv_to_rows(tsv_path, encoding=None, dialect=None):
 
 
 def clean(rows):
-    '''
+    """
     >>> clean([
     ...     [' x', 'y ', ''],
     ...     ['', ' Hi! ', '', ''],
@@ -117,7 +115,7 @@ def clean(rows):
     ... ])
     [['x', 'y'], ['', 'Hi!']]
-    '''
+    """
    clean_rows = []
    max_i = None
    for row in rows:
@@ -126,16 +124,16 @@ def clean(rows):
             continue
         if max_i is None:
             max_i = last_non_empty_index(stripped_row)
-        clean_rows.append(stripped_row[:max_i + 1])
+        clean_rows.append(stripped_row[: max_i + 1])
     return clean_rows
 
 
 def last_non_empty_index(values):
-    '''
+    """
     >>> last_non_empty_index(['', '', '0', '', ''])
     2
-    '''
+    """
     return max(i for i, val in enumerate(values) if len(val))
 
 
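A standalone sketch of the decode-then-write pattern print_clean_tsv uses, with a hypothetical file name: try the strict codec first and fall back to the permissive one. Because latin-1 maps all 256 byte values, the second pass can never raise UnicodeDecodeError, so the loop always returns on one of its two iterations.

from pathlib import Path


def read_with_fallback(path: Path) -> tuple[str, str]:
    for encoding in ["utf-8", "latin-1"]:
        try:
            # Decode the whole file up front, as print_clean_tsv does,
            # so encoding problems surface before any output is written.
            return path.read_text(encoding=encoding), encoding
        except UnicodeDecodeError:
            continue
    raise AssertionError("unreachable: latin-1 decodes any byte sequence")


text, used = read_with_fallback(Path("example.tsv"))  # hypothetical file
print(f"decoded with {used}")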
57 changes: 31 additions & 26 deletions src/factor_field.py
@@ -1,32 +1,33 @@
 #!/usr/bin/env python3
 
-import sys
 import argparse
-from pathlib import Path
 import fileinput
+import sys
 from collections import defaultdict
+from pathlib import Path
 
 
 def main():
-    parser = argparse.ArgumentParser(description='''
+    parser = argparse.ArgumentParser(
+        description="""
 Factor out all variants of a given field.
-''')
-    parser.add_argument(
-        '--field',
-        metavar='NAME',
-        required=True)
+"""
+    )
+    parser.add_argument("--field", metavar="NAME", required=True)
     parser.add_argument(
-        '--input_dir',
+        "--input_dir",
         type=Path,
-        metavar='IN',
-        help='Directory to scan for instances of the field',
-        default='src/ingest_validation_tools/table-schemas/assays')
+        metavar="IN",
+        help="Directory to scan for instances of the field",
+        default="src/ingest_validation_tools/table-schemas/assays",
+    )
     parser.add_argument(
-        '--output_dir',
+        "--output_dir",
         type=Path,
-        metavar='OUT',
-        help='Directory to write field extracts',
-        default='src/ingest_validation_tools/table-schemas/includes/fields')
+        metavar="OUT",
+        help="Directory to write field extracts",
+        default="src/ingest_validation_tools/table-schemas/includes/fields",
+    )
     args = parser.parse_args()
 
     factor_field(args.field, args.input_dir, args.output_dir)
@@ -46,18 +47,22 @@ def pull(field_name, input_dir):
             lines=lines,
             get_file_name=lambda: str(fileinput.filename()),
             field_name=field_name,
-            definitions=definitions
+            definitions=definitions,
         )
     return definitions
 
 
 def push(field_name, definitions, output_dir):
-    options = [
-        f"# {'; '.join(sorted(files))}\n{definition}"
-        for definition, files in definitions.items()
-    ] if len(definitions) > 1 else definitions.keys()
+    options = (
+        [
+            f"# {'; '.join(sorted(files))}\n{definition}"
+            for definition, files in definitions.items()
+        ]
+        if len(definitions) > 1
+        else definitions.keys()
+    )
     if options:
-        (output_dir / f'{field_name}.yaml').write_text('\n'.join(options))
+        (output_dir / f"{field_name}.yaml").write_text("\n".join(options))
     else:
         print(f"Check spelling of field name: '{field_name}'")
         sys.exit(1)
@@ -93,18 +98,18 @@ def replace(lines, get_file_name, field_name, definitions):
     definition = None
     for line in lines:
         # This assumes the YAML has been cleaned up!
-        if f'name: {field_name}' in line:
+        if f"name: {field_name}" in line:
             inside = True
-            print(f'# include: ../includes/fields/{field_name}.yaml')
+            print(f"# include: ../includes/fields/{field_name}.yaml")
             definition = line
             continue
-        elif inside and line[0] not in ['-', '#']:
+        elif inside and line[0] not in ["-", "#"]:
             definition += line
             continue
         elif inside:
             definitions[definition].add(get_file_name())
             inside = False
-        print(line, end='')
+        print(line, end="")
 
 
 if __name__ == "__main__":
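factor_field's pull/push pair pivots on a defaultdict(set) that maps each distinct definition body to the files it appears in; when more than one variant exists, push prefixes each with a comment listing its source files, as the options expression above shows. A toy illustration with made-up field and file names:

from collections import defaultdict

definitions = defaultdict(set)
definitions["name: thumbnail\ntype: string\n"].add("a.yaml")
definitions["name: thumbnail\ntype: string\n"].add("b.yaml")
definitions["name: thumbnail\nrequired: True\n"].add("c.yaml")

# Mirrors the len(definitions) > 1 branch of push():
options = [
    f"# {'; '.join(sorted(files))}\n{definition}"
    for definition, files in definitions.items()
]
print("\n".join(options))
# The first variant is prefixed "# a.yaml; b.yaml", the second "# c.yaml".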
30 changes: 14 additions & 16 deletions src/generate_docs.py
@@ -2,29 +2,29 @@
 
 import argparse
 import os
-from pathlib import Path
 import sys
-from yaml import dump as dump_yaml
+from pathlib import Path
 
 from tableschema_to_template.create_xlsx import create_xlsx
+from yaml import dump as dump_yaml
 
+from ingest_validation_tools.cli_utils import dir_path
+from ingest_validation_tools.docs_utils import (
+    generate_readme_md,
+    generate_template_tsv,
+    get_tsv_name,
+    get_xlsx_name,
+)
 from ingest_validation_tools.schema_loader import (
-    dict_table_schema_versions,
-    get_table_schema,
     dict_directory_schema_versions,
+    dict_table_schema_versions,
+    enum_maps_to_lists,
     get_directory_schema,
+    get_fields_wo_headers,
     get_is_assay,
-    enum_maps_to_lists,
     get_pipeline_infos,
-    get_fields_wo_headers,
-)
-from ingest_validation_tools.docs_utils import (
-    get_tsv_name,
-    get_xlsx_name,
-    generate_template_tsv,
-    generate_readme_md,
+    get_table_schema,
 )
-from ingest_validation_tools.cli_utils import dir_path
 
 
 def main():
@@ -158,9 +158,7 @@ def main():
         )
         max_schema["fields"] = get_fields_wo_headers(max_schema)
         if max_schema["fields"][0]["name"] != "is_cedar":
-            with open(
-                deprecated_path / get_tsv_name(args.type, is_assay=is_assay), "w"
-            ) as f:
+            with open(deprecated_path / get_tsv_name(args.type, is_assay=is_assay), "w") as f:
                 f.write(generate_template_tsv(max_schema))
             create_xlsx(
                 max_schema,
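The generate_docs.py hunk above is pure import reordering, matching isort's defaults: plain imports before from-imports within a section, sections split into standard library / third-party / first-party, and names inside parenthesized imports alphabetized. A sketch assuming the isort package is installed; the repo's actual isort configuration is not shown on this page, and section assignment can vary with what is installed locally:

import isort

messy = (
    "from pathlib import Path\n"
    "import sys\n"
    "from yaml import dump as dump_yaml\n"
    "import argparse\n"
)
print(isort.code(messy))
# Expected (environment permitting):
# import argparse
# import sys
# from pathlib import Path
#
# from yaml import dump as dump_yaml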
4 changes: 2 additions & 2 deletions src/generate_field_enum_csv.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
+import argparse
 import sys
 from csv import DictWriter
-import argparse
 
 from ingest_validation_tools.schema_loader import (
-    list_table_schema_versions,
     get_table_schema,
+    list_table_schema_versions,
 )
 
 
