Apply black code style (#121)
thatbudakguy committed Sep 1, 2023
1 parent 3c5cb0e commit 59c2f25
Showing 12 changed files with 140 additions and 118 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -7,6 +7,7 @@
![pyversions](https://img.shields.io/pypi/pyversions/dphon.svg?style=flat)
[![zenodo](https://zenodo.org/badge/DOI/10.5281/zenodo.4641277.svg)](https://zenodo.org/record/4641277)
[![spaCy](https://img.shields.io/static/v1?label=made%20with%20%E2%9D%A4%20and&message=spaCy&color=09a3d5)](https://spacy.io)
+[![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

## installation

2 changes: 1 addition & 1 deletion dphon/__init__.py
@@ -1 +1 @@
-__version__ = '2.1.0-dev'
+__version__ = "2.1.0-dev"
17 changes: 8 additions & 9 deletions dphon/align.py
@@ -42,7 +42,7 @@ class SmithWatermanAligner(Aligner):
def __init__(self, scorer: Scorer_T = None, gap_char: str = "-") -> None:
self.scorer = scorer
self.gap_char = gap_char
logging.info(f"using {self.__class__} with gap_char=\"{gap_char}\"")
logging.info(f'using {self.__class__} with gap_char="{gap_char}"')

def _get_seqs(self, match: Match) -> Tuple[Seq_T, Seq_T]:
"""Get the two sequences to compare."""
@@ -56,16 +56,17 @@ def __call__(self, match: Match) -> Match:
and sequence texts calculated for the alignment."""

# compute the alignment and keep non-aligned regions
-        (lu, cu, _ru), (lv, cv, _rv), score = sw_align(*self._get_seqs(match),
-                                                       self.scorer)
+        (lu, cu, _ru), (lv, cv, _rv), score = sw_align(
+            *self._get_seqs(match), self.scorer
+        )

# use lengths of non-aligned regions to move the sequence boundaries
# [...] ["A", "B", "C"] [...]
# ----> <----
u, v = match.utxt.doc, match.vtxt.doc
us, vs = match.utxt.start + len(lu), match.vtxt.start + len(lv)
-        utxt = u[us:us + len(cu)]
-        vtxt = v[vs:vs + len(cv)]
+        utxt = u[us : us + len(cu)]
+        vtxt = v[vs : vs + len(cv)]

# use the gaps in the alignment to construct a new sequence of token
# texts, inserting gap_char wherever the aligner created a gap
@@ -113,8 +114,6 @@ def _get_seqs(self, match: Match) -> Tuple[Seq_T, Seq_T]:
# combine the phonemes for each token into a single string; if there's
# no phonetic content, use the token text in place of the phonemes
return (
["".join([p or "" for p in t._.phonemes])
or t.text for t in match.utxt],
["".join([p or "" for p in t._.phonemes])
or t.text for t in match.vtxt],
["".join([p or "" for p in t._.phonemes]) or t.text for t in match.utxt],
["".join([p or "" for p in t._.phonemes]) or t.text for t in match.vtxt],
)
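Two details in dphon/align.py are worth a gloss. The new `u[us : us + len(cu)]` spacing is black's PEP 8 treatment of slices whose bounds are compound expressions. And the comment about "non-aligned regions" describes how the aligned span is recovered: the left non-aligned segment of each sequence shifts the span start, and the aligned core sets its length. A toy sketch of that arithmetic, with invented values rather than dphon's real `sw_align` output:

```python
# Toy sketch of the boundary arithmetic (invented values, not dphon's API).
# sw_align yields (left, core, right) segments per sequence plus a score.
lu, cu = ["x"], ["A", "B", "C"]        # u: 1 leading token unaligned, 3 aligned
lv, cv = ["y", "z"], ["A", "B", "C"]   # v: 2 leading tokens unaligned, 3 aligned

start_u = start_v = 0                  # original span starts
us, vs = start_u + len(lu), start_v + len(lv)

print(us, us + len(cu))  # 1 4 -> aligned span of u covers tokens [1:4]
print(vs, vs + len(cv))  # 2 5 -> aligned span of v covers tokens [2:5]
```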
37 changes: 21 additions & 16 deletions dphon/cli.py
@@ -110,18 +110,22 @@ def run() -> None:
args = docopt(__doc__, version=__version__)

# install global logging and exception handlers
-    logging.basicConfig(level=LOG_LEVELS[args["-v"]], format="%(message)s",
-                        datefmt="[%X]", handlers=[RichHandler(console=err_console)])
+    logging.basicConfig(
+        level=LOG_LEVELS[args["-v"]],
+        format="%(message)s",
+        datefmt="[%X]",
+        handlers=[RichHandler(console=err_console)],
+    )
logging.captureWarnings(True)
traceback.install()

# setup pipeline
nlp = setup(args)

# setup match highlighting
-    console.highlighter = MatchHighlighter(g2p=nlp.get_pipe("g2p"),
-                                           context=int(args["--context"]),
-                                           gap_char=" ")
+    console.highlighter = MatchHighlighter(
+        g2p=nlp.get_pipe("g2p"), context=int(args["--context"]), gap_char=" "
+    )

# process all texts
graph = process(nlp, args)
@@ -158,17 +162,15 @@ def run() -> None:
def setup(args: Dict) -> Language:
"""Set up the spaCy processing pipeline."""
# get sound table
-    v2_path = pkg_resources.resource_filename(
-        __package__, "data/sound_table_v2.json")
+    v2_path = pkg_resources.resource_filename(__package__, "data/sound_table_v2.json")
sound_table = get_sound_table_json(Path(v2_path))

# add Doc metadata
if not Doc.has_extension("id"):
Doc.set_extension("id", default="")

# setup spaCy model
-    nlp = spacy.blank(
-        "zh", meta={"tokenizer": {"config": {"use_jieba": False}}})
+    nlp = spacy.blank("zh", meta={"tokenizer": {"config": {"use_jieba": False}}})
nlp.add_pipe("g2p", config={"sound_table": sound_table})
nlp.add_pipe("ngrams", config={"n": int(args["--ngram-order"])})
nlp.add_pipe("ngram_phonemes_index", name="index")
@@ -191,7 +193,7 @@ def process(nlp: Language, args: Dict) -> MatchGraph:
for doc, context in nlp.pipe(load_texts(args["<path>"]), as_tuples=True):
doc._.id = context["id"]
graph.add_doc(context["id"], doc)
logging.debug(f"indexed doc \"{doc._.id}\"")
logging.debug(f'indexed doc "{doc._.id}"')
stop = time.perf_counter() - start
logging.info(f"indexed {graph.number_of_docs()} docs in {stop:.1f}s")

@@ -214,12 +216,14 @@ def process(nlp: Language, args: Dict) -> MatchGraph:
with progress:
for _seed, locations in groups:
logging.debug(
f"evaluating seed group \"{locations[0].text}\", size={len(locations)}")
f'evaluating seed group "{locations[0].text}", size={len(locations)}'
)
progress.update(task, seed=locations[0].text)
for utxt, vtxt in combinations(locations, 2):
if utxt.doc._.id != vtxt.doc._.id: # skip same-doc matches
graph.add_match(
-                        Match(utxt.doc._.id, vtxt.doc._.id, utxt, vtxt, 1.0))
+                        Match(utxt.doc._.id, vtxt.doc._.id, utxt, vtxt, 1.0)
+                    )
progress.advance(task)
stop = time.perf_counter() - start
logging.info(f"seeded {graph.number_of_matches()} matches in {stop:.1f}s")
Expand All @@ -230,10 +234,11 @@ def process(nlp: Language, args: Dict) -> MatchGraph:
graph.filter(has_variant)

# extend all matches
-    graph.extend(LevenshteinPhoneticExtender(
-        threshold=float(args["--threshold"]),
-        len_limit=int(args["--len-limit"])
-    ))
+    graph.extend(
+        LevenshteinPhoneticExtender(
+            threshold=float(args["--threshold"]), len_limit=int(args["--len-limit"])
+        )
+    )

# align all matches
graph.align(SmithWatermanPhoneticAligner(gap_char=" "))
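Most of the churn in dphon/cli.py above is black's line-length logic: a call that overflows the default 88-column limit is split after the opening parenthesis, with the arguments kept on one indented line when they fit there. A sketch of that behavior through black's API (assuming black is installed; the expected output is shown in comments):

```python
# Sketch: how black rewraps an overlong call (assumes `pip install black`).
import black

src = (
    "console.highlighter = MatchHighlighter("
    'g2p=nlp.get_pipe("g2p"), context=int(args["--context"]), gap_char=" ")\n'
)
# This should print the three-line form seen in the diff above:
# console.highlighter = MatchHighlighter(
#     g2p=nlp.get_pipe("g2p"), context=int(args["--context"]), gap_char=" "
# )
print(black.format_str(src, mode=black.Mode(line_length=88)), end="")
```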
22 changes: 10 additions & 12 deletions dphon/console.py
@@ -9,12 +9,9 @@


# Default color scheme for highlighting matches
-DEFAULT_THEME = Theme({
-    "context": "dim",
-    "variant": "blue",
-    "insertion": "green",
-    "mismatch": "red"
-})
+DEFAULT_THEME = Theme(
+    {"context": "dim", "variant": "blue", "insertion": "green", "mismatch": "red"}
+)

# Consoles for rendering output
console = Console(theme=DEFAULT_THEME, soft_wrap=True)
@@ -28,12 +25,13 @@ class MatchHighlighter(RegexHighlighter):
gap_char: str
g2p: GraphemesToPhonemes

-    def __init__(self, g2p: GraphemesToPhonemes, context: int = 0, gap_char: str = "-") -> None:
+    def __init__(
+        self, g2p: GraphemesToPhonemes, context: int = 0, gap_char: str = "-"
+    ) -> None:
"""Create a new highlighter with optional context for each match."""
# can't have negative context
if context < 0:
-            raise ValueError(
-                f"{self.__class__} context must be greater than 0")
+            raise ValueError(f"{self.__class__} context must be greater than 0")

# store parameters
self.context = context
@@ -44,7 +42,7 @@ def __init__(self, g2p: GraphemesToPhonemes, context: int = 0, gap_char: str = "-") -> None:
def format_match(self, match: Match) -> Tuple[str, str]:
"""Return match sequences as Rich format strings, with optional context.
-        Adds markup for highlighting insertions, mismatches, etc. If context is 
+        Adds markup for highlighting insertions, mismatches, etc. If context is
set, also adds highlighted context to either end of the match.
"""

@@ -83,7 +81,7 @@ def _mark(self, match: Match) -> Tuple[str, str]:
continue

# gap in v: insertion in u (if not punctuation)
-            if match.av[i] == self.gap_char and  match.au[i].isalnum():
+            if match.av[i] == self.gap_char and match.au[i].isalnum():
su.append(f"[insertion]{match.au[i]}[/insertion]")
sv.append(match.av[i])
u_ptr += 1
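The change in this hunk is whitespace-only, but the surrounding `_mark` logic deserves a gloss: a gap in the aligned v sequence sitting opposite an alphanumeric u token is rendered as an insertion in u, using Rich markup. A toy sketch with invented alignment data (not dphon's real pointer-tracking code):

```python
# Toy sketch of the insertion-marking idea (invented alignment data).
gap_char = "-"
au = ["天", "下", "之"]  # aligned u tokens
av = ["天", "-", "之"]   # aligned v tokens; "-" marks a gap
marked = []
for u_tok, v_tok in zip(au, av):
    if v_tok == gap_char and u_tok.isalnum():
        marked.append(f"[insertion]{u_tok}[/insertion]")  # Rich markup tag
    else:
        marked.append(u_tok)
print("".join(marked))  # 天[insertion]下[/insertion]之
```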
@@ -133,5 +131,5 @@ def transcription(self, match: Match) -> Tuple[str, str]:
"""Get the phonemic transcription for the match for display."""
return (
"*" + " ".join(match.utxt._.syllables),
"*" + " ".join(match.vtxt._.syllables)
"*" + " ".join(match.vtxt._.syllables),
)
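The lone added comma in `transcription` is black's trailing-comma behavior: once a bracketed construct has to span multiple lines, black puts a comma after its last element. A sketch with invented names (assuming black is installed; expected output in comments):

```python
# Sketch: black appends a trailing comma when it must split a construct
# (assumes `pip install black`; the function names are invented).
import black

src = "pair = (first_long_expression(alpha, beta), second_long_expression(gamma, delta))\n"
# Expected output:
# pair = (
#     first_long_expression(alpha, beta),
#     second_long_expression(gamma, delta),
# )
print(black.format_str(src, mode=black.Mode(line_length=50)), end="")
```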
23 changes: 11 additions & 12 deletions dphon/corpus.py
@@ -54,7 +54,7 @@ def __call__(self, paths: Iterable[str]) -> Iterable[DocInfo_T]:
Output is a single tuple of (contents, metadata) where "contents" is the
contents of the file as a string and "metadata" is an arbitrary dict.
-        One tuple per doc should be returned for consumption by spaCy's 
+        One tuple per doc should be returned for consumption by spaCy's
`nlp.pipe(as_tuples=True)`.
"""
raise NotImplementedError
@@ -78,7 +78,8 @@ def _check(self, paths: Iterable[str]) -> Dict[Path, Any]:
logging.debug(f"found {file.resolve()}, size={size}B")
else:
logging.warning(
f"path {file.resolve()} isn't a {self.filetype} file")
f"path {file.resolve()} isn't a {self.filetype} file"
)

# if no valid files were found, notify the user and exit. otherwise
# report the total number of files found
@@ -113,21 +114,19 @@ def __call__(self, paths: Iterable[str]) -> Iterable[DocInfo_T]:

# sort files by size, largest first, to speed up processing by spaCy
files = self._check(paths)
-        files_by_size = OrderedDict(sorted(files.items(),
-                                           key=lambda f: f[1]["size"],
-                                           reverse=True))
+        files_by_size = OrderedDict(
+            sorted(files.items(), key=lambda f: f[1]["size"], reverse=True)
+        )

# track progress
-        task = self.progress.add_task(
-            "indexing", filename="", total=len(files))
+        task = self.progress.add_task("indexing", filename="", total=len(files))

# open each file and yield contents with metadata as DocInfo_T
with self.progress:
for file, meta in files_by_size.items():
self.progress.update(task, filename=file.name)
with file.open(encoding="utf8") as contents:
-                    logging.debug(
-                        f"loaded doc \"{meta['id']}\" from {file.resolve()}")
+                    logging.debug(f"loaded doc \"{meta['id']}\" from {file.resolve()}")
yield contents.read().translate(OC_TEXT), {"id": meta["id"]}
self.progress.advance(task)

@@ -157,8 +156,7 @@ def __call__(self, paths: Iterable[str]) -> Iterable[DocInfo_T]:

# track progress
files = self._check(paths)
-        task = self.progress.add_task(
-            "indexing", filename="", total=len(files))
+        task = self.progress.add_task("indexing", filename="", total=len(files))

# open each file and yield each line, with all properties except "text"
# being passed as second element in tuple
Expand All @@ -169,6 +167,7 @@ def __call__(self, paths: Iterable[str]) -> Iterable[DocInfo_T]:
for doc in reader:
meta = {k: v for k, v in doc.items() if k != "text"}
logging.debug(
f"loaded doc \"{doc['id']}\" from {file.resolve()}")
f"loaded doc \"{doc['id']}\" from {file.resolve()}"
)
yield doc["text"].translate(OC_TEXT), meta
self.progress.advance(task)
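One reflowed call in dphon/corpus.py is worth a second look beyond formatting: `files_by_size` orders the corpus largest-first because, per the comment in the diff, that speeds up processing by spaCy. A self-contained sketch of the same ordering with dummy data:

```python
# Self-contained sketch of the size-descending ordering above (dummy data).
from collections import OrderedDict

files = {
    "small.txt": {"size": 10},
    "large.txt": {"size": 300},
    "medium.txt": {"size": 40},
}
files_by_size = OrderedDict(
    sorted(files.items(), key=lambda f: f[1]["size"], reverse=True)
)
print(list(files_by_size))  # ['large.txt', 'medium.txt', 'small.txt']
```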