Skip to content

Commit

Permalink
fix bug
Browse files (browse the repository at this point in the history)
  • Loading branch information
omukazu committed Jun 27, 2024
1 parent 758dd42 commit 4bca62b
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/kwja/datamodule/datasets/seq2seq.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -57,6 +57,7 @@ def _load_examples(self, documents: List[Document], is_train: bool) -> List[Seq2
examples: List[Seq2SeqExample] = []
example_id: int = 0
for document in track(documents, description="Loading examples"):
document = self._postprocess_document(document)
for sentence in document.sentences:
src_tokens: List[str] = self.formatter.get_src_tokens(sentence)
src_input_ids: List[int] = self.tokenizer.convert_tokens_to_ids(src_tokens) + [
Expand Down
3 changes: 2 additions & 1 deletion src/kwja/utils/normalization.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,8 @@ def normalize_text(text: str) -> str:

def normalize_morpheme(morpheme: Morpheme) -> None:
morpheme.text = normalize_text(morpheme.text)
morpheme.reading = normalize_text(morpheme.reading)
reading = "ゔぁいおりん" if morpheme.reading == "う゛ぁいおりん" else morpheme.reading
morpheme.reading = normalize_text(reading)
morpheme.lemma = normalize_text(morpheme.lemma)
canon = morpheme.semantics.get("代表表記")
if isinstance(canon, str):
Expand Down

0 comments on commit 4bca62b

Please sign in to comment.