Skip to content

Commit

Permalink
Fixed unit test cases to account for the corrected neutral tone handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
andreihar committed Jan 4, 2024
1 parent a1558f4 commit 3bd7f09
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 16 deletions.
2 changes: 1 addition & 1 deletion taibun/data/words.json
Original file line number Diff line number Diff line change
Expand Up @@ -2155,7 +2155,7 @@
"呃酸": "eh-sng",
"厄運": "eh-ūn",
"嬰仔名": "enn-á-miâ/inn-á-miâ",
"嬰仔": "enn-á/inn--á",
"嬰仔": "enn-á/inn-á",
"嬰": "enn/inn",
"狹小": "e̍h-sió/ue̍h-sió",
"狹細": "e̍h-sè/ue̍h-suè",
Expand Down
11 changes: 6 additions & 5 deletions taibun/taibun.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,9 @@ def __set_default_sandhi(self):
def __get_number_tones(self, input):
words = self.__preprocess_word(input[0])
number_tones = [self.__get_number_tone(w) for w in words if len(w) > 0]
replace_with_zero = False
number_tones = [s[:-1] + '0' if replace_with_zero or (replace_with_zero := s[-1] == '0') else s for s in number_tones]
if self.sandhi or self.format == 'number':
replace_with_zero = False
number_tones = [s[:-1] + '0' if replace_with_zero or (replace_with_zero := s[-1] == '0') else s for s in number_tones]
if self.sandhi:
number_tones = self.__tone_sandhi(number_tones, input[1])
return number_tones
Expand Down Expand Up @@ -153,7 +154,7 @@ def __get_number_tone(self, input):
elif re.search('̍', input): input += '8'
elif input[-1] in finals: input += '4'
else: input += '1'
if input.startswith(self.suffix_token):
if input.startswith(self.suffix_token) and (input[-1] == 'h' or self.sandhi or self.format == 'number'):
input = input[:-1] + '0'
input = "".join(c for c in unicodedata.normalize("NFD", input) if unicodedata.category(c) != "Mn")
return input
Expand Down Expand Up @@ -228,7 +229,7 @@ def __tailo_to_poj(self, input):
# Helper to convert syllable from Tai-lo to 方音符號 (zhuyin)
def __tailo_to_zhuyin(self, input):
convert = {
'p4':'ㆴ4', 'p8':'ㆴ8', 'k4':'ㆶ4', 'k8':'ㆶ8', 't4':'ㆵ4', 't8':'ㆵ8', 'h4':'ㆷ4', 'h8':'ㆷ8',
'p4':'ㆴ4', 'p8':'ㆴ8', 'k4':'ㆶ4', 'k8':'ㆶ8', 't4':'ㆵ4', 't8':'ㆵ8', 'h4':'ㆷ4', 'h8':'ㆷ8', 'h0': '0',
'tshing':'ㄑㄧㄥ', 'tshinn':'ㄑㆪ', 'phing':'ㄆㄧㄥ', 'phinn':'ㄆㆪ', 'tsing':'ㄐㄧㄥ', 'tsinn':'ㄐㆪ',
'ainn':'ㆮ', 'aunn':'ㆯ', 'giok':'ㆣㄧㄜㆶ', 'ngai':'ㄫㄞ', 'ngau':'ㄫㄠ', 'ngoo':'ㄫㆦ', 'ping':'ㄅㄧㄥ',
'pinn':'ㄅㆪ', 'senn':'ㄙㆥ', 'sing':'ㄒㄧㄥ', 'sinn':'ㄒㆪ', 'tshi':'ㄑㄧ',
Expand Down Expand Up @@ -338,7 +339,7 @@ def __tailo_to_ipa(self, input):
if self.dialect == 'north':
convert.update({'o':'o'})
convert2 = {
'p4':'p̚4','p8':'p̚8','k4':'k̚4','k8':'k̚8','t4':'t̚4','t8':'t̚8','h4':'ʔ4','h8':'ʔ8','si':'ɕi'}
'p4':'p̚4','p8':'p̚8','k4':'k̚4','k8':'k̚8','t4':'t̚4','t8':'t̚8','h4':'ʔ4','h8':'ʔ8','si':'ɕi','h0':'ʔ0'}
tones = ['', '⁴⁴', '⁵³', '¹¹', '²¹', '²⁵', '', '²²', '⁵'] if self.dialect != 'north' else ['', '⁵⁵', '⁵¹', '²¹', '³²', '²⁴', '', '³³', '⁴']
convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
convert2.update({k.capitalize(): v.capitalize() for k, v in convert2.items()})
Expand Down
4 changes: 2 additions & 2 deletions tests/test_delimiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_default():
(["ㄆㄧㄠ ㄍㆤ˪","ㄢˋ ㄋㆤ/ㄢˋ ㄋㄧ","ㄍㄞ˪ ㄒㄧㄠ˫ ㆢㄧㄣˊ/ㄍㄞ˪ ㄒㄧㄠ˫ ㄌㄧㄣˊ","ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣ ㄚˋ ㆢㄧㆵ˙ ㄐㄧㄣ ㄏㄜˋ ㆤˊ ㆢㄧㆵ˙ ㄐㄧˋ/ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣ ㄚˋ ㄌㄧㆵ˙ ㄐㄧㄣ ㄏㄜˋ ㆤˊ ㄌㄧㆵ˙ ㄐㄧˋ","ㄌㄢˋ ㆤˊ ㄐㄧㄚㆷ˙ ㄅㆭ˫"], "Zhuyin"),
(["phiau1 ke3","an2 ne1/an2 ni1","kai3 siau7 jin5/kai3 siau7 lin5","ciok4 li2 kin1 a2 jit8 cin1 ho2 e5 jit8 ci2/ciok4 li2 kin1 a2 lit8 cin1 ho2 e5 lit8 ci2","lan2 e5 ciah8 png7"], "TLPA"),
(["piāogè","ǎnlnē/ǎnlnī","gàisiâozzín/gàisiâolín","ziōk lǐ gīnǎzzít zīnhǒ é zzítzǐ/ziōk lǐ gīnǎlít zīnhǒ é lítzǐ","lǎn é ziáhbn̂g"], "Pingyim"),
(["piāu-gê","an-ne/an-ni","gài-siâu-rĭn/gài-siâu-lĭn","ziok li gīn-a-rīt zīn-hor--ē rīt-zì/ziok li gīn-a-līt zīn-hor--ê līt-zì","lan--ē ziâ-bn̄g"], "Tongiong")
(["piāu-gê","an-ne/an-ni","gài-siâu-rĭn/gài-siâu-lĭn","ziok li gīn-a-rīt zīn-hor-- rīt-zì/ziok li gīn-a-līt zīn-hor-- līt-zì","lan--ē ziâ-bn̄g"], "Tongiong")
]
for transl, system in test_data:
data = [f"{h},{t}" for h, t in zip(hanji_data, transl)]
Expand Down Expand Up @@ -49,7 +49,7 @@ def test_nospace():
(["ㄆㄧㄠㄍㆤ˪","ㄢˋㄋㆤ/ㄢˋㄋㄧ","ㄍㄞ˪ㄒㄧㄠ˫ㆢㄧㄣˊ/ㄍㄞ˪ㄒㄧㄠ˫ㄌㄧㄣˊ","ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣㄚˋㆢㄧㆵ˙ ㄐㄧㄣㄏㄜˋ ㆤˊ ㆢㄧㆵ˙ㄐㄧˋ/ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣㄚˋㄌㄧㆵ˙ ㄐㄧㄣㄏㄜˋ ㆤˊ ㄌㄧㆵ˙ㄐㄧˋ","ㄌㄢˋ ㆤˊ ㄐㄧㄚㆷ˙ㄅㆭ˫"], "Zhuyin"),
(["phiau1ke3","an2ne1/an2ni1","kai3siau7jin5/kai3siau7lin5","ciok4 li2 kin1a2jit8 cin1ho2 e5 jit8ci2/ciok4 li2 kin1a2lit8 cin1ho2 e5 lit8ci2","lan2 e5 ciah8png7"], "TLPA"),
(["piāogè","ǎnlnē/ǎnlnī","gàisiâozzín/gàisiâolín","ziōk lǐ gīnǎzzít zīnhǒ é zzítzǐ/ziōk lǐ gīnǎlít zīnhǒ é lítzǐ","lǎn é ziáhbn̂g"], "Pingyim"),
(["piāugê","anne/anni","gàisiâurĭn/gàisiâulĭn","ziok li gīnarīt zīnhor--ē rītzì/ziok li gīnalīt zīnhor--ê lītzì","lan--ē ziâbn̄g"], "Tongiong")
(["piāugê","anne/anni","gàisiâurĭn/gàisiâulĭn","ziok li gīnarīt zīnhor--ē rītzì/ziok li gīnalīt zīnhor--ê lītzì","lan-- ziâbn̄g"], "Tongiong")
]
for transl, system in test_data:
data = [f"{h},{t}" for h, t in zip(hanji_data, transl)]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ipa_conversion.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_pingyim_conversion.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_tlpa_conversion.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions tests/test_tongiong_conversion.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_zhuyin_conversion.py

Large diffs are not rendered by default.

0 comments on commit 3bd7f09

Please sign in to comment.