Fixed unittest cases to account for fixed neutral tone

andreihar · Jan 4, 2024 · 3bd7f09
1 parent a1558f4
commit 3bd7f09
Show file tree

Hide file tree

Showing 8 changed files with 17 additions and 16 deletions.
diff --git a/taibun/data/words.json b/taibun/data/words.json
@@ -2155,7 +2155,7 @@
 	"呃酸": "eh-sng",
 	"厄運": "eh-ūn",
 	"嬰仔名": "enn-á-miâ/inn-á-miâ",
-	"嬰仔": "enn-á/inn--á",
+	"嬰仔": "enn-á/inn-á",
 	"嬰": "enn/inn",
 	"狹小": "e̍h-sió/ue̍h-sió",
 	"狹細": "e̍h-sè/ue̍h-suè",

diff --git a/taibun/taibun.py b/taibun/taibun.py
@@ -122,8 +122,9 @@ def __set_default_sandhi(self):
     def __get_number_tones(self, input):
         words = self.__preprocess_word(input[0])
         number_tones = [self.__get_number_tone(w) for w in words if len(w) > 0]
-        replace_with_zero = False
-        number_tones = [s[:-1] + '0' if replace_with_zero or (replace_with_zero := s[-1] == '0') else s for s in number_tones]
+        if self.sandhi or self.format == 'number':
+            replace_with_zero = False
+            number_tones = [s[:-1] + '0' if replace_with_zero or (replace_with_zero := s[-1] == '0') else s for s in number_tones]
         if self.sandhi:
             number_tones = self.__tone_sandhi(number_tones, input[1])
         return number_tones
@@ -153,7 +154,7 @@ def __get_number_tone(self, input):
         elif re.search('̍', input): input += '8'
         elif input[-1] in finals: input += '4'
         else: input += '1'
-        if input.startswith(self.suffix_token):
+        if input.startswith(self.suffix_token) and (input[-1] == 'h' or self.sandhi or self.format == 'number'):
             input = input[:-1] + '0'
         input = "".join(c for c in unicodedata.normalize("NFD", input) if unicodedata.category(c) != "Mn")
         return input
@@ -228,7 +229,7 @@ def __tailo_to_poj(self, input):
     # Helper to convert syllable from Tai-lo to 方音符號 (zhuyin)
     def __tailo_to_zhuyin(self, input):
         convert = {
-            'p4':'ㆴ4', 'p8':'ㆴ8', 'k4':'ㆶ4', 'k8':'ㆶ8', 't4':'ㆵ4', 't8':'ㆵ8', 'h4':'ㆷ4', 'h8':'ㆷ8',
+            'p4':'ㆴ4', 'p8':'ㆴ8', 'k4':'ㆶ4', 'k8':'ㆶ8', 't4':'ㆵ4', 't8':'ㆵ8', 'h4':'ㆷ4', 'h8':'ㆷ8', 'h0': '0',
             'tshing':'ㄑㄧㄥ', 'tshinn':'ㄑㆪ', 'phing':'ㄆㄧㄥ', 'phinn':'ㄆㆪ', 'tsing':'ㄐㄧㄥ', 'tsinn':'ㄐㆪ',
             'ainn':'ㆮ', 'aunn':'ㆯ', 'giok':'ㆣㄧㄜㆶ', 'ngai':'ㄫㄞ', 'ngau':'ㄫㄠ', 'ngoo':'ㄫㆦ', 'ping':'ㄅㄧㄥ',
             'pinn':'ㄅㆪ', 'senn':'ㄙㆥ', 'sing':'ㄒㄧㄥ', 'sinn':'ㄒㆪ', 'tshi':'ㄑㄧ',
@@ -338,7 +339,7 @@ def __tailo_to_ipa(self, input):
         if self.dialect == 'north':
             convert.update({'o':'o'})
         convert2 = {
-            'p4':'p̚4','p8':'p̚8','k4':'k̚4','k8':'k̚8','t4':'t̚4','t8':'t̚8','h4':'ʔ4','h8':'ʔ8','si':'ɕi'}
+            'p4':'p̚4','p8':'p̚8','k4':'k̚4','k8':'k̚8','t4':'t̚4','t8':'t̚8','h4':'ʔ4','h8':'ʔ8','si':'ɕi','h0':'ʔ0'}
         tones = ['', '⁴⁴', '⁵³', '¹¹', '²¹', '²⁵', '', '²²', '⁵'] if self.dialect != 'north' else ['', '⁵⁵', '⁵¹', '²¹', '³²', '²⁴', '', '³³', '⁴']
         convert.update({k.capitalize(): v.capitalize() for k, v in convert.items()})
         convert2.update({k.capitalize(): v.capitalize() for k, v in convert2.items()})

diff --git a/tests/test_delimiter.py b/tests/test_delimiter.py
@@ -10,7 +10,7 @@ def test_default():
 		(["ㄆㄧㄠ ㄍㆤ˪","ㄢˋ ㄋㆤ/ㄢˋ ㄋㄧ","ㄍㄞ˪ ㄒㄧㄠ˫ ㆢㄧㄣˊ/ㄍㄞ˪ ㄒㄧㄠ˫ ㄌㄧㄣˊ","ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣ ㄚˋ ㆢㄧㆵ˙ ㄐㄧㄣ ㄏㄜˋ ㆤˊ ㆢㄧㆵ˙ ㄐㄧˋ/ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣ ㄚˋ ㄌㄧㆵ˙ ㄐㄧㄣ ㄏㄜˋ ㆤˊ ㄌㄧㆵ˙ ㄐㄧˋ","ㄌㄢˋ ㆤˊ ㄐㄧㄚㆷ˙ ㄅㆭ˫"], "Zhuyin"),
 		(["phiau1 ke3","an2 ne1/an2 ni1","kai3 siau7 jin5/kai3 siau7 lin5","ciok4 li2 kin1 a2 jit8 cin1 ho2 e5 jit8 ci2/ciok4 li2 kin1 a2 lit8 cin1 ho2 e5 lit8 ci2","lan2 e5 ciah8 png7"], "TLPA"),
 		(["piāogè","ǎnlnē/ǎnlnī","gàisiâozzín/gàisiâolín","ziōk lǐ gīnǎzzít zīnhǒ é zzítzǐ/ziōk lǐ gīnǎlít zīnhǒ é lítzǐ","lǎn é ziáhbn̂g"], "Pingyim"),
-		(["piāu-gê","an-ne/an-ni","gài-siâu-rĭn/gài-siâu-lĭn","ziok li gīn-a-rīt zīn-hor--ē rīt-zì/ziok li gīn-a-līt zīn-hor--ê līt-zì","lan--ē ziâ-bn̄g"], "Tongiong")
+		(["piāu-gê","an-ne/an-ni","gài-siâu-rĭn/gài-siâu-lĭn","ziok li gīn-a-rīt zīn-hor--e̊ rīt-zì/ziok li gīn-a-līt zīn-hor--e̊ līt-zì","lan--ē ziâ-bn̄g"], "Tongiong")
     ]
 	for transl, system in test_data:
 		data = [f"{h},{t}" for h, t in zip(hanji_data, transl)]
@@ -49,7 +49,7 @@ def test_nospace():
 		(["ㄆㄧㄠㄍㆤ˪","ㄢˋㄋㆤ/ㄢˋㄋㄧ","ㄍㄞ˪ㄒㄧㄠ˫ㆢㄧㄣˊ/ㄍㄞ˪ㄒㄧㄠ˫ㄌㄧㄣˊ","ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣㄚˋㆢㄧㆵ˙ ㄐㄧㄣㄏㄜˋ ㆤˊ ㆢㄧㆵ˙ㄐㄧˋ/ㄐㄧㆦㆶ ㄌㄧˋ ㄍㄧㄣㄚˋㄌㄧㆵ˙ ㄐㄧㄣㄏㄜˋ ㆤˊ ㄌㄧㆵ˙ㄐㄧˋ","ㄌㄢˋ ㆤˊ ㄐㄧㄚㆷ˙ㄅㆭ˫"], "Zhuyin"),
 		(["phiau1ke3","an2ne1/an2ni1","kai3siau7jin5/kai3siau7lin5","ciok4 li2 kin1a2jit8 cin1ho2 e5 jit8ci2/ciok4 li2 kin1a2lit8 cin1ho2 e5 lit8ci2","lan2 e5 ciah8png7"], "TLPA"),
 		(["piāogè","ǎnlnē/ǎnlnī","gàisiâozzín/gàisiâolín","ziōk lǐ gīnǎzzít zīnhǒ é zzítzǐ/ziōk lǐ gīnǎlít zīnhǒ é lítzǐ","lǎn é ziáhbn̂g"], "Pingyim"),
-		(["piāugê","anne/anni","gàisiâurĭn/gàisiâulĭn","ziok li gīnarīt zīnhor--ē rītzì/ziok li gīnalīt zīnhor--ê lītzì","lan--ē ziâbn̄g"], "Tongiong")
+		(["piāugê","anne/anni","gàisiâurĭn/gàisiâulĭn","ziok li gīnarīt zīnhor--ē rītzì/ziok li gīnalīt zīnhor--ê lītzì","lan--e̊ ziâbn̄g"], "Tongiong")
 	]
 	for transl, system in test_data:
 		data = [f"{h},{t}" for h, t in zip(hanji_data, transl)]

diff --git a/tests/test_ipa_conversion.py b/tests/test_ipa_conversion.py
diff --git a/tests/test_pingyim_conversion.py b/tests/test_pingyim_conversion.py
diff --git a/tests/test_tlpa_conversion.py b/tests/test_tlpa_conversion.py
diff --git a/tests/test_tongiong_conversion.py b/tests/test_tongiong_conversion.py
diff --git a/tests/test_zhuyin_conversion.py b/tests/test_zhuyin_conversion.py