diff --git a/src/test/suite/input/arabic_no_lem.atf b/src/test/suite/input/arabic_no_lem.atf new file mode 100644 index 0000000..c81163e --- /dev/null +++ b/src/test/suite/input/arabic_no_lem.atf @@ -0,0 +1,14 @@ +&X001001 = JCS 48, 089 +#project: cams/gkab +#atf: lang akk-x-stdbab +#atf: use unicode +#atf: use math +@tablet +@obverse + +1. [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM + +@translation parallel ar project +@obverse + +1. في شتة ٦٣ في شهر تبت، يوم ٢ diff --git a/src/test/suite/input/english_broken.atf b/src/test/suite/input/english_broken.atf new file mode 100644 index 0000000..fdd329f --- /dev/null +++ b/src/test/suite/input/english_broken.atf @@ -0,0 +1,21 @@ +&P393071 = SAAB 17, 1 +#project: ztcc +#atf: lang akk-x-neoass +#atf: use unicode +@tablet + +@obverse +1. 1(diš)# me 6(u)# ANŠE ŠE.PAD#-MEŠ + + +#tr.en: 160 homers of barley + +2. TA IGI {m}b-ia#-a + + +#tr.en: from the custody of Bayâ; + +3. 1(diš) ME 8(u) ANŠE {m}u₂-din#-ni + + +#tr.en: 180 homers (from) Udīnu; diff --git a/src/test/suite/input/english_no_lem.atf b/src/test/suite/input/english_no_lem.atf new file mode 100644 index 0000000..ad4a1ec --- /dev/null +++ b/src/test/suite/input/english_no_lem.atf @@ -0,0 +1,21 @@ +&P393071 = SAAB 17, 1 +#project: ztcc +#atf: lang akk-x-neoass +#atf: use unicode +@tablet + +@obverse +1. 1(diš)# me 6(u)# ANŠE ŠE.PAD#-MEŠ + + +#tr.en: 160 homers of barley + +2. TA IGI {m}ba-ia#-a + + +#tr.en: from the custody of Bayâ; + +3. 
1(diš) ME 8(u) ANŠE {m}u₂-din#-ni
+
+
+#tr.en: 180 homers (from) Udīnu;
diff --git a/src/test/suite/lemmatisation.test.ts b/src/test/suite/lemmatisation.test.ts
new file mode 100644
index 0000000..0467b6f
--- /dev/null
+++ b/src/test/suite/lemmatisation.test.ts
@@ -0,0 +1,57 @@
+import * as assert from 'assert';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as vscode from 'vscode';
+import { lemmatise } from '../../server/messages';
+
+
+// Root of the fixture tree, resolved relative to this module's directory
+// (presumably the compiled output, hence the hop back into src/ - confirm).
+const FIXTURE_ROOT = path.join(__dirname, '../../../src/test/suite');
+
+// Each entry is [fixture name prefix, project passed to lemmatise()].
+const CASES = [['english', 'ztcc'], ['arabic', 'cams/gkab']];
+
+/**
+ * Read the fixture at <FIXTURE_ROOT>/<folder>/<name> as a string, converting
+ * CRLF line endings to LF so comparisons do not depend on checkout settings.
+ */
+function read_fixture(folder: string, name: string): string {
+    return fs.readFileSync(path.join(FIXTURE_ROOT, folder, name))
+        .toString()
+        .replace(/\r\n/g, '\n');
+}
+
+suite('Lemmatisation Test Suite', () => {
+    // The notification's result is not needed, so it is deliberately not awaited.
+    void vscode.window.showInformationMessage('Start lemmatisation tests.');
+
+    for (const [file, project] of CASES) {
+        test(`Lemmatisation results for ${file}_no_lem.atf`, async () => {
+            const text = read_fixture('input', `${file}_no_lem.atf`);
+            const server_result = await lemmatise(`${file}_no_lem.atf`, project, text);
+            const lemmatised_text = read_fixture('reference', `${file}_with_lem.atf`);
+
+            assert(!server_result.contains_errors());
+            assert.strictEqual(server_result.atf_content, lemmatised_text);
+        });
+    }
+
+    for (const [file, project] of CASES) {
+        test(`Lemmatisation results for ${file}_with_lem.atf`, async () => {
+            const text = read_fixture('reference', `${file}_with_lem.atf`);
+            const server_result = await lemmatise(`${file}_with_lem.atf`, project, text);
+
+            assert(!server_result.contains_errors());
+            // Actual value first, expected second, matching the test above.
+            assert.strictEqual(server_result.atf_content, text);
+        });
+    }
+
+    test('Lemmatisation results when there are errors', async () => {
+        const text = read_fixture('input', 'english_broken.atf');
+        const server_result = await lemmatise('english_broken.atf', 'ztcc', text);
+
+        assert(server_result.contains_errors());
+    });
+});
diff --git a/src/test/suite/reference/arabic_with_lem.atf b/src/test/suite/reference/arabic_with_lem.atf
new file mode 100644
index 0000000..9353c5e
--- /dev/null
+++ b/src/test/suite/reference/arabic_with_lem.atf
@@ -0,0 +1,15 @@
+&X001001 = JCS 48, 089
+#project: cams/gkab
+#atf: lang akk-x-stdbab
+#atf: use unicode
+#atf: use math
+@tablet
+@obverse
+
+1. [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM
+#lem: šanat[year]N; n; Ṭebetu[1]MN; mūša[at night]AV; ūm[day]N; n
+
+@translation parallel ar project
+@obverse
+
+1. في شتة ٦٣ في شهر تبت، يوم ٢
diff --git a/src/test/suite/reference/english_with_lem.atf b/src/test/suite/reference/english_with_lem.atf
new file mode 100644
index 0000000..5670b07
--- /dev/null
+++ b/src/test/suite/reference/english_with_lem.atf
@@ -0,0 +1,24 @@
+&P393071 = SAAB 17, 1
+#project: ztcc
+#atf: lang akk-x-neoass
+#atf: use unicode
+@tablet
+
+@obverse
+1. 1(diš)# me 6(u)# ANŠE ŠE.PAD#-MEŠ
+#lem: n; mē[(one) hundred]NU; n; imāru[donkey]N; X
+
+
+#tr.en: 160 homers of barley
+
+2. TA IGI {m}ba-ia#-a
+#lem: issu[from]PRP; pān[front]N; X
+
+
+#tr.en: from the custody of Bayâ;
+
+3. 1(diš) ME 8(u) ANŠE {m}u₂-din#-ni
+#lem: n; X; n; imāru[donkey]N; X
+
+
+#tr.en: 180 homers (from) Udīnu;