diff --git a/.gitignore b/.gitignore index e2dd696..6e4ce56 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,6 @@ typings/ # IntelliJ config .idea + +# compiler output +lib diff --git a/__tests__/AlignmentHelpers.test.js b/__tests__/AlignmentHelpers.test.js index 3475fe6..f994351 100644 --- a/__tests__/AlignmentHelpers.test.js +++ b/__tests__/AlignmentHelpers.test.js @@ -1,46 +1,11 @@ +/* eslint-disable no-use-before-define */ /* eslint-env jest */ import fs from 'fs-extra'; import path from 'path'; +import usfmjs from 'usfm-js'; jest.unmock('fs-extra'); import wordaligner, {VerseObjectUtils} from '../src/'; const RESOURCES = path.join('__tests__', 'fixtures', 'pivotAlignmentVerseObjects'); -/** - * Reads a usfm file from the resources dir - * @param {string} filename relative path to usfm file - * @return {Object} - The read JSON object - */ -const readJSON = filename => { - const fullPath = path.join(RESOURCES, filename); - if (fs.existsSync(fullPath)) { - const json = fs.readJsonSync(fullPath); - return json; - } - console.log('File not found.'); - return false; -}; - -/** - * Generator for testing merging of alignment into verseObjects - * @param {string} name - the name of the test files to use. e.g. `valid` will test `valid.usfm` to `valid.json` - */ -const mergeTest = (name = {}) => { - const json = readJSON(`${name}.json`); - expect(json).toBeTruthy(); - const {alignment, verseObjects, verseString, wordBank} = json; - const output = wordaligner.merge(alignment, wordBank, verseString); - expect(output).toEqual(verseObjects); -}; -/** - * Generator for testing unmerging of alignment from verseObjects - * @param {string} name - the name of the test files to use. e.g. 
`valid` will test `valid.usfm` to `valid.json` - */ -const unmergeTest = (name = {}) => { - const json = readJSON(`${name}.json`); - expect(json).toBeTruthy(); - const {verseObjects, alignment, wordBank, alignedVerseString} = json; - const output = wordaligner.unmerge(verseObjects, alignedVerseString); - expect(output).toEqual({alignment, wordBank}); -}; describe("Merge Alignment into Verse Objects", () => { it('handles one to one', () => { @@ -102,6 +67,9 @@ describe("Merge Alignment into Verse Objects", () => { } expect(fail).toBeTruthy(); }); + it('handles titus 1-12', () => { + mergeTest('tit1-12'); + }); }); describe("UnMerge Alignment from Verse Objects", () => { @@ -153,6 +121,18 @@ describe("UnMerge Alignment from Verse Objects", () => { it('handles acts 1-4', () => { unmergeTest('acts-1-4'); }); + it('handles titus 1-12', () => { + unmergeTest('tit1-12'); + }); +}); + +describe("export USFM3 from Verse Objects", () => { + it('handles acts-1-11', () => { + exportTest('acts-1-11'); + }); + it('handles acts 1-4', () => { + exportTest('acts-1-4'); + }); }); describe('wordaligner.generateBlankAlignments', () => { @@ -242,8 +222,90 @@ describe('wordaligner.generateWordBank', () => { // then expect(results).toEqual(wordBank); }); - - // - // helpers - // }); + +// +// helpers +// + +/** + * Reads a json file from the resources dir + * @param {string} filename relative path to json file + * @return {Object|boolean} - The read JSON object, or false if the file is not found + */ +const readJSON = filename => { + const fullPath = path.join(RESOURCES, filename); + if (fs.existsSync(fullPath)) { + const json = fs.readJsonSync(fullPath); + return json; + } + console.log('File not found.'); + return false; +}; + +/** + * Reads a usfm file from the resources dir + * @param {string} filename relative path to usfm file + * @return {string|boolean} - The USFM file contents, or false if the file is not found + */ +const readUSFM = filename => { + const fullPath = path.join(RESOURCES, filename); + if (fs.existsSync(fullPath)) { + const usfm = 
fs.readFileSync(fullPath, 'UTF-8').toString(); + return usfm; + } + console.log('File not found.'); + return false; +}; + +/** + * Generator for testing merging of alignment into verseObjects + * @param {string} name - the name of the test files to use. e.g. `valid` will test `valid.usfm` to `valid.json` + */ +const mergeTest = (name = {}) => { + const json = readJSON(`${name}.json`); + expect(json).toBeTruthy(); + const {alignment, verseObjects, verseString, wordBank} = json; + const output = wordaligner.merge(alignment, wordBank, verseString); + expect(output).toEqual(verseObjects); +}; + +/** + * Generator for testing unmerging of alignment from verseObjects + * @param {string} name - the name of the test files to use. e.g. `valid` will test `valid.usfm` to `valid.json` + */ +const unmergeTest = (name = {}) => { + const json = readJSON(`${name}.json`); + expect(json).toBeTruthy(); + const {verseObjects, alignment, wordBank, alignedVerseString} = json; + const output = wordaligner.unmerge(verseObjects, alignedVerseString); + expect(output).toEqual({alignment, wordBank}); +}; + +/** + * Generator for testing export of merged alignment verseObjects to USFM3 + * @param {string} name - the name of the test files to use. e.g. `valid` will test `valid.usfm` to `valid.json` + */ +const exportTest = (name = {}) => { + const json = readJSON(`${name}.json`); + expect(json).toBeTruthy(); + const expectedUsfm = readUSFM(`${name}.usfm`); + expect(expectedUsfm).toBeTruthy(); + const {alignment, verseString, wordBank} = json; + const output = wordaligner.merge(alignment, wordBank, verseString); + const outputData = { + chapters: {}, + headers: [], + verses: { + 1: output + } + }; + let usfm = usfmjs.toUSFM(outputData, {chunk: true}); + const split = usfm.split("\\v 1"); + usfm = split.length > 1 ? 
split[1] : ""; + if (usfm.substr(0, 1) === ' ') { + usfm = usfm.substr(1); + } + expect(usfm).toEqual(expectedUsfm); +}; + diff --git a/__tests__/VerseObjectUtils.js b/__tests__/VerseObjectUtils.js index 7779344..cbe6e94 100644 --- a/__tests__/VerseObjectUtils.js +++ b/__tests__/VerseObjectUtils.js @@ -67,7 +67,7 @@ describe('VerseObjectUtils.getWordsFromVerseObjects', () => { occurrence: 1, occurrences: 1 }, - {type: 'text', text: ','}, + {type: 'text', text: ', '}, { tag: 'w', type: 'word', @@ -147,7 +147,7 @@ describe("getOrderedVerseObjectsFromString", () => { }, { type: "text", - text: "," + text: ", " }, { tag: "w", @@ -193,7 +193,7 @@ describe("getOrderedVerseObjectsFromString", () => { }, { type: "text", - text: "," + text: ", " }, { tag: "w", @@ -253,7 +253,7 @@ describe("getOrderedVerseObjectsFromString", () => { }, { type: "text", - text: "," + text: ", " }, { tag: "w", @@ -278,7 +278,7 @@ describe("getOrderedVerseObjectsFromString", () => { }, { type: "text", - text: "." + text: ". " }, { tag: "f", diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json index c0ae917..973e3a4 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json @@ -1,6 +1,6 @@ { "alignedVerseString": "οἳ καὶ εἶπαν, “ ἄνδρες, Γαλιλαῖοι, τί ἑστήκατε βλέποντες εἰς τὸν οὐρανόν? οὗτος ὁ Ἰησοῦς ὁ ἀναλημφθεὶς ἀφ’ ὑμῶν εἰς τὸν οὐρανὸν, οὕτως ἐλεύσεται ὃν τρόπον ἐθεάσασθε αὐτὸν πορευόμενον εἰς τὸν οὐρανόν.”", - "verseString": "And they said,\"Men of Galilee, why do you stand looking into heaven? This Jesus who has been taken up from you into heaven, will likewise return in the same manner as you saw him going into heaven.\"\\p", + "verseString": "And they said,\"Men of Galilee, why do you stand looking into heaven? 
This Jesus who has been taken up from you into heaven, will likewise return in the same manner as you saw him going into heaven.\"\n\\p", "alignment": [ { "topWords": [ @@ -709,7 +709,7 @@ "type": "milestone" }, { - "text": ",", + "text": ", ", "type": "text" }, { @@ -834,7 +834,7 @@ "type": "milestone" }, { - "text": "?", + "text": "? ", "type": "text" }, { @@ -1035,7 +1035,7 @@ "type": "milestone" }, { - "text": ",", + "text": ", ", "type": "text" }, { @@ -1273,7 +1273,7 @@ "type": "text" }, { - "text": "\"", + "text": "\"\n", "type": "text" }, { diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.usfm b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.usfm new file mode 100644 index 0000000..2db9cf1 --- /dev/null +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.usfm @@ -0,0 +1,104 @@ + +\zaln-s | x-strong="G25320" x-lemma="καί" x-morph="Gr,D,,,,,,,,," x-occurrence="1" x-occurrences="1" x-content="καὶ" +\w And|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G37390" x-lemma="ὅς" x-morph="Gr,RR,,,,NMP," x-occurrence="1" x-occurrences="1" x-content="οἳ" +\w they|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G30040" x-lemma="λέγω" x-morph="Gr,V,IAA3,,P," x-occurrence="1" x-occurrences="1" x-content="εἶπαν" +\w said|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*," +\zaln-s | x-strong="G04350" x-lemma="ἀνήρ" x-morph="Gr,N,,,,,VMP," x-occurrence="1" x-occurrences="1" x-content="ἄνδρες" +\w Men|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G10570" x-lemma="Γαλιλαῖος" x-morph="Gr,NS,,,,VMP," x-occurrence="1" x-occurrences="1" x-content="Γαλιλαῖοι" +\w of|x-occurrence="1" x-occurrences="1"\w* +\w Galilee|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*, +\zaln-s | x-strong="G51010" x-lemma="τίς" x-morph="Gr,RT,,,,ANS," x-occurrence="1" x-occurrences="1" x-content="τί" +\w why|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G24760" 
x-lemma="ἵστημι" x-morph="Gr,V,IEA2,,P," x-occurrence="1" x-occurrences="1" x-content="ἑστήκατε" +\w do|x-occurrence="1" x-occurrences="1"\w* +\w you|x-occurrence="1" x-occurrences="3"\w* +\w stand|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G09910" x-lemma="βλέπω" x-morph="Gr,V,PPA,NMP," x-occurrence="1" x-occurrences="1" x-content="βλέποντες" +\w looking|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G15190" x-lemma="εἰς" x-morph="Gr,P,,,,,A,,," x-occurrence="1" x-occurrences="3" x-content="εἰς" +\w into|x-occurrence="1" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,AMS," x-occurrence="1" x-occurrences="3" x-content="τὸν" +\zaln-s | x-strong="G37720" x-lemma="οὐρανός" x-morph="Gr,N,,,,,AMS," x-occurrence="1" x-occurrences="2" x-content="οὐρανόν" +\w heaven|x-occurrence="1" x-occurrences="3"\w* +\zaln-e\* +\zaln-e\*? +\zaln-s | x-strong="G37780" x-lemma="οὗτος" x-morph="Gr,ED,,,,NMS," x-occurrence="1" x-occurrences="1" x-content="οὗτος" +\w This|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,NMS," x-occurrence="1" x-occurrences="2" x-content="ὁ" +\zaln-s | x-strong="G24240" x-lemma="Ἰησοῦς" x-morph="Gr,N,,,,,NMS," x-occurrence="1" x-occurrences="1" x-content="Ἰησοῦς" +\w Jesus|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,RD,,,,NMS," x-occurrence="2" x-occurrences="2" x-content="ὁ" +\zaln-s | x-strong="G03530" x-lemma="ἀναλαμβάνω" x-morph="Gr,V,PAP,NMS," x-occurrence="1" x-occurrences="1" x-content="ἀναλημφθεὶς" +\w who|x-occurrence="1" x-occurrences="1"\w* +\w has|x-occurrence="1" x-occurrences="1"\w* +\w been|x-occurrence="1" x-occurrences="1"\w* +\w taken|x-occurrence="1" x-occurrences="1"\w* +\w up|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-e\* +\zaln-s | x-strong="G05750" x-lemma="ἀπό" x-morph="Gr,P,,,,,G,,," x-occurrence="1" 
x-occurrences="1" x-content="ἀφ’" +\w from|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G47710" x-lemma="σύ" x-morph="Gr,RP,,,2G,P," x-occurrence="1" x-occurrences="1" x-content="ὑμῶν" +\w you|x-occurrence="2" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G15190" x-lemma="εἰς" x-morph="Gr,P,,,,,A,,," x-occurrence="2" x-occurrences="3" x-content="εἰς" +\w into|x-occurrence="2" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,AMS," x-occurrence="2" x-occurrences="3" x-content="τὸν" +\zaln-s | x-strong="G37720" x-lemma="οὐρανός" x-morph="Gr,N,,,,,AMS," x-occurrence="1" x-occurrences="1" x-content="οὐρανὸν" +\w heaven|x-occurrence="2" x-occurrences="3"\w* +\zaln-e\* +\zaln-e\*, +\zaln-s | x-strong="G20640" x-lemma="ἔρχομαι" x-morph="Gr,V,IFM3,,S," x-occurrence="1" x-occurrences="1" x-content="ἐλεύσεται" +\w will|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G37790" x-lemma="οὕτως" x-morph="Gr,D,,,,,,,,," x-occurrence="1" x-occurrences="1" x-content="οὕτως" +\w likewise|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G20640" x-lemma="ἔρχομαι" x-morph="Gr,V,IFM3,,S," x-occurrence="1" x-occurrences="1" x-content="ἐλεύσεται" +\w return|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G37390" x-lemma="ὅς" x-morph="Gr,ER,,,,AMS," x-occurrence="1" x-occurrences="1" x-content="ὃν" +\w in|x-occurrence="1" x-occurrences="1"\w* +\w the|x-occurrence="1" x-occurrences="1"\w* +\w same|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G51580" x-lemma="τρόπος" x-morph="Gr,N,,,,,AMS," x-occurrence="1" x-occurrences="1" x-content="τρόπον" +\w manner|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G23000" x-lemma="θεάομαι" x-morph="Gr,V,IAM2,,P," x-occurrence="1" x-occurrences="1" x-content="ἐθεάσασθε" +\w as|x-occurrence="1" x-occurrences="1"\w* +\w you|x-occurrence="3" x-occurrences="3"\w* +\w 
saw|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G08460" x-lemma="αὐτός" x-morph="Gr,RP,,,3AMS," x-occurrence="1" x-occurrences="1" x-content="αὐτὸν" +\w him|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G41980" x-lemma="πορεύω" x-morph="Gr,V,PPM,AMS," x-occurrence="1" x-occurrences="1" x-content="πορευόμενον" +\w going|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G15190" x-lemma="εἰς" x-morph="Gr,P,,,,,A,,," x-occurrence="3" x-occurrences="3" x-content="εἰς" +\w into|x-occurrence="3" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,AMS," x-occurrence="3" x-occurrences="3" x-content="τὸν" +\zaln-s | x-strong="G37720" x-lemma="οὐρανός" x-morph="Gr,N,,,,,AMS," x-occurrence="2" x-occurrences="2" x-content="οὐρανόν" +\w heaven|x-occurrence="3" x-occurrences="3"\w* +\zaln-e\* +\zaln-e\*." +\p \ No newline at end of file diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.json b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.json index c109691..00837ef 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.json @@ -465,7 +465,7 @@ "type": "milestone" }, { - "text": ",", + "text": ", ", "type": "text" }, { @@ -590,7 +590,7 @@ "type": "milestone" }, { - "text": ",", + "text": ", ", "type": "text" }, { @@ -729,7 +729,7 @@ "type": "milestone" }, { - "text": ",", + "text": ", ", "type": "text" }, { @@ -759,7 +759,7 @@ "type": "milestone" }, { - "text": ",", + "text": ", ", "type": "text" }, { diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.usfm b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.usfm new file mode 100644 index 0000000..872a6e2 --- /dev/null +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.usfm @@ -0,0 +1,68 @@ + +\zaln-s | x-lemma="καί" x-morph="Gr,CC,,,,,,,," x-occurrence="1" x-occurrences="1" 
x-strong="G25320" x-content="καὶ" +\w When|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="συναλίζω" x-morph="Gr,V,PPM,NMS," x-occurrence="1" x-occurrences="1" x-strong="G48710" x-content="συναλιζόμενος" +\w he|x-occurrence="1" x-occurrences="3"\w* +\w was|x-occurrence="1" x-occurrences="1"\w* +\w meeting|x-occurrence="1" x-occurrences="1"\w* +\w together|x-occurrence="1" x-occurrences="1"\w* +\w with|x-occurrence="1" x-occurrences="1"\w* +\w them|x-occurrence="1" x-occurrences="2"\w* +\zaln-e\*, +\zaln-s | x-lemma="παραγγέλλω" x-morph="Gr,V,IAA3,,S," x-occurrence="1" x-occurrences="1" x-strong="G38530" x-content="παρήγγειλεν" +\w he|x-occurrence="2" x-occurrences="3"\w* +\w commanded|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="αὐτός" x-morph="Gr,RP,,,3DMP," x-occurrence="1" x-occurrences="1" x-strong="G08460" x-content="αὐτοῖς" +\w them|x-occurrence="2" x-occurrences="2"\w* +\zaln-e\* +\zaln-s | x-lemma="μή" x-morph="Gr,DO,,,,,,,," x-occurrence="1" x-occurrences="1" x-strong="G33610" x-content="μὴ" +\w not|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="χωρίζω" x-morph="Gr,V,NPP,,,,," x-occurrence="1" x-occurrences="1" x-strong="G55630" x-content="χωρίζεσθαι" +\w to|x-occurrence="1" x-occurrences="2"\w* +\w leave|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ἀπό" x-morph="Gr,P,,,,,G,,," x-occurrence="1" x-occurrences="1" x-strong="G05750" x-content="ἀπὸ" +\zaln-s | x-lemma="Ἱεροσόλυμα" x-morph="Gr,N,,,,,GNP," x-occurrence="1" x-occurrences="1" x-strong="G24140" x-content="Ἱεροσολύμων" +\w Jerusalem|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-e\*, +\zaln-s | x-lemma="ἀλλά" x-morph="Gr,CO,,,,,,,," x-occurrence="1" x-occurrences="1" x-strong="G02350" x-content="ἀλλὰ" +\w but|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="περιμένω" x-morph="Gr,V,NPA,,,,," x-occurrence="1" x-occurrences="1" x-strong="G40370" x-content="περιμένειν" +\w 
to|x-occurrence="2" x-occurrences="2"\w* +\w wait|x-occurrence="1" x-occurrences="1"\w* +\w for|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ὁ" x-morph="Gr,EA,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G35880" x-content="τὴν" +\w the|x-occurrence="1" x-occurrences="2"\w* +\zaln-e\* +\zaln-s | x-lemma="ἐπαγγελία" x-morph="Gr,N,,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G18600" x-content="ἐπαγγελίαν" +\w promise|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ὁ" x-morph="Gr,EA,,,,GMS," x-occurrence="1" x-occurrences="1" x-strong="G35880" x-content="τοῦ" +\w of|x-occurrence="1" x-occurrences="1"\w* +\w the|x-occurrence="2" x-occurrences="2"\w* +\zaln-e\* +\zaln-s | x-lemma="πατήρ" x-morph="Gr,N,,,,,GMS," x-occurrence="1" x-occurrences="1" x-strong="G39620" x-content="Πατρὸς" +\w Father|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*, +\zaln-s | x-lemma="ὅς" x-morph="Gr,RR,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G37390" x-content="ἣν" +\w about|x-occurrence="1" x-occurrences="1"\w* +\w which|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*, +\zaln-s | x-lemma="ὅς" x-morph="Gr,RR,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G37390" x-content="ἣν" +\w he|x-occurrence="3" x-occurrences="3"\w* +\w said|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*,\qt-s |who="Jesus"\*" +\zaln-s | x-lemma="ἐγώ" x-morph="Gr,RP,,,1G,S," x-occurrence="1" x-occurrences="1" x-strong="G14730" x-content="μου" +\w You|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ἀκούω" x-morph="Gr,V,IAA2,,P," x-occurrence="1" x-occurrences="1" x-strong="G01910" x-content="ἠκούσατέ" +\w heard|x-occurrence="1" x-occurrences="1"\w* +\w from|x-occurrence="1" x-occurrences="1"\w* +\w me|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json b/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json index 5212e59..0a394e3 100644 --- 
a/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json @@ -207,7 +207,7 @@ }, { "type": "text", - "text": "," + "text": ", " }, { "tag": "zaln", @@ -250,7 +250,7 @@ }, { "type": "text", - "text": "." + "text": ". " }, { "tag": "f", diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1b.json b/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1b.json index ab2a4c1..f507555 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1b.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1b.json @@ -79,7 +79,7 @@ }, { "type": "text", - "text": "," + "text": ", " }, { "tag": "zaln", diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-1.json b/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-1.json index e268168..c73807f 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-1.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-1.json @@ -23,7 +23,7 @@ }, { "type": "text", - "text": "," + "text": ", " }, { "strongs": "G14010", @@ -169,7 +169,7 @@ }, { "type": "text", - "text": "," + "text": ", " }, { "strongs": "G25960", diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json b/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json new file mode 100644 index 0000000..708d9b9 --- /dev/null +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json @@ -0,0 +1,585 @@ +{ + "alignedVerseString": "εἶπέν τις ἐξ αὐτῶν, ἴδιος αὐτῶν προφήτης, “Κρῆτες ἀεὶ ψεῦσται, κακὰ θηρία, γαστέρες ἀργαί.”", + "verseString": "One of their own prophets has said, \"Cretans are always liars, evil beasts, lazy gluttons.\"\n\\p", + "verseObjects": [ + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "One", + "type": "word" + } + ], + "content": "τις", + "lemma": "τις", + "morph": "Gr,RI,,,,NMS,", + "occurrence": 1, + "occurrences": 1, + "strong": "G51000", + "tag": "zaln", + "type": "milestone" + }, + { + 
"text": " ", + "type": "text" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "of", + "type": "word" + } + ], + "content": "ἐξ", + "lemma": "ἐκ", + "morph": "Gr,P,,,,,G,,,", + "occurrence": 1, + "occurrences": 1, + "strong": "G15370", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "children": [ + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "their", + "type": "word" + }, + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "own", + "type": "word" + } + ], + "content": "αὐτῶν", + "lemma": "αὐτός", + "morph": "Gr,RP,,,3GMP,", + "occurrence": 2, + "occurrences": 2, + "strong": "G08460", + "tag": "zaln", + "type": "milestone" + } + ], + "content": "ἴδιος", + "lemma": "ἴδιος", + "morph": "Gr,RD,,,,NMS,", + "occurrence": 1, + "occurrences": 1, + "strong": "G23980", + "tag": "zaln", + "type": "milestone" + } + ], + "content": "αὐτῶν", + "lemma": "αὐτός", + "morph": "Gr,RP,,,3GMP,", + "occurrence": 1, + "occurrences": 2, + "strong": "G08460", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "prophets", + "type": "word" + } + ], + "content": "προφήτης", + "lemma": "προφήτης", + "morph": "Gr,N,,,,,NMS,", + "occurrence": 1, + "occurrences": 1, + "strong": "G43960", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "has", + "type": "word" + }, + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "said", + "type": "word" + } + ], + "content": "εἶπέν", + "lemma": "λέγω", + "morph": "Gr,V,IAA3,,S,", + "occurrence": 1, + "occurrences": 1, + "strong": "G30040", + "tag": "zaln", + "type": "milestone" + }, + { + "text": ", ", + "type": "text" + }, + { + "text": "\"", + "type": "text" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "Cretans", + 
"type": "word" + } + ], + "content": "Κρῆτες", + "lemma": "Κρής", + "morph": "Gr,N,,,,,NMP,", + "occurrence": 1, + "occurrences": 1, + "strong": "G29120", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "are", + "type": "word" + } + ], + "content": "ψεῦσται", + "lemma": "ψεύστης", + "morph": "Gr,N,,,,,NMP,", + "occurrence": 1, + "occurrences": 1, + "strong": "G55830", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "always", + "type": "word" + } + ], + "content": "ἀεὶ", + "lemma": "ἀεί", + "morph": "Gr,D,,,,,,,,,", + "occurrence": 1, + "occurrences": 1, + "strong": "G01040", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "liars", + "type": "word" + } + ], + "content": "ψεῦσται", + "lemma": "ψεύστης", + "morph": "Gr,N,,,,,NMP,", + "occurrence": 1, + "occurrences": 1, + "strong": "G55830", + "tag": "zaln", + "type": "milestone" + }, + { + "text": ", ", + "type": "text" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "evil", + "type": "word" + } + ], + "content": "κακὰ", + "lemma": "κακός", + "morph": "Gr,AA,,,,NNP,", + "occurrence": 1, + "occurrences": 1, + "strong": "G25560", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "beasts", + "type": "word" + } + ], + "content": "θηρία", + "lemma": "θηρίον", + "morph": "Gr,N,,,,,NNP,", + "occurrence": 1, + "occurrences": 1, + "strong": "G23420", + "tag": "zaln", + "type": "milestone" + }, + { + "text": ", ", + "type": "text" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "lazy", + "type": "word" + } + ], + "content": "ἀργαί", + "lemma": "ἀργός", + "morph": "Gr,AA,,,,NFP,", + "occurrence": 1, + "occurrences": 
1, + "strong": "G06920", + "tag": "zaln", + "type": "milestone" + }, + { + "children": [ + { + "occurrence": 1, + "occurrences": 1, + "tag": "w", + "text": "gluttons", + "type": "word" + } + ], + "content": "γαστέρες", + "lemma": "γαστήρ", + "morph": "Gr,N,,,,,NFP,", + "occurrence": 1, + "occurrences": 1, + "strong": "G10640", + "tag": "zaln", + "type": "milestone" + }, + { + "text": ".", + "type": "text" + }, + { + "text": "\"\n", + "type": "text" + }, + { + "tag": "p", + "type": "paragraph" + } + ], + "alignment": [ + { + "topWords": [ + { + "word": "εἶπέν", + "strong": "G30040", + "lemma": "λέγω", + "morph": "Gr,V,IAA3,,S,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "has", + "occurrence": 1, + "occurrences": 1 + }, + { + "word": "said", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "τις", + "strong": "G51000", + "lemma": "τις", + "morph": "Gr,RI,,,,NMS,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "One", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "ἐξ", + "strong": "G15370", + "lemma": "ἐκ", + "morph": "Gr,P,,,,,G,,,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "of", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "αὐτῶν", + "strong": "G08460", + "lemma": "αὐτός", + "morph": "Gr,RP,,,3GMP,", + "occurrence": 1, + "occurrences": 2 + }, + { + "word": "ἴδιος", + "strong": "G23980", + "lemma": "ἴδιος", + "morph": "Gr,RD,,,,NMS,", + "occurrence": 1, + "occurrences": 1 + }, + { + "word": "αὐτῶν", + "strong": "G08460", + "lemma": "αὐτός", + "morph": "Gr,RP,,,3GMP,", + "occurrence": 2, + "occurrences": 2 + } + ], + "bottomWords": [ + { + "word": "their", + "occurrence": 1, + "occurrences": 1 + }, + { + "word": "own", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "προφήτης", + "strong": "G43960", + "lemma": 
"προφήτης", + "morph": "Gr,N,,,,,NMS,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "prophets", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "Κρῆτες", + "strong": "G29120", + "lemma": "Κρής", + "morph": "Gr,N,,,,,NMP,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "Cretans", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "ἀεὶ", + "strong": "G01040", + "lemma": "ἀεί", + "morph": "Gr,D,,,,,,,,,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "always", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "ψεῦσται", + "strong": "G55830", + "lemma": "ψεύστης", + "morph": "Gr,N,,,,,NMP,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "are", + "occurrence": 1, + "occurrences": 1 + }, + { + "word": "liars", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "κακὰ", + "strong": "G25560", + "lemma": "κακός", + "morph": "Gr,AA,,,,NNP,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "evil", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "θηρία", + "strong": "G23420", + "lemma": "θηρίον", + "morph": "Gr,N,,,,,NNP,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "beasts", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "γαστέρες", + "strong": "G10640", + "lemma": "γαστήρ", + "morph": "Gr,N,,,,,NFP,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "gluttons", + "occurrence": 1, + "occurrences": 1 + } + ] + }, + { + "topWords": [ + { + "word": "ἀργαί", + "strong": "G06920", + "lemma": "ἀργός", + "morph": "Gr,AA,,,,NFP,", + "occurrence": 1, + "occurrences": 1 + } + ], + "bottomWords": [ + { + "word": "lazy", + "occurrence": 1, + 
"occurrences": 1 + } + ] + } + ], + "wordBank": [] +} diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json b/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json index 4d2e265..1c7933a 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json @@ -165,7 +165,7 @@ }, { "type": "text", - "text": ":" + "text": ":\n" }, { "tag": "q", @@ -260,7 +260,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -339,7 +339,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -399,7 +399,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -471,7 +471,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -557,7 +557,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -659,7 +659,7 @@ }, { "type": "text", - "text": "\"" + "text": "\"\n" }, { "tag": "s5", diff --git a/lib/index.js b/lib/index.js deleted file mode 100644 index e62db13..0000000 --- a/lib/index.js +++ /dev/null @@ -1,8 +0,0 @@ -'use strict'; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -var VerseObjectUtils = exports.VerseObjectUtils = require('./js/utils/verseObjects'); -var ArrayUtils = exports.ArrayUtils = require('./js/utils/array'); -exports.default = require('./js/aligner'); \ No newline at end of file diff --git a/lib/js/aligner.js b/lib/js/aligner.js deleted file mode 100644 index 581d1d6..0000000 --- a/lib/js/aligner.js +++ /dev/null @@ -1,548 +0,0 @@ -'use strict'; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.getBlankAlignmentDataForVerse = exports.generateWordBank = exports.generateBlankAlignments = exports.verseHasAlignments = exports.unmerge = exports.addVerseObjectToAlignment = exports.orderAlignments = exports.indexOfMilestone = exports.indexOfFirstMilestone = exports.merge = exports.hasAlignments = undefined; - -var _typeof2 
= require('babel-runtime/helpers/typeof'); - -var _typeof3 = _interopRequireDefault(_typeof2); - -var _getIterator2 = require('babel-runtime/core-js/get-iterator'); - -var _getIterator3 = _interopRequireDefault(_getIterator2); - -var _keys = require('babel-runtime/core-js/object/keys'); - -var _keys2 = _interopRequireDefault(_keys); - -var _stringify = require('babel-runtime/core-js/json/stringify'); - -var _stringify2 = _interopRequireDefault(_stringify); - -exports.verseStringWordsContainedInAlignments = verseStringWordsContainedInAlignments; - -var _verseObjects = require('./utils/verseObjects'); - -var VerseObjectUtils = _interopRequireWildcard(_verseObjects); - -var _array = require('./utils/array'); - -var ArrayUtils = _interopRequireWildcard(_array); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -/** - * check if there were any alignments - * @param {Array} alignments - alginments to be checked - * @return {boolean} true if an alignment was found - */ -/** - * - * @file Contains the logic for aligning words. 
- * @author unfoldingWord - */ - -// helpers -var hasAlignments = exports.hasAlignments = function hasAlignments(alignments) { - var indexFirstAlignment = alignments.findIndex(function (alignment) { - return alignment.bottomWords.length > 0; - }); - return indexFirstAlignment >= 0; -}; - -/** - * @description pivots alignments into bottomWords/targetLanguage verseObjectArray sorted by verseText - * @param {Array} alignments - array of aligned word objects {bottomWords, topWords} - * @param {Array} wordBank - array of topWords - * @param {String} verseString - The string to base the bottomWords sorting - * @param {Boolean} useVerseText - if true, then return parsed verse text if unaligned verse has changed, otherwise return null - * @return {Array} - sorted array of verseObjects to be used for verseText of targetLanguage - */ -var merge = exports.merge = function merge(alignments, wordBank, verseString) { - var useVerseText = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : false; - - // get the definitive list of verseObjects from the verse, unaligned but in order - var _VerseObjectUtils$get = VerseObjectUtils.getOrderedVerseObjectsFromString(verseString), - unalignedOrdered = _VerseObjectUtils$get.newVerseObjects, - wordMap = _VerseObjectUtils$get.wordMap; - // assign verseObjects with unaligned objects to be replaced with aligned ones - // check each word in the verse string is also in the word bank or alignments - - - var verseObjectsNotInAlignmentData = verseStringWordsContainedInAlignments(alignments, wordBank, wordMap); - if (verseObjectsNotInAlignmentData.length > 0) { - if (hasAlignments(alignments)) { - // if verse has some alignments - var verseWordsJoined = verseObjectsNotInAlignmentData.map(function (_ref) { - var text = _ref.text; - return text; - }).join(', '); - throw { - message: 'The words "' + verseWordsJoined + '" from the target language verse are not in the alignment data.', - type: 'InvalidatedAlignments' - }; - } else { - 
// if verse had no alignments - return useVerseText ? unalignedOrdered : null; // use parsed verse text - } - } - // each wordBank object should result in one verseObject - var wbLen = wordBank.length; - for (var i = 0; i < wbLen; i++) { - var bottomWord = wordBank[i]; - var verseObject = VerseObjectUtils.wordVerseObjectFromBottomWord(bottomWord); - var index = VerseObjectUtils.indexOfVerseObject(wordMap, verseObject); - if (index > -1) { - var location = wordMap[index]; - location.array[location.pos] = verseObject; - } else if (hasAlignments(alignments)) { - // if verse has some alignments - throw { message: 'Word "' + bottomWord.word + '" is in wordBank, but missing from target language verse.', type: 'InvalidatedAlignments' }; - } else { - // if verse had no alignments - return useVerseText ? unalignedOrdered : null; // use parsed verse text - } - } - var indicesToDelete = []; - // each alignment should result in one verseObject - var aLen = alignments.length; - - var _loop = function _loop(_i) { - var alignment = alignments[_i]; - var topWords = alignment.topWords, - bottomWords = alignment.bottomWords; - // each bottomWord results in a nested verseObject of tag: w, type: word - // located inside innermost nested topWord/k verseObject - - var replacements = {}; - var bwLen = bottomWords.length; - for (var j = 0; j < bwLen; j++) { - var _bottomWord = bottomWords[j]; - var _verseObject2 = VerseObjectUtils.wordVerseObjectFromBottomWord(_bottomWord); - var _index = VerseObjectUtils.indexOfVerseObject(wordMap, _verseObject2); - if (_index === -1) { - throw { message: "VerseObject not found in verseText while merging:" + (0, _stringify2.default)(_verseObject2), type: 'InvalidatedAlignments' }; - } - replacements[_index] = _verseObject2; - } - // each topWord results in a nested verseObject of tag: k, type: milestone - var milestones = topWords.map(function (topWord) { - return VerseObjectUtils.milestoneVerseObjectFromTopWord(topWord); - }); - var indices = (0, 
_keys2.default)(replacements); - // group consecutive indexes so that they can be aggregated - var groupedConsecutiveIndices = ArrayUtils.groupConsecutiveNumbers(indices, wordMap); - // loop through groupedConsecutiveIndices to reduce and place where needed. - var gLen = groupedConsecutiveIndices.length; - for (var _j = 0; _j < gLen; _j++) { - var consecutiveIndices = groupedConsecutiveIndices[_j]; - // map the consecutiveIndices to replacement verseObjects - var replacementVerseObjects = consecutiveIndices.map(function (index) { - return replacements[index]; - }); - // remove and use the first index in group to place the aligned verseObject milestone later - var indexToReplace = consecutiveIndices.shift(); - // the rest of the consecutiveIndices need to be queued to be deleted later after shift - indicesToDelete = indicesToDelete.concat(consecutiveIndices); - // place the replacementVerseObjects in the last milestone as children - milestones[milestones.length - 1].children = replacementVerseObjects; - // nest the milestones so that the first is the parent and each subsequent is nested - var milestone = VerseObjectUtils.nestMilestones(milestones); - // replace the original verseObject from the verse text with the aligned milestone verseObject - var _location = wordMap[indexToReplace]; - _location.array[_location.pos] = milestone; - } - }; - - for (var _i = 0; _i < aLen; _i++) { - _loop(_i); - } - // deleteIndices that were queued due to consecutive bottomWords in alignments - var verseObjects = ArrayUtils.deleteIndices(unalignedOrdered, indicesToDelete, wordMap); - return verseObjects; -}; - -/** - * Determines if the given verse objects from a string are contained in - * the given alignments - * - * @param {Array} alignments - array of aligned word objects {bottomWords, topWords} - * @param {Array} wordBank - array of unused topWords for aligning - * @param {Array} wordMap - ordered map of word locations in verseObjects - * @return {Array} - returns array of word 
verse objects from a string that are not contained in - * the given alignments - */ -function verseStringWordsContainedInAlignments(alignments, wordBank, wordMap) { - var unalignedMap = wordMap.filter(function (wordItem) { - var verseObject = wordItem.array[wordItem.pos]; - var checkIfWordMatches = function checkIfWordMatches(verseObject) { - return function (_ref2) { - var word = _ref2.word, - occurrence = _ref2.occurrence, - occurrences = _ref2.occurrences; - - var verseObjectWord = verseObject.text; - var verseObjectOccurrence = verseObject.occurrence; - var verseObjectOccurrences = verseObject.occurrences; - return word === verseObjectWord && occurrence === verseObjectOccurrence && occurrences === verseObjectOccurrences; - }; - }; - if (verseObject.type !== 'word') return false; - var wordCheckerFunction = checkIfWordMatches(verseObject); - var containedInWordBank = Boolean(wordBank.find(wordCheckerFunction)); - var containedInAlignments = Boolean(alignments.find(function (_ref3) { - var bottomWords = _ref3.bottomWords; - - return Boolean(bottomWords.find(wordCheckerFunction)); - })); - return !containedInWordBank && !containedInAlignments; - }); - return unalignedMap.map(function (location) { - return location.array[location.pos]; - }); -} - -/** - * @description find the alignment to use for this milestone. If milestone has already been given an alignment, then - * use that one. Otherwise return null. This is needed because milestones are not always - * contiguous. - * @param {Array} baseMilestones - already found base milestones. - * @param {Object} newMilestone - milestone not yet given an alignment - * @return {Object} previous Alignment if found - else null. 
- */ -var getAlignmentForMilestone = function getAlignmentForMilestone(baseMilestones, newMilestone) { - var _iteratorNormalCompletion = true; - var _didIteratorError = false; - var _iteratorError = undefined; - - try { - for (var _iterator = (0, _getIterator3.default)(baseMilestones), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { - var baseMilestone = _step.value; - - if (baseMilestone.alignment && VerseObjectUtils.sameMilestone(baseMilestone.milestone, newMilestone)) { - return baseMilestone.alignment; - } - } - } catch (err) { - _didIteratorError = true; - _iteratorError = err; - } finally { - try { - if (!_iteratorNormalCompletion && _iterator.return) { - _iterator.return(); - } - } finally { - if (_didIteratorError) { - throw _iteratorError; - } - } - } - - return null; -}; - -/** - * compare occurrences of a and b, and handle conversion to int if necessary - * @param {Object} a - first occurrences value - * @param {Object} b - second occurrence value - * @return {boolean} - if they are the same or not - */ -var compareOccurrences = function compareOccurrences(a, b) { - var sameOccurrence = a.occurrence === b.occurrence; - if (!sameOccurrence && a.occurrence && b.occurrence) { - if ((0, _typeof3.default)(a.occurrence) !== (0, _typeof3.default)(b.occurrence)) { - // one may be string and the other an int - var occurrence1 = typeof a.occurrence === 'string' ? parseInt(a.occurrence, 10) : a.occurrence; - var occurrence2 = typeof b.occurrence === 'string' ? 
parseInt(b.occurrence, 10) : b.occurrence; - sameOccurrence = occurrence1 === occurrence2 && occurrence1 !== 0; - } - } - return sameOccurrence; -}; - -/** - * @description returns index of the verseObject in the alignments first milestone (ignores occurrences since that can be off) - * @param {Array} alignments - array of the alignments to search in - * @param {Object} verseObject - verseObject to search for - * @return {Int} - the index of the verseObject - */ -var indexOfFirstMilestone = exports.indexOfFirstMilestone = function indexOfFirstMilestone(alignments, verseObject) { - var index = -1; - if (verseObject.type === 'word') { - index = alignments.findIndex(function (alignment) { - if (alignment.topWords.length > 0) { - var _verseObject = alignment.topWords[0]; - if (_verseObject.word === verseObject.text) { - return compareOccurrences(_verseObject, verseObject); - } - } - return false; - }); - } - return index; -}; - -/** - * @description returns index of the verseObject in the alignments milestone (ignores occurrences since that can be off) - * @param {Array} alignments - array of the alignments to search in - * @param {Object} verseObject - verseObject to search for - * @return {Int} - the index of the verseObject - */ -var indexOfMilestone = exports.indexOfMilestone = function indexOfMilestone(alignments, verseObject) { - var index = -1; - if (verseObject.type === 'word') { - index = alignments.findIndex(function (alignment) { - var _iteratorNormalCompletion2 = true; - var _didIteratorError2 = false; - var _iteratorError2 = undefined; - - try { - for (var _iterator2 = (0, _getIterator3.default)(alignment.topWords), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) { - var _verseObject = _step2.value; - - if (_verseObject.word === verseObject.text) { - return compareOccurrences(_verseObject, verseObject); - } - } - } catch (err) { - _didIteratorError2 = true; - _iteratorError2 = err; - } finally { 
- try { - if (!_iteratorNormalCompletion2 && _iterator2.return) { - _iterator2.return(); - } - } finally { - if (_didIteratorError2) { - throw _iteratorError2; - } - } - } - - return false; - }); - } - return index; -}; - -/** - * @description uses the alignedVerseString to order alignments - * @param {String|Array} alignmentVerse - optional alignment verse - * @param {Array} alignmentUnOrdered - alignments to order - * @return {Array} ordered alignments if alignment string given, else unordered alignments - */ -var orderAlignments = exports.orderAlignments = function orderAlignments(alignmentVerse, alignmentUnOrdered) { - var orderedObjects = null; - if (typeof alignmentVerse === 'string') { - orderedObjects = VerseObjectUtils.getOrderedVerseObjectsFromString(alignmentVerse); - } else { - orderedObjects = VerseObjectUtils.getOrderedVerseObjects(alignmentVerse); - } - var wordMap = orderedObjects.wordMap; - if (Array.isArray(wordMap)) { - var alignment = []; - // order alignments - for (var i = 0; i < wordMap.length; i++) { - var location = wordMap[i]; - var nextWord = location.array[location.pos]; - var index = indexOfFirstMilestone(alignmentUnOrdered, nextWord); - if (index < 0 && nextWord.type === 'word' && i < wordMap.length - 1) { - var verseObjectAfter = location.array[location.pos + 1]; - if (verseObjectAfter.type === 'text') { - // maybe this was punctuation split from word - var originalText = nextWord.text; - nextWord.text += verseObjectAfter.text; // add possible punctuation - index = indexOfFirstMilestone(alignmentUnOrdered, nextWord); // try again - if (index < 0) { - nextWord.text = originalText; // restore original text if not a match - } - } - } - if (index >= 0) { - alignment.push(alignmentUnOrdered[index]); - alignmentUnOrdered.splice(index, 1); // remove item - } else if (nextWord.type === 'word') { - // if not found, may be either an unaligned topWord or merged topWord - index = indexOfMilestone(alignmentUnOrdered, nextWord); - if (index < 0) { 
- // if not found in unordered list, try already ordered - index = indexOfMilestone(alignment, nextWord); - } - if (index < 0) { - // if still not found in topWords, it's an unaligned topWord - var wordObject = VerseObjectUtils.alignmentObjectFromVerseObject(nextWord); - alignment.push({ topWords: [wordObject], bottomWords: [] }); - } - } - } - if (alignmentUnOrdered.length > 0) { - alignment.push.apply(alignment, alignmentUnOrdered); // fast concat - } - return alignment; - } - return alignmentUnOrdered; -}; - -/** - * @description adds verse object to alignment - * @param {Object} verseObject - The verse obejct to add to alignmer - * @param {Object} alignment - pre-existing alignments - */ -var addVerseObjectToAlignment = exports.addVerseObjectToAlignment = function addVerseObjectToAlignment(verseObject, alignment) { - if (verseObject.type === 'milestone' && verseObject.children.length > 0) { - var wordObject = VerseObjectUtils.alignmentObjectFromVerseObject(verseObject); - var duplicate = alignment.topWords.find(function (obj) { - return obj.word === wordObject.word && obj.occurrence === wordObject.occurrence; - }); - if (!duplicate) { - alignment.topWords.push(wordObject); - } - verseObject.children.forEach(function (_verseObject) { - addVerseObjectToAlignment(_verseObject, alignment); - }); - } else if (verseObject.type === 'word' && !verseObject.children) { - var _wordObject = VerseObjectUtils.alignmentObjectFromVerseObject(verseObject); - alignment.bottomWords.push(_wordObject); - } -}; - -/** - * extracts alignment from verse object and adds to baseMilestones and alignments - * @param {Array} baseMilestones - array of milestones found - * @param {Object} verseObject - to add to arrays - * @param {Array} alignments - array of alignments found - */ -var addAlignment = function addAlignment(baseMilestones, verseObject, alignments) { - var alignment = getAlignmentForMilestone(baseMilestones, verseObject); - if (!alignment) { - alignment = { topWords: [], 
bottomWords: [] }; - alignments.push(alignment); - baseMilestones.push({ alignment: alignment, milestone: verseObject }); - } - addVerseObjectToAlignment(verseObject, alignment); - if (verseObject.children && verseObject.type !== "milestone") { - var length = verseObject.children.length; - for (var i = 0; i < length; i++) { - addAlignment(baseMilestones, verseObject.children[i], alignments); - } - } -}; - -/** - * @description pivots alignments into bottomWords/targetLanguage verseObjectArray sorted by verseText - * @param {Array} verseObjects - array of aligned verseObjects [{milestone children={verseObject}}, ...] - * @param {Array|Object|String} alignedVerse - optional verse to use for ordering alignments - * @return {Object} - object of alignments (array of alignments) and wordbank (array of unused words) - */ -var unmerge = exports.unmerge = function unmerge(verseObjects, alignedVerse) { - var baseMilestones = []; - var wordBank = []; - var alignments = []; - if (verseObjects && verseObjects.verseObjects) { - verseObjects = verseObjects.verseObjects; - } - if (typeof alignedVerse !== 'string') { - alignedVerse = VerseObjectUtils.getWordList(alignedVerse); - } - var len = verseObjects.length; - for (var i = 0; i < len; i++) { - var verseObject = verseObjects[i]; - addAlignment(baseMilestones, verseObject, alignments); - } - var alignmentUnOrdered = []; - len = alignments.length; - for (var _i2 = 0; _i2 < len; _i2++) { - var _alignment = alignments[_i2]; - if (_alignment.topWords.length > 0) { - alignmentUnOrdered.push(_alignment); - } else { - wordBank = wordBank.concat(_alignment.bottomWords); - } - } - var alignment = orderAlignments(alignedVerse, alignmentUnOrdered); - return { alignment: alignment, wordBank: wordBank }; -}; - -/** - * Helper method to find if the given alignments object actually - * has aligned data. 
If not we do not want to show the reset dialog - * - * @param {array} alignments - alignments object with array of top words/bottom words - * @return {boolean} - Whether or not the verse has alignments - */ -var verseHasAlignments = exports.verseHasAlignments = function verseHasAlignments(_ref4) { - var alignments = _ref4.alignments; - - if (alignments) { - return alignments.filter(function (_ref5) { - var bottomWords = _ref5.bottomWords; - - return bottomWords.length > 0; - }).length > 0; - } -}; - -/** - * @description - generates the word alignment tool alignmentData from the UGNT verseData - * @param {String|Array|Object} verseData - array of verseObjects - * @return {Array} alignmentObjects from verse text - */ -var generateBlankAlignments = exports.generateBlankAlignments = function generateBlankAlignments(verseData) { - var wordList = VerseObjectUtils.getWordList(verseData); - var alignments = wordList.map(function (wordData, index) { - var word = wordData.word || wordData.text; - var occurrences = VerseObjectUtils.getOccurrences(wordList, word); - var occurrence = VerseObjectUtils.getOccurrence(wordList, index, word); - var alignment = { - topWords: [{ - word: word, - strong: wordData.strong || wordData.strongs, - lemma: wordData.lemma, - morph: wordData.morph, - occurrence: occurrence, - occurrences: occurrences - }], - bottomWords: [] - }; - return alignment; - }); - return alignments; -}; - -/** - * @description - generates the word alignment tool word bank from targetLanguage verse - * @param {String|Array|Object} verseData - string of the verseText in the targetLanguage - * @return {Array} alignmentObjects from verse text - */ -var generateWordBank = exports.generateWordBank = function generateWordBank(verseData) { - var verseWords = VerseObjectUtils.getWordList(verseData); - var wordBank = verseWords.map(function (object, index) { - var word = object.text; - var occurrences = VerseObjectUtils.getOccurrences(verseWords, word); - var occurrence = 
VerseObjectUtils.getOccurrence(verseWords, index, word); - return { - word: word, - occurrence: occurrence, - occurrences: occurrences - }; - }); - return wordBank; -}; - -/** - * Wrapper method for resetting alignments in verse to being blank alignments - * i.e. (all words in word bank and not joined with alignments data) - * Note: This method does not overwrite any data - * @param {string} ugntVerse - Array of verse objects containing ugnt words - * @param {string} targetLanguageVerse - Current target language string from the bibles reducer - * @return {{alignments, wordBank}} - Reset alignments data - */ -var getBlankAlignmentDataForVerse = exports.getBlankAlignmentDataForVerse = function getBlankAlignmentDataForVerse(ugntVerse, targetLanguageVerse) { - var alignments = generateBlankAlignments(ugntVerse); - var wordBank = generateWordBank(targetLanguageVerse); - return { alignments: alignments, wordBank: wordBank }; -}; \ No newline at end of file diff --git a/lib/js/utils/array.js b/lib/js/utils/array.js deleted file mode 100644 index 7a8475b..0000000 --- a/lib/js/utils/array.js +++ /dev/null @@ -1,73 +0,0 @@ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.flattenArray = exports.deleteIndices = exports.groupConsecutiveNumbers = undefined; - -var _toConsumableArray2 = require("babel-runtime/helpers/toConsumableArray"); - -var _toConsumableArray3 = _interopRequireDefault(_toConsumableArray2); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? 
obj : { default: obj }; } - -/** - * @description - Groups consecutive numbers in an array - * @param {Array} numbers - array of numbers to be grouped - * @param {Array} wordMap - ordered map of word locations in verseObjects - * @return {Array} - grouped array of array of consecutive numbers - */ -var groupConsecutiveNumbers = exports.groupConsecutiveNumbers = function groupConsecutiveNumbers(numbers, wordMap) { - return numbers.reduce(function (accumulator, currentValue, currentIndex, originalArray) { - if (currentValue >= 0) { - // ignore undefined entries - var current = wordMap[currentValue]; - var last = currentIndex > 0 ? wordMap[originalArray[currentIndex - 1]] : {}; - // if this iteration is consecutive to the last, add it to the previous run - if (current.array === last.array && // make sure they are stored in the same array - current.pos - last.pos === 1) { - accumulator[accumulator.length - 1].push(currentValue); - } else { - // the start of a new run including first element - // create a new subarray with this as the start - accumulator.push([currentValue]); - } - } - return accumulator; // return state for next iteration - }, []); -}; - -/** - * @description - Deletes indices from an array safely - * @param {Array} array - array elements to delete from - * @param {Array} indices - array of indicies to delete - * @param {Array} wordMap - ordered map of word locations in verseObjects - * @return {Array} - the resulting array after indexes were safely removed - */ -var deleteIndices = exports.deleteIndices = function deleteIndices(array, indices, wordMap) { - indices.sort(function (a, b) { - return b - a; - }); - var length = indices.length; - for (var i = 0; i < length; i++) { - var index = indices[i]; - if (index >= 0) { - var location = wordMap[index]; - if (location) { - location.array.splice(location.pos, 1); - } - } - } - return array; -}; - -/** - * Helper function to flatten a double nested array - * @param {array} arr - Array to be flattened - * 
@return {array} - Flattened array - */ -var flattenArray = exports.flattenArray = function flattenArray(arr) { - var _ref; - - return (_ref = []).concat.apply(_ref, (0, _toConsumableArray3.default)(arr)); -}; \ No newline at end of file diff --git a/lib/js/utils/verseObjects.js b/lib/js/utils/verseObjects.js deleted file mode 100644 index 42a59d4..0000000 --- a/lib/js/utils/verseObjects.js +++ /dev/null @@ -1,725 +0,0 @@ -'use strict'; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.getWordsFromVerseObjects = exports.sortWordObjectsByString = exports.wordObjectArrayFromString = exports.populateOccurrencesInWordObjects = exports.combineVerseArray = exports.addVerseObjectToAlignment = exports.sameMilestone = exports.getWordList = exports.getWordListForVerse = exports.getWordListFromVerseObjectArray = exports.mergeVerseData = exports.extractWordsFromVerseObject = exports.indexOfVerseObject = exports.alignmentObjectFromVerseObject = exports.milestoneVerseObjectFromTopWord = exports.wordVerseObjectFromBottomWord = exports.nestMilestones = exports.getOrderedVerseObjectsFromString = exports.getOrderedVerseObjects = exports.getOccurrences = exports.getOccurrence = exports.getWordText = undefined; - -var _typeof2 = require('babel-runtime/helpers/typeof'); - -var _typeof3 = _interopRequireDefault(_typeof2); - -var _stringify = require('babel-runtime/core-js/json/stringify'); - -var _stringify2 = _interopRequireDefault(_stringify); - -var _getIterator2 = require('babel-runtime/core-js/get-iterator'); - -var _getIterator3 = _interopRequireDefault(_getIterator2); - -var _lodash = require('lodash'); - -var _lodash2 = _interopRequireDefault(_lodash); - -var _usfmJs = require('usfm-js'); - -var _usfmJs2 = _interopRequireDefault(_usfmJs); - -var _stringPunctuationTokenizer = require('string-punctuation-tokenizer'); - -var _stringPunctuationTokenizer2 = _interopRequireDefault(_stringPunctuationTokenizer); - -var _array = require('./array'); - -var 
ArrayUtils = _interopRequireWildcard(_array); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -/** - * An object containing information about the word in a target language - * - * @typedef WordObject - * @type {Object} - * @property {number} occurrences - Total amount of ccurrences for - * the word in the verse. - * @property {number} occurrence - Specific occurrence of the word - * in the verse. - * @property {string} text - The text that used for rendering on the screen. - * @property {string} tag - Denotes the type of usfm tag the word originates - * from. - * @property {[WordObject]} [children] - Containing WordObject's - * for pivoting WordObject's off of another - * @property {('text'|'word'|'paragraph')} type - Denotes the category of content - * the word holds - * @property {string} [word] - The text that used for rendering on the screen. - */ - -/** - * An object containing information about the word - * in the original language - * @typedef {[WordObject]} VerseObject - */ - -/** - * get text from word type verse object or word object - * @param {WordObject} wordObject - an object containing information about the word - * @return {string|undefined} text from word object - */ -/* eslint-disable no-negated-condition */ -var getWordText = exports.getWordText = function getWordText(wordObject) { - if (wordObject && wordObject.type === 'word') { - return wordObject.text; - } - return wordObject ? 
wordObject.word : undefined; -}; - -/** - * Gets the occurrence of a subString in words by counting up to subString index - * @param {String|Array} words - word list or string to search - * @param {Number} currentWordIndex - index of desired word in words - * @param {String} subString - The sub string to search for - * @return {Integer} - the occurrence of the word at currentWordIndex - */ -var getOccurrence = exports.getOccurrence = function getOccurrence(words, currentWordIndex, subString) { - if (typeof words === 'string') { - return _stringPunctuationTokenizer2.default.occurrenceInString(words, currentWordIndex, subString); - } - - var occurrence = 0; - if (Array.isArray(words)) { - for (var i = 0; i <= currentWordIndex; i++) { - if (getWordText(words[i]) === subString) occurrence++; - } - } - return occurrence; -}; - -/** - * Function that count occurrences of a substring in words - * @param {String|Array} words - word list or string to search - * @param {String} subString - The sub string to search for - * @return {Integer} - the count of the occurrences - */ -var getOccurrences = exports.getOccurrences = function getOccurrences(words, subString) { - if (typeof words === 'string') { - return _stringPunctuationTokenizer2.default.occurrencesInString(words, subString); - } - - var occurrences = 0; - if (Array.isArray(words)) { - var _iteratorNormalCompletion = true; - var _didIteratorError = false; - var _iteratorError = undefined; - - try { - for (var _iterator = (0, _getIterator3.default)(words), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { - var word = _step.value; - - if (getWordText(word) === subString) occurrences++; - } - } catch (err) { - _didIteratorError = true; - _iteratorError = err; - } finally { - try { - if (!_iteratorNormalCompletion && _iterator.return) { - _iterator.return(); - } - } finally { - if (_didIteratorError) { - throw _iteratorError; - } - } - } - } - return occurrences; 
-}; - -/** - * @description verseObjects with occurrences from verseObjects - * @param {Array} verseObjects - Word list to add occurrence(s) to - * @return {{newVerseObjects: Array, wordMap: Array}} - clone of verseObjects and word map - */ -var getOrderedVerseObjects = exports.getOrderedVerseObjects = function getOrderedVerseObjects(verseObjects) { - var wordMap = []; - var _verseObjects = _lodash2.default.cloneDeep(verseObjects); - var length = _verseObjects.length; - for (var i = 0; i < length; i++) { - var verseObject = _verseObjects[i]; - if (verseObject.type === 'word') { - verseObject.occurrence = getOccurrence(_verseObjects, i, verseObject.text); - verseObject.occurrences = getOccurrences(_verseObjects, verseObject.text); - wordMap.push({ array: _verseObjects, pos: i }); - } - } - return { newVerseObjects: _verseObjects, wordMap: wordMap }; -}; - -/** - * get texts from nested verse objects - * @param {Array} verseObjects - nested verse objects to extract text from - * @return {Array} array of texts found - */ -var getVerseObjectsText = function getVerseObjectsText(verseObjects) { - var texts = []; - if (verseObjects) { - var length = verseObjects.length; - for (var i = 0; i < length; i++) { - var vo = verseObjects[i]; - if (vo.text) { - texts.push(vo.text); - } - if (vo.children) { - var childTexts = getVerseObjectsText(vo.children); - texts.push.apply(texts, childTexts); // concat arrays - } - } - } - return texts; -}; - -/** - * parse text into tokens - * @param {string} text - string to tokenize - * @param {Array} newVerseObjects - nested verse objects - * @param {Array} wordMap - ordered map of word locations in verseObjects - * @param {Number} nonWordVerseObjectCount - keeps count of entries that are not actually words - * @param {String} verseText - text of the entire verse - * @return {Number} new nonWordVerseObjectCount - */ -var tokenizeText = function tokenizeText(text, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText) { - if (text) { 
- var tokens = _stringPunctuationTokenizer2.default.tokenizeWithPunctuation(text); - var tokenLength = tokens.length; - for (var j = 0; j < tokenLength; j++) { - var word = tokens[j]; - var verseObject = void 0; - if (_stringPunctuationTokenizer2.default.word.test(word)) { - // if the text has word characters, its a word object - var wordIndex = wordMap.length; - var occurrence = _stringPunctuationTokenizer2.default.occurrenceInString(verseText, wordIndex, word); - var occurrences = _stringPunctuationTokenizer2.default.occurrencesInString(verseText, word); - if (occurrence > occurrences) occurrence = occurrences; - verseObject = { - tag: "w", - type: "word", - text: word, - occurrence: occurrence, - occurrences: occurrences - }; - wordMap.push({ array: newVerseObjects, pos: newVerseObjects.length }); - } else { - // the text does not have word characters - nonWordVerseObjectCount++; - verseObject = { - type: "text", - text: word - }; - } - newVerseObjects.push(verseObject); - } - } - return nonWordVerseObjectCount; -}; - -/** - * step through verse objects extracting words - * @param {Array} verseObjects - original array of verse objects with words split - * @param {Array} newVerseObjects - new array of verse objects with words split - * @param {Array} wordMap - ordered map of word locations in verseObjects - * @param {String} verseText - text of the entire verse - * @param {Number} nonWordVerseObjectCount - keeps count of entries that are not actually words - * @return {Number} updated nonWordVerseObjectCount - */ -var getWordsFromNestedVerseObjects = function getWordsFromNestedVerseObjects(verseObjects, newVerseObjects, wordMap, verseText, nonWordVerseObjectCount) { - var voLength = verseObjects.length; - for (var i = 0; i < voLength; i++) { - var verseObject = verseObjects[i]; - var vsObjText = verseObject.text && verseObject.text.trim(); - if (verseObject.type !== 'text') { - // preseserve non-text verseObject except for text part which will be split into words 
- delete verseObject.text; - newVerseObjects.push(verseObject); - if (verseObject.children) { - var newChildVerseObjects = []; - nonWordVerseObjectCount = tokenizeText(vsObjText, newChildVerseObjects, wordMap, nonWordVerseObjectCount, verseText); - nonWordVerseObjectCount = getWordsFromNestedVerseObjects(verseObject.children, newChildVerseObjects, wordMap, verseText, nonWordVerseObjectCount); - verseObject.children = newChildVerseObjects; - } else { - nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText); - } - } else { - nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText); - } - } - return nonWordVerseObjectCount; -}; - -/** - * @description verseObjects with occurrences via string - * @param {String} string - The string to search in - * @return {{newVerseObjects: Array, wordMap: Array}} - clone of verseObjects and word map - */ -var getOrderedVerseObjectsFromString = exports.getOrderedVerseObjectsFromString = function getOrderedVerseObjectsFromString(string) { - var newVerseObjects = []; - var wordMap = []; - if (string) { - // convert string using usfm to JSON - var _verseObjects = _usfmJs2.default.toJSON('\\v 1 ' + string, { chunk: true }).verses["1"].verseObjects; - var _verseObjectsWithTextString = getVerseObjectsText(_verseObjects).join(' '); - - getWordsFromNestedVerseObjects(_verseObjects, newVerseObjects, wordMap, _verseObjectsWithTextString, 0); - } - return { newVerseObjects: newVerseObjects, wordMap: wordMap }; -}; - -/** - * @description Nests the milestons so that the first is the root and each after is nested - * @param {Array} milestones - an array of milestone objects - * @return {Object} - the nested milestone - */ -var nestMilestones = exports.nestMilestones = function nestMilestones(milestones) { - var _milestones = JSON.parse((0, _stringify2.default)(milestones)); - var milestone = void 0; - _milestones.reverse(); - 
_milestones.forEach(function (_milestone) { - if (milestone) { - // if the milestone was already there - _milestone.children = [milestone]; // nest the existing milestone as children - milestone = _milestone; // replace the milestone with this one - } else { - // if this is the first milestone, populate it - milestone = _milestone; - } - // next loop will use the resulting milestone to nest until no more milestones - }); - return milestone; -}; - -/** - * @description Converts a bottomWord to a verseObject of tag: w, type: word - * @param {WordObject} bottomWord - a wordObject to convert - * @param {string} textKey - key of the text in the bottom word object - * @return {Object} - a verseObject of tag: w, type: word - */ -var wordVerseObjectFromBottomWord = exports.wordVerseObjectFromBottomWord = function wordVerseObjectFromBottomWord(bottomWord) { - var textKey = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'word'; - return { - tag: "w", - type: "word", - text: bottomWord[textKey], - occurrence: bottomWord.occurrence, - occurrences: bottomWord.occurrences - }; -}; - -/** - * @description Converts a topWord to a verseObject of tag: w, type: word - * @param {WordObject} topWord - a wordObject to convert - * @return {Object} - a verseObject of tag: w, type: word - */ -var milestoneVerseObjectFromTopWord = exports.milestoneVerseObjectFromTopWord = function milestoneVerseObjectFromTopWord(topWord) { - var verseObject = JSON.parse((0, _stringify2.default)(topWord)); - verseObject.tag = "zaln"; - verseObject.type = "milestone"; - verseObject.content = topWord.word; - delete verseObject.word; - delete verseObject.tw; - return verseObject; -}; - -/** - * @description Converts a verseObject of tag: w, type: word into an alignmentObject - * @param {WordObject} verseObject - a wordObject to convert - * @return {Object} - an alignmentObject - */ -var alignmentObjectFromVerseObject = exports.alignmentObjectFromVerseObject = function 
alignmentObjectFromVerseObject(verseObject) { - var wordObject = JSON.parse((0, _stringify2.default)(verseObject)); - wordObject.word = wordObject.text || wordObject.content; - delete wordObject.content; - delete wordObject.text; - delete wordObject.tag; - delete wordObject.type; - delete wordObject.children; - return wordObject; -}; - -/** - * @description Returns index of the verseObject in the verseObjects (ignores occurrences since that can be off) - * @param {Array} wordMap - ordered map of word locations in verseObjects - * @param {Object} verseObject - verseObject to search for - * @return {Int} - the index of the verseObject - */ -var indexOfVerseObject = exports.indexOfVerseObject = function indexOfVerseObject(wordMap, verseObject) { - return wordMap.findIndex(function (wordItem) { - var _verseObject = wordItem.array[wordItem.pos]; - return _verseObject.text === verseObject.text && _verseObject.occurrence === verseObject.occurrence && _verseObject.type === verseObject.type && _verseObject.tag === verseObject.tag; - }); -}; - -/** - * extracts word objects from verse object. If verseObject is word type, return that in array, else if it is a - * milestone, then add words found in children to word array. If no words found return empty array. - * @param {object} verseObject - verse objects to have words extracted from - * @return {Array} words found - */ -var extractWordsFromVerseObject = exports.extractWordsFromVerseObject = function extractWordsFromVerseObject(verseObject) { - var words = []; - if ((typeof verseObject === 'undefined' ? 
'undefined' : (0, _typeof3.default)(verseObject)) === 'object') { - if (verseObject.word || verseObject.type === 'word') { - words.push(verseObject); - } else if (verseObject.children) { - var _iteratorNormalCompletion2 = true; - var _didIteratorError2 = false; - var _iteratorError2 = undefined; - - try { - for (var _iterator2 = (0, _getIterator3.default)(verseObject.children), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) { - var child = _step2.value; - - var childWords = extractWordsFromVerseObject(child); - words.push.apply(words, childWords); // fast concat arrays - } - } catch (err) { - _didIteratorError2 = true; - _iteratorError2 = err; - } finally { - try { - if (!_iteratorNormalCompletion2 && _iterator2.return) { - _iterator2.return(); - } - } finally { - if (_didIteratorError2) { - throw _iteratorError2; - } - } - } - } - } - return words; -}; - -/** - * @description merge verse data into a string - * @param {Object|Array} verseData - verse objects to be merged - * @param {array} filter - Optional filter to get a specific type of word object type. 
- * @return {String} - the merged verse object string - */ -var mergeVerseData = exports.mergeVerseData = function mergeVerseData(verseData, filter) { - if (verseData.verseObjects) { - verseData = verseData.verseObjects; - } - var verseArray = []; - verseData.forEach(function (part) { - if (typeof part === 'string') { - verseArray.push(part); - } - var words = [part]; - if (part.type === 'milestone') { - words = extractWordsFromVerseObject(part); - } - words.forEach(function (word) { - if (!filter || word.text && word.type && filter.includes(word.type)) { - verseArray.push(word.text); - } - }); - }); - var verseText = ''; - var _iteratorNormalCompletion3 = true; - var _didIteratorError3 = false; - var _iteratorError3 = undefined; - - try { - for (var _iterator3 = (0, _getIterator3.default)(verseArray), _step3; !(_iteratorNormalCompletion3 = (_step3 = _iterator3.next()).done); _iteratorNormalCompletion3 = true) { - var verse = _step3.value; - - if (verse) { - if (verseText && verseText[verseText.length - 1] !== '\n') { - verseText += ' '; - } - verseText += verse; - } - } - } catch (err) { - _didIteratorError3 = true; - _iteratorError3 = err; - } finally { - try { - if (!_iteratorNormalCompletion3 && _iterator3.return) { - _iterator3.return(); - } - } finally { - if (_didIteratorError3) { - throw _iteratorError3; - } - } - } - - return verseText; -}; - -/** - * extract list of word objects from array of verseObjects (will also search children of milestones). 
- * @param {Array} verseObjects - verse objects to search for word list from - * @return {Array} - words found - */ -var getWordListFromVerseObjectArray = exports.getWordListFromVerseObjectArray = function getWordListFromVerseObjectArray(verseObjects) { - var wordList = []; - var _iteratorNormalCompletion4 = true; - var _didIteratorError4 = false; - var _iteratorError4 = undefined; - - try { - for (var _iterator4 = (0, _getIterator3.default)(verseObjects), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { - var verseObject = _step4.value; - - var words = extractWordsFromVerseObject(verseObject); - wordList.push.apply(wordList, words); // fast concat arrays - } - } catch (err) { - _didIteratorError4 = true; - _iteratorError4 = err; - } finally { - try { - if (!_iteratorNormalCompletion4 && _iterator4.return) { - _iterator4.return(); - } - } finally { - if (_didIteratorError4) { - throw _iteratorError4; - } - } - } - - return wordList; -}; - -var addContentAttributeToChildren = function addContentAttributeToChildren(childrens, parentObject, grandParentObject) { - var childrensWithAttribute = []; - - for (var i = 0; i < childrens.length; i++) { - var child = childrens[i]; - if (child.children) { - child = addContentAttributeToChildren(child.children, child, parentObject); - } else if (!child.content && parentObject.content) { - var childrenContent = [parentObject]; - if (grandParentObject) childrenContent.push(grandParentObject); - child.content = childrenContent; - } - childrensWithAttribute.push(child); - } - - return childrensWithAttribute; -}; - -/** - * @description flatten verse objects from nested format to flat array - * @param {array} verse - source array of nested verseObjects - * @param {array} words - output array that will be filled with flattened verseObjects - */ -var flattenVerseObjects = function flattenVerseObjects(verse, words) { - for (var i = 0; i < verse.length; i++) { - var object = 
verse[i]; - if (object) { - if (object.type === 'word') { - object.strong = object.strong || object.strongs; - words.push(object); - } else if (object.type === 'milestone') { - // get children of milestone - // add content attibute to children - var newObject = addContentAttributeToChildren(object.children, object); - flattenVerseObjects(newObject, words); - } else { - words.push(object); - } - } - } -}; - -/** - * @description returns a flat array of VerseObjects (currently needed for rendering UGNT since words may be nested in milestones) - * @param {Object|Array} verse - verseObjects that need to be flattened. - * @return {array} wordlist - flat array of VerseObjects - */ -var getWordListForVerse = exports.getWordListForVerse = function getWordListForVerse(verse) { - var words = []; - if (verse.verseObjects) { - flattenVerseObjects(verse.verseObjects, words); - } else { - // already a flat word list - words = verse; - } - return words; -}; - -/** Method to filter usfm markers from a string or verseObjects array - * @param {String|Array|Object} verseObjects - The string to remove markers from - * @return {Array} - Array without usfm markers - */ -var getWordList = exports.getWordList = function getWordList(verseObjects) { - var wordList = []; - if (typeof verseObjects === 'string') { - var _getOrderedVerseObjec = getOrderedVerseObjectsFromString(verseObjects), - newVerseObjects = _getOrderedVerseObjec.newVerseObjects; - - verseObjects = newVerseObjects; - } - if (verseObjects && verseObjects.verseObjects) { - verseObjects = verseObjects.verseObjects; - } - - if (verseObjects) { - wordList = getWordListFromVerseObjectArray(verseObjects); - } - return wordList; -}; - -/** - * @description test to see if this is the same milestone (needed when milestones are not contiguous) - * @param {Object} a - First milestone to test - * @param {Object} b - Second milestone to test - * @return {boolean} true if same milestone - */ -var sameMilestone = exports.sameMilestone = 
function sameMilestone(a, b) { - var same = a.type === b.type && a.content === b.content && a.occurrence === b.occurrence; - return same; -}; - -/** - * @description adds verse object to alignment - * @param {Object} verseObject - Verse object to be added - * @param {Object} alignment - The alignment object that will be added to - */ -var addVerseObjectToAlignment = exports.addVerseObjectToAlignment = function addVerseObjectToAlignment(verseObject, alignment) { - if (verseObject.type === 'milestone' && verseObject.children.length > 0) { - /** @type{WordObject} */ - var wordObject = alignmentObjectFromVerseObject(verseObject); - var duplicate = alignment.topWords.find(function (obj) { - return obj.word === wordObject.word && obj.occurrence === wordObject.occurrence; - }); - if (!duplicate) { - alignment.topWords.push(wordObject); - } - verseObject.children.forEach(function (_verseObject) { - addVerseObjectToAlignment(_verseObject, alignment); - }); - } else if (verseObject.type === 'word' && !verseObject.children) { - /** @type{WordObject} */ - var _wordObject2 = alignmentObjectFromVerseObject(verseObject); - alignment.bottomWords.push(_wordObject2); - } -}; - -/** - * Concatenates an array of words into a verse. - * @param {array} verseArray - array of strings in a verse. 
- * @return {string} combined verse - */ -var combineVerseArray = exports.combineVerseArray = function combineVerseArray(verseArray) { - return verseArray.map(function (o) { - return getWordText(o); - }).join(' '); -}; - -/** - * create an array of word objects with occurrence(s) - * @param {[WordObject]} words - List of words without occurrences - * @return {[WordObject]} - array of wordObjects - */ -var populateOccurrencesInWordObjects = exports.populateOccurrencesInWordObjects = function populateOccurrencesInWordObjects(words) { - words = getWordList(words); - var index = 0; // only count verseObject words - return words.map(function (wordObject) { - var wordText = getWordText(wordObject); - if (wordText) { - // if verseObject is word - wordObject.occurrence = getOccurrence(words, index++, wordText); - wordObject.occurrences = getOccurrences(words, wordText); - return wordObject; - } - return null; - }).filter(function (wordObject) { - return wordObject !== null; - }); -}; - -/** - * @description wordObjectArray via string - * @param {String} string - The string to search in - * @return {[WordObject]} - array of wordObjects - */ -var wordObjectArrayFromString = exports.wordObjectArrayFromString = function wordObjectArrayFromString(string) { - var wordObjectArray = _stringPunctuationTokenizer2.default.tokenize(string).map(function (word, index) { - var occurrence = _stringPunctuationTokenizer2.default.occurrenceInString(string, index, word); - var occurrences = _stringPunctuationTokenizer2.default.occurrencesInString(string, word); - return { - word: word, - occurrence: occurrence, - occurrences: occurrences - }; - }); - return wordObjectArray; -}; - -/** - * @description sorts wordObjectArray via string - * @param {[WordObject]} wordObjectArray - array of wordObjects - * @param {string|[VerseObject]|VerseObject} stringData - The string to search in - * @return {[WordObject]} - sorted array of wordObjects - */ -var sortWordObjectsByString = 
exports.sortWordObjectsByString = function sortWordObjectsByString(wordObjectArray, stringData) { - if (stringData.verseObjects) { - stringData = populateOccurrencesInWordObjects(stringData.verseObjects); - } else if (Array.isArray(stringData)) { - stringData = populateOccurrencesInWordObjects(stringData); - } else { - stringData = wordObjectArrayFromString(stringData); - } - var _wordObjectArray = wordObjectArray.map(function (wordObject) { - var word = wordObject.word, - occurrence = wordObject.occurrence, - occurrences = wordObject.occurrences; - - var _wordObject = { - word: word, - occurrence: occurrence, - occurrences: occurrences - }; - var indexInString = stringData.findIndex(function (object) { - var equal = getWordText(object) === getWordText(_wordObject) && object.occurrence === _wordObject.occurrence && object.occurrences === _wordObject.occurrences; - return equal; - }); - wordObject.index = indexInString; - return wordObject; - }); - _wordObjectArray = _wordObjectArray.sort(function (a, b) { - return a.index - b.index; - }); - _wordObjectArray = _wordObjectArray.map(function (wordObject) { - delete wordObject.index; - return wordObject; - }); - return _wordObjectArray; -}; - -/** - * Helper method to grab only verse objects or childen of verse objects but - * not grab verse objects containing children. - * i.e. 
given {a:1, b:{2, children:{2a, 2b}} returns 1, 2a, 2b (skips 2) - * - * @param {[VerseObject]} verseObjects - Objects containing data for the words such as - * occurences, occurence, tag, text and type - * @return {[WordObject]} - same format as input, except objects containing childern - * get flatten to top level - */ -var getWordsFromVerseObjects = exports.getWordsFromVerseObjects = function getWordsFromVerseObjects(verseObjects) { - var wordObjects = verseObjects.map(function (versebject) { - if (versebject.children) { - return getWordsFromVerseObjects(versebject.children); - } - return versebject; - }); - return ArrayUtils.flattenArray(wordObjects); -}; \ No newline at end of file diff --git a/package.json b/package.json index 616167d..2d4cb1a 100644 --- a/package.json +++ b/package.json @@ -1,13 +1,16 @@ { "name": "word-aligner", - "version": "0.2.1", + "version": "0.2.2", "description": "A library for handling word alignment", "main": "lib/index.js", "scripts": { "test": "eslint ./src ./index.js && jest", "fix": "eslint ./src ./index.js --fix", "compile": "rimraf lib && babel src/ -d lib/", - "prepublish": "npm run compile" + "prebuild": "rm -rf ./lib", + "build": "babel ./src -d ./lib", + "prepare": "if [ ! -d './lib/' ]; then npm run build; fi", + "prepublishOnly": "npm test && npm run build" }, "jest": { "collectCoverageFrom": [ @@ -55,6 +58,6 @@ "rimraf": "^2.6.2", "string-punctuation-tokenizer": "0.9.1", "transform-runtime": "0.0.0", - "usfm-js": "^1.0.4" + "usfm-js": "^1.0.6" } } diff --git a/src/js/aligner.js b/src/js/aligner.js index b034318..79526f3 100644 --- a/src/js/aligner.js +++ b/src/js/aligner.js @@ -163,7 +163,9 @@ export function verseStringWordsContainedInAlignments( * @return {Object} previous Alignment if found - else null. 
*/ const getAlignmentForMilestone = (baseMilestones, newMilestone) => { - for (let baseMilestone of baseMilestones) { + const length = baseMilestones.length; + for (let i = 0; i < length; i++) { + const baseMilestone = baseMilestones[i]; if (baseMilestone.alignment && VerseObjectUtils.sameMilestone(baseMilestone.milestone, newMilestone)) { return baseMilestone.alignment; @@ -216,15 +218,19 @@ export const indexOfFirstMilestone = (alignments, verseObject) => { * @description returns index of the verseObject in the alignments milestone (ignores occurrences since that can be off) * @param {Array} alignments - array of the alignments to search in * @param {Object} verseObject - verseObject to search for - * @return {Int} - the index of the verseObject + * @return {Number} - the index of the verseObject */ export const indexOfMilestone = (alignments, verseObject) => { let index = -1; if (verseObject.type === 'word') { index = alignments.findIndex(alignment => { - for (let _verseObject of alignment.topWords) { + const length = alignment.topWords.length; + for (let i = 0; i < length; i++) { + const _verseObject = alignment.topWords[i]; if (_verseObject.word === verseObject.text) { - return compareOccurrences(_verseObject, verseObject); + if (compareOccurrences(_verseObject, verseObject)) { + return true; + } } } return false; @@ -251,18 +257,21 @@ export const orderAlignments = function(alignmentVerse, alignmentUnOrdered) { if (Array.isArray(wordMap)) { let alignment = []; // order alignments - for (let i = 0; i < wordMap.length; i++) { + const wmLen = wordMap.length; + for (let i = 0; i < wmLen; i++) { const location = wordMap[i]; const nextWord = location.array[location.pos]; let index = indexOfFirstMilestone(alignmentUnOrdered, nextWord); - if ((index < 0) && (nextWord.type === 'word') && (i < wordMap.length - 1)) { + if ((index < 0) && (nextWord.type === 'word') && (i < wmLen - 1)) { const verseObjectAfter = location.array[location.pos + 1]; if (verseObjectAfter.type 
=== 'text') { // maybe this was punctuation split from word const originalText = nextWord.text; - nextWord.text += verseObjectAfter.text; // add possible punctuation + nextWord.text += verseObjectAfter.text.substr(0, 1); // add possible punctuation index = indexOfFirstMilestone(alignmentUnOrdered, nextWord); // try again if (index < 0) { nextWord.text = originalText; // restore original text if not a match + } else { + verseObjectAfter.text = verseObjectAfter.text.substr(1, 0); // remove punctuation } } } diff --git a/src/js/utils/verseObjects.js b/src/js/utils/verseObjects.js index 8615dec..807adbb 100644 --- a/src/js/utils/verseObjects.js +++ b/src/js/utils/verseObjects.js @@ -128,6 +128,32 @@ const getVerseObjectsText = verseObjects => { return texts; }; +/** + * make sure we pick up extra white space between tokens + * @param {string} text - string to tokenize + * @param {Number} lastPos - position of end of last token + * @param {Number} pos - position to grab up to + * @param {Array} newVerseObjects - nested verse objects + * @param {Boolean} end - if true, then at end of line + * @return {{lastPos: *, verseObject: *}} - new verse object and updated position + */ +const fillGap = (text, lastPos, pos, newVerseObjects, end = false) => { + let verseObject = null; + const gap = text.substring(lastPos, pos); + const lastVerseObject = newVerseObjects.length && newVerseObjects[newVerseObjects.length - 1]; + if (lastVerseObject && (lastVerseObject.type === 'text')) { // append to previous text + lastVerseObject.text += gap; + } else if (end || (gap !== ' ')) { // if not default single space, then save gap + verseObject = { + type: "text", + text: gap + }; + newVerseObjects.push(verseObject); + } + lastPos += gap.length; + return lastPos; +}; + /** * parse text into tokens * @param {string} text - string to tokenize @@ -141,9 +167,14 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v if (text) { const tokens = 
tokenizer.tokenizeWithPunctuation(text); const tokenLength = tokens.length; + let verseObject; + let lastPos = 0; for (let j = 0; j < tokenLength; j++) { const word = tokens[j]; - let verseObject; + const pos = text.indexOf(word, lastPos); + if (pos > lastPos) { // make sure we are not dropping white space + lastPos = fillGap(text, lastPos, pos, newVerseObjects); + } if (tokenizer.word.test(word)) { // if the text has word characters, its a word object const wordIndex = wordMap.length; let occurrence = tokenizer.occurrenceInString( @@ -169,8 +200,12 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v text: word }; } + lastPos += word.length; newVerseObjects.push(verseObject); } + if (lastPos < text.length) { + lastPos = fillGap(text, lastPos, text.length, newVerseObjects, true); + } } return nonWordVerseObjectCount; }; @@ -188,7 +223,7 @@ const getWordsFromNestedVerseObjects = (verseObjects, newVerseObjects, wordMap, const voLength = verseObjects.length; for (let i = 0; i < voLength; i++) { const verseObject = verseObjects[i]; - let vsObjText = verseObject.text && verseObject.text.trim(); + let vsObjText = verseObject.text; if ((verseObject.type !== 'text')) { // preseserve non-text verseObject except for text part which will be split into words delete verseObject.text;