diff --git a/__tests__/AlignmentHelpers.test.js b/__tests__/AlignmentHelpers.test.js index c52ecf9..f994351 100644 --- a/__tests__/AlignmentHelpers.test.js +++ b/__tests__/AlignmentHelpers.test.js @@ -1,46 +1,11 @@ +/* eslint-disable no-use-before-define */ /* eslint-env jest */ import fs from 'fs-extra'; import path from 'path'; +import usfmjs from 'usfm-js'; jest.unmock('fs-extra'); import wordaligner, {VerseObjectUtils} from '../src/'; const RESOURCES = path.join('__tests__', 'fixtures', 'pivotAlignmentVerseObjects'); -/** - * Reads a usfm file from the resources dir - * @param {string} filename relative path to usfm file - * @return {Object} - The read JSON object - */ -const readJSON = filename => { - const fullPath = path.join(RESOURCES, filename); - if (fs.existsSync(fullPath)) { - const json = fs.readJsonSync(fullPath); - return json; - } - console.log('File not found.'); - return false; -}; - -/** - * Generator for testing merging of alignment into verseObjects - * @param {string} name - the name of the test files to use. e.g. `valid` will test `valid.usfm` to `valid.json` - */ -const mergeTest = (name = {}) => { - const json = readJSON(`${name}.json`); - expect(json).toBeTruthy(); - const {alignment, verseObjects, verseString, wordBank} = json; - const output = wordaligner.merge(alignment, wordBank, verseString); - expect(output).toEqual(verseObjects); -}; -/** - * Generator for testing unmerging of alignment from verseObjects - * @param {string} name - the name of the test files to use. e.g. 
`valid` will test `valid.usfm` to `valid.json` - */ -const unmergeTest = (name = {}) => { - const json = readJSON(`${name}.json`); - expect(json).toBeTruthy(); - const {verseObjects, alignment, wordBank, alignedVerseString} = json; - const output = wordaligner.unmerge(verseObjects, alignedVerseString); - expect(output).toEqual({alignment, wordBank}); -}; describe("Merge Alignment into Verse Objects", () => { it('handles one to one', () => { @@ -161,6 +126,15 @@ describe("UnMerge Alignment from Verse Objects", () => { }); }); +describe("export USFM3 from Verse Objects", () => { + it('handles acts-1-11', () => { + exportTest('acts-1-11'); + }); + it('handles acts 1-4', () => { + exportTest('acts-1-4'); + }); +}); + describe('wordaligner.generateBlankAlignments', () => { const createEmptyAlignment = function(verseObjects) { let wordList = VerseObjectUtils.getWordList(verseObjects); @@ -248,8 +222,90 @@ describe('wordaligner.generateWordBank', () => { // then expect(results).toEqual(wordBank); }); - - // - // helpers - // }); + +// +// helpers +// + +/** + * Reads a json file from the resources dir + * @param {string} filename relative path to json file + * @return {Object|boolean} - The read JSON object, or false if the file was not found + */ +const readJSON = filename => { + const fullPath = path.join(RESOURCES, filename); + if (fs.existsSync(fullPath)) { + const json = fs.readJsonSync(fullPath); + return json; + } + console.log('File not found.'); + return false; +}; + +/** + * Reads a usfm file from the resources dir + * @param {string} filename relative path to usfm file + * @return {string|boolean} - The read USFM text, or false if the file was not found + */ +const readUSFM = filename => { + const fullPath = path.join(RESOURCES, filename); + if (fs.existsSync(fullPath)) { + const usfm = fs.readFileSync(fullPath, 'UTF-8').toString(); + return usfm; + } + console.log('File not found.'); + return false; +}; + +/** + * Generator for testing merging of alignment into verseObjects + * @param {string} name - the name of the test files to use. e.g. 
`valid` will test `valid.usfm` to `valid.json` + */ +const mergeTest = (name = {}) => { + const json = readJSON(`${name}.json`); + expect(json).toBeTruthy(); + const {alignment, verseObjects, verseString, wordBank} = json; + const output = wordaligner.merge(alignment, wordBank, verseString); + expect(output).toEqual(verseObjects); +}; + +/** + * Generator for testing unmerging of alignment from verseObjects + * @param {string} name - the name of the test files to use. e.g. `valid` will test `valid.usfm` to `valid.json` + */ +const unmergeTest = (name = {}) => { + const json = readJSON(`${name}.json`); + expect(json).toBeTruthy(); + const {verseObjects, alignment, wordBank, alignedVerseString} = json; + const output = wordaligner.unmerge(verseObjects, alignedVerseString); + expect(output).toEqual({alignment, wordBank}); +}; + +/** + * Generator for testing export of merged alignment to USFM3 + * @param {string} name - the name of the test files to use. e.g. `valid` will merge `valid.json` and compare the exported USFM to `valid.usfm` + */ +const exportTest = (name = {}) => { + const json = readJSON(`${name}.json`); + expect(json).toBeTruthy(); + const expectedUsfm = readUSFM(`${name}.usfm`); + expect(expectedUsfm).toBeTruthy(); + const {alignment, verseString, wordBank} = json; + const output = wordaligner.merge(alignment, wordBank, verseString); + const outputData = { + chapters: {}, + headers: [], + verses: { + 1: output + } + }; + let usfm = usfmjs.toUSFM(outputData, {chunk: true}); + const split = usfm.split("\\v 1"); + usfm = split.length > 1 ? 
split[1] : ""; + if (usfm.substr(0, 1) === ' ') { + usfm = usfm.substr(1); + } + expect(usfm).toEqual(expectedUsfm); +}; + diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json index 27acab0..973e3a4 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.json @@ -1,6 +1,6 @@ { "alignedVerseString": "οἳ καὶ εἶπαν, “ ἄνδρες, Γαλιλαῖοι, τί ἑστήκατε βλέποντες εἰς τὸν οὐρανόν? οὗτος ὁ Ἰησοῦς ὁ ἀναλημφθεὶς ἀφ’ ὑμῶν εἰς τὸν οὐρανὸν, οὕτως ἐλεύσεται ὃν τρόπον ἐθεάσασθε αὐτὸν πορευόμενον εἰς τὸν οὐρανόν.”", - "verseString": "And they said,\"Men of Galilee, why do you stand looking into heaven? This Jesus who has been taken up from you into heaven, will likewise return in the same manner as you saw him going into heaven.\"\\p", + "verseString": "And they said,\"Men of Galilee, why do you stand looking into heaven? This Jesus who has been taken up from you into heaven, will likewise return in the same manner as you saw him going into heaven.\"\n\\p", "alignment": [ { "topWords": [ @@ -1273,7 +1273,7 @@ "type": "text" }, { - "text": "\"", + "text": "\"\n", "type": "text" }, { diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.usfm b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.usfm new file mode 100644 index 0000000..2db9cf1 --- /dev/null +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-11.usfm @@ -0,0 +1,104 @@ + +\zaln-s | x-strong="G25320" x-lemma="καί" x-morph="Gr,D,,,,,,,,," x-occurrence="1" x-occurrences="1" x-content="καὶ" +\w And|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G37390" x-lemma="ὅς" x-morph="Gr,RR,,,,NMP," x-occurrence="1" x-occurrences="1" x-content="οἳ" +\w they|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G30040" x-lemma="λέγω" x-morph="Gr,V,IAA3,,P," x-occurrence="1" x-occurrences="1" x-content="εἶπαν" 
+\w said|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*," +\zaln-s | x-strong="G04350" x-lemma="ἀνήρ" x-morph="Gr,N,,,,,VMP," x-occurrence="1" x-occurrences="1" x-content="ἄνδρες" +\w Men|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G10570" x-lemma="Γαλιλαῖος" x-morph="Gr,NS,,,,VMP," x-occurrence="1" x-occurrences="1" x-content="Γαλιλαῖοι" +\w of|x-occurrence="1" x-occurrences="1"\w* +\w Galilee|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*, +\zaln-s | x-strong="G51010" x-lemma="τίς" x-morph="Gr,RT,,,,ANS," x-occurrence="1" x-occurrences="1" x-content="τί" +\w why|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G24760" x-lemma="ἵστημι" x-morph="Gr,V,IEA2,,P," x-occurrence="1" x-occurrences="1" x-content="ἑστήκατε" +\w do|x-occurrence="1" x-occurrences="1"\w* +\w you|x-occurrence="1" x-occurrences="3"\w* +\w stand|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G09910" x-lemma="βλέπω" x-morph="Gr,V,PPA,NMP," x-occurrence="1" x-occurrences="1" x-content="βλέποντες" +\w looking|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G15190" x-lemma="εἰς" x-morph="Gr,P,,,,,A,,," x-occurrence="1" x-occurrences="3" x-content="εἰς" +\w into|x-occurrence="1" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,AMS," x-occurrence="1" x-occurrences="3" x-content="τὸν" +\zaln-s | x-strong="G37720" x-lemma="οὐρανός" x-morph="Gr,N,,,,,AMS," x-occurrence="1" x-occurrences="2" x-content="οὐρανόν" +\w heaven|x-occurrence="1" x-occurrences="3"\w* +\zaln-e\* +\zaln-e\*? 
+\zaln-s | x-strong="G37780" x-lemma="οὗτος" x-morph="Gr,ED,,,,NMS," x-occurrence="1" x-occurrences="1" x-content="οὗτος" +\w This|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,NMS," x-occurrence="1" x-occurrences="2" x-content="ὁ" +\zaln-s | x-strong="G24240" x-lemma="Ἰησοῦς" x-morph="Gr,N,,,,,NMS," x-occurrence="1" x-occurrences="1" x-content="Ἰησοῦς" +\w Jesus|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,RD,,,,NMS," x-occurrence="2" x-occurrences="2" x-content="ὁ" +\zaln-s | x-strong="G03530" x-lemma="ἀναλαμβάνω" x-morph="Gr,V,PAP,NMS," x-occurrence="1" x-occurrences="1" x-content="ἀναλημφθεὶς" +\w who|x-occurrence="1" x-occurrences="1"\w* +\w has|x-occurrence="1" x-occurrences="1"\w* +\w been|x-occurrence="1" x-occurrences="1"\w* +\w taken|x-occurrence="1" x-occurrences="1"\w* +\w up|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-e\* +\zaln-s | x-strong="G05750" x-lemma="ἀπό" x-morph="Gr,P,,,,,G,,," x-occurrence="1" x-occurrences="1" x-content="ἀφ’" +\w from|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G47710" x-lemma="σύ" x-morph="Gr,RP,,,2G,P," x-occurrence="1" x-occurrences="1" x-content="ὑμῶν" +\w you|x-occurrence="2" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G15190" x-lemma="εἰς" x-morph="Gr,P,,,,,A,,," x-occurrence="2" x-occurrences="3" x-content="εἰς" +\w into|x-occurrence="2" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,AMS," x-occurrence="2" x-occurrences="3" x-content="τὸν" +\zaln-s | x-strong="G37720" x-lemma="οὐρανός" x-morph="Gr,N,,,,,AMS," x-occurrence="1" x-occurrences="1" x-content="οὐρανὸν" +\w heaven|x-occurrence="2" x-occurrences="3"\w* +\zaln-e\* +\zaln-e\*, +\zaln-s | x-strong="G20640" x-lemma="ἔρχομαι" x-morph="Gr,V,IFM3,,S," x-occurrence="1" x-occurrences="1" x-content="ἐλεύσεται" +\w will|x-occurrence="1" 
x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G37790" x-lemma="οὕτως" x-morph="Gr,D,,,,,,,,," x-occurrence="1" x-occurrences="1" x-content="οὕτως" +\w likewise|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G20640" x-lemma="ἔρχομαι" x-morph="Gr,V,IFM3,,S," x-occurrence="1" x-occurrences="1" x-content="ἐλεύσεται" +\w return|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G37390" x-lemma="ὅς" x-morph="Gr,ER,,,,AMS," x-occurrence="1" x-occurrences="1" x-content="ὃν" +\w in|x-occurrence="1" x-occurrences="1"\w* +\w the|x-occurrence="1" x-occurrences="1"\w* +\w same|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G51580" x-lemma="τρόπος" x-morph="Gr,N,,,,,AMS," x-occurrence="1" x-occurrences="1" x-content="τρόπον" +\w manner|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G23000" x-lemma="θεάομαι" x-morph="Gr,V,IAM2,,P," x-occurrence="1" x-occurrences="1" x-content="ἐθεάσασθε" +\w as|x-occurrence="1" x-occurrences="1"\w* +\w you|x-occurrence="3" x-occurrences="3"\w* +\w saw|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G08460" x-lemma="αὐτός" x-morph="Gr,RP,,,3AMS," x-occurrence="1" x-occurrences="1" x-content="αὐτὸν" +\w him|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G41980" x-lemma="πορεύω" x-morph="Gr,V,PPM,AMS," x-occurrence="1" x-occurrences="1" x-content="πορευόμενον" +\w going|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-strong="G15190" x-lemma="εἰς" x-morph="Gr,P,,,,,A,,," x-occurrence="3" x-occurrences="3" x-content="εἰς" +\w into|x-occurrence="3" x-occurrences="3"\w* +\zaln-e\* +\zaln-s | x-strong="G35880" x-lemma="ὁ" x-morph="Gr,EA,,,,AMS," x-occurrence="3" x-occurrences="3" x-content="τὸν" +\zaln-s | x-strong="G37720" x-lemma="οὐρανός" x-morph="Gr,N,,,,,AMS," x-occurrence="2" x-occurrences="2" x-content="οὐρανόν" +\w heaven|x-occurrence="3" x-occurrences="3"\w* +\zaln-e\* +\zaln-e\*." 
+\p \ No newline at end of file diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.usfm b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.usfm new file mode 100644 index 0000000..872a6e2 --- /dev/null +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/acts-1-4.usfm @@ -0,0 +1,68 @@ + +\zaln-s | x-lemma="καί" x-morph="Gr,CC,,,,,,,," x-occurrence="1" x-occurrences="1" x-strong="G25320" x-content="καὶ" +\w When|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="συναλίζω" x-morph="Gr,V,PPM,NMS," x-occurrence="1" x-occurrences="1" x-strong="G48710" x-content="συναλιζόμενος" +\w he|x-occurrence="1" x-occurrences="3"\w* +\w was|x-occurrence="1" x-occurrences="1"\w* +\w meeting|x-occurrence="1" x-occurrences="1"\w* +\w together|x-occurrence="1" x-occurrences="1"\w* +\w with|x-occurrence="1" x-occurrences="1"\w* +\w them|x-occurrence="1" x-occurrences="2"\w* +\zaln-e\*, +\zaln-s | x-lemma="παραγγέλλω" x-morph="Gr,V,IAA3,,S," x-occurrence="1" x-occurrences="1" x-strong="G38530" x-content="παρήγγειλεν" +\w he|x-occurrence="2" x-occurrences="3"\w* +\w commanded|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="αὐτός" x-morph="Gr,RP,,,3DMP," x-occurrence="1" x-occurrences="1" x-strong="G08460" x-content="αὐτοῖς" +\w them|x-occurrence="2" x-occurrences="2"\w* +\zaln-e\* +\zaln-s | x-lemma="μή" x-morph="Gr,DO,,,,,,,," x-occurrence="1" x-occurrences="1" x-strong="G33610" x-content="μὴ" +\w not|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="χωρίζω" x-morph="Gr,V,NPP,,,,," x-occurrence="1" x-occurrences="1" x-strong="G55630" x-content="χωρίζεσθαι" +\w to|x-occurrence="1" x-occurrences="2"\w* +\w leave|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ἀπό" x-morph="Gr,P,,,,,G,,," x-occurrence="1" x-occurrences="1" x-strong="G05750" x-content="ἀπὸ" +\zaln-s | x-lemma="Ἱεροσόλυμα" x-morph="Gr,N,,,,,GNP," x-occurrence="1" x-occurrences="1" x-strong="G24140" x-content="Ἱεροσολύμων" +\w 
Jerusalem|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-e\*, +\zaln-s | x-lemma="ἀλλά" x-morph="Gr,CO,,,,,,,," x-occurrence="1" x-occurrences="1" x-strong="G02350" x-content="ἀλλὰ" +\w but|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="περιμένω" x-morph="Gr,V,NPA,,,,," x-occurrence="1" x-occurrences="1" x-strong="G40370" x-content="περιμένειν" +\w to|x-occurrence="2" x-occurrences="2"\w* +\w wait|x-occurrence="1" x-occurrences="1"\w* +\w for|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ὁ" x-morph="Gr,EA,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G35880" x-content="τὴν" +\w the|x-occurrence="1" x-occurrences="2"\w* +\zaln-e\* +\zaln-s | x-lemma="ἐπαγγελία" x-morph="Gr,N,,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G18600" x-content="ἐπαγγελίαν" +\w promise|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ὁ" x-morph="Gr,EA,,,,GMS," x-occurrence="1" x-occurrences="1" x-strong="G35880" x-content="τοῦ" +\w of|x-occurrence="1" x-occurrences="1"\w* +\w the|x-occurrence="2" x-occurrences="2"\w* +\zaln-e\* +\zaln-s | x-lemma="πατήρ" x-morph="Gr,N,,,,,GMS," x-occurrence="1" x-occurrences="1" x-strong="G39620" x-content="Πατρὸς" +\w Father|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*, +\zaln-s | x-lemma="ὅς" x-morph="Gr,RR,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G37390" x-content="ἣν" +\w about|x-occurrence="1" x-occurrences="1"\w* +\w which|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*, +\zaln-s | x-lemma="ὅς" x-morph="Gr,RR,,,,AFS," x-occurrence="1" x-occurrences="1" x-strong="G37390" x-content="ἣν" +\w he|x-occurrence="3" x-occurrences="3"\w* +\w said|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\*,\qt-s |who="Jesus"\*" +\zaln-s | x-lemma="ἐγώ" x-morph="Gr,RP,,,1G,S," x-occurrence="1" x-occurrences="1" x-strong="G14730" x-content="μου" +\w You|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* +\zaln-s | x-lemma="ἀκούω" x-morph="Gr,V,IAA2,,P," 
x-occurrence="1" x-occurrences="1" x-strong="G01910" x-content="ἠκούσατέ" +\w heard|x-occurrence="1" x-occurrences="1"\w* +\w from|x-occurrence="1" x-occurrences="1"\w* +\w me|x-occurrence="1" x-occurrences="1"\w* +\zaln-e\* diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json b/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json index 250e98a..0a394e3 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/matt1-1.json @@ -250,7 +250,7 @@ }, { "type": "text", - "text": "." + "text": ". " }, { "tag": "f", diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json b/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json index 7e9feee..708d9b9 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/tit1-12.json @@ -1,6 +1,6 @@ { "alignedVerseString": "εἶπέν τις ἐξ αὐτῶν, ἴδιος αὐτῶν προφήτης, “Κρῆτες ἀεὶ ψεῦσται, κακὰ θηρία, γαστέρες ἀργαί.”", - "verseString": "One of their own prophets has said, \"Cretans are always liars, evil beasts, lazy gluttons.\"", + "verseString": "One of their own prophets has said, \"Cretans are always liars, evil beasts, lazy gluttons.\"\n\\p", "verseObjects": [ { "children": [ @@ -312,8 +312,12 @@ "type": "text" }, { - "text": "\"", + "text": "\"\n", "type": "text" + }, + { + "tag": "p", + "type": "paragraph" } ], "alignment": [ diff --git a/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json b/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json index 4d2e265..1c7933a 100644 --- a/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json +++ b/__tests__/fixtures/pivotAlignmentVerseObjects/v_1ti3-16.json @@ -165,7 +165,7 @@ }, { "type": "text", - "text": ":" + "text": ":\n" }, { "tag": "q", @@ -260,7 +260,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -339,7 +339,7 @@ }, { "type": "text", - "text": "," + "text": 
",\n" }, { "tag": "q", @@ -399,7 +399,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -471,7 +471,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -557,7 +557,7 @@ }, { "type": "text", - "text": "," + "text": ",\n" }, { "tag": "q", @@ -659,7 +659,7 @@ }, { "type": "text", - "text": "\"" + "text": "\"\n" }, { "tag": "s5", diff --git a/src/js/utils/verseObjects.js b/src/js/utils/verseObjects.js index ab23a42..807adbb 100644 --- a/src/js/utils/verseObjects.js +++ b/src/js/utils/verseObjects.js @@ -128,6 +128,32 @@ const getVerseObjectsText = verseObjects => { return texts; }; +/** + * make sure we pick up extra white space between tokens + * @param {string} text - string to tokenize + * @param {Number} lastPos - position of end of last token + * @param {Number} pos - position to grab up to + * @param {Array} newVerseObjects - nested verse objects + * @param {Boolean} end - if true, then at end of line (gap is saved even if it is a single space) + * @return {Number} - updated position of end of last token (lastPos advanced past the gap) + */ +const fillGap = (text, lastPos, pos, newVerseObjects, end = false) => { + let verseObject = null; + const gap = text.substring(lastPos, pos); + const lastVerseObject = newVerseObjects.length && newVerseObjects[newVerseObjects.length - 1]; + if (lastVerseObject && (lastVerseObject.type === 'text')) { // append to previous text + lastVerseObject.text += gap; + } else if (end || (gap !== ' ')) { // if not default single space, then save gap + verseObject = { + type: "text", + text: gap + }; + newVerseObjects.push(verseObject); + } + lastPos += gap.length; + return lastPos; +}; + /** * parse text into tokens * @param {string} text - string to tokenize @@ -147,19 +173,7 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v const word = tokens[j]; const pos = text.indexOf(word, lastPos); if (pos > lastPos) { // make sure we are not dropping white space - const gap = text.substring(lastPos, pos); - const 
lastVerseObject = newVerseObjects.length && newVerseObjects[newVerseObjects.length - 1]; - if (lastVerseObject && (lastVerseObject.type === 'text')) { // append to previous text - lastVerseObject.text += gap; - } else - if (gap !== ' ') { // if not default single space, then save gap - verseObject = { - type: "text", - text: gap - }; - newVerseObjects.push(verseObject); - } - lastPos += gap.length; + lastPos = fillGap(text, lastPos, pos, newVerseObjects); } if (tokenizer.word.test(word)) { // if the text has word characters, its a word object const wordIndex = wordMap.length; @@ -189,6 +203,9 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v lastPos += word.length; newVerseObjects.push(verseObject); } + if (lastPos < text.length) { + lastPos = fillGap(text, lastPos, text.length, newVerseObjects, true); + } } return nonWordVerseObjectCount; }; @@ -206,7 +223,7 @@ const getWordsFromNestedVerseObjects = (verseObjects, newVerseObjects, wordMap, const voLength = verseObjects.length; for (let i = 0; i < voLength; i++) { const verseObject = verseObjects[i]; - let vsObjText = verseObject.text && verseObject.text.trim(); + let vsObjText = verseObject.text; if ((verseObject.type !== 'text')) { // preseserve non-text verseObject except for text part which will be split into words delete verseObject.text;