diff --git a/src/AssemblyInfo.props b/src/AssemblyInfo.props index d0ba837b..a481f809 100644 --- a/src/AssemblyInfo.props +++ b/src/AssemblyInfo.props @@ -1,9 +1,9 @@ - 3.2.3 + 3.2.4 $(Version)-$(VersionSuffix) - 3.2.3.0 - 3.2.3.0 + 3.2.4.0 + 3.2.4.0 SIL International SIL International Machine diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index da66b34f..7d9e3391 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -68,7 +68,10 @@ string pubNumber } else { - EndVerseText(state); + if (CurrentTextType == ScriptureTextType.NonVerse) + EndNonVerseText(state); + else + EndVerseText(state); UpdateVerseRef(state.VerseRef, marker); StartVerseText(state); } diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index 030580b7..b148f391 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -36,8 +36,25 @@ protected override IEnumerable GetVersesInDocOrder() { string usfm = ReadUsfm(); var rowCollector = new TextRowCollector(this); + + var tokenizer = new UsfmTokenizer(_stylesheet); + IReadOnlyList tokens; + try + { + tokens = tokenizer.Tokenize(usfm, _includeMarkers); + } + catch (Exception ex) + { + var sb = new StringBuilder(); + sb.Append($"An error occurred while tokenizing the text '{Id}`"); + if (!string.IsNullOrEmpty(Project)) + sb.Append($" in project '{Project}'"); + sb.Append($". Error: '{ex.Message}'"); + throw new InvalidOperationException(sb.ToString(), ex); + } + var parser = new UsfmParser( - usfm, + tokens, rowCollector, _stylesheet, Versification, diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index e1b25c76..b046be22 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -70,6 +70,27 @@ public void GetRows_DuplicateVerseWithTable() Assert.That(rows, Has.Length.EqualTo(5)); } + [Test] + public void GetRows_VersePara_BeginningNonVerseSegment() + { + // a verse paragraph that begins with a non-verse segment followed by a verse segment + TextRow[] rows = GetRows( + @"\id MAT - Test +\c 1 +\q1 +\f \fr 119 \ft World \f* +\v 1 First verse in line!?! +\c 2 +\d +description +\b +", + includeAllText: true + ); + + Assert.That(rows, Has.Length.EqualTo(4)); + } + private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false) { UsfmMemoryText text =