Skip to content

Commit

Permalink
Usfm nonverse multichapter (#224)
Browse files Browse the repository at this point in the history
* Added test case to fail

* Don't preserve _curTextType across verses.

* Properly close non-verse segment at the beginning of a verse para

- Add extra info when exception occurs while tokenizing in UsfmTextBase

---------

Co-authored-by: Damien Daspit <[email protected]>
  • Loading branch information
johnml1135 and ddaspit authored Jul 12, 2024
1 parent 04276d7 commit 2659e19
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ string pubNumber
}
else
{
EndVerseText(state);
if (CurrentTextType == ScriptureTextType.NonVerse)
EndNonVerseText(state);
else
EndVerseText(state);
UpdateVerseRef(state.VerseRef, marker);
StartVerseText(state);
}
Expand Down
19 changes: 18 additions & 1 deletion src/SIL.Machine/Corpora/UsfmTextBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,25 @@ protected override IEnumerable<TextRow> GetVersesInDocOrder()
{
string usfm = ReadUsfm();
var rowCollector = new TextRowCollector(this);

var tokenizer = new UsfmTokenizer(_stylesheet);
IReadOnlyList<UsfmToken> tokens;
// Tokenize up front so that a tokenizer failure can be rethrown with the text id
// (and the project, when one is set) added to the message.
try
{
    tokens = tokenizer.Tokenize(usfm, _includeMarkers);
}
catch (Exception ex)
{
    var sb = new StringBuilder();
    // The id is enclosed in matching single quotes, like the project and error segments below.
    sb.Append($"An error occurred while tokenizing the text '{Id}'");
    if (!string.IsNullOrEmpty(Project))
        sb.Append($" in project '{Project}'");
    sb.Append($". Error: '{ex.Message}'");
    // Keep the original exception as the inner exception so its stack trace is not lost.
    throw new InvalidOperationException(sb.ToString(), ex);
}

var parser = new UsfmParser(
usfm,
tokens,
rowCollector,
_stylesheet,
Versification,
Expand Down
21 changes: 21 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,27 @@ public void GetRows_DuplicateVerseWithTable()
Assert.That(rows, Has.Length.EqualTo(5));
}

[Test]
public void GetRows_VersePara_BeginningNonVerseSegment()
{
    // Arrange: a verse paragraph that opens with a non-verse segment and is then
    // followed by a verse segment; a second chapter comes after it.
    string usfm =
        @"\id MAT - Test
\c 1
\q1
\f \fr 119 \ft World \f*
\v 1 First verse in line!?!
\c 2
\d
description
\b
";

    // Act: request all text, not only verse text.
    TextRow[] actualRows = GetRows(usfm, includeAllText: true);

    // Assert
    Assert.That(actualRows, Has.Length.EqualTo(4));
}

private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
{
UsfmMemoryText text =
Expand Down

0 comments on commit 2659e19

Please sign in to comment.