Skip to content

Commit

Permalink
Merge branch 'master' into scripture_range_parser_no_error_on_empty_s…
Browse files Browse the repository at this point in the history
…tring
  • Loading branch information
Enkidu93 authored Jul 15, 2024
2 parents e32df20 + 39df17e commit 1b072a9
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 5 deletions.
6 changes: 3 additions & 3 deletions src/AssemblyInfo.props
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<Project>
<PropertyGroup>
<Version>3.2.3</Version>
<Version>3.2.4</Version>
<Version Condition=" '$(VersionSuffix)' != '' ">$(Version)-$(VersionSuffix)</Version>
<AssemblyVersion>3.2.3.0</AssemblyVersion>
<FileVersion>3.2.3.0</FileVersion>
<AssemblyVersion>3.2.4.0</AssemblyVersion>
<FileVersion>3.2.4.0</FileVersion>
<Company>SIL International</Company>
<Authors>SIL International</Authors>
<Product>Machine</Product>
Expand Down
5 changes: 4 additions & 1 deletion src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ string pubNumber
}
else
{
EndVerseText(state);
if (CurrentTextType == ScriptureTextType.NonVerse)
EndNonVerseText(state);
else
EndVerseText(state);
UpdateVerseRef(state.VerseRef, marker);
StartVerseText(state);
}
Expand Down
19 changes: 18 additions & 1 deletion src/SIL.Machine/Corpora/UsfmTextBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,25 @@ protected override IEnumerable<TextRow> GetVersesInDocOrder()
{
string usfm = ReadUsfm();
var rowCollector = new TextRowCollector(this);

// Tokenize up front so that a tokenizer failure can be reported together with
// the text (and, when set, the project) it came from.
var tokenizer = new UsfmTokenizer(_stylesheet);
IReadOnlyList<UsfmToken> tokens;
try
{
    tokens = tokenizer.Tokenize(usfm, _includeMarkers);
}
catch (Exception ex)
{
    // Wrap the failure with identifying context; the original exception is
    // passed along as the inner exception so its stack trace is not lost.
    var sb = new StringBuilder();
    // Fixed: the closing quote around the text id was a backtick, not an apostrophe.
    sb.Append($"An error occurred while tokenizing the text '{Id}'");
    if (!string.IsNullOrEmpty(Project))
        sb.Append($" in project '{Project}'");
    sb.Append($". Error: '{ex.Message}'");
    throw new InvalidOperationException(sb.ToString(), ex);
}

var parser = new UsfmParser(
usfm,
tokens,
rowCollector,
_stylesheet,
Versification,
Expand Down
21 changes: 21 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,27 @@ public void GetRows_DuplicateVerseWithTable()
Assert.That(rows, Has.Length.EqualTo(5));
}

[Test]
public void GetRows_VersePara_BeginningNonVerseSegment()
{
    // Input: a verse paragraph (\q1) whose first segment is non-verse text
    // (a footnote) and whose second segment is a verse.
    const string usfm =
        @"\id MAT - Test
\c 1
\q1
\f \fr 119 \ft World \f*
\v 1 First verse in line!?!
\c 2
\d
description
\b
";
    const int expectedRowCount = 4;

    TextRow[] rows = GetRows(usfm, includeAllText: true);

    Assert.That(rows, Has.Length.EqualTo(expectedRowCount));
}

private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
{
UsfmMemoryText text =
Expand Down

0 comments on commit 1b072a9

Please sign in to comment.