-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
scripture_text_corpus.py
112 lines (95 loc) · 4.82 KB
/
scripture_text_corpus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from typing import Generator, Iterable, Optional, Tuple, cast
from ..scripture import ENGLISH_VERSIFICATION, ORIGINAL_VERSIFICATION
from ..scripture.canon import book_id_to_number, book_number_to_id, is_canonical
from ..scripture.scripture_ref import ScriptureRef
from ..scripture.verse_ref import VerseRef, Versification
from ..utils.context_managed_generator import ContextManagedGenerator
from .dictionary_text_corpus import DictionaryTextCorpus
from .scripture_text import ScriptureText
from .text_corpus import TextCorpus
from .text_row import TextRow
class ScriptureTextCorpus(DictionaryTextCorpus):
    """A ``DictionaryTextCorpus`` of scripture texts that also records a versification."""

    def __init__(
        self,
        versification: Versification = ENGLISH_VERSIFICATION,
        texts: Optional[Iterable[ScriptureText]] = None,
    ) -> None:
        """Create the corpus.

        Args:
            versification: Versification reported by the ``versification``
                property. Defaults to ``ENGLISH_VERSIFICATION``.
            texts: Scripture texts forwarded to the base-class constructor.
                When omitted, an empty list is forwarded instead.
        """
        # Build a new list on every call rather than using a ``[]`` default:
        # a default list is created once and shared by all calls, so anything
        # that kept or mutated it would leak state between corpora.
        super().__init__([] if texts is None else texts)
        self._versification = versification

    @property
    def versification(self) -> Versification:
        """The versification supplied at construction time."""
        return self._versification
class _VersificationRefCorpusText(ScriptureText):
    """Scripture text for a single book whose rows are generated from a versification."""

    def __init__(self, book_num: int, versification: Versification) -> None:
        """Initialise the text for the book numbered ``book_num``.

        The book number is converted to its id with ``book_number_to_id``
        before being handed to the ``ScriptureText`` constructor.
        """
        book_id = book_number_to_id(book_num)
        super().__init__(book_id, versification)

    def _get_rows(self) -> Generator[TextRow, None, None]:
        """Yield the rows created for every non-excluded chapter/verse of the book.

        Chapters run from 1 to the versification's last chapter for the book,
        and verses from 1 to the last verse of each chapter.
        """
        book_num = book_id_to_number(self.id)
        last_chapter = self.versification.get_last_chapter(book_num)
        for chapter in range(1, last_chapter + 1):
            last_verse = self.versification.get_last_verse(book_num, chapter)
            for verse in range(1, last_verse + 1):
                verse_ref = self._create_verse_ref(str(chapter), str(verse))
                # Verses the versification marks as excluded produce no rows.
                if self._versification.is_excluded(verse_ref.bbbcccvvv):
                    continue
                yield from self._create_rows(verse_ref)
def create_versification_ref_corpus(
    versification: Versification = ORIGINAL_VERSIFICATION,
) -> ScriptureTextCorpus:
    """Build a reference corpus with one generated text per eligible book.

    A book number between 1 and ``versification.get_last_book()`` is included
    only when all of the following hold:

    * ``is_canonical`` accepts it;
    * it is not a book whose last chapter is 1 and whose last verse in that
      chapter is 1;
    * it lies outside the range 87-92 (inclusive).

    Args:
        versification: Versification used to enumerate books and passed to
            every generated text and to the resulting corpus.

    Returns:
        A ``ScriptureTextCorpus`` constructed from the eligible book texts.
    """
    # Resolved up front so the book range is fixed before the corpus
    # constructor starts consuming the generator below.
    last_book = versification.get_last_book()

    def eligible_texts() -> Generator[_VersificationRefCorpusText, None, None]:
        for book_num in range(1, last_book + 1):
            if not is_canonical(book_num):
                continue
            final_chapter = versification.get_last_chapter(book_num)
            if final_chapter == 1 and versification.get_last_verse(book_num, final_chapter) == 1:
                continue
            # NOTE(review): the meaning of book numbers 87-92 is not visible
            # in this file; they are skipped unconditionally.
            if 87 <= book_num <= 92:
                continue
            yield _VersificationRefCorpusText(book_num, versification)

    return ScriptureTextCorpus(versification, eligible_texts())
def extract_scripture_corpus(
    corpus: TextCorpus,
    ref_corpus: TextCorpus = create_versification_ref_corpus(),
) -> ContextManagedGenerator[Tuple[str, VerseRef, Optional[VerseRef]], None, None]:
    """Align ``corpus`` to ``ref_corpus`` and yield one entry per reference verse.

    Each yielded tuple is ``(text, ref, target_ref)``:

    * ``text`` is the target text gathered for the reference verse (pieces
      joined with a single space), or the literal ``"<range>"`` when the
      range flag is still set at the time the verse is flushed;
    * ``ref`` is the reference verse the entry belongs to;
    * ``target_ref`` is the target verse reference collected for that verse
      (possibly a verse range), or ``None`` if no row supplied one.

    Rows whose reference is not a verse are skipped.

    Args:
        corpus: Corpus to extract from; it is the target side of the alignment.
        ref_corpus: Corpus providing the reference verses. NOTE: the default is
            built once, when this function is defined, and is then shared by
            every call that omits the argument.

    Returns:
        A ``ContextManagedGenerator`` wrapping the generator of tuples.
    """
    # presumably all_source_rows=True keeps reference rows that have no
    # counterpart in ``corpus`` - confirm against ``align_rows``.
    parallel_corpus = ref_corpus.align_rows(corpus, all_source_rows=True)

    def extract() -> Generator[Tuple[str, VerseRef, Optional[VerseRef]], None, None]:
        with parallel_corpus.get_rows() as rows:
            # State for the reference verse currently being accumulated.
            cur_ref: Optional[VerseRef] = None
            cur_trg_ref: Optional[VerseRef] = None
            cur_trg_line = ""
            cur_trg_line_range = True
            for row in rows:
                scripture_ref: ScriptureRef = cast(ScriptureRef, row.ref)
                if not scripture_ref.is_verse:
                    continue
                vref: VerseRef = scripture_ref.verse_ref
                # The reference verse changed (segments ignored): flush the
                # finished verse before starting the next one.
                if cur_ref is not None and vref.compare_to(cur_ref, compare_segments=False) != 0:
                    # The conditional applies only to the first tuple element.
                    yield "<range>" if cur_trg_line_range else cur_trg_line, cur_ref, cur_trg_ref
                    # The next verse starts out flagged as a range if this one
                    # was, or if this one carried text; the flag is cleared
                    # again further down by a row that qualifies.
                    cur_trg_line_range = cur_trg_line_range or len(cur_trg_line) > 0
                    cur_trg_line = ""
                    cur_trg_ref = None

                cur_ref = vref
                if cur_trg_ref is None and len(row.target_refs) > 0:
                    # First target reference seen for this verse.
                    cur_trg_ref = cast(ScriptureRef, row.target_refs[0]).verse_ref
                elif cur_trg_ref is not None and len(row.target_refs) > 0 and cur_trg_ref != row.target_refs[0]:
                    # A different target reference for the same verse: merge it
                    # with the one already held. Work on a copy so the
                    # reference taken from an earlier row is not modified.
                    cur_trg_ref = cur_trg_ref.copy()
                    cur_trg_ref.simplify()
                    trg_ref = cast(ScriptureRef, row.target_refs[0]).verse_ref
                    # Order the two references so start_ref <= end_ref.
                    if cur_trg_ref < trg_ref:
                        start_ref = cur_trg_ref
                        end_ref = trg_ref
                    else:
                        start_ref = trg_ref
                        end_ref = cur_trg_ref
                    if start_ref.chapter == end_ref.chapter:
                        # Same chapter: build a range unless both share the
                        # same verse number, in which case the simplified
                        # copy is kept.
                        if start_ref.verse_num != end_ref.verse_num:
                            cur_trg_ref = VerseRef.from_range(start_ref, end_ref)
                    else:
                        # Different chapters: no range is built; keep the
                        # later reference.
                        cur_trg_ref = end_ref
                if not row.is_target_in_range or row.is_target_range_start or len(row.target_text) > 0:
                    if len(row.target_text) > 0:
                        # Append this row's text, space-separated.
                        if len(cur_trg_line) > 0:
                            cur_trg_line += " "
                        cur_trg_line += row.target_text
                    # This row is not an empty, non-initial member of a target
                    # range, so the verse is reported with its own text.
                    cur_trg_line_range = False
            # Flush the last verse, which has no following row to trigger it.
            if cur_ref is not None:
                yield "<range>" if cur_trg_line_range else cur_trg_line, cur_ref, cur_trg_ref

    return ContextManagedGenerator(extract())
def is_scripture(text_corpus: TextCorpus) -> bool:
    """Return ``True`` when ``text_corpus.versification`` is not ``None``."""
    versification = text_corpus.versification
    if versification is None:
        return False
    return True