-
Notifications
You must be signed in to change notification settings - Fork 5
/
annotations.jl
106 lines (82 loc) · 4.36 KB
/
annotations.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#####
##### `onda.annotation`
#####
# Declare the `onda.annotation` schema. `Annotation` is the type-name prefix used for this
# schema's versions (see `AnnotationV1` / `AnnotationV1SchemaVersion` used below).
@schema "onda.annotation" Annotation

# Version 1 of `onda.annotation`. Each field is declared as `name::Type = expr`; here every
# `expr` passes the provided value through the declared type's constructor.
@version AnnotationV1 begin
    recording::UUID = UUID(recording)
    id::UUID = UUID(id)
    span::TimeSpan = TimeSpan(span)
end
# For `TimeSpan`-typed fields of `AnnotationV1`, accept either a `TimeSpan` or a
# `NamedTupleTimeSpan` as the field's type.
function Legolas.accepted_field_type(::AnnotationV1SchemaVersion, ::Type{TimeSpan})
    return Union{NamedTupleTimeSpan,TimeSpan}
end
"""
    @version AnnotationV1 begin
        recording::UUID
        id::UUID
        span::TimeSpan
    end

A Legolas-generated record type representing an [`onda.annotation` as described by the Onda Format Specification](https://github.com/beacon-biosignals/Onda.jl#ondaannotation1).

See https://github.com/beacon-biosignals/Legolas.jl for details regarding Legolas record types.
"""
AnnotationV1
"""
    validate_annotations(annotations)

Validate `annotations`, a presumed `onda.annotation` table, at both the table level and
the row level, then return `annotations`.

An error is thrown in any of the following cases:

- `Legolas.validate(Tables.schema(annotations), AnnotationV1SchemaVersion())` throws an error
- `AnnotationV1(r)` errors for any `r` in `Tables.rows(annotations)`
- `annotations` contains rows with duplicate `id`s
"""
function validate_annotations(annotations)
    return _fully_validate_legolas_table(:validate_annotations, annotations,
                                         AnnotationV1, AnnotationV1SchemaVersion(), :id)
end
#####
##### `merge_overlapping_annotations`
#####
# Declare the `onda.merged-annotation` schema, with `MergedAnnotation` as its type-name prefix.
@schema "onda.merged-annotation" MergedAnnotation

# Version 1 of `onda.merged-annotation`, declared as an extension of `AnnotationV1`
# (the `>` names the parent version) with one additional required field.
@version MergedAnnotationV1 > AnnotationV1 begin
    from::Vector{UUID}  # `id`s of the source annotation(s) this annotation was merged from
end
"""
    @version MergedAnnotationV1 > AnnotationV1 begin
        from::Vector{UUID}
    end

A Legolas-generated record type representing an annotation derived from "merging" one or more
existing annotations.

This record type extends `AnnotationV1` with a single additional required field,
`from::Vector{UUID}`, whose entries are the `id`s of the annotation's source annotation(s).

See https://github.com/beacon-biosignals/Legolas.jl for details regarding Legolas record types.
"""
MergedAnnotationV1
"""
    merge_overlapping_annotations([predicate=TimeSpans.overlaps,] annotations)

Return a `Vector{MergedAnnotationV1}` built from the `onda.annotation@1`-compliant table
`annotations`, in which "overlapping" consecutive entries have been merged using
`TimeSpans.shortest_timespan_containing`.

Annotations are grouped by `recording` and, within each group, visited in order of
`TimeSpans.start(span)`. Two consecutive annotations `a` and `b` are considered
"overlapping" if `a.recording == b.recording && predicate(a.span, b.span)`. The `span` of a
merged annotation is obtained by calling `TimeSpans.shortest_timespan_containing` on the
overlapping set of source annotations.

Every annotation in the returned vector has a freshly generated `id` field and a non-empty
`from` field. An output annotation whose `from` field contains only a single element
corresponds to an individual non-overlapping annotation in the provided `annotations`.

This function internally works with `Tables.columns(annotations)` rather than `annotations`
directly, so it may be slower and/or require more memory if
`!Tables.columnaccess(annotations)`.

See also `TimeSpans.merge_spans` for similar functionality on generic time spans (instead
of annotations).
"""
function merge_overlapping_annotations(predicate, annotations)
    cols = Tables.columns(annotations)
    output = MergedAnnotationV1[]
    for (recording_id, (indices,)) in Legolas.locations((cols.recording,))
        # Narrow each column to the rows of this recording (views, not copies).
        group = (recording=view(cols.recording, indices),
                 id=view(cols.id, indices),
                 span=view(cols.span, indices))
        # Visit this recording's annotations in order of span start.
        order = sortperm(group.span; by=TimeSpans.start)
        ordered = Tables.rows((recording=view(group.recording, order),
                               id=view(group.id, order),
                               span=view(group.span, order)))
        # The earliest annotation always seeds a new merged annotation.
        head = first(ordered)
        seed = MergedAnnotationV1(; recording=recording_id, id=uuid4(), span=head.span,
                                  from=[head.id])
        push!(output, seed)
        for candidate in Iterators.drop(ordered, 1)
            current = output[end]
            if candidate.recording == current.recording &&
               predicate(candidate.span, current.span)
                # Absorb `candidate`: record its `id` and widen the span to cover both.
                push!(current.from, candidate.id)
                widened = TimeSpans.shortest_timespan_containing((current.span,
                                                                  candidate.span))
                output[end] = MergedAnnotationV1(rowmerge(current; span=widened))
            else
                # No overlap with the most recent merged annotation: start a new one.
                fresh = MergedAnnotationV1(; recording=candidate.recording, id=uuid4(),
                                           span=candidate.span, from=[candidate.id])
                push!(output, fresh)
            end
        end
    end
    return output
end

function merge_overlapping_annotations(annotations)
    return merge_overlapping_annotations(overlaps, annotations)
end