Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle sample numbers > 2**31 in annotation files #328

Merged
merged 8 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added sample-data/huge.qrs
Binary file not shown.
36 changes: 35 additions & 1 deletion tests/test_annotation.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import re
import unittest

import numpy as np

import wfdb

class test_annotation():

class TestAnnotation(unittest.TestCase):
"""
Testing read and write of WFDB annotations, including Physionet
streaming.
Expand Down Expand Up @@ -183,3 +186,34 @@ def test_3(self):
assert (comp == [True] * 6)
assert annotation.__eq__(pn_annotation)
assert annotation.__eq__(write_annotation)

def test_4(self):
    """
    Round-trip an annotation file containing a very large time skip.

    The fixture 'sample-data/huge.qrs' holds a single annotation at
    sample 10000000000 and was generated with:
        echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs
    """
    fixture_record = 'sample-data/huge'
    ann_extension = 'qrs'

    # The sample number read from the fixture must not be truncated.
    original = wfdb.rdann(fixture_record, ann_extension)
    self.assertEqual(original.sample[0], 10000000000)

    # Write the annotation back out; the copy is re-read below as
    # record 'huge' (and removed again in tearDownClass).
    original.wrann()

    # The freshly written file must decode to the same annotation as
    # the reference fixture.
    expected = wfdb.rdann(fixture_record, ann_extension)
    rewritten = wfdb.rdann('huge', ann_extension)
    self.assertEqual(expected, rewritten)

@classmethod
def tearDownClass(cls):
    """Delete the annotation files written by the tests in this class."""
    # Files the tests may have written into the working directory.
    written_names = (
        '100.atr',
        '1003.atr',
        '12726.anI',
        'huge.qrs',
    )
    # Only regular files that actually exist are removed, so a test
    # that was skipped or failed early does not break the cleanup.
    leftovers = [name for name in written_names if os.path.isfile(name)]
    for name in leftovers:
        os.remove(name)


if __name__ == '__main__':
unittest.main()
6 changes: 3 additions & 3 deletions tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,9 +521,9 @@ def test_header_with_non_utf8(self):
@classmethod
def tearDownClass(cls):
"Clean up written files"
writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat',
'100.hea','1003.atr','100_3chan.dat','100_3chan.hea',
'12726.anI','a103l.hea','a103l.mat','s0010_re.dat',
writefiles = ['03700181.dat','03700181.hea','100.dat',
'100.hea','100_3chan.dat','100_3chan.hea',
'a103l.hea','a103l.mat','s0010_re.dat',
's0010_re.hea','s0010_re.xyz','test01_00s.dat',
'test01_00s.hea','test01_00s_skewframe.hea',
'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea']
Expand Down
78 changes: 38 additions & 40 deletions wfdb/io/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,6 @@ def check_field(self, field):
raise ValueError("The 'sample' field must only contain non-negative integers")
if min(sampdiffs) < 0 :
raise ValueError("The 'sample' field must contain monotonically increasing sample numbers")
if max(sampdiffs) > 2147483648:
raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31')

elif field == 'label_store':
if min(item) < 1 or max(item) > 49:
Expand Down Expand Up @@ -1370,19 +1368,19 @@ def field2bytes(field, value):
# sample difference
sd = value[0]

# Add SKIP element if value is too large for single byte
if sd>1023:
# 8 bytes in total:
# - [0, 59>>2] indicates SKIP
# - Next 4 gives sample difference
# - Final 2 give 0 and sym
data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode]
# Just need samp and sym
else:
# - First byte stores low 8 bits of samp
# - Second byte stores high 2 bits of samp
# and sym
data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode]
data_bytes = []
# Add SKIP elements if value is too large
while sd > 0x7fffffff:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file now contains both this hex value (0x7fffffff) and its decimal equivalent, 2147483647. Perhaps unify them under a single module-level constant for clarity?

data_bytes += [0, 59 << 2, 0xff, 0x7f, 0xff, 0xff]
sd -= 0x7fffffff
if sd > 1023:
data_bytes += [0, 59 << 2,
tompollard marked this conversation as resolved.
Show resolved Hide resolved
(sd >> 16) & 255,
(sd >> 24) & 255,
(sd >> 0) & 255,
(sd >> 8) & 255]
sd = 0
data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode]

elif field == 'num':
# First byte stores num
Expand Down Expand Up @@ -1653,8 +1651,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False,
subtype, chan, num, aux_note)

# Convert lists to numpy arrays dtype='int'
(sample, label_store, subtype,
chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num)
(label_store, subtype,
chan, num) = lists_to_int_arrays(label_store, subtype, chan, num)

# Convert sample numbers to a numpy array of 'int64'
sample = np.array(sample, dtype='int64')

# Try to get fs from the header file if it is not contained in the
# annotation file
Expand Down Expand Up @@ -1748,8 +1749,8 @@ def load_byte_pairs(record_name, extension, pn_dir):

Returns
-------
filebytes : str
The input filestream converted to bytes.
filebytes : ndarray
The input filestream converted to an Nx2 array of unsigned bytes.

"""
# local file
Expand All @@ -1769,8 +1770,8 @@ def proc_ann_bytes(filebytes, sampto):

Parameters
----------
filebytes : str
The input filestream converted to bytes.
filebytes : ndarray
The input filestream converted to an Nx2 array of unsigned bytes.
sampto : int
The maximum sample number for annotations to be returned.

Expand Down Expand Up @@ -1852,8 +1853,8 @@ def proc_core_fields(filebytes, bpi):

Parameters
----------
filebytes : str
The input filestream converted to bytes.
filebytes : ndarray
The input filestream converted to an Nx2 array of unsigned bytes.
bpi : int
The index to start the conversion.

Expand All @@ -1869,31 +1870,28 @@ def proc_core_fields(filebytes, bpi):
The index to start the conversion.

"""
label_store = filebytes[bpi, 1] >> 2
sample_diff = 0

# The current byte pair will contain either the actual d_sample + annotation store value,
# or 0 + SKIP.

# Not a skip - it is the actual sample number + annotation type store value
if label_store != 59:
sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
bpi = bpi + 1
# Skip. Note: Could there be another skip after the first?
else:
while filebytes[bpi, 1] >> 2 == 59:
# 4 bytes storing dt
sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
+ filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]
skip_diff = ((int(filebytes[bpi + 1, 0]) << 16)
+ (int(filebytes[bpi + 1, 1]) << 24)
+ (int(filebytes[bpi + 2, 0]) << 0)
+ (int(filebytes[bpi + 2, 1]) << 8))

# Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1
if sample_diff > 2147483647:
sample_diff = sample_diff - 4294967296
if skip_diff > 2147483647:
skip_diff = skip_diff - 4294967296

# After the 4 bytes, the next pair's samp is also added
sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3)
sample_diff += skip_diff
bpi = bpi + 3

# The label is stored after the 4 bytes. Samples here should be 0.
label_store = filebytes[bpi + 3, 1] >> 2
bpi = bpi + 4
# Not a skip - it is the actual sample number + annotation type store value
label_store = filebytes[bpi, 1] >> 2
sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3))
bpi = bpi + 1

return sample_diff, label_store, bpi

Expand Down