Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error handling #989

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ biom-2.1.16-dev

Maintenance:

* Allow testing for nan and inf values with "hasnan" and "hasinf" error states. See issue [#982](https://github.com/biocore/biom-format/issues/982).
* Python 3.7 and 3.8 removed from CI as they are [end-of-life](https://devguide.python.org/versions/). Python 3.13 added to CI. See PR [#986](https://github.com/biocore/biom-format/pull/986).

biom 2.1.16
Expand Down
78 changes: 58 additions & 20 deletions biom/err.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@
Treatment of a table in which the number of sample metadata elements
differs from the size of the data.

hasnan : 'ignore'
Treatment of tables with nan values.

hasinf : 'ignore'
Treatment of tables with inf values.

Examples
--------

Expand Down Expand Up @@ -62,6 +68,8 @@
from sys import stdout
from contextlib import contextmanager

import numpy as np

from biom.exception import TableException


Expand All @@ -73,6 +81,15 @@
SAMPDUP = "Duplicate sample IDs!"
OBSMDSIZE = "Size of observation metadata differs from matrix size!"
SAMPMDSIZE = "Size of sample metadata differs from matrix size!"
HASNAN = "Table contains nan values!"
HASINF = "Table contains inf values!"

IGNORE = 'ignore'
RAISE = 'raise'
CALL = 'call'
WARN = 'warn'
PRINT = 'print'
ALL = 'all'


# _zz_ so the sort order places this test last
Expand Down Expand Up @@ -113,13 +130,25 @@ def _test_sampmdsize(t):
return t.shape[1] != len(md) if md is not None else False


def _test_hasnan(t):
"""Check if a table contains nan values."""
# wrap in bool to ensure return dtype is Python not numpy
return bool(np.isnan(t._data.data).any())


def _test_hasinf(t):
"""Check if a table contains inf values."""
# wrap in bool to ensure return dtype is Python not numpy
return bool(np.isinf(t._data.data).any())


def _create_error_states(msg, callback, exception):
    """Build the handler table for a single error type.

    Parameters
    ----------
    msg : str
        Message handed to the warning, to ``exception`` and to the printed
        output.
    callback : callable or None
        Handler stored under the ``CALL`` state. When ``None``, a no-op
        accepting one argument is stored instead.
    exception : callable
        Called with ``msg`` by the ``RAISE`` handler.

    Returns
    -------
    dict
        One entry per state (``IGNORE``, ``WARN``, ``RAISE``, ``CALL``,
        ``PRINT``), each a callable accepting one argument.

    Notes
    -----
    The ``RAISE`` handler builds and returns ``exception(msg)``; it does not
    raise it itself.
    """
    # a missing callback degrades to a no-op so the CALL state is always
    # callable
    on_call = callback if callback is not None else lambda x: None

    return {IGNORE: lambda x: None,
            WARN: lambda x: warn(msg),
            RAISE: lambda x: exception(msg),
            CALL: on_call,
            PRINT: lambda x: stdout.write(msg + '\n')}


class ErrorProfile:
Expand All @@ -129,7 +158,7 @@ class ErrorProfile:
handled, how those errors are handled, and performs the handling of the
errors.
"""
_valid_states = frozenset(['raise', 'ignore', 'call', 'print', 'warn'])
_valid_states = frozenset([RAISE, IGNORE, CALL, PRINT, WARN])

def __init__(self):
self._profile = {}
Expand Down Expand Up @@ -213,8 +242,8 @@ def state(self):
@state.setter
def state(self, new_state):
"""Update current state"""
if 'all' in new_state:
to_update = [(err, new_state['all']) for err in self._state]
if ALL in new_state:
to_update = [(err, new_state[ALL]) for err in self._state]
else:
to_update = new_state.items()

Expand Down Expand Up @@ -252,7 +281,10 @@ def test(self, item, *args):
args = self._test.keys()

for errtype in sorted(args):
test = self._test.get(errtype, lambda: None)
test = self._test.get(errtype, lambda _: None)

if self._state.get(errtype) == IGNORE:
continue

if test(item):
return self._handle_error(errtype, item)
Expand Down Expand Up @@ -320,19 +352,23 @@ def getcall(self, errtype):


__errprof = ErrorProfile()
__errprof.register('empty', EMPTY, 'ignore', _zz_test_empty,
__errprof.register('empty', EMPTY, IGNORE, _zz_test_empty,
exception=TableException)
__errprof.register('obssize', OBSSIZE, RAISE, _test_obssize,
exception=TableException)
__errprof.register('sampsize', SAMPSIZE, RAISE, _test_sampsize,
exception=TableException)
__errprof.register('obssize', OBSSIZE, 'raise', _test_obssize,
__errprof.register('obsdup', OBSDUP, RAISE, _test_obsdup,
exception=TableException)
__errprof.register('sampsize', SAMPSIZE, 'raise', _test_sampsize,
__errprof.register('sampdup', SAMPDUP, RAISE, _test_sampdup,
exception=TableException)
__errprof.register('obsdup', OBSDUP, 'raise', _test_obsdup,
__errprof.register('obsmdsize', OBSMDSIZE, RAISE, _test_obsmdsize,
exception=TableException)
__errprof.register('sampdup', SAMPDUP, 'raise', _test_sampdup,
__errprof.register('sampmdsize', SAMPMDSIZE, RAISE, _test_sampmdsize,
exception=TableException)
__errprof.register('obsmdsize', OBSMDSIZE, 'raise', _test_obsmdsize,
__errprof.register('hasnan', HASNAN, IGNORE, _test_hasnan,
exception=TableException)
__errprof.register('sampmdsize', SAMPMDSIZE, 'raise', _test_sampmdsize,
__errprof.register('hasinf', HASINF, IGNORE, _test_hasinf,
exception=TableException)


Expand Down Expand Up @@ -384,8 +420,8 @@ def seterr(**kwargs):

"""
old_state = __errprof.state.copy()
if 'all' in kwargs:
__errprof.state = {'all': kwargs['all']}
if ALL in kwargs:
__errprof.state = {ALL: kwargs[ALL]}
else:
__errprof.state = kwargs
return old_state
Expand Down Expand Up @@ -499,5 +535,7 @@ def errstate(**kwargs):

"""
old_state = seterr(**kwargs)
yield
seterr(**old_state)
try:
yield
finally:
seterr(**old_state)
122 changes: 102 additions & 20 deletions biom/tests/test_err.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
from biom.err import (_zz_test_empty, _test_obssize, _test_sampsize,
_test_obsdup, _test_sampdup, _test_obsmdsize,
_test_sampmdsize, errstate, geterr, seterr, geterrcall,
seterrcall, errcheck, __errprof)
_test_hasnan, _test_hasinf, seterrcall, errcheck,
__errprof, IGNORE, RAISE, EMPTY, OBSSIZE, SAMPSIZE, CALL,
WARN, OBSDUP, SAMPDUP, OBSMDSIZE, SAMPMDSIZE, HASNAN,
HASINF)


runtime_ep = __errprof
Expand Down Expand Up @@ -67,6 +70,16 @@ def test_test_sampmdsize(self):
self.ex_table._sample_metadata[:-1]
self.assertTrue(_test_sampmdsize(self.ex_table))

def test_test_hasnan(self):
    # the untouched example table must not be flagged
    table = self.ex_table
    self.assertFalse(_test_hasnan(table))

    # overwrite the first stored value in place, then expect detection
    table._data.data[0] = np.nan
    self.assertTrue(_test_hasnan(table))

def test_test_hasinf(self):
    # the untouched example table must not be flagged
    table = self.ex_table
    self.assertFalse(_test_hasinf(table))

    # overwrite the first stored value in place, then expect detection
    table._data.data[0] = np.inf
    self.assertTrue(_test_hasinf(table))


class ErrorProfileTests(TestCase):
def setUp(self):
Expand Down Expand Up @@ -101,11 +114,11 @@ def test_test_evaluation_order(self):
'Duplicate observation IDs')

def test_state(self):
self.ep.state = {'all': 'ignore'}
self.ep.state = {'all': IGNORE}
self.assertEqual(set(self.ep._state.values()), {'ignore'})
self.ep.state = {'empty': 'call'}
self.assertEqual(set(self.ep._state.values()), {'ignore', 'call'})
self.assertEqual(self.ep.state['empty'], 'call')
self.ep.state = {'empty': CALL}
self.assertEqual(set(self.ep._state.values()), {'ignore', CALL})
self.assertEqual(self.ep.state['empty'], CALL)

with self.assertRaises(KeyError):
self.ep.state = {'empty': 'missing'}
Expand All @@ -124,17 +137,18 @@ def callback(foo):
self.ep.setcall('empty', callback)

self.assertTrue(isinstance(self.ep._handle_error('empty', None),
TableException))
self.ep.state = {'empty': 'call'}
TableException))

self.ep.state = {'empty': CALL}
self.assertEqual(self.ep._handle_error('empty', None), 10)

def test_setcall(self):
def callback(foo):
return 10

self.assertEqual(self.ep._profile['empty']['call'](None), None)
self.assertEqual(self.ep._profile['empty'][CALL](None), None)
self.ep.setcall('empty', callback)
self.assertEqual(self.ep._profile['empty']['call'](None), 10)
self.assertEqual(self.ep._profile['empty'][CALL](None), 10)

with self.assertRaises(KeyError):
self.ep.setcall('emptyfoo', callback)
Expand All @@ -155,16 +169,16 @@ def cb(x):
def test(x):
return x == 5

self.ep.register('foo', 'bar', 'ignore', test, callback=cb)
self.ep.register('foo', 'bar', IGNORE, test, callback=cb)
self.assertTrue('foo' in self.ep)
self.ep.state = {'foo': 'call'}
self.ep.state = {'foo': CALL}
self.assertEqual(self.ep._handle_error('foo', None), 123)

foo_prof = self.ep._profile['foo'].copy()
prof, func, state = self.ep.unregister('foo')

self.assertEqual(func, test)
self.assertEqual(state, 'call')
self.assertEqual(state, CALL)
self.assertEqual(prof, foo_prof)

with self.assertRaises(KeyError):
Expand All @@ -184,20 +198,20 @@ def setUp(self):
def test_geterr(self):
state = geterr()
self.assertEqual(state, runtime_ep._state)
old = seterr(all='call')
old = seterr(all=CALL)
self.assertNotEqual(geterr(), state)
seterr(**old)

def test_seterr(self):
existing = seterr(empty='warn')
self.assertEqual(runtime_ep._state['empty'], 'warn')
existing = seterr(empty=WARN)
self.assertEqual(runtime_ep._state['empty'], WARN)
self.assertNotEqual(runtime_ep._state['empty'], existing)
seterr(empty=existing['empty'])
self.assertNotEqual(runtime_ep._state['empty'], 'warn')
self.assertNotEqual(runtime_ep._state['empty'], WARN)
self.assertEqual(runtime_ep._state, existing)

def test_geterrcall(self):
exp = runtime_ep._profile['sampsize']['call']
exp = runtime_ep._profile['sampsize'][CALL]
obs = geterrcall('sampsize')
self.assertEqual(obs, exp)

Expand All @@ -224,11 +238,79 @@ def foo(item):

table = Table([], [], [])
seterrcall('empty', foo)
self.assertNotEqual(geterr()['empty'], 'call')
with errstate(empty='call'):
self.assertNotEqual(geterr()['empty'], CALL)
with errstate(empty=CALL):
result = errcheck(table)
self.assertEqual(result, "the callback called")
self.assertNotEqual(geterr()['empty'], 'call')
self.assertNotEqual(geterr()['empty'], CALL)


def _what_to_raise(errtype):
    """Build a state mapping in which only ``errtype`` raises.

    Every error type present in the runtime profile's state is set to
    ``IGNORE``; ``errtype`` is then set to ``RAISE``.
    """
    states = dict.fromkeys(__errprof._state, IGNORE)
    states[errtype] = RAISE
    return states


class IntegrationTests(TestCase):
    """Trip one error condition at a time while constructing a ``Table``.

    Each test switches a single error type to ``RAISE`` (all others are
    ignored) and expects construction to fail with ``TableException``
    carrying the matching message.
    """

    @staticmethod
    def _matrix():
        # fresh 2 x 3 array on every call so tests never share data
        return np.array([[1, 2, 3], [4, 5, 6]])

    def _check(self, errcond, msg, table_data):
        # the error state is entered inside the assertion context, exactly
        # as two nested ``with`` statements would do
        expect_failure = self.assertRaisesRegex(TableException, msg)
        with expect_failure, errstate(**_what_to_raise(errcond)):
            Table(*table_data)

    def test_has_duplicate_samples(self):
        sample_ids = ['S1', 'S1', 'S2']
        self._check('sampdup', SAMPDUP,
                    (self._matrix(), ['a', 'b'], sample_ids))

    def test_has_duplicate_observations(self):
        observation_ids = ['x', 'x']
        self._check('obsdup', OBSDUP,
                    (self._matrix(), observation_ids, ['a', 'b', 'c']))

    def test_is_empty(self):
        self._check('empty', EMPTY, ([], [], []))

    def test_observation_size(self):
        # three observation IDs against two matrix rows
        observation_ids = ['w', 'x', 'y']
        self._check('obssize', OBSSIZE,
                    (self._matrix(), observation_ids, ['a', 'b', 'c']))

    def test_sample_size(self):
        # two sample IDs against three matrix columns
        sample_ids = ['a', 'b']
        self._check('sampsize', SAMPSIZE,
                    (self._matrix(), ['w', 'x'], sample_ids))

    def test_observation_metadata_size(self):
        # three metadata entries against two observations
        observation_md = [{1: 2}, {1: 3}, {1: 4}]
        self._check('obsmdsize', OBSMDSIZE,
                    (self._matrix(), ['x', 'y'], ['a', 'b', 'c'],
                     observation_md))

    def test_sample_metadata_size(self):
        # one metadata entry against three samples
        sample_md = [{1: 2}]
        self._check('sampmdsize', SAMPMDSIZE,
                    (self._matrix(), ['x', 'y'], ['a', 'b', 'c'],
                     None, sample_md))

    def test_has_nan(self):
        matrix = np.array([[1, 2, np.nan], [4, 5, 6]])
        self._check('hasnan', HASNAN,
                    (matrix, ['x', 'y'], ['a', 'b', 'c']))

    def test_has_inf(self):
        matrix = np.array([[1, 2, np.inf], [4, 5, 6]])
        self._check('hasinf', HASINF,
                    (matrix, ['x', 'y'], ['a', 'b', 'c']))


if __name__ == '__main__':
Expand Down
6 changes: 4 additions & 2 deletions biom/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from biom.parse import (generatedby, MetadataMap, parse_biom_table, parse_uc,
load_table, save_table)
from biom.table import Table
from biom.err import errstate, IGNORE
from biom.util import __version__
from biom.tests.long_lines import (uc_empty, uc_invalid_id, uc_minimal,
uc_lib_minimal,
Expand Down Expand Up @@ -61,8 +62,9 @@ def tearDown(self):

def test_from_tsv_bug_854(self):
data = StringIO('#FeatureID\tSample1')
exp = Table([], [], ['Sample1'])
obs = Table.from_tsv(data, None, None, lambda x: x)
with errstate(all=IGNORE):
exp = Table([], [], ['Sample1'])
obs = Table.from_tsv(data, None, None, lambda x: x)
self.assertEqual(obs, exp)

def test_generatedby(self):
Expand Down
Loading