From 47b2cc5c0083fbc238f3ff6f8c1ba891fdfb0387 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 3 Dec 2024 15:14:18 -0800 Subject: [PATCH] MAINT/ENH/TST: fixes #982 and expands err check testing --- biom/err.py | 72 ++++++++++++++++------- biom/tests/test_err.py | 121 ++++++++++++++++++++++++++++++++------- biom/tests/test_parse.py | 6 +- 3 files changed, 157 insertions(+), 42 deletions(-) diff --git a/biom/err.py b/biom/err.py index 013875ec..987f039f 100644 --- a/biom/err.py +++ b/biom/err.py @@ -62,6 +62,8 @@ from sys import stdout from contextlib import contextmanager +import numpy as np + from biom.exception import TableException @@ -73,6 +75,15 @@ SAMPDUP = "Duplicate sample IDs!" OBSMDSIZE = "Size of observation metadata differs from matrix size!" SAMPMDSIZE = "Size of sample metadata differs from matrix size!" +HASNAN = "Table contains nan values!" +HASINF = "Table contains inf values!" + +IGNORE = 'ignore' +RAISE = 'raise' +CALL = 'call' +WARN = 'warn' +PRINT = 'print' +ALL = 'all' # _zz_ so the sort order places this test last @@ -113,13 +124,25 @@ def _test_sampmdsize(t): return t.shape[1] != len(md) if md is not None else False +def _test_hasnan(t): + """Check if a table contains nan values.""" + # wrap in bool to ensure return dtype is Python not numpy + return bool(np.isnan(t._data.data).any()) + + +def _test_hasinf(t): + """Check if a table contains inf values.""" + # wrap in bool to ensure return dtype is Python not numpy + return bool(np.isinf(t._data.data).any()) + + def _create_error_states(msg, callback, exception): """Create error states""" - return {'ignore': lambda x: None, - 'warn': lambda x: warn(msg), - 'raise': lambda x: exception(msg), - 'call': callback if callback is not None else lambda x: None, - 'print': lambda x: stdout.write(msg + '\n')} + return {IGNORE: lambda x: None, + WARN: lambda x: warn(msg), + RAISE: lambda x: exception(msg), + CALL: callback if callback is not None else lambda x: None, + PRINT: lambda x: stdout.write(msg + '\n')} class ErrorProfile: @@ -129,7 +152,7 @@ class ErrorProfile: handled, how those errors are handled, and performs the handling of the errors. """ - _valid_states = frozenset(['raise', 'ignore', 'call', 'print', 'warn']) + _valid_states = frozenset([RAISE, IGNORE, CALL, PRINT, WARN]) def __init__(self): self._profile = {} @@ -213,8 +236,8 @@ def state(self): @state.setter def state(self, new_state): """Update current state""" - if 'all' in new_state: - to_update = [(err, new_state['all']) for err in self._state] + if ALL in new_state: + to_update = [(err, new_state[ALL]) for err in self._state] else: to_update = new_state.items() @@ -252,7 +275,10 @@ def test(self, item, *args): args = self._test.keys() for errtype in sorted(args): - test = self._test.get(errtype, lambda: None) + test = self._test.get(errtype, lambda _: None) + + if self._state.get(errtype) == IGNORE: + continue if test(item): return self._handle_error(errtype, item) @@ -320,19 +346,23 @@ def getcall(self, errtype): __errprof = ErrorProfile() -__errprof.register('empty', EMPTY, 'ignore', _zz_test_empty, +__errprof.register('empty', EMPTY, IGNORE, _zz_test_empty, + exception=TableException) +__errprof.register('obssize', OBSSIZE, RAISE, _test_obssize, + exception=TableException) +__errprof.register('sampsize', SAMPSIZE, RAISE, _test_sampsize, exception=TableException) -__errprof.register('obssize', OBSSIZE, 'raise', _test_obssize, +__errprof.register('obsdup', OBSDUP, RAISE, _test_obsdup, exception=TableException) -__errprof.register('sampsize', SAMPSIZE, 'raise', _test_sampsize, +__errprof.register('sampdup', SAMPDUP, RAISE, _test_sampdup, exception=TableException) -__errprof.register('obsdup', OBSDUP, 'raise', _test_obsdup, +__errprof.register('obsmdsize', OBSMDSIZE, RAISE, _test_obsmdsize, exception=TableException) -__errprof.register('sampdup', SAMPDUP, 'raise', _test_sampdup, +__errprof.register('sampmdsize', SAMPMDSIZE, RAISE, _test_sampmdsize, exception=TableException) -__errprof.register('obsmdsize', OBSMDSIZE, 'raise', _test_obsmdsize, +__errprof.register('hasnan', HASNAN, IGNORE, _test_hasnan, exception=TableException) -__errprof.register('sampmdsize', SAMPMDSIZE, 'raise', _test_sampmdsize, +__errprof.register('hasinf', HASINF, IGNORE, _test_hasinf, exception=TableException) @@ -384,8 +414,8 @@ def seterr(**kwargs): """ old_state = __errprof.state.copy() - if 'all' in kwargs: - __errprof.state = {'all': kwargs['all']} + if ALL in kwargs: + __errprof.state = {ALL: kwargs[ALL]} else: __errprof.state = kwargs return old_state @@ -499,5 +529,7 @@ def errstate(**kwargs): """ old_state = seterr(**kwargs) - yield - seterr(**old_state) + try: + yield + finally: + seterr(**old_state) diff --git a/biom/tests/test_err.py b/biom/tests/test_err.py index 67ae1aff..8b5a961a 100644 --- a/biom/tests/test_err.py +++ b/biom/tests/test_err.py @@ -18,7 +18,10 @@ from biom.err import (_zz_test_empty, _test_obssize, _test_sampsize, _test_obsdup, _test_sampdup, _test_obsmdsize, _test_sampmdsize, errstate, geterr, seterr, geterrcall, - seterrcall, errcheck, __errprof) + _test_hasnan, _test_hasinf, seterrcall, errcheck, + __errprof, IGNORE, RAISE, EMPTY, OBSSIZE, SAMPSIZE, CALL, + WARN, OBSDUP, SAMPDUP, OBSMDSIZE, SAMPMDSIZE, HASNAN, + HASINF) runtime_ep = __errprof @@ -67,6 +70,16 @@ def test_test_sampmdsize(self): self.ex_table._sample_metadata[:-1] self.assertTrue(_test_sampmdsize(self.ex_table)) + def test_test_hasnan(self): + self.assertFalse(_test_hasnan(self.ex_table)) + self.ex_table._data.data[0] = np.nan + self.assertTrue(_test_hasnan(self.ex_table)) + + def test_test_hasinf(self): + self.assertFalse(_test_hasinf(self.ex_table)) + self.ex_table._data.data[0] = np.inf + self.assertTrue(_test_hasinf(self.ex_table)) + class ErrorProfileTests(TestCase): def setUp(self): @@ -101,11 +114,11 @@ def test_test_evaluation_order(self): 'Duplicate observation IDs') def test_state(self): - self.ep.state = {'all': 'ignore'} + self.ep.state = {'all': IGNORE} self.assertEqual(set(self.ep._state.values()), {'ignore'}) - self.ep.state = {'empty': 'call'} - self.assertEqual(set(self.ep._state.values()), {'ignore', 'call'}) - self.assertEqual(self.ep.state['empty'], 'call') + self.ep.state = {'empty': CALL} + self.assertEqual(set(self.ep._state.values()), {'ignore', CALL}) + self.assertEqual(self.ep.state['empty'], CALL) with self.assertRaises(KeyError): self.ep.state = {'empty': 'missing'} @@ -124,17 +137,18 @@ def callback(foo): self.ep.setcall('empty', callback) self.assertTrue(isinstance(self.ep._handle_error('empty', None), - TableException)) - self.ep.state = {'empty': 'call'} + TableException)) + + self.ep.state = {'empty': CALL} self.assertEqual(self.ep._handle_error('empty', None), 10) def test_setcall(self): def callback(foo): return 10 - self.assertEqual(self.ep._profile['empty']['call'](None), None) + self.assertEqual(self.ep._profile['empty'][CALL](None), None) self.ep.setcall('empty', callback) - self.assertEqual(self.ep._profile['empty']['call'](None), 10) + self.assertEqual(self.ep._profile['empty'][CALL](None), 10) with self.assertRaises(KeyError): self.ep.setcall('emptyfoo', callback) @@ -155,16 +169,16 @@ def cb(x): def test(x): return x == 5 - self.ep.register('foo', 'bar', 'ignore', test, callback=cb) + self.ep.register('foo', 'bar', IGNORE, test, callback=cb) self.assertTrue('foo' in self.ep) - self.ep.state = {'foo': 'call'} + self.ep.state = {'foo': CALL} self.assertEqual(self.ep._handle_error('foo', None), 123) foo_prof = self.ep._profile['foo'].copy() prof, func, state = self.ep.unregister('foo') self.assertEqual(func, test) - self.assertEqual(state, 'call') + self.assertEqual(state, CALL) self.assertEqual(prof, foo_prof) with self.assertRaises(KeyError): @@ -184,20 +198,20 @@ def setUp(self): def test_geterr(self): state = geterr() self.assertEqual(state, runtime_ep._state) - old = seterr(all='call') + old = seterr(all=CALL) self.assertNotEqual(geterr(), state) seterr(**old) def test_seterr(self): - existing = seterr(empty='warn') - self.assertEqual(runtime_ep._state['empty'], 'warn') + existing = seterr(empty=WARN) + self.assertEqual(runtime_ep._state['empty'], WARN) self.assertNotEqual(runtime_ep._state['empty'], existing) seterr(empty=existing['empty']) - self.assertNotEqual(runtime_ep._state['empty'], 'warn') + self.assertNotEqual(runtime_ep._state['empty'], WARN) self.assertEqual(runtime_ep._state, existing) def test_geterrcall(self): - exp = runtime_ep._profile['sampsize']['call'] + exp = runtime_ep._profile['sampsize'][CALL] obs = geterrcall('sampsize') self.assertEqual(obs, exp) @@ -224,11 +238,78 @@ def foo(item): table = Table([], [], []) seterrcall('empty', foo) - self.assertNotEqual(geterr()['empty'], 'call') - with errstate(empty='call'): + self.assertNotEqual(geterr()['empty'], CALL) + with errstate(empty=CALL): result = errcheck(table) self.assertEqual(result, "the callback called") - self.assertNotEqual(geterr()['empty'], 'call') + self.assertNotEqual(geterr()['empty'], CALL) + +def _what_to_raise(errtype): + d = {k: IGNORE for k in __errprof._state} + d[errtype] = RAISE + return d + + +class IntegrationTests(TestCase): + def _check(self, errcond, msg, table_data): + with self.assertRaisesRegex(TableException, msg): + with errstate(**_what_to_raise(errcond)): + Table(*table_data) + + def test_has_duplicate_samples(self): + data = (np.array([[1, 2, 3], [4, 5, 6]]), + list('ab'), + ['S1', 'S1', 'S2']) + self._check('sampdup', SAMPDUP, data) + + def test_has_duplicate_observations(self): + data = (np.array([[1, 2, 3], [4, 5, 6]]), + ['x', 'x'], + list('abc')) + self._check('obsdup', OBSDUP, data) + + def test_is_empty(self): + data = ([], [], []) + self._check('empty', EMPTY, data) + + def test_observation_size(self): + data = (np.array([[1, 2, 3], [4, 5, 6]]), + ['w', 'x', 'y'], + list('abc')) + self._check('obssize', OBSSIZE, data) + + def test_sample_size(self): + data = (np.array([[1, 2, 3], [4, 5, 6]]), + ['w', 'x'], + list('ab')) + self._check('sampsize', SAMPSIZE, data) + + def test_observation_metadata_size(self): + data = (np.array([[1, 2, 3], [4, 5, 6]]), + ['x', 'y'], + list('abc'), + [{1: 2}, {1: 3}, {1: 4}]) + self._check('obsmdsize', OBSMDSIZE, data) + + def test_sample_metadata_size(self): + data = (np.array([[1, 2, 3], [4, 5, 6]]), + ['x', 'y'], + list('abc'), + None, + [{1: 2}, ]) + self._check('sampmdsize', SAMPMDSIZE, data) + + def test_has_nan(self): + data = (np.array([[1, 2, np.nan], [4, 5, 6]]), + ['x', 'y'], + list('abc')) + self._check('hasnan', HASNAN, data) + + def test_has_inf(self): + data = (np.array([[1, 2, np.inf], [4, 5, 6]]), + ['x', 'y'], + list('abc')) + self._check('hasinf', HASINF, data) if __name__ == '__main__': diff --git a/biom/tests/test_parse.py b/biom/tests/test_parse.py index 1b310160..de09ecd5 100644 --- a/biom/tests/test_parse.py +++ b/biom/tests/test_parse.py @@ -20,6 +20,7 @@ from biom.parse import (generatedby, MetadataMap, parse_biom_table, parse_uc, load_table, save_table) from biom.table import Table +from biom.err import errstate, IGNORE from biom.util import __version__ from biom.tests.long_lines import (uc_empty, uc_invalid_id, uc_minimal, uc_lib_minimal, @@ -61,8 +62,9 @@ def tearDown(self): def test_from_tsv_bug_854(self): data = StringIO('#FeatureID\tSample1') - exp = Table([], [], ['Sample1']) - obs = Table.from_tsv(data, None, None, lambda x: x) + with errstate(all=IGNORE): + exp = Table([], [], ['Sample1']) + obs = Table.from_tsv(data, None, None, lambda x: x) self.assertEqual(obs, exp) def test_generatedby(self):