biocore · wasade · Dec 3, 2024 · Dec 3, 2024 · Dec 3, 2024 · Dec 3, 2024
diff --git a/ChangeLog.md b/ChangeLog.md
@@ -4,6 +4,10 @@ BIOM-Format ChangeLog
 biom-2.1.16-dev
 ---------------
 
+New Features:
+
+* `Table.allclose` is now available to provide approximate-equality testing, including optional equality of `nan` (via `equal_nan=True`), by wrapping NumPy's `allclose`. See issues [#982](https://github.com/biocore/biom-format/issues/982) and [#983](https://github.com/biocore/biom-format/issues/983).
+
 Maintenance:
 
 * Python 3.7 and 3.8 removed from CI as they are [end-of-life](https://devguide.python.org/versions/). Python 3.13 added to CI. See PR[#986](https://github.com/biocore/biom-format/pull/986).

diff --git a/biom/table.py b/biom/table.py
@@ -1839,28 +1839,76 @@ def descriptive_equality(self, other):
             return "Observation metadata are not the same"
         if not np.array_equal(self.metadata(), other.metadata()):
             return "Sample metadata are not the same"
-        if not self._data_equality(other._data):
+        if not self._data_equality(other):
             return "Data elements are not the same"
 
         return "Tables appear equal"
 
     def __eq__(self, other):
         """Equality is determined by the data matrix, metadata, and IDs"""
+        if not self._data_equality_meta(other):
+            return False
+
+        if not self._data_equality(other):
+            return False
+
+        return True
+
+    def allclose(self, other, **allclose_kwargs):
+        """Allow for almost equality testing using np.allclose
+
+        Parameters
+        ----------
+        other : biom.Table
+            The table to compare against.
+        allclose_kwargs : dict
+            Any keyword arguments to provide to np.allclose
+
+        Notes
+        -----
+        Specify `equal_nan=True` to allow Nan to test equal.
+
+        Returns
+        -------
+        bool
+            Whether the two tables are equal within tolerance.
+        """
+        if not self._data_equality_meta(other):
+            return False
+
+        self_data = self._data.tocsr().data
+        other_data = other._data.tocsr().data
+
+        return np.allclose(self_data, other_data, **allclose_kwargs)
+
+    def _data_equality_meta(self, other):
         if not isinstance(other, self.__class__):
             return False
+
         if self.type != other.type:
             return False
+
         if not np.array_equal(self.ids(axis='observation'),
                               other.ids(axis='observation')):
             return False
+
         if not np.array_equal(self.ids(), other.ids()):
             return False
+
         if not np.array_equal(self.metadata(axis='observation'),
                               other.metadata(axis='observation')):
             return False
+
         if not np.array_equal(self.metadata(), other.metadata()):
             return False
-        if not self._data_equality(other._data):
+
+        if self._data.shape != other._data.shape:
+            return False
+
+        if self._data.dtype != other._data.dtype:
+            return False
+
+        if self._data.nnz != other._data.nnz:
             return False
 
         return True
@@ -1879,19 +1927,10 @@ def _data_equality(self, other):
         necessary before performing the final comparison.
 
         """
-        if self._data.shape != other.shape:
-            return False
-
-        if self._data.dtype != other.dtype:
-            return False
-
-        if self._data.nnz != other.nnz:
-            return False
-
-        self._data = self._data.tocsr()
-        other = other.tocsr()
+        self_data = self._data.tocsr()
+        other_data = other._data.tocsr()
 
-        if (self._data != other).nnz > 0:
+        if (self_data != other_data).nnz > 0:
             return False
 
         return True

diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py
@@ -2561,6 +2561,26 @@ def test_sort(self):
         with self.assertRaises(UnknownAxisError):
             t.sort(axis='foo')
 
+    def test_allclose(self):
+        self.assertTrue(self.st1.allclose(self.st1))
+        self.assertTrue(self.st1.allclose(self.st2))
+        self.assertFalse(self.st1.allclose(self.st3))
+
+        st4 = self.st1.copy()
+        st4._data.data += 0.0001
+        self.assertFalse(self.st1.allclose(st4))
+        self.assertTrue(self.st1.allclose(st4, atol=1e-1))
+
+        st5 = self.st1.copy()
+        st6 = self.st1.copy()
+
+        st5._data.data[0] = np.nan
+        st6._data.data[0] = np.nan
+
+        self.assertFalse(st5.allclose(st6))
+        self.assertFalse(st5.allclose(st6, atol=1e-1))
+        self.assertTrue(st5.allclose(st6, equal_nan=True))
+
     def test_eq(self):
         """sparse equality"""
         self.assertTrue(self.st1 == self.st2)
@@ -2573,9 +2593,9 @@ def test_eq(self):
 
     def test_data_equality(self):
         """check equality between tables"""
-        self.assertTrue(self.st1._data_equality(self.st2._data))
-        self.assertTrue(self.st1._data_equality(self.st1._data))
-        self.assertFalse(self.st1._data_equality(self.st3._data))
+        self.assertTrue(self.st1._data_equality(self.st2))
+        self.assertTrue(self.st1._data_equality(self.st1))
+        self.assertFalse(self.st1._data_equality(self.st3))
 
     def test_nonzero(self):
         """Return a list of nonzero positions"""