Merge pull request #307 from MannLabs/timstof-transpose

FIX bug in timstof transpose function
MannLabs · Aug 15, 2024 · d3425ea · d3425ea
2 parents 011c84d + f66ac5c
commit d3425ea
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 12 deletions.
diff --git a/alphadia/data/bruker.py b/alphadia/data/bruker.py
@@ -105,7 +105,10 @@ def transpose(self):
 
         logger.info("Transposing detector events")
         push_indices, tof_indptr, intensity_values = transpose(
-            self._tof_indices, self._push_indptr, self._intensity_values
+            self._tof_indices,
+            self._push_indptr,
+            len(self._mz_values),
+            self._intensity_values,
         )
         logger.info("Finished transposing data")
 
@@ -859,7 +862,7 @@ def build_chunks(number_of_elements, num_chunks):
 
 
 @nb.njit
-def transpose(tof_indices, push_indptr, values):
+def transpose(tof_indices, push_indptr, n_tof_indices, values):
     """
     The default alphatims data format consists of a sparse matrix where pushes are the rows, tof indices (discrete mz values) the columns and intensities the values.
     A lookup starts with a given push index p which points to the row. The start and stop indices of the row are accessed from dia_data.push_indptr[p] and dia_data.push_indptr[p+1].
@@ -877,6 +880,9 @@ def transpose(tof_indices, push_indptr, values):
     push_indptr : np.ndarray
         start stop values for each row (n_rows +1)
 
+    n_tof_indices : int
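+        number of possible tof index values, i.e. columns of the input matrix (not len(tof_indices)); usually equal to len(dia_data.mz_values) and must exceed every value in tof_indices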
+
     values : np.ndarray
         values (n_values)
 
@@ -896,28 +902,25 @@ def transpose(tof_indices, push_indptr, values):
         values (n_values)
 
     """
-    # this is one less than the old col count or the new row count
-    max_tof_index = tof_indices.max()
-
-    tof_indcount = np.zeros((max_tof_index + 1), dtype=np.uint32)
+    tof_indcount = np.zeros((n_tof_indices), dtype=np.uint32)
 
     # get new row counts
     for v in tof_indices:
         tof_indcount[v] += 1
 
     # get new indptr
-    tof_indptr = np.zeros((max_tof_index + 1 + 1), dtype=np.int64)
+    tof_indptr = np.zeros((n_tof_indices + 1), dtype=np.int64)
 
-    for i in range(max_tof_index + 1):
+    for i in range(n_tof_indices):
         tof_indptr[i + 1] = tof_indptr[i] + tof_indcount[i]
 
-    tof_indcount = np.zeros((max_tof_index + 1), dtype=np.uint32)
+    tof_indcount = np.zeros((n_tof_indices), dtype=np.uint32)
 
     # get new values
     push_indices = np.zeros((len(tof_indices)), dtype=np.uint32)
     new_values = np.zeros_like(values)
 
-    chunks = build_chunks(max_tof_index + 1, 20)
+    chunks = build_chunks(n_tof_indices, 20)
 
     with nb.objmode:
         alphatims.utils.set_threads(20)

diff --git a/tests/unit_tests/test_data.py b/tests/unit_tests/test_data.py
@@ -8,13 +8,14 @@ def test_transpose():
     values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
     tof_indices = np.array([0, 3, 2, 4, 1, 2, 4])
     push_ptr = np.array([0, 2, 4, 5, 7])
+    n_tof_indices = 7
 
     push_indices, tof_indptr, intensity_values = bruker.transpose(
-        tof_indices, push_ptr, values
+        tof_indices, push_ptr, n_tof_indices, values
     )
 
     _push_indices = np.array([0, 2, 1, 3, 0, 1, 3])
-    _tof_indptr = np.array([0, 1, 2, 4, 5, 7])
+    _tof_indptr = np.array([0, 1, 2, 4, 5, 7, 7, 7])
     _intensity_values = np.array([1.0, 5.0, 3.0, 6.0, 2.0, 4.0, 7.0])
 
     assert np.allclose(push_indices, _push_indices)