sgkit-dev · jeromekelleher · Oct 25, 2024 · Oct 23, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/bio2zarr/vcf2zarr/icf.py b/bio2zarr/vcf2zarr/icf.py
@@ -512,7 +512,7 @@ def compute_laa_field(variant) -> np.ndarray:
     The LAA field is a list of one-based indices into the ALT alleles
     that indicates which alternate alleles are observed in the sample.
 
-    This method infers which alleles are observed from the GT, AD, and PL fields.
+    This method infers which alleles are observed from the GT field.
     """
     sample_count = variant.num_called + variant.num_unknown
     alt_allele_count = len(variant.ALT)
@@ -528,48 +528,6 @@ def compute_laa_field(variant) -> np.ndarray:
             np.bincount, axis=1, arr=genotypes, minlength=allele_count
         )
         allele_counts += genotype_allele_counts
-    if "AD" in variant.FORMAT:
-        depths = variant.format("AD")
-        depths.clip(0, None, out=depths)
-
-        def bincount_nonzero(arr, *, minlength):
-            # nonzero returns the indices of the nonzero elements for each axis
-            return np.bincount(arr.nonzero()[0], minlength=minlength)
-
-        depths_allele_counts = np.apply_along_axis(
-            bincount_nonzero, axis=1, arr=depths, minlength=allele_count
-        )
-        allele_counts += depths_allele_counts
-    if "PL" in variant.FORMAT:
-        likelihoods = variant.format("PL")
-        likelihoods.clip(0, None, out=likelihoods)
-        # n is the indices of the nonzero likelihoods
-        n = np.tile(np.arange(likelihoods.shape[1]), (likelihoods.shape[0], 1))
-        assert n.shape == likelihoods.shape
-        n[likelihoods <= 0] = 0
-        ploidy = variant.ploidy
-
-        if ploidy == 1:
-            a = n
-            b = np.zeros_like(a)
-        elif ploidy == 2:
-            # We have n = b(b+1) / 2 + a
-            # We need to compute a and b
-            b = np.ceil(np.sqrt(2 * n + 9 / 4) - 3 / 2).astype(int)
-            a = (n - b * (b + 1) / 2).astype(int)
-        else:
-            # TODO: Handle all possible ploidy
-            raise ValueError(f"Cannot handle ploidy = {ploidy}")
-
-        a_counts = np.apply_along_axis(
-            np.bincount, axis=1, arr=a, minlength=allele_count
-        )
-        b_counts = np.apply_along_axis(
-            np.bincount, axis=1, arr=b, minlength=allele_count
-        )
-        assert a_counts.shape == b_counts.shape == allele_counts.shape
-        allele_counts += a_counts
-        allele_counts += b_counts
 
     allele_counts[:, 0] = 0  # We don't count the reference allele
     max_row_length = 1
@@ -640,6 +598,7 @@ def compute_lpl_field(variant, laa_val: np.ndarray) -> np.ndarray:
         pl_val = np.broadcast_to(pl_val, n.shape)
     row_index = np.arange(pl_val.shape[0]).reshape(-1, 1)
     lpl_val = pl_val[row_index, n]
+    lpl_val[b == constants.INT_FILL] = constants.INT_FILL
 
     return lpl_val
 

diff --git a/tests/test_vcf_examples.py b/tests/test_vcf_examples.py
@@ -446,41 +446,41 @@ def ds(self, tmp_path_factory):
 
     def test_call_LAA(self, ds):
         call_LAA = [
-            [[1, -2, -2], [1, -2, -2]],
-            [[1, -2, -2], [1, -2, -2]],
-            [[1, 2, -2], [1, 2, -2]],
+            [[1, -2, -2], [-2, -2, -2]],
+            [[-2, -2, -2], [1, -2, -2]],
+            [[1, -2, -2], [2, -2, -2]],
             [[1, 2, 3], [2, 3, -2]],
-            [[1, -2, -2], [1, -2, -2]],
-            [[2, -2, -2], [1, -2, -2]],
+            [[-2, -2, -2], [-2, -2, -2]],
+            [[-2, -2, -2], [1, -2, -2]],
             [[1, -2, -2], [-1, -2, -2]],
             [[2, -2, -2], [2, -2, -2]],
-            [[1, -2, -2], [-2, -2, -2]],
             [[-2, -2, -2], [-2, -2, -2]],
-            [[1, -2, -2], [1, -2, -2]],
+            [[-2, -2, -2], [-2, -2, -2]],
+            [[-2, -2, -2], [1, -2, -2]],
         ]
         nt.assert_array_equal(ds.call_LAA.values, call_LAA)
 
     def test_call_LPL(self, ds):
         call_LPL = [
             [
                 [100, 0, 105, -2, -2, -2, -2, -2, -2, -2],
-                [0, 100, 200, -2, -2, -2, -2, -2, -2, -2],
+                [0, -2, -2, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
-                [0, 100, 200, -2, -2, -2, -2, -2, -2, -2],
+                [0, -2, -2, -2, -2, -2, -2, -2, -2, -2],
                 [154, 22, 0, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
-                [1002, 55, 1002, 0, 55, 1002, -2, -2, -2, -2],
-                [154, 154, 0, 154, 102, 102, -2, -2, -2, -2],
+                [1002, 55, 1002, -2, -2, -2, -2, -2, -2, -2],
+                [154, 154, 102, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
                 [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                 [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
             ],
             [
-                [30, 30, 30, -2, -2, -2, -2, -2, -2, -2],
-                [30, 60, 0, -2, -2, -2, -2, -2, -2, -2],
+                [30, -2, -2, -2, -2, -2, -2, -2, -2, -2],
+                [30, -2, -2, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
                 [0, 30, -2, -2, -2, -2, -2, -2, -2, -2],
@@ -495,12 +495,12 @@ def test_call_LPL(self, ds):
                 [10, 40, 60, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
-                [30, 30, 30, -2, -2, -2, -2, -2, -2, -2],
-                [30, -1, 0, -2, -2, -2, -2, -2, -2, -2],
+                [30, -2, -2, -2, -2, -2, -2, -2, -2, -2],
+                [30, -2, -2, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
-                [-1, -1, -1, -2, -2, -2, -2, -2, -2, -2],
-                [-1, -1, -1, -2, -2, -2, -2, -2, -2, -2],
+                [-1, -2, -2, -2, -2, -2, -2, -2, -2, -2],
+                [-1, -2, -2, -2, -2, -2, -2, -2, -2, -2],
             ],
             [
                 [-1, -1, -2, -2, -2, -2, -2, -2, -2, -2],
@@ -637,107 +637,12 @@ def test_call_AD(self, ds):
         nt.assert_array_equal(ds.call_AD.values, call_AD)
 
     def test_call_LAA(self, ds):
-        call_LAA = [
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, 2, -2, -2], [1, 2, -2, -2], [1, 2, -2, -2]],
-            [[1, 2, 3, -2], [1, 2, 3, -2], [1, 2, 3, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
-            [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
-            [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
-            [[1, 2, 3, -2], [1, 2, 3, -2], [1, 2, 3, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, 2, -2, -2], [1, 2, -2, -2], [1, 2, -2, -2]],
-            [[1, -2, -2, -2], [1, -2, -2, -2], [1, -2, -2, -2]],
-            [[1, 2, -2, -2], [1, 2, -2, -2], [1, 2, -2, -2]],
-            [[1, 2, 3, -2], [1, 2, 3, -2], [1, 2, 3, -2]],
-            [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
-            [[1, 2, 3, -2], [1, 2, 3, -2], [1, 2, 3, -2]],
-        ]
+        # All the genotypes are 0/0
+        call_LAA = np.full((23, 3, 1), -2)
         nt.assert_array_equal(ds.call_LAA.values, call_LAA)
 
     def test_call_LPL(self, ds):
-        # fmt: off
-        call_LPL = [
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2]],  # noqa: E501
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800]],  # noqa: E501
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800]],  # noqa: E501
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800]],  # noqa: E501
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2]],  # noqa: E501
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, -2, -2, -2, -2, -2, -2, -2, -2, -2]],
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2]],  # noqa: E501
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800],  # noqa: E501
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, 120, 1800, 1800, 1800, 1800]],  # noqa: E501
-            [[0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2],
-             [0, 120, 1800, 120, 1800, 1800, 120, 1800, 1800, 1800, -2, -2, -2, -2, -2]]
-        ]
-        # fmt: on
+        call_LPL = np.tile([0, -2, -2], (23, 3, 1))
         nt.assert_array_equal(ds.call_LPL.values, call_LPL)
 
     def test_call_PID(self, ds):