oxli-bio · Adamtaranto · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/src/lib.rs b/src/lib.rs
@@ -26,6 +26,12 @@ impl KmerCountTable {
         }
     }
 
+    // TODO: Optionally store hash:kmer pair when counting a new kmer
+    // Modify KmerCountTable to optionally store map of hash:kmer
+    // Modify SeqToHashes to return canonical kmer & hash
+
+    // TODO: Add function to get canonical kmer using hash key
+
     fn hash_kmer(&self, kmer: String) -> Result<u64> {
         if kmer.len() as u8 != self.ksize {
             Err(anyhow!("wrong ksize"))
@@ -92,13 +98,48 @@ impl KmerCountTable {
         hash_keys.iter().map(|&key| self.get_hash(key)).collect()
     }
 
+    // TODO: Add method "drop"
+    // remove kmer from table
+
+    // TODO: Add method "drop_hash"
+    // remove hash from table
+
+    // TODO: Add "mincut". Remove counts below a minimum cutoff.
+
+    // TODO: Add "maxcut". Remove counts above a maximum cutoff.
+
+    // TODO: Serialize the KmerCountTable instance to a JSON string.
+
+    // TODO: Compress JSON string with gzip and save to file
+
+    // TODO: Static method to load KmerCountTable from serialized JSON. Yield new object.
+
+    // TODO: Add method "dump"
+    // Output tab delimited kmer:count pairs
+    // Default sort by count
+    // Optionally sort kmers lexicographically
+
+    // TODO: Add method "dump_hash"
+    // Output tab delimited hash:count pairs
+    // Default sort by count
+    // Optionally sort on keys
+
+    // TODO: Add method "histo"
+    // Output frequency counts
+
     // Getter for the 'hashes' attribute, returning all hash keys in the table
     #[getter]
     pub fn hashes(&self) -> Vec<u64> {
         // Collect and return all keys from the counts HashMap
         self.counts.keys().cloned().collect()
     }
 
+    // TODO: Getter for the version attribute
+    // Store oxli version when instance is created
+
+    // TODO: Getter for the consumed seq len attribute
+    // Update tracker when DNA is processed with count() or consume()
+
     // Consume this DNA string. Return number of k-mers consumed.
     #[pyo3(signature = (seq, allow_bad_kmers=true))]
     pub fn consume(&mut self, seq: String, allow_bad_kmers: bool) -> PyResult<u64> {
@@ -179,6 +220,16 @@ impl KmerCountTable {
     fn __xor__(&self, other: &KmerCountTable) -> HashSet<u64> {
         self.symmetric_difference(other)
     }
+
+    // TODO: Python dunder method for __iter__
+
+    // TODO: Python dunder method for __next__
+
+    // TODO: Python dunder method for __len__
+
+    // TODO: Python dunder method for __getitem__
+
+    // TODO: Python dunder method for __setitem__
 }
 
 // Python module definition

diff --git a/src/python/tests/test_attr.py b/src/python/tests/test_attr.py
@@ -0,0 +1,31 @@
+import oxli
+import pytest
+from test_basic import create_sample_kmer_table
+
+# Test attributes
+
+def test_hashes_attribute():
+    table = create_sample_kmer_table(3, ["AAA", "TTT", "AAC"])
+    hashes = table.hashes
+    hash_aaa = table.hash_kmer("AAA")  # 10679328328772601858
+    hash_ttt = table.hash_kmer("TTT")  # 10679328328772601858
+    hash_aac = table.hash_kmer("AAC")  # 6579496673972597301
+
+    expected_hashes = set(
+        [hash_aaa, hash_ttt, hash_aac]
+    )  # {10679328328772601858, 6579496673972597301}
+    assert (
+        set(hashes) == expected_hashes
+    ), ".hashes attribute should match the expected set of hash keys"
+
+
+def test_version_attr():
+    '''Check version attribute matches current version.'''
+    pass
+
+def test_total_consumed_seq_len_attr():
+    '''Should log total seq len consumed.'''
+    # Individual kmers
+    # Long seqs with multiple kmers
+    # Exclude invalid kmers?
+    pass
diff --git a/src/python/tests/test_basic.py b/src/python/tests/test_basic.py
@@ -107,22 +107,6 @@ def test_consume_bad_DNA_ignore_is_default():
     assert cg.get("CCGA") == 1  # rc
 
 
-# Test attributes
-def test_hashes_attribute():
-    table = create_sample_kmer_table(3, ["AAA", "TTT", "AAC"])
-    hashes = table.hashes
-    hash_aaa = table.hash_kmer("AAA")  # 10679328328772601858
-    hash_ttt = table.hash_kmer("TTT")  # 10679328328772601858
-    hash_aac = table.hash_kmer("AAC")  # 6579496673972597301
-
-    expected_hashes = set(
-        [hash_aaa, hash_ttt, hash_aac]
-    )  # {10679328328772601858, 6579496673972597301}
-    assert (
-        set(hashes) == expected_hashes
-    ), ".hashes attribute should match the expected set of hash keys"
-
-
 # Getting counts
 def test_count_vs_counthash():
     # test a bug reported by adam taranto: count and get should work together!
@@ -180,75 +164,8 @@ def test_get_hash_array():
     ), "Hash array counts should match the counts of 'AAA' and 'AAC' and return zero for 'GGG'."
     assert rev_counts == [0, 1, 2], "Count should be in same order as input list"
 
-
 def test_get_array():
     """
     Get vector of counts corresponding to vector of kmers.
     """
-    # TODO: Add function to get list of counts given list of kmers.
-    pass
-
-
-# Set operations
-def test_union():
-    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
-    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
-
-    union_set = table1.union(table2)
-    expected_union = set(table1.hashes).union(table2.hashes)
-
-    assert union_set == expected_union, "Union of hash sets should match"
-
-
-def test_intersection():
-    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
-    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
-
-    intersection_set = table1.intersection(table2)
-    expected_intersection = set(table1.hashes).intersection(table2.hashes)
-
-    assert (
-        intersection_set == expected_intersection
-    ), "Intersection of hash sets should match"
-
-
-def test_difference():
-    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
-    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
-
-    difference_set = table1.difference(table2)
-    expected_difference = set(table1.hashes).difference(table2.hashes)
-
-    assert difference_set == expected_difference, "Difference of hash sets should match"
-
-
-def test_symmetric_difference():
-    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
-    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
-
-    symmetric_difference_set = table1.symmetric_difference(table2)
-    expected_symmetric_difference = set(table1.hashes).symmetric_difference(
-        table2.hashes
-    )
-
-    assert (
-        symmetric_difference_set == expected_symmetric_difference
-    ), "Symmetric difference of hash sets should match"
-
-
-def test_dunder_methods():
-    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
-    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
-
-    assert table1.__or__(table2) == table1.union(
-        table2
-    ), "__or__ method should match union()"
-    assert table1.__and__(table2) == table1.intersection(
-        table2
-    ), "__and__ method should match intersection()"
-    assert table1.__sub__(table2) == table1.difference(
-        table2
-    ), "__sub__ method should match difference()"
-    assert table1.__xor__(table2) == table1.symmetric_difference(
-        table2
-    ), "__xor__ method should match symmetric_difference()"
+    pass
diff --git a/src/python/tests/test_dunders.py b/src/python/tests/test_dunders.py
@@ -0,0 +1,25 @@
+import oxli
+import pytest
+from test_basic import create_sample_kmer_table
+
+
+def test_len_dunder_method():
+    '''__len__ should return number of keys in KmerCountTable.'''
+    pass
+
+def test_iter_dunder_method():
+    '''KmerCountTable should be iterable, yield hash:count pairs'''
+    pass
+
+def test_next_dunder_method():
+    '''Select next key in generator'''
+    pass
+
+def test_getitem_dunder_method():
+    '''Query an object using the indexing syntax (obj[key])'''
+    # Same behaviour as .get()
+    pass
+
+def test_setitem_dunder_method():
+    '''Set values using the indexing syntax (obj[key] = value)'''
+    pass
diff --git a/src/python/tests/test_kmer_map.py b/src/python/tests/test_kmer_map.py
@@ -0,0 +1,8 @@
+import oxli
+import pytest
+from test_basic import create_sample_kmer_table
+
+
+def test_kmermap():
+    '''Test option to add kmermap'''
+    pass
diff --git a/src/python/tests/test_output.py b/src/python/tests/test_output.py
@@ -0,0 +1,24 @@
+import oxli
+import pytest
+from test_basic import create_sample_kmer_table
+
+
+def test_serialise():
+    '''Serialise object to JSON.'''
+    pass
+
+def test_deserialise():
+    '''Load object from file.'''
+    pass
+
+def test_dump():
+    '''Write tab delimited kmer:count pairs'''
+    pass
+
+def test_dump_hash():
+    '''Write tab delimited hash:count pairs'''
+    pass
+
+def test_histo():
+    '''Write frequency counts.'''
+    pass
diff --git a/src/python/tests/test_remove.py b/src/python/tests/test_remove.py
@@ -0,0 +1,19 @@
+import oxli
+import pytest
+from test_basic import create_sample_kmer_table
+
+def test_drop():
+    '''Remove kmer by name.'''
+    pass
+
+def test_drop_hash():
+    '''Remove record by hash.'''
+    pass
+
+def test_mincut():
+    '''Remove all records with counts < threshold.'''
+    pass
+
+def test_maxcut():
+    '''Remove all records with counts > threshold.'''
+    pass
diff --git a/src/python/tests/test_setops.py b/src/python/tests/test_setops.py
@@ -0,0 +1,68 @@
+import oxli
+import pytest
+
+from test_basic import create_sample_kmer_table
+
+# Set operations
+def test_union():
+    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
+    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
+
+    union_set = table1.union(table2)
+    expected_union = set(table1.hashes).union(table2.hashes)
+
+    assert union_set == expected_union, "Union of hash sets should match"
+
+
+def test_intersection():
+    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
+    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
+
+    intersection_set = table1.intersection(table2)
+    expected_intersection = set(table1.hashes).intersection(table2.hashes)
+
+    assert (
+        intersection_set == expected_intersection
+    ), "Intersection of hash sets should match"
+
+
+def test_difference():
+    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
+    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
+
+    difference_set = table1.difference(table2)
+    expected_difference = set(table1.hashes).difference(table2.hashes)
+
+    assert difference_set == expected_difference, "Difference of hash sets should match"
+
+
+def test_symmetric_difference():
+    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
+    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
+
+    symmetric_difference_set = table1.symmetric_difference(table2)
+    expected_symmetric_difference = set(table1.hashes).symmetric_difference(
+        table2.hashes
+    )
+
+    assert (
+        symmetric_difference_set == expected_symmetric_difference
+    ), "Symmetric difference of hash sets should match"
+
+
+def test_dunder_methods():
+    table1 = create_sample_kmer_table(3, ["AAA", "AAC"])
+    table2 = create_sample_kmer_table(3, ["AAC", "AAG"])
+
+    assert table1.__or__(table2) == table1.union(
+        table2
+    ), "__or__ method should match union()"
+    assert table1.__and__(table2) == table1.intersection(
+        table2
+    ), "__and__ method should match intersection()"
+    assert table1.__sub__(table2) == table1.difference(
+        table2
+    ), "__sub__ method should match difference()"
+    assert table1.__xor__(table2) == table1.symmetric_difference(
+        table2
+    ), "__xor__ method should match symmetric_difference()"