From b625362fc3130214358425a038cbf4ff2f9783e0 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 13 Sep 2024 22:30:39 +1000 Subject: [PATCH 1/2] Add test placeholders --- src/python/tests/test_attr.py | 31 +++++++++++ src/python/tests/test_basic.py | 85 +------------------------------ src/python/tests/test_dunders.py | 25 +++++++++ src/python/tests/test_kmer_map.py | 8 +++ src/python/tests/test_output.py | 24 +++++++++ src/python/tests/test_remove.py | 19 +++++++ src/python/tests/test_setops.py | 68 +++++++++++++++++++++++++ 7 files changed, 176 insertions(+), 84 deletions(-) create mode 100644 src/python/tests/test_attr.py create mode 100644 src/python/tests/test_dunders.py create mode 100644 src/python/tests/test_kmer_map.py create mode 100644 src/python/tests/test_output.py create mode 100644 src/python/tests/test_remove.py create mode 100644 src/python/tests/test_setops.py diff --git a/src/python/tests/test_attr.py b/src/python/tests/test_attr.py new file mode 100644 index 0000000..1d539f5 --- /dev/null +++ b/src/python/tests/test_attr.py @@ -0,0 +1,31 @@ +import oxli +import pytest +from test_basic import create_sample_kmer_table + +# Test attributes + +def test_hashes_attribute(): + table = create_sample_kmer_table(3, ["AAA", "TTT", "AAC"]) + hashes = table.hashes + hash_aaa = table.hash_kmer("AAA") # 10679328328772601858 + hash_ttt = table.hash_kmer("TTT") # 10679328328772601858 + hash_aac = table.hash_kmer("AAC") # 6579496673972597301 + + expected_hashes = set( + [hash_aaa, hash_ttt, hash_aac] + ) # {10679328328772601858, 6579496673972597301} + assert ( + set(hashes) == expected_hashes + ), ".hashes attribute should match the expected set of hash keys" + + +def test_version_attr(): + '''Check version attribute matches current version.''' + pass + +def test_total_consumed_seq_len_attr(): + '''Should log total seq len consumed.''' + # Individual kmers + # Long seqs with multiple kmers + # Exclude invalid kmers? 
+ pass \ No newline at end of file diff --git a/src/python/tests/test_basic.py b/src/python/tests/test_basic.py index ba44c33..97ed7eb 100644 --- a/src/python/tests/test_basic.py +++ b/src/python/tests/test_basic.py @@ -107,22 +107,6 @@ def test_consume_bad_DNA_ignore_is_default(): assert cg.get("CCGA") == 1 # rc -# Test attributes -def test_hashes_attribute(): - table = create_sample_kmer_table(3, ["AAA", "TTT", "AAC"]) - hashes = table.hashes - hash_aaa = table.hash_kmer("AAA") # 10679328328772601858 - hash_ttt = table.hash_kmer("TTT") # 10679328328772601858 - hash_aac = table.hash_kmer("AAC") # 6579496673972597301 - - expected_hashes = set( - [hash_aaa, hash_ttt, hash_aac] - ) # {10679328328772601858, 6579496673972597301} - assert ( - set(hashes) == expected_hashes - ), ".hashes attribute should match the expected set of hash keys" - - # Getting counts def test_count_vs_counthash(): # test a bug reported by adam taranto: count and get should work together! @@ -180,75 +164,8 @@ def test_get_hash_array(): ), "Hash array counts should match the counts of 'AAA' and 'AAC' and return zero for 'GGG'." assert rev_counts == [0, 1, 2], "Count should be in same order as input list" - def test_get_array(): """ Get vector of counts corresponding to vector of kmers. """ - # TODO: Add function to get list of counts given list of kmers. 
- pass - - -# Set operations -def test_union(): - table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) - table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) - - union_set = table1.union(table2) - expected_union = set(table1.hashes).union(table2.hashes) - - assert union_set == expected_union, "Union of hash sets should match" - - -def test_intersection(): - table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) - table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) - - intersection_set = table1.intersection(table2) - expected_intersection = set(table1.hashes).intersection(table2.hashes) - - assert ( - intersection_set == expected_intersection - ), "Intersection of hash sets should match" - - -def test_difference(): - table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) - table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) - - difference_set = table1.difference(table2) - expected_difference = set(table1.hashes).difference(table2.hashes) - - assert difference_set == expected_difference, "Difference of hash sets should match" - - -def test_symmetric_difference(): - table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) - table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) - - symmetric_difference_set = table1.symmetric_difference(table2) - expected_symmetric_difference = set(table1.hashes).symmetric_difference( - table2.hashes - ) - - assert ( - symmetric_difference_set == expected_symmetric_difference - ), "Symmetric difference of hash sets should match" - - -def test_dunder_methods(): - table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) - table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) - - assert table1.__or__(table2) == table1.union( - table2 - ), "__or__ method should match union()" - assert table1.__and__(table2) == table1.intersection( - table2 - ), "__and__ method should match intersection()" - assert table1.__sub__(table2) == table1.difference( - table2 - ), "__sub__ method should match difference()" - assert table1.__xor__(table2) == 
table1.symmetric_difference( - table2 - ), "__xor__ method should match symmetric_difference()" + pass \ No newline at end of file diff --git a/src/python/tests/test_dunders.py b/src/python/tests/test_dunders.py new file mode 100644 index 0000000..3290748 --- /dev/null +++ b/src/python/tests/test_dunders.py @@ -0,0 +1,25 @@ +import oxli +import pytest +from test_basic import create_sample_kmer_table + + +def test_len_dunder_method(): + '''__len__ should return number of keys in KmerCountTable.''' + pass + +def test_iter_dunder_method(): + '''KmerCountTable should be iterable, yield hash:count pairs''' + pass + +def test_next_dunder_method(): + '''Select next key in generator''' + pass + +def test_getitem_dunder_method(): + '''Query an object using the indexing syntax (obj[key])''' + # Same behaviour as .get() + pass + +def test_setitem_dunder_method(): + '''Set values using the indexing syntax (obj[key] = value)''' + pass \ No newline at end of file diff --git a/src/python/tests/test_kmer_map.py b/src/python/tests/test_kmer_map.py new file mode 100644 index 0000000..f4bf376 --- /dev/null +++ b/src/python/tests/test_kmer_map.py @@ -0,0 +1,8 @@ +import oxli +import pytest +from test_basic import create_sample_kmer_table + + +def test_kmermap(): + '''Test option to add kmermap''' + pass \ No newline at end of file diff --git a/src/python/tests/test_output.py b/src/python/tests/test_output.py new file mode 100644 index 0000000..8394602 --- /dev/null +++ b/src/python/tests/test_output.py @@ -0,0 +1,24 @@ +import oxli +import pytest +from test_basic import create_sample_kmer_table + + +def test_serialise(): + '''Serialise object to JSON ''' + pass + +def test_deserialise(): + '''Load object from file.''' + pass + +def test_dump(): + '''Write tab delimited kmer:count pairs''' + pass + +def test_dump_hash(): + '''Write tab delimited hash_count pairs ''' + pass + +def test_histo(): + '''Write frequency counts.''' + pass diff --git a/src/python/tests/test_remove.py 
b/src/python/tests/test_remove.py new file mode 100644 index 0000000..b129063 --- /dev/null +++ b/src/python/tests/test_remove.py @@ -0,0 +1,19 @@ +import oxli +import pytest +from test_basic import create_sample_kmer_table + +def test_drop(): + '''Remove kmer by name.''' + pass + +def test_drop_hash(): + '''Remove record by hash.''' + pass + +def test_mincut(): + '''Remove all records with counts < threshold. ''' + pass + +def test_maxcut(): + '''Remove all records with counts > threshold. ''' + pass diff --git a/src/python/tests/test_setops.py b/src/python/tests/test_setops.py new file mode 100644 index 0000000..df71ee3 --- /dev/null +++ b/src/python/tests/test_setops.py @@ -0,0 +1,68 @@ +import oxli +import pytest + +from test_basic import create_sample_kmer_table + +# Set operations +def test_union(): + table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) + table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) + + union_set = table1.union(table2) + expected_union = set(table1.hashes).union(table2.hashes) + + assert union_set == expected_union, "Union of hash sets should match" + + +def test_intersection(): + table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) + table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) + + intersection_set = table1.intersection(table2) + expected_intersection = set(table1.hashes).intersection(table2.hashes) + + assert ( + intersection_set == expected_intersection + ), "Intersection of hash sets should match" + + +def test_difference(): + table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) + table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) + + difference_set = table1.difference(table2) + expected_difference = set(table1.hashes).difference(table2.hashes) + + assert difference_set == expected_difference, "Difference of hash sets should match" + + +def test_symmetric_difference(): + table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) + table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) + + symmetric_difference_set = 
table1.symmetric_difference(table2) + expected_symmetric_difference = set(table1.hashes).symmetric_difference( + table2.hashes + ) + + assert ( + symmetric_difference_set == expected_symmetric_difference + ), "Symmetric difference of hash sets should match" + + +def test_dunder_methods(): + table1 = create_sample_kmer_table(3, ["AAA", "AAC"]) + table2 = create_sample_kmer_table(3, ["AAC", "AAG"]) + + assert table1.__or__(table2) == table1.union( + table2 + ), "__or__ method should match union()" + assert table1.__and__(table2) == table1.intersection( + table2 + ), "__and__ method should match intersection()" + assert table1.__sub__(table2) == table1.difference( + table2 + ), "__sub__ method should match difference()" + assert table1.__xor__(table2) == table1.symmetric_difference( + table2 + ), "__xor__ method should match symmetric_difference()" From 42ffd71d0c317f30ea4f12539943115792bc35a6 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 13 Sep 2024 22:30:59 +1000 Subject: [PATCH 2/2] Add function placeholders --- src/lib.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 78c341e..0c3afba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,12 @@ impl KmerCountTable { } } + // TODO: Optionally store hash:kmer pair when counting a new kmer + // Modify KmerCountTable to optionally store map of hash:kmer + // Modify SeqToHashes to return canonical kmer & hash + + // TODO: Add function to get canonical kmer using hash key + fn hash_kmer(&self, kmer: String) -> Result { if kmer.len() as u8 != self.ksize { Err(anyhow!("wrong ksize")) @@ -92,6 +98,35 @@ impl KmerCountTable { hash_keys.iter().map(|&key| self.get_hash(key)).collect() } + // TODO: Add method "drop" + // remove kmer from table + + // TODO: Add method "drop_hash" + // remove hash from table + + // TODO: Add "mincut". Remove counts below a minimum cutoff. + + // TODO: Add "maxcut". Remove counts above a maximum cutoff. 
+ + // TODO: Serialize the KmerCountTable instance to a JSON string. + + // TODO: Compress JSON string with gzip and save to file + + // TODO: Static method to load KmerCountTable from serialized JSON. Yield new object. + + // TODO: Add method "dump" + // Output tab delimited kmer:count pairs + // Default sort by count + // Option sort kmers lexicographically + + // TODO: Add method "dump_hash" + // Output tab delimited hash:count pairs + // Default sort by count + // Option sort on keys + + // TODO: Add method "histo" + // Output frequency counts + // Getter for the 'hashes' attribute, returning all hash keys in the table #[getter] pub fn hashes(&self) -> Vec { @@ -99,6 +134,12 @@ impl KmerCountTable { self.counts.keys().cloned().collect() } + // TODO: Getter for the version attribute + // Store oxli version when instance is created + + // TODO: Getter for the consumed seq len attribute + // Update tracker when DNA is processed with count() or consume() + // Consume this DNA string. Return number of k-mers consumed. #[pyo3(signature = (seq, allow_bad_kmers=true))] pub fn consume(&mut self, seq: String, allow_bad_kmers: bool) -> PyResult { @@ -179,6 +220,16 @@ impl KmerCountTable { fn __xor__(&self, other: &KmerCountTable) -> HashSet { self.symmetric_difference(other) } + + // Python dunder method for __iter__ + + // Python dunder method for __next__ + + // Python dunder method for __len__ + + // Python dunder method for __getitem__ + + // Python dunder method for __setitem__ } // Python module definition