-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Removing kmer records #28
Merged
Merged
Changes from 2 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
afac4f0
Add methods for removing records from count table.
Adamtaranto 0134c09
Ruff format
Adamtaranto ab33957
Don't check for correct kmer len in drop(). Let hash_kmer() do it.
Adamtaranto 838e1f8
Merge branch 'main' into dev_remove_kmers
Adamtaranto File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,112 @@ | ||
import oxli | ||
import pytest | ||
from test_basic import create_sample_kmer_table | ||
import oxli | ||
|
||
|
||
@pytest.fixture | ||
def setup_kmer_table(): | ||
"""Fixture to set up a KmerCountTable with ksize=4 and some initial k-mers""" | ||
kct = oxli.KmerCountTable(ksize=4) | ||
kct.count("AAAA") # Hash of canonical form will be used (AAAA) | ||
kct.count("CCCC") # CCCC | ||
kct.count("ATAT") # ATAT | ||
kct.count("GGGG") # Should map to CCCC | ||
kct.count("TTTT") # Should map to AAAA | ||
kct.count("CCCC") # Increment count for CCCC/GGGG | ||
# AAAA/TTTT = 2 | ||
# ATAT = 1 | ||
# CCCC/GGGG = 3 | ||
return kct | ||
|
||
|
||
def test_drop(setup_kmer_table): | ||
""" | ||
Test the drop method to remove a k-mer by its string representation. | ||
Edge case: Dropping a k-mer that doesn't exist. | ||
""" | ||
kct = setup_kmer_table | ||
|
||
# Drop "GGGG" which exists, and check it's removed | ||
kct.drop("GGGG") | ||
assert kct.get("GGGG") == 0, "Expected 'GGGG' to be removed." | ||
|
||
# Drop "AAAA", should remove both "AAAA" and "TTTT" (same canonical form) | ||
kct.drop("AAAA") | ||
assert kct.get("AAAA") == 0, "Expected 'AAAA' (and 'TTTT') to be removed." | ||
|
||
# Edge case: Drop a k-mer that doesn't exist, e.g., "GGGA" | ||
kct.drop("GGGA") # "GGGA" not present in the table | ||
assert kct.get("GGGA") == 0 # "GGGA" not present in the table | ||
|
||
# Raise error if kmer longer than table k len. | ||
with pytest.raises(ValueError): | ||
kct.drop("GGGAA") # "GGGAA" longer than table k | ||
|
||
|
||
def test_drop_hash(setup_kmer_table): | ||
""" | ||
Test the drop_hash method to remove a k-mer by its hash. | ||
Edge case: Dropping a hash that doesn't exist. | ||
""" | ||
kct = setup_kmer_table | ||
|
||
# Drop by the hash for "CCCC", and check it's removed | ||
hashval = kct.hash_kmer("CCCC") | ||
kct.drop_hash(hashval) | ||
assert kct.get_hash(hashval) == 0, "Expected 'CCCC' and 'GGGG' to be removed." | ||
assert kct.get("CCCC") == 0, "Expected 'CCCC' to be removed." | ||
assert kct.get("GGGG") == 0, "Expected 'GGGG' to be removed." | ||
|
||
# Edge case: Drop a hash that doesn't exist | ||
non_existent_hash = 999999999 | ||
kct.drop_hash(non_existent_hash) # Should not raise an error | ||
assert ( | ||
kct.get_hash(non_existent_hash) == 0 | ||
), "Expected non-existent hash removal to succeed." | ||
|
||
|
||
def test_mincut(setup_kmer_table): | ||
""" | ||
Test the mincut method to remove all k-mers with counts less than a given threshold. | ||
Edge cases: Threshold is higher than all counts, no k-mers to remove. | ||
""" | ||
kct = setup_kmer_table | ||
|
||
# Set a threshold that only removes k-mers with counts < 2 | ||
removed = kct.mincut(3) | ||
assert removed == 2, "Expected 2 k-mers to be removed ('ATAT' and 'AAAA/TTTT')." | ||
assert kct.get("GGGG") == 3, "Expected 'GGGG/CCCC' to remain." | ||
|
||
# Edge case: Threshold is higher than all k-mer counts (remove everything) | ||
removed = kct.mincut(10) | ||
assert removed == 1, "Expected all remaining k-mers to be removed ('GGGG/CCCC')." | ||
assert len(kct.hashes) == 0, "Expected no k-mers left after removing all." | ||
|
||
|
||
def test_drop(): | ||
'''Remove kmer by name.''' | ||
pass | ||
def test_maxcut(setup_kmer_table): | ||
""" | ||
Test the maxcut method to remove all k-mers with counts greater than a given threshold. | ||
Edge case: Threshold is lower than all counts, no k-mers to remove. | ||
""" | ||
kct = setup_kmer_table | ||
|
||
def test_drop_hash(): | ||
'''Remove record by hash.''' | ||
pass | ||
# Set a threshold that only removes k-mers with counts > 1 (GGGG) | ||
removed = kct.maxcut(2) | ||
assert removed == 1, "Expected 'CCCC/GGGG' to be removed." | ||
assert kct.get("GGGG") == 0, "Expected 'CCCC/GGGG' to be removed." | ||
assert ( | ||
kct.get("AAAA") == 2 | ||
), "Should not remove kmers with exact maxcut value, only greater." | ||
|
||
def test_mincut(): | ||
'''Remove all records with counts < threshold. ''' | ||
pass | ||
# Edge case: Threshold is higher than all k-mer counts (remove none) | ||
removed = kct.maxcut(10) | ||
assert removed == 0, "Expected no k-mers to be removed since all counts are < 10." | ||
assert ( | ||
len(kct.hashes) == 2 | ||
), "Expected 2 records with counts < 10 to remain in the table." | ||
|
||
def test_maxcut(): | ||
'''Remove all records with counts > threshold. ''' | ||
pass | ||
# Edge case: Threshold is lower than all k-mer counts (remove all) | ||
removed = kct.maxcut(0) | ||
assert removed == 2, "Expected no k-mers to be removed since all counts are > 0." | ||
assert ( | ||
len(kct.hashes) == 0 | ||
), "Expected 0 records with counts < 1 to remain in the table." |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you can replace the if/else with just
which will forward the
Err
up the call stack. (I know I didn't do that elsewhere, not sure why ;)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense. Done.