Skip to content

Commit

Permalink
Add tests for dump_kmers()
Browse files Browse the repository at this point in the history
  • Loading branch information
Adamtaranto committed Sep 30, 2024
1 parent 631d0f8 commit 5087fdd
Showing 1 changed file with 144 additions and 2 deletions.
146 changes: 144 additions & 2 deletions src/python/tests/test_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@pytest.fixture
def kmer_count_table():
"""Fixture to set up a KmerCountTable instance with sample data."""
kct = KmerCountTable(ksize=4)
kct = KmerCountTable(ksize=4, store_kmers=True)
kct.count("AAAA") # 17832910516274425539
kct.count("TTTT") # 17832910516274425539
kct.count("AATT") # 382727017318141683
Expand All @@ -21,7 +21,7 @@ def kmer_count_table():
@pytest.fixture
def empty_kmer_count_table():
"""Fixture to set up an empty KmerCountTable instance."""
return KmerCountTable(ksize=4)
return KmerCountTable(ksize=4, store_kmers=True)


def test_dump_conflicting_sort_options(kmer_count_table):
Expand Down Expand Up @@ -165,3 +165,145 @@ def test_dump_hash_empty_table(empty_kmer_count_table):

# Cleanup
remove(temp_file_path)


# Tests for dump_kmers()


def test_dump_kmers_conflicting_sort_options(kmer_count_table):
"""Test that passing both sortcounts=True and sortkeys=True raises a ValueError."""
with pytest.raises(
ValueError, match="Cannot sort by both counts and kmers at the same time."
):
kmer_count_table.dump_kmers(file=None, sortcounts=True, sortkeys=True)


def test_dump_kmers_sortcounts_with_ties(kmer_count_table):
"""Test the dump_kmers function with sortcounts=True, ensuring it handles ties in counts."""
result = kmer_count_table.dump_kmers(file=None, sortcounts=True, sortkeys=False)
unsorted = kmer_count_table.dump_kmers(file=None, sortcounts=False, sortkeys=False)

assert result != unsorted, "If this fails choose a different test dataset"

# Expected output sorted by count, with secondary sorting by kmer for ties
expected = [
("AATT", 1),
(
"AAAA",
2,
), # 'AAAA'/'TTTT' is tied with 'GGGG / CCCC' on counts, 'AAAA' is lexicographically smaller
("CCCC", 2),
]

assert result == expected, f"Expected {expected}, but got {result}"


def test_dump_kmers_single_kmer():
"""Test the dump_kmers function with only a single k-mer counted."""
kct = KmerCountTable(ksize=4, store_kmers=True)
kct.count("AAAA") # Canonical kmer: 'AAAA'

result = kct.dump_kmers(file=None, sortcounts=True, sortkeys=False)

expected = [("AAAA", 1)]

assert result == expected, f"Expected {expected}, but got {result}"


def test_dump_kmers_write_to_file(kmer_count_table):
"""Test the dump_kmers function when writing to a file.
This test checks if the function correctly writes the kmer:count pairs to a file.
"""
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file_path = temp_file.name

kmer_count_table.dump_kmers(file=temp_file_path, sortcounts=True, sortkeys=False)

with open(temp_file_path, "r") as f:
lines = f.readlines()

# Expected output sorted by count then kmer (default behavior)
expected_lines = [
f"AATT\t1\n",
f"AAAA\t2\n", # 'AAAA'/'TTTT'
f"CCCC\t2\n",
]

assert lines == expected_lines, f"Expected {expected_lines}, but got {lines}"

# Cleanup
remove(temp_file_path)


def test_dump_kmers_write_to_file_sortkeys(kmer_count_table):
"""Test the dump_kmers function with sortkeys=True when writing to a file."""
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file_path = temp_file.name

kmer_count_table.dump_kmers(file=temp_file_path, sortkeys=True)

with open(temp_file_path, "r") as f:
lines = f.readlines()

# Expected output sorted by canonical kmers
expected_lines = [
f"AAAA\t2\n", # 'AAAA'/'TTTT'
f"AATT\t1\n",
f"CCCC\t2\n",
]

assert lines == expected_lines, f"Expected {expected_lines}, but got {lines}"

# Cleanup
remove(temp_file_path)


def test_dump_kmers_sortkeys(kmer_count_table):
"""Test the dump_kmers function with sortkeys=True.
This test verifies if the function sorts by canonical k-mers when `sortkeys` is set to True.
"""
result = kmer_count_table.dump_kmers(file=None, sortkeys=True)
unsorted = kmer_count_table.dump_kmers(file=None, sortcounts=False, sortkeys=False)

assert result != unsorted, "If this fails choose a different test dataset"

# Expected output sorted by canonical kmer
expected = [
("AAAA", 2), # 'AAAA'/'TTTT'
("AATT", 1),
("CCCC", 2),
]

assert result == expected, f"Expected {expected}, but got {result}"


def test_dump_kmers_invalid_file_path(kmer_count_table):
"""Test that passing an invalid file path raises an error."""
with pytest.raises(OSError):
kmer_count_table.dump_kmers(file="", sortkeys=True)


def test_dump_kmers_empty_table(empty_kmer_count_table):
"""Test the dump_kmers function on an empty KmerCountTable.
This test checks that the function handles an empty table correctly.
"""
# Test that calling dump_kmers without file returns an empty list
result = empty_kmer_count_table.dump_kmers(file=None, sortkeys=False)
assert result == [], "Expected an empty list from an empty KmerCountTable"

# Test that calling dump_kmers with a file writes nothing to the file
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file_path = temp_file.name

empty_kmer_count_table.dump_kmers(file=temp_file_path, sortkeys=False)

with open(temp_file_path, "r") as f:
lines = f.readlines()

assert lines == [], "Expected an empty file for an empty KmerCountTable"

# Cleanup
remove(temp_file_path)

0 comments on commit 5087fdd

Please sign in to comment.