Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: seqcoll.to_fasta supports block_size #1822

Merged
merged 6 commits into from
Apr 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions src/cogent3/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,9 +984,20 @@ def to_json(self):
"""returns json formatted string"""
return json.dumps(self.to_rich_dict())

def to_fasta(self):
"""Return alignment in Fasta format"""
return alignment_to_fasta(self.to_dict())
def to_fasta(self, block_size: int = 60) -> str:
    """Serialise this collection as a Fasta formatted string.

    Parameters
    ----------
    block_size
        the sequence length to write to each line,
        by default 60

    Returns
    -------
    The Fasta formatted text built from the name -> string mapping
    given by ``self.to_dict()``.
    """
    # Convert to plain name -> string form first, then delegate the
    # line wrapping to the shared Fasta writer.
    name_to_seq = self.to_dict()
    return alignment_to_fasta(name_to_seq, block_size=block_size)

def to_nexus(self, seq_type, wrap=50):
"""
Expand Down Expand Up @@ -1192,10 +1203,15 @@ def get_seq(self, seqname):
"""Return a sequence object for the specified seqname."""
return self.named_seqs[seqname]

def to_dict(self):
"""Returns the alignment as dict of names -> strings.
def to_dict(self) -> dict[str, str]:
"""Returns the alignment as a dict of sequence names -> strings.

Note the mapping goes to strings, not Sequence objects.

Note: returns strings, NOT Sequence objects.
Returns
-------
a dict mapping sequence names to a string representation of
their sequences.
"""
align_dict = {}

Expand Down
45 changes: 38 additions & 7 deletions src/cogent3/format/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,45 @@
"""Writer for FASTA sequence format
"""

from typing import Optional

from cogent3.format.util import _AlignmentFormatter


def alignment_to_fasta(alignment_dict, block_size=60, order=None):
"""Returns a Fasta string given an alignment."""
def alignment_to_fasta(
    alignment_dict: dict[str, str],
    block_size: int = 60,
    order: Optional[list[str]] = None,
) -> str:
    """Render a mapping of sequence names to sequences as Fasta text.

    Parameters
    ----------
    alignment_dict
        dict mapping each sequence name to its sequence string
    block_size
        the sequence length to write to each line,
        by default 60
    order
        optional list of sequence names giving the order in which to
        write the records. Assumed to be a complete and correct list
        of names, by default None

    Returns
    -------
    The alignment in the Fasta format.
    """
    if not order:
        # Any falsy value (None, empty list, ...) is normalised to a
        # fresh empty list before being handed to the formatter.
        order = []
    formatter = FastaFormatter()
    return formatter.format(alignment_dict, block_size, order)


class FastaFormatter(_AlignmentFormatter):
def format(self, alignment_dict, block_size, order):
def format(
self,
alignment_dict: dict[str, str],
block_size: int = 60,
order: Optional[list[str]] = None,
) -> str:
"""Format the alignment to Fasta.

Parameters
Expand All @@ -21,12 +49,15 @@ def format(self, alignment_dict, block_size, order):
dict of seq_name
block_size
the sequence length to write to each line,
default is 60
by default 60
order
optional list of sequence names, which order to
print in.
optional list of sequence names, which order to print in.
Assumes complete and correct list of names,
by default None

(Assumes complete and correct list of names)
Returns
-------
The alignment in the Fasta format.
"""
# setup
if not order:
Expand Down
14 changes: 12 additions & 2 deletions tests/test_core/test_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,18 @@ def test_to_phylip(self):

def test_to_fasta(self):
"""SequenceCollection should return correct FASTA string"""
aln = self.Class(["AAA", "CCC"])
self.assertEqual(aln.to_fasta(), ">seq_0\nAAA\n>seq_1\nCCC\n")
aln1 = self.Class(["AAA", "CCC"])
self.assertEqual(aln1.to_fasta(), ">seq_0\nAAA\n>seq_1\nCCC\n")
self.assertEqual(aln1.to_fasta(block_size=2), ">seq_0\nAA\nA\n>seq_1\nCC\nC\n")

aln2 = self.Class(["GCATGCAT", "TCAGACGT"])
self.assertEqual(aln2.to_fasta(), ">seq_0\nGCATGCAT\n>seq_1\nTCAGACGT\n")
self.assertEqual(
aln2.to_fasta(block_size=4), ">seq_0\nGCAT\nGCAT\n>seq_1\nTCAG\nACGT\n"
)
self.assertEqual(
aln2.to_fasta(block_size=3), ">seq_0\nGCA\nTGC\nAT\n>seq_1\nTCA\nGAC\nGT\n"
)

def test_to_nexus(self):
"""SequenceCollection should return correct Nexus string format"""
Expand Down
Loading