Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom fragment tile step #14

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions fragfold/create_fragment_msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def main(args):
fragment_name = Path(fragment_a3m).stem
fragment_start_range = (args.fragment_ntermres_start,args.fragment_ntermres_final)
fragment_length = args.fragment_length
fragment_tile_step = args.fragment_tile_step
protein_copies = args.protein_copies

protein_range = (args.protein_ntermres,args.protein_ctermres)
Expand All @@ -23,6 +24,7 @@ def main(args):
protein_range,
fragment_start_range,
fragment_length,
fragment_tile_step,
protein_copies)
else:
# Create heteromeric interaction MSAs: fragments of protein A + full-length protein B
Expand All @@ -37,6 +39,7 @@ def main(args):
protein_range,
fragment_start_range,
fragment_length,
fragment_tile_step,
protein_copies)
with open("a3m_list.txt","w") as file:
for path in msa_path_list:
Expand Down Expand Up @@ -71,6 +74,12 @@ def main(args):
help="The residue length of the fragments",
default=30
)
parser.add_argument(
"--fragment_tile_step",
type=int,
help="The tiling step size in aa when generating fragments",
default=1
)
parser.add_argument(
"--protein_a3m_input",
type=Path,
Expand Down
12 changes: 8 additions & 4 deletions fragfold/src/colabfold_create_msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def verifyProteinRange(protein_range,protein_n_res):
(protein_range[1] > protein_n_res):
raise ValueError(f"Provided protein residue range: ({protein_range[0]},{protein_range[1]}) is invalid")

def createIndividualMSAsFullLengthFragment(a3m_path,name,protein_range,fragment_start_range,fragment_length,protein_copies=1):
def createIndividualMSAsFullLengthFragment(a3m_path,name,protein_range,fragment_start_range,fragment_length,fragment_tile_step,protein_copies=1):
'''Loads a monomer MSA and creates new MSAs that contain 1) a large section of the monomer and 2) a short fragment of the monomer

Args
Expand All @@ -211,6 +211,8 @@ def createIndividualMSAsFullLengthFragment(a3m_path,name,protein_range,fragment_
an inclusive range defining the range of starting residues for fragments of length `fragment_length`
fragment_length : int
the number of residues to take when defining a fragment
fragment_tile_step : int
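the step, in residues, between consecutive fragment start positions; it is used as the `range` step, so it should be a positive integer (1 tiles every start position, 0 raises ValueError)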
'''
print("Generating MSAs for a monomeric interaction...")

Expand All @@ -227,7 +229,7 @@ def createIndividualMSAsFullLengthFragment(a3m_path,name,protein_range,fragment_
except FileExistsError:
print('Directory already exists, possibly overwriting existing files')

fragment_start_iter = range(fragment_start_range[0],min(fragment_start_range[1]+1,protein_n_res-fragment_length+2))
fragment_start_iter = range(fragment_start_range[0],min(fragment_start_range[1]+1,protein_n_res-fragment_length+2),fragment_tile_step)
with Pool() as p:
a3m_out_path_list = p.starmap(createIndividualMSAsFullLengthFragment_starmap,[(a3m_path,fragment_start,fragment_length,dir_name,name,protein_copies,protein_range) for fragment_start in fragment_start_iter])

Expand All @@ -241,7 +243,7 @@ def createIndividualMSAsFullLengthFragment_starmap(a3m_path,fragment_start,fragm
createMSA(a3m_path, protein_range, fragment_range, -1, abs_a3m_out_path, protein_copies)
return abs_a3m_out_path

def createIndividualMSAsFullLengthFragmentHeteromeric(fulllength_a3m_path,fulllength_name,fragment_a3m_path,fragment_name,protein_range,fragment_start_range,fragment_length,protein_copies=1):
def createIndividualMSAsFullLengthFragmentHeteromeric(fulllength_a3m_path,fulllength_name,fragment_a3m_path,fragment_name,protein_range,fragment_start_range,fragment_length,fragment_tile_step,protein_copies=1):
'''Loads a full-length protein MSA and a separate fragment-source protein MSA and creates new MSAs that contain 1) a large section of the full-length protein and 2) a short fragment of the fragment-source protein

Args
Expand All @@ -258,6 +260,8 @@ def createIndividualMSAsFullLengthFragmentHeteromeric(fulllength_a3m_path,fullle
an inclusive range defining the range of starting residues for fragments of length `fragment_length`
fragment_length : int
the number of residues to take when defining a fragment
fragment_tile_step : int
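the step, in residues, between consecutive fragment start positions; it is used as the `range` step, so it should be a positive integer (1 tiles every start position, 0 raises ValueError)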
'''
print("Generating MSAs for a heteromeric interaction...")

Expand All @@ -275,7 +279,7 @@ def createIndividualMSAsFullLengthFragmentHeteromeric(fulllength_a3m_path,fullle
except FileExistsError:
print('Directory already exists, possibly overwriting existing files')

fragment_start_iter = range(fragment_start_range[0],min(fragment_start_range[1]+1,fragmentprotein_n_res-fragment_length+2))
fragment_start_iter = range(fragment_start_range[0],min(fragment_start_range[1]+1,fragmentprotein_n_res-fragment_length+2),fragment_tile_step)
with Pool() as p:
a3m_out_path_list = p.starmap(createIndividualMSAsFullLengthFragmentHeteromeric_starmap,[(fulllength_a3m_path,fragment_a3m_path,fragment_start,fragment_length,dir_name,fulllength_name,fragment_name,protein_copies,protein_range) for fragment_start in fragment_start_iter])

Expand Down