Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed output error when extra_copy_number >= 10 #62

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions liftoff/align_features.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from multiprocessing import Pool
import math
from functools import partial
import numpy as np
from pyfaidx import Fasta, Faidx
import math
from multiprocessing import Pool
from os import path
import subprocess

import numpy as np
from pyfaidx import Faidx, Fasta
import pysam

from liftoff import aligned_seg, liftoff_utils
from os import path


def align_features_to_target(ref_chroms, target_chroms, args, feature_hierarchy, liftover_type, unmapped_features):
Expand Down Expand Up @@ -54,14 +56,33 @@ def align_single_chroms(ref_chroms, target_chroms, threads, args, genome_size, l
command = [minimap2_path, '-o', output_file, target_file, features_file] + args.mm2_options.split(" ") + [
"--split-prefix", split_prefix, '-t', threads_arg]
subprocess.run(command)
modified_sam_header(output_file)
else:
minimap2_index = build_minimap2_index(target_file, args, threads_arg, minimap2_path)
command = [minimap2_path, '-o', output_file, minimap2_index, features_file] + args.mm2_options.split(" ") + [
'-t', threads_arg]
subprocess.run(command)
modified_sam_header(output_file)
return output_file


def modified_sam_header(sam_file):
header_lines, alignment_lines = [], []
with open(sam_file, 'r') as open_sam:
for eachline in open_sam.readlines():
if eachline[0] == '@':
header_lines.append(eachline)
else:
alignment_lines.append(eachline)
modified_header_lines = []
for header_line in header_lines:
if header_line not in modified_header_lines:
modified_header_lines.append(header_line)
with open(sam_file, 'w') as open_sam:
open_sam.writelines(modified_header_lines)
open_sam.writelines(alignment_lines)


def get_features_file(ref_chroms, args, liftover_type, index):
if ref_chroms[index] == args.reference and (liftover_type == "chrm_by_chrm" or liftover_type == "copies"):
features_name = 'reference_all'
Expand Down
10 changes: 5 additions & 5 deletions liftoff/write_new_gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def add_to_copy_num_dict(parent, copy_num_dict):

def add_attributes(parent, copy_num, args):
parent.score = "."
parent.attributes["extra_copy_number"] = str(copy_num)
parent.attributes["extra_copy_number"] = [copy_num]
parent.attributes["copy_num_ID"] = [parent.id + "_" + str(copy_num)]
if float(parent.attributes["coverage"][0]) < args.a:
parent.attributes["partial_mapping"] = ["True"]
Expand All @@ -49,7 +49,7 @@ def build_parent_dict(child_features, parent_dict, final_parent):
parent_child_dict = {}
for child in child_features:
if child.id not in parent_dict:
child.attributes["extra_copy_number"] = final_parent.attributes["extra_copy_number"][0]
child.attributes["extra_copy_number"] = final_parent.attributes["extra_copy_number"]
if child.attributes["Parent"][0] in parent_child_dict:
parent_child_dict[child.attributes["Parent"][0]].append(child)
else:
Expand Down Expand Up @@ -84,7 +84,7 @@ def make_gff_line(feature):
if attr != "copy_id":
value_str = ""
for value in feature.attributes[attr]:
value_str += value + ","
value_str += str(value) + ","
attributes_str += (attr + "=" + value_str[:-1] + ";")
return feature.seqid + "\t" + feature.source + "\t" + feature.featuretype + "\t" + str(feature.start) + \
"\t" + str(feature.end) + "\t" + "." + "\t" + feature.strand + "\t" + "." + "\t" + attributes_str[:-1]
Expand All @@ -97,7 +97,7 @@ def make_gtf_line(feature):
if len(feature.attributes[attr]) >0:
value_str = ""
for value in feature.attributes[attr]:
value_str += value + ","
value_str += str(value) + ","
attributes_str += (attr + " " + '"' + value_str[:-1] + '"' + "; ")
return feature.seqid + "\t" + feature.source + "\t" + feature.featuretype + "\t" + str(feature.start) + \
"\t" + str(feature.end) + "\t" + "." + "\t" + feature.strand + "\t" + "." + "\t" + attributes_str
"\t" + str(feature.end) + "\t" + "." + "\t" + feature.strand + "\t" + "." + "\t" + attributes_str