Skip to content

Commit

Permalink
MAINT: code formatting
Browse files (browse the repository at this point in the history)
  • Loading branch information
Vini2 committed Jun 14, 2024
1 parent 39e732e commit 796d23d
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 58 deletions.
11 changes: 10 additions & 1 deletion phables/workflow/scripts/phables_utils/genome_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# Class for genome path
class GenomePath:
def __init__(
self, id, bubble_case, node_order, node_order_human, node_id_order, path, coverage, length, gc
self,
id,
bubble_case,
node_order,
node_order_human,
node_id_order,
path,
coverage,
length,
gc,
):
self.id = id
self.bubble_case = bubble_case
Expand Down
88 changes: 63 additions & 25 deletions phables/workflow/scripts/phables_utils/long_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def resolve_long(

# Case 2 - only one is circular
elif one_circular:

case_name = "case2_linear"

case2_found.add(my_count)
Expand Down Expand Up @@ -293,7 +292,9 @@ def resolve_long(
]

repeat_order = f"{repeat_unitig_name}:fwd," * repeat_count
path_with_repeats_human = f"{unitig_name}:fwd,{repeat_order[:-1]}"
path_with_repeats_human = (
f"{unitig_name}:fwd,{repeat_order[:-1]}"
)
node_id_order_with_repeats = [unitig_to_consider] + [
repeat_unitig for x in range(repeat_count)
]
Expand All @@ -318,7 +319,6 @@ def resolve_long(

# Case 3 components
elif len(candidate_nodes) > 2 and len(candidate_nodes) <= compcount:

case_name = "case3_circular"

# Create initial directed graph with coverage values
Expand Down Expand Up @@ -805,11 +805,17 @@ def resolve_long(

for c in path_order:
if c.endswith("+"):
path_node_order_human += f"{c[:-1]}:fwd,"
path_node_order_human += (
f"{c[:-1]}:fwd,"
)
else:
path_node_order_human += f"{c[:-1]}:rev,"

path_node_order_human = path_node_order_human[:-1]
path_node_order_human += (
f"{c[:-1]}:rev,"
)

path_node_order_human = path_node_order_human[
:-1
]

# Create GenomePath object with path details
genome_path = GenomePath(
Expand Down Expand Up @@ -871,9 +877,12 @@ def resolve_long(
logger.debug(f"Identified candidate sinks: {sink_candidates}")

if len(source_candidates) > 0 and len(sink_candidates) > 0:

source_node_indices = [unitig_names_rev[x[:-1]] for x in source_candidates]
sink_node_indices = [unitig_names_rev[x[:-1]] for x in sink_candidates]
source_node_indices = [
unitig_names_rev[x[:-1]] for x in source_candidates
]
sink_node_indices = [
unitig_names_rev[x[:-1]] for x in sink_candidates
]

# Create refined directed graph for flow network
# ----------------------------------------------------------------------
Expand Down Expand Up @@ -1000,7 +1009,9 @@ def resolve_long(
# Extend subpath using coverages of predecessors
for u_pred in G_edge.predecessors(u):
u_pred_name = unitig_names_rev[u_pred[:-1]]
u_pred_index = candidate_nodes.index(u_pred_name) + 1
u_pred_index = (
candidate_nodes.index(u_pred_name) + 1
)
u_pred_cov = unitig_coverages[u_pred[:-1]]
u_cov = unitig_coverages[u[:-1]]

Expand All @@ -1026,7 +1037,9 @@ def resolve_long(
# Extend subpath using coverages of successors
for v_succ in G_edge.successors(v):
v_succ_name = unitig_names_rev[v_succ[:-1]]
v_succ_index = candidate_nodes.index(v_succ_name) + 1
v_succ_index = (
candidate_nodes.index(v_succ_name) + 1
)
v_succ_cov = unitig_coverages[v_succ[:-1]]
v_cov = unitig_coverages[v[:-1]]

Expand Down Expand Up @@ -1070,7 +1083,9 @@ def resolve_long(

if subpath_coverage > covtol:
u_pred_name = unitig_names_rev[u_pred[:-1]]
u_pred_index = candidate_nodes.index(u_pred_name) + 1
u_pred_index = (
candidate_nodes.index(u_pred_name) + 1
)
if (
(v_index - 1) not in source_node_indices
and (u_index - 1) not in source_node_indices
Expand Down Expand Up @@ -1099,9 +1114,12 @@ def resolve_long(

if subpath_coverage > covtol:
v_succ_name = unitig_names_rev[v_succ[:-1]]
v_succ_index = candidate_nodes.index(v_succ_name) + 1
v_succ_index = (
candidate_nodes.index(v_succ_name) + 1
)
if (
(v_succ_index - 1) not in source_node_indices
(v_succ_index - 1)
not in source_node_indices
and (u_index - 1) not in source_node_indices
and (v_index - 1) not in source_node_indices
and (v_index - 1) not in sink_node_indices
Expand All @@ -1119,7 +1137,9 @@ def resolve_long(

# Add common start to source links
for source_v in source_candidates:
source_node_index = candidate_nodes.index(unitig_names_rev[source_v[:-1]]) + 1
source_node_index = (
candidate_nodes.index(unitig_names_rev[source_v[:-1]]) + 1
)
source_node_cov = unitig_coverages[source_v[:-1]]
cov_upper_bound = int(max_comp_cov * alpha)

Expand All @@ -1137,7 +1157,9 @@ def resolve_long(

# Add common sink to end links
for sink_v in sink_candidates:
sink_node_index = candidate_nodes.index(unitig_names_rev[sink_v[:-1]]) + 1
sink_node_index = (
candidate_nodes.index(unitig_names_rev[sink_v[:-1]]) + 1
)
sink_node_cov = unitig_coverages[sink_v[:-1]]
cov_upper_bound = int(max_comp_cov * alpha)

Expand All @@ -1150,10 +1172,12 @@ def resolve_long(
)
)

subpaths[subpath_count] = [sink_node_index, len(candidate_nodes) + 1]
subpaths[subpath_count] = [
sink_node_index,
len(candidate_nodes) + 1,
]
subpath_count += 1


logger.debug(f"edge_list_indices: {edge_list_indices}")
logger.debug(f"subpaths: {subpaths}")

Expand Down Expand Up @@ -1218,7 +1242,11 @@ def resolve_long(
# Get mapped unitigs in order from the flow network
path_order = []
for path_edge in candidate_paths[0]:
if not( path_edge == 0 or path_edge == len(candidate_nodes) + 1):
if not (
path_edge == 0
or path_edge
== len(candidate_nodes) + 1
):
path_order.append(
edge_list_indices[path_edge]
)
Expand Down Expand Up @@ -1267,11 +1295,17 @@ def resolve_long(

for c in path_order:
if c.endswith("+"):
path_node_order_human += f"{c[:-1]}:fwd,"
path_node_order_human += (
f"{c[:-1]}:fwd,"
)
else:
path_node_order_human += f"{c[:-1]}:rev,"

path_node_order_human = path_node_order_human[:-1]
path_node_order_human += (
f"{c[:-1]}:rev,"
)

path_node_order_human = (
path_node_order_human[:-1]
)

# Create GenomePath object with path details
genome_path = GenomePath(
Expand Down Expand Up @@ -1422,7 +1456,11 @@ def resolve_long(
and len(in_degree) > 0
and len(out_degree) > 0
):
coverage_frac = max(path_coverages) / min(path_coverages) if min(path_coverages) > 0 else 1
coverage_frac = (
max(path_coverages) / min(path_coverages)
if min(path_coverages) > 0
else 1
)

# Create GenomeComponent object with component details
genome_comp = GenomeComponent(
Expand Down
4 changes: 3 additions & 1 deletion phables/workflow/scripts/phables_utils/output_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def write_res_genome_info(all_resolved_paths, output):
"""

with open(f"{output}/resolved_genome_info.txt", "w") as myfile:
myfile.write(f"Path\tCase\tCoverage\tLength\tGC content\tNode order (gfa link format)\tNode order (human readable)\n")
myfile.write(
f"Path\tCase\tCoverage\tLength\tGC content\tNode order (gfa link format)\tNode order (human readable)\n"
)
for genomic_path in all_resolved_paths:
myfile.write(
f"{genomic_path.id}\t{genomic_path.bubble_case}\t{genomic_path.coverage}\t{genomic_path.length}\t{genomic_path.gc}\t{genomic_path.node_order}\t{genomic_path.node_order_human}\n"
Expand Down
Loading

0 comments on commit 796d23d

Please sign in to comment.