Skip to content

Commit

Permalink
fix: correct long inputs for antismash summary
Browse files: browse the repository at this point in the history
  • Loading branch information
matinnuhamunada committed Jul 12, 2023
1 parent 765d622 commit 144ad89
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
32 changes: 30 additions & 2 deletions workflow/bgcflow/bgcflow/data/make_genome_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,36 @@ def write_genome_table(input_json, samples_table, genome_table):
df_samples = pd.concat(dfList, axis=0)

# Handle multiple json
input_json = input_json.split()
bgc_counts = combine_bgc_counts(input_json)
input_json = Path(input_json)
logging.info(input_json)
if input_json.is_file() and input_json.suffix == ".json":
logging.info(f"Getting BGC overview from a single file: {input_json}")
input_json_files = input_json

elif input_json.is_file() and input_json.suffix == ".txt":
logging.info(f"Getting BGC overview from a text file: {input_json}")
with open(input_json, "r") as file:
file_content = [i.strip("\n") for i in file.readlines()]
if len(file_content) == 1:
# Paths space-separated on a single line
paths = file_content[0].split()
else:
# Paths written on separate lines
paths = file_content
input_json_files = [
Path(path) for path in paths if Path(path).suffix == ".json"
]
else:
input_json_files = [
Path(file)
for file in str(input_json).split()
if Path(file).suffix == ".json"
]
logging.info(
f"Getting BGC overview from the given list of {len(input_json_files)} files..."
)

bgc_counts = combine_bgc_counts(input_json_files)
bgc_counts = pd.DataFrame.from_dict(bgc_counts).T

logging.debug(f"Writing file to: {genome_table}")
Expand Down
7 changes: 6 additions & 1 deletion workflow/rules/bgc_analytics.smk
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,12 @@ rule antismash_summary:
df_samples=lambda wildcards: PEP_PROJECTS[wildcards.name].config["sample_table"],
shell:
"""
python workflow/bgcflow/bgcflow/data/make_genome_dataset.py '{input.bgc_count}' '{params.df_samples}' {output.df_antismash_summary} 2>> {log}
TMPDIR="data/interim/tmp/{wildcards.name}/{wildcards.version}"
mkdir -p $TMPDIR
INPUT_JSON="$TMPDIR/df_bgc_counts.txt"
echo '{input.bgc_count}' > $INPUT_JSON
python workflow/bgcflow/bgcflow/data/make_genome_dataset.py $INPUT_JSON '{params.df_samples}' {output.df_antismash_summary} 2>> {log}
rm $INPUT_JSON
"""

rule write_dependency_versions:
Expand Down

0 comments on commit 144ad89

Please sign in to comment.