Skip to content

Commit

Permalink
Merge pull request #686 from ARTbio/print_unique_counts
Browse files Browse the repository at this point in the history
repenrich2 prints unique counts
  • Loading branch information
drosofff authored Apr 21, 2024
2 parents 03183e2 + 0c14ce1 commit b1761de
Show file tree
Hide file tree
Showing 8 changed files with 302 additions and 1 deletion.
6 changes: 6 additions & 0 deletions tools/repenrich2/RepEnrich2.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ def run_bowtie(args):
sumofrepeatreads += int(line[4])
print(f"Identified {sumofrepeatreads} unique reads that mapped to repeats.")

# print unique mapper counts
with open("unique_mapper_counts.tsv", 'w') as fout:
fout.write("#element\tcount\n")
for count in sorted(counts):
fout.write(f"{count}\t{counts[count]}\n")

# multimapper parsing
if not paired_end:
args_list = [(metagenome, fastqfile_1) for
Expand Down
2 changes: 1 addition & 1 deletion tools/repenrich2/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">2.31.1</token>
-<token name="@VERSION_SUFFIX@">4</token>
+<token name="@VERSION_SUFFIX@">5</token>
<token name="@PROFILE@">23.0</token>

<xml name="repenrich_requirements">
Expand Down
3 changes: 3 additions & 0 deletions tools/repenrich2/repenrich2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
</inputs>

<outputs>
<data format="tabular" name="unique_mapper_counts" label="RepEnrich on ${on_string}: unique mapper counts" from_work_dir="unique_mapper_counts.tsv" />
<data format="tabular" name="class_fraction_counts" label="RepEnrich on ${on_string}: class fraction counts" from_work_dir="class_fraction_counts.tsv" />
<data format="tabular" name="family_fraction_counts" label="RepEnrich on ${on_string}: family fraction counts" from_work_dir="family_fraction_counts.tsv" />
<data format="tabular" name="fraction_counts" label="RepEnrich on ${on_string}: fraction counts" from_work_dir="fraction_counts.tsv" />
Expand All @@ -114,6 +115,7 @@
<param name="genomeSource" value="history"/>
<param name="genome" value="chrY-1-500k.fa" ftype="fasta"/>
<param name="repeatmasker" value="chrY-1-500k.fa.out" ftype="txt"/>
<output name="unique_mapper_counts" file="chrY_single_unique_mapper_counts.tab" ftype="tabular"/>
<output name="class_fraction_counts" file="chrY_single_class_fraction_counts.tab" ftype="tabular"/>
<output name="family_fraction_counts" file="chrY_single_family_fraction_counts.tab" ftype="tabular"/>
<output name="fraction_counts" file="chrY_single_fraction_counts.tab" ftype="tabular"/>
Expand All @@ -125,6 +127,7 @@
<param name="genomeSource" value="history"/>
<param name="genome" value="chrY-1-500k.fa" ftype="fasta"/>
<param name="repeatmasker" value="chrY-1-500k.fa.out" ftype="txt"/>
<output name="unique_mapper_counts" file="chrY_paired_unique_mapper_counts.tab" ftype="tabular"/>
<output name="class_fraction_counts" file="chrY_paired_class_fraction_counts.tab" ftype="tabular"/>
<output name="family_fraction_counts" file="chrY_paired_family_fraction_counts.tab" ftype="tabular"/>
<output name="fraction_counts" file="chrY_paired_fraction_counts.tab" ftype="tabular"/>
Expand Down
Binary file removed tools/repenrich2/test-data/Samp.fastq.gz
Binary file not shown.
Binary file removed tools/repenrich2/test-data/Samp_L.fastq.gz
Binary file not shown.
Binary file removed tools/repenrich2/test-data/Samp_R.fastq.gz
Binary file not shown.
146 changes: 146 additions & 0 deletions tools/repenrich2/test-data/chrY_paired_unique_mapper_counts.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#element count
A-rich 0
ACCORD2_I-int 0
ACCORD2_LTR 0
ACCORD_I-int 0
BARI1 0
BATUMI_LTR 0
BS 0
BS2 58
BURDOCK_I-int 0
Baggins1 0
Bica_I-int 38
Bica_LTR 0
CIRCE 0
Chouto_I-int 0
Copia1-I_DM 0
Copia_I-int 25181
Copia_LTR 647
DIVER2_I-int 0
DIVER2_LTR 0
DM1731_I-int 6
DM1731_LTR 0
DM176_I-int 0
DM412 0
DM412B_LTR 0
DMCR1A 4
DMLTR5 0
DMRT1A 0
DMRT1B 86
DMRT1C 0
DNAREP1_DM 0
DOC2_DM 0
DOC3_DM 0
FB4_DM 38
FROGGER_I-int 0
FW2_DM 0
G3_DM 0
G5A_DM 0
G5_DM 0
G6_DM 0
GA-rich 0
GTWIN_I-int 0
G_DM 0
Gypsy11_I-int 0
Gypsy11_LTR 0
Gypsy12_LTR 0
Gypsy2-I_DM 2
Gypsy2-LTR_DM 0
Gypsy3_LTR 0
Gypsy4_I-int 0
Gypsy5_I-int 0
Gypsy6A_LTR 0
Gypsy6_I-int 26
Gypsy8_I-int 0
Gypsy8_LTR 0
Gypsy9_I-int 0
Gypsy_I-int 32
Gypsy_LTR 1
HELENA_RT 0
HETA 24
HMSBEAGLE_I-int 2
IDEFIX_I-int 4
IDEFIX_LTR 0
Invader1_I-int 0
Invader1_LTR 0
Invader2_I-int 0
Invader4_I-int 0
Invader4_LTR 0
Invader5_I-int 0
Invader5_LTR 0
Invader6_I-int 0
Invader6_LTR 0
MAX_I-int 49
MAX_LTR 2
MDG1_I-int 0
MDG1_LTR 0
MDG3_I-int 152
MDG3_LTR 0
MICROPIA_I-int 0
MICROPIA_LTR 0
Mariner2_DM 0
NINJA_I-int 0
NOMAD_I-int 0
PROTOP_A 32
PROTOP_B 0
QUASIMODO2-I_DM 42
QUASIMODO2-LTR_DM 0
QUASIMODO_I-int 10
QUASIMODO_LTR 2
R1_DM 0
ROOA_I-int 0
ROOA_LTR 0
ROVER-I_DM 381
ROVER-LTR_DM 2
S2_DM 0
STALKER4_I-int 77
STALKER4_LTR 4
S_DM 48
Stalker2_I-int 80
Stalker2_LTR 2
TART-A 4
TART_B1 19
TC1-2_DM 0
TC1_DM 0
TLD2 0
TRANSIB1 0
TRANSIB2 30
ZAM_I-int 0
_AACACA_n 0
_AAT_n 0
_ACAATAG_n 0
_ACC_n 0
_AGAGAAG_n 0
_AGAGA_n 0
_ATAAT_n 0
_ATATATT_n 0
_ATATTAT_n 0
_ATTTTT_n 0
_ATT_n 0
_AT_n 0
_A_n 0
_CATA_n 0
_CTTTT_n 0
_GAGAA_n 0
_GCCTTT_n 0
_TAATAT_n 0
_TAATA_n 0
_TATAAAA_n 0
_TATAA_n 0
_TATCATG_n 0
_TA_n 0
_TGTTG_n 0
_TTATATA_n 0
_TTATAT_n 0
_TTATA_n 0
_TTA_n 0
_TTCTT_n 0
_TTC_n 0
_TTTAT_n 0
_TTTA_n 0
_TTTC_n 0
_TTTGA_n 0
_TTTTAG_n 0
_TTTTCTT_n 0
_TTTTC_n 0
_T_n 0
146 changes: 146 additions & 0 deletions tools/repenrich2/test-data/chrY_single_unique_mapper_counts.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#element count
A-rich 0
ACCORD2_I-int 0
ACCORD2_LTR 0
ACCORD_I-int 0
BARI1 0
BATUMI_LTR 0
BS 0
BS2 29
BURDOCK_I-int 0
Baggins1 0
Bica_I-int 19
Bica_LTR 0
CIRCE 0
Chouto_I-int 0
Copia1-I_DM 0
Copia_I-int 12652
Copia_LTR 134
DIVER2_I-int 1
DIVER2_LTR 0
DM1731_I-int 3
DM1731_LTR 0
DM176_I-int 0
DM412 0
DM412B_LTR 0
DMCR1A 2
DMLTR5 0
DMRT1A 0
DMRT1B 35
DMRT1C 0
DNAREP1_DM 0
DOC2_DM 0
DOC3_DM 0
FB4_DM 15
FROGGER_I-int 0
FW2_DM 0
G3_DM 0
G5A_DM 0
G5_DM 0
G6_DM 0
GA-rich 0
GTWIN_I-int 0
G_DM 0
Gypsy11_I-int 0
Gypsy11_LTR 0
Gypsy12_LTR 0
Gypsy2-I_DM 1
Gypsy2-LTR_DM 0
Gypsy3_LTR 0
Gypsy4_I-int 0
Gypsy5_I-int 0
Gypsy6A_LTR 0
Gypsy6_I-int 12
Gypsy8_I-int 0
Gypsy8_LTR 0
Gypsy9_I-int 0
Gypsy_I-int 15
Gypsy_LTR 0
HELENA_RT 0
HETA 12
HMSBEAGLE_I-int 1
IDEFIX_I-int 0
IDEFIX_LTR 0
Invader1_I-int 0
Invader1_LTR 0
Invader2_I-int 0
Invader4_I-int 0
Invader4_LTR 0
Invader5_I-int 0
Invader5_LTR 0
Invader6_I-int 0
Invader6_LTR 0
MAX_I-int 27
MAX_LTR 1
MDG1_I-int 0
MDG1_LTR 0
MDG3_I-int 70
MDG3_LTR 0
MICROPIA_I-int 0
MICROPIA_LTR 0
Mariner2_DM 0
NINJA_I-int 0
NOMAD_I-int 0
PROTOP_A 18
PROTOP_B 0
QUASIMODO2-I_DM 19
QUASIMODO2-LTR_DM 0
QUASIMODO_I-int 3
QUASIMODO_LTR 1
R1_DM 0
ROOA_I-int 0
ROOA_LTR 0
ROVER-I_DM 188
ROVER-LTR_DM 1
S2_DM 0
STALKER4_I-int 28
STALKER4_LTR 0
S_DM 25
Stalker2_I-int 32
Stalker2_LTR 2
TART-A 2
TART_B1 10
TC1-2_DM 0
TC1_DM 0
TLD2 0
TRANSIB1 0
TRANSIB2 12
ZAM_I-int 0
_AACACA_n 0
_AAT_n 0
_ACAATAG_n 0
_ACC_n 0
_AGAGAAG_n 0
_AGAGA_n 0
_ATAAT_n 0
_ATATATT_n 0
_ATATTAT_n 0
_ATTTTT_n 0
_ATT_n 0
_AT_n 0
_A_n 0
_CATA_n 0
_CTTTT_n 0
_GAGAA_n 0
_GCCTTT_n 0
_TAATAT_n 0
_TAATA_n 0
_TATAAAA_n 0
_TATAA_n 0
_TATCATG_n 0
_TA_n 0
_TGTTG_n 0
_TTATATA_n 0
_TTATAT_n 0
_TTATA_n 0
_TTA_n 0
_TTCTT_n 0
_TTC_n 0
_TTTAT_n 0
_TTTA_n 0
_TTTC_n 0
_TTTGA_n 0
_TTTTAG_n 0
_TTTTCTT_n 0
_TTTTC_n 0
_T_n 0

0 comments on commit b1761de

Please sign in to comment.