Skip to content

Commit

Permalink
Merge pull request #687 from ARTbio/fix_repenrich2_bammanip
Browse files Browse the repository at this point in the history
Fix repenrich2 bam manipulation
  • Loading branch information
drosofff authored Apr 22, 2024
2 parents b1761de + d15947d commit a9af128
Show file tree
Hide file tree
Showing 9 changed files with 338 additions and 338 deletions.
6 changes: 3 additions & 3 deletions tools/repenrich2/RepEnrich2.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,14 @@ def run_bowtie(args):
# print class-, family- and fraction-repeats counts to files
with open("class_fraction_counts.tsv", 'w') as fout:
for key in sorted(classfractionalcounts):
fout.write(f"{key}\t{classfractionalcounts[key]}\n")
fout.write(f"{key}\t{round(classfractionalcounts[key], 2)}\n")

with open("family_fraction_counts.tsv", 'w') as fout:
for key in sorted(familyfractionalcounts):
fout.write(f"{key}\t{familyfractionalcounts[key]}\n")
fout.write(f"{key}\t{round(familyfractionalcounts[key], 2)}\n")

with open("fraction_counts.tsv", 'w') as fout:
for key in sorted(fractionalcounts):
fout.write(f"{key}\t{repeat_ref[key]['class']}\t"
f"{repeat_ref[key]['family']}\t"
f"{fractionalcounts[key]}\n")
f"{round(fractionalcounts[key], 2)}\n")
2 changes: 1 addition & 1 deletion tools/repenrich2/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">2.31.1</token>
<token name="@VERSION_SUFFIX@">5</token>
<token name="@VERSION_SUFFIX@">6</token>
<token name="@PROFILE@">23.0</token>

<xml name="repenrich_requirements">
Expand Down
12 changes: 6 additions & 6 deletions tools/repenrich2/repenrich2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@
bowtie2 -x $index_path -p \${GALAXY_SLOTS:-4} input.fastq
| samtools sort -@ "\${GALAXY_SLOTS:-4}" -T tmp -O bam -o aligned.bam 2>&1 &&
samtools view -@ "\${GALAXY_SLOTS:-4}" -F 4 -b -q 38 aligned.bam -o unique.bam &&
samtools view -@ "\${GALAXY_SLOTS:-4}" -h -F 4 -b aligned.bam \
| samtools view -@ "\${GALAXY_SLOTS:-4}" -U -b -q 38 - \
| bedtools bamtofastq -i /dev/stdin -fq multimap.fastq &&
samtools view -@ "\${GALAXY_SLOTS:-4}" -F 4 -b aligned.bam
| samtools view -@ "\${GALAXY_SLOTS:-4}" -U filt_aligned.bam -bq 38 - > /dev/null &&
bedtools bamtofastq -i filt_aligned.bam -fq multimap.fastq &&
#else:
bowtie2 -x $index_path -p \${GALAXY_SLOTS:-4} -1 input.fastq -2 input_2.fastq
| samtools sort -@ "\${GALAXY_SLOTS:-4}" -T tmp -O bam -o aligned.bam 2>&1 &&
samtools view -@ "\${GALAXY_SLOTS:-4}" -f 3 -b -q 38 aligned.bam -o unique.bam &&
samtools view -@ "\${GALAXY_SLOTS:-4}" -f 3 -b aligned.bam \
| samtools view -@ "\${GALAXY_SLOTS:-4}" -U -b -q 38 - \
| samtools sort -@ "\${GALAXY_SLOTS:-4}" -n - -
samtools view -@ "\${GALAXY_SLOTS:-4}" -f 3 -b aligned.bam
| samtools view -@ "\${GALAXY_SLOTS:-4}" -U filt_aligned.bam -bq 38 - > /dev/null &&
samtools sort -@ "\${GALAXY_SLOTS:-4}" -T tmp -n filt_aligned.bam
| bedtools bamtofastq -i /dev/stdin -fq multimap_1.fastq -fq2 multimap_2.fastq &&
#end if
samtools index unique.bam &&
Expand Down
12 changes: 6 additions & 6 deletions tools/repenrich2/test-data/chrY_paired_class_fraction_counts.tab
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
DNA 148.0
LINE 195.0
LTR 26742.0
Low_complexity 0.0
RC 0.0
Simple_repeat 0.0
DNA 194.66
LINE 279.83
LTR 27051.62
Low_complexity 8.48
RC 4.24
Simple_repeat 161.17
26 changes: 13 additions & 13 deletions tools/repenrich2/test-data/chrY_paired_family_fraction_counts.tab
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
CMC-Transib 30.0
CR1 4.0
Copia 25834.0
Gypsy 857.0
Helitron 0.0
Jockey 105.0
LOA 0.0
Low_complexity 0.0
P 32.0
Pao 51.0
R1 86.0
Simple_repeat 0.0
TcMar-Tc1 86.0
CMC-Transib 38.48
CR1 8.24
Copia 25859.45
Gypsy 1107.24
Helitron 4.24
Jockey 164.38
LOA 4.24
Low_complexity 8.48
P 40.48
Pao 84.93
R1 102.97
Simple_repeat 161.17
TcMar-Tc1 115.69
290 changes: 145 additions & 145 deletions tools/repenrich2/test-data/chrY_paired_fraction_counts.tab
Original file line number Diff line number Diff line change
@@ -1,145 +1,145 @@
A-rich Low_complexity Low_complexity 0.0
ACCORD2_I-int LTR Gypsy 0.0
ACCORD2_LTR LTR Gypsy 0.0
ACCORD_I-int LTR Gypsy 0.0
BARI1 DNA TcMar-Tc1 0.0
BATUMI_LTR LTR Pao 0.0
BS LINE Jockey 0.0
BS2 LINE Jockey 58.0
BURDOCK_I-int LTR Gypsy 0.0
Baggins1 LINE LOA 0.0
Bica_I-int LTR Gypsy 38.0
Bica_LTR LTR Gypsy 0.0
CIRCE LTR Gypsy 0.0
Chouto_I-int LTR Gypsy 0.0
Copia1-I_DM LTR Copia 0.0
Copia_I-int LTR Copia 25181.0
Copia_LTR LTR Copia 647.0
DIVER2_I-int LTR Pao 0.0
DIVER2_LTR LTR Pao 0.0
DM1731_I-int LTR Copia 6.0
DM1731_LTR LTR Copia 0.0
DM176_I-int LTR Gypsy 0.0
DM412 LTR Gypsy 0.0
DM412B_LTR LTR Gypsy 0.0
DMCR1A LINE CR1 4.0
DMLTR5 LTR Gypsy 0.0
DMRT1A LINE R1 0.0
DMRT1B LINE R1 86.0
DMRT1C LINE R1 0.0
DNAREP1_DM RC Helitron 0.0
DOC2_DM LINE Jockey 0.0
DOC3_DM LINE Jockey 0.0
FB4_DM DNA TcMar-Tc1 38.0
FROGGER_I-int LTR Copia 0.0
FW2_DM LINE Jockey 0.0
G3_DM LINE Jockey 0.0
G5A_DM LINE Jockey 0.0
G5_DM LINE Jockey 0.0
G6_DM LINE Jockey 0.0
GA-rich Low_complexity Low_complexity 0.0
GTWIN_I-int LTR Gypsy 0.0
G_DM LINE Jockey 0.0
Gypsy11_I-int LTR Gypsy 0.0
Gypsy11_LTR LTR Gypsy 0.0
Gypsy12_LTR LTR Gypsy 0.0
Gypsy2-I_DM LTR Gypsy 2.0
Gypsy2-LTR_DM LTR Gypsy 0.0
Gypsy3_LTR LTR Gypsy 0.0
Gypsy4_I-int LTR Gypsy 0.0
Gypsy5_I-int LTR Gypsy 0.0
Gypsy6A_LTR LTR Gypsy 0.0
Gypsy6_I-int LTR Gypsy 26.0
Gypsy8_I-int LTR Gypsy 0.0
Gypsy8_LTR LTR Gypsy 0.0
Gypsy9_I-int LTR Gypsy 0.0
Gypsy_I-int LTR Gypsy 32.0
Gypsy_LTR LTR Gypsy 1.0
HELENA_RT LINE Jockey 0.0
HETA LINE Jockey 24.0
HMSBEAGLE_I-int LTR Gypsy 2.0
IDEFIX_I-int LTR Gypsy 4.0
IDEFIX_LTR LTR Gypsy 0.0
Invader1_I-int LTR Gypsy 0.0
Invader1_LTR LTR Gypsy 0.0
Invader2_I-int LTR Gypsy 0.0
Invader4_I-int LTR Gypsy 0.0
Invader4_LTR LTR Gypsy 0.0
Invader5_I-int LTR Gypsy 0.0
Invader5_LTR LTR Gypsy 0.0
Invader6_I-int LTR Gypsy 0.0
Invader6_LTR LTR Gypsy 0.0
MAX_I-int LTR Pao 49.0
MAX_LTR LTR Pao 2.0
MDG1_I-int LTR Gypsy 0.0
MDG1_LTR LTR Gypsy 0.0
MDG3_I-int LTR Gypsy 152.0
MDG3_LTR LTR Gypsy 0.0
MICROPIA_I-int LTR Gypsy 0.0
MICROPIA_LTR LTR Gypsy 0.0
Mariner2_DM DNA TcMar-Tc1 0.0
NINJA_I-int LTR Pao 0.0
NOMAD_I-int LTR Gypsy 0.0
PROTOP_A DNA P 32.0
PROTOP_B DNA P 0.0
QUASIMODO2-I_DM LTR Gypsy 42.0
QUASIMODO2-LTR_DM LTR Gypsy 0.0
QUASIMODO_I-int LTR Gypsy 10.0
QUASIMODO_LTR LTR Gypsy 2.0
R1_DM LINE R1 0.0
ROOA_I-int LTR Pao 0.0
ROOA_LTR LTR Pao 0.0
ROVER-I_DM LTR Gypsy 381.0
ROVER-LTR_DM LTR Gypsy 2.0
S2_DM DNA TcMar-Tc1 0.0
STALKER4_I-int LTR Gypsy 77.0
STALKER4_LTR LTR Gypsy 4.0
S_DM DNA TcMar-Tc1 48.0
Stalker2_I-int LTR Gypsy 80.0
Stalker2_LTR LTR Gypsy 2.0
TART-A LINE Jockey 4.0
TART_B1 LINE Jockey 19.0
TC1-2_DM DNA TcMar-Tc1 0.0
TC1_DM DNA TcMar-Tc1 0.0
TLD2 LTR Gypsy 0.0
TRANSIB1 DNA CMC-Transib 0.0
TRANSIB2 DNA CMC-Transib 30.0
ZAM_I-int LTR Gypsy 0.0
_AACACA_n Simple_repeat Simple_repeat 0.0
_AAT_n Simple_repeat Simple_repeat 0.0
_ACAATAG_n Simple_repeat Simple_repeat 0.0
_ACC_n Simple_repeat Simple_repeat 0.0
_AGAGAAG_n Simple_repeat Simple_repeat 0.0
_AGAGA_n Simple_repeat Simple_repeat 0.0
_ATAAT_n Simple_repeat Simple_repeat 0.0
_ATATATT_n Simple_repeat Simple_repeat 0.0
_ATATTAT_n Simple_repeat Simple_repeat 0.0
_ATTTTT_n Simple_repeat Simple_repeat 0.0
_ATT_n Simple_repeat Simple_repeat 0.0
_AT_n Simple_repeat Simple_repeat 0.0
_A_n Simple_repeat Simple_repeat 0.0
_CATA_n Simple_repeat Simple_repeat 0.0
_CTTTT_n Simple_repeat Simple_repeat 0.0
_GAGAA_n Simple_repeat Simple_repeat 0.0
_GCCTTT_n Simple_repeat Simple_repeat 0.0
_TAATAT_n Simple_repeat Simple_repeat 0.0
_TAATA_n Simple_repeat Simple_repeat 0.0
_TATAAAA_n Simple_repeat Simple_repeat 0.0
_TATAA_n Simple_repeat Simple_repeat 0.0
_TATCATG_n Simple_repeat Simple_repeat 0.0
_TA_n Simple_repeat Simple_repeat 0.0
_TGTTG_n Simple_repeat Simple_repeat 0.0
_TTATATA_n Simple_repeat Simple_repeat 0.0
_TTATAT_n Simple_repeat Simple_repeat 0.0
_TTATA_n Simple_repeat Simple_repeat 0.0
_TTA_n Simple_repeat Simple_repeat 0.0
_TTCTT_n Simple_repeat Simple_repeat 0.0
_TTC_n Simple_repeat Simple_repeat 0.0
_TTTAT_n Simple_repeat Simple_repeat 0.0
_TTTA_n Simple_repeat Simple_repeat 0.0
_TTTC_n Simple_repeat Simple_repeat 0.0
_TTTGA_n Simple_repeat Simple_repeat 0.0
_TTTTAG_n Simple_repeat Simple_repeat 0.0
_TTTTCTT_n Simple_repeat Simple_repeat 0.0
_TTTTC_n Simple_repeat Simple_repeat 0.0
_T_n Simple_repeat Simple_repeat 0.0
A-rich Low_complexity Low_complexity 4.24
ACCORD2_I-int LTR Gypsy 4.24
ACCORD2_LTR LTR Gypsy 4.24
ACCORD_I-int LTR Gypsy 4.24
BARI1 DNA TcMar-Tc1 4.24
BATUMI_LTR LTR Pao 4.24
BS LINE Jockey 4.24
BS2 LINE Jockey 62.24
BURDOCK_I-int LTR Gypsy 4.24
Baggins1 LINE LOA 4.24
Bica_I-int LTR Gypsy 42.24
Bica_LTR LTR Gypsy 4.24
CIRCE LTR Gypsy 4.24
Chouto_I-int LTR Gypsy 4.24
Copia1-I_DM LTR Copia 4.24
Copia_I-int LTR Copia 25185.24
Copia_LTR LTR Copia 651.24
DIVER2_I-int LTR Pao 4.24
DIVER2_LTR LTR Pao 4.24
DM1731_I-int LTR Copia 10.24
DM1731_LTR LTR Copia 4.24
DM176_I-int LTR Gypsy 4.24
DM412 LTR Gypsy 4.24
DM412B_LTR LTR Gypsy 4.24
DMCR1A LINE CR1 8.24
DMLTR5 LTR Gypsy 4.24
DMRT1A LINE R1 4.24
DMRT1B LINE R1 90.24
DMRT1C LINE R1 4.24
DNAREP1_DM RC Helitron 4.24
DOC2_DM LINE Jockey 4.24
DOC3_DM LINE Jockey 4.24
FB4_DM DNA TcMar-Tc1 42.24
FROGGER_I-int LTR Copia 4.24
FW2_DM LINE Jockey 4.24
G3_DM LINE Jockey 4.24
G5A_DM LINE Jockey 4.24
G5_DM LINE Jockey 4.24
G6_DM LINE Jockey 4.24
GA-rich Low_complexity Low_complexity 4.24
GTWIN_I-int LTR Gypsy 4.24
G_DM LINE Jockey 4.24
Gypsy11_I-int LTR Gypsy 4.24
Gypsy11_LTR LTR Gypsy 4.24
Gypsy12_LTR LTR Gypsy 4.24
Gypsy2-I_DM LTR Gypsy 6.24
Gypsy2-LTR_DM LTR Gypsy 4.24
Gypsy3_LTR LTR Gypsy 4.24
Gypsy4_I-int LTR Gypsy 4.24
Gypsy5_I-int LTR Gypsy 4.24
Gypsy6A_LTR LTR Gypsy 4.24
Gypsy6_I-int LTR Gypsy 30.24
Gypsy8_I-int LTR Gypsy 4.24
Gypsy8_LTR LTR Gypsy 4.24
Gypsy9_I-int LTR Gypsy 4.24
Gypsy_I-int LTR Gypsy 36.24
Gypsy_LTR LTR Gypsy 5.24
HELENA_RT LINE Jockey 4.24
HETA LINE Jockey 28.24
HMSBEAGLE_I-int LTR Gypsy 6.24
IDEFIX_I-int LTR Gypsy 8.24
IDEFIX_LTR LTR Gypsy 4.24
Invader1_I-int LTR Gypsy 4.24
Invader1_LTR LTR Gypsy 4.24
Invader2_I-int LTR Gypsy 4.24
Invader4_I-int LTR Gypsy 4.24
Invader4_LTR LTR Gypsy 4.24
Invader5_I-int LTR Gypsy 4.24
Invader5_LTR LTR Gypsy 4.24
Invader6_I-int LTR Gypsy 4.24
Invader6_LTR LTR Gypsy 4.24
MAX_I-int LTR Pao 53.24
MAX_LTR LTR Pao 6.24
MDG1_I-int LTR Gypsy 4.24
MDG1_LTR LTR Gypsy 4.24
MDG3_I-int LTR Gypsy 156.24
MDG3_LTR LTR Gypsy 4.24
MICROPIA_I-int LTR Gypsy 4.24
MICROPIA_LTR LTR Gypsy 4.24
Mariner2_DM DNA TcMar-Tc1 4.24
NINJA_I-int LTR Pao 4.24
NOMAD_I-int LTR Gypsy 4.24
PROTOP_A DNA P 36.24
PROTOP_B DNA P 4.24
QUASIMODO2-I_DM LTR Gypsy 46.24
QUASIMODO2-LTR_DM LTR Gypsy 4.24
QUASIMODO_I-int LTR Gypsy 14.24
QUASIMODO_LTR LTR Gypsy 6.24
R1_DM LINE R1 4.24
ROOA_I-int LTR Pao 4.24
ROOA_LTR LTR Pao 4.24
ROVER-I_DM LTR Gypsy 385.24
ROVER-LTR_DM LTR Gypsy 6.24
S2_DM DNA TcMar-Tc1 4.24
STALKER4_I-int LTR Gypsy 81.24
STALKER4_LTR LTR Gypsy 8.24
S_DM DNA TcMar-Tc1 52.24
Stalker2_I-int LTR Gypsy 84.24
Stalker2_LTR LTR Gypsy 6.24
TART-A LINE Jockey 8.24
TART_B1 LINE Jockey 23.24
TC1-2_DM DNA TcMar-Tc1 4.24
TC1_DM DNA TcMar-Tc1 4.24
TLD2 LTR Gypsy 4.24
TRANSIB1 DNA CMC-Transib 4.24
TRANSIB2 DNA CMC-Transib 34.24
ZAM_I-int LTR Gypsy 4.24
_AACACA_n Simple_repeat Simple_repeat 4.24
_AAT_n Simple_repeat Simple_repeat 4.24
_ACAATAG_n Simple_repeat Simple_repeat 4.24
_ACC_n Simple_repeat Simple_repeat 4.24
_AGAGAAG_n Simple_repeat Simple_repeat 4.24
_AGAGA_n Simple_repeat Simple_repeat 4.24
_ATAAT_n Simple_repeat Simple_repeat 4.24
_ATATATT_n Simple_repeat Simple_repeat 4.24
_ATATTAT_n Simple_repeat Simple_repeat 4.24
_ATTTTT_n Simple_repeat Simple_repeat 4.24
_ATT_n Simple_repeat Simple_repeat 4.24
_AT_n Simple_repeat Simple_repeat 4.24
_A_n Simple_repeat Simple_repeat 4.24
_CATA_n Simple_repeat Simple_repeat 4.24
_CTTTT_n Simple_repeat Simple_repeat 4.24
_GAGAA_n Simple_repeat Simple_repeat 4.24
_GCCTTT_n Simple_repeat Simple_repeat 4.24
_TAATAT_n Simple_repeat Simple_repeat 4.24
_TAATA_n Simple_repeat Simple_repeat 4.24
_TATAAAA_n Simple_repeat Simple_repeat 4.24
_TATAA_n Simple_repeat Simple_repeat 4.24
_TATCATG_n Simple_repeat Simple_repeat 4.24
_TA_n Simple_repeat Simple_repeat 4.24
_TGTTG_n Simple_repeat Simple_repeat 4.24
_TTATATA_n Simple_repeat Simple_repeat 4.24
_TTATAT_n Simple_repeat Simple_repeat 4.24
_TTATA_n Simple_repeat Simple_repeat 4.24
_TTA_n Simple_repeat Simple_repeat 4.24
_TTCTT_n Simple_repeat Simple_repeat 4.24
_TTC_n Simple_repeat Simple_repeat 4.24
_TTTAT_n Simple_repeat Simple_repeat 4.24
_TTTA_n Simple_repeat Simple_repeat 4.24
_TTTC_n Simple_repeat Simple_repeat 4.24
_TTTGA_n Simple_repeat Simple_repeat 4.24
_TTTTAG_n Simple_repeat Simple_repeat 4.24
_TTTTCTT_n Simple_repeat Simple_repeat 4.24
_TTTTC_n Simple_repeat Simple_repeat 4.24
_T_n Simple_repeat Simple_repeat 4.24
12 changes: 6 additions & 6 deletions tools/repenrich2/test-data/chrY_single_class_fraction_counts.tab
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
DNA 70.0
LINE 90.0
LTR 13210.0
Low_complexity 0.0
RC 0.0
Simple_repeat 0.0
DNA 121.89
LINE 184.34
LTR 13554.36
Low_complexity 9.43
RC 4.72
Simple_repeat 179.26
Loading

0 comments on commit a9af128

Please sign in to comment.