-
Notifications
You must be signed in to change notification settings - Fork 5
/
format_qiime_to_uparse.py
executable file
·46 lines (35 loc) · 1.33 KB
/
format_qiime_to_uparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
import os
import argparse
def main():
    """Re-label QIIME / QIITA fastq headers for use in UPARSE.

    Reads ``-i/--input_fp`` and ``-o/--output_fp`` from the command line,
    then copies every record of the input fastq file to the output file
    with ``;barcodelabel=<sample ID>;`` appended to its header line.

    The sample ID is taken from the header: the part before the first
    ``_`` is split on ``@`` and the piece following the first ``@`` is
    used (e.g. ``@SampleA_12 ...`` gives ``SampleA``).

    Raises:
        IndexError: if the part of a header before its first ``_``
            contains no ``@``.
    """
    parser = argparse.ArgumentParser(
        description='Take QIIME / QIITA demultiplexed sequences in fastq '
                    'format and reformat their headers for use in UPARSE.')
    req = parser.add_argument_group('required arguments')
    req.add_argument('-i', '--input_fp', required=True, type=str,
                     help='A fastq file with headers formatted by QIIME '
                          '(i.e. Sample IDs first in header lines).')
    req.add_argument('-o', '--output_fp', required=True,
                     help='The output file path.')
    args = parser.parse_args()

    # The 'U' open mode was removed in Python 3.11 (it raises ValueError
    # there); plain text mode is used instead, and the parser strips any
    # leftover line-ending characters from each field.
    # The with-statement guarantees both files are flushed and closed even
    # if a malformed header raises part-way through the input.
    with open(args.input_fp, 'r') as seqs_in, \
            open(args.output_fp, 'w') as seqs_out:
        for head, seq, qual in basic_fastq_parser(seqs_in):
            # Header looks like "@<sampleID>_<n> ..."; keep the sample ID.
            sample_id = head.split('_')[0].split('@')[1]
            new_head = head + ";barcodelabel=" + sample_id + ";"
            write_fastq(new_head, seq, qual, seqs_out)
def basic_fastq_parser(in_f):
    """Yield ``(header, sequence, quality)`` for each fastq record.

    Records are recognised purely by position: every consecutive group of
    four lines is one record (header, sequence, separator, quality). The
    separator line is ignored and nothing is validated.

    Args:
        in_f: an iterable of lines, e.g. an open text file.

    Yields:
        A tuple ``(head, seq, qual)`` of the record's header, sequence and
        quality lines, each with surrounding whitespace stripped.

    Note:
        A trailing group of fewer than four lines (a truncated record or
        a blank line at the end of the file) is dropped without error.
    """
    head, seq = "", ""
    # Number lines from 1 so that line_no % 4 gives the position within the
    # four-line record: 1 = header, 2 = sequence, 3 = '+', 0 = quality.
    for line_no, line in enumerate(in_f, 1):
        position = line_no % 4
        if position == 1:
            head = line.strip()
        elif position == 2:
            seq = line.strip()
        elif position == 0:
            # The quality line completes the record.
            yield head, seq, line.strip()
def write_fastq(header, seq, qual, out_f):
    """Write a single fastq record to ``out_f``.

    The record is emitted as four newline-terminated lines: the header,
    the sequence, a bare ``+`` separator, and the quality string.

    Args:
        header: header line text, written as given.
        seq: sequence line text.
        qual: quality line text.
        out_f: an object with a ``write`` method, e.g. an open text file.
    """
    fields = (header, seq, qual)
    record = '%s\n%s\n+\n%s\n' % fields
    out_f.write(record)
# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger command-line parsing.
if __name__ == "__main__":
    main()