-
Notifications
You must be signed in to change notification settings - Fork 2
/
parse_sequence.h
89 lines (73 loc) · 1.35 KB
/
parse_sequence.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#ifndef __PARSE_SEQUENCE
#define __PARSE_SEQUENCE
#include <string>
#include <zlib.h> // For compressed text-based files
// The allowed sequence file types.
// The numeric values are spelled out; they are the same values the
// enumerators would receive implicitly from their declaration order.
enum FileType {
    FASTA = 0,
    FASTQ = 1,
    UNKNOWN_SEQUENCE = 2
};
// A class for iterating through a sequence file, one sequence at a time
class SequenceIterator
{
private:
gzFile fin;
FileType file_type;
std::string seq;
std::string curr_defline;
std::string next_defline;
void next();
void next_fasta();
void next_fastq();
public:
SequenceIterator()
{
file_type = FASTA;
fin = NULL;
};
SequenceIterator(const std::string &m_filename)
{
fin = NULL;
load(m_filename);
};
~SequenceIterator()
{
clear();
};
void load(const std::string &m_filename);
inline void clear()
{
if(fin != NULL){
gzclose(fin);
fin = NULL;
}
};
operator bool() const
{
switch(file_type){
case FASTA:
case FASTQ:
return (fin != NULL);
default:
throw __FILE__ ":SequenceIterator:: bool(): Unknown sequence file type";
break;
};
// We should never get here
return false;
};
// Read the next sequence
void operator++()
{
next();
};
const std::string& get_seq() const
{
return seq;
};
const std::string& get_info() const
{
return curr_defline;
};
};
#endif // __PARSE_SEQUENCE