-
Notifications
You must be signed in to change notification settings - Fork 9
/
fitreg_util.hpp
128 lines (112 loc) · 3.54 KB
/
fitreg_util.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#ifndef FITREG_UTIL_HPP
#define FITREG_UTIL_HPP
#include <cmath>
#include <cstdlib>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
#include <stdexcept>
#include <ctime>
#include "text_tools.hpp"
#include <fenv.h>
#include <iomanip>
#include <dirent.h>
#include <map>
#include <cstdint>
using namespace std;
using namespace hui;
// Upper limit, in characters, on the size of the line buffer.
// It has to be large enough for the longest input line!
const size_t lineBufferSize = 1000 * 1000;
map<string, string> readFitregDirectory(string inDir){
// read list of fitreg files
DIR *dir;
struct dirent *ent;
map<string, string> chrFiles;
string ending(".fit");
if ((dir = opendir (inDir.c_str())) != NULL) {
/* print all the files and directories within directory */
while ((ent = readdir (dir)) != NULL) {
string fileName(ent->d_name);
if (fileName.length() >= ending.length()) {
if (fileName.compare (fileName.length() - ending.length(), ending.length(), ending) == 0){
vector<string> tokens = TextTools::split(fileName, '.');
string chr = tokens[0];
string dir = inDir + "/" + fileName;
chrFiles[chr] = dir;
}
}
}
closedir (dir);
} else {
/* could not open directory */
//throw runtime_error("Could not open directory!\n");
cerr << "Error: cannot open INSIGHT data directory!\n";
exit(1);
}
return chrFiles;
}
/**
 * Read the next record from a tab-separated feature file.
 *
 * Empty lines and lines beginning with '#' are skipped. The first remaining
 * line is split on tabs, where a run of consecutive tabs counts as a single
 * separator: field 1 is stored in chr, fields 2 and 3 are parsed into start
 * and end, and every further field is converted with atof() and appended to
 * features (which is cleared first).
 *
 * start and end are parsed by strtoull() with base 0, so a "0x" prefix
 * selects hexadecimal and a leading "0" selects octal.
 *
 * On entry, chr and end are taken to describe the previously read record;
 * they are used to verify that records of one chromosome are sorted and do
 * not overlap. The output arguments may already have been overwritten when
 * an exception is thrown.
 *
 * The line is tokenised straight from the std::string, so no fixed-size
 * buffer is involved and lines of any length are accepted.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param handle       stream to read from
 * @param chr          in: previous chromosome name; out: name of this record
 * @param start        out: start coordinate of this record
 * @param end          in: previous end coordinate; out: end of this record
 * @param features     out: feature values of this record
 * @param featureSize  expected number of feature values; 0 disables the check
 * @return true if a record was read, false if the stream ran out of lines
 * @throws std::runtime_error if the chr, start or end field is missing, if
 *         the number of features differs from a non-zero featureSize, if the
 *         record starts before the previous end on the same chromosome, or
 *         if end < start
 */
inline bool readNextFeatureEntry(std::ifstream &handle, std::string &chr, uint64_t &start, uint64_t &end, std::vector<double> &features, size_t featureSize){
    // remember the previous record so the ordering can be validated below
    const std::string oldChr = chr;
    const uint64_t oldEnd = end;

    // skip comment lines and empty lines; stop at end of file
    std::string line;
    do {
        if (!std::getline(handle, line)){
            return false;
        }
    } while (line.empty() || line[0] == '#');

    // Tokeniser: yields maximal runs of non-tab characters, left to right.
    std::string::size_type cursor = 0;
    std::string token;
    auto nextToken = [&line, &cursor, &token]() -> bool {
        const std::string::size_type first = line.find_first_not_of('\t', cursor);
        if (first == std::string::npos){
            cursor = line.size();
            return false;
        }
        const std::string::size_type last = line.find_first_of('\t', first);
        cursor = (last == std::string::npos) ? line.size() : last;
        token.assign(line, first, cursor - first);
        return true;
    };

    // chromosome
    if (!nextToken()){
        throw std::runtime_error("Cannot obtain genomic chr number!\n");
    }
    chr = token;

    // start
    if (!nextToken()){
        throw std::runtime_error("Cannot obtain genomic start position!\n");
    }
    start = std::strtoull(token.c_str(), nullptr, 0);

    // end
    if (!nextToken()){
        throw std::runtime_error("Cannot obtain genomic end position!\n");
    }
    end = std::strtoull(token.c_str(), nullptr, 0);

    // all remaining fields are feature values
    features.clear();
    while (nextToken()){
        features.push_back(std::atof(token.c_str()));
    }

    // check validation of features
    if (featureSize != 0 && features.size() != featureSize){
        std::cerr << "Error:" << std::endl;
        std::cerr << line << std::endl;
        std::cerr << features.size() << " != " << featureSize << std::endl;
        throw std::runtime_error("The number of features do not mathch the title line!\n");
    }

    // records on the same chromosome must be sorted and non-overlapping
    if (chr == oldChr && start < oldEnd){
        throw std::runtime_error("Feature file is not sorted properly or has overlapping intervals. Please check the input file!!\n");
    }
    // an interval must never end before it starts, on any chromosome
    if (end < start){
        throw std::runtime_error("Start of a feature is greater than its end. Please check the input file!!\n");
    }
    return true;
}
#endif