-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_files.cc
113 lines (100 loc) · 3.03 KB
/
read_files.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include <map>
#include <queue>
#include <openssl/md5.h>
#include <sstream>
#include <common.h>
#include <read_files.h>
using std::fstream;
using std::string;
using std::vector;
using std::queue;
using std::unordered_multimap;
// Walks the directory tree rooted at `path` breadth-first and returns a
// multimap from each non-directory entry's key (as computed by get_file_key)
// to that entry's path.
//
// Entries whose name starts with '.' are ignored, which also covers "." and
// "..". Entries that stat() rejects are skipped. Progress is logged to cout.
// If any directory in the walk cannot be opened, the error is logged and the
// process terminates via exit(1).
unordered_multimap<string, string> ReadFiles::GetFileList(string path) {
  cout << "Directory to create maps from: " << path << endl;

  unordered_multimap<string, string> keyed_paths;
  queue<string> pending_dirs;
  pending_dirs.push(path);

  while (!pending_dirs.empty()) {
    // Take the next directory off the queue up front; sub-directories found
    // below are appended behind whatever is still waiting.
    const string current_dir = pending_dirs.front();
    pending_dirs.pop();

    cout << "Listing files for " << current_dir << endl;
    DIR* handle = opendir(current_dir.c_str());
    if (handle == NULL) {
      cout << "Error(" << errno << ") opening " << current_dir << endl;
      exit(1);
    }

    for (struct dirent* entry = readdir(handle); entry != NULL;
         entry = readdir(handle)) {
      // Dot-prefixed names (hidden entries, "." and "..") are not visited.
      if (entry->d_name[0] == '.') continue;

      const string entry_path = current_dir + "/" + entry->d_name;

      // Skip anything stat() cannot describe.
      struct stat info;
      if (stat(entry_path.c_str(), &info) != 0) continue;

      if (!S_ISDIR(info.st_mode)) {
        cout << "Adding path " << entry_path << endl;
        const string key = get_file_key(entry_path);
        keyed_paths.insert(make_pair(key, entry_path));
        continue;
      }

      // Directories are queued and listed on a later pass of the outer loop.
      cout << "Parsing Directory " << entry_path << endl;
      pending_dirs.push(entry_path);
    }
    closedir(handle);
  }
  return keyed_paths;
}
string ReadFiles::get_file_key(string filepath) {
fstream fin;
struct stat filestat;
stat(filepath.c_str(), &filestat);
fin.open(filepath.c_str(), std::ios::in | std::ios::binary);
if (!fin) {
cout << "\n***Unable to open " << filepath << "***\n" << endl;
exit(1);
}
int hash_len = FILE_HASH_LEN;
if (filestat.st_size < FILE_HASH_LEN) {
cout << "Resetting hash len" << endl;
hash_len=filestat.st_size;
}
char key[hash_len];
fin.read(key, hash_len);
if (!fin) std::cout << "error: only " << fin.gcount() << " could be read";
string md5;
{
unsigned char result[MD5_DIGEST_LENGTH];
MD5((unsigned char *)key, hash_len, result);
std::stringstream ss;
for(int i=0; i <MD5_DIGEST_LENGTH; i++) {
ss << std::hex << result[i];
}
md5 = ss.str();
}
fin.close();
cout << "### Key ###\n\tfile_size "
<< filestat.st_size << " \n\tkey " << md5 << endl;
return md5;
}
// Writes every path in `file_map` to MAP_FILE_NAME, one per line, and reports
// on cout how many megabytes the duplicates occupy.
//
// The first path seen for a key is written as-is; each further path with the
// same key is written with a "DUPLICATE " prefix and its size is added to the
// total. This relies on the container iterating entries with equal keys
// adjacently, which unordered_multimap guarantees.
//
// If the map file cannot be created, a message is logged and the process
// terminates via exit(1).
void ReadFiles::dump_map(const std::unordered_multimap<std::string, std::string>& file_map) {
  fstream map_file(MAP_FILE_NAME, std::ios::out|std::ios::binary);
  if (!map_file.is_open()) {
    cout << "MAP file creation failed.";
    exit(1);
  }

  long int total_saving = 0;
  string prev_key;
  // Iterate by reference: copying each pair would copy two strings per entry.
  for (const auto& entry : file_map) {
    const string& cur_key = entry.first;
    const string& file_path = entry.second;
    if (!prev_key.empty() && prev_key == cur_key) {
      map_file << "DUPLICATE " << file_path << '\n';
      // Count the size only when stat() succeeds; on failure st_size is
      // uninitialised and would corrupt the total.
      struct stat filestat;
      if (stat(file_path.c_str(), &filestat) == 0) {
        total_saving += filestat.st_size;
      } else {
        cout << "Unable to stat " << file_path
             << "; not counted in savings" << endl;
      }
    } else {
      map_file << file_path << '\n';
      prev_key = cur_key;
    }
  }
  cout << "You can save " << total_saving/1024/1024 << "MBs";
}