-
-
Notifications
You must be signed in to change notification settings - Fork 7.3k
/
Copy pathmd5.cpp
384 lines (355 loc) · 13.8 KB
/
md5.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
/**
* @file
* @author [tGautot](https://github.com/tGautot)
* @brief Simple C++ implementation of the [MD5 Hashing
* Algorithm](https://en.wikipedia.org/wiki/MD5)
* @details
* The [MD5 Algorithm](https://en.wikipedia.org/wiki/MD5) is a
* hashing algorithm which was designed in 1991 by [Ronal
* Rivest](https://en.wikipedia.org/wiki/Ron_Rivest).
*
* MD5 is one of the most used hashing algorithm there is. Some of its
* use cases are:
* 1. Providing checksum for downloaded software
* 2. Store salted password
*
* However MD5 has be know to be cryptographically weak for quite some
* time, yet it is still widely used. This weakness was exploited by the
* [Flame Malware](https://en.wikipedia.org/wiki/Flame_(malware)) in 2012
*
* ### Algorithm
* First of all, all values are expected to be in [little endian]
* (https://en.wikipedia.org/wiki/Endianness). This is especially important
* when using part of the bytestring as an integer.
*
* The first step of the algorithm is to pad the message for its length to
* be a multiple of 64 (bytes). This is done by first adding 0x80 (10000000)
* and then only zeroes until the last 8 bytes must be filled, where then the
* 64 bit size of the input will be added
*
* Once this is done, the algo breaks down this padded message
* into 64 bytes chunks. Each chunk is used for one *round*, a round
* breaks the chunk into 16 blocks of 4 bytes. During these rounds
* the algorithm will update its 128 bit state (represented by 4 ints: A,B,C,D)
* For more precisions on these operations please see the [Wikipedia
* aritcle](https://en.wikipedia.org/wiki/MD5#Algorithm).
* The signature given by MD5 is its 128 bit state once all rounds are done.
* @note This is a simple implementation for a byte string but
* some implmenetations can work on bytestream, messages of unknown length.
*/
#include <algorithm> /// Used for std::copy
#include <array> /// Used for std::array
#include <cassert> /// Used for assert
#include <cstdint>
#include <cstring> /// Used for std::memcopy
#include <iostream> /// Used for IO operations
#include <string> /// Used for strings
#include <vector> /// Used for std::vector
/**
* @namespace hashing
* @brief Hashing algorithms
*/
namespace hashing {
/**
* @namespace MD5
* @brief Functions for the [MD5](https://en.wikipedia.org/wiki/MD5) algorithm
* implementation
*/
namespace md5 {
/**
* @brief Rotates the bits of a 32-bit unsigned integer
* @param n Integer to rotate
* @param rotate How many bits for the rotation
* @return uint32_t The rotated integer
*/
uint32_t leftRotate32bits(uint32_t n, std::size_t rotate) {
return (n << rotate) | (n >> (32 - rotate));
}
/**
* @brief Checks whether integers are stored as big endian or not
* @note Taken from [this](https://stackoverflow.com/a/1001373) StackOverflow
* post
* @return true IF integers are detected to work as big-endian
* @return false IF integers are detected to work as little-endian
*/
bool isBigEndian() {
union {
uint32_t i;
std::array<char, 4> c;
} bint = {0x01020304};
return bint.c[0] == 1;
}
/**
* @brief Sets 32-bit integer to little-endian if needed
* @param n Number to set to little-endian (uint32_t)
* @return uint32_t param n with binary representation as little-endian
*/
uint32_t toLittleEndian32(uint32_t n) {
if (!isBigEndian()) {
return ((n << 24) & 0xFF000000) | ((n << 8) & 0x00FF0000) |
((n >> 8) & 0x0000FF00) | ((n >> 24) & 0x000000FF);
}
// Machine works on little endian, no need to change anything
return n;
}
/**
* @brief Sets 64-bit integer to little-endian if needed
* @param n Number to set to little-endian (uint64_t)
* @return uint64_t param n with binary representation as little-endian
*/
uint64_t toLittleEndian64(uint64_t n) {
if (!isBigEndian()) {
return ((n << 56) & 0xFF00000000000000) |
((n << 40) & 0x00FF000000000000) |
((n << 24) & 0x0000FF0000000000) |
((n << 8) & 0x000000FF00000000) |
((n >> 8) & 0x00000000FF000000) |
((n >> 24) & 0x0000000000FF0000) |
((n >> 40) & 0x000000000000FF00) |
((n >> 56) & 0x00000000000000FF);
;
}
// Machine works on little endian, no need to change anything
return n;
}
/**
* @brief Transforms the 128-bit MD5 signature into a 32 char hex string
* @param sig The MD5 signature (Expected 16 bytes)
* @return std::string The hex signature
*/
std::string sig2hex(void* sig) {
const char* hexChars = "0123456789abcdef";
auto* intsig = static_cast<uint8_t*>(sig);
std::string hex = "";
for (uint8_t i = 0; i < 16; i++) {
hex.push_back(hexChars[(intsig[i] >> 4) & 0xF]);
hex.push_back(hexChars[(intsig[i]) & 0xF]);
}
return hex;
}
/**
* @brief The MD5 algorithm itself, taking in a bytestring
* @param input_bs The bytestring to hash
* @param input_size The size (in BYTES) of the input
* @return void* Pointer to the 128-bit signature
*/
void* hash_bs(const void* input_bs, uint64_t input_size) {
auto* input = static_cast<const uint8_t*>(input_bs);
// Step 0: Initial Data (Those are decided in the MD5 protocol)
// s is the shift used in the leftrotate each round
std::array<uint32_t, 64> s = {
7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
// K is pseudo-random values used each round
// The values can be obtained by the following python code:
/**
* @brief Values of K are pseudo-random and used to "salt" each round
* The values can be obtained by the following python code
* @code{.py}
* from math import floor, sin
*
* for i in range(64):
* print(floor(2**32 * abs(sin(i+1))))
* @endcode
*/
std::array<uint32_t, 64> K = {
3614090360, 3905402710, 606105819, 3250441966, 4118548399, 1200080426,
2821735955, 4249261313, 1770035416, 2336552879, 4294925233, 2304563134,
1804603682, 4254626195, 2792965006, 1236535329, 4129170786, 3225465664,
643717713, 3921069994, 3593408605, 38016083, 3634488961, 3889429448,
568446438, 3275163606, 4107603335, 1163531501, 2850285829, 4243563512,
1735328473, 2368359562, 4294588738, 2272392833, 1839030562, 4259657740,
2763975236, 1272893353, 4139469664, 3200236656, 681279174, 3936430074,
3572445317, 76029189, 3654602809, 3873151461, 530742520, 3299628645,
4096336452, 1126891415, 2878612391, 4237533241, 1700485571, 2399980690,
4293915773, 2240044497, 1873313359, 4264355552, 2734768916, 1309151649,
4149444226, 3174756917, 718787259, 3951481745};
// The initial 128-bit state
uint32_t a0 = 0x67452301, A = 0;
uint32_t b0 = 0xefcdab89, B = 0;
uint32_t c0 = 0x98badcfe, C = 0;
uint32_t d0 = 0x10325476, D = 0;
// Step 1: Processing the bytestring
// First compute the size the padded message will have
// so it is possible to allocate the right amount of memory
uint64_t padded_message_size = 0;
if (input_size % 64 < 56) {
padded_message_size = input_size + 64 - (input_size % 64);
} else {
padded_message_size = input_size + 128 - (input_size % 64);
}
std::vector<uint8_t> padded_message(padded_message_size);
// Beginning of the padded message is the original message
std::copy(input, input + input_size, padded_message.begin());
// Afterwards comes a single 1 bit and then only zeroes
padded_message[input_size] = 1 << 7; // 10000000
for (uint64_t i = input_size; i % 64 != 56; i++) {
if (i == input_size) {
continue; // pass first iteration
}
padded_message[i] = 0;
}
// We then have to add the 64-bit size of the message at the end
// When there is a conversion from int to bytestring or vice-versa
// We always need to make sure it is little endian
uint64_t input_bitsize_le = toLittleEndian64(input_size * 8);
for (uint8_t i = 0; i < 8; i++) {
padded_message[padded_message_size - 8 + i] =
(input_bitsize_le >> (56 - 8 * i)) & 0xFF;
}
// Already allocate memory for blocks
std::array<uint32_t, 16> blocks{};
// Rounds
for (uint64_t chunk = 0; chunk * 64 < padded_message_size; chunk++) {
// First, build the 16 32-bits blocks from the chunk
for (uint8_t bid = 0; bid < 16; bid++) {
blocks[bid] = 0;
// Having to build a 32-bit word from 4-bit words
// Add each and shift them to the left
for (uint8_t cid = 0; cid < 4; cid++) {
blocks[bid] = (blocks[bid] << 8) +
padded_message[chunk * 64 + bid * 4 + cid];
}
}
A = a0;
B = b0;
C = c0;
D = d0;
// Main "hashing" loop
for (uint8_t i = 0; i < 64; i++) {
uint32_t F = 0, g = 0;
if (i < 16) {
F = (B & C) | ((~B) & D);
g = i;
} else if (i < 32) {
F = (D & B) | ((~D) & C);
g = (5 * i + 1) % 16;
} else if (i < 48) {
F = B ^ C ^ D;
g = (3 * i + 5) % 16;
} else {
F = C ^ (B | (~D));
g = (7 * i) % 16;
}
// Update the accumulators
F += A + K[i] + toLittleEndian32(blocks[g]);
A = D;
D = C;
C = B;
B += leftRotate32bits(F, s[i]);
}
// Update the state with this chunk's hash
a0 += A;
b0 += B;
c0 += C;
d0 += D;
}
// Build signature from state
// Note, any type could be used for the signature
// uint8_t was used to make the 16 bytes obvious
// The sig needs to be little endian
auto* sig = new uint8_t[16];
for (uint8_t i = 0; i < 4; i++) {
sig[i] = (a0 >> (8 * i)) & 0xFF;
sig[i + 4] = (b0 >> (8 * i)) & 0xFF;
sig[i + 8] = (c0 >> (8 * i)) & 0xFF;
sig[i + 12] = (d0 >> (8 * i)) & 0xFF;
}
return sig;
}
/**
* @brief Converts the string to bytestring and calls the main algorithm
* @param message Plain character message to hash
* @return void* Pointer to the MD5 signature
*/
void* hash(const std::string& message) {
return hash_bs(&message[0], message.size());
}
} // namespace md5
} // namespace hashing
/**
* @brief Self-test implementations of well-known MD5 hashes
* @returns void
*/
static void test() {
// Hashes empty string and stores signature
void* sig = hashing::md5::hash("");
std::cout << "Hashing empty string" << std::endl;
// Prints signature hex representation
std::cout << hashing::md5::sig2hex(sig) << std::endl << std::endl;
// Test with cassert whether sig is correct from the expected value
assert(hashing::md5::sig2hex(sig).compare(
"d41d8cd98f00b204e9800998ecf8427e") == 0);
// Hashes "The quick brown fox jumps over the lazy dog" and stores signature
void* sig2 =
hashing::md5::hash("The quick brown fox jumps over the lazy dog");
std::cout << "Hashing The quick brown fox jumps over the lazy dog"
<< std::endl;
// Prints signature hex representation
std::cout << hashing::md5::sig2hex(sig2) << std::endl << std::endl;
// Test with cassert whether sig is correct from the expected value
assert(hashing::md5::sig2hex(sig2).compare(
"9e107d9d372bb6826bd81d3542a419d6") == 0);
// Hashes "The quick brown fox jumps over the lazy dog." (notice the
// additional period) and stores signature
void* sig3 =
hashing::md5::hash("The quick brown fox jumps over the lazy dog.");
std::cout << "Hashing "
"The quick brown fox jumps over the lazy dog."
<< std::endl;
// Prints signature hex representation
std::cout << hashing::md5::sig2hex(sig3) << std::endl << std::endl;
// Test with cassert whether sig is correct from the expected value
assert(hashing::md5::sig2hex(sig3).compare(
"e4d909c290d0fb1ca068ffaddf22cbd0") == 0);
// Hashes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
// and stores signature
void* sig4 = hashing::md5::hash(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
std::cout
<< "Hashing "
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
<< std::endl;
// Prints signature hex representation
std::cout << hashing::md5::sig2hex(sig4) << std::endl << std::endl;
// Test with cassert whether sig is correct from the expected value
assert(hashing::md5::sig2hex(sig4).compare(
"d174ab98d277d9f5a5611c2c9f419d9f") == 0);
}
/**
* @brief Puts user in a loop where inputs can be given and MD5 hash will be
* computed and printed
* @returns void
*/
static void interactive() {
while (true) {
std::string input;
std::cout << "Enter a message to be hashed (Ctrl-C to exit): "
<< std::endl;
std::getline(std::cin, input);
void* sig = hashing::md5::hash(input);
std::cout << "Hash is: " << hashing::md5::sig2hex(sig) << std::endl;
while (true) {
std::cout << "Want to enter another message? (y/n) ";
std::getline(std::cin, input);
if (input.compare("y") == 0) {
break;
} else if (input.compare("n") == 0) {
return;
}
}
}
}
/**
* @brief Main function
* @returns 0 on exit
*/
int main() {
test(); // run self-test implementations
// Launch interactive mode where user can input messages and see
// their hash
interactive();
return 0;
}