-
Notifications
You must be signed in to change notification settings - Fork 0
/
iombench.c
executable file
·633 lines (572 loc) · 20.2 KB
/
iombench.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
/*
* iombench.c
*
* Copyright 2014 Yongkun Wang
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* A microbenchmark for storage devices such as hard disks and flash SSDs.
*
* Written by Yongkun Wang ([email protected])
*/
#define _GNU_SOURCE
#define _LARGEFILE64_SOURCE
#ifdef __APPLE__
#define lseek64 lseek
#define open64 open
#endif
#define SECTOR_SIZE 512
#define ASCII_PRINTABLE_LOW 32
#define ASCII_PRINTABLE_HIGH 126
#define MAX_FILE_NAME_LENGTH 1024
#define GET_OUTPUT(fd) ((fd) != NULL ? (fd) : stdout)
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <getopt.h>
#include <pthread.h>
#include <libgen.h>
#include <limits.h>
/*
 * Print the command-line help (a man-page style summary of every option
 * accepted by get_options()) to stderr.
 *
 * Takes no arguments and returns nothing. It does not terminate the
 * program; the caller decides whether to exit afterwards.
 *
 * The text is kept in step with the code: -r is a flag without argument,
 * and -R is given in microseconds because start_io_threads() splits the
 * value into seconds and nanoseconds assuming that unit.
 */
void usage(void)
{
    fprintf(stderr,
        "\n"
        "NAME\n"
        " iombench - microbenchmark for storage devices/systems\n"
        "\n"
        "SYNOPSIS\n"
        " iombench [ -d time ] [ -f filename ] [ -n count ] [ -H ]\n"
        " [ -p size ] [ -o filename ] [ -P ] [ -r ]\n"
        " [ -R time ] [ -s addr ] [ -S addr ] [ -t count ]\n"
        " [ -w percent ]\n"
        " iombench -h\n"
        "\n"
        "DESCRIPTION\n"
        " iombench is a microbenchmark for storage systems or devices\n"
        " such as hard disk and flash SSD. It provides the following\n"
        " features:\n"
        " - Various IO sequences including sequential/random\n"
        " reads/writes, and mixed IOs with any R/W ratio.\n"
        " - Multi-threading to simulate multiple outstanding IOs.\n"
        " - Using O_SYNC and O_DIRECT to try to bypass OS or file\n"
        " system buffer. Support raw IO on device file.\n"
        " It is recommended to tune your devices/systems for prefetching\n"
        " or write-back cache with some tools like hdparm.\n"
        "\n"
        "OPTIONS\n"
        " -d <time> Duration of test of each thread in seconds. \n"
        " The longer the better, especially for SSDs.\n"
        "\n"
        " -f <filename> Filename for test. Can be device file like /dev/sda."
        "\n"
        " This is a recommended way to test new drives.\n"
        " Make sure you have correct permissions.\n"
        " WARNING! All data include partition table will be\n"
        " overwritten! Cannot be recovered!\n"
        "\n"
        " -n <count> Send <count> requests per thread. Program will\n"
        " terminate by -n or -d which happens first.\n"
        "\n"
        " -H Human friendly output.\n"
        "\n"
        " -o <filename> Output file to append. Default output to console.\n"
        "\n"
        " -p <size> Page size in sector (512B) for IO. This number\n"
        " is multiplied by 512. For example, -p 8 uses 4096\n"
        " as page size.\n"
        "\n"
        " -P Output the execution details of each request.\n"
        "\n"
        " -r Use random addresses. Random IO.\n"
        "\n"
        " -R <time> Rampup interval in microseconds between threads.\n"
        "\n"
        " -s <addr> Initial file offset. Automatically aligned by 512.\n"
        " Default to 0.\n"
        "\n"
        " -S <addr> End offset. Automatically aligned by 512. Note\n"
        " that lseek64 is not defined on OSX, so the largest\n"
        " offset on Mac may be 2G even you specify larger\n"
        " value.\n"
        " If this value is larger than file length, reads\n"
        " or writes may happen on the hole of the file. Run\n"
        " 'man lseek' to see the details. A safe way is to \n"
        " generate the file in advance using this tool or\n"
        " dd, then set the end offset smaller than the file\n"
        " length.\n"
        " For sequential read/write, file offset will be\n"
        " reset to initial address (-s) when file offset\n"
        " reaches this value.\n"
        " Within the valid address space, set this value the\n"
        " larger the better, which ensures large disk head\n"
        " movement in random access for hard disk, and might\n"
        " trigger more data movements and block erasures\n"
        " inside SSDs (it highly depends on implementation\n"
        " of FTL).\n"
        " Set this value to the capacity of disk when testing\n"
        " with device file.\n"
        "\n"
        " -t <count> Number of threads.\n"
        "\n"
        " -w <percent> Percent of write requests. 0-100. Default 50.\n"
        "\n");
}
/*
 * Command-line options. Each variable holds its default here and may be
 * overwritten by get_options(); the worker threads only read them.
 */
/* -d: per-thread time limit in seconds (do_io adds it to time(NULL)). */
int duration = 10;
/* -f: path of the file or block device that do_io opens for the test. */
char filename[MAX_FILE_NAME_LENGTH] = "testfile.tmp";
/* -n: maximum number of requests issued by each thread. */
int request_count = 100;
/*
 * -H: set to "\n" when human friendly output is requested, otherwise left
 * empty. It is printed with %s between groups of fields in the reports.
 */
char human_readable[2];
/* -o: name of the report file; empty when -o was not given. */
char output_filename[MAX_FILE_NAME_LENGTH];
/* Stream opened from output_filename; while NULL, GET_OUTPUT uses stdout. */
FILE *output_file;
/* -p: request size in bytes (the option is given in 512-byte sectors). */
int page_size = 4096;
/* -P: non-zero makes do_io print one "io,..." line per request. */
int print_detail = 0;
/* -r: non-zero selects random instead of sequential addresses. */
int random_addr = 0;
/* -R: pause between the creation of two threads. */
int rampup_interval = 0; /* in us (microsecond) */
/* -s: initial file offset in bytes. */
off_t start_addr = 0;
/* -S: end offset in bytes; defaults to 16 MiB. */
off_t seek_span = 16*1024*1024;
/* -t: number of worker threads started by start_io_threads(). */
int thread_count = 1;
/* -w: percentage of requests that are writes, validated to 0..100. */
int write_percent = 50;
/*
 * Close the report stream if one was opened.
 *
 * Registered with atexit() in main(), so it runs on every exit() path.
 * Nothing happens while output_file is still NULL.
 */
void close_file(void)
{
    if (output_file == NULL) {
        return;
    }

    fclose(output_file);
}
/*
 * Write the effective configuration as one "configuration:" record to the
 * report stream (the -o file, or stdout when none was opened).
 *
 * human_readable is inserted at four places; it is either empty or "\n",
 * which turns the single line into several shorter ones.
 */
void print_option_values(void)
{
    FILE *out = GET_OUTPUT(output_file);
    const char *sep = human_readable;
    long long int first_offset = (long long int)start_addr;
    long long int last_offset = (long long int)seek_span;

    fprintf(out, "configuration: %s"
        "request_count %d , filename %s , %s"
        "page_size %d , write_percent %d , random_addr %d , duration %d , %s"
        "start_addr %lld , seek_span %lld , thread_count %d , %s"
        "rampup_interval %d , print_detail %d , output_filename %s\n",
        sep,
        request_count, filename, sep,
        page_size, write_percent, random_addr, duration, sep,
        first_offset, last_offset, thread_count, sep,
        rampup_interval, print_detail, output_filename);
}
/*
 * Totals over all worker threads. Each do_io() instance adds its private
 * counters to these once, at the end of its run, while holding
 * mutex_mix_sum; main() reads them after all threads have been joined.
 * The *_time values are sums of per-request latencies in microseconds.
 */
/* All requests: summed latency and number of requests. */
time_t sum_time = 0;
long total_count = 0;
/* Write requests only. */
time_t w_sum_time = 0;
long w_total_count = 0;
/* Read requests only. */
time_t r_sum_time = 0;
long r_total_count = 0;
/* Serialises the updates of the six totals above. */
pthread_mutex_t mutex_mix_sum = PTHREAD_MUTEX_INITIALIZER;
/*
 * Round "addr" down to a multiple of SECTOR_SIZE.
 *
 * Any value below SECTOR_SIZE, including negative ones, yields 0.
 * The aligned value is returned; the argument itself is passed by value,
 * so callers must store the result.
 *
 * Declared "static inline": a plain "inline" definition provides no
 * external definition in C99/C11, so a call that the compiler chooses not
 * to inline (e.g. at -O0) would fail to link.
 */
static inline off_t align_address(off_t addr)
{
    if (addr < SECTOR_SIZE)
        return 0;

    return addr - addr % SECTOR_SIZE;
}
/*
 * Create "dir" and every missing parent directory, like "mkdir -p".
 *
 * Directories are requested with mode 0755 (the process umask still
 * applies). A component for which mkdir() fails is accepted when it turns
 * out to be an existing directory; anything else aborts the walk.
 *
 * Returns 0 when "dir" exists as a directory afterwards, -1 otherwise.
 */
static int make_parent_dirs(const char *dir)
{
    size_t len = strlen(dir);
    char *path;
    char *p;
    int rc = 0;

    if (len == 0)
        return -1;
    path = malloc(len + 1);
    if (path == NULL)
        return -1;
    memcpy(path, dir, len + 1);

    /* Start behind the first character so a leading '/' is never cut off. */
    for (p = path + 1; ; p++) {
        char saved = *p;

        if (saved != '/' && saved != '\0')
            continue;

        /* Temporarily terminate the string after the current component. */
        *p = '\0';
        if (mkdir(path, 0755) != 0) {
            struct stat st;

            if (stat(path, &st) != 0 || !S_ISDIR(st.st_mode)) {
                rc = -1;
                break;
            }
        }
        *p = saved;
        if (saved == '\0')
            break;
    }

    free(path);
    return rc;
}

/*
 * Make sure the directory part of "filename" is usable.
 *
 * - An existing block device is accepted as is.
 * - A directory part of ".", "./" or "/" needs no work.
 * - If the directory exists, nothing is done.
 * - If it does not exist and "create" is greater than 0, it is created
 *   together with its parents. The directories are made with mkdir(2)
 *   directly instead of a shell command, so names containing spaces or
 *   shell metacharacters are handled literally.
 * - If it does not exist and "create" is not greater than 0, nothing is
 *   created and 0 is still returned.
 *
 * Returns 0 on success, the errno value of a failed stat() (other than
 * ENOENT), or -1 when the directory name cannot be determined, memory is
 * exhausted or the directory cannot be created.
 */
int check_path(const char *filename, int create)
{
    struct stat s;
    int err_stat;
    int saved_errno;
    int rc = 0;
    size_t len;
    char *file;
    char *dir;

    err_stat = stat(filename, &s);
    if (err_stat != -1 && S_ISBLK(s.st_mode))
        return 0;
    if (err_stat == -1 && errno != ENOENT) {
        /* perror() may change errno, so remember it first. */
        saved_errno = errno;
        perror("check_path:blkfile_check:stat");
        return saved_errno;
    }

    /* dirname() may modify its argument: give it a private copy. */
    len = strlen(filename) + 1;
    file = malloc(len);
    if (file == NULL) {
        perror("check_path:malloc");
        return -1;
    }
    memcpy(file, filename, len);

    /* "dir" may point into "file", so "file" is released only at the end. */
    dir = dirname(file);
    if (dir == NULL) {
        fprintf(stderr, "failed to get directory for %s.", filename);
        rc = -1;
        goto out;
    }
    if (strcmp(dir, ".") == 0 || strcmp(dir, "/") == 0 ||
        strcmp(dir, "./") == 0)
        goto out;

    err_stat = stat(dir, &s);
    if (err_stat != -1 && S_ISDIR(s.st_mode))
        goto out;
    if (err_stat == -1 && errno != ENOENT) {
        saved_errno = errno;
        perror("check_path:dir_check:stat");
        rc = saved_errno;
        goto out;
    }

    if (create > 0 && make_parent_dirs(dir) != 0)
        rc = -1;

out:
    free(file);
    return rc;
}
/*
 * Parse the command line into the global option variables.
 *
 * Every numeric option is range-checked; an invalid value prints a message
 * and terminates the process with an option-specific negative exit code.
 * -h and unknown options print the usage text and exit with 0.
 * -f and -o also make sure the directory of the given file exists
 * (creating it when necessary), and -o opens the report file in append
 * mode.
 *
 * -s and -S are rounded down to a multiple of the sector size; the result
 * of align_address() is stored back, because that function returns the
 * aligned value instead of modifying its argument.
 *
 * After parsing, if only one of -d / -n was given, the other limit is set
 * to INT_MAX so that it never stops the test.
 */
void get_options(int argc, char **argv)
{
    int duration_enabled = 0;
    int req_count_enabled = 0;
    int saved_errno;
    int opt;

    while ((opt = getopt(argc, argv, "d:f:hHn:o:Pp:rR:s:S:t:w:")) != -1) {
        switch (opt) {
        case 'd':
            duration = atoi(optarg);
            if (duration <= 0) {
                printf("incorrect value %s for -d <duration>.\n", optarg);
                exit(-1);
            }
            duration_enabled = 1;
            break;
        case 'f':
            strncpy(filename, optarg, MAX_FILE_NAME_LENGTH - 1);
            /* strncpy() does not terminate the string when it truncates. */
            filename[MAX_FILE_NAME_LENGTH - 1] = '\0';
            if (check_path(filename, 1) != 0) {
                printf("Failed to validate/create path for %s\n", filename);
                exit(-2);
            }
            break;
        case 'n':
            request_count = atoi(optarg);
            if (request_count <= 0) {
                printf("incorrect value %s for -n <count>.\n", optarg);
                exit(-3);
            }
            req_count_enabled = 1;
            break;
        case 'h':
            usage();
            exit(0);
            break;
        case 'H':
            strcpy(human_readable, "\n");
            break;
        case 'o':
            strncpy(output_filename, optarg, MAX_FILE_NAME_LENGTH - 1);
            output_filename[MAX_FILE_NAME_LENGTH - 1] = '\0';
            if (check_path(output_filename, 1) != 0) {
                printf("Failed to validate/create path for %s\n",
                       output_filename);
                exit(-4);
            }
            /* A repeated -o replaces the earlier stream instead of leaking it. */
            if (output_file != NULL)
                fclose(output_file);
            output_file = fopen(output_filename, "a+");
            if (output_file == NULL) {
                saved_errno = errno;
                perror("Error open output file.\n");
                exit(saved_errno);
            }
            break;
        case 'p':
            page_size = atoi(optarg);
            if (page_size < 1 || page_size > 2048) {
                printf("incorrect value %s for -p <size>.\n", optarg);
                exit(-5);
            }
            /* The option counts sectors; the rest of the code uses bytes. */
            page_size *= SECTOR_SIZE;
            break;
        case 'P':
            print_detail = 1;
            break;
        case 'r':
            random_addr = 1;
            break;
        case 'R':
            /* Stored unchanged; start_io_threads() treats it as microseconds. */
            rampup_interval = atoi(optarg);
            if (rampup_interval < 1) {
                printf("incorrect value %s for -R <time>.\n", optarg);
                exit(-6);
            }
            break;
        case 's':
            start_addr = atoll(optarg);
            if (start_addr < 0) {
                printf("incorrect value %s for -s <addr>.\n", optarg);
                exit(-7);
            }
            start_addr = align_address(start_addr);
            break;
        case 'S':
            seek_span = atoll(optarg);
            if (seek_span <= 0) {
                printf("incorrect value %s for -S <addr>.\n", optarg);
                exit(-8);
            }
            seek_span = align_address(seek_span);
            break;
        case 't':
            thread_count = atoi(optarg);
            if (thread_count < 1) {
                printf("incorrect value %s for -t <count>.\n", optarg);
                exit(-9);
            }
            break;
        case 'w':
            write_percent = atoi(optarg);
            if (write_percent > 100 || write_percent < 0) {
                printf("incorrect value %s for -w <percent>, "
                       "should be [0, 100].\n", optarg);
                exit(-10);
            }
            break;
        case ':':
            /*
             * getopt() only returns ':' when the option string starts with
             * ':'. optarg is not set in that case; the offending option
             * character is in optopt.
             */
            printf("-%c takes an argument which is missing.\n", optopt);
            break;
        case '?':
            usage();
            exit(0);
            break;
        }
    }

    if (optind < argc) {
        printf("There are invalid options.\n");
        usage();
        exit(-11);
    }

    /* disable another stop timer if only one timer specified */
    if (duration_enabled && !req_count_enabled)
        request_count = INT_MAX;
    if (!duration_enabled && req_count_enabled)
        duration = INT_MAX;
}
inline off_t reposition_offset(int fd, long req_count)
{
off_t offset = 0;
off_t cursor = 0;
if (random_addr > 0) {
/* get random address */
struct timeval tv_rand;
if (gettimeofday(&tv_rand, NULL) < 0) {
perror("reposition_offset:gettimeofday()");
exit(errno);
}
srand((unsigned int)(tv_rand.tv_usec * tv_rand.tv_sec * seek_span));
cursor = (double)rand() / RAND_MAX * seek_span;
cursor = align_address(cursor);
/* reposition the offset of open file.
* For hard disk, lseek dosen't move the disk arm to specified address,
* the overhead of lseek is usually very small, should be less than 2us.
* the overhead of disk arm moving is included in read/write function.
*/
offset = lseek64(fd, cursor, SEEK_SET);
if (offset == -1) {
fprintf(stderr, "The end offset %lld specified by -S may be larger "
"than the file length.\n", (long long int)seek_span);
perror("reposition_offset:lseek:");
exit(errno);
}
} else {
/* rewind to file start when advancing beyond the file size. */
if ((off_t)((req_count + 1) * page_size) > seek_span) {
offset = lseek64(fd, start_addr, SEEK_SET);
if (offset == -1) {
perror("lseek(): init start_addr error.\n");
exit(errno);
}
} else {
/* do nothing, file offset automatically advances sequentially. */
offset = req_count * page_size;
}
}
return offset;
}
/*
 * Decide whether the next request is a write.
 *
 * Returns 1 for a write and 0 for a read. With write_percent 0 or 100 the
 * answer is fixed. Otherwise a number in [0, 100] is drawn and compared
 * with write_percent; the generator is re-seeded from the current time on
 * every call. The seed is computed with unsigned arithmetic so that the
 * multiplication cannot overflow a signed type.
 *
 * A gettimeofday() failure prints a diagnostic and terminates the process.
 *
 * "static inline" is required: a plain "inline" definition has no external
 * definition in C99/C11 and may fail to link when the call is not inlined.
 */
static inline int should_write(void)
{
    struct timeval tv_rand;
    int perc;

    if (write_percent == 0)
        return 0;
    if (write_percent == 100)
        return 1;

    /* flip the coin to decide read or write */
    if (gettimeofday(&tv_rand, NULL) < 0) {
        int saved_errno = errno;

        perror("should_write:gettimeofday()");
        exit(saved_errno);
    }
    srand((unsigned int)tv_rand.tv_usec * (unsigned int)tv_rand.tv_sec);
    perc = (int)((double)rand() / RAND_MAX * 100);

    return perc < write_percent ? 1 : 0;
}
/*
 * Worker thread: issue read/write requests against the test file and
 * account their latency.
 *
 * The signature matches what pthread_create() expects from a start
 * routine; "arg" is not used. Always returns NULL.
 *
 * Each thread opens its own descriptor on "filename" (O_SYNC, plus
 * O_DIRECT and O_LARGEFILE on Linux) and allocates a sector-aligned buffer
 * of page_size bytes. It then loops until either "duration" seconds have
 * passed or "request_count" requests were issued. For every request it
 *   1. positions the file offset (reposition_offset),
 *   2. chooses read or write (should_write),
 *   3. fills the buffer with one printable character before a write,
 *   4. times only the read()/write() call with gettimeofday().
 * With -P one "io,<sec>,<r|w>,<bytes>,<offset>,<latency us>" line is
 * printed per request. With more than one thread a per-thread summary is
 * printed at the end. Finally the private counters are added to the global
 * totals under mutex_mix_sum.
 *
 * Any system call failure prints a diagnostic and terminates the whole
 * process through exit().
 */
void *do_io(void *arg)
{
    int iData = 33;
    void *aligned = NULL;
    char *buffer;
    ssize_t retval;
    struct timeval tv_before;
    struct timeval tv_after;
    off_t offset;
    long local_count = 0;
    long local_sum_time = 0;
    long time_elapsed;
    long r_count = 0;
    long w_count = 0;
    long r_time = 0;
    long w_time = 0;
    int flags = O_CREAT | O_RDWR | O_SYNC;
    int fd_test;
    int err;
    time_t stop_time;
    long i = 0;

    (void)arg;

#ifdef __linux__
    flags |= O_DIRECT;
    flags |= O_LARGEFILE;
#endif
    fd_test = open(filename, flags, 0666);
    if (fd_test < 0) {
        err = errno;
        fprintf(stderr, "error to open file %s, please check permission.\n",
                filename);
        /* fprintf() may have changed errno; restore it for perror(). */
        errno = err;
        perror("do_io:open()");
        exit(err);
    }

    /* posix_memalign() returns the error number and leaves errno alone. */
    err = posix_memalign(&aligned, SECTOR_SIZE, page_size);
    if (err != 0) {
        errno = err;
        perror("do_io:posix_memalign()");
        exit(err);
    }
    buffer = aligned;

    stop_time = time(NULL) + duration;
    while (time(NULL) < stop_time && i < request_count) {
        int is_write;

        offset = reposition_offset(fd_test, i);
        is_write = should_write();

        /* prepare human readable data for write */
        if (is_write > 0) {
            iData++;
            if (iData > 126)
                iData = 33;
            memset(buffer, iData, page_size);
        }

        if (gettimeofday(&tv_before, NULL) < 0) {
            err = errno;
            perror("do_io:gettimeofday()");
            exit(err);
        }
        if (is_write > 0)
            retval = write(fd_test, buffer, page_size);
        else
            retval = read(fd_test, buffer, page_size);
        /* NOTE(review): a short transfer (retval < page_size) counts as done. */
        if (retval < 0) {
            err = errno;
            perror(is_write > 0 ? "write() error.\n" : "read() error.\n");
            exit(err);
        }
        if (gettimeofday(&tv_after, NULL) < 0) {
            err = errno;
            perror("do_io:gettimeofday()");
            exit(err);
        }

        /* Latency of this request in microseconds. */
        time_elapsed = (tv_after.tv_sec - tv_before.tv_sec) * 1000000 +
                       tv_after.tv_usec - tv_before.tv_usec;

        if (print_detail > 0)
            fprintf(GET_OUTPUT(output_file), "io,%ld,%s,%d,%lld,%ld\n",
                    (long)tv_before.tv_sec, is_write == 1 ? "w": "r",
                    page_size, (long long int)offset, time_elapsed);

        local_count++;
        local_sum_time += time_elapsed;
        if (is_write > 0) {
            w_count++;
            w_time += time_elapsed;
        } else {
            r_count++;
            r_time += time_elapsed;
        }
        i++;
    }

    if (thread_count > 1) {
        fprintf(GET_OUTPUT(output_file), "thread: page_size %d , "
                "[ read_count %ld , read_time(us) %ld , avg_latency(us) %ld ], "
                "[ write_count %ld , write_time(us) %ld , avg_latency(us) %ld ], "
                "[ total_count %ld , time(us) %ld , avg_latency(us) %ld ]\n",
                page_size,
                r_count, r_time, r_count == 0 ? 0L : r_time / r_count,
                w_count, w_time, w_count == 0 ? 0L : w_time / w_count,
                local_count, local_sum_time,
                local_count == 0 ? 0L : local_sum_time / local_count);
    }

    pthread_mutex_lock(&mutex_mix_sum);
    sum_time += local_sum_time;
    total_count += local_count;
    w_sum_time += w_time;
    w_total_count += w_count;
    r_sum_time += r_time;
    r_total_count += r_count;
    pthread_mutex_unlock(&mutex_mix_sum);

    close(fd_test);
    free(buffer);
    return NULL;
}
/*
 * Start thread_count worker threads running do_io() and wait for all of
 * them to finish.
 *
 * When rampup_interval (microseconds) is greater than zero, the function
 * sleeps that long after creating each thread; an interrupted sleep is
 * resumed with the remaining time.
 *
 * pthread_create() and pthread_join() report failure through their return
 * value and do not set errno, so the returned code is copied into errno
 * before perror() is used. A failed allocation or thread creation
 * terminates the process; a failed join is only reported.
 */
void start_io_threads(void)
{
    pthread_t *g_tid;
    int ret;
    int i;

    /* One pthread_t per thread (not one pointer per thread). */
    g_tid = malloc(sizeof *g_tid * (size_t)thread_count);
    if (g_tid == NULL) {
        ret = errno;
        perror("start_io_threads:malloc()");
        exit(ret);
    }

    for (i = 0; i < thread_count; i++) {
        ret = pthread_create(&g_tid[i], NULL, do_io, NULL);
        if (ret != 0) {
            errno = ret;
            perror("error creating threads.\n");
            if (ret == EAGAIN) {
                errno = ret;
                perror("not enough system resources.\n");
            }
            exit(ret);
        }

        if (rampup_interval > 0) {
            struct timespec ts, rem;

            ts.tv_sec = (time_t) (rampup_interval / 1000000);
            ts.tv_nsec = (long) (rampup_interval % 1000000) * 1000;
            while (nanosleep(&ts, &rem) == -1) {
                if (errno == EINTR) {
                    /* Interrupted by a signal: sleep the remainder. */
                    memcpy(&ts, &rem, sizeof(struct timespec));
                } else {
                    printf("sleep time invalid %ld s %ld ns",
                           (long)ts.tv_sec, (long)ts.tv_nsec);
                    break;
                }
            }
        }
    }

    for (i = 0; i < thread_count; i++) {
        ret = pthread_join(g_tid[i], NULL);
        if (ret != 0) {
            errno = ret;
            perror("thread wait error.\n");
        }
    }

    free(g_tid);
}
/*
 * Program entry point.
 *
 * Registers close_file() to run at exit, parses the command line, prints
 * the effective configuration, runs the worker threads to completion and
 * prints one "summary:" record with the read, write and overall request
 * counts, summed latencies and average latencies (all in microseconds).
 *
 * The time_t totals are converted to long before printing so that the
 * arguments match the %ld conversions on every platform, and each average
 * is guarded by its own divisor.
 *
 * Returns 0; option errors and I/O failures terminate the process earlier
 * through exit().
 */
int main(int argc, char **argv)
{
    long r_avg;
    long w_avg;
    long avg;

    atexit(close_file);
    get_options(argc, argv);
    print_option_values();
    start_io_threads();

    r_avg = r_total_count == 0 ? 0 : (long)(r_sum_time / r_total_count);
    w_avg = w_total_count == 0 ? 0 : (long)(w_sum_time / w_total_count);
    avg = total_count == 0 ? 0 : (long)(sum_time / total_count);

    fprintf(GET_OUTPUT(output_file), "summary: page_size %d , %s"
            "[ read_count %ld , read_time(us) %ld , avg_latency(us) %ld ], %s"
            "[ write_count %ld , write_time(us) %ld , avg_latency(us) %ld ], %s"
            "[ total_count %ld , time(us) %ld , avg_latency(us) %ld ]\n",
            page_size, human_readable,
            r_total_count, (long)r_sum_time, r_avg, human_readable,
            w_total_count, (long)w_sum_time, w_avg, human_readable,
            total_count, (long)sum_time, avg);
    return 0;
}