Skip to content

Commit

Permalink
Merge pull request #3901 from sysown/v2.x-3889
Browse files Browse the repository at this point in the history
Fix several invalid memory accesses reported by AFL + ASAN - Closes #3889
  • Loading branch information
renecannao authored Jun 19, 2022
2 parents 3afcc12 + 0ef9086 commit a05e466
Show file tree
Hide file tree
Showing 35 changed files with 514 additions and 22 deletions.
123 changes: 109 additions & 14 deletions lib/c_tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ static inline char is_hex_char(char c)

// between pointer, check string is number - need to be changed more functions
// TODO: f-1 shouldn't be access if 'f' is the first position supplied, could lead to
// buffer overflow.
// buffer overflow. NOTE: This is now addressed by 'is_digit_string_2'.
static char is_digit_string(char *f, char *t)
{
if(f == t)
Expand Down Expand Up @@ -1043,7 +1043,10 @@ enum p_st get_next_st(const struct options* opts, struct shared_st* shared_st) {
enum p_st st = st_no_mark_found;

// cmnt type 1 - start with '/*'
if(*shared_st->q == '/' && *(shared_st->q+1) == '*') {
if(
// v1_crashing_payload_05
shared_st->q_cur_pos < (shared_st->d_max_len-1) && *shared_st->q == '/' && *(shared_st->q+1) == '*'
) {
st = st_cmnt_type_1;
}
// cmnt type 2 - start with '#'
Expand Down Expand Up @@ -1104,6 +1107,61 @@ void copy_next_char(shared_st* shared_st, options* opts) {

char cur_cmd_cmnt[FIRST_COMMENT_MAX_LENGTH];

/**
* @brief Safer version of 'is_digit_string' performing boundary checks.
*
* @param shared_st The shared state used for the boundary checks.
* @param f Initial position of the string being checked.
* @param t Final position of the string being checked.
*
* @return '1' if the supplied string is recognized as a 'digit_string', '0' otherwise.
*/
static char is_digit_string_2(shared_st* shared_st, char *f, char *t)
{
if(f == t)
{
if(is_digit_char(*f))
return 1;
else
return 0;
}

int is_hex = 0;
int i = 0;

// 0x, 0X, n.m, nE+m, nE-m, Em
while(f != t)
{
char is_float = 0;

if (f > shared_st->res_init_pos) {
is_float = *f == '.' || tolower(*f) == 'e' || (tolower(*(f-1)) == 'e' && (*f == '+' || *f == '-'));
} else {
is_float = *f == '.' || tolower(*f) == 'e';
}

if(f > shared_st->res_init_pos && i == 1 && *(f-1) == '0' && (*f == 'x' || *f == 'X'))
{
is_hex = 1;
}
// none hex
else if(!is_hex && !is_digit_char(*f) && is_float == 0)
{
return 0;
}
// hex
else if(is_hex && !is_hex_char(*f))
{
return 0;
}

f++;
i++;
}

return 1;
}

/**
* @brief Process a detected comment of type "/\* *\/". Determines when to exit the 'st_cmnt_type_1' state.
* @details Function assumes that 'shared_st->q' is pointing to the initial mark '/' of the comment start, and
Expand All @@ -1124,7 +1182,8 @@ enum p_st process_cmnt_type_1(options* opts, shared_st* shared_st, cmnt_type_1_s
const char* res_final_pos = shared_st->res_init_pos + shared_st->d_max_len;

// initial mark "/*|/*!" detection
if (shared_st->res_cur_pos < (res_final_pos - 2) && *shared_st->q == '/' && *(shared_st->q+1) == '*') {
// comments are not copied by while processed, boundary checks should rely on 'q_cur_pos' and 'q_len'.
if (shared_st->q_cur_pos <= (shared_st->q_len-2) && *shared_st->q == '/' && *(shared_st->q+1) == '*') {
c_t_1_st->cur_cmd_cmnt_len = 0;

// check length before accessing beyond 'q_cur_pos + 1'
Expand All @@ -1143,6 +1202,11 @@ enum p_st process_cmnt_type_1(options* opts, shared_st* shared_st, cmnt_type_1_s
// discard processed "/*" or "/*!"
shared_st->q += 2 + c_t_1_st->is_cmd;
shared_st->q_cur_pos += 2 + c_t_1_st->is_cmd;

// v1_crashing_payload_04
if (shared_st->q_cur_pos >= shared_st->q_len - 1) {
return st_no_mark_found;
}
}

// TODO: Check if there is exclusion between this regular first comments and first comment that are 'cmd'
Expand Down Expand Up @@ -1272,6 +1336,8 @@ enum p_st process_cmnt_type_1(options* opts, shared_st* shared_st, cmnt_type_1_s
shared_st->prev_char = ' ';
// back to main shared_st->query parsing state
next_st = st_no_mark_found;
// reset the comment processing state (v1_crashing_payload_04)
c_t_1_st->is_cmd = 0;
// skip ending mark for comment for next iteration
shared_st->q_cur_pos += 1;
shared_st->q++;
Expand All @@ -1295,10 +1361,15 @@ static __attribute__((always_inline)) inline
enum p_st process_cmnt_type_2(shared_st* shared_st) {
enum p_st next_state = st_cmnt_type_2;

// discard processed "#"
if (*shared_st->q == '#') {
// discard processed "#" (v1_crashing_payload_02)
if (*shared_st->q == '#' && shared_st->q_cur_pos <= (shared_st->q_len - 2)) {
shared_st->q += 1;
shared_st->q_cur_pos += 1;

if (shared_st->q_cur_pos == (shared_st->q_len - 2)) {
next_state = st_no_mark_found;
return next_state;
}
}

if (*shared_st->q == '\n' || *shared_st->q == '\r' || (shared_st->q_cur_pos == shared_st->q_len - 1)) {
Expand Down Expand Up @@ -1328,9 +1399,17 @@ enum p_st process_cmnt_type_3(shared_st* shared_st) {
enum p_st next_state = st_cmnt_type_3;

// discard processed "-- "
if (*shared_st->q == '-' && *(shared_st->q+1)=='-' && is_space_char(*(shared_st->q+2))) {
if (
shared_st->q_cur_pos <= (shared_st->q_len - 4) &&
*shared_st->q == '-' && *(shared_st->q+1)=='-' && is_space_char(*(shared_st->q+2))
) {
shared_st->q += 3;
shared_st->q_cur_pos += 3;

if (shared_st->q_cur_pos == (shared_st->q_len - 4)) {
next_state = st_no_mark_found;
return next_state;
}
}

if (*shared_st->q == '\n' || *shared_st->q == '\r' || (shared_st->q_cur_pos == shared_st->q_len - 1)) {
Expand Down Expand Up @@ -1457,7 +1536,7 @@ enum p_st process_literal_digit(shared_st* shared_st, literal_digit_st* digit_st
char is_float_char = *shared_st->q == '.' ||
( tolower(shared_st->prev_char) == 'e' && ( *shared_st->q == '-' || *shared_st->q == '+' ) );
if ((is_token_char(*shared_st->q) && is_float_char == 0) || shared_st->q_len == shared_st->q_cur_pos + 1) {
if (is_digit_string(digit_st->start_pos, shared_st->res_cur_pos)) {
if (is_digit_string_2(shared_st, digit_st->start_pos, shared_st->res_cur_pos)) {
shared_st->res_cur_pos = digit_st->start_pos;

// place the replacement mark
Expand Down Expand Up @@ -1882,13 +1961,22 @@ void stage_2_parsing(shared_st* shared_st, stage_1_st* stage_1_st, stage_2_st* s
// second stage: Space and (+|-) replacement
while (shared_st->res_cur_pos <= digest_end) {
if (*shared_st->res_cur_pos == ' ') {
char lc = *(shared_st->res_cur_pos-1);
char lc = '0';

if (shared_st->res_cur_pos > shared_st->res_init_pos) {
lc = *(shared_st->res_cur_pos-1);
}

char rc = *(shared_st->res_cur_pos+1);

if (lc == '(' || rc == ')') {
shared_st->res_cur_pos++;
} else if ((is_arithmetic_op(lc) && rc == '?') || lc == ',' || rc == ',') {
char llc = *(shared_st->res_cur_pos-2);
char llc = '0';

if (shared_st->res_cur_pos > shared_st->res_init_pos + 1) {
llc = *(shared_st->res_cur_pos-2);
}

if (opts->keep_comment && (llc == '*' && lc == '/')) {
*shared_st->res_pre_pos++ = *shared_st->res_cur_pos++;
Expand All @@ -1901,8 +1989,14 @@ void stage_2_parsing(shared_st* shared_st, stage_1_st* stage_1_st, stage_2_st* s
*shared_st->res_pre_pos++ = *shared_st->res_cur_pos++;
}
} else if (*shared_st->res_cur_pos == '+' || *shared_st->res_cur_pos == '-') {
char llc = *(shared_st->res_cur_pos-2);
char lc = *(shared_st->res_cur_pos-1);
char llc = '0';
if (shared_st->res_cur_pos > shared_st->res_init_pos + 1) {
llc = *(shared_st->res_cur_pos-2);
}
char lc = '0';
if (shared_st->res_cur_pos > shared_st->res_init_pos) {
lc = *(shared_st->res_cur_pos-1);
}
char rc = *(shared_st->res_cur_pos+1);

// patterns to cover:
Expand All @@ -1929,7 +2023,7 @@ void stage_2_parsing(shared_st* shared_st, stage_1_st* stage_1_st, stage_2_st* s
}
}
} else if (opts->replace_number == 1 && is_digit_char(*shared_st->res_cur_pos) ) {
if (*(shared_st->res_pre_pos-1) != '?') {
if (shared_st->res_pre_pos > shared_st->res_init_pos && *(shared_st->res_pre_pos-1) != '?') {
*shared_st->res_pre_pos++ = '?';
}
shared_st->res_cur_pos++;
Expand Down Expand Up @@ -2387,9 +2481,10 @@ void final_stage(shared_st* shared_st, stage_1_st* stage_1_st, const options* op
// D: `select ? `
// ^ never collapsed
// ```
if (shared_st->res_cur_pos > shared_st->res_init_pos) {
{
// v1_crashing_payload_06
char* wspace = shared_st->res_cur_pos - 1;
while (*wspace == ' ') {
while (wspace > shared_st->res_init_pos && *wspace == ' ') {
wspace--;
}
wspace++;
Expand Down
28 changes: 28 additions & 0 deletions test/afl_digest_test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,33 @@ cd /src/test/afl_digest_test/
make
```

For better testing for invalid memory accesses, compiling with ASAN is recommended:

```
docker run -tid -v $(pwd):/src aflplusplus/aflplusplus
docker exec -it $(CONTAINER_ID) /bin/bash
cd /src/test/afl_digest_test/
export AFL_USE_ASAN=1
make
```

For checking that the compilation with ASAN was successful, you can check the binary symbols:

```
nm -a afl_test | grep '__asan\|__tsan\|__msan'
```

ASAN symbols should be visible:

```
...
U __asan_after_dynamic_init
U __asan_before_dynamic_init
U __asan_handle_no_return
U __asan_init
...
```

Then for launching an individual instance of `afl-fuzz` it's enough to run:

```
Expand All @@ -37,6 +64,7 @@ OPTIONS:
-l, --lowercase ARG Query digest 'LowerCase'
-n, --replace-null ARG Query digest 'ReplaceNULL'
-s, --digest-size ARG Query digest 'MaxLength'
-c, --keep-comment ARG Query digest 'KeepComment'. Value '0' or '1', default '0'.
```

Expand Down
17 changes: 16 additions & 1 deletion test/afl_digest_test/afl_mysql_query_digest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ __thread bool mysql_thread___query_digests_replace_null = true;
__thread bool mysql_thread___query_digests_no_digits = false;
__thread int mysql_thread___query_digests_grouping_limit = 3;
__thread int mysql_thread___query_digests_groups_grouping_limit = 1;
__thread int mysql_thread___query_digests_keep_comment = 0;

using std::string;
using option_err = std::pair<int, string>;
Expand Down Expand Up @@ -94,6 +95,10 @@ option_err parse_parameter_options(int argc, const char** argv) {
(const char *)"", 1, 1, 0, (const char *)"Query digest 'GroupsGroupingLimit'",
(const char *)"-G", (const char *)"--groups-grouping-limit"
);
opts.add(
(const char *)"", 1, 1, 0, (const char *)"Query digest 'KeepComment'",
(const char *)"-c", (const char *)"--keep-comment"
);

// parse the arguments
opts.parse(argc, argv);
Expand Down Expand Up @@ -145,6 +150,12 @@ option_err parse_parameter_options(int argc, const char** argv) {
} else {
mysql_thread___query_digests_groups_grouping_limit = option_value;
}
err_res = check_and_set_option(opts, "-c", &option_value);
if (err_res.first != EXIT_SUCCESS) {
return err_res;
} else {
mysql_thread___query_digests_keep_comment = option_value;
}

return err_res;
}
Expand Down Expand Up @@ -173,7 +184,11 @@ int main(int argc, const char** argv) {

while (__AFL_LOOP(10000)) {
int len = __AFL_FUZZ_TESTCASE_LEN;
process_digest_test(buf, len);

unsigned char* alloc_buff = static_cast<unsigned char*>(malloc(len));
memcpy(alloc_buff, buf, len);

process_digest_test(alloc_buff, len);
}

return 0;
Expand Down
22 changes: 16 additions & 6 deletions test/afl_digest_test/launch_tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
screen -d -m afl-fuzz -M main-$HOSTNAME -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 50 -g 0 -G 0
if [ "$1" != "keep_comment" ]; then
screen -d -m afl-fuzz -M main-$HOSTNAME -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 50 -g 0 -G 0

screen -d -m afl-fuzz -S variant-1 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 50 -g 1 -G 1
screen -d -m afl-fuzz -S variant-2 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 100 -g 2 -G 2
screen -d -m afl-fuzz -S variant-3 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 128 -g 3 -G 3
screen -d -m afl-fuzz -S variant-4 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 300 -g 4 -G 4
screen -d -m afl-fuzz -S variant-5 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 1000 -g 5 -G 5
screen -d -m afl-fuzz -S variant-1 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 50 -g 1 -G 1
screen -d -m afl-fuzz -S variant-2 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 100 -g 2 -G 2
screen -d -m afl-fuzz -S variant-3 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 128 -g 3 -G 3
screen -d -m afl-fuzz -S variant-4 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 300 -g 4 -G 4
screen -d -m afl-fuzz -S variant-5 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 1000 -g 5 -G 5
else
screen -d -m afl-fuzz -M main-$HOSTNAME -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 50 -g 0 -G 0 -c 1

screen -d -m afl-fuzz -S variant-1 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 50 -g 1 -G 1 -c 1
screen -d -m afl-fuzz -S variant-2 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 100 -g 2 -G 2 -c 1
screen -d -m afl-fuzz -S variant-3 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 128 -g 3 -G 3 -c 1
screen -d -m afl-fuzz -S variant-4 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 300 -g 4 -G 4 -c 1
screen -d -m afl-fuzz -S variant-5 -i inputs/ -o output/ -- ./afl_test -d 1 -l 1 -n 1 -s 1000 -g 5 -G 5 -c 1
fi
2 changes: 1 addition & 1 deletion test/tap/tests/test_mysql_query_digests_stages-t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ int main(int argc, char** argv) {
exec_grouping_tests = false;
}
if (tests_filter_str.find("regular") == std::string::npos) {
exec_grouping_tests = false;
exec_regular_tests = false;
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT `dddddddddddddddddddddddddddddddddd`.* FROM `dddddddddddddddddddddddddddddddd`.`dddddddddddddddddddddddddddddddddd` WHERE `dddddddddddddddddddddddddddddddddd`.`dddddd` = 000000 AND `dddddddddddddddddddddddddddddddddd`.`ssssss` = 0 AND `dddddddddddddddddddddddddddddddddd`.`ddddddd` = 'dddddddddddeeeeeeeeeeeee' AND `eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee`.`iiiiiii` IN (000000, 000000, 000000, 000000, 000000, 000000, 000000, 000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000, 0000000) /*aaaaaaaaaaa:ppppppppppp,cccccccccc:eee/mmmmmmmmm/mmmmmm/uuuuu,aaaaaa:uuuuuu,llll:/aaa/lll/mmmmmmmmmmmmmmm/ppppppp/ooooooo/mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm ii aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'*/
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions test/tap/tests/tokenizer_payloads/crashing_payload_19.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
22
Binary file not shown.
1 change: 1 addition & 0 deletions test/tap/tests/tokenizer_payloads/crashing_payload_21.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- ( 000�m{6�*0 o#*, We(((�((((0400000*�-v
Expand Down
1 change: 1 addition & 0 deletions test/tap/tests/tokenizer_payloads/crashing_payload_22.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--W--W�=T(4(fm3�4.-0)
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions test/tap/tests/tokenizer_payloads/crashing_payload_8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# WeirdFirstComment\nINSER+ /* fst_comment */ INTO /*! random_comment */ db.table ( col1, col2,col3,col4, col5 ) VALUES ('val',1, 2,3,'foo'), ('val2',3,NULL,4,'foo2'), ('val2', 5,0x0239192,4,'foo2'), ('val2', 7,NULL,4,'foo2'), (1.1E+9, 2.9E-9
0x23914993, 928.2939123), ('val2',3�ULL,4,'ooo2'), ('val2',3*3.293192493419231,NULL,4+2,'foo2'), ('val2', "9212312",NULL,92.1293123,"foo2"), ON DUPLICATE KEY UPDATE col1 = VALUES(col2) -- final_comment \n
1 change: 1 addition & 0 deletions test/tap/tests/tokenizer_payloads/crashing_payload_9.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
22
Loading

0 comments on commit a05e466

Please sign in to comment.