Skip to content

Commit

Permalink
Allow UTF-8 characters outside of ASCII
Browse files Browse the repository at this point in the history
A badlist of disallowed characters will need to be created later.
  • Loading branch information
DemiMarie committed Jan 31, 2023
1 parent ce2df91 commit 372fcab
Showing 1 changed file with 94 additions and 2 deletions.
96 changes: 94 additions & 2 deletions qrexec-lib/unpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,90 @@ void fix_times_and_perms(struct file_header *untrusted_hdr,
do_exit(errno, untrusted_name);
}

/*
 * Validate a single UTF-8 encoded character starting at untrusted_c.
 *
 * Returns the number of bytes occupied by the character (1 to 4), or 0 if
 * the bytes do not form a well-formed UTF-8 sequence.  Overlong encodings,
 * UTF-16 surrogates (U+D800..U+DFFF) and code points above U+10FFFF are
 * rejected.  A NUL byte is reported as a valid one-byte character.
 *
 * The input must be NUL-terminated.  A byte is only read after every byte
 * before it has been checked to be non-NUL (a NUL never matches any of the
 * lead or tail ranges, so the function returns 0 before stepping past it),
 * which means the function never reads beyond the terminator.
 */
static int validate_utf8_char(const unsigned char *untrusted_c) {
    int tails_count = 0; /* generic tail bytes still to be checked */
    int total_size = 0;  /* length of the whole sequence in bytes */

    /* According to http://www.ietf.org/rfc/rfc3629.txt:
     *   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
     *   UTF8-1      = %x00-7F
     *   UTF8-2      = %xC2-DF UTF8-tail
     *   UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
     *                 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
     *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
     *                 %xF4 %x80-8F 2( UTF8-tail )
     *   UTF8-tail   = %x80-BF
     */

    if (*untrusted_c <= 0x7F) {
        return 1;
    } else if (*untrusted_c >= 0xC2 && *untrusted_c <= 0xDF) {
        total_size = 2;
        tails_count = 1;
    } else {
        switch (*untrusted_c) {
        case 0xE0:
            /* second byte is restricted to exclude overlong encodings */
            untrusted_c++;
            total_size = 3;
            if (*untrusted_c >= 0xA0 && *untrusted_c <= 0xBF)
                tails_count = 1;
            else
                return 0;
            break;
        case 0xE1: case 0xE2: case 0xE3: case 0xE4:
        case 0xE5: case 0xE6: case 0xE7: case 0xE8:
        case 0xE9: case 0xEA: case 0xEB: case 0xEC:
        /* 0xED is handled separately below */
        case 0xEE:
        case 0xEF:
            total_size = 3;
            tails_count = 2;
            break;
        case 0xED:
            /* second byte is restricted to exclude UTF-16 surrogates */
            untrusted_c++;
            total_size = 3;
            if (*untrusted_c >= 0x80 && *untrusted_c <= 0x9F)
                tails_count = 1;
            else
                return 0;
            break;
        case 0xF0:
            /* second byte is restricted to exclude overlong encodings */
            untrusted_c++;
            total_size = 4;
            if (*untrusted_c >= 0x90 && *untrusted_c <= 0xBF)
                tails_count = 2;
            else
                return 0;
            break;
        case 0xF1:
        case 0xF2:
        case 0xF3:
            total_size = 4;
            tails_count = 3;
            break;
        case 0xF4:
            /* second byte is restricted to stay at or below U+10FFFF */
            untrusted_c++;
            /* Must be set here too: leaving it at 0 would make every valid
             * U+100000..U+10FFFF sequence look invalid to the caller. */
            total_size = 4;
            if (*untrusted_c >= 0x80 && *untrusted_c <= 0x8F)
                tails_count = 2;
            else
                return 0;
            break;
        default:
            /* 0x80-0xC1 and 0xF5-0xFF can never start a character */
            return 0;
        }
    }

    /* The remaining bytes must all be generic continuation bytes. */
    while (tails_count-- > 0) {
        untrusted_c++;
        if (!(*untrusted_c >= 0x80 && *untrusted_c <= 0xBF))
            return 0;
    }
    return total_size;
}

static size_t validate_path(const char *const untrusted_name, size_t allowed_leading_dotdot)
{
Expand Down Expand Up @@ -161,8 +245,16 @@ static size_t validate_path(const char *const untrusted_name, size_t allowed_lea
abort();
}
}
if (untrusted_name[i] < 0x20 || (unsigned char)untrusted_name[i] > 0x7F)
do_exit(EILSEQ, untrusted_name); // path is non-ASCII or has control characters
if (untrusted_name[i] < 0x20 || (unsigned char)untrusted_name[i] == 0x7F)
do_exit(EILSEQ, untrusted_name); // path has control characters
int utf8_ret = validate_utf8_char((const unsigned char *)(untrusted_name + i));
if (utf8_ret > 0) {
/* loop will do one additional increment */
i += utf8_ret - 1;
continue;
} else {
do_exit(EILSEQ, untrusted_name); // not valid UTF-8
}
}
return non_dotdot_components;
}
Expand Down

0 comments on commit 372fcab

Please sign in to comment.