Skip to content

Commit

Permalink
Fix expansion of multibyte IFS characters
Browse files Browse the repository at this point in the history
Closes att#13. Previously, the `varsub` method used for the macro expansion of
`$param`, `${param}`, and `${param op word}` would incorrectly expand the
internal field separator (IFS) if it was a multibyte character. This was due to
truncation based on the incorrect assumption that the IFS would never be larger
than a single byte.

This change fixes the issue by keeping a pointer to the bytes of the IFS
character, determining how many bytes that character occupies, and writing
every one of those bytes during expansion and substitution.
  • Loading branch information
etscrivner committed Jul 28, 2018
1 parent 82d686d commit 4e0d6e3
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/cmd/ksh93/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ all_tests = [
['alias'], ['append'], ['arith'], ['arrays'], ['arrays2'], ['attributes'],
['basic', 90], ['bracket'], ['builtins'], ['case'], ['comvar'],
['comvario'], ['coprocess', 50], ['cubetype'], ['directoryfd'], ['enum'],
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'],
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'], ['ifs'],
['io'], ['leaks'], ['locale'], ['math', 50], ['nameref'], ['namespace'],
['modifiers'], ['options'], ['path'], ['pointtype'], ['quoting'],
['quoting2'], ['readcsv'], ['recttype'], ['restricted'], ['return'], ['select'],
Expand Down
20 changes: 16 additions & 4 deletions src/cmd/ksh93/sh/macro.c
Original file line number Diff line number Diff line change
Expand Up @@ -1663,7 +1663,13 @@ static_fn bool varsub(Mac_t *mp) {
int match[2 * (MATCH_MAX + 1)], index;
int nmatch, nmatch_prev, vsize_last, tsize;
char *vlast = NULL, *oldv;
d = (mode == '@' ? ' ' : mp->ifs);
char *ifs_bytes = NULL;
if (mode == '@') {
d = ' ';
} else {
d = mp->ifs;
ifs_bytes = mp->ifsp;
}
while (1) {
if (!v) v = "";
if (c == '/' || c == '#' || c == '%') {
Expand Down Expand Up @@ -1792,10 +1798,16 @@ static_fn bool varsub(Mac_t *mp) {
mp->atmode = mode == '@';
mp->pattern = oldpat;
} else if (d) {
if (mp->sp) {
sfputc(mp->sp, d);
Sfio_t *sfio_ptr = (mp->sp) ? mp->sp : stkp;

// Handle multi-byte characters being used for the internal
// field separator (IFS).
if (ifs_bytes) {
for (int i = 0; i < mbsize(ifs_bytes); i++) {
sfputc(sfio_ptr, ifs_bytes[i]);
}
} else {
sfputc(stkp, d);
sfputc(sfio_ptr, d);
}
}
}
Expand Down
25 changes: 25 additions & 0 deletions src/cmd/ksh93/tests/ifs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# These are the tests for the internal field separator (IFS).

# "$*" joins the positional parameters using the first character of IFS.
IFS=e
set : :
[[ "$*" == ":e:" ]] || log_error "IFS failed" ":e:" "$*"

# `read` splits its input on IFS. The checks below use `==` (string comparison)
# on purpose: `-eq` evaluates both operands as arithmetic expressions, so words
# such as `one` would be treated as unset variable names, compare as 0 == 0,
# and the checks could never fail.
IFS='|' read first second third <<< 'one|two|three'
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"

# Multi-byte character checks will only work if UTF-8 inputs are enabled
if [ "${LANG}" = "C.UTF-8" ]
then
    # 2 byte latin accented e character
    IFS=é
    set : :
    [[ "$*" == ":é:" ]] || log_error "IFS failed with multibyte character" ":é:" "$*"

    # 4 byte roman sestertius character
    IFS=𐆘 read first second third <<< 'one𐆘two𐆘three'
    [[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
    [[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
    [[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
fi

0 comments on commit 4e0d6e3

Please sign in to comment.