Skip to content

Commit

Permalink
This seems to solve a lot of UTF-8 issues. Needs performance evaluation.
Browse files Browse the repository at this point in the history
  • Loading branch information
spgarbet committed Feb 22, 2022
1 parent 2f9a02a commit ec838db
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 15 deletions.
38 changes: 29 additions & 9 deletions inst/tests/test_as_yaml.R
Original file line number Diff line number Diff line change
Expand Up @@ -457,18 +457,38 @@ test_no_dots_at_end <- function()
}

# UTF-8 testing
test_latin1_strings <- function() {
latin1_in <- list("\xAB", "\xBF")
test_latin1_strings_no_enc <- function() {
latin1_in <- list("\xAB\xF4", "\xBF\xE9")
checkEquals("- <ab><f4>\n- <bf><e9>\n", as.yaml(latin1_in))
}

test_latin1_names_no_enc <- function() {
latin1_in <- list("a", "b")
names(latin1_in) <- c("\xE9", "\xF4")
as.yaml(latin1_in)
checkEquals("<e9>: a\n<f4>: b\n", as.yaml(latin1_in))
}

test_latin1_strings_enc <- function() {
x <- "\xAB\xF4"
y <- "\xBF\xE9"
Encoding(x) <- "latin1"
Encoding(y) <- "latin1"
latin1_in <- list(x,y)

checkEquals("- «ô\n- ¿é\n", as.yaml(latin1_in))
}

# This cause a full crash
#
# test_latin1_names <- function() {
# latin1_in <- list("a", "b")
# names(latin1_in) <- c("\xAB", "\xBF")
# as.yaml(latin1_in)
# }
test_latin1_names_enc <- function() {
latin1_in <- list("a", "b")
x <- "\xAB\xF4"
y <- "\xBF\xE9"
Encoding(x) <- "latin1"
Encoding(y) <- "latin1"
names(latin1_in) <- c(x,y)

checkEquals("«ô: a\n¿é: b\n", as.yaml(latin1_in))
}



15 changes: 13 additions & 2 deletions inst/tests/test_write_yaml.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,22 @@ test_output_is_written_to_a_file_when_a_filename_is_specified <- function() {
}

# Latin1 UTF-8 test
test_output_from_latin1_works <- function()
test_output_from_latin1_no_enc_works <- function()
{
filename <- tempfile()
write_yaml(list("\xab"), filename)
output <- readLines(filename)
unlink(filename)
checkEquals(c("- \xab"), output)
checkEquals(c("- <ab>"), output)
}

test_output_from_latin1_enc_works <- function()
{
x <- "\xab\xef"
Encoding(x) <- "latin1"
filename <- tempfile()
write_yaml(list(x), filename)
output <- readLines(filename)
unlink(filename)
checkEquals("- «ï", output)
}
15 changes: 11 additions & 4 deletions src/r_emit.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,13 +335,19 @@ emit_char(emitter, event, s_obj, tag, implicit_tag, scalar_style)
yaml_emitter_t *emitter;
yaml_event_t *event;
SEXP s_obj;
char *tag;
const char *tag;
int implicit_tag;
yaml_scalar_style_t scalar_style;
{
#ifdef DEBUG
Rprintf("emit_char length=%d\n", LENGTH(s_obj));
#endif
const void *vmax=vmaxget();
const char *trans=Rf_translateCharUTF8(s_obj);
yaml_scalar_event_initialize(event, NULL, (yaml_char_t *)tag,
(yaml_char_t *)Rf_translateCharUTF8(s_obj), LENGTH(s_obj),
(yaml_char_t *)trans, strlen(trans),
implicit_tag, implicit_tag, scalar_style);
vmaxset(vmax);

return yaml_emitter_emit(emitter, event);
}
Expand All @@ -351,7 +357,7 @@ emit_string(emitter, event, s_obj, tag, implicit_tag)
yaml_emitter_t *emitter;
yaml_event_t *event;
SEXP s_obj;
char *tag;
const char *tag;
int implicit_tag;
{
SEXP s_new_obj = NULL, s_chr = NULL, quoted = NULL;
Expand Down Expand Up @@ -396,7 +402,7 @@ emit_factor(emitter, event, s_obj, tag, implicit_tag)
yaml_emitter_t *emitter;
yaml_event_t *event;
SEXP s_obj;
char *tag;
const char *tag;
int implicit_tag;
{
SEXP s_levels = NULL, s_level_chr = NULL;
Expand Down Expand Up @@ -466,6 +472,7 @@ emit_object(emitter, event, s_obj, omap, column_major, precision, s_handlers)
#if DEBUG
Rprintf("=== Emitting ===\n");
PrintValue(s_obj);
Rprintf("TYPEOF: %d\n", TYPEOF(s_obj));
#endif

/* Look for custom handler by class */
Expand Down

0 comments on commit ec838db

Please sign in to comment.