Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(r): Don't invoke undefined behaviour in conversions to/from Arrow #167

Merged
merged 2 commits into from
Mar 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions r/src/as_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,12 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr

int64_t* buffer_data = (int64_t*)buffer->data;
for (int64_t i = 0; i < len; i++) {
buffer_data[i] = x_data[i];
// UBSAN warns for buffer_data[i] = nan
if (R_IsNA(x_data[i]) || R_IsNaN(x_data[i])) {
buffer_data[i] = 0;
} else {
buffer_data[i] = x_data[i];
}
}

buffer->size_bytes = len * sizeof(int64_t);
Expand All @@ -254,7 +259,10 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr
// It's easy to accidentally overflow here, so make sure to warn
int64_t n_overflow = 0;
for (int64_t i = 0; i < len; i++) {
if (x_data[i] > INT_MAX || x_data[i] < INT_MIN) {
// UBSAN warns for buffer_data[i] = nan
if (R_IsNA(x_data[i]) || R_IsNaN(x_data[i])) {
buffer_data[i] = 0;
} else if (x_data[i] > INT_MAX || x_data[i] < INT_MIN) {
n_overflow++;
buffer_data[i] = 0;
} else {
Expand Down
6 changes: 3 additions & 3 deletions r/tests/testthat/test-as-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,10 @@ test_that("as_nanoarrow_array() works for double() -> na_int32()", {
as.raw(array$buffers[[1]]),
packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5)))
)
# The last element here is (int)NaN not NA_integer_
# The last element here is 0 because (int)nan is undefined behaviour
expect_identical(
head(as.raw(array$buffers[[2]]), 10 * 4L),
as.raw(as_nanoarrow_buffer(1:10))
as.raw(array$buffers[[2]]),
as.raw(as_nanoarrow_buffer(c(1:10, 0L)))
)

# With overflow
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/test-convert-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -329,13 +329,13 @@ test_that("convert to vector works for null -> logical()", {
})

test_that("convert to vector warns for invalid integer()", {
array <- as_nanoarrow_array(.Machine$double.xmax)
array <- as_nanoarrow_array(.Machine$integer.max + 1)
expect_warning(
expect_identical(convert_array(array, integer()), NA_integer_),
"1 value\\(s\\) outside integer range set to NA"
)

array <- as_nanoarrow_array(c(NA, .Machine$double.xmax))
array <- as_nanoarrow_array(c(NA, .Machine$integer.max + 1))
expect_warning(
expect_identical(convert_array(array, integer()), c(NA_integer_, NA_integer_)),
"1 value\\(s\\) outside integer range set to NA"
Expand Down