Skip to content

Commit

Permalink
Assorted UBSAN cleanups (pandas-dev#55112)
Browse files Browse the repository at this point in the history
* first round of fixes

* fix up includes

* updates

* dedup logic

* move comment
  • Loading branch information
WillAyd authored Sep 13, 2023
1 parent 81fb7e7 commit f00efd0
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 16 deletions.
8 changes: 7 additions & 1 deletion pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Numeric decoder derived from TCL library
#include <float.h>
#include <locale.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -763,7 +764,12 @@ void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {

void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
char *wstr;
JSUINT64 uvalue = (value < 0) ? -value : value;
JSUINT64 uvalue;
if (value == INT64_MIN) {
uvalue = INT64_MAX + UINT64_C(1);
} else {
uvalue = (value < 0) ? -value : value;
}

wstr = enc->offset;
// Conversion. Number is reversed.
Expand Down
49 changes: 34 additions & 15 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cimport cython
from cpython.datetime cimport (
PyDateTime_CheckExact,
PyDateTime_DATE_GET_HOUR,
Expand All @@ -18,6 +17,7 @@ from cpython.object cimport (
Py_LT,
Py_NE,
)
from libc.stdint cimport INT64_MAX

import_datetime()
PandasDateTime_IMPORT
Expand Down Expand Up @@ -545,14 +545,14 @@ cdef ndarray astype_round_check(
return iresult


@cython.overflowcheck(True)
cdef int64_t get_conversion_factor(
NPY_DATETIMEUNIT from_unit,
NPY_DATETIMEUNIT to_unit
) except? -1:
"""
Find the factor by which we need to multiply to convert from from_unit to to_unit.
"""
cdef int64_t value, overflow_limit, factor
if (
from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
Expand All @@ -565,28 +565,44 @@ cdef int64_t get_conversion_factor(
return 1

if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
factor = 7
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
factor = 24
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
factor = 60
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
factor = 60
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
factor = 1000
else:
raise ValueError("Converting from M or Y units is not supported.")

overflow_limit = INT64_MAX // factor
if value > overflow_limit or value < -overflow_limit:
raise OverflowError("result would overflow")

return factor * value


cdef int64_t convert_reso(
int64_t value,
Expand All @@ -595,7 +611,7 @@ cdef int64_t convert_reso(
bint round_ok,
) except? -1:
cdef:
int64_t res_value, mult, div, mod
int64_t res_value, mult, div, mod, overflow_limit

if from_reso == to_reso:
return value
Expand Down Expand Up @@ -624,9 +640,12 @@ cdef int64_t convert_reso(
else:
# e.g. us -> ns, risk of overflow, but no risk of lossy rounding
mult = get_conversion_factor(from_reso, to_reso)
with cython.overflowcheck(True):
overflow_limit = INT64_MAX // mult
if value > overflow_limit or value < -overflow_limit:
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
res_value = value * mult
raise OverflowError("result would overflow")

res_value = value * mult

return res_value

Expand Down

0 comments on commit f00efd0

Please sign in to comment.