Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: separate casting out of Index.__new__ #30586

Merged
merged 2 commits into from
Jan 1, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 100 additions & 35 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,41 +349,8 @@ def __new__(
# they are actually ints, e.g. '0' and 0.0
# should not be coerced
# GH 11836
if is_integer_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "integer":
data = maybe_cast_to_integer_array(data, dtype, copy=copy)
elif inferred in ["floating", "mixed-integer-float"]:
if isna(data).any():
raise ValueError("cannot convert float NaN to integer")

if inferred == "mixed-integer-float":
data = maybe_cast_to_integer_array(data, dtype)

# If we are actually all equal to integers,
# then coerce to integer.
try:
return cls._try_convert_to_int_index(
data, copy, name, dtype
)
except ValueError:
pass

# Return an actual float index.
return Float64Index(data, copy=copy, name=name)

elif inferred == "string":
pass
else:
data = data.astype(dtype)
elif is_float_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "string":
pass
else:
data = data.astype(dtype)
else:
data = np.array(data, dtype=dtype, copy=copy)
data = _maybe_cast_with_dtype(data, dtype, copy)
dtype = data.dtype # TODO: maybe not for object?

# maybe coerce to a sub-class
if is_signed_integer_dtype(data.dtype):
Expand Down Expand Up @@ -5486,3 +5453,101 @@ def maybe_extract_name(name, obj, cls) -> Optional[Hashable]:
raise TypeError(f"{cls.__name__}.name must be a hashable type")

return name


def _maybe_cast_with_dtype(
    data: np.ndarray, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    Cast ``data`` to the dtype closest to ``dtype`` that Index supports.

    Parameters
    ----------
    data : np.ndarray
        The values to cast.
    dtype : np.dtype
        The dtype that was requested.
    copy : bool
        Forwarded to the casting helpers that accept a copy flag.

    Returns
    -------
    np.ndarray
        The cast values.  String-inferred data is returned untouched when an
        integer or float dtype is requested, and float data that is not
        integer-valued comes back as float64 even if an integer dtype was
        requested.

    Raises
    ------
    ValueError
        If an integer dtype is requested and float-like data contains NaN.
    """
    # GH 11836: numpy must not be allowed to coerce things that merely look
    # like ints/floats (e.g. '0' and 0.0) unless they really are ints.
    wants_integer = is_integer_dtype(dtype)
    if not wants_integer and not is_float_dtype(dtype):
        # Neither integer nor float requested: plain numpy conversion.
        return np.array(data, dtype=dtype, copy=copy)

    kind = lib.infer_dtype(data, skipna=False)

    if kind == "string":
        # Strings are never coerced to a numeric dtype here.
        return data

    if not wants_integer:
        # Float dtype requested and the data is not string-like.
        return data.astype(dtype)

    if kind == "integer":
        return maybe_cast_to_integer_array(data, dtype, copy=copy)

    if kind not in ("floating", "mixed-integer-float"):
        return data.astype(dtype)

    # Float-like data with an integer dtype requested.
    if isna(data).any():
        raise ValueError("cannot convert float NaN to integer")

    if kind == "mixed-integer-float":
        data = maybe_cast_to_integer_array(data, dtype)

    # If the values are actually all equal to integers, coerce to integer;
    # otherwise fall back to a genuine float64 array.
    try:
        return _try_convert_to_int_array(data, copy, dtype)
    except ValueError:
        return np.array(data, dtype=np.float64, copy=copy)


def _try_convert_to_int_array(
jreback marked this conversation as resolved.
Show resolved Hide resolved
data: np.ndarray, copy: bool, dtype: np.dtype
) -> np.ndarray:
"""
Attempt to convert an array of data into an integer array.

Parameters
----------
data : The data to convert.
copy : bool
Whether to copy the data or not.
dtype : np.dtype

Returns
-------
int_array : data converted to either an ndarray[int64] or ndarray[uint64]

Raises
------
ValueError if the conversion was not successful.
"""

if not is_unsigned_integer_dtype(dtype):
# skip int64 conversion attempt if uint-like dtype is passed, as
# this could return Int64Index when UInt64Index is what's desired
try:
res = data.astype("i8", copy=False)
if (res == data).all():
return res # TODO: might still need to copy
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be in pandas/core/dtypes/cast.py and i think duplicates some code there, but ok for now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, there's definitely some duplication to get rid of, but first there are some kinks to iron out, in particular #21311, #17246.

except (OverflowError, TypeError, ValueError):
pass

# Conversion to int64 failed (possibly due to overflow) or was skipped,
# so let's try now with uint64.
try:
res = data.astype("u8", copy=False)
if (res == data).all():
return res # TODO: might still need to copy
except (OverflowError, TypeError, ValueError):
pass

raise ValueError