Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ArrayManager] DataFrame constructor from ndarray #40441

Merged
Merged
34 changes: 30 additions & 4 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
maybe_convert_platform,
maybe_infer_to_datetimelike,
maybe_upcast,
sanitize_to_nanoseconds,
)
from pandas.core.dtypes.common import (
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_integer_dtype,
Expand All @@ -58,6 +60,7 @@
DatetimeArray,
)
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
sanitize_array,
)
Expand Down Expand Up @@ -304,10 +307,33 @@ def ndarray_to_mgr(
index, columns = _get_axes(
values.shape[0], values.shape[1], index=index, columns=columns
)
values = values.T

_check_values_indices_shape_match(values, index, columns)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By moving `values = values.T` below the call to `_check_values_indices_shape_match`, that function no longer needs to take the transposed shape into account (and can be used for both the ArrayManager and the BlockManager).


if typ == "array":

values = sanitize_to_nanoseconds(values)
if issubclass(values.dtype.type, str):
values = np.array(values, dtype=object)

if dtype is None and is_object_dtype(values.dtype):
arrays = [
ensure_wrapped_if_datetimelike(
maybe_infer_to_datetimelike(values[:, i].copy())
)
for i in range(values.shape[1])
]
elif is_datetime_or_timedelta_dtype(values.dtype):
arrays = [
ensure_wrapped_if_datetimelike(values[:, i].copy())
for i in range(values.shape[1])
]
else:
arrays = [values[:, i].copy() for i in range(values.shape[1])]
return ArrayManager(arrays, [index, columns], verify_integrity=False)
jorisvandenbossche marked this conversation as resolved.
Show resolved Hide resolved

values = values.T

# if we don't have a dtype specified, then try to convert objects
# on the entire block; this is to convert if we have datetimelike's
# embedded in an object type
Expand Down Expand Up @@ -349,13 +375,13 @@ def _check_values_indices_shape_match(
Check that the shape implied by our axes matches the actual shape of the
data.
"""
if values.shape[0] != len(columns):
if values.shape[1] != len(columns) or values.shape[0] != len(index):
# Could let this raise in Block constructor, but we get a more
# helpful exception message this way.
if values.shape[1] == 0:
if values.shape[0] == 0:
raise ValueError("Empty data passed with indices specified.")

passed = values.T.shape
passed = values.shape
implied = (len(index), len(columns))
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")

Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,23 @@ def test_astype_to_incorrect_datetimelike(self, unit):
other = f"m8[{unit}]"

df = DataFrame(np.array([[1, 2, 3]], dtype=dtype))
msg = fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]"
msg = "|".join(
[
fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]",
"cannot astype a datetimelike from "
fr"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]",
]
)
with pytest.raises(TypeError, match=msg):
df.astype(other)

msg = fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]"
msg = "|".join(
[
fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]",
"cannot astype a timedelta from "
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
fr"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]",
]
)
df = DataFrame(np.array([[1, 2, 3]], dtype=other))
with pytest.raises(TypeError, match=msg):
df.astype(dtype)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)
import pandas._testing as tm
from pandas.arrays import (
DatetimeArray,
IntervalArray,
PeriodArray,
SparseArray,
Expand Down Expand Up @@ -2569,6 +2570,13 @@ def test_construction_from_set_raises(self, typ):
with pytest.raises(TypeError, match=msg):
Series(values)

def test_construction_from_ndarray_datetimelike(self):
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
# ensure the underlying arrays are properly wrapped as EA when
# constructed from 2D ndarray
arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3)
df = DataFrame(arr)
assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays)


def get1(obj):
if isinstance(obj, Series):
Expand Down