Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TYP: internals.construction #40154

Merged
Merged 11 commits into the base branch on Mar 3, 2021
3 changes: 3 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1620,6 +1620,7 @@ def maybe_cast_to_datetime(
dta = dta.tz_localize(None)
value = dta
elif is_datetime64tz:
dtype = cast(DatetimeTZDtype, dtype)
# The string check can be removed once issue #13712
# is solved. String data that is passed with a
# datetime64tz is assumed to be naive which should
Expand Down Expand Up @@ -1700,6 +1701,8 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
dtype('<M8[ns]')

>>> ensure_nanosecond_dtype(np.dtype("m8[ps]"))
Traceback (most recent call last):
...
TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
"""
msg = (
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1939,12 +1939,11 @@ def from_records(
arr_columns_list.append(k)
arrays.append(v)

arrays, arr_columns = reorder_arrays(arrays, arr_columns_list, columns)
arr_columns = Index(arr_columns_list)
arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns)

elif isinstance(data, (np.ndarray, DataFrame)):
arrays, columns = to_arrays(data, columns)
if columns is not None:
columns = ensure_index(columns)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is ensured by to_arrays already?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

arr_columns = columns
else:
arrays, arr_columns = to_arrays(data, columns)
Expand All @@ -1954,9 +1953,7 @@ def from_records(
arrays[i] = lib.maybe_convert_objects(arr, try_float=True)

arr_columns = ensure_index(arr_columns)
if columns is not None:
columns = ensure_index(columns)
else:
if columns is None:
columns = arr_columns

if exclude is None:
Expand Down
37 changes: 18 additions & 19 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
if TYPE_CHECKING:
from numpy.ma.mrecords import MaskedRecords


# ---------------------------------------------------------------------
# BlockManager Interface

Expand All @@ -91,7 +92,7 @@ def arrays_to_mgr(
dtype: Optional[DtypeObj] = None,
verify_integrity: bool = True,
typ: Optional[str] = None,
):
) -> Manager:
"""
Segregate Series based on type and coerce into matrices.

Expand All @@ -109,11 +110,11 @@ def arrays_to_mgr(
# don't force copy because getting jammed in an ndarray anyway
arrays = _homogenize(arrays, index, dtype)

columns = ensure_index(columns)
else:
columns = ensure_index(columns)
index = ensure_index(index)

columns = ensure_index(columns)

# from BlockManager perspective
axes = [columns, index]

Expand All @@ -140,9 +141,8 @@ def rec_array_to_mgr(
fdata = ma.getdata(data)
if index is None:
index = _get_names_from_index(fdata)
if index is None:
index = ibase.default_index(len(data))
index = ensure_index(index)
else:
index = ensure_index(index)
jorisvandenbossche marked this conversation as resolved.
Show resolved Hide resolved

if columns is not None:
columns = ensure_index(columns)
Expand Down Expand Up @@ -215,14 +215,14 @@ def mgr_to_mgr(mgr, typ: str):

def ndarray_to_mgr(
values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str
):
) -> Manager:
# used in DataFrame.__init__
# input must be a ndarray, list, Series, index
# input must be a ndarray, list, Series, Index, ExtensionArray

if isinstance(values, ABCSeries):
if columns is None:
if values.name is not None:
columns = [values.name]
columns = Index([values.name])
if index is None:
index = values.index
else:
Expand Down Expand Up @@ -309,7 +309,9 @@ def ndarray_to_mgr(
return create_block_manager_from_blocks(block_values, [columns, index])


def dict_to_mgr(data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str):
def dict_to_mgr(
data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str
) -> Manager:
"""
Segregate Series based on type and coerce into matrices.
Needs to handle a lot of exceptional cases.
Expand Down Expand Up @@ -531,21 +533,18 @@ def extract_index(data) -> Index:
return ensure_index(index)


def reorder_arrays(
    arrays: List[ArrayLike], arr_columns: Index, columns: Optional[Index]
) -> Tuple[List[ArrayLike], Index]:
    """
    Reorder ``arrays`` (and their labels) to match the requested ``columns``.

    Parameters
    ----------
    arrays : list of arrays
        Column data, positionally aligned with ``arr_columns``.
    arr_columns : Index
        Labels currently describing ``arrays``.
    columns : Index or None
        Desired column order. If None or empty, no reordering is done.

    Returns
    -------
    arrays : list of arrays
        Data reordered to follow ``columns`` (unchanged when no reorder
        was requested).
    arr_columns : Index
        Labels in the resulting order.
    """
    # Only reorder when a non-empty target ordering was given and there is
    # something to reorder; ``arr_columns`` is always an Index here, so no
    # None-check is needed on it.
    if columns is not None and len(columns) and len(arr_columns):
        # Positions in arr_columns corresponding to each requested column.
        # NOTE(review): a label in ``columns`` missing from ``arr_columns``
        # yields indexer -1, which selects the *last* array — presumably
        # callers guarantee ``columns`` is a subset; verify against callers.
        indexer = ensure_index(arr_columns).get_indexer(columns)
        arr_columns = ensure_index([arr_columns[i] for i in indexer])
        arrays = [arrays[i] for i in indexer]
    return arrays, arr_columns


def _get_names_from_index(data):
def _get_names_from_index(data) -> Index:
has_some_name = any(getattr(s, "name", None) is not None for s in data)
if not has_some_name:
return ibase.default_index(len(data))
Expand All @@ -560,7 +559,7 @@ def _get_names_from_index(data):
index[i] = f"Unnamed {count}"
count += 1

return index
return Index(index)


def _get_axes(
Expand Down