Skip to content

Commit

Permalink
REF: define _header_line and have_mi_columns non-dynamically (pandas-…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Yi Wei committed May 19, 2023
1 parent e07b338 commit 96b692b
Showing 1 changed file with 32 additions and 22 deletions.
54 changes: 32 additions & 22 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,25 +365,34 @@ def _convert_data(
clean_dtypes,
)

@cache_readonly
def _have_mi_columns(self) -> bool:
    """
    Whether ``self.header`` names more than one header row.

    Returns
    -------
    bool
        True only when ``self.header`` is a list, tuple or ndarray holding
        more than one entry; False for ``None``, for any other kind of
        value, and for sequences of length 0 or 1.
    """
    header_spec = self.header
    # ``None`` is not one of the sequence types, so the isinstance check
    # alone covers the "no header" case.
    return isinstance(header_spec, (list, tuple, np.ndarray)) and len(header_spec) > 1

def _infer_columns(
self,
) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]:
names = self.names
num_original_columns = 0
clear_buffer = True
unnamed_cols: set[Scalar | None] = set()
self._header_line = None

if self.header is not None:
header = self.header
have_mi_columns = self._have_mi_columns

if isinstance(header, (list, tuple, np.ndarray)):
have_mi_columns = len(header) > 1
# we have a mi columns, so read an extra line
if have_mi_columns:
header = list(header) + [header[-1] + 1]
else:
have_mi_columns = False
header = [header]

columns: list[list[Scalar | None]] = []
Expand Down Expand Up @@ -531,27 +540,14 @@ def _infer_columns(
columns, columns[0], num_original_columns
)
else:
try:
line = self._buffered_line()

except StopIteration as err:
if not names:
raise EmptyDataError("No columns to parse from file") from err

line = names[:]

# Store line, otherwise it is lost for guessing the index
self._header_line = line
ncols = len(line)
ncols = len(self._header_line)
num_original_columns = ncols

if not names:
columns = [list(range(ncols))]
columns = self._handle_usecols(
columns, columns[0], num_original_columns
)
elif self.usecols is None or len(names) >= num_original_columns:
columns = self._handle_usecols([names], names, num_original_columns)
columns = self._handle_usecols(columns, columns[0], ncols)
elif self.usecols is None or len(names) >= ncols:
columns = self._handle_usecols([names], names, ncols)
num_original_columns = len(names)
elif not callable(self.usecols) and len(names) != len(self.usecols):
raise ValueError(
Expand All @@ -560,12 +556,26 @@ def _infer_columns(
)
else:
# Ignore output but set used columns.
self._handle_usecols([names], names, ncols)
columns = [names]
num_original_columns = ncols
self._handle_usecols(columns, columns[0], ncols)

return columns, num_original_columns, unnamed_cols

@cache_readonly
def _header_line(self):
    """
    First buffered line, kept so it can be reused in ``_get_index_name``.

    Returns
    -------
    ``None`` when ``self.header`` is set. Otherwise the result of
    ``self._buffered_line()``, or a shallow copy of ``self.names`` when
    that call raises ``StopIteration``.

    Raises
    ------
    EmptyDataError
        If ``self._buffered_line()`` raises ``StopIteration`` and
        ``self.names`` is falsy.
    """
    if self.header is not None:
        return None

    try:
        return self._buffered_line()
    except StopIteration as err:
        fallback_names = self.names
        if fallback_names:
            # Nothing could be read: hand back a copy of the
            # user-supplied names in place of a data line.
            return fallback_names[:]
        raise EmptyDataError("No columns to parse from file") from err

def _handle_usecols(
self,
columns: list[list[Scalar | None]],
Expand Down

0 comments on commit 96b692b

Please sign in to comment.