-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Deprecated usecols with out of bounds indices in read_csv #41130
Changes from 3 commits
9a82d19
0287dd9
97158ed
f446e4f
f5d3a05
21b496b
92488c0
41e3310
e34631b
5bef676
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -470,12 +470,14 @@ def _infer_columns(self): | |
if self.usecols is not None: | ||
# Set _use_cols. We don't store columns because they are | ||
# overwritten. | ||
self._handle_usecols(columns, names) | ||
self._handle_usecols(columns, names, num_original_columns) | ||
else: | ||
num_original_columns = len(names) | ||
columns = [names] | ||
else: | ||
columns = self._handle_usecols(columns, columns[0]) | ||
columns = self._handle_usecols( | ||
columns, columns[0], num_original_columns | ||
) | ||
else: | ||
try: | ||
line = self._buffered_line() | ||
|
@@ -494,10 +496,12 @@ def _infer_columns(self): | |
columns = [[f"{self.prefix}{i}" for i in range(ncols)]] | ||
else: | ||
columns = [list(range(ncols))] | ||
columns = self._handle_usecols(columns, columns[0]) | ||
columns = self._handle_usecols( | ||
columns, columns[0], num_original_columns | ||
) | ||
else: | ||
if self.usecols is None or len(names) >= num_original_columns: | ||
columns = self._handle_usecols([names], names) | ||
columns = self._handle_usecols([names], names, num_original_columns) | ||
num_original_columns = len(names) | ||
else: | ||
if not callable(self.usecols) and len(names) != len(self.usecols): | ||
|
@@ -506,13 +510,13 @@ def _infer_columns(self): | |
"header fields in the file" | ||
) | ||
# Ignore output but set used columns. | ||
self._handle_usecols([names], names) | ||
self._handle_usecols([names], names, ncols) | ||
columns = [names] | ||
num_original_columns = ncols | ||
|
||
return columns, num_original_columns, unnamed_cols | ||
|
||
def _handle_usecols(self, columns, usecols_key): | ||
def _handle_usecols(self, columns, usecols_key, num_original_columns): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Brief docstring on this new parameter to explain how it differs from There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you type the args here? |
||
""" | ||
Sets self._col_indices | ||
|
||
|
@@ -537,6 +541,13 @@ def _handle_usecols(self, columns, usecols_key): | |
else: | ||
col_indices.append(col) | ||
else: | ||
missing_usecols = [ | ||
col for col in self.usecols if col >= num_original_columns | ||
] | ||
if missing_usecols: | ||
raise ParserError( | ||
f"Usecols indices {missing_usecols} are out of bounds!" | ||
) | ||
col_indices = self.usecols | ||
|
||
columns = [ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -312,3 +312,19 @@ def test_malformed_skipfooter(python_parser_only): | |
msg = "Expected 3 fields in line 4, saw 5" | ||
with pytest.raises(ParserError, match=msg): | ||
parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1) | ||
|
||
|
||
@pytest.mark.parametrize("header", [0, None]) | ||
@pytest.mark.parametrize("names", [None, ["a", "b"], ["a", "b", "c"]]) | ||
def test_usecols_indices_out_of_bounds(python_parser_only, names, header): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be tested with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #41129 |
||
# GH#25623 | ||
if header == 0 and names == ["a", "b", "c"]: | ||
pytest.skip("This case is not valid") | ||
parser = python_parser_only | ||
data = """ | ||
a,b | ||
1,2 | ||
""" | ||
msg = r"Usecols indices \[2\] are out of bounds!" | ||
with pytest.raises(ParserError, match=msg): | ||
parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=header) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure what "uncontrolled" means here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not raised on purpose by us, but instead raised because we are accessing a nonexistent list index.