From 59d20d8c4d57adc34f01148709d629945c99cab5 Mon Sep 17 00:00:00 2001 From: Soyoun Kim Date: Sat, 28 Oct 2017 18:11:58 +0900 Subject: [PATCH] CLN: replace %s syntax with .format in pandas.io.parsers progress towards #16130 --- pandas/io/parsers.py | 65 +++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9a255231bbe70..2996e078a069d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1485,9 +1485,9 @@ def extract(r): for n in range(len(columns[0])): if all(compat.to_str(c[n]) in self.unnamed_cols for c in columns): raise ParserError( - "Passed header=[%s] are too many rows for this " + "Passed header=[{header}] are too many rows for this " "multi_index of columns" - % ','.join(str(x) for x in self.header) + .format(header=','.join(str(x) for x in self.header)) ) # Clean the column names (if we have an index_col). @@ -1520,9 +1520,11 @@ def _maybe_dedup_names(self, names): counts[col] = cur_count + 1 if is_potential_mi: - col = col[:-1] + ('%s.%d' % (col[-1], cur_count),) + col = col[:-1] + ('{column}.{count}'.format( + column=col[-1], count=cur_count),) else: - col = '%s.%d' % (col, cur_count) + col = '{column}.{count}'.format( + column=col, count=cur_count) cur_count = counts[col] names[i] = col @@ -1569,7 +1571,7 @@ def _get_simple_index(self, data, columns): def ix(col): if not isinstance(col, compat.string_types): return col - raise ValueError('Index %s invalid' % col) + raise ValueError('Index {col} invalid'.format(col=col)) to_remove = [] index = [] @@ -1593,8 +1595,8 @@ def _get_name(icol): return icol if col_names is None: - raise ValueError(('Must supply column order to use %s as ' - 'index') % str(icol)) + raise ValueError(('Must supply column order to use {icol!s} ' + 'as index').format(icol=icol)) for i, c in enumerate(col_names): if i == icol: @@ -1709,7 +1711,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, result[c] = cvals if verbose and na_count: - print('Filled %d NA values in column %s' % (na_count, str(c))) + print('Filled {count} NA values in column {c!s}'.format( + count=na_count, c=c)) return result def _infer_types(self, values, na_values, try_num_bool=True): @@ -1810,8 +1813,10 @@ def _cast_types(self, values, cast_type, column): values = astype_nansafe(values, cast_type, copy=True, skipna=True) except ValueError: - raise ValueError("Unable to convert column %s to " - "type %s" % (column, cast_type)) + raise ValueError( + "Unable to convert column {column} to type " + "{cast_type}".format( + column=column, cast_type=cast_type)) return values def _do_date_conversions(self, names, data): @@ -1874,7 +1879,7 @@ def __init__(self, src, **kwds): if self.names is None: if self.prefix: - self.names = ['%s%d' % (self.prefix, i) + self.names = ['{prefix}{i}'.format(prefix=self.prefix, i=i) for i in range(self._reader.table_width)] else: self.names = lrange(self._reader.table_width) @@ -2276,10 +2281,11 @@ def __init__(self, f, **kwds): raise ValueError('Only length-1 decimal markers supported') if self.thousands is None: - self.nonnum = re.compile('[^-^0-9^%s]+' % self.decimal) + self.nonnum = re.compile( + r'[^-^0-9^{decimal}]+'.format(decimal=self.decimal)) else: - self.nonnum = re.compile('[^-^0-9^%s^%s]+' % (self.thousands, - self.decimal)) + self.nonnum = re.compile(r'[^-^0-9^{thousands}^{decimal}]+'.format( + thousands=self.thousands, decimal=self.decimal)) def _set_no_thousands_columns(self): # Create a set of column ids that are not to be stripped of thousands @@ -2518,8 +2524,8 @@ def _infer_columns(self): except StopIteration: if self.line_pos < hr: raise ValueError( - 'Passed header=%s but only %d lines in file' - % (hr, self.line_pos + 1)) + 'Passed header={hr} but only {pos} lines in ' + 'file'.format(hr=hr, pos=(self.line_pos + 1))) # We have an empty file, so check # if columns are provided. That will @@ -2560,7 +2566,8 @@ def _infer_columns(self): while cur_count > 0: counts[col] = cur_count + 1 - col = "%s.%d" % (col, cur_count) + col = '{column}.{count}'.format( + column=col, count=cur_count) cur_count = counts[col] this_columns[i] = col @@ -2628,8 +2635,8 @@ def _infer_columns(self): if not names: if self.prefix: - columns = [['%s%d' % (self.prefix, i) - for i in range(ncols)]] + columns = [['{prefix}{idx}'.format( + prefix=self.prefix, idx=i) for i in range(ncols)]] else: columns = [lrange(ncols)] columns = self._handle_usecols(columns, columns[0]) @@ -3056,8 +3063,9 @@ def _rows_to_cols(self, content): content.append(l) for row_num, actual_len in bad_lines: - msg = ('Expected %d fields in line %d, saw %d' % - (col_len, row_num + 1, actual_len)) + msg = ('Expected {col_len} fields in line {line}, saw ' + '{length}'.format(col_len=col_len, line=(row_num + 1), + length=actual_len)) if (self.delimiter and len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE): @@ -3228,8 +3236,9 @@ def _isindex(colspec): new_name, col, old_names = _try_convert_dates( converter, colspec, data_dict, orig_names) if new_name in data_dict: - raise ValueError('New date column already in dict %s' % - new_name) + raise ValueError( + 'New date column already in dict {name}'.format( + name=new_name)) new_data[new_name] = col new_cols.append(new_name) date_cols.update(old_names) @@ -3238,8 +3247,8 @@ def _isindex(colspec): # dict of new name to column list for new_name, colspec in compat.iteritems(parse_spec): if new_name in data_dict: - raise ValueError('Date column %s already in dict' % - new_name) + raise ValueError( + 'Date column {name} already in dict'.format(name=new_name)) _, col, old_names = _try_convert_dates(converter, colspec, data_dict, orig_names) @@ -3418,7 +3427,7 @@ def _stringify_na_values(na_values): # we are like 999 here if v == int(v): v = int(v) - result.append("%s.0" % v) + result.append("{value}.0".format(value=v)) result.append(str(v)) result.append(v) @@ -3563,8 +3572,8 @@ def get_rows(self, infer_nrows, skiprows=None): def detect_colspecs(self, infer_nrows=100, skiprows=None): # Regex escape the delimiters - delimiters = ''.join(r'\%s' % x for x in self.delimiter) - pattern = re.compile('([^%s]+)' % delimiters) + delimiters = ''.join(r'\{}'.format(x) for x in self.delimiter) + pattern = re.compile('([^{}]+)'.format(delimiters)) rows = self.get_rows(infer_nrows, skiprows) if not rows: raise EmptyDataError("No rows from which to infer column width")