From 0dbffcc23adcbf0a979c4fbab18817d41b426f71 Mon Sep 17 00:00:00 2001
From: Soyoun Kim
Date: Sat, 28 Oct 2017 18:11:58 +0900
Subject: [PATCH] CLN: replace %s syntax with .format in pandas.io

progress towards #16130

---
 pandas/io/packers.py      |   3 +-
 pandas/io/parsers.py      |  63 +++++-----
 pandas/io/pytables.py     | 249 +++++++++++++++++++++-----------------
 pandas/io/sas/sas7bdat.py |  10 +-
 pandas/io/sql.py          |  39 +++---
 pandas/io/stata.py        |  21 ++--
 6 files changed, 216 insertions(+), 169 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index b83eab7d0eba0..efe4e3a91c69c 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -519,7 +519,8 @@ def encode(obj):
         elif isinstance(obj, date):
             return {u'typ': u'date',
                     u'data': u(obj.isoformat())}
-        raise Exception("cannot encode this datetimelike object: %s" % obj)
+        raise Exception(
+            "cannot encode this datetimelike object: {obj}".format(obj=obj))
     elif isinstance(obj, Period):
         return {u'typ': u'period',
                 u'ordinal': obj.ordinal,
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 5590e8f445c67..b58ba73fc8e21 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1464,9 +1464,9 @@ def extract(r):
         for n in range(len(columns[0])):
             if all(compat.to_str(c[n]) in self.unnamed_cols
                    for c in columns):
                 raise ParserError(
-                    "Passed header=[%s] are too many rows for this "
+                    "Passed header=[{header}] are too many rows for this "
                     "multi_index of columns"
-                    % ','.join(str(x) for x in self.header)
+                    .format(header=','.join(str(x) for x in self.header))
                 )

         # Clean the column names (if we have an index_col).
@@ -1499,9 +1499,10 @@ def _maybe_dedup_names(self, names):
                 counts[col] = cur_count + 1

                 if is_potential_mi:
-                    col = col[:-1] + ('%s.%d' % (col[-1], cur_count),)
+                    col = col[:-1] + ('{col}.{cnt}'.format(
+                        col=col[-1], cnt=cur_count),)
                 else:
-                    col = '%s.%d' % (col, cur_count)
+                    col = '{col}.{cnt}'.format(col=col, cnt=cur_count)

                 cur_count = counts[col]

             names[i] = col
@@ -1548,7 +1549,7 @@ def _get_simple_index(self, data, columns):
         def ix(col):
             if not isinstance(col, compat.string_types):
                 return col
-            raise ValueError('Index %s invalid' % col)
+            raise ValueError('Index {col} invalid'.format(col=col))

         to_remove = []
         index = []
@@ -1572,8 +1573,8 @@ def _get_name(icol):
                 return icol

             if col_names is None:
-                raise ValueError(('Must supply column order to use %s as '
-                                  'index') % str(icol))
+                raise ValueError(('Must supply column order to use {icol!s} '
+                                  'as index').format(icol=icol))

             for i, c in enumerate(col_names):
                 if i == icol:
@@ -1688,7 +1689,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
             result[c] = cvals
             if verbose and na_count:
-                print('Filled %d NA values in column %s' % (na_count, str(c)))
+                print('Filled {count} NA values in column {c!s}'.format(
+                    count=na_count, c=c))
         return result

     def _infer_types(self, values, na_values, try_num_bool=True):
@@ -1789,8 +1791,9 @@ def _cast_types(self, values, cast_type, column):
                 values = astype_nansafe(values, cast_type,
                                         copy=True, skipna=True)
             except ValueError:
-                raise ValueError("Unable to convert column %s to "
-                                 "type %s" % (column, cast_type))
+                raise ValueError(
+                    "Unable to convert column {column} to type {type}".format(
+                        column=column, type=cast_type))
         return values

     def _do_date_conversions(self, names, data):
@@ -1853,7 +1856,7 @@ def __init__(self, src, **kwds):

         if self.names is None:
             if self.prefix:
-                self.names = ['%s%d' % (self.prefix, i)
+                self.names = ['{prefix}{i}'.format(prefix=self.prefix, i=i)
                               for i in range(self._reader.table_width)]
             else:
                 self.names = lrange(self._reader.table_width)
@@ -2255,10 +2258,11 @@ def __init__(self, f, **kwds):
             raise ValueError('Only length-1 decimal markers supported')

         if self.thousands is None:
-            self.nonnum = re.compile('[^-^0-9^%s]+' % self.decimal)
+            self.nonnum = re.compile(
+                '[^-^0-9^{decimal}]+'.format(decimal=self.decimal))
         else:
-            self.nonnum = re.compile('[^-^0-9^%s^%s]+' % (self.thousands,
-                                                          self.decimal))
+            self.nonnum = re.compile('[^-^0-9^{thousands}^{decimal}]+'.format(
+                thousands=self.thousands, decimal=self.decimal))

     def _set_no_thousands_columns(self):
         # Create a set of column ids that are not to be stripped of thousands
@@ -2497,8 +2501,8 @@ def _infer_columns(self):
                 except StopIteration:
                     if self.line_pos < hr:
                         raise ValueError(
-                            'Passed header=%s but only %d lines in file'
-                            % (hr, self.line_pos + 1))
+                            'Passed header={hr} but only {pos} lines in '
+                            'file'.format(hr=hr, pos=(self.line_pos + 1)))

                     # We have an empty file, so check
                     # if columns are provided. That will
@@ -2539,7 +2543,8 @@ def _infer_columns(self):

                         while cur_count > 0:
                             counts[col] = cur_count + 1
-                            col = "%s.%d" % (col, cur_count)
+                            col = "{columns}.{count}".format(
+                                columns=col, count=cur_count)
                             cur_count = counts[col]

                         this_columns[i] = col
@@ -2607,8 +2612,8 @@ def _infer_columns(self):

             if not names:
                 if self.prefix:
-                    columns = [['%s%d' % (self.prefix, i)
-                                for i in range(ncols)]]
+                    columns = [['{prefix}{idx}'.format(
+                        prefix=self.prefix, idx=i) for i in range(ncols)]]
                 else:
                     columns = [lrange(ncols)]
                 columns = self._handle_usecols(columns, columns[0])
@@ -3035,8 +3040,9 @@ def _rows_to_cols(self, content):
                     content.append(l)

             for row_num, actual_len in bad_lines:
-                msg = ('Expected %d fields in line %d, saw %d' %
-                       (col_len, row_num + 1, actual_len))
+                msg = ('Expected {col_len} fields in line {line}, saw '
+                       '{length}'.format(col_len=col_len, line=(row_num + 1),
+                                         length=actual_len))
                 if (self.delimiter and
                         len(self.delimiter) > 1 and
                         self.quoting != csv.QUOTE_NONE):
@@ -3207,8 +3213,9 @@ def _isindex(colspec):
             new_name, col, old_names = _try_convert_dates(
                 converter, colspec, data_dict, orig_names)
             if new_name in data_dict:
-                raise ValueError('New date column already in dict %s' %
-                                 new_name)
+                raise ValueError(
+                    'New date column already in dict {name}'.format(
+                        name=new_name))
             new_data[new_name] = col
             new_cols.append(new_name)
             date_cols.update(old_names)
@@ -3217,8 +3224,8 @@ def _isindex(colspec):
         # dict of new name to column list
         for new_name, colspec in compat.iteritems(parse_spec):
             if new_name in data_dict:
-                raise ValueError('Date column %s already in dict' %
-                                 new_name)
+                raise ValueError(
+                    'Date column {name} already in dict'.format(name=new_name))

             _, col, old_names = _try_convert_dates(converter, colspec,
                                                    data_dict, orig_names)
@@ -3397,7 +3404,7 @@ def _stringify_na_values(na_values):
             # we are like 999 here
             if v == int(v):
                 v = int(v)
-                result.append("%s.0" % v)
+                result.append("{value}.0".format(value=v))
                 result.append(str(v))

         result.append(v)
@@ -3542,8 +3549,8 @@ def get_rows(self, infer_nrows, skiprows=None):

     def detect_colspecs(self, infer_nrows=100, skiprows=None):
         # Regex escape the delimiters
-        delimiters = ''.join(r'\%s' % x for x in self.delimiter)
-        pattern = re.compile('([^%s]+)' % delimiters)
+        delimiters = ''.join(r'\{}'.format(x) for x in self.delimiter)
+        pattern = re.compile('([^{}]+)'.format(delimiters))
         rows = self.get_rows(infer_nrows, skiprows)
         if not rows:
             raise EmptyDataError("No rows from which to infer column width")
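A note on the detect_colspecs hunk above: with str.format, the parenthesization
decides whether each delimiter character or the generator object itself gets
formatted. A minimal standalone sketch (plain Python, hypothetical delimiter
value, not part of the patch) of the two spellings:

    import re

    delimiter = ', '

    # Format each character, then join -- mirrors the old r'\%s' % x per-item
    # form and escapes every delimiter character.
    escaped = ''.join(r'\{}'.format(x) for x in delimiter)
    pattern = re.compile('([^{}]+)'.format(escaped))
    print(escaped)   # -> \,\  (a backslash before each delimiter char)

    # Formatting the generator itself embeds its repr, not the characters.
    wrong = r'\{}'.format(x for x in delimiter)
    print(wrong)     # -> \<generator object <genexpr> at 0x...>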
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index b115529f696b8..02e2ccd2f6633 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -363,7 +363,7 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs):

         if not exists:
             raise compat.FileNotFoundError(
-                'File %s does not exist' % path_or_buf)
+                'File {path} does not exist'.format(path=path_or_buf))

         store = HDFStore(path_or_buf, mode=mode, **kwargs)
         # can't auto open/close if we are using an iterator
@@ -465,8 +465,8 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
         try:
             import tables  # noqa
         except ImportError as ex:  # pragma: no cover
-            raise ImportError('HDFStore requires PyTables, "{ex}" problem '
-                              'importing'.format(ex=str(ex)))
+            raise ImportError('HDFStore requires PyTables, "{ex!s}" problem '
+                              'importing'.format(ex=ex))

         if complib is not None and complib not in tables.filters.all_complibs:
             raise ValueError(
@@ -515,8 +515,9 @@ def __getattr__(self, name):
             return self.get(name)
         except (KeyError, ClosedFileError):
             pass
-        raise AttributeError("'%s' object has no attribute '%s'" %
-                             (type(self).__name__, name))
+        raise AttributeError(
+            "'{object}' object has no attribute '{name}'".format(
+                object=type(self).__name__, name=name))

     def __contains__(self, key):
         """ check for existence of this key
@@ -533,7 +534,8 @@ def __len__(self):
         return len(self.groups())

     def __unicode__(self):
-        return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
+        return '{type}\nFile path: {path}\n'.format(
+            type=type(self), path=pprint_thing(self._path))

     def __enter__(self):
         return self
@@ -601,7 +603,8 @@ def open(self, mode='a', **kwargs):
            self._handle = tables.open_file(self._path, self._mode, **kwargs)
        except (IOError) as e:  # pragma: no cover
            if 'can not be written' in str(e):
-                print('Opening %s in read-only mode' % self._path)
+                print(
+                    'Opening {path} in read-only mode'.format(path=self._path))
                self._handle = tables.open_file(self._path, 'r', **kwargs)
            else:
                raise
@@ -688,7 +691,7 @@ def get(self, key):
         """
         group = self.get_node(key)
         if group is None:
-            raise KeyError('No object named %s in the file' % key)
+            raise KeyError('No object named {key} in the file'.format(key=key))
         return self._read_group(group)

     def select(self, key, where=None, start=None, stop=None, columns=None,
@@ -716,7 +719,7 @@ def select(self, key, where=None, start=None, stop=None, columns=None,
         """
         group = self.get_node(key)
         if group is None:
-            raise KeyError('No object named %s in the file' % key)
+            raise KeyError('No object named {key} in the file'.format(key=key))

         # create the storer and axes
         where = _ensure_term(where, scope_level=1)
@@ -821,11 +824,11 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
         nrows = None
         for t, k in itertools.chain([(s, selector)], zip(tbls, keys)):
             if t is None:
-                raise KeyError("Invalid table [%s]" % k)
+                raise KeyError("Invalid table [{key}]".format(key=k))
             if not t.is_table:
                 raise TypeError(
-                    "object [%s] is not a table, and cannot be used in all "
-                    "select as multiple" % t.pathname
+                    "object [{obj}] is not a table, and cannot be used in all "
+                    "select as multiple".format(obj=t.pathname)
                 )

             if nrows is None:
@@ -1225,7 +1228,8 @@ def info(self):

         .. versionadded:: 0.21.0
         """
-        output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
+        output = '{type}\nFile path: {path}\n'.format(
+            type=type(self), path=pprint_thing(self._path))
         if self.is_open:
             lkeys = sorted(list(self.keys()))
             if len(lkeys):
@@ -1241,8 +1245,9 @@ def info(self):
                             pprint_thing(s or 'invalid_HDFStore node'))
                     except Exception as detail:
                         keys.append(k)
-                        values.append("[invalid_HDFStore node: %s]"
-                                      % pprint_thing(detail))
+                        values.append(
+                            "[invalid_HDFStore node: {detail}]".format(
+                                detail=pprint_thing(detail)))

                 output += adjoin(12, keys, values)
             else:
@@ -1276,10 +1281,9 @@ def _create_storer(self, group, format=None, value=None, append=False,

         def error(t):
             raise TypeError(
-                "cannot properly create the storer for: [%s] [group->%s,"
-                "value->%s,format->%s,append->%s,kwargs->%s]"
-                % (t, group, type(value), format, append, kwargs)
-            )
+                "cannot properly create the storer for: [{}] [group->{},"
+                "value->{},format->{},append->{},kwargs->{}]".format(
+                    t, group, type(value), format, append, kwargs))

         pt = _ensure_decoded(getattr(group._v_attrs, 'pandas_type', None))
         tt = _ensure_decoded(getattr(group._v_attrs, 'table_type', None))
@@ -1559,7 +1563,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None,
     def set_name(self, name, kind_attr=None):
         """ set the name of this indexer """
         self.name = name
-        self.kind_attr = kind_attr or "%s_kind" % name
+        self.kind_attr = kind_attr or "{name}_kind".format(name=name)
         if self.cname is None:
             self.cname = name
@@ -1590,7 +1594,7 @@ def __unicode__(self):
                                            self.axis,
                                            self.pos,
                                            self.kind)))
-        return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % temp
+        return "name->{},cname->{},axis->{},pos->{},kind->{}".format(*temp)

     def __eq__(self, other):
         """ compare 2 col items """
@@ -1714,10 +1718,11 @@ def validate_col(self, itemsize=None):
                     itemsize = self.itemsize
                 if c.itemsize < itemsize:
                     raise ValueError(
-                        "Trying to store a string with len [%s] in [%s] "
-                        "column but\nthis column has a limit of [%s]!\n"
+                        "Trying to store a string with len [{}] in [{}] "
+                        "column but\nthis column has a limit of [{}]!\n"
                         "Consider using min_itemsize to preset the sizes on "
-                        "these columns" % (itemsize, self.cname, c.itemsize))
+                        "these columns".format(
+                            itemsize, self.cname, c.itemsize))
                 return c.itemsize

         return None
@@ -1727,8 +1732,8 @@ def validate_attr(self, append):
         if append:
             existing_kind = getattr(self.attrs, self.kind_attr, None)
             if existing_kind is not None and existing_kind != self.kind:
-                raise TypeError("incompatible kind in col [%s - %s]" %
-                                (existing_kind, self.kind))
+                raise TypeError("incompatible kind in col [{} - {}]".format(
+                    existing_kind, self.kind))

     def update_info(self, info):
         """ set/update the info for this indexable with the key/value
@@ -1753,9 +1758,9 @@ def update_info(self, info):

                 else:
                     raise ValueError(
-                        "invalid info for [%s] for [%s], existing_value [%s] "
-                        "conflicts with new value [%s]"
-                        % (self.name, key, existing_value, value))
+                        "invalid info for [{}] for [{}], existing_value [{}] "
+                        "conflicts with new value [{}]".format(
+                            self.name, key, existing_value, value))
             else:
                 if value is not None or existing_value is not None:
                     idx[key] = value
@@ -1840,7 +1845,7 @@ def create_for_block(
         """ return a new datacol with the block i """

        if cname is None:
-            cname = name or 'values_block_%d' % i
+            cname = name or 'values_block_{idx}'.format(idx=i)
        if name is None:
            name = cname
re.search(r"values_block_(\d+)", name) if m: - name = "values_%s" % m.groups()[0] + name = "values_{group}".format(group=m.groups()[0]) except IndexError: pass @@ -1876,7 +1881,8 @@ def __unicode__(self): self.dtype, self.kind, self.shape))) - return "name->%s,cname->%s,dtype->%s,kind->%s,shape->%s" % temp + return ("name->{},cname->{},dtype->{},kind->{}," + "shape->{}".format(*temp)) def __eq__(self, other): """ compare 2 col items """ @@ -1926,7 +1932,8 @@ def set_kind(self): self.kind = 'bool' else: raise AssertionError( - "cannot interpret dtype of [%s] in [%s]" % (dtype, self)) + "cannot interpret dtype of [{dtype}] in [{obj}]".format( + dtype=dtype, obj=self)) # set my typ if we need if self.typ is None: @@ -2009,9 +2016,9 @@ def set_atom_string(self, block, block_items, existing_col, min_itemsize, inferred_type = lib.infer_dtype(col.ravel(), skipna=False) if inferred_type != 'string': raise TypeError( - "Cannot serialize the column [%s] because\n" - "its data contents are [%s] object dtype" - % (item, inferred_type) + "Cannot serialize the column [{item}] because\n" + "its data contents are [{type}] object dtype".format( + item=item, type=inferred_type) ) # itemsize is the maximum length of a string (along any dimension) @@ -2033,16 +2040,17 @@ def set_atom_string(self, block, block_items, existing_col, min_itemsize, self.itemsize = itemsize self.kind = 'string' self.typ = self.get_atom_string(block, itemsize) - self.set_data(data_converted.astype('|S%d' % itemsize, copy=False)) + self.set_data(data_converted.astype( + '|S{size}'.format(size=itemsize), copy=False)) def get_atom_coltype(self, kind=None): """ return the PyTables column class for this column """ if kind is None: kind = self.kind if self.kind.startswith('uint'): - col_name = "UInt%sCol" % kind[4:] + col_name = "UInt{name}Col".format(name=kind[4:]) else: - col_name = "%sCol" % kind.capitalize() + col_name = "{name}Col".format(name=kind.capitalize()) return getattr(_tables(), col_name) @@ -2325,8 +2333,10 @@ def __unicode__(self): s = self.shape if s is not None: if isinstance(s, (list, tuple)): - s = "[%s]" % ','.join(pprint_thing(x) for x in s) - return "%-12.12s (shape->%s)" % (self.pandas_type, s) + s = "[{shape}]".format( + shape=','.join(pprint_thing(x) for x in s)) + return "{type:12.12} (shape->{shape})".format( + type=self.pandas_type, shape=s) return self.pandas_type def set_object_info(self): @@ -2542,7 +2552,8 @@ def read_array(self, key, start=None, stop=None): return ret def read_index(self, key, **kwargs): - variety = _ensure_decoded(getattr(self.attrs, '%s_variety' % key)) + variety = _ensure_decoded( + getattr(self.attrs, '{key}_variety'.format(key=key))) if variety == u'multi': return self.read_multi_index(key, **kwargs) @@ -2554,20 +2565,22 @@ def read_index(self, key, **kwargs): _, index = self.read_index_node(getattr(self.group, key), **kwargs) return index else: # pragma: no cover - raise TypeError('unrecognized index variety: %s' % variety) + raise TypeError( + 'unrecognized index variety: {variety}'.format( + variety=variety)) def write_index(self, key, index): if isinstance(index, MultiIndex): - setattr(self.attrs, '%s_variety' % key, 'multi') + setattr(self.attrs, '{key}_variety'.format(key=key), 'multi') self.write_multi_index(key, index) elif isinstance(index, BlockIndex): - setattr(self.attrs, '%s_variety' % key, 'block') + setattr(self.attrs, '{key}_variety'.format(key=key), 'block') self.write_block_index(key, index) elif isinstance(index, IntIndex): - setattr(self.attrs, '%s_variety' % key, 
@@ -2587,33 +2600,33 @@ def write_index(self, key, index):
                 node._v_attrs.tz = _get_tz(index.tz)

     def write_block_index(self, key, index):
-        self.write_array('%s_blocs' % key, index.blocs)
-        self.write_array('%s_blengths' % key, index.blengths)
-        setattr(self.attrs, '%s_length' % key, index.length)
+        self.write_array('{key}_blocs'.format(key=key), index.blocs)
+        self.write_array('{key}_blengths'.format(key=key), index.blengths)
+        setattr(self.attrs, '{key}_length'.format(key=key), index.length)

     def read_block_index(self, key, **kwargs):
-        length = getattr(self.attrs, '%s_length' % key)
-        blocs = self.read_array('%s_blocs' % key, **kwargs)
-        blengths = self.read_array('%s_blengths' % key, **kwargs)
+        length = getattr(self.attrs, '{key}_length'.format(key=key))
+        blocs = self.read_array('{key}_blocs'.format(key=key), **kwargs)
+        blengths = self.read_array('{key}_blengths'.format(key=key), **kwargs)
         return BlockIndex(length, blocs, blengths)

     def write_sparse_intindex(self, key, index):
-        self.write_array('%s_indices' % key, index.indices)
-        setattr(self.attrs, '%s_length' % key, index.length)
+        self.write_array('{key}_indices'.format(key=key), index.indices)
+        setattr(self.attrs, '{key}_length'.format(key=key), index.length)

     def read_sparse_intindex(self, key, **kwargs):
-        length = getattr(self.attrs, '%s_length' % key)
-        indices = self.read_array('%s_indices' % key, **kwargs)
+        length = getattr(self.attrs, '{key}_length'.format(key=key))
+        indices = self.read_array('{key}_indices'.format(key=key), **kwargs)
         return IntIndex(length, indices)

     def write_multi_index(self, key, index):
-        setattr(self.attrs, '%s_nlevels' % key, index.nlevels)
+        setattr(self.attrs, '{key}_nlevels'.format(key=key), index.nlevels)

         for i, (lev, level_codes, name) in enumerate(zip(index.levels,
                                                          index.codes,
                                                          index.names)):
             # write the level
-            level_key = '%s_level%d' % (key, i)
+            level_key = '{key}_level{idx}'.format(key=key, idx=i)
             conv_level = _convert_index(lev, self.encoding, self.errors,
                                         self.format_type).set_name(level_key)
             self.write_array(level_key, conv_level.values)
@@ -2622,26 +2635,27 @@ def write_multi_index(self, key, index):
             node._v_attrs.name = name

             # write the name
-            setattr(node._v_attrs, '%s_name%d' % (key, i), name)
+            setattr(node._v_attrs, '{key}_name{idx}'.format(
+                key=key, idx=i), name)

             # write the labels
-            label_key = '%s_label%d' % (key, i)
+            label_key = '{key}_label{idx}'.format(key=key, idx=i)
             self.write_array(label_key, level_codes)

     def read_multi_index(self, key, **kwargs):
-        nlevels = getattr(self.attrs, '%s_nlevels' % key)
+        nlevels = getattr(self.attrs, '{key}_nlevels'.format(key=key))

         levels = []
         codes = []
         names = []
         for i in range(nlevels):
-            level_key = '%s_level%d' % (key, i)
+            level_key = '{key}_level{idx}'.format(key=key, idx=i)
             name, lev = self.read_index_node(getattr(self.group, level_key),
                                              **kwargs)
             levels.append(lev)
             names.append(name)

-            label_key = '%s_label%d' % (key, i)
+            label_key = '{key}_label{idx}'.format(key=key, idx=i)
             level_codes = self.read_array(label_key, **kwargs)
             codes.append(level_codes)
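The attribute keys written above are positional (key plus level number), so a
conversion to named .format fields must still be fed the loop index, not the
level's name. A small self-contained check (hypothetical key/level values, not
pandas code) that the keyword form reproduces the old positional result:

    key, i, name = 'index', 0, 'city'   # hypothetical MultiIndex level

    old_attr = '%s_name%d' % (key, i)
    new_attr = '{key}_name{idx}'.format(key=key, idx=i)
    assert old_attr == new_attr == 'index_name0'

    # Substituting the level *name* would silently change the attribute key
    # that read_multi_index later looks up:
    assert '{key}_name{name}'.format(key=key, name=name) == 'index_namecity'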
@@ -2889,7 +2903,7 @@ def read(self, **kwargs):
         columns = self.read_index('columns')
         sdict = {}
         for c in columns:
-            key = 'sparse_series_%s' % c
+            key = 'sparse_series_{columns}'.format(columns=c)
             s = SparseSeriesFixed(self.parent, getattr(self.group, key))
             s.infer_axes()
             sdict[c] = s.read()
@@ -2901,7 +2915,7 @@ def write(self, obj, **kwargs):
         """ write it as a collection of individual sparse series """
         super(SparseFrameFixed, self).write(obj, **kwargs)
         for name, ss in compat.iteritems(obj):
-            key = 'sparse_series_%s' % name
+            key = 'sparse_series_{name}'.format(name=name)
             if key not in self.group._v_children:
                 node = self._handle.create_group(self.group, key)
             else:
@@ -2925,7 +2939,7 @@ def shape(self):
             # items
             items = 0
             for i in range(self.nblocks):
-                node = getattr(self.group, 'block%d_items' % i)
+                node = getattr(self.group, 'block{idx}_items'.format(idx=i))
                 shape = getattr(node, 'shape', None)
                 if shape is not None:
                     items += shape[0]
@@ -2958,15 +2972,16 @@ def read(self, start=None, stop=None, **kwargs):

         for i in range(self.ndim):

            _start, _stop = (start, stop) if i == select_axis else (None, None)
-            ax = self.read_index('axis%d' % i, start=_start, stop=_stop)
+            ax = self.read_index('axis{idx}'.format(
+                idx=i), start=_start, stop=_stop)
             axes.append(ax)

         items = axes[0]
         blocks = []
         for i in range(self.nblocks):

-            blk_items = self.read_index('block%d_items' % i)
-            values = self.read_array('block%d_values' % i,
+            blk_items = self.read_index('block{idx}_items'.format(idx=i))
+            values = self.read_array('block{idx}_values'.format(idx=i),
                                      start=_start, stop=_stop)
             blk = make_block(values,
                              placement=items.get_indexer(blk_items))
@@ -2986,15 +3001,16 @@ def write(self, obj, **kwargs):
             if not ax.is_unique:
                 raise ValueError(
                     "Columns index has to be unique for fixed format")
-            self.write_index('axis%d' % i, ax)
+            self.write_index('axis{idx}'.format(idx=i), ax)

         # Supporting mixed-type DataFrame objects...nontrivial
         self.attrs.nblocks = len(data.blocks)
         for i, blk in enumerate(data.blocks):
             # I have no idea why, but writing values before items fixed #2299
             blk_items = data.items.take(blk.mgr_locs)
-            self.write_array('block%d_values' % i, blk.values, items=blk_items)
-            self.write_index('block%d_items' % i, blk_items)
+            self.write_array('block{idx}_values'.format(idx=i),
+                             blk.values, items=blk_items)
+            self.write_index('block{idx}_items'.format(idx=i), blk_items)


 class FrameFixed(BlockManagerFixed):
@@ -3065,17 +3081,18 @@ def format_type(self):

     def __unicode__(self):
         """ return a pretty representatgion of myself """
         self.infer_axes()
-        dc = ",dc->[%s]" % ','.join(
-            self.data_columns) if len(self.data_columns) else ''
+        dc = ",dc->[{columns}]".format(columns=','.join(
+            self.data_columns)) if len(self.data_columns) else ''

         ver = ''
         if self.is_old_version:
-            ver = "[%s]" % '.'.join(str(x) for x in self.version)
+            ver = "[{version}]".format(
+                version='.'.join(str(x) for x in self.version))

-        return "%-12.12s%s (typ->%s,nrows->%s,ncols->%s,indexers->[%s]%s)" % (
-            self.pandas_type, ver, self.table_type_short, self.nrows,
-            self.ncols, ','.join(a.name for a in self.index_axes), dc
-        )
+        return (
+            "{:12.12}{} (typ->{},nrows->{},ncols->{},indexers->[{}]{})".format(
+                self.pandas_type, ver, self.table_type_short, self.nrows,
+                self.ncols, ','.join(a.name for a in self.index_axes), dc))

     def __getitem__(self, c):
         """ return the axis for c """
{self}]".format( + other=other.table_type, self=self.table_type)) for c in ['index_axes', 'non_index_axes', 'values_axes']: sv = getattr(self, c, None) @@ -3103,13 +3122,14 @@ def validate(self, other): oax = ov[i] if sax != oax: raise ValueError( - "invalid combinate of [%s] on appending data [%s] " - "vs current table [%s]" % (c, sax, oax)) + "invalid combinate of [{c}] on appending data " + "[{sax}] vs current table [{oax}]".format( + c=c, sax=sax, oax=oax)) # should never get here raise Exception( - "invalid combinate of [%s] on appending data [%s] vs " - "current table [%s]" % (c, sv, ov)) + "invalid combinate of [{c}] on appending data [{sv}] vs " + "current table [{ov}]".format(c=c, sv=sv, ov=ov)) @property def is_multi_index(self): @@ -3292,8 +3312,8 @@ def validate_min_itemsize(self, min_itemsize): continue if k not in q: raise ValueError( - "min_itemsize has the key [%s] which is not an axis or " - "data_column" % k) + "min_itemsize has the key [{key}] which is not an axis or " + "data_column".format(key=k)) @property def indexables(self): @@ -3480,9 +3500,10 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, try: axes = _AXES_MAP[type(obj)] except KeyError: - raise TypeError("cannot properly create the storer for: " - "[group->%s,value->%s]" - % (self.group._v_name, type(obj))) + raise TypeError( + "cannot properly create the storer for: [group->{group}," + "value->{value}]".format( + group=self.group._v_name, value=type(obj))) # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] @@ -3600,9 +3621,10 @@ def get_blk_items(mgr, blocks): new_blk_items.append(b_items) except (IndexError, KeyError): raise ValueError( - "cannot match existing table structure for [%s] on " - "appending data" % ','.join(pprint_thing(item) for - item in items)) + "cannot match existing table structure for [{items}] " + "on appending data".format( + items=(','.join(pprint_thing(item) for + item in items)))) blocks = new_blocks blk_items = new_blk_items @@ -3627,9 +3649,11 @@ def get_blk_items(mgr, blocks): try: existing_col = existing_table.values_axes[i] except (IndexError, KeyError): - raise ValueError("Incompatible appended table [%s] with " - "existing table [%s]" - % (blocks, existing_table.values_axes)) + raise ValueError( + "Incompatible appended table [{blocks}]" + "with existing table [{table}]".format( + blocks=blocks, + table=existing_table.values_axes)) else: existing_col = None @@ -3651,9 +3675,8 @@ def get_blk_items(mgr, blocks): except Exception as detail: raise Exception( "cannot find the correct atom type -> " - "[dtype->%s,items->%s] %s" - % (b.dtype.name, b_items, str(detail)) - ) + "[dtype->{name},items->{items}] {detail!s}".format( + name=b.dtype.name, items=b_items, detail=detail)) j += 1 # validate our min_itemsize @@ -3719,8 +3742,8 @@ def process_filter(field, filt): return obj.loc._getitem_axis(takers, axis=axis_number) - raise ValueError( - "cannot find the field [%s] for filtering!" 
@@ -3798,8 +3821,8 @@ def read_column(self, column, where=None, start=None, stop=None):
                 if not a.is_data_indexable:
                     raise ValueError(
-                        "column [%s] can not be extracted individually; it is "
-                        "not data indexable" % column)
+                        "column [{column}] can not be extracted individually; "
+                        "it is not data indexable".format(column=column))

                 # column must be an indexable or a data column
                 c = getattr(self.table.cols, column)
@@ -3811,7 +3834,8 @@ def read_column(self, column, where=None, start=None, stop=None):
                     ).take_data(),
                     a.tz, True), name=column)

-        raise KeyError("column [%s] not found in the table" % column)
+        raise KeyError(
+            "column [{column}] not found in the table".format(column=column))


 class WORMTable(Table):
@@ -4120,14 +4144,17 @@ def write_data_chunk(self, rows, indexes, mask, values):
                 rows = rows[m]
         except Exception as detail:
-            raise Exception("cannot create row-data -> %s" % detail)
+            raise Exception(
+                "cannot create row-data -> {detail}".format(detail=detail))

         try:
             if len(rows):
                 self.table.append(rows)
                 self.table.flush()
         except Exception as detail:
-            raise TypeError("tables cannot write this data -> %s" % detail)
+            raise TypeError(
+                "tables cannot write this data -> {detail}".format(
+                    detail=detail))

     def delete(self, where=None, start=None, stop=None, **kwargs):
@@ -4597,7 +4624,7 @@ def _unconvert_index(data, kind, encoding=None, errors='strict'):
     elif kind == u'object':
         index = np.asarray(data[0])
     else:  # pragma: no cover
-        raise ValueError('unrecognized index type %s' % kind)
+        raise ValueError('unrecognized index type {kind}'.format(kind=kind))
     return index
@@ -4612,7 +4639,7 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None,
         index = _unconvert_string_array(data, nan_rep=None,
                                         encoding=encoding, errors=errors)
     else:  # pragma: no cover
-        raise ValueError('unrecognized index type %s' % kind)
+        raise ValueError('unrecognized index type {kind}'.format(kind=kind))
     return index
@@ -4643,7 +4670,7 @@ def _convert_string_array(data, encoding, errors, itemsize=None):
         ensured = ensure_object(data.ravel())
         itemsize = max(1, libwriters.max_len_string_array(ensured))

-    data = np.asarray(data, dtype="S%d" % itemsize)
+    data = np.asarray(data, dtype="S{size}".format(size=itemsize))
     return data
@@ -4708,7 +4735,7 @@ def _get_converter(kind, encoding, errors):
         return lambda x: _unconvert_string_array(x, encoding=encoding,
                                                  errors=errors)
     else:  # pragma: no cover
-        raise ValueError('invalid kind %s' % kind)
+        raise ValueError('invalid kind {kind}'.format(kind=kind))


 def _need_convert(kind):
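On the __unicode__ hunks in this file: the old printf spec %-12.12s pads to a
width of 12 and truncates at 12 characters, and str.format left-aligns strings
by default, so {:12.12} is a faithful replacement. A quick standalone
comparison (illustrative values only):

    for s in ('frame_table', 'a', 'something_much_longer'):
        old = '%-12.12s' % s
        new = '{:12.12}'.format(s)   # strings left-align by default
        assert old == new, (old, new)
        print(repr(new))
    # 'frame_table '  -- padded to width 12
    # 'a           '
    # 'something_mu'  -- truncated at precision 12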
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index d634b5ec4f8f9..fd4509b1ce196 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -163,7 +163,7 @@ def _get_properties(self):
         if buf in const.encoding_names:
             self.file_encoding = const.encoding_names[buf]
         else:
-            self.file_encoding = "unknown (code=%s)" % str(buf)
+            self.file_encoding = "unknown (code={name!s})".format(name=buf)

         # Get platform information
         buf = self._read_bytes(const.platform_offset, const.platform_length)
@@ -435,8 +435,8 @@ def _process_columnsize_subheader(self, offset, length):
         self.column_count = self._read_int(offset, int_len)
         if (self.col_count_p1 + self.col_count_p2 !=
                 self.column_count):
-            print("Warning: column count mismatch (%d + %d != %d)\n",
-                  self.col_count_p1, self.col_count_p2, self.column_count)
+            print("Warning: column count mismatch ({} + {} != {})\n".format(
+                self.col_count_p1, self.col_count_p2, self.column_count))

     # Unknown purpose
     def _process_subheader_counts(self, offset, length):
@@ -694,7 +694,7 @@ def _chunk_to_dataframe(self):
                 js += 1
             else:
                 self.close()
-                raise ValueError("unknown column type %s" %
-                                 self._column_types[j])
+                raise ValueError("unknown column type {type}".format(
+                    type=self._column_types[j]))

         return rslt
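The column-count warning above is a behavior fix as well as a style change:
print() never applies %-interpolation, so the old comma form printed the raw
template and the counts as separate values. A minimal illustration with
made-up counts:

    # Old form: print() treats the template and values as separate arguments.
    print("Warning: column count mismatch (%d + %d != %d)\n", 1, 2, 4)
    # -> Warning: column count mismatch (%d + %d != %d)
    #     1 2 4

    # .format() actually interpolates the values.
    print("Warning: column count mismatch ({} + {} != {})\n".format(1, 2, 4))
    # -> Warning: column count mismatch (1 + 2 != 4)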
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 2f4093e154a95..5d1163b3e0024 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -240,7 +240,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
     try:
         meta.reflect(only=[table_name], views=True)
     except sqlalchemy.exc.InvalidRequestError:
-        raise ValueError("Table %s not found" % table_name)
+        raise ValueError("Table {name} not found".format(name=table_name))

     pandas_sql = SQLDatabase(con, meta=meta)
     table = pandas_sql.read_table(
@@ -250,7 +250,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
     if table is not None:
         return table
     else:
-        raise ValueError("Table %s not found" % table_name, con)
+        raise ValueError("Table {name} not found".format(name=table_name), con)


 def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
@@ -552,7 +552,8 @@ def __init__(self, name, pandas_sql_engine, frame=None, index=True,
             self.table = self.pd_sql.get_table(self.name, self.schema)

         if self.table is None:
-            raise ValueError("Could not init table '%s'" % name)
+            raise ValueError(
+                "Could not init table '{name}'".format(name=name))

     def exists(self):
         return self.pd_sql.has_table(self.name, self.schema)
@@ -569,7 +570,8 @@ def _execute_create(self):
     def create(self):
         if self.exists():
             if self.if_exists == 'fail':
-                raise ValueError("Table '%s' already exists." % self.name)
+                raise ValueError(
+                    "Table '{name}' already exists.".format(name=self.name))
             elif self.if_exists == 'replace':
                 self.pd_sql.drop_table(self.name, self.schema)
                 self._execute_create()
@@ -1161,8 +1163,8 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
             from sqlalchemy.types import to_instance, TypeEngine
             for col, my_type in dtype.items():
                 if not isinstance(to_instance(my_type), TypeEngine):
-                    raise ValueError('The type of %s is not a SQLAlchemy '
-                                     'type ' % col)
+                    raise ValueError('The type of {column} is not a '
+                                     'SQLAlchemy type '.format(column=col))

         table = SQLTable(name, self, frame=frame, index=index,
                          if_exists=if_exists, index_label=index_label,
@@ -1244,7 +1246,8 @@ def _get_unicode_name(name):
     try:
         uname = text_type(name).encode("utf-8", "strict").decode("utf-8")
     except UnicodeError:
-        raise ValueError("Cannot convert identifier to UTF-8: '%s'" % name)
+        raise ValueError(
+            "Cannot convert identifier to UTF-8: '{name}'".format(name=name))
     return uname
@@ -1305,8 +1308,9 @@ def insert_statement(self):
         bracketed_names = [escape(column) for column in names]
         col_names = ','.join(bracketed_names)
         wildcards = ','.join([wld] * len(names))
-        insert_statement = 'INSERT INTO %s (%s) VALUES (%s)' % (
-            escape(self.name), col_names, wildcards)
+        insert_statement = \
+            u'INSERT INTO {table} ({columns}) VALUES ({wld})'.format(
+                table=escape(self.name), columns=col_names, wld=wildcards)
         return insert_statement

     def _execute_insert(self, conn, keys, data_iter):
@@ -1429,12 +1433,14 @@ def execute(self, *args, **kwargs):
             try:
                 self.con.rollback()
             except Exception:  # pragma: no cover
-                ex = DatabaseError("Execution failed on sql: %s\n%s\nunable"
-                                   " to rollback" % (args[0], exc))
+                ex = DatabaseError(
+                    "Execution failed on sql: {sql}\n{exc}\nunable "
+                    "to rollback".format(sql=args[0], exc=exc))
                 raise_with_traceback(ex)

             ex = DatabaseError(
-                "Execution failed on sql '%s': %s" % (args[0], exc))
+                "Execution failed on sql '{sql}': {exc}".format(
+                    sql=args[0], exc=exc))
             raise_with_traceback(ex)

     @staticmethod
@@ -1530,8 +1536,8 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
         if dtype is not None:
             for col, my_type in dtype.items():
                 if not isinstance(my_type, str):
-                    raise ValueError('%s (%s) not a string' % (
-                        col, str(my_type)))
+                    raise ValueError('{column} ({type!s}) not a string'.format(
+                        column=col, type=my_type))

         table = SQLiteTable(name, self, frame=frame, index=index,
                             if_exists=if_exists, index_label=index_label,
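For the has_table hunk below, note that .format only injects the DB-API
placeholder token ('?'); the user-supplied table name still travels separately
as a query parameter. A self-contained sqlite3 sketch of the same pattern
(hypothetical table name, not part of the patch):

    import sqlite3

    con = sqlite3.connect(':memory:')
    con.execute("CREATE TABLE demo (x INTEGER)")  # hypothetical table

    wld = '?'  # DB-API paramstyle token for sqlite3
    query = ("SELECT name FROM sqlite_master "
             "WHERE type='table' AND name={wld};").format(wld=wld)
    # The name is bound by the driver, so quoting/escaping stays its problem:
    print(len(con.execute(query, ['demo']).fetchall()) > 0)   # -> True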
query = ("SELECT name FROM sqlite_master " - "WHERE type='table' AND name=%s;") % wld + "WHERE type='table' AND name={wld};").format(wld=wld) return len(self.execute(query, [name, ]).fetchall()) > 0 @@ -1554,7 +1560,8 @@ def get_table(self, table_name, schema=None): return None # not supported in fallback mode def drop_table(self, name, schema=None): - drop_sql = "DROP TABLE %s" % _get_valid_sqlite_name(name) + drop_sql = "DROP TABLE {name}".format( + name=_get_valid_sqlite_name(name)) self.execute(drop_sql) def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b5e7eb24465f5..1b0660171ecac 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -357,7 +357,7 @@ def convert_delta_safe(base, deltas, unit): month = np.ones_like(dates) conv_dates = convert_year_month_safe(year, month) else: - raise ValueError("Date fmt %s not understood" % fmt) + raise ValueError("Date fmt {fmt} not understood".format(fmt=fmt)) if has_bad_values: # Restore NaT for bad values conv_dates[bad_locs] = NaT @@ -452,7 +452,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): d = parse_dates_safe(dates, year=True) conv_dates = d.year else: - raise ValueError("Format %s is not a known Stata date format" % fmt) + raise ValueError( + "Format {fmt} is not a known Stata date format".format(fmt=fmt)) conv_dates = Series(conv_dates, dtype=np.float64) missing_value = struct.unpack('", "big"]: return ">" else: # pragma : no cover - raise ValueError("Endianness %s not understood" % endianness) + raise ValueError( + "Endianness {endian} not understood".format(endian=endianness)) def _pad_bytes(name, length): @@ -1789,7 +1791,8 @@ def _convert_datetime_to_stata_type(fmt): "%tq", "th", "%th", "ty", "%ty"]: return np.float64 # Stata expects doubles for SIFs else: - raise NotImplementedError("Format %s not implemented" % fmt) + raise NotImplementedError( + "Format {fmt} not implemented".format(fmt=fmt)) def _maybe_convert_to_int_keys(convert_dates, varlist): @@ -1840,7 +1843,8 @@ def _dtype_to_stata_type(dtype, column): elif dtype == np.int8: return 251 else: # pragma : no cover - raise NotImplementedError("Data type %s not supported." % dtype) + raise NotImplementedError( + "Data type {dtype} not supported.".format(dtype=dtype)) def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, @@ -1895,7 +1899,8 @@ def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, elif dtype == np.int8 or dtype == np.int16: return "%8.0g" else: # pragma : no cover - raise NotImplementedError("Data type %s not supported." % dtype) + raise NotImplementedError( + "Data type {dtype} not supported.".format(dtype=dtype)) class StataWriter(StataParser): @@ -2389,7 +2394,7 @@ def _prepare_data(self): if typ <= self._max_string_length: has_strings = True data[col] = data[col].fillna('').apply(_pad_bytes, args=(typ,)) - stype = 'S%d' % typ + stype = 'S{type}'.format(type=typ) dtypes.append(('c' + str(i), stype)) string = data[col].str.encode(self._encoding) data_cols.append(string.values.astype(stype))