diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index 9f238c22850b7..bc9dcdccfc2e1 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -1199,22 +1199,109 @@ numpy array. For instance,
 
    dflookup = DataFrame(np.random.rand(20,4), columns = ['A','B','C','D'])
    dflookup.lookup(list(range(0,10,2)), ['B','C','A','B','D'])
 
-Setting values in mixed-type DataFrame
---------------------------------------
+.. _indexing.float64index:
 
-.. _indexing.mixed_type_setting:
+Float64Index
+------------
+
+.. versionadded:: 0.13.0
 
-Setting values on a mixed-type DataFrame or Panel is supported when using
-scalar values, though setting arbitrary vectors is not yet supported:
+By default a ``Float64Index`` will be automatically created when passing floating or mixed-integer-floating values in index creation.
+This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
+same.
 
 .. ipython:: python
 
-   df2 = df[:4]
-   df2['foo'] = 'bar'
-   print(df2)
-   df2.ix[2] = np.nan
-   print(df2)
-   print(df2.dtypes)
+   indexf = Index([1.5, 2, 3, 4.5, 5])
+   indexf
+   sf = Series(range(5),index=indexf)
+   sf
+
+Scalar selection for ``[],.ix,.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``)
+
+.. ipython:: python
+
+   sf[3]
+   sf[3.0]
+   sf.ix[3]
+   sf.ix[3.0]
+   sf.loc[3]
+   sf.loc[3.0]
+
+The only positional indexing is via ``iloc``
+
+.. ipython:: python
+
+   sf.iloc[3]
+
+A scalar index that is not found will raise ``KeyError``
+
+Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc``
+
+.. ipython:: python
+
+   sf[2:4]
+   sf.ix[2:4]
+   sf.loc[2:4]
+   sf.iloc[2:4]
+
+In float indexes, slicing using floats is allowed
+
+.. ipython:: python
+
+   sf[2.1:4.6]
+   sf.loc[2.1:4.6]
+
+In non-float indexes, slicing using floats will raise a ``TypeError``
+
+.. code-block:: python
+
+   In [1]: Series(range(5))[3.5]
+   TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index)
+
+   In [1]: Series(range(5))[3.5:4.5]
+   TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index)
+
+Using a scalar float indexer will be deprecated in a future version, but is allowed for now.
+
+.. code-block:: python
+
+   In [3]: Series(range(5))[3.0]
+   Out[3]: 3
+
+Here is a typical use-case for this type of indexing. Imagine that you have a somewhat
+irregular timedelta-like indexing scheme, but the data is recorded as floats. This could, for
+example, be millisecond offsets.
+
+.. ipython:: python
+
+   dfir = concat([DataFrame(randn(5,2),
+                            index=np.arange(5) * 250.0,
+                            columns=list('AB')),
+                  DataFrame(randn(6,2),
+                            index=np.arange(4,10) * 250.1,
+                            columns=list('AB'))])
+   dfir
+
+Selection operations will then always work on a value basis, for all selection operators.
+
+.. ipython:: python
+
+   dfir[0:1000.4]
+   dfir.loc[0:1001,'A']
+   dfir.loc[1000.4]
+
+You could then easily pick out the first 1 second (1000 ms) of data.
+
+.. ipython:: python
+
+   dfir[0:1000]
+
+Of course if you need integer based selection, then use ``iloc``
+
+.. ipython:: python
+
+   dfir.iloc[0:5]
 
 .. _indexing.view_versus_copy:
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
index eec2e91f0a755..e39116f9023e1 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -226,6 +226,10 @@ API Changes
     add top-level ``to_timedelta`` function
   - ``NDFrame`` now is compatible with Python's toplevel ``abs()`` function (:issue:`4821`).
  - raise a ``TypeError`` on invalid comparison ops on Series/DataFrame (e.g. integer/datetime) (:issue:`4968`)
+  - Added a new index type, ``Float64Index``. This will be automatically created when passing floating values in index creation.
+    This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same.
+    Indexing on other index types is preserved (and positional fallback for ``[],ix``), with the exception that floating point slicing
+    on non-``Float64Index`` indexes will raise a ``TypeError``, e.g. ``Series(range(5))[3.5:4.5]`` (:issue:`263`)
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
index bda6fa4cdf021..95e5ff62a2abd 100644
--- a/doc/source/v0.13.0.txt
+++ b/doc/source/v0.13.0.txt
@@ -116,6 +116,72 @@ Indexing API Changes
 
     p
     p.loc[:,:,'C']
 
+Float64Index API Change
+~~~~~~~~~~~~~~~~~~~~~~~
+
+  - Added a new index type, ``Float64Index``. This will be automatically created when passing floating values in index creation.
+    This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
+    same. See :ref:`the docs <indexing.float64index>`, (:issue:`263`)
+
+    Construction is by default for floating type values.
+
+    .. ipython:: python
+
+       index = Index([1.5, 2, 3, 4.5, 5])
+       index
+       s = Series(range(5),index=index)
+       s
+
+    Scalar selection for ``[],.ix,.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``)
+
+    .. ipython:: python
+
+       s[3]
+       s.ix[3]
+       s.loc[3]
+
+    The only positional indexing is via ``iloc``
+
+    .. ipython:: python
+
+       s.iloc[3]
+
+    A scalar index that is not found will raise ``KeyError``
+
+    Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc``
+
+    .. ipython:: python
+
+       s[2:4]
+       s.ix[2:4]
+       s.loc[2:4]
+       s.iloc[2:4]
+
+    In float indexes, slicing using floats is allowed
+
+    .. ipython:: python
+
+       s[2.1:4.6]
+       s.loc[2.1:4.6]
+
+  - Indexing on other index types is preserved (and positional fallback for ``[],ix``), with the exception that floating point slicing
+    on non-``Float64Index`` indexes will now raise a ``TypeError``.
+
+    .. code-block:: python
+
+       In [1]: Series(range(5))[3.5]
+       TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index)
+
+       In [1]: Series(range(5))[3.5:4.5]
+       TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index)
+
+    Using a scalar float indexer will be deprecated in a future version, but is allowed for now.
+
+    ..
code-block:: python + + In [3]: Series(range(5))[3.0] + Out[3]: 3 + HDFStore API Changes ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.4.x.txt b/doc/source/v0.4.x.txt index 249dec5fd647b..5333bb9ffb157 100644 --- a/doc/source/v0.4.x.txt +++ b/doc/source/v0.4.x.txt @@ -15,8 +15,7 @@ New Features with choice of join method (ENH56_) - :ref:`Added ` method ``get_level_values`` to ``MultiIndex`` (:issue:`188`) -- :ref:`Set ` values in mixed-type - ``DataFrame`` objects via ``.ix`` indexing attribute (:issue:`135`) +- Set values in mixed-type ``DataFrame`` objects via ``.ix`` indexing attribute (:issue:`135`) - Added new ``DataFrame`` :ref:`methods ` ``get_dtype_counts`` and property ``dtypes`` (ENHdc_) - Added :ref:`ignore_index ` option to diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 7dc2ebc3d54e1..3554b8a3f81e1 100755 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -778,7 +778,7 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): class TestAlignment(object): index_types = 'i', 'u', 'dt' - lhs_index_types = index_types + ('f', 's') # 'p' + lhs_index_types = index_types + ('s',) # 'p' def check_align_nested_unary_op(self, engine, parser): skip_if_no_ne(engine) diff --git a/pandas/core/api.py b/pandas/core/api.py index 14af72a2a762a..b4afe90d46842 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,7 @@ from pandas.core.categorical import Categorical, Factor from pandas.core.format import (set_printoptions, reset_printoptions, set_eng_float_format) -from pandas.core.index import Index, Int64Index, MultiIndex +from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex from pandas.core.series import Series, TimeSeries from pandas.core.frame import DataFrame diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0fd02c2bdc3a4..c98790fdc38ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2050,7 +2050,7 @@ def eval(self, expr, **kwargs): kwargs['local_dict'] = _ensure_scope(resolvers=resolvers, **kwargs) return _eval(expr, **kwargs) - def _slice(self, slobj, axis=0, raise_on_error=False): + def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): axis = self._get_block_manager_axis(axis) new_data = self._data.get_slice( slobj, axis=axis, raise_on_error=raise_on_error) diff --git a/pandas/core/index.py b/pandas/core/index.py index 734a6ee15307d..7f136450daf6e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -14,7 +14,7 @@ from pandas.util.decorators import cache_readonly, deprecate from pandas.core.common import isnull import pandas.core.common as com -from pandas.core.common import _values_from_object +from pandas.core.common import _values_from_object, is_float, is_integer from pandas.core.config import get_option @@ -49,10 +49,8 @@ def _shouldbe_timestamp(obj): or tslib.is_datetime64_array(obj) or tslib.is_timestamp_array(obj)) - _Identity = object - class Index(FrozenNDArray): """ @@ -160,8 +158,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, subarr = subarr.copy() elif np.isscalar(data): - raise TypeError('Index(...) 
must be called with a collection ' - 'of some kind, %s was passed' % repr(data)) + cls._scalar_data_error(data) + else: # other iterable of some kind subarr = com._asarray_tuplesafe(data, dtype=object) @@ -170,6 +168,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, inferred = lib.infer_dtype(subarr) if inferred == 'integer': return Int64Index(subarr.astype('i8'), copy=copy, name=name) + elif inferred in ['floating','mixed-integer-float']: + return Float64Index(subarr, copy=copy, name=name) elif inferred != 'string': if (inferred.startswith('datetime') or tslib.is_timestamp_array(subarr)): @@ -183,6 +183,30 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, subarr._set_names([name]) return subarr + # construction helpers + @classmethod + def _scalar_data_error(cls, data): + raise TypeError('{0}(...) must be called with a collection ' + 'of some kind, {1} was passed'.format(cls.__name__,repr(data))) + + @classmethod + def _string_data_error(cls, data): + raise TypeError('String dtype not supported, you may need ' + 'to explicitly cast to a numeric type') + + @classmethod + def _coerce_to_ndarray(cls, data): + + if not isinstance(data, np.ndarray): + if np.isscalar(data): + cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (list, tuple)): + data = list(data) + data = np.asarray(data) + return data + def __array_finalize__(self, obj): self._reset_identity() if not isinstance(obj, type(self)): @@ -374,12 +398,137 @@ def is_lexsorted_for_tuple(self, tup): def is_unique(self): return self._engine.is_unique + def is_integer(self): + return self.inferred_type in ['integer'] + + def is_floating(self): + return self.inferred_type in ['floating','mixed-integer-float'] + def is_numeric(self): return self.inferred_type in ['integer', 'floating'] + def is_mixed(self): + return 'mixed' in self.inferred_type + def holds_integer(self): return self.inferred_type in ['integer', 'mixed-integer'] + def _convert_scalar_indexer(self, key, typ=None): + """ convert a scalar indexer, right now we are converting floats -> ints + if the index supports it """ + + def to_int(): + ikey = int(key) + if ikey != key: + self._convert_indexer_error(key, 'label') + return ikey + + if typ == 'iloc': + if not (is_integer(key) or is_float(key)): + self._convert_indexer_error(key, 'label') + return to_int() + + if is_float(key): + return to_int() + + return key + + def _validate_slicer(self, key, f): + """ validate and raise if needed on a slice indexers according to the + passed in function """ + + if not f(key.start): + self._convert_indexer_error(key.start, 'slice start value') + if not f(key.stop): + self._convert_indexer_error(key.stop, 'slice stop value') + if not f(key.step): + self._convert_indexer_error(key.step, 'slice step value') + + def _convert_slice_indexer_iloc(self, key): + """ convert a slice indexer for iloc only """ + self._validate_slicer(key, lambda v: v is None or is_integer(v)) + return key + + def _convert_slice_indexer_getitem(self, key, is_index_slice=False): + """ called from the getitem slicers, determine how to treat the key + whether positional or not """ + if self.is_integer() or is_index_slice: + return key + return self._convert_slice_indexer(key) + + def _convert_slice_indexer(self, key, typ=None): + """ convert a slice indexer. 
disallow floats in the start/stop/step """ + + # validate slicers + def validate(v): + if v is None or is_integer(v): + return True + + # dissallow floats + elif is_float(v): + return False + + return True + + self._validate_slicer(key, validate) + + # figure out if this is a positional indexer + start, stop, step = key.start, key.stop, key.step + + def is_int(v): + return v is None or is_integer(v) + + is_null_slice = start is None and stop is None + is_index_slice = is_int(start) and is_int(stop) + is_positional = is_index_slice and not self.is_integer() + + if typ == 'iloc': + return self._convert_slice_indexer_iloc(key) + elif typ == 'getitem': + return self._convert_slice_indexer_getitem(key, is_index_slice=is_index_slice) + + # convert the slice to an indexer here + + # if we are mixed and have integers + try: + if is_positional and self.is_mixed(): + if start is not None: + i = self.get_loc(start) + if stop is not None: + j = self.get_loc(stop) + is_positional = False + except KeyError: + if self.inferred_type == 'mixed-integer-float': + raise + + if is_null_slice: + indexer = key + elif is_positional: + indexer = key + else: + try: + indexer = self.slice_indexer(start, stop, step) + except Exception: + if is_index_slice: + if self.is_integer(): + raise + else: + indexer = key + else: + raise + + return indexer + + def _convert_list_indexer(self, key, typ=None): + """ convert a list indexer. these should be locations """ + return key + + def _convert_indexer_error(self, key, msg=None): + if msg is None: + msg = 'label' + raise TypeError("the {0} [{1}] is not a proper indexer for this index type ({2})".format(msg, + key, + self.__class__.__name__)) def get_duplicates(self): from collections import defaultdict counter = defaultdict(lambda: 0) @@ -858,6 +1007,11 @@ def get_value(self, series, key): """ s = _values_from_object(series) k = _values_from_object(key) + + # prevent integer truncation bug in indexing + if is_float(k) and not self.is_floating(): + raise KeyError + try: return self._engine.get_value(s, k) except KeyError as e1: @@ -1323,6 +1477,11 @@ def slice_indexer(self, start=None, end=None, step=None): # return a slice if np.isscalar(start_slice) and np.isscalar(end_slice): + + # degenerate cases + if start is None and end is None: + return slice(None, None, step) + return slice(start_slice, end_slice, step) # loc indexers @@ -1488,22 +1647,13 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False): if not isinstance(data, np.ndarray): if np.isscalar(data): - raise ValueError('Index(...) must be called with a collection ' - 'of some kind, %s was passed' % repr(data)) + cls._scalar_data_error(data) - if not isinstance(data, np.ndarray): - if np.isscalar(data): - raise ValueError('Index(...) 
must be called with a collection ' - 'of some kind, %s was passed' % repr(data)) - - # other iterable of some kind - if not isinstance(data, (list, tuple)): - data = list(data) - data = np.asarray(data) + data = cls._coerce_to_ndarray(data) if issubclass(data.dtype.type, compat.string_types): - raise TypeError('String dtype not supported, you may need ' - 'to explicitly cast to int') + cls._string_data_error(data) + elif issubclass(data.dtype.type, np.integer): # don't force the upcast as we may be dealing # with a platform int @@ -1524,8 +1674,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False): data = np.asarray(data) if issubclass(data.dtype.type, compat.string_types): - raise TypeError('String dtype not supported, you may need ' - 'to explicitly cast to int') + cls._string_data_error(data) + elif issubclass(data.dtype.type, np.integer): # don't force the upcast as we may be dealing # with a platform int @@ -1581,6 +1731,123 @@ def _wrap_joined_index(self, joined, other): return Int64Index(joined, name=name) +class Float64Index(Index): + """ + Immutable ndarray implementing an ordered, sliceable set. The basic object + storing axis labels for all pandas objects. Float64Index is a special case of `Index` + with purely floating point labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: object) + copy : bool + Make a copy of input ndarray + name : object + Name to be stored in the index + + Note + ---- + An Index instance can **only** contain hashable objects + """ + + # when this is not longer object dtype this can be changed + #_engine_type = _index.Float64Engine + + def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False): + + if fastpath: + subarr = data.view(cls) + subarr.name = name + return subarr + + if not isinstance(data, np.ndarray): + if np.isscalar(data): + cls._scalar_data_error(data) + + data = cls._coerce_to_ndarray(data) + + if issubclass(data.dtype.type, compat.string_types): + cls._string_data_error(data) + + if dtype is None: + dtype = np.float64 + + try: + subarr = np.array(data, dtype=dtype, copy=copy) + except: + raise TypeError('Unsafe NumPy casting, you must ' + 'explicitly cast') + + # coerce to object for storage + if not subarr.dtype == np.object_: + subarr = subarr.astype(object) + + subarr = subarr.view(cls) + subarr.name = name + return subarr + + @property + def inferred_type(self): + return 'floating' + + def astype(self, dtype): + if np.dtype(dtype) != np.object_: + raise TypeError( + "Setting %s dtype to anything other than object is not supported" % self.__class__) + return Index(self.values,name=self.name,dtype=object) + + def _convert_scalar_indexer(self, key, typ=None): + + if typ == 'iloc': + return super(Float64Index, self)._convert_scalar_indexer(key, typ=typ) + return key + + def _convert_slice_indexer(self, key, typ=None): + """ convert a slice indexer, by definition these are labels + unless we are iloc """ + if typ == 'iloc': + return self._convert_slice_indexer_iloc(key) + elif typ == 'getitem': + pass + + # allow floats here + self._validate_slicer(key, lambda v: v is None or is_integer(v) or is_float(v)) + + # translate to locations + return self.slice_indexer(key.start,key.stop,key.step) + + def get_value(self, series, key): + """ we always want to get an index value, never a value """ + if not np.isscalar(key): + raise InvalidIndexError + + from pandas.core.indexing import _maybe_droplevels + from pandas.core.series import Series + + k = 
_values_from_object(key) + loc = self.get_loc(k) + new_values = series.values[loc] + if np.isscalar(new_values): + return new_values + + new_index = self[loc] + new_index = _maybe_droplevels(new_index, k) + return Series(new_values, index=new_index, name=series.name) + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self is other: + return True + + try: + return np.array_equal(self, other) + except TypeError: + # e.g. fails in numpy 1.6 with DatetimeIndex #1681 + return False + class MultiIndex(Index): """ @@ -1801,6 +2068,14 @@ def __unicode__(self): def __len__(self): return len(self.labels[0]) + def _convert_slice_indexer(self, key, typ=None): + """ convert a slice indexer. disallow floats in the start/stop/step """ + + if typ == 'iloc': + return self._convert_slice_indexer_iloc(key) + + return super(MultiIndex,self)._convert_slice_indexer(key, typ=typ) + def _get_names(self): return FrozenList(level.name for level in self.levels) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cb738df6966da..afbeb53d857e2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -6,7 +6,7 @@ from pandas.compat import range, zip import pandas.compat as compat import pandas.core.common as com -from pandas.core.common import (_is_bool_indexer, +from pandas.core.common import (_is_bool_indexer, is_integer_dtype, ABCSeries, ABCDataFrame, ABCPanel) import pandas.lib as lib @@ -16,7 +16,7 @@ def get_indexers_list(): return [ - ('ix' ,_NDFrameIndexer), + ('ix' ,_IXIndexer ), ('iloc',_iLocIndexer ), ('loc' ,_LocIndexer ), ('at' ,_AtIndexer ), @@ -32,6 +32,7 @@ class IndexingError(Exception): class _NDFrameIndexer(object): + _valid_types = None _exception = KeyError def __init__(self, obj, name): @@ -68,8 +69,8 @@ def _get_label(self, label, axis=0): def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=0, raise_on_error=False): - return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error) + def _slice(self, obj, axis=0, raise_on_error=False, typ=None): + return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error, typ=typ) def __setitem__(self, key, value): # kludgetastic @@ -92,8 +93,16 @@ def __setitem__(self, key, value): self._setitem_with_indexer(indexer, value) + def _has_valid_type(self, k, axis): + raise NotImplementedError() + def _has_valid_tuple(self, key): - pass + """ check the key for valid keys across my indexer """ + for i, k in enumerate(key): + if i >= self.obj.ndim: + raise IndexingError('Too many indexers') + if not self._has_valid_type(k,i): + raise ValueError("Location based indexing can only have [%s] types" % self._valid_types) def _convert_tuple(self, key, is_setter=False): keyidx = [] @@ -102,6 +111,17 @@ def _convert_tuple(self, key, is_setter=False): keyidx.append(idx) return tuple(keyidx) + def _convert_scalar_indexer(self, key, axis): + # if we are accessing via lowered dim, use the last dim + ax = self.obj._get_axis(min(axis,self.ndim-1)) + # a scalar + return ax._convert_scalar_indexer(key, typ=self.name) + + def _convert_slice_indexer(self, key, axis): + # if we are accessing via lowered dim, use the last dim + ax = self.obj._get_axis(min(axis,self.ndim-1)) + return ax._convert_slice_indexer(key, typ=self.name) + def _has_valid_setitem_indexer(self, indexer): return True @@ -228,7 +248,9 @@ def _setitem_with_indexer(self, indexer, value): # if we have a partial multiindex, then need to adjust the plane indexer here if 
len(labels) == 1 and isinstance(self.obj[labels[0]].index,MultiIndex): - index = self.obj[labels[0]].index + item = labels[0] + obj = self.obj[item] + index = obj.index idx = indexer[:info_axis][0] try: if idx in index: @@ -238,8 +260,19 @@ def _setitem_with_indexer(self, indexer, value): plane_indexer = tuple([idx]) + indexer[info_axis + 1:] lplane_indexer = _length_of_indexer(plane_indexer[0],index) + # require that we are setting the right number of values that we are indexing if is_list_like(value) and lplane_indexer != len(value): - raise ValueError("cannot set using a multi-index selection indexer with a different length than the value") + + if len(obj[idx]) != len(value): + raise ValueError("cannot set using a multi-index selection indexer with a different length than the value") + + # we can directly set the series here + # as we select a slice indexer on the mi + idx = index._convert_slice_indexer(idx) + obj = obj.copy() + obj._data = obj._data.setitem(tuple([idx]),value) + self.obj[item] = obj + return # non-mi else: @@ -546,7 +579,7 @@ def _convert_for_reindex(self, key, axis=0): # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) - if _is_integer_dtype(keyarr) and not _is_integer_index(labels): + if is_integer_dtype(keyarr) and not labels.is_integer(): keyarr = com._ensure_platform_int(keyarr) return labels.take(keyarr) @@ -610,6 +643,8 @@ def _getitem_lowerdim(self, tup): raise IndexingError('not applicable') def _getitem_axis(self, key, axis=0): + + self._has_valid_type(key, axis) labels = self.obj._get_axis(axis) if isinstance(key, slice): return self._get_slice_axis(key, axis=axis) @@ -626,10 +661,11 @@ def _getitem_axis(self, key, axis=0): try: return self._get_label(key, axis=axis) except (KeyError, TypeError): - if _is_integer_index(self.obj.index.levels[0]): + if self.obj.index.levels[0].is_integer(): raise - if not _is_integer_index(labels): + # this is the fallback! 
(for a non-float, non-integer index) + if not labels.is_floating() and not labels.is_integer(): return self._get_loc(key, axis=axis) return self._get_label(key, axis=axis) @@ -658,7 +694,7 @@ def _reindex(keys, level=None): # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) - if _is_integer_dtype(keyarr): + if is_integer_dtype(keyarr) and not labels.is_floating(): if labels.inferred_type != 'integer': keyarr = np.where(keyarr < 0, len(labels) + keyarr, keyarr) @@ -747,7 +783,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): - No, prefer label-based indexing """ labels = self.obj._get_axis(axis) - is_int_index = _is_integer_index(labels) + is_int_index = labels.is_integer() if com.is_integer(obj) and not is_int_index: @@ -765,52 +801,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): pass if isinstance(obj, slice): - ltype = labels.inferred_type - - # in case of providing all floats, use label-based indexing - float_slice = (labels.inferred_type == 'floating' - and _is_float_slice(obj)) - - # floats that are within tolerance of int used as positions - int_slice = _is_index_slice(obj) - - null_slice = obj.start is None and obj.stop is None - - # could have integers in the first level of the MultiIndex, - # in which case we wouldn't want to do position-based slicing - position_slice = (int_slice - and not ltype == 'integer' - and not isinstance(labels, MultiIndex) - and not float_slice) - - start, stop = obj.start, obj.stop - - # last ditch effort: if we are mixed and have integers - try: - if position_slice and 'mixed' in ltype: - if start is not None: - i = labels.get_loc(start) - if stop is not None: - j = labels.get_loc(stop) - position_slice = False - except KeyError: - if ltype == 'mixed-integer-float': - raise - - if null_slice or position_slice: - indexer = obj - else: - try: - indexer = labels.slice_indexer(start, stop, obj.step) - except Exception: - if _is_index_slice(obj): - if ltype == 'integer': - raise - indexer = obj - else: - raise - - return indexer + return self._convert_slice_indexer(obj, axis) elif _is_list_like(obj): if com._is_bool_indexer(obj): @@ -824,7 +815,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): objarr = _asarray_tuplesafe(obj) # If have integer labels, defer to label-based indexing - if _is_integer_dtype(objarr) and not is_int_index: + if is_integer_dtype(objarr) and not is_int_index: if labels.inferred_type != 'integer': objarr = np.where(objarr < 0, len(labels) + objarr, objarr) @@ -879,74 +870,37 @@ def _get_slice_axis(self, slice_obj, axis=0): if not _need_slice(slice_obj): return obj + indexer = self._convert_slice_indexer(slice_obj, axis) - labels = obj._get_axis(axis) - - ltype = labels.inferred_type - - # in case of providing all floats, use label-based indexing - float_slice = (labels.inferred_type == 'floating' - and _is_float_slice(slice_obj)) + if isinstance(indexer, slice): + return self._slice(indexer, axis=axis, typ='iloc') + else: + return self.obj.take(indexer, axis=axis) - # floats that are within tolerance of int used as positions - int_slice = _is_index_slice(slice_obj) +class _IXIndexer(_NDFrameIndexer): + """ A primarily location based indexer, with integer fallback """ - null_slice = slice_obj.start is None and slice_obj.stop is None + def _has_valid_type(self, key, axis): + ax = self.obj._get_axis(axis) - # could have integers in the first level of the MultiIndex, - # in which case we wouldn't want to do position-based slicing - position_slice = 
(int_slice - and not ltype == 'integer' - and not isinstance(labels, MultiIndex) - and not float_slice) + if isinstance(key, slice): + return True - start, stop = slice_obj.start, slice_obj.stop + elif com._is_bool_indexer(key): + return True - # last ditch effort: if we are mixed and have integers - try: - if position_slice and 'mixed' in ltype: - if start is not None: - i = labels.get_loc(start) - if stop is not None: - j = labels.get_loc(stop) - position_slice = False - except KeyError: - if ltype == 'mixed-integer-float': - raise + elif _is_list_like(key): + return True - if null_slice or position_slice: - indexer = slice_obj else: - try: - indexer = labels.slice_indexer(start, stop, slice_obj.step) - except Exception: - if _is_index_slice(slice_obj): - if ltype == 'integer': - raise - indexer = slice_obj - else: - raise - if isinstance(indexer, slice): - return self._slice(indexer, axis=axis) - else: - return self.obj.take(indexer, axis=axis) + self._convert_scalar_indexer(key, axis) + + return True class _LocationIndexer(_NDFrameIndexer): - _valid_types = None _exception = Exception - def _has_valid_type(self, k, axis): - raise NotImplementedError() - - def _has_valid_tuple(self, key): - """ check the key for valid keys across my indexer """ - for i, k in enumerate(key): - if i >= self.obj.ndim: - raise ValueError('Too many indexers') - if not self._has_valid_type(k,i): - raise ValueError("Location based indexing can only have [%s] types" % self._valid_types) - def __getitem__(self, key): if type(key) is tuple: return self._getitem_tuple(key) @@ -974,7 +928,7 @@ def _get_slice_axis(self, slice_obj, axis=0): indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) if isinstance(indexer, slice): - return self._slice(indexer, axis=axis) + return self._slice(indexer, axis=axis, typ='iloc') else: return self.obj.take(indexer, axis=axis) @@ -993,18 +947,28 @@ def _has_valid_type(self, key, axis): if isinstance(key, slice): - if key.start is not None: - if key.start not in ax: - raise KeyError("start bound [%s] is not the [%s]" % (key.start,self.obj._get_axis_name(axis))) - if key.stop is not None: - if key.stop not in ax: - raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop,self.obj._get_axis_name(axis))) + if ax.is_floating(): + + # allowing keys to be slicers with no fallback + pass + + else: + if key.start is not None: + if key.start not in ax: + raise KeyError("start bound [%s] is not the [%s]" % (key.start,self.obj._get_axis_name(axis))) + if key.stop is not None: + if key.stop not in ax: + raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop,self.obj._get_axis_name(axis))) elif com._is_bool_indexer(key): return True elif _is_list_like(key): + # mi is just a passthru + if isinstance(key, tuple) and isinstance(ax, MultiIndex): + return True + # require all elements in the index idx = _ensure_index(key) if not idx.isin(ax).all(): @@ -1014,18 +978,15 @@ def _has_valid_type(self, key, axis): else: - # if its empty we want a KeyError here - if not len(ax): - raise KeyError("The [%s] axis is empty" % self.obj._get_axis_name(axis)) + def error(): + raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis))) + key = self._convert_scalar_indexer(key, axis) try: if not key in ax: - raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis))) - except (TypeError): - - # if we have a weird type of key/ax - raise KeyError("the label [%s] is not in the [%s]" % 
(key,self.obj._get_axis_name(axis))) - + error() + except: + error() return True @@ -1045,6 +1006,7 @@ def _getitem_axis(self, key, axis=0): return self._getitem_iterable(key, axis=axis) else: + self._has_valid_type(key,axis) return self._get_label(key, axis=axis) class _iLocIndexer(_LocationIndexer): @@ -1092,11 +1054,12 @@ def _get_slice_axis(self, slice_obj, axis=0): return obj if isinstance(slice_obj, slice): - return self._slice(slice_obj, axis=axis, raise_on_error=True) + return self._slice(slice_obj, axis=axis, raise_on_error=True, typ='iloc') else: return self.obj.take(slice_obj, axis=axis) def _getitem_axis(self, key, axis=0): + if isinstance(key, slice): self._has_valid_type(key,axis) return self._get_slice_axis(key, axis=axis) @@ -1108,8 +1071,13 @@ def _getitem_axis(self, key, axis=0): # a single integer or a list of integers else: - if not (com.is_integer(key) or _is_list_like(key)): - raise ValueError("Cannot index by location index with a non-integer key") + if _is_list_like(key): + pass + else: + key = self._convert_scalar_indexer(key, axis) + + if not com.is_integer(key): + raise TypeError("Cannot index by location index with a non-integer key") return self._get_loc(key,axis=axis) @@ -1200,14 +1168,7 @@ def _convert_to_index_sliceable(obj, key): """ if we are index sliceable, then return my slicer, otherwise return None """ idx = obj.index if isinstance(key, slice): - idx_type = idx.inferred_type - if idx_type == 'floating': - indexer = obj.ix._convert_to_indexer(key, axis=0) - elif idx_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = obj.ix._convert_to_indexer(key, axis=0) - return indexer + return idx._convert_slice_indexer(key, typ='getitem') elif isinstance(key, compat.string_types): @@ -1237,31 +1198,7 @@ def _crit(v): return not both_none and (_crit(obj.start) and _crit(obj.stop)) -def _is_int_slice(obj): - def _is_valid_index(x): - return com.is_integer(x) - - def _crit(v): - return v is None or _is_valid_index(v) - - both_none = obj.start is None and obj.stop is None - - return not both_none and (_crit(obj.start) and _crit(obj.stop)) - - -def _is_float_slice(obj): - def _is_valid_index(x): - return com.is_float(x) - - def _crit(v): - return v is None or _is_valid_index(v) - - both_none = obj.start is None and obj.stop is None - - return not both_none and (_crit(obj.start) and _crit(obj.stop)) - - -class _SeriesIndexer(_NDFrameIndexer): +class _SeriesIndexer(_IXIndexer): """ Class to support fancy indexing, potentially using labels @@ -1286,7 +1223,7 @@ def _get_label(self, key, axis=0): def _get_loc(self, key, axis=0): return self.obj.values[key] - def _slice(self, indexer, axis=0): + def _slice(self, indexer, axis=0, typ=None): return self.obj._get_values(indexer) def _setitem_with_indexer(self, indexer, value): @@ -1389,15 +1326,6 @@ def _is_null_slice(obj): obj.stop is None and obj.step is None) -def _is_integer_dtype(arr): - return (issubclass(arr.dtype.type, np.integer) and - not arr.dtype.type == np.datetime64) - - -def _is_integer_index(index): - return index.inferred_type == 'integer' - - def _is_label_like(key): # select a label or row return not isinstance(key, slice) and not _is_list_like(key) @@ -1438,6 +1366,9 @@ def _maybe_droplevels(index, key): # we have dropped too much, so back out return original_index else: - index = index.droplevel(0) + try: + index = index.droplevel(0) + except: + pass return index diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5f90eb9fa31a7..34b65f169b904 100644 --- 
a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -579,7 +579,7 @@ def _box_item_values(self, key, values): d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]) return self._constructor_sliced(values, **d) - def _slice(self, slobj, axis=0, raise_on_error=False): + def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): new_data = self._data.get_slice(slobj, axis=axis, raise_on_error=raise_on_error) diff --git a/pandas/core/series.py b/pandas/core/series.py index 942bb700a3718..bf5ec998c9963 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -901,7 +901,8 @@ def _ixs(self, i, axis=0): raise except: if isinstance(i, slice): - return self[i] + indexer = self.index._convert_slice_indexer(i,typ='iloc') + return self._get_values(indexer) else: label = self.index[i] if isinstance(label, Index): @@ -914,10 +915,10 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False - def _slice(self, slobj, axis=0, raise_on_error=False): + def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): if raise_on_error: _check_slice_bounds(slobj, self.values) - + slobj = self.index._convert_slice_indexer(slobj,typ=typ or 'getitem') return self._constructor(self.values[slobj], index=self.index[slobj], name=self.name) @@ -935,7 +936,13 @@ def __getitem__(self, key): elif _is_bool_indexer(key): pass else: + + # we can try to coerce the indexer (or this will raise) + new_key = self.index._convert_scalar_indexer(key) + if type(new_key) != type(key): + return self.__getitem__(new_key) raise + except Exception: raise @@ -950,14 +957,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): - - idx_type = self.index.inferred_type - if idx_type == 'floating': - indexer = self.ix._convert_to_indexer(key, axis=0) - elif idx_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = self.ix._convert_to_indexer(key, axis=0) + indexer = self.index._convert_slice_indexer(key,typ='getitem') return self._get_values(indexer) else: if isinstance(key, tuple): @@ -980,7 +980,7 @@ def _get_with(self, key): key_type = lib.infer_dtype(key) if key_type == 'integer': - if self.index.inferred_type == 'integer': + if self.index.is_integer() or self.index.is_floating(): return self.reindex(key) else: return self._get_values(key) @@ -1080,10 +1080,7 @@ def _set_with_engine(self, key, value): def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): - if self.index.inferred_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = self.ix._convert_to_indexer(key, axis=0) + indexer = self.index._convert_slice_indexer(key,typ='getitem') return self._set_values(indexer, value) else: if isinstance(key, tuple): @@ -2348,7 +2345,7 @@ def sort(self, axis=0, kind='quicksort', order=None, ascending=True): raise TypeError('This Series is a view of some other array, to ' 'sort in-place you must create a copy') - self[:] = sortedSeries + self._data = sortedSeries._data.copy() self.index = sortedSeries.index def sort_index(self, ascending=True): diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 7d2571e6c3c74..a1b630dedaaab 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -374,7 +374,7 @@ def set_value(self, index, col, value): return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) - def _slice(self, slobj, axis=0, raise_on_error=False): + def _slice(self, slobj, axis=0, raise_on_error=False, 
typ=None): if axis == 0: if raise_on_error: _check_slice_bounds(slobj, self.index) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index e7a52756089cc..723bf022c3f48 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1110,10 +1110,10 @@ def test_to_string_float_index(self): result = df.to_string() expected = (' 0\n' '1.5 0\n' - '2 1\n' - '3 2\n' - '4 3\n' - '5 4') + '2.0 1\n' + '3.0 2\n' + '4.0 3\n' + '5.0 4') self.assertEqual(result, expected) def test_to_string_ascii_error(self): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 312b5aee18752..82be82ea57dae 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1254,48 +1254,62 @@ def test_getitem_setitem_float_labels(self): assert_frame_equal(result, expected) self.assertEqual(len(result), 2) - # this should raise an exception - with tm.assertRaises(KeyError): - df.ix[1:2] - with tm.assertRaises(KeyError): - df.ix[1:2] = 0 + # loc_float changes this to work properly + result = df.ix[1:2] + expected = df.iloc[0:2] + assert_frame_equal(result, expected) + + df.ix[1:2] = 0 + result = df[1:2] + self.assert_((result==0).all().all()) # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) df = DataFrame(np.random.randn(5, 5), index=index) - # positional slicing! + # positional slicing only via iloc! + result = df.iloc[1.0:5] + expected = df.reindex([2.5, 3.5, 4.5, 5.0]) + assert_frame_equal(result, expected) + self.assertEqual(len(result), 4) + + result = df.iloc[4:5] + expected = df.reindex([5.0]) + assert_frame_equal(result, expected) + self.assertEqual(len(result), 1) + + cp = df.copy() + cp.iloc[1.0:5] = 0 + self.assert_((cp.iloc[1.0:5] == 0).values.all()) + self.assert_((cp.iloc[0:1] == df.iloc[0:1]).values.all()) + + cp = df.copy() + cp.iloc[4:5] = 0 + self.assert_((cp.iloc[4:5] == 0).values.all()) + self.assert_((cp.iloc[0:4] == df.iloc[0:4]).values.all()) + + # float slicing result = df.ix[1.0:5] + expected = df + assert_frame_equal(result, expected) + self.assertEqual(len(result), 5) + + result = df.ix[1.1:5] expected = df.reindex([2.5, 3.5, 4.5, 5.0]) assert_frame_equal(result, expected) self.assertEqual(len(result), 4) - # positional again - result = df.ix[4:5] + result = df.ix[4.51:5] expected = df.reindex([5.0]) assert_frame_equal(result, expected) self.assertEqual(len(result), 1) - # label-based result = df.ix[1.0:5.0] expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) assert_frame_equal(result, expected) self.assertEqual(len(result), 5) cp = df.copy() - # positional slicing! 
- cp.ix[1.0:5] = 0 - self.assert_((cp.ix[1.0:5] == 0).values.all()) - self.assert_((cp.ix[0:1] == df.ix[0:1]).values.all()) - - cp = df.copy() - # positional again - cp.ix[4:5] = 0 - self.assert_((cp.ix[4:5] == 0).values.all()) - self.assert_((cp.ix[0:4] == df.ix[0:4]).values.all()) - - cp = df.copy() - # label-based cp.ix[1.0:5.0] = 0 self.assert_((cp.ix[1.0:5.0] == 0).values.all()) @@ -10064,15 +10078,15 @@ def test_reindex_with_nans(self): index=[100.0, 101.0, np.nan, 102.0, 103.0]) result = df.reindex(index=[101.0, 102.0, 103.0]) - expected = df.ix[[1, 3, 4]] + expected = df.iloc[[1, 3, 4]] assert_frame_equal(result, expected) result = df.reindex(index=[103.0]) - expected = df.ix[[4]] + expected = df.iloc[[4]] assert_frame_equal(result, expected) result = df.reindex(index=[101.0]) - expected = df.ix[[1]] + expected = df.iloc[[1]] assert_frame_equal(result, expected) def test_reindex_multi(self): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index e3c9da3630975..857836fa698ce 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -12,7 +12,7 @@ import numpy as np from numpy.testing import assert_array_equal -from pandas.core.index import Index, Int64Index, MultiIndex, InvalidIndexError +from pandas.core.index import Index, Float64Index, Int64Index, MultiIndex, InvalidIndexError from pandas.core.frame import DataFrame from pandas.core.series import Series from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, @@ -654,6 +654,88 @@ def test_join_self(self): self.assert_(res is joined) +class TestFloat64Index(unittest.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + self.mixed = Float64Index([1.5, 2, 3, 4, 5]) + self.float = Float64Index(np.arange(5) * 2.5) + + def check_is_index(self, i): + self.assert_(isinstance(i, Index) and not isinstance(i, Float64Index)) + + def check_coerce(self, a, b, is_float_index=True): + self.assert_(a.equals(b)) + if is_float_index: + self.assert_(isinstance(b, Float64Index)) + else: + self.check_is_index(b) + + def test_constructor(self): + + # explicit construction + index = Float64Index([1,2,3,4,5]) + self.assert_(isinstance(index, Float64Index)) + self.assert_((index.values == np.array([1,2,3,4,5],dtype='float64')).all()) + index = Float64Index(np.array([1,2,3,4,5])) + self.assert_(isinstance(index, Float64Index)) + index = Float64Index([1.,2,3,4,5]) + self.assert_(isinstance(index, Float64Index)) + index = Float64Index(np.array([1.,2,3,4,5])) + self.assert_(isinstance(index, Float64Index)) + self.assert_(index.dtype == object) + + index = Float64Index(np.array([1.,2,3,4,5]),dtype=np.float32) + self.assert_(isinstance(index, Float64Index)) + self.assert_(index.dtype == object) + + index = Float64Index(np.array([1,2,3,4,5]),dtype=np.float32) + self.assert_(isinstance(index, Float64Index)) + self.assert_(index.dtype == object) + + # nan handling + result = Float64Index([np.nan, np.nan]) + self.assert_(pd.isnull(result.values).all()) + result = Float64Index(np.array([np.nan])) + self.assert_(pd.isnull(result.values).all()) + result = Index(np.array([np.nan])) + self.assert_(pd.isnull(result.values).all()) + + def test_constructor_invalid(self): + + # invalid + self.assertRaises(TypeError, Float64Index, 0.) 
+ self.assertRaises(TypeError, Float64Index, ['a','b',0.]) + self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')]) + + def test_constructor_coerce(self): + + self.check_coerce(self.mixed,Index([1.5, 2, 3, 4, 5])) + self.check_coerce(self.float,Index(np.arange(5) * 2.5)) + self.check_coerce(self.float,Index(np.array(np.arange(5) * 2.5, dtype=object))) + + def test_constructor_explicit(self): + + # these don't auto convert + self.check_coerce(self.float,Index((np.arange(5) * 2.5), dtype=object), + is_float_index=False) + self.check_coerce(self.mixed,Index([1.5, 2, 3, 4, 5],dtype=object), + is_float_index=False) + + def test_astype(self): + + result = self.float.astype(object) + self.assert_(result.equals(self.float)) + self.assert_(self.float.equals(result)) + self.check_is_index(result) + + i = self.mixed.copy() + i.name = 'foo' + result = i.astype(object) + self.assert_(result.equals(i)) + self.assert_(i.equals(result)) + self.check_is_index(result) + class TestInt64Index(unittest.TestCase): _multiprocess_can_split_ = True @@ -676,7 +758,7 @@ def test_constructor(self): self.assert_(np.array_equal(index, expected)) # scalar raise Exception - self.assertRaises(ValueError, Int64Index, 5) + self.assertRaises(TypeError, Int64Index, 5) # copy arr = self.index.values diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index ced4cbdc4dc36..0eab5ab834533 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -13,7 +13,7 @@ import pandas as pd import pandas.core.common as com from pandas.core.api import (DataFrame, Index, Series, Panel, notnull, isnull, - MultiIndex, DatetimeIndex, Timestamp) + MultiIndex, DatetimeIndex, Float64Index, Timestamp) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal) from pandas import compat, concat @@ -1519,6 +1519,309 @@ def test_cache_updating(self): self.assert_("A+1" in panel.ix[0].columns) self.assert_("A+1" in panel.ix[1].columns) + def test_floating_index_doc_example(self): + + index = Index([1.5, 2, 3, 4.5, 5]) + s = Series(range(5),index=index) + self.assert_(s[3] == 2) + self.assert_(s.ix[3] == 2) + self.assert_(s.loc[3] == 2) + self.assert_(s.iloc[3] == 3) + + def test_floating_index(self): + + # related 236 + # scalar/slicing of a float index + s = Series(np.arange(5), index=np.arange(5) * 2.5) + + # label based slicing + result1 = s[1.0:3.0] + result2 = s.ix[1.0:3.0] + result3 = s.loc[1.0:3.0] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + + # exact indexing when found + result1 = s[5.0] + result2 = s.loc[5.0] + result3 = s.ix[5.0] + self.assert_(result1 == result2) + self.assert_(result1 == result3) + + result1 = s[5] + result2 = s.loc[5] + result3 = s.ix[5] + self.assert_(result1 == result2) + self.assert_(result1 == result3) + + self.assert_(s[5.0] == s[5]) + + # value not found (and no fallbacking at all) + + # scalar integers + self.assertRaises(KeyError, lambda : s.loc[4]) + self.assertRaises(KeyError, lambda : s.ix[4]) + self.assertRaises(KeyError, lambda : s[4]) + + # fancy floats/integers create the correct entry (as nan) + # fancy tests + expected = Series([2, 0], index=Float64Index([5.0, 0.0])) + for fancy_idx in [[5.0, 0.0], [5, 0], np.array([5.0, 0.0]), np.array([5, 0])]: + assert_series_equal(s[fancy_idx], expected) + assert_series_equal(s.loc[fancy_idx], expected) + assert_series_equal(s.ix[fancy_idx], expected) + + # all should return the same as we are slicing 'the same' + 
result1 = s.loc[2:5] + result2 = s.loc[2.0:5.0] + result3 = s.loc[2.0:5] + result4 = s.loc[2.1:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + # previously this did fallback indexing + result1 = s[2:5] + result2 = s[2.0:5.0] + result3 = s[2.0:5] + result4 = s[2.1:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + result1 = s.ix[2:5] + result2 = s.ix[2.0:5.0] + result3 = s.ix[2.0:5] + result4 = s.ix[2.1:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + # combined test + result1 = s.loc[2:5] + result2 = s.ix[2:5] + result3 = s[2:5] + + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + + # list selection + result1 = s[[0.0,5,10]] + result2 = s.loc[[0.0,5,10]] + result3 = s.ix[[0.0,5,10]] + result4 = s.iloc[[0,2,4]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + result1 = s[[1.6,5,10]] + result2 = s.loc[[1.6,5,10]] + result3 = s.ix[[1.6,5,10]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, Series([np.nan,2,4],index=[1.6,5,10])) + + result1 = s[[0,1,2]] + result2 = s.ix[[0,1,2]] + result3 = s.loc[[0,1,2]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, Series([0.0,np.nan,np.nan],index=[0,1,2])) + + result1 = s.loc[[2.5, 5]] + result2 = s.ix[[2.5, 5]] + assert_series_equal(result1, result2) + assert_series_equal(result1, Series([1,2],index=[2.5,5.0])) + + result1 = s[[2.5]] + result2 = s.ix[[2.5]] + result3 = s.loc[[2.5]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, Series([1],index=[2.5])) + + def test_scalar_indexer(self): + # float indexing checked above + + def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): + + # related 236/4850 + # trying to access with a float index + s = Series(np.arange(len(index)),index=index) + + if iloc is None: + iloc = TypeError + self.assertRaises(iloc, lambda : s.iloc[3.5]) + if loc is None: + loc = TypeError + self.assertRaises(loc, lambda : s.loc[3.5]) + if ix is None: + ix = TypeError + self.assertRaises(ix, lambda : s.ix[3.5]) + if getitem is None: + getitem = TypeError + self.assertRaises(getitem, lambda : s[3.5]) + + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, + tm.makeDateIndex, tm.makePeriodIndex ]: + check_invalid(index()) + check_invalid(Index(np.arange(5) * 2.5),loc=KeyError, ix=KeyError, getitem=KeyError) + + def check_getitem(index): + + s = Series(np.arange(len(index)),index=index) + + # positional selection + result1 = s[5] + result2 = s[5.0] + result3 = s.iloc[5] + result4 = s.iloc[5.0] + + # by value + self.assertRaises(KeyError, lambda : s.loc[5]) + self.assertRaises(KeyError, lambda : s.loc[5.0]) + + # this is fallback, so it works + result5 = s.ix[5] + result6 = s.ix[5.0] + self.assert_(result1 == result2) + self.assert_(result1 == result3) + self.assert_(result1 == result4) + self.assert_(result1 == result5) + self.assert_(result1 == result6) + + # all index types except float/int + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex ]: + check_getitem(index()) + + # exact indexing when found on IntIndex + s = 
Series(np.arange(10),dtype='int64') + + result1 = s[5.0] + result2 = s.loc[5.0] + result3 = s.ix[5.0] + result4 = s[5] + result5 = s.loc[5] + result6 = s.ix[5] + self.assert_(result1 == result2) + self.assert_(result1 == result3) + self.assert_(result1 == result4) + self.assert_(result1 == result5) + self.assert_(result1 == result6) + + def test_slice_indexer(self): + + def check_slicing_positional(index): + + s = Series(np.arange(len(index))+10,index=index) + + # these are all positional + result1 = s[2:5] + result2 = s.ix[2:5] + result3 = s.iloc[2:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + + # not in the index + self.assertRaises(KeyError, lambda : s.loc[2:5]) + + # make all float slicing fail + self.assertRaises(TypeError, lambda : s[2.0:5]) + self.assertRaises(TypeError, lambda : s[2.0:5.0]) + self.assertRaises(TypeError, lambda : s[2:5.0]) + + self.assertRaises(TypeError, lambda : s.ix[2.0:5]) + self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) + self.assertRaises(TypeError, lambda : s.ix[2:5.0]) + + self.assertRaises(KeyError, lambda : s.loc[2.0:5]) + self.assertRaises(KeyError, lambda : s.loc[2.0:5.0]) + self.assertRaises(KeyError, lambda : s.loc[2:5.0]) + + # these work for now + #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) + #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) + #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + + # all index types except int, float + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex ]: + check_slicing_positional(index()) + + # int + index = tm.makeIntIndex() + s = Series(np.arange(len(index))+10,index) + + # this is positional + result1 = s[2:5] + result4 = s.iloc[2:5] + assert_series_equal(result1, result4) + + # these are all value based + result2 = s.ix[2:5] + result3 = s.loc[2:5] + result4 = s.loc[2.0:5] + result5 = s.loc[2.0:5.0] + result6 = s.loc[2:5.0] + assert_series_equal(result2, result3) + assert_series_equal(result2, result4) + assert_series_equal(result2, result5) + assert_series_equal(result2, result6) + + # make all float slicing fail + self.assertRaises(TypeError, lambda : s[2.0:5]) + self.assertRaises(TypeError, lambda : s[2.0:5.0]) + self.assertRaises(TypeError, lambda : s[2:5.0]) + + self.assertRaises(TypeError, lambda : s.ix[2.0:5]) + self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) + self.assertRaises(TypeError, lambda : s.ix[2:5.0]) + + # these work for now + #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) + #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) + #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + + # float + index = tm.makeFloatIndex() + s = Series(np.arange(len(index))+10,index=index) + + # these are all value based + result1 = s[2:5] + result2 = s.ix[2:5] + result3 = s.loc[2:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + + # these are all valid + result1a = s[2.0:5] + result2a = s[2.0:5.0] + result3a = s[2:5.0] + assert_series_equal(result1a, result2a) + assert_series_equal(result1a, result3a) + + result1b = s.ix[2.0:5] + result2b = s.ix[2.0:5.0] + result3b = s.ix[2:5.0] + assert_series_equal(result1b, result2b) + assert_series_equal(result1b, result3b) + + result1c = s.loc[2.0:5] + result2c = s.loc[2.0:5.0] + result3c = s.loc[2:5.0] + assert_series_equal(result1c, result2c) + assert_series_equal(result1c, result3c) + + assert_series_equal(result1a, result1b) + assert_series_equal(result1a, result1c) + + # these work for now + 
#self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) + #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) + #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 161b740178a4d..c4e75fcb41d45 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -14,6 +14,7 @@ import numpy as np from pandas.util.testing import assert_frame_equal +from numpy.testing import assert_array_equal from pandas.core.reshape import melt, convert_dummies, lreshape, get_dummies import pandas.util.testing as tm @@ -195,11 +196,8 @@ def test_include_na(self): assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([nan], dummy_na=True) - exp_just_na = DataFrame({nan: {0: 1.0}}) - # hack (NaN handling in assert_index_equal) - exp_just_na.columns = res_just_na.columns - assert_frame_equal(res_just_na, exp_just_na) - + exp_just_na = DataFrame(Series(1.0,index=[0]),columns=[nan]) + assert_array_equal(res_just_na.values, exp_just_na.values) class TestConvertDummies(unittest.TestCase): def test_convert_dummies(self): diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 6d3b052154147..98fa5c0a56ccd 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -909,12 +909,11 @@ def test_slice_can_reorder_not_uniquely_indexed(self): result = s[::-1] # it works! def test_slice_float_get_set(self): - result = self.ts[4.0:10.0] - expected = self.ts[4:10] - assert_series_equal(result, expected) - self.ts[4.0:10.0] = 0 - self.assert_((self.ts[4:10] == 0).all()) + self.assertRaises(TypeError, lambda : self.ts[4.0:10.0]) + def f(): + self.ts[4.0:10.0] = 0 + self.assertRaises(TypeError, f) self.assertRaises(TypeError, self.ts.__getitem__, slice(4.5, 10.0)) self.assertRaises(TypeError, self.ts.__setitem__, slice(4.5, 10.0), 0) @@ -932,6 +931,7 @@ def test_slice_floats2(self): self.assert_(len(s.ix[12.5:]) == 7) def test_slice_float64(self): + values = np.arange(10., 50., 2) index = Index(values) @@ -940,19 +940,19 @@ def test_slice_float64(self): s = Series(np.random.randn(20), index=index) result = s[start:end] - expected = s.ix[5:16] + expected = s.iloc[5:16] assert_series_equal(result, expected) - result = s.ix[start:end] + result = s.loc[start:end] assert_series_equal(result, expected) df = DataFrame(np.random.randn(20, 3), index=index) result = df[start:end] - expected = df.ix[5:16] + expected = df.iloc[5:16] tm.assert_frame_equal(result, expected) - result = df.ix[start:end] + result = df.loc[start:end] tm.assert_frame_equal(result, expected) def test_setitem(self): @@ -3254,6 +3254,13 @@ def test_value_counts_nunique(self): #self.assert_(result.index.dtype == 'timedelta64[ns]') self.assert_(result.index.dtype == 'int64') + # basics.rst doc example + series = Series(np.random.randn(500)) + series[20:500] = np.nan + series[10:20] = 5000 + result = series.nunique() + self.assert_(result == 11) + def test_unique(self): # 714 also, dtype=float diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a5a96d3e03cac..b25f85c961798 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -393,23 +393,36 @@ def getCols(k): return string.ascii_uppercase[:k] -def makeStringIndex(k): +# make index +def makeStringIndex(k=10): return Index([rands(10) for _ in range(k)]) -def makeUnicodeIndex(k): +def makeUnicodeIndex(k=10): return Index([randu(10) for _ in 
range(k)]) -def makeIntIndex(k): +def makeIntIndex(k=10): return Index(lrange(k)) -def makeFloatIndex(k): +def makeFloatIndex(k=10): values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) return Index(values * (10 ** np.random.randint(0, 9))) +def makeDateIndex(k=10): + dt = datetime(2000, 1, 1) + dr = bdate_range(dt, periods=k) + return DatetimeIndex(dr) + + +def makePeriodIndex(k=10): + dt = datetime(2000, 1, 1) + dr = PeriodIndex(start=dt, periods=k, freq='B') + return dr + +# make series def makeFloatSeries(): index = makeStringIndex(N) return Series(randn(N), index=index) @@ -431,41 +444,6 @@ def getSeriesData(): index = makeStringIndex(N) return dict((c, Series(randn(N), index=index)) for c in getCols(K)) - -def makeDataFrame(): - data = getSeriesData() - return DataFrame(data) - - -def getArangeMat(): - return np.arange(N * K).reshape((N, K)) - - -def getMixedTypeDict(): - index = Index(['a', 'b', 'c', 'd', 'e']) - - data = { - 'A': [0., 1., 2., 3., 4.], - 'B': [0., 1., 0., 1., 0.], - 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], - 'D': bdate_range('1/1/2009', periods=5) - } - - return index, data - - -def makeDateIndex(k): - dt = datetime(2000, 1, 1) - dr = bdate_range(dt, periods=k) - return DatetimeIndex(dr) - - -def makePeriodIndex(k): - dt = datetime(2000, 1, 1) - dr = PeriodIndex(start=dt, periods=k, freq='B') - return dr - - def makeTimeSeries(nper=None): if nper is None: nper = N @@ -490,6 +468,28 @@ def makeTimeDataFrame(nper=None): def getPeriodData(nper=None): return dict((c, makePeriodSeries(nper)) for c in getCols(K)) +# make frame +def makeDataFrame(): + data = getSeriesData() + return DataFrame(data) + + +def getArangeMat(): + return np.arange(N * K).reshape((N, K)) + + +def getMixedTypeDict(): + index = Index(['a', 'b', 'c', 'd', 'e']) + + data = { + 'A': [0., 1., 2., 3., 4.], + 'B': [0., 1., 0., 1., 0.], + 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], + 'D': bdate_range('1/1/2009', periods=5) + } + + return index, data + def makePeriodFrame(nper=None): data = getPeriodData(nper)
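
The documentation and tests added in this patch describe the ``Float64Index`` semantics: ``[]``, ``.ix`` and ``.loc`` become label based on a float index, ``.iloc`` stays purely positional, and float slicing on a non-float index raises ``TypeError``. Below is a minimal illustrative sketch of that behavior, assuming the 0.13-era API shown in the docs above (``.ix`` is omitted here since it is only a fallback indexer); it is not part of the patch itself.

.. code-block:: python

   import numpy as np
   import pandas as pd

   # A float-valued index produces a Float64Index, so [] and .loc are label based.
   s = pd.Series(np.arange(5), index=np.arange(5) * 2.5)   # labels: 0.0, 2.5, 5.0, 7.5, 10.0

   s[5.0]            # label lookup -> 2
   s.loc[2.5:7.5]    # label slice, endpoints included -> labels 2.5, 5.0, 7.5

   # .iloc remains purely positional.
   s.iloc[2]         # third element -> 2
   s.iloc[0:2]       # first two elements

   # Float slicing on a non-float (integer) index raises TypeError, as documented.
   s2 = pd.Series(range(5))
   try:
       s2[3.5:4.5]
   except TypeError as err:
       print("raised as documented:", err)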