Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal committed Oct 7, 2018
2 parents 2d61a12 + 5551bcf, commit 01ae19e
Showing 167 changed files with 4,125 additions and 2,080 deletions.
1 change: 0 additions & 1 deletion .pep8speaks.yml
@@ -8,5 +8,4 @@ pycodestyle:
     ignore:  # Errors and warnings to ignore
     - E402,  # module level import not at top of file
     - E731,  # do not assign a lambda expression, use a def
-    - E741,  # do not use variables named 'l', 'O', or 'I'
     - W503   # line break before binary operator
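
Note on this hunk: dropping E741 from the ignore list means pycodestyle (via pep8speaks) now reports ambiguous single-character names in review comments. A minimal illustration of what the check flags (hypothetical snippet, not from this commit):

```python
# E741 fires on single-character names easily confused with digits
# in many fonts: 'l', 'O', and 'I'.
l = [1, 2, 3]       # flagged: ambiguous variable name 'l'

values = [1, 2, 3]  # fine: a descriptive name satisfies the check
```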
19 changes: 7 additions & 12 deletions .travis.yml
@@ -53,18 +53,20 @@ matrix:
     - dist: trusty
       env:
       - JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true
-  # In allow_failures
-    - dist: trusty
-      env:
-      - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
-  # In allow_failures
+
     - dist: trusty
       env:
       - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
       addons:
         apt:
           packages:
           - xsel
+
+  # In allow_failures
+    - dist: trusty
+      env:
+      - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
+
   # In allow_failures
     - dist: trusty
       env:
@@ -73,13 +75,6 @@ matrix:
     - dist: trusty
       env:
       - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
-    - dist: trusty
-      env:
-      - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
-      addons:
-        apt:
-          packages:
-          - xsel
     - dist: trusty
       env:
       - JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true
6 changes: 3 additions & 3 deletions README.md
@@ -56,8 +56,8 @@
 <tr>
   <td></td>
   <td>
-    <a href="https://ci.appveyor.com/project/pandas-dev/pandas">
-      <img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
+    <a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
+      <img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
     </a>
   </td>
 </tr>
@@ -97,7 +97,7 @@ easy and intuitive. It aims to be the fundamental high-level building block for
 doing practical, **real world** data analysis in Python. Additionally, it has
 the broader goal of becoming **the most powerful and flexible open source data
 analysis / manipulation tool available in any language**. It is already well on
-its way toward this goal.
+its way towards this goal.
 
 ## Main Features
 Here are just a few of the things that pandas does well:
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/algorithms.py
@@ -9,7 +9,7 @@
     try:
         hashing = import_module(imp)
         break
-    except:
+    except (ImportError, TypeError, ValueError):
         pass
 
 from .pandas_vb_common import setup  # noqa
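
Context for the hunk above: a bare `except:` also swallows `KeyboardInterrupt` and `SystemExit`, so the benchmark's compatibility import now catches only the errors a failed import is expected to raise. A self-contained sketch of the pattern; the module list here is illustrative, since the actual list sits outside this hunk:

```python
from importlib import import_module

hashing = None
# Try newer and older module paths in turn (illustrative paths).
for imp in ['pandas.util._hashing', 'pandas.tools.hashing']:
    try:
        hashing = import_module(imp)
        break
    except (ImportError, TypeError, ValueError):
        # Only the failure modes of an import are silenced; a bare
        # 'except:' would also hide KeyboardInterrupt and SystemExit.
        pass
```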
13 changes: 10 additions & 3 deletions asv_bench/benchmarks/frame_methods.py
@@ -505,14 +505,21 @@ class NSort(object):
     param_names = ['keep']
 
     def setup(self, keep):
-        self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
+        self.df = DataFrame(np.random.randn(100000, 3),
+                            columns=list('ABC'))
 
-    def time_nlargest(self, keep):
+    def time_nlargest_one_column(self, keep):
         self.df.nlargest(100, 'A', keep=keep)
 
-    def time_nsmallest(self, keep):
+    def time_nlargest_two_columns(self, keep):
+        self.df.nlargest(100, ['A', 'B'], keep=keep)
+
+    def time_nsmallest_one_column(self, keep):
         self.df.nsmallest(100, 'A', keep=keep)
 
+    def time_nsmallest_two_columns(self, keep):
+        self.df.nsmallest(100, ['A', 'B'], keep=keep)
+
 
 class Describe(object):
 
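
The NSort benchmarks above now cover single- and multi-column selection on a larger frame. For reference, a usage sketch of the API being timed, under the same setup as the benchmark:

```python
import numpy as np
from pandas import DataFrame

df = DataFrame(np.random.randn(100000, 3), columns=list('ABC'))

# Top 100 rows ordered by column 'A'; keep= controls which duplicates
# at the cutoff are retained ('first' or 'last').
top_a = df.nlargest(100, 'A', keep='first')

# With a list of columns, 'B' is used to break ties in 'A'.
top_ab = df.nlargest(100, ['A', 'B'], keep='first')
bot_ab = df.nsmallest(100, ['A', 'B'], keep='first')
```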
83 changes: 49 additions & 34 deletions asv_bench/benchmarks/indexing.py
@@ -2,104 +2,119 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
-                    IntervalIndex, CategoricalIndex,
-                    IndexSlice, concat, date_range)
-from .pandas_vb_common import setup, Panel  # noqa
+from pandas import (Series, DataFrame, MultiIndex, Panel,
+                    Int64Index, Float64Index, IntervalIndex,
+                    CategoricalIndex, IndexSlice, concat, date_range)
+from .pandas_vb_common import setup  # noqa
 
 
 class NumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = [Int64Index, Float64Index]
-    param = ['index']
+    params = [
+        (Int64Index, Float64Index),
+        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+    ]
+    param_names = ['index_dtype', 'index_structure']
 
-    def setup(self, index):
+    def setup(self, index, index_structure):
         N = 10**6
-        idx = index(range(N))
-        self.data = Series(np.random.rand(N), index=idx)
+        indices = {
+            'unique_monotonic_inc': index(range(N)),
+            'nonunique_monotonic_inc': index(
+                list(range(55)) + [54] + list(range(55, N - 1))),
+        }
+        self.data = Series(np.random.rand(N), index=indices[index_structure])
         self.array = np.arange(10000)
         self.array_list = self.array.tolist()
 
-    def time_getitem_scalar(self, index):
+    def time_getitem_scalar(self, index, index_structure):
         self.data[800000]
 
-    def time_getitem_slice(self, index):
+    def time_getitem_slice(self, index, index_structure):
         self.data[:800000]
 
-    def time_getitem_list_like(self, index):
+    def time_getitem_list_like(self, index, index_structure):
         self.data[[800000]]
 
-    def time_getitem_array(self, index):
+    def time_getitem_array(self, index, index_structure):
         self.data[self.array]
 
-    def time_getitem_lists(self, index):
+    def time_getitem_lists(self, index, index_structure):
         self.data[self.array_list]
 
-    def time_iloc_array(self, index):
+    def time_iloc_array(self, index, index_structure):
         self.data.iloc[self.array]
 
-    def time_iloc_list_like(self, index):
+    def time_iloc_list_like(self, index, index_structure):
         self.data.iloc[[800000]]
 
-    def time_iloc_scalar(self, index):
+    def time_iloc_scalar(self, index, index_structure):
         self.data.iloc[800000]
 
-    def time_iloc_slice(self, index):
+    def time_iloc_slice(self, index, index_structure):
         self.data.iloc[:800000]
 
-    def time_ix_array(self, index):
+    def time_ix_array(self, index, index_structure):
         self.data.ix[self.array]
 
-    def time_ix_list_like(self, index):
+    def time_ix_list_like(self, index, index_structure):
         self.data.ix[[800000]]
 
-    def time_ix_scalar(self, index):
+    def time_ix_scalar(self, index, index_structure):
         self.data.ix[800000]
 
-    def time_ix_slice(self, index):
+    def time_ix_slice(self, index, index_structure):
         self.data.ix[:800000]
 
-    def time_loc_array(self, index):
+    def time_loc_array(self, index, index_structure):
         self.data.loc[self.array]
 
-    def time_loc_list_like(self, index):
+    def time_loc_list_like(self, index, index_structure):
         self.data.loc[[800000]]
 
-    def time_loc_scalar(self, index):
+    def time_loc_scalar(self, index, index_structure):
         self.data.loc[800000]
 
-    def time_loc_slice(self, index):
+    def time_loc_slice(self, index, index_structure):
         self.data.loc[:800000]
 
 
 class NonNumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = ['string', 'datetime']
-    param_names = ['index']
+    params = [
+        ('string', 'datetime'),
+        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+    ]
+    param_names = ['index_dtype', 'index_structure']
 
-    def setup(self, index):
-        N = 10**5
+    def setup(self, index, index_structure):
+        N = 10**6
         indexes = {'string': tm.makeStringIndex(N),
                    'datetime': date_range('1900', periods=N, freq='s')}
         index = indexes[index]
+        if index_structure == 'nonunique_monotonic_inc':
+            index = index.insert(item=index[2], loc=2)[:-1]
         self.s = Series(np.random.rand(N), index=index)
         self.lbl = index[80000]
 
-    def time_getitem_label_slice(self, index):
+    def time_getitem_label_slice(self, index, index_structure):
         self.s[:self.lbl]
 
-    def time_getitem_pos_slice(self, index):
+    def time_getitem_pos_slice(self, index, index_structure):
         self.s[:80000]
 
-    def time_get_value(self, index):
+    def time_get_value(self, index, index_structure):
         with warnings.catch_warnings(record=True):
             self.s.get_value(self.lbl)
 
-    def time_getitem_scalar(self, index):
+    def time_getitem_scalar(self, index, index_structure):
         self.s[self.lbl]
 
+    def time_getitem_list_like(self, index, index_structure):
+        self.s[[self.lbl]]
+
 
 class DataFrameStringIndexing(object):
 
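
What drives the signature churn above: asv treats `params` as a list of parameter lists and benchmarks their cartesian product, passing one value from each list to `setup()` and to every `time_*` method, in order. A plain-Python sketch of that convention (not the asv API itself):

```python
from itertools import product

# Mirrors the class attributes in the hunk above: 2 index dtypes x
# 2 index structures = 4 benchmark cases.
params = [
    ('Int64Index', 'Float64Index'),
    ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
]
param_names = ['index_dtype', 'index_structure']

for index_dtype, index_structure in product(*params):
    # asv would call setup(index_dtype, index_structure) and then each
    # time_* method with the same two arguments.
    print(index_dtype, index_structure)
```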
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/io/csv.py
@@ -1,11 +1,9 @@
 import random
-import timeit
 import string
 
 import numpy as np
 import pandas.util.testing as tm
 from pandas import DataFrame, Categorical, date_range, read_csv
-from pandas.compat import PY2
 from pandas.compat import cStringIO as StringIO
 
 from ..pandas_vb_common import setup, BaseIO  # noqa
@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
                  names=list('abc'), float_precision=float_precision)
 
     def time_read_csv_python_engine(self, sep, decimal, float_precision):
-        read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
-                 float_precision=None, names=list('abc'))
+        read_csv(self.data(self.StringIO_input), sep=sep, header=None,
+                 engine='python', float_precision=None, names=list('abc'))
9 changes: 5 additions & 4 deletions asv_bench/benchmarks/join_merge.py
@@ -3,14 +3,15 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import (DataFrame, Series, MultiIndex, date_range, concat, merge,
-                    merge_asof)
+from pandas import (DataFrame, Series, Panel, MultiIndex,
+                    date_range, concat, merge, merge_asof)
 
 try:
     from pandas import merge_ordered
 except ImportError:
     from pandas import ordered_merge as merge_ordered
 
-from .pandas_vb_common import Panel, setup  # noqa
+from .pandas_vb_common import setup  # noqa
+
 
 class Append(object):
@@ -29,7 +30,7 @@ def setup(self):
         try:
             with warnings.catch_warnings(record=True):
                 self.mdf1.consolidate(inplace=True)
-        except:
+        except (AttributeError, TypeError):
             pass
         self.mdf2 = self.mdf1.copy()
         self.mdf2.index = self.df2.index
5 changes: 2 additions & 3 deletions asv_bench/benchmarks/pandas_vb_common.py
@@ -2,14 +2,13 @@
 from importlib import import_module
 
 import numpy as np
-from pandas import Panel
 
 # Compatibility import for lib
 for imp in ['pandas._libs.lib', 'pandas.lib']:
     try:
         lib = import_module(imp)
         break
-    except:
+    except (ImportError, TypeError, ValueError):
         pass
 
 numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
@@ -34,7 +33,7 @@ def remove(self, f):
         """Remove created files"""
         try:
             os.remove(f)
-        except:
+        except OSError:
             # On Windows, attempting to remove a file that is in use
             # causes an exception to be raised
             pass
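
The `remove` fix above narrows another bare `except:` to `OSError`, which is what `os.remove` raises when a file is missing or, on Windows, still in use. An equivalent modern idiom, shown here as an alternative rather than what the benchmark code uses:

```python
import os
from contextlib import suppress


def remove(f):
    """Remove a created file, ignoring a missing file or, on Windows,
    one still held open by another process."""
    with suppress(OSError):
        os.remove(f)
```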
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/panel_ctor.py
@@ -1,9 +1,9 @@
 import warnings
 from datetime import datetime, timedelta
 
-from pandas import DataFrame, DatetimeIndex, date_range
+from pandas import DataFrame, Panel, DatetimeIndex, date_range
 
-from .pandas_vb_common import Panel, setup  # noqa
+from .pandas_vb_common import setup  # noqa
 
 
 class DifferentIndexes(object):
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/panel_methods.py
@@ -1,8 +1,9 @@
 import warnings
 
 import numpy as np
+from pandas import Panel
 
-from .pandas_vb_common import Panel, setup  # noqa
+from .pandas_vb_common import setup  # noqa
 
 
 class PanelMethods(object):
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/stat_ops.py
@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
         df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
         try:
             pd.options.compute.use_bottleneck = use_bottleneck
-        except:
+        except TypeError:
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.df_func = getattr(df, op)
@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
         s = pd.Series(np.random.randn(100000)).astype(dtype)
         try:
             pd.options.compute.use_bottleneck = use_bottleneck
-        except:
+        except TypeError:
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.s_func = getattr(s, op)
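
Background for the two hunks above: the `compute.use_bottleneck` option only exists on newer pandas versions, so when setting it fails the benchmark falls back to the private nanops flag. A hedged, self-contained sketch of that toggle (the private attribute is internal and may change between releases):

```python
import numpy as np
import pandas as pd

use_bottleneck = False
try:
    # Public option on newer pandas versions.
    pd.options.compute.use_bottleneck = use_bottleneck
except TypeError:
    # Older versions: poke the private switch directly.
    from pandas.core import nanops
    nanops._USE_BOTTLENECK = use_bottleneck

s = pd.Series(np.random.randn(100000))
print(s.sum())  # runs with bottleneck disabled
```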
1 change: 0 additions & 1 deletion asv_bench/benchmarks/timeseries.py
@@ -1,4 +1,3 @@
-import warnings
 from datetime import timedelta
 
 import numpy as np
4 changes: 2 additions & 2 deletions azure-pipelines.yml
@@ -18,8 +18,8 @@ jobs:
 - template: ci/azure/windows.yml
   parameters:
     name: Windows
-    vmImage: vs2017-win2017
+    vmImage: vs2017-win2016
 - template: ci/azure/windows-py27.yml
   parameters:
     name: WindowsPy27
-    vmImage: vs2017-win2017
+    vmImage: vs2017-win2016
4 changes: 4 additions & 0 deletions ci/azure/macos.yml
@@ -37,3 +37,7 @@ jobs:
   - script: |
       export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
+  - task: PublishTestResults@2
+    inputs:
+      testResultsFiles: '/tmp/*.xml'
+      testRunTitle: 'MacOS-35'