Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into bug/categorical-indexing-1row-df
Browse files Browse the repository at this point in the history

* upstream/master: (185 commits)
  ENH: add BooleanArray extension array (pandas-dev#29555)
  DOC: Add link to dev calendar and meeting notes (pandas-dev#29737)
  ENH: Add built-in function for Styler to format the text displayed for missing values (pandas-dev#29118)
  DEPR: remove statsmodels/seaborn compat shims (pandas-dev#29822)
  DEPR: remove Index.summary (pandas-dev#29807)
  DEPR: passing an int to read_excel use_cols (pandas-dev#29795)
  STY: fstrings in io.pytables (pandas-dev#29758)
  BUG: Fix melt with mixed int/str columns (pandas-dev#29792)
  TST: add test for ffill/bfill for non unique multilevel (pandas-dev#29763)
  Changed description of parse_dates in read_excel(). (pandas-dev#29796)
  BUG: pivot_table not returning correct type when margin=True and aggfunc='mean' (pandas-dev#28248)
  REF: Create _lib/window directory (pandas-dev#29817)
  Fixed small mistake (pandas-dev#29815)
  minor cleanups (pandas-dev#29798)
  DEPR: enforce deprecations in core.internals (pandas-dev#29723)
  add test for unused level raises KeyError (pandas-dev#29760)
  Add documentation linking to sqlalchemy (pandas-dev#29373)
  io/parsers: ensure decimal is str on PythonParser (pandas-dev#29743)
  Reenabled no-unused-function (pandas-dev#29767)
  CLN:F-string in pandas/_libs/tslibs/*.pyx (pandas-dev#29775)
  ...

# Conflicts:
#	pandas/tests/frame/indexing/test_indexing.py
  • Loading branch information
keechongtan committed Nov 25, 2019
2 parents 3e847e9 + 7d7f885 commit ca60804
Show file tree
Hide file tree
Showing 522 changed files with 9,473 additions and 7,183 deletions.
1 change: 1 addition & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
custom: https://pandas.pydata.org/donate.html
github: [numfocus]
tidelift: pypi/pandas
15 changes: 15 additions & 0 deletions .github/workflows/assign.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: Assign
on:
issue_comment:
types: created

jobs:
one:
runs-on: ubuntu-latest
steps:
- name:
run: |
if [[ "${{ github.event.comment.body }}" == "take" ]]; then
echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
fi
103 changes: 103 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
name: CI

on:
push:
branches: master
pull_request:
branches: master

env:
ENV_FILE: environment.yml
# TODO: remove export PATH=... in each step once this works
# PATH: $HOME/miniconda3/bin:$PATH

jobs:
checks:
name: Checks
runs-on: ubuntu-latest
steps:

- name: Checkout
uses: actions/checkout@v1

- name: Looking for unwanted patterns
run: ci/code_checks.sh patterns
if: true

- name: Setup environment and build pandas
run: |
export PATH=$HOME/miniconda3/bin:$PATH
ci/setup_env.sh
if: true

- name: Linting
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
ci/code_checks.sh lint
if: true

- name: Dependencies consistency
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
ci/code_checks.sh dependencies
if: true

- name: Checks on imported code
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
ci/code_checks.sh code
if: true

- name: Running doctests
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
ci/code_checks.sh doctests
if: true

- name: Docstring validation
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
ci/code_checks.sh docstrings
if: true

- name: Typing validation
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
ci/code_checks.sh typing
if: true

- name: Testing docstring validation script
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
pytest --capture=no --strict scripts
if: true

- name: Running benchmarks
run: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
cd asv_bench
asv check -E existing
git remote add upstream https://github.com/pandas-dev/pandas.git
git fetch upstream
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
asv machine --yes
ASV_OUTPUT="$(asv dev)"
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
echo "##vso[task.logissue type=error]Benchmarks run with errors"
echo "$ASV_OUTPUT"
exit 1
else
echo "Benchmarks run without errors"
fi
else
echo "Benchmarks did not run, no changes detected"
fi
if: true
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/python/black
rev: stable
rev: 19.10b0
hooks:
- id: black
language_version: python3.7
Expand All @@ -9,7 +9,7 @@ repos:
hooks:
- id: flake8
language: python_venv
additional_dependencies: [flake8-comprehensions]
additional_dependencies: [flake8-comprehensions>=3.1.0]
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.20
hooks:
Expand Down
17 changes: 3 additions & 14 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,9 @@ matrix:
- python: 3.5

include:
- dist: bionic
# 18.04
python: 3.8.0
- dist: trusty
env:
- JOB="3.8-dev" PATTERN="(not slow and not network)"
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"

- dist: trusty
env:
Expand Down Expand Up @@ -85,19 +83,10 @@ install:
- ci/submit_cython_cache.sh
- echo "install done"


before_script:
# display server (for clipboard functionality) needs to be started here,
# does not work if done in install:setup_env.sh (GH-26103)
- export DISPLAY=":99.0"
- echo "sh -e /etc/init.d/xvfb start"
- if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
- sleep 3

script:
- echo "script start"
- echo "$JOB"
- if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
- source activate pandas-dev
- ci/run_tests.sh

after_script:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ lint-diff:
git diff upstream/master --name-only -- "*.py" | xargs flake8

black:
black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
black .

develop: build
python -m pip install --no-build-isolation -e .
Expand Down
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class ValueCounts:

def setup(self, dropna):
n = 5 * 10 ** 5
arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
self.ts = pd.Series(arr).astype("category")

def time_value_counts(self, dropna):
Expand All @@ -102,7 +102,7 @@ def time_rendering(self):
class SetCategories:
def setup(self):
n = 5 * 10 ** 5
arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
self.ts = pd.Series(arr).astype("category")

def time_set_categories(self):
Expand All @@ -112,7 +112,7 @@ def time_set_categories(self):
class RemoveCategories:
def setup(self):
n = 5 * 10 ** 5
arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
self.ts = pd.Series(arr).astype("category")

def time_remove_categories(self):
Expand Down Expand Up @@ -164,9 +164,9 @@ def setup(self, dtype):
np.random.seed(1234)
n = 5 * 10 ** 5
sample_size = 100
arr = [i for i in np.random.randint(0, n // 10, size=n)]
arr = list(np.random.randint(0, n // 10, size=n))
if dtype == "object":
arr = ["s{:04d}".format(i) for i in arr]
arr = [f"s{i:04d}" for i in arr]
self.sample = np.random.choice(arr, sample_size)
self.series = pd.Series(arr).astype("category")

Expand Down Expand Up @@ -225,7 +225,7 @@ def setup(self, index):
elif index == "non_monotonic":
self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories)
else:
raise ValueError("Invalid index param: {}".format(index))
raise ValueError(f"Invalid index param: {index}")

self.scalar = 10000
self.list = list(range(10000))
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class FromLists:
def setup(self):
N = 1000
M = 100
self.data = [[j for j in range(M)] for i in range(N)]
self.data = [list(range(M)) for i in range(N)]

def time_frame_from_lists(self):
self.df = DataFrame(self.data)
Expand Down
8 changes: 3 additions & 5 deletions asv_bench/benchmarks/gil.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def wrapper(fname):
return wrapper


from .pandas_vb_common import BaseIO # noqa: E402 isort:skip
from .pandas_vb_common import BaseIO # isort:skip


class ParallelGroupbyMethods:
Expand Down Expand Up @@ -250,13 +250,11 @@ def setup(self, dtype):
np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows)
),
"object": DataFrame(
"foo",
index=range(rows),
columns=["object%03d".format(i) for i in range(5)],
"foo", index=range(rows), columns=["object%03d" for _ in range(5)]
),
}

self.fname = "__test_{}__.csv".format(dtype)
self.fname = f"__test_{dtype}__.csv"
df = data[dtype]
df.to_csv(self.fname)

Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/index_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class Indexing:

def setup(self, dtype):
N = 10 ** 6
self.idx = getattr(tm, "make{}Index".format(dtype))(N)
self.idx = getattr(tm, f"make{dtype}Index")(N)
self.array_mask = (np.arange(N) % 3) == 0
self.series_mask = Series(self.array_mask)
self.sorted = self.idx.sort_values()
Expand Down
12 changes: 5 additions & 7 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
param_names = ["bad_date_value"]

def setup(self, bad_date_value):
self.StringIO_input = StringIO(("%s,\n" % bad_date_value) * 50000)
self.StringIO_input = StringIO((f"{bad_date_value},\n") * 50000)

def time_read_csv(self, bad_date_value):
read_csv(
Expand Down Expand Up @@ -202,7 +202,7 @@ def setup(self, sep, thousands):
data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))
df = DataFrame(data)
if thousands is not None:
fmt = ":{}".format(thousands)
fmt = f":{thousands}"
fmt = "{" + fmt + "}"
df = df.applymap(lambda x: fmt.format(x))
df.to_csv(self.fname, sep=sep)
Expand Down Expand Up @@ -231,7 +231,7 @@ def setup(self, sep, decimal, float_precision):
floats = [
"".join(random.choice(string.digits) for _ in range(28)) for _ in range(15)
]
rows = sep.join(["0{}".format(decimal) + "{}"] * 3) + "\n"
rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n"
data = rows * 5
data = data.format(*floats) * 200 # 1000 x 3 strings csv
self.StringIO_input = StringIO(data)
Expand Down Expand Up @@ -309,9 +309,7 @@ class ReadCSVCachedParseDates(StringIORewind):
param_names = ["do_cache"]

def setup(self, do_cache):
data = (
"\n".join("10/{}".format(year) for year in range(2000, 2100)) + "\n"
) * 10
data = ("\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n") * 10
self.StringIO_input = StringIO(data)

def time_read_csv_cached(self, do_cache):
Expand All @@ -336,7 +334,7 @@ class ReadCSVMemoryGrowth(BaseIO):
def setup(self):
with open(self.fname, "w") as f:
for i in range(self.num_rows):
f.write("{i}\n".format(i=i))
f.write(f"{i}\n")

def mem_parser_chunks(self):
# see gh-24805.
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def _generate_dataframe():
C = 5
df = DataFrame(
np.random.randn(N, C),
columns=["float{}".format(i) for i in range(C)],
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
)
df["object"] = tm.makeStringIndex(N)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def setup(self, format):
C = 5
self.df = DataFrame(
np.random.randn(N, C),
columns=["float{}".format(i) for i in range(C)],
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
)
self.df["object"] = tm.makeStringIndex(N)
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def setup(self, orient, index):
}
df = DataFrame(
np.random.randn(N, 5),
columns=["float_{}".format(i) for i in range(5)],
columns=[f"float_{i}" for i in range(5)],
index=indexes[index],
)
df.to_json(self.fname, orient=orient)
Expand All @@ -43,7 +43,7 @@ def setup(self, index):
}
df = DataFrame(
np.random.randn(N, 5),
columns=["float_{}".format(i) for i in range(5)],
columns=[f"float_{i}" for i in range(5)],
index=indexes[index],
)
df.to_json(self.fname, orient="records", lines=True)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/msgpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def setup(self):
C = 5
self.df = DataFrame(
np.random.randn(N, C),
columns=["float{}".format(i) for i in range(C)],
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
)
self.df["object"] = tm.makeStringIndex(N)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def setup(self):
C = 5
self.df = DataFrame(
np.random.randn(N, C),
columns=["float{}".format(i) for i in range(C)],
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
)
self.df["object"] = tm.makeStringIndex(N)
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def setup(self, connection):
"sqlite": sqlite3.connect(":memory:"),
}
self.table_name = "test_type"
self.query_all = "SELECT * FROM {}".format(self.table_name)
self.query_all = f"SELECT * FROM {self.table_name}"
self.con = con[connection]
self.df = DataFrame(
{
Expand Down Expand Up @@ -58,7 +58,7 @@ def setup(self, connection, dtype):
"sqlite": sqlite3.connect(":memory:"),
}
self.table_name = "test_type"
self.query_col = "SELECT {} FROM {}".format(dtype, self.table_name)
self.query_col = f"SELECT {dtype} FROM {self.table_name}"
self.con = con[connection]
self.df = DataFrame(
{
Expand Down
Loading

0 comments on commit ca60804

Please sign in to comment.