Commit 73e5e64
Better lint (#42)
* lint

* doc
xadupre authored Sep 7, 2024
1 parent 9753f32 commit 73e5e64
Showing 16 changed files with 86 additions and 85 deletions.
3 changes: 0 additions & 3 deletions README.rst
@@ -5,9 +5,6 @@ pandas-streaming: streaming API over pandas
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
:alt: Build Status Windows

-.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
-   :target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
-
.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/

1 change: 0 additions & 1 deletion _doc/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
import sys
import os
from sphinx_runpython.github_link import make_linkcode_resolve
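Note on the deletion above: the `# -*- coding: utf-8 -*-` cookie is redundant on Python 3, where PEP 3120 makes UTF-8 the default source encoding; ruff reports the leftover declaration as UP009. The same cleanup appears in the test files below.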
2 changes: 1 addition & 1 deletion _doc/examples/first_step.py
@@ -1,7 +1,7 @@
"""
First steps with pandas_streaming
=================================
A few difference between :epkg:`pandas` and *pandas_streaming*.
pandas to pandas_streaming
3 changes: 0 additions & 3 deletions _doc/index.rst
@@ -9,9 +9,6 @@ pandas-streaming: streaming API over pandas
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
:alt: Build Status Windows

-.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
-   :target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
-
.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/

4 changes: 2 additions & 2 deletions _unittests/ut_df/test_connex_split.py
@@ -176,7 +176,7 @@ def test_split_connex2(self):
for k, v in sorted(stats[0].items()):
rows.append(f"{k}={v}")
raise AssertionError(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030
s1, s2, train, test, "\n".join(rows)
)
)
@@ -212,7 +212,7 @@ def test_split_connex_missing(self):
for k, v in sorted(stats[0].items()):
rows.append(f"{k}={v}")
raise AssertionError(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030
s1, s2, train, test, "\n".join(rows)
)
)
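The `# noqa: UP030` tags added above silence ruff's pyupgrade rule UP030, which flags explicit positional indices in `str.format` templates; the commit keeps the numbered placeholders and suppresses the warning rather than rewriting the strings. A minimal sketch of what the rule is about (the values are placeholders, not taken from the test suite):

    s1, s2 = {1, 2}, {2, 3}

    # UP030 flags explicit positional indices in str.format:
    msg = "Non empty intersection {0} & {1}".format(s1, s2)

    # The preferred spellings use implicit indices or an f-string:
    msg = "Non empty intersection {} & {}".format(s1, s2)
    msg = f"Non empty intersection {s1} & {s2}"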
1 change: 0 additions & 1 deletion _unittests/ut_df/test_connex_split_big.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
import os
import unittest
from collections import Counter
2 changes: 0 additions & 2 deletions _unittests/ut_df/test_connex_split_cat.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import unittest
from collections import Counter
import pandas
8 changes: 4 additions & 4 deletions _unittests/ut_df/test_streaming_dataframe.py
@@ -223,7 +223,7 @@ def test_train_test_split_streaming_tiny(self):

def test_train_test_split_streaming_strat(self):
sdf = dummy_streaming_dataframe(
-100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(0, 100)]
+100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(100)]
)
trsdf, tesdf = sdf.train_test_split(
streaming=True, unique_rows=True, stratify="tify"
@@ -324,9 +324,9 @@ def test_concatv(self):
self.assertEqualDataFrame(m1.to_dataframe(), df)
m1 = sdf20.concat(df30, axis=0)
self.assertEqualDataFrame(m1.to_dataframe(), df)
-m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
+m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0) # noqa: C417
self.assertEqualDataFrame(m1.to_dataframe(), df)
-m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
+m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0) # noqa: C417
self.assertEqualDataFrame(m1.to_dataframe(), df)

df20["cint"] = df20["cint"].astype(float)
@@ -490,7 +490,7 @@ def test_read_csv_names(self):
def test_add_column(self):
df = pandas.DataFrame(data=dict(X=[4.5, 6, 7], Y=["a", "b", "c"]))
sdf = StreamingDataFrame.read_df(df)
-sdf2 = sdf.add_column("d", lambda row: int(1))
+sdf2 = sdf.add_column("d", lambda _row: 1)
df2 = sdf2.to_dataframe()
df["d"] = 1
self.assertEqualDataFrame(df, df2)
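Two recurring fixes in this file: `range(0, 100)` drops its redundant start argument (0 is the default), and the `map(lambda x: x, [df30])` calls gain `# noqa: C417`, the flake8-comprehensions rule that prefers a comprehension over `map` with a `lambda`. The tests presumably keep `map` on purpose, to feed `concat` a lazy iterable rather than a list, so silencing the rule is the right call there. An illustrative sketch (names are placeholders):

    n = 100

    # range(0, n) is just range(n); 0 is the default start.
    labels = ["t1" if i % 3 else "t0" for i in range(n)]

    # C417: map over a lambda is an unnecessary indirection...
    doubled = list(map(lambda x: x * 2, range(n)))  # noqa: C417
    # ...a comprehension states the same thing directly:
    doubled = [x * 2 for x in range(n)]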
8 changes: 3 additions & 5 deletions pandas_streaming/data/dummy.py
@@ -16,14 +16,12 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
if asfloat:
df = DataFrame(
dict(
-cfloat=[_ + 0.1 for _ in range(0, n)],
-cstr=[f"s{i}" for i in range(0, n)],
+cfloat=[_ + 0.1 for _ in range(n)],
+cstr=[f"s{i}" for i in range(n)],
)
)
else:
-df = DataFrame(
-dict(cint=list(range(0, n)), cstr=[f"s{i}" for i in range(0, n)])
-)
+df = DataFrame(dict(cint=list(range(n)), cstr=[f"s{i}" for i in range(n)]))
for k, v in cols.items():
df[k] = v
return StreamingDataFrame.read_df(df, chunksize=chunksize)
8 changes: 3 additions & 5 deletions pandas_streaming/df/connex_split.py
@@ -12,8 +12,6 @@ class ImbalancedSplitException(Exception):
Raised when an imbalanced split is detected.
"""

-pass


def train_test_split_weights(
df,
@@ -72,7 +70,7 @@ def train_test_split_weights(
weights = list(df[weights])
if len(weights) != df.shape[0]:
raise ValueError(
"Dimension mismatch between weights and dataframe "
"Dimension mismatch between weights and dataframe " # noqa: UP030
"{0} != {1}".format(df.shape[0], len(weights))
)

@@ -97,7 +95,7 @@ def train_test_split_weights(
test_ids = []
test_weights = 0
train_weights = 0
-for i in range(0, df.shape[0]):
+for i in range(df.shape[0]):
w = weights[i]
if balance == 0:
h = randint(0, 1)
@@ -116,7 +114,7 @@ def train_test_split_weights(
r = abs(train_weights - test_weights) / (1.0 * (train_weights + test_weights))
if r >= fail_imbalanced:
raise ImbalancedSplitException( # pragma: no cover
"Split is imbalanced: train_weights={0} test_weights={1} r={2}."
"Split is imbalanced: train_weights={0} test_weights={1} r={2}." # noqa: UP030
"".format(train_weights, test_weights, r)
)

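The `pass` deleted from `ImbalancedSplitException` above (and from `StreamingDataFrameSchemaError` below) was dead weight: a docstring is already a complete class body. A minimal sketch:

    class ImbalancedSplitException(Exception):
        """Raised when an imbalanced split is detected."""
        # No `pass` needed: the docstring alone is a valid class body.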
45 changes: 23 additions & 22 deletions pandas_streaming/df/dataframe.py
@@ -23,8 +23,6 @@ class StreamingDataFrameSchemaError:
Reveals an issue with inconsistant schemas.
"""

-pass


class StreamingDataFrame:
"""
@@ -273,9 +271,11 @@ def localf(a0=args[0]):
**kwargs_create,
)

-def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
+def fct1(
+    st=st, args=args, chunksize=chunksize, kw=kwargs.copy()  # noqa: B008
+):
st.seek(0)
-for r in pandas.read_json(
+for r in pandas.read_json( # noqa: UP028
st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
):
yield r
@@ -293,8 +293,8 @@ def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
**kwargs_create,
)

-def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
-for r in pandas.read_json(
+def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()): # noqa: B008
+for r in pandas.read_json( # noqa: UP028
*args, chunksize=chunksize, nrows=chunksize, **kw
):
yield r
@@ -318,10 +318,10 @@ def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
**kwargs_create,
)

-def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
+def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()): # noqa: B008
if hasattr(st, "seek"):
st.seek(0)
-for r in pandas.read_json(
+for r in pandas.read_json( # noqa: UP028
st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
):
yield r
@@ -438,7 +438,7 @@ def __iter__(self):
elif self.check_schema:
if list(it.columns) != sch[0]: # pylint: disable=E1136
raise StreamingDataFrameSchemaError( # pragma: no cover
"Column names are different after row {0}\nFirst chunk: {1}"
"Column names are different after row {0}\nFirst chunk: {1}" # noqa: UP030
"\nCurrent chunk: {2}".format(rows, sch[0], list(it.columns))
) # pylint: disable=E1136
if list(it.dtypes) != sch[1]: # pylint: disable=E1136
@@ -454,7 +454,7 @@ def __iter__(self):
errdf = errdf[errdf["diff"]]
errdf.to_csv(tdf, sep=",", index=False)
raise StreamingDataFrameSchemaError(
"Column types are different after row {0}. You may use option "
"Column types are different after row {0}. You may use option " # noqa: UP030
'dtype={{"column_name": str}} to force the type on this column.'
"\n---\n{1}".format(rows, tdf.getvalue())
)
@@ -502,9 +502,7 @@ def to_csv(self, path_or_buf=None, **kwargs) -> "StreamingDataFrame":
st = StringIO()
close = False
elif isinstance(path_or_buf, str):
-st = open( # pylint: disable=R1732
-path_or_buf, "w", encoding=kwargs.get("encoding")
-)
+st = open(path_or_buf, "w", encoding=kwargs.get("encoding")) # noqa: SIM115
close = True
else:
st = path_or_buf
@@ -537,7 +535,7 @@ def iterrows(self):
See :epkg:`pandas:DataFrame:iterrows`.
"""
for df in self:
-for it in df.iterrows():
+for it in df.iterrows(): # noqa: UP028
yield it

def head(self, n=5) -> pandas.DataFrame:
@@ -579,7 +577,8 @@ def where(self, *args, **kwargs) -> "StreamingDataFrame":
"""
kwargs["inplace"] = False
return StreamingDataFrame(
-lambda: map(lambda df: df.where(*args, **kwargs), self), **self.get_kwargs()
+lambda: map(lambda df: df.where(*args, **kwargs), self), # noqa: C417
+**self.get_kwargs(),
)

def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame":
@@ -608,7 +607,7 @@ def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame"
df = sdf.to_df()
return StreamingDataFrame.read_df(df, chunksize=df.shape[0])
return StreamingDataFrame(
-lambda: map(lambda df: df.sample(**kwargs), self),
+lambda: map(lambda df: df.sample(**kwargs), self), # noqa: C417
**self.get_kwargs(),
stable=False,
)
@@ -684,7 +683,7 @@ def drop(
if inplace:
raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")
return StreamingDataFrame(
-lambda: map(
+lambda: map( # noqa: C417
lambda df: df.drop(
labels,
axis=axis,
@@ -706,7 +705,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
<pandas_streaming.df.dataframe.StreamingDataFrame>`.
"""
return StreamingDataFrame(
-lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
+lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
+**self.get_kwargs(),
)

def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
Expand All @@ -716,7 +716,7 @@ def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
<pandas_streaming.df.dataframe.StreamingDataFrame>`.
"""
return StreamingDataFrame(
-lambda: map(lambda df: df.applymap(*args, **kwargs), self),
+lambda: map(lambda df: df.applymap(*args, **kwargs), self), # noqa: C417
**self.get_kwargs(),
)

@@ -773,7 +773,7 @@ def _concath(self, others):
others = [others]

def iterateh(self, others):
-cols = tuple([self] + others)
+cols = (self, *others)
for dfs in zip(*cols):
nrows = [_.shape[0] for _ in dfs]
if min(nrows) != max(nrows):
@@ -1382,7 +1382,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
)
if len(self.columns) != 1:
raise RuntimeError( # pragma: no cover
f"A series can contain only one column not " f"{len(self.columns)!r}."
f"A series can contain only one column not {len(self.columns)!r}."
)

def apply(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -1391,7 +1391,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
This function returns a @see cl StreamingSeries.
"""
return StreamingSeries(
-lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
+lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
+**self.get_kwargs(),
)

def __add__(self, value):
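Three rule families account for most of the `noqa` tags in this file. B008 (flake8-bugbear) flags calls such as `kwargs.copy()` in argument defaults because they run once, at function definition time; the `fct1`/`fct2`/`fct3` closures rely on exactly that early binding to freeze their arguments, so suppressing the rule is deliberate. UP028 (pyupgrade) would collapse a bare `for r in ...: yield r` loop into `yield from`, and SIM115 (flake8-simplify) wants `open()` used as a context manager; `to_csv` must keep the handle open while chunks stream through and closes it explicitly, hence the `noqa` there too. A standalone sketch of the three patterns (illustrative names, not the library's code):

    def make_reader(kwargs):
        # B008: a call in an argument default runs once, at definition time.
        # The closures above rely on that to freeze a copy of kwargs, which
        # is why the commit adds `# noqa: B008` instead of changing them.
        def read(kw=kwargs.copy()):  # noqa: B008
            return kw
        return read

    def iterate(chunks):
        # UP028: `for r in chunks: yield r` collapses to `yield from`.
        yield from chunks

    # SIM115: prefer a context manager so the file handle is always closed.
    with open("out.csv", "w", encoding="utf-8") as st:
        st.write("a,b\n")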