From b695ffeceb158097f321fa071879966f004fb88b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 1 Apr 2022 08:09:23 -0700 Subject: [PATCH] Bump black to 22.3.0 --- .pre-commit-config.yaml | 2 +- .../environment-3.10-jdk11-dev.yaml | 2 +- .../environment-3.10-jdk8-dev.yaml | 2 +- .../environment-3.8-jdk11-dev.yaml | 2 +- .../environment-3.8-jdk8-dev.yaml | 2 +- .../environment-3.9-jdk11-dev.yaml | 2 +- .../environment-3.9-jdk8-dev.yaml | 2 +- dask_sql/cmd.py | 9 ++++--- dask_sql/context.py | 3 ++- dask_sql/input_utils/convert.py | 6 ++++- dask_sql/java.py | 5 +++- dask_sql/physical/rel/custom/analyze.py | 5 +++- dask_sql/physical/rel/logical/aggregate.py | 5 +++- dask_sql/physical/rel/logical/join.py | 6 ++++- dask_sql/physical/rel/logical/window.py | 3 ++- dask_sql/physical/rex/core/call.py | 25 +++++++++++++---- dask_sql/physical/utils/filter.py | 19 ++++++++++--- dask_sql/server/app.py | 4 ++- docker/conda.txt | 2 +- setup.py | 8 ++++-- tests/integration/fixtures.py | 5 +++- tests/integration/test_cmd.py | 3 ++- tests/integration/test_create.py | 6 ++++- tests/integration/test_filter.py | 27 ++++++++++++++----- tests/integration/test_function.py | 10 +++---- tests/integration/test_groupby.py | 11 ++++++-- tests/integration/test_join.py | 19 +++++++++---- tests/integration/test_rex.py | 8 ++++-- tests/integration/test_schema.py | 2 +- tests/integration/test_select.py | 16 ++++++++--- tests/integration/test_show.py | 5 +++- tests/integration/test_sort.py | 16 +++++++++-- tests/integration/test_union.py | 3 ++- tests/unit/test_call.py | 8 ++++-- tests/unit/test_context.py | 24 ++++++++++++++--- tests/unit/test_utils.py | 8 ++++-- 36 files changed, 217 insertions(+), 68 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e50a5f1d5..50af0bbb4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black language_version: python3 diff --git a/continuous_integration/environment-3.10-jdk11-dev.yaml b/continuous_integration/environment-3.10-jdk11-dev.yaml index e58e00c87..eae7fc83d 100644 --- a/continuous_integration/environment-3.10-jdk11-dev.yaml +++ b/continuous_integration/environment-3.10-jdk11-dev.yaml @@ -5,7 +5,7 @@ channels: dependencies: - adagio>=0.2.3 - antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2021.11.1 diff --git a/continuous_integration/environment-3.10-jdk8-dev.yaml b/continuous_integration/environment-3.10-jdk8-dev.yaml index 4ad533e94..59183a513 100644 --- a/continuous_integration/environment-3.10-jdk8-dev.yaml +++ b/continuous_integration/environment-3.10-jdk8-dev.yaml @@ -5,7 +5,7 @@ channels: dependencies: - adagio>=0.2.3 - antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2021.11.1 diff --git a/continuous_integration/environment-3.8-jdk11-dev.yaml b/continuous_integration/environment-3.8-jdk11-dev.yaml index ddf84ab5a..1a00258f7 100644 --- a/continuous_integration/environment-3.8-jdk11-dev.yaml +++ b/continuous_integration/environment-3.8-jdk11-dev.yaml @@ -5,7 +5,7 @@ channels: dependencies: - adagio>=0.2.3 - antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2021.11.1 diff --git a/continuous_integration/environment-3.8-jdk8-dev.yaml 
b/continuous_integration/environment-3.8-jdk8-dev.yaml index bc6980584..9aaf4cd95 100644 --- a/continuous_integration/environment-3.8-jdk8-dev.yaml +++ b/continuous_integration/environment-3.8-jdk8-dev.yaml @@ -5,7 +5,7 @@ channels: dependencies: - adagio>=0.2.3 - antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2021.11.1 diff --git a/continuous_integration/environment-3.9-jdk11-dev.yaml b/continuous_integration/environment-3.9-jdk11-dev.yaml index 7f7b19be2..df68fedd4 100644 --- a/continuous_integration/environment-3.9-jdk11-dev.yaml +++ b/continuous_integration/environment-3.9-jdk11-dev.yaml @@ -5,7 +5,7 @@ channels: dependencies: - adagio>=0.2.3 - antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2021.11.1 diff --git a/continuous_integration/environment-3.9-jdk8-dev.yaml b/continuous_integration/environment-3.9-jdk8-dev.yaml index a7f35a132..f27acff9c 100644 --- a/continuous_integration/environment-3.9-jdk8-dev.yaml +++ b/continuous_integration/environment-3.9-jdk8-dev.yaml @@ -5,7 +5,7 @@ channels: dependencies: - adagio>=0.2.3 - antlr4-python3-runtime>=4.9.2 -- black=19.10b0 +- black=22.3.0 - ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2021.11.1 diff --git a/dask_sql/cmd.py b/dask_sql/cmd.py index a7ed0c6e2..7bd5894d1 100644 --- a/dask_sql/cmd.py +++ b/dask_sql/cmd.py @@ -78,8 +78,8 @@ def _parse_meta_command(sql): def _meta_commands(sql: str, context: Context, client: Client) -> Union[bool, Client]: """ - parses metacommands and prints their result - returns True if meta commands detected + parses metacommands and prints their result + returns True if meta commands detected """ cmd, schema_name = _parse_meta_command(sql) available_commands = [ @@ -147,7 +147,10 @@ def _meta_commands(sql: str, context: Context, client: Client) -> Union[bool, Cl def cmd_loop( - context: Context = None, client: Client = None, startup=False, log_level=None, + context: Context = None, + client: Client = None, + startup=False, + log_level=None, ): # pragma: no cover """ Run a REPL for answering SQL queries using ``dask-sql``. diff --git a/dask_sql/context.py b/dask_sql/context.py index 008ce76c1..98cc46e21 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -908,7 +908,8 @@ def _register_callable( if replace: schema.function_lists = list( filter( - lambda f: f.name.lower() != lower_name, schema.function_lists, + lambda f: f.name.lower() != lower_name, + schema.function_lists, ) ) del schema.functions[lower_name] diff --git a/dask_sql/input_utils/convert.py b/dask_sql/input_utils/convert.py index e43df2334..d730926ee 100644 --- a/dask_sql/input_utils/convert.py +++ b/dask_sql/input_utils/convert.py @@ -55,7 +55,11 @@ def to_dc( maybe persist them to cluster memory before. 
""" filled_get_dask_dataframe = lambda *args: cls._get_dask_dataframe( - *args, table_name=table_name, format=format, gpu=gpu, **kwargs, + *args, + table_name=table_name, + format=format, + gpu=gpu, + **kwargs, ) if isinstance(input_item, list): diff --git a/dask_sql/java.py b/dask_sql/java.py index adbbc50a9..27d340221 100644 --- a/dask_sql/java.py +++ b/dask_sql/java.py @@ -86,7 +86,10 @@ def _set_or_check_java_home(): logger.debug(f"Starting JVM from path {jvmpath}...") jpype.startJVM( - *jvmArgs, ignoreUnrecognized=True, convertStrings=False, jvmpath=jvmpath, + *jvmArgs, + ignoreUnrecognized=True, + convertStrings=False, + jvmpath=jvmpath, ) logger.debug("...having started JVM") diff --git a/dask_sql/physical/rel/custom/analyze.py b/dask_sql/physical/rel/custom/analyze.py index 6bd7a1bfa..860e22c2e 100644 --- a/dask_sql/physical/rel/custom/analyze.py +++ b/dask_sql/physical/rel/custom/analyze.py @@ -61,7 +61,10 @@ def convert( ) ) statistics = statistics.append( - pd.Series({col: col for col in columns}, name="col_name",) + pd.Series( + {col: col for col in columns}, + name="col_name", + ) ) cc = ColumnContainer(statistics.columns) diff --git a/dask_sql/physical/rel/logical/aggregate.py b/dask_sql/physical/rel/logical/aggregate.py index c8cfab62c..cae4e42ed 100644 --- a/dask_sql/physical/rel/logical/aggregate.py +++ b/dask_sql/physical/rel/logical/aggregate.py @@ -179,7 +179,10 @@ def convert( # Do all aggregates df_result, output_column_order = self._do_aggregations( - rel, dc, group_columns, context, + rel, + dc, + group_columns, + context, ) # SQL does not care about the index, but we do not want to have any multiindices diff --git a/dask_sql/physical/rel/logical/join.py b/dask_sql/physical/rel/logical/join.py index 1f2f02ed6..925396c91 100644 --- a/dask_sql/physical/rel/logical/join.py +++ b/dask_sql/physical/rel/logical/join.py @@ -100,7 +100,11 @@ def convert( # The resulting dataframe will contain all (renamed) columns from the lhs and rhs # plus the added columns df = self._join_on_columns( - df_lhs_renamed, df_rhs_renamed, lhs_on, rhs_on, join_type, + df_lhs_renamed, + df_rhs_renamed, + lhs_on, + rhs_on, + join_type, ) else: # 5. We are in the complex join case diff --git a/dask_sql/physical/rel/logical/window.py b/dask_sql/physical/rel/logical/window.py index adf3e6da3..e8541a41c 100644 --- a/dask_sql/physical/rel/logical/window.py +++ b/dask_sql/physical/rel/logical/window.py @@ -176,7 +176,8 @@ def map_on_each_group( upper_bound.is_current_row or upper_bound.offset == 0 ): windowed_group = partitioned_group.rolling( - window=lower_bound.offset + 1, min_periods=0, + window=lower_bound.offset + 1, + min_periods=0, ) else: lower_offset = lower_bound.offset if not lower_bound.is_current_row else 0 diff --git a/dask_sql/physical/rex/core/call.py b/dask_sql/physical/rex/core/call.py index b17ad38f7..55580e741 100644 --- a/dask_sql/physical/rex/core/call.py +++ b/dask_sql/physical/rex/core/call.py @@ -246,7 +246,10 @@ class IsFalseOperation(Operation): def __init__(self): super().__init__(self.false_) - def false_(self, df: SeriesOrScalar,) -> SeriesOrScalar: + def false_( + self, + df: SeriesOrScalar, + ) -> SeriesOrScalar: """ Returns true where `df` is false (where `df` can also be just a scalar). Returns false on nan. 
@@ -263,7 +266,10 @@ class IsTrueOperation(Operation): def __init__(self): super().__init__(self.true_) - def true_(self, df: SeriesOrScalar,) -> SeriesOrScalar: + def true_( + self, + df: SeriesOrScalar, + ) -> SeriesOrScalar: """ Returns true where `df` is true (where `df` can also be just a scalar). Returns false on nan. @@ -280,7 +286,10 @@ class NotOperation(Operation): def __init__(self): super().__init__(self.not_) - def not_(self, df: SeriesOrScalar,) -> SeriesOrScalar: + def not_( + self, + df: SeriesOrScalar, + ) -> SeriesOrScalar: """ Returns not `df` (where `df` can also be just a scalar). """ @@ -296,7 +305,10 @@ class IsNullOperation(Operation): def __init__(self): super().__init__(self.null) - def null(self, df: SeriesOrScalar,) -> SeriesOrScalar: + def null( + self, + df: SeriesOrScalar, + ) -> SeriesOrScalar: """ Returns true where `df` is null (where `df` can also be just a scalar). """ @@ -328,7 +340,10 @@ def __init__(self): super().__init__(self.regex) def regex( - self, test: SeriesOrScalar, regex: str, escape: str = None, + self, + test: SeriesOrScalar, + regex: str, + escape: str = None, ) -> SeriesOrScalar: """ Returns true, if the string test matches the given regex diff --git a/dask_sql/physical/utils/filter.py b/dask_sql/physical/utils/filter.py index 67e4026f5..ddfacd6ab 100644 --- a/dask_sql/physical/utils/filter.py +++ b/dask_sql/physical/utils/filter.py @@ -90,7 +90,8 @@ def attempt_predicate_pushdown(ddf: dd.DataFrame) -> dd.DataFrame: # Regenerate collection with filtered IO layer try: return dsk.layers[name]._regenerate_collection( - dsk, new_kwargs={io_layer: {"filters": filters}}, + dsk, + new_kwargs={io_layer: {"filters": filters}}, ) except ValueError as err: # Most-likely failed to apply filters in read_parquet. 
@@ -198,7 +199,10 @@ def __init__(self, layer, creation_info): self.creation_info = creation_info def _regenerate_collection( - self, dsk, new_kwargs: dict = None, _regen_cache: dict = None, + self, + dsk, + new_kwargs: dict = None, + _regen_cache: dict = None, ): """Regenerate a Dask collection for this layer using the provided inputs and key-word arguments @@ -223,7 +227,9 @@ def _regenerate_collection( else: inputs.append( dsk.layers[key]._regenerate_collection( - dsk, new_kwargs=new_kwargs, _regen_cache=_regen_cache, + dsk, + new_kwargs=new_kwargs, + _regen_cache=_regen_cache, ) ) @@ -334,7 +340,12 @@ def _blockwise_comparison_dnf(op, indices: list, dsk: RegenerableGraph): right = _get_blockwise_input(1, indices, dsk) def _inv(symbol: str): - return {">": "<", "<": ">", ">=": "<=", "<=": ">=",}.get(symbol, symbol) + return { + ">": "<", + "<": ">", + ">=": "<=", + "<=": ">=", + }.get(symbol, symbol) if is_arraylike(left) and hasattr(left, "item") and left.size == 1: left = left.item() diff --git a/dask_sql/server/app.py b/dask_sql/server/app.py index 34217629e..634de3856 100644 --- a/dask_sql/server/app.py +++ b/dask_sql/server/app.py @@ -272,7 +272,9 @@ def main(): # pragma: no cover def _init_app( - app: FastAPI, context: Context = None, client: dask.distributed.Client = None, + app: FastAPI, + context: Context = None, + client: dask.distributed.Client = None, ): app.c = context or Context() app.future_list = {} diff --git a/docker/conda.txt b/docker/conda.txt index b008852cd..d833ac877 100644 --- a/docker/conda.txt +++ b/docker/conda.txt @@ -20,5 +20,5 @@ dask-ml>=2022.1.22 scikit-learn>=0.24.2 intake>=0.6.0 pre-commit>=2.11.1 -black=19.10b0 +black=22.3.0 isort=5.7.0 diff --git a/setup.py b/setup.py index 2d04f3320..e63b9a3d5 100755 --- a/setup.py +++ b/setup.py @@ -109,7 +109,7 @@ def build(self): "scikit-learn>=0.24.2", "intake>=0.6.0", "pre-commit", - "black==19.10b0", + "black==22.3.0", "isort==5.7.0", ], "fugue": ["fugue[sql]>=0.5.3"], @@ -122,5 +122,9 @@ def build(self): }, zip_safe=False, cmdclass=cmdclass, - command_options={"build_sphinx": {"source_dir": ("setup.py", "docs"),}}, + command_options={ + "build_sphinx": { + "source_dir": ("setup.py", "docs"), + } + }, ) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 75b98a9f7..5be6a930b 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -57,7 +57,10 @@ def df_wide(): def df(): np.random.seed(42) return pd.DataFrame( - {"a": [1.0] * 100 + [2.0] * 200 + [3.0] * 400, "b": 10 * np.random.rand(700),} + { + "a": [1.0] * 100 + [2.0] * 200 + [3.0] * 400, + "b": 10 * np.random.rand(700), + } ) diff --git a/tests/integration/test_cmd.py b/tests/integration/test_cmd.py index dd8b05083..145b4962f 100644 --- a/tests/integration/test_cmd.py +++ b/tests/integration/test_cmd.py @@ -100,7 +100,8 @@ def test_meta_commands(c, client, capsys): assert "Schema not_exists not available\n" == captured.out with pytest.raises( - OSError, match="Timed out .* to tcp://localhost:8787 after 5 s", + OSError, + match="Timed out .* to tcp://localhost:8787 after 5 s", ): with dask_config.set({"distributed.comm.timeouts.connect": 5}): client = _meta_commands("\\dsc localhost:8787", context=c, client=client) diff --git a/tests/integration/test_create.py b/tests/integration/test_create.py index b6d513f4e..cac4a3099 100644 --- a/tests/integration/test_create.py +++ b/tests/integration/test_create.py @@ -35,7 +35,11 @@ def test_create_from_csv(c, df, temporary_data_file, gpu): @pytest.mark.parametrize( 
- "gpu", [False, pytest.param(True, marks=pytest.mark.gpu),], + "gpu", + [ + False, + pytest.param(True, marks=pytest.mark.gpu), + ], ) def test_cluster_memory(client, c, df, gpu): client.publish_dataset(df=dd.from_pandas(df, npartitions=1)) diff --git a/tests/integration/test_filter.py b/tests/integration/test_filter.py index 345b9d9e1..a5231a5e2 100644 --- a/tests/integration/test_filter.py +++ b/tests/integration/test_filter.py @@ -47,7 +47,8 @@ def test_filter_complicated(c, df): expected_df = df[((df["a"] < 3) & ((df["b"] > 1) & (df["b"] < 3)))] assert_frame_equal( - return_df, expected_df, + return_df, + expected_df, ) @@ -60,7 +61,8 @@ def test_filter_with_nan(c): else: expected_df = pd.DataFrame({"c": [3]}, dtype="float") assert_frame_equal( - return_df, expected_df, + return_df, + expected_df, ) @@ -69,13 +71,17 @@ def test_string_filter(c, string_table): return_df = return_df.compute() assert_frame_equal( - return_df, string_table.head(1), + return_df, + string_table.head(1), ) @pytest.mark.parametrize( "input_table", - ["datetime_table", pytest.param("gpu_datetime_table", marks=pytest.mark.gpu),], + [ + "datetime_table", + pytest.param("gpu_datetime_table", marks=pytest.mark.gpu), + ], ) def test_filter_cast_date(c, input_table, request): datetime_table = request.getfixturevalue(input_table) @@ -95,7 +101,10 @@ def test_filter_cast_date(c, input_table, request): @pytest.mark.parametrize( "input_table", - ["datetime_table", pytest.param("gpu_datetime_table", marks=pytest.mark.gpu),], + [ + "datetime_table", + pytest.param("gpu_datetime_table", marks=pytest.mark.gpu), + ], ) def test_filter_cast_timestamp(c, input_table, request): datetime_table = request.getfixturevalue(input_table) @@ -202,7 +211,13 @@ def test_filtered_csv(tmpdir, c): # any unexpected errors # Write simple csv dataset - df = pd.DataFrame({"a": [1, 2, 3] * 5, "b": range(15), "c": ["A"] * 15,},) + df = pd.DataFrame( + { + "a": [1, 2, 3] * 5, + "b": range(15), + "c": ["A"] * 15, + }, + ) dd.from_pandas(df, npartitions=3).to_csv(tmpdir + "/*.csv", index=False) # Read back with dask and apply WHERE query diff --git a/tests/integration/test_function.py b/tests/integration/test_function.py index a116e2459..c6342877c 100644 --- a/tests/integration/test_function.py +++ b/tests/integration/test_function.py @@ -9,7 +9,7 @@ def test_custom_function(c, df): def f(x): - return x ** 2 + return x**2 c.register_function(f, "f", [("x", np.float64)], np.float64) @@ -146,7 +146,7 @@ def f(row, k1, k2): def test_multiple_definitions(c, df_simple): def f(x): - return x ** 2 + return x**2 c.register_function(f, "f", [("x", np.float64)], np.float64) c.register_function(f, "f", [("x", np.int64)], np.int64) @@ -162,7 +162,7 @@ def f(x): assert_frame_equal(return_df.reset_index(drop=True), df_simple[["a", "b"]] ** 2) def f(x): - return x ** 3 + return x**3 c.register_function(f, "f", [("x", np.float64)], np.float64, replace=True) c.register_function(f, "f", [("x", np.int64)], np.int64) @@ -195,14 +195,14 @@ def test_aggregate_function(c): def test_reregistration(c): def f(x): - return x ** 2 + return x**2 # The same is fine c.register_function(f, "f", [("x", np.float64)], np.float64) c.register_function(f, "f", [("x", np.int64)], np.int64) def f(x): - return x ** 3 + return x**3 # A different not with pytest.raises(ValueError): diff --git a/tests/integration/test_groupby.py b/tests/integration/test_groupby.py index e6eba9060..0fef45679 100644 --- a/tests/integration/test_groupby.py +++ b/tests/integration/test_groupby.py @@ -93,7 +93,11 
@@ def test_group_by_filtered2(c): df = df.compute() expected_df = pd.DataFrame( - {"user_id": [1, 2, 3], "S1": [np.NaN, 4.0, np.NaN], "S2": [3, 4, 3],}, + { + "user_id": [1, 2, 3], + "S1": [np.NaN, 4.0, np.NaN], + "S2": [3, 4, 3], + }, ) assert_frame_equal(df, expected_df) @@ -351,7 +355,10 @@ def test_stats_aggregation(c, timeseries_df): @pytest.mark.parametrize( "input_table", - ["user_table_1", pytest.param("gpu_user_table_1", marks=pytest.mark.gpu),], + [ + "user_table_1", + pytest.param("gpu_user_table_1", marks=pytest.mark.gpu), + ], ) @pytest.mark.parametrize("split_out", [None, 2, 4]) def test_groupby_split_out(c, input_table, split_out, request): diff --git a/tests/integration/test_join.py b/tests/integration/test_join.py index fa9d626f9..44cd1e070 100644 --- a/tests/integration/test_join.py +++ b/tests/integration/test_join.py @@ -17,7 +17,8 @@ def test_join(c): {"user_id": [1, 1, 2, 2], "b": [3, 3, 1, 3], "c": [1, 2, 3, 3]} ) assert_frame_equal( - df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), expected_df, + df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), + expected_df, ) @@ -31,7 +32,8 @@ def test_join_inner(c): {"user_id": [1, 1, 2, 2], "b": [3, 3, 1, 3], "c": [1, 2, 3, 3]} ) assert_frame_equal( - df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), expected_df, + df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), + expected_df, ) @@ -71,7 +73,8 @@ def test_join_left(c): } ) assert_frame_equal( - df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), expected_df, + df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), + expected_df, ) @@ -91,7 +94,8 @@ def test_join_right(c): } ) assert_frame_equal( - df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), expected_df, + df.sort_values(["user_id", "b", "c"]).reset_index(drop=True), + expected_df, ) @@ -119,7 +123,12 @@ def test_join_complex(c): df = df.compute() df_expected = pd.DataFrame( - {"a": [1, 1, 2], "b": [1.1, 1.1, 2.2], "a0": [2, 3, 3], "b0": [2.2, 3.3, 3.3],} + { + "a": [1, 1, 2], + "b": [1.1, 1.1, 2.2], + "a0": [2, 3, 3], + "b0": [2.2, 3.3, 3.3], + } ) assert_frame_equal(df.sort_values(["a", "b0"]).reset_index(drop=True), df_expected) diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index 1b870699f..aa6096b9e 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -124,7 +124,10 @@ def test_random(c, df): @pytest.mark.parametrize( "input_table", - ["string_table", pytest.param("gpu_string_table", marks=pytest.mark.gpu),], + [ + "string_table", + pytest.param("gpu_string_table", marks=pytest.mark.gpu), + ], ) def test_not(c, input_table, request): string_table = request.getfixturevalue(input_table) @@ -505,7 +508,8 @@ def test_string_functions(c, gpu): ) assert_frame_equal( - df.head(1), expected_df, + df.head(1), + expected_df, ) diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index bf36e90dc..50cc90c3d 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -40,7 +40,7 @@ def test_function(c): c.sql("USE SCHEMA root") def f(x): - return x ** 2 + return x**2 c.register_function(f, "f", [("x", np.float64)], np.float64, schema_name="other") diff --git a/tests/integration/test_select.py b/tests/integration/test_select.py index f7c20df3d..c6ce6a5a7 100644 --- a/tests/integration/test_select.py +++ b/tests/integration/test_select.py @@ -58,7 +58,11 @@ def test_select_expr(c, df): result_df = result_df.compute() expected_df = 
pd.DataFrame( - {"a": df["a"] + 1, "bla": df["b"], '"df"."a" - 1': df["a"] - 1,} + { + "a": df["a"] + 1, + "bla": df["b"], + '"df"."a" - 1': df["a"] - 1, + } ) assert_frame_equal(result_df, expected_df) @@ -121,7 +125,10 @@ def test_timezones(c, datetime_table): @pytest.mark.parametrize( "input_table", - ["datetime_table", pytest.param("gpu_datetime_table", marks=pytest.mark.gpu),], + [ + "datetime_table", + pytest.param("gpu_datetime_table", marks=pytest.mark.gpu), + ], ) def test_date_casting(c, input_table, request): datetime_table = request.getfixturevalue(input_table) @@ -151,7 +158,10 @@ def test_date_casting(c, input_table, request): @pytest.mark.parametrize( "input_table", - ["datetime_table", pytest.param("gpu_datetime_table", marks=pytest.mark.gpu),], + [ + "datetime_table", + pytest.param("gpu_datetime_table", marks=pytest.mark.gpu), + ], ) def test_timestamp_casting(c, input_table, request): datetime_table = request.getfixturevalue(input_table) diff --git a/tests/integration/test_show.py b/tests/integration/test_show.py index 41e315a95..c9d217b66 100644 --- a/tests/integration/test_show.py +++ b/tests/integration/test_show.py @@ -78,7 +78,10 @@ def test_columns(c): expected_df = pd.DataFrame( { - "Column": ["user_id", "b",], + "Column": [ + "user_id", + "b", + ], "Type": ["bigint", "bigint"], "Extra": [""] * 2, "Comment": [""] * 2, diff --git a/tests/integration/test_sort.py b/tests/integration/test_sort.py index 5825e3567..9023da438 100644 --- a/tests/integration/test_sort.py +++ b/tests/integration/test_sort.py @@ -270,7 +270,12 @@ def test_sort_with_nan_many_partitions(gpu): xd = pd c = Context() - df = xd.DataFrame({"a": [float("nan"), 1] * 30, "b": [1, 2, 3] * 20,}) + df = xd.DataFrame( + { + "a": [float("nan"), 1] * 30, + "b": [1, 2, 3] * 20, + } + ) c.create_table("df", dd.from_pandas(df, npartitions=10)) df_result = ( @@ -294,7 +299,14 @@ def test_sort_with_nan_many_partitions(gpu): df_result = c.sql("SELECT * FROM df ORDER BY a").compute().reset_index(drop=True) - dd.assert_eq(df_result, xd.DataFrame({"a": [1] * 30 + [float("nan")] * 30,})) + dd.assert_eq( + df_result, + xd.DataFrame( + { + "a": [1] * 30 + [float("nan")] * 30, + } + ), + ) @pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)]) diff --git a/tests/integration/test_union.py b/tests/integration/test_union.py index 801102b95..e199b45d1 100644 --- a/tests/integration/test_union.py +++ b/tests/integration/test_union.py @@ -47,6 +47,7 @@ def test_union_mixed(c, df, long_table): long_table["II"] = long_table["I"] expected_df = pd.concat( - [df.rename(columns={"a": "I", "b": "II"}), long_table], ignore_index=True, + [df.rename(columns={"a": "I", "b": "II"}), long_table], + ignore_index=True, ) assert_frame_equal(result_df.reset_index(drop=True), expected_df) diff --git a/tests/unit/test_call.py b/tests/unit/test_call.py index 255c465bb..2af33551d 100644 --- a/tests/unit/test_call.py +++ b/tests/unit/test_call.py @@ -157,10 +157,14 @@ def test_simple_ops(): def test_math_operations(): assert_series_equal( - ops_mapping["abs"](-df1.a).compute(), pd.Series([1, 2, 3]), check_names=False, + ops_mapping["abs"](-df1.a).compute(), + pd.Series([1, 2, 3]), + check_names=False, ) assert_series_equal( - ops_mapping["round"](df1.a).compute(), pd.Series([1, 2, 3]), check_names=False, + ops_mapping["round"](df1.a).compute(), + pd.Series([1, 2, 3]), + check_names=False, ) assert_series_equal( ops_mapping["floor"](df1.a).compute(), diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py 
index b84f9fa11..697c0aee5 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -88,7 +88,14 @@ def test_explain(gpu): @pytest.mark.parametrize( - "gpu", [False, pytest.param(True, marks=pytest.mark.gpu,),], + "gpu", + [ + False, + pytest.param( + True, + marks=pytest.mark.gpu, + ), + ], ) def test_sql(gpu): c = Context() @@ -112,7 +119,14 @@ def test_sql(gpu): @pytest.mark.parametrize( - "gpu", [False, pytest.param(True, marks=pytest.mark.gpu,),], + "gpu", + [ + False, + pytest.param( + True, + marks=pytest.mark.gpu, + ), + ], ) def test_input_types(temporary_data_file, gpu): c = Context() @@ -151,7 +165,11 @@ def assert_correct_output(gpu): @pytest.mark.parametrize( - "gpu", [False, pytest.param(True, marks=pytest.mark.gpu),], + "gpu", + [ + False, + pytest.param(True, marks=pytest.mark.gpu), + ], ) def test_tables_from_stack(gpu): c = Context() diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 9016c0330..5b2df6563 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -70,7 +70,8 @@ def test_exception_parsing(): assert str(e) == expected e = ParsingException( - "SELECT * FROM df", """Lexical error at line 1, column 3. Message""", + "SELECT * FROM df", + """Lexical error at line 1, column 3. Message""", ) expected = """Can not parse the given SQL: Lexical error at line 1, column 3. Message @@ -97,7 +98,10 @@ def test_exception_parsing(): \tWHERE x = 3""" assert str(e) == expected - e = ParsingException("SELECT *", "Message",) + e = ParsingException( + "SELECT *", + "Message", + ) assert str(e) == "Message"
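
Note on the reformatting above: the mechanical churn in this patch follows from two style rules that changed between black 19.10b0 and 22.3.0 — the power operator now hugs simple operands (x ** 2 becomes x**2), and the "magic trailing comma" keeps any call or collection ending in a trailing comma exploded onto one element per line, which is why so many single-line argument lists were expanded. Below is a minimal, hypothetical sketch (not part of the patch) showing how to preview those rules locally with black 22.3.0 installed; SAMPLE is an invented snippet, not code from dask-sql.

    # Preview black 22.3.0's output for a snippet without touching any files.
    # Assumes black 22.3.0 is installed (pip install black==22.3.0); SAMPLE is illustrative only.
    import black

    SAMPLE = """
    def square(x):
        return x ** 2

    result = square(
        3,
    )
    """

    # format_str() applies the same rules the pre-commit hook enforces.
    print(black.format_str(SAMPLE, mode=black.Mode()))
    # Expected: "x ** 2" is rewritten to "x**2", and the trailing comma
    # keeps the call to square() expanded across multiple lines.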