REFACTOR-#4902: use isort

Signed-off-by: Anatoly Myachev <[email protected]>
modin-project · Sep 12, 2023 · 6a58edd · 6a58edd
1 parent def4722
commit 6a58edd
Show file tree

Hide file tree

Showing 282 changed files with 1,753 additions and 2,389 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -26,16 +26,17 @@ env:
   MODIN_GITHUB_CI: true
 
 jobs:
-  lint-black:
-    name: lint (black)
+  lint-black-isort:
+    name: lint (black and isort)
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - uses: ./.github/actions/python-only
-      - run: pip install black
+      - run: pip install black "isort>=5.12"
       # NOTE: keep the black command here in sync with the pre-commit hook in
       # /contributing/pre-commit
       - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
+      - run: isort . --check-only
 
   lint-mypy:
     name: lint (mypy)

diff --git a/asv_bench/benchmarks/benchmarks.py b/asv_bench/benchmarks/benchmarks.py
@@ -19,25 +19,15 @@
 # define `MODIN_ASV_USE_IMPL` env var to choose library for using in performance
 # measurements
 
+import math
+
 import numpy as np
 import pandas._testing as tm
-import math
 
-from .utils import (
-    generate_dataframe,
-    gen_nan_data,
-    RAND_LOW,
-    RAND_HIGH,
-    random_string,
-    random_columns,
-    random_booleans,
-    GROUPBY_NGROUPS,
-    IMPL,
-    execute,
-    translator_groupby_ngroups,
-    get_benchmark_shapes,
-    trigger_import,
-)
+from .utils import (GROUPBY_NGROUPS, IMPL, RAND_HIGH, RAND_LOW, execute,
+                    gen_nan_data, generate_dataframe, get_benchmark_shapes,
+                    random_booleans, random_columns, random_string,
+                    translator_groupby_ngroups, trigger_import)
 
 
 class BaseTimeGroupBy:

diff --git a/asv_bench/benchmarks/hdk/benchmarks.py b/asv_bench/benchmarks/hdk/benchmarks.py
@@ -13,27 +13,16 @@
 
 """General Modin on HDK storage format benchmarks."""
 
-from ..utils import (
-    generate_dataframe,
-    gen_nan_data,
-    RAND_LOW,
-    RAND_HIGH,
-    GROUPBY_NGROUPS,
-    IMPL,
-    execute,
-    translator_groupby_ngroups,
-    random_columns,
-    random_booleans,
-    trigger_import,
-    get_benchmark_shapes,
-)
 import numpy as np
 import pandas
 
-from ..benchmarks import (
-    TimeIndexing as TimeIndexingPandasExecution,
-    TimeIndexingColumns as TimeIndexingColumnsPandasExecution,
-)
+from ..benchmarks import TimeIndexing as TimeIndexingPandasExecution
+from ..benchmarks import \
+    TimeIndexingColumns as TimeIndexingColumnsPandasExecution
+from ..utils import (GROUPBY_NGROUPS, IMPL, RAND_HIGH, RAND_LOW, execute,
+                     gen_nan_data, generate_dataframe, get_benchmark_shapes,
+                     random_booleans, random_columns,
+                     translator_groupby_ngroups, trigger_import)
 
 
 class TimeJoin:

diff --git a/asv_bench/benchmarks/hdk/io.py b/asv_bench/benchmarks/hdk/io.py
@@ -13,18 +13,10 @@
 
 """IO Modin on HDK storage format benchmarks."""
 
-from ..utils import (
-    generate_dataframe,
-    RAND_LOW,
-    RAND_HIGH,
-    ASV_USE_IMPL,
-    IMPL,
-    get_shape_id,
-    trigger_import,
-    get_benchmark_shapes,
-)
-
 from ..io.csv import TimeReadCsvTrueFalseValues  # noqa: F401
+from ..utils import (ASV_USE_IMPL, IMPL, RAND_HIGH, RAND_LOW,
+                     generate_dataframe, get_benchmark_shapes, get_shape_id,
+                     trigger_import)
 
 
 class TimeReadCsvNames:

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
@@ -13,18 +13,9 @@
 
 import numpy as np
 
-from ..utils import (
-    generate_dataframe,
-    RAND_LOW,
-    RAND_HIGH,
-    ASV_USE_IMPL,
-    ASV_USE_STORAGE_FORMAT,
-    IMPL,
-    execute,
-    get_shape_id,
-    prepare_io_data,
-    get_benchmark_shapes,
-)
+from ..utils import (ASV_USE_IMPL, ASV_USE_STORAGE_FORMAT, IMPL, RAND_HIGH,
+                     RAND_LOW, execute, generate_dataframe,
+                     get_benchmark_shapes, get_shape_id, prepare_io_data)
 
 
 class BaseReadCsv:

diff --git a/asv_bench/benchmarks/io/parquet.py b/asv_bench/benchmarks/io/parquet.py
@@ -11,14 +11,8 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
-from ..utils import (
-    ASV_USE_IMPL,
-    IMPL,
-    execute,
-    get_shape_id,
-    prepare_io_data_parquet,
-    get_benchmark_shapes,
-)
+from ..utils import (ASV_USE_IMPL, IMPL, execute, get_benchmark_shapes,
+                     get_shape_id, prepare_io_data_parquet)
 
 
 class TimeReadParquet:

diff --git a/asv_bench/benchmarks/scalability/scalability_benchmarks.py b/asv_bench/benchmarks/scalability/scalability_benchmarks.py
@@ -17,20 +17,15 @@
 from modin.pandas.utils import from_pandas
 
 try:
-    from modin.utils import to_pandas, to_numpy
+    from modin.utils import to_numpy, to_pandas
 except ImportError:
     # This provides compatibility with older versions of the Modin, allowing us to test old commits.
     from modin.pandas.utils import to_pandas
+
 import pandas
 
-from ..utils import (
-    gen_data,
-    generate_dataframe,
-    RAND_LOW,
-    RAND_HIGH,
-    execute,
-    get_benchmark_shapes,
-)
+from ..utils import (RAND_HIGH, RAND_LOW, execute, gen_data,
+                     generate_dataframe, get_benchmark_shapes)
 
 
 class TimeFromPandas:

diff --git a/asv_bench/benchmarks/utils/__init__.py b/asv_bench/benchmarks/utils/__init__.py
@@ -13,27 +13,13 @@
 
 """Modin benchmarks utils."""
 
-from .compatibility import (
-    ASV_USE_IMPL,
-    ASV_USE_STORAGE_FORMAT,
-)
-from .data_shapes import RAND_LOW, RAND_HIGH, GROUPBY_NGROUPS, get_benchmark_shapes
-from .common import (
-    IMPL,
-    execute,
-    get_shape_id,
-    gen_data,
-    gen_nan_data,
-    generate_dataframe,
-    prepare_io_data,
-    prepare_io_data_parquet,
-    random_string,
-    random_columns,
-    random_booleans,
-    translator_groupby_ngroups,
-    trigger_import,
-    setup,
-)
+from .common import (IMPL, execute, gen_data, gen_nan_data, generate_dataframe,
+                     get_shape_id, prepare_io_data, prepare_io_data_parquet,
+                     random_booleans, random_columns, random_string, setup,
+                     translator_groupby_ngroups, trigger_import)
+from .compatibility import ASV_USE_IMPL, ASV_USE_STORAGE_FORMAT
+from .data_shapes import (GROUPBY_NGROUPS, RAND_HIGH, RAND_LOW,
+                          get_benchmark_shapes)
 
 __all__ = [
     "ASV_USE_IMPL",

diff --git a/asv_bench/benchmarks/utils/common.py b/asv_bench/benchmarks/utils/common.py
@@ -20,19 +20,17 @@
 """
 
 import logging
-import modin.pandas
-import pandas
-import numpy as np
 import uuid
 from typing import Optional, Union
 
-from .compatibility import (
-    ASV_USE_IMPL,
-    ASV_DATASET_SIZE,
-    ASV_USE_ENGINE,
-    ASV_USE_STORAGE_FORMAT,
-)
-from .data_shapes import RAND_LOW, RAND_HIGH
+import numpy as np
+import pandas
+
+import modin.pandas
+
+from .compatibility import (ASV_DATASET_SIZE, ASV_USE_ENGINE, ASV_USE_IMPL,
+                            ASV_USE_STORAGE_FORMAT)
+from .data_shapes import RAND_HIGH, RAND_LOW
 
 POSSIBLE_IMPL = {
     "modin": modin.pandas,

diff --git a/asv_bench/benchmarks/utils/compatibility.py b/asv_bench/benchmarks/utils/compatibility.py
@@ -14,6 +14,7 @@
 """Compatibility layer for parameters used by ASV."""
 
 import os
+
 import modin.pandas as pd
 
 try:
@@ -24,7 +25,8 @@
     NPARTITIONS = pd.DEFAULT_NPARTITIONS
 
 try:
-    from modin.config import TestDatasetSize, AsvImplementation, Engine, StorageFormat
+    from modin.config import (AsvImplementation, Engine, StorageFormat,
+                              TestDatasetSize)
 
     ASV_USE_IMPL = AsvImplementation.get()
     ASV_DATASET_SIZE = TestDatasetSize.get() or "Small"

diff --git a/asv_bench/benchmarks/utils/data_shapes.py b/asv_bench/benchmarks/utils/data_shapes.py
@@ -13,10 +13,10 @@
 
 """Define data shapes."""
 
-import os
 import json
+import os
 
-from .compatibility import ASV_USE_STORAGE_FORMAT, ASV_DATASET_SIZE
+from .compatibility import ASV_DATASET_SIZE, ASV_USE_STORAGE_FORMAT
 
 RAND_LOW = 0
 RAND_HIGH = 1_000_000_000 if ASV_USE_STORAGE_FORMAT == "hdk" else 100

diff --git a/asv_bench/test/test_utils.py b/asv_bench/test/test_utils.py
@@ -11,14 +11,14 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
-import pytest
-from unittest.mock import patch, mock_open, Mock
-import numpy as np
+from unittest.mock import Mock, mock_open, patch
 
-from benchmarks.utils import data_shapes, get_benchmark_shapes, execute
+import numpy as np
+import pytest
+from benchmarks.utils import data_shapes, execute, get_benchmark_shapes
 
-from modin.config import AsvDataSizeConfig
 import modin.pandas as pd
+from modin.config import AsvDataSizeConfig
 
 
 @pytest.mark.parametrize(

diff --git a/ci/teamcity/comment_on_pr.py b/ci/teamcity/comment_on_pr.py
@@ -7,10 +7,11 @@
 ```
 """
 
-from github import Github
 import os
 import sys
 
+from github import Github
+
 # Check if this is a pull request or not based on the environment variable
 try:
     pr_id = int(os.environ["GITHUB_PR_NUMBER"].split("/")[-1])

diff --git a/docs/conf.py b/docs/conf.py
@@ -6,9 +6,9 @@
 # full list see the documentation:
 # http://www.sphinx-doc.org/en/stable/config
 
+import os
 # -- Project information -----------------------------------------------------
 import sys
-import os
 import types
 
 import ray
@@ -54,7 +54,6 @@ def noop_decorator(*args, **kwargs):
 
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 import modin
-
 from modin.config.__main__ import export_config_help
 
 configs_file_path = os.path.abspath(

diff --git a/environment-dev.yml b/environment-dev.yml
@@ -60,6 +60,7 @@ dependencies:
   - flake8-print>=5.0.0
   - mypy>=1.0.0
   - pandas-stubs>=2.0.0
+  - isort>=5.12
 
   - pip:
       - asv==0.5.1

diff --git a/examples/docker/modin-hdk/census-hdk.py b/examples/docker/modin-hdk/census-hdk.py
@@ -12,11 +12,11 @@
 # governing permissions and limitations under the License.
 
 import sys
-from utils import measure
-import modin.pandas as pd
-
 
 import numpy as np
+from utils import measure
+
+import modin.pandas as pd
 
 
 def read(filename):
@@ -192,12 +192,12 @@ def cod(y_test, y_pred):
 
 def ml(X, y, random_state, n_runs, test_size):
     # to not install ML dependencies unless required
-    from sklearn import config_context
     import sklearnex
+    from sklearn import config_context
 
     sklearnex.patch_sklearn()
-    from sklearn.model_selection import train_test_split
     import sklearn.linear_model as lm
+    from sklearn.model_selection import train_test_split
 
     clf = lm.Ridge()
 

diff --git a/examples/docker/modin-hdk/nyc-taxi-hdk.py b/examples/docker/modin-hdk/nyc-taxi-hdk.py
@@ -12,10 +12,12 @@
 # governing permissions and limitations under the License.
 
 import sys
+
 from utils import measure
+
 import modin.pandas as pd
-from modin.pandas.test.utils import df_equals
 from modin.experimental.sql import query
+from modin.pandas.test.utils import df_equals
 
 
 def read(filename):

diff --git a/examples/docker/modin-hdk/plasticc-hdk.py b/examples/docker/modin-hdk/plasticc-hdk.py
@@ -14,10 +14,11 @@
 import sys
 from collections import OrderedDict
 from functools import partial
-from utils import measure
-import modin.pandas as pd
 
 import numpy as np
+from utils import measure
+
+import modin.pandas as pd
 
 
 ################ helper functions ###############################
@@ -194,8 +195,8 @@ def etl(df, df_meta):
 
 def ml(train_final, test_final):
     # to not install ML dependencies unless required
-    import xgboost as xgb
     import sklearnex
+    import xgboost as xgb
 
     sklearnex.patch_sklearn()