modin-project · dchigarev · Sep 15, 2023 · Sep 12, 2023 · Sep 12, 2023 · Sep 12, 2023
@@ -26,16 +26,17 @@ env:
   MODIN_GITHUB_CI: true
 
 jobs:
-  lint-black:
-    name: lint (black)
+  lint-black-isort:
+    name: lint (black and isort)
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - uses: ./.github/actions/python-only
-      - run: pip install black
+      - run: pip install black "isort>=5.12"  # quoted so the shell does not treat > as a redirect
       # NOTE: keep the black command here in sync with the pre-commit hook in
       # /contributing/pre-commit
       - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
+      - run: isort . --check-only
 
   lint-mypy:
     name: lint (mypy)
@@ -77,7 +78,7 @@ jobs:
       - uses: ./.github/actions/upload-coverage
 
   test-clean-install:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     strategy:
       matrix:
         os:
@@ -99,7 +100,7 @@ jobs:
           MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
 
   test-internals:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -124,7 +125,7 @@ jobs:
       - uses: ./.github/actions/upload-coverage
 
   test-defaults:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -155,7 +156,7 @@ jobs:
       - uses: ./.github/actions/upload-coverage
 
   test-hdk:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -212,7 +213,7 @@ jobs:
 
   test-asv-benchmarks:
     if: github.event_name == 'pull_request'
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -322,7 +323,7 @@ jobs:
               "${{ steps.filter.outputs.ray }}" "${{ steps.filter.outputs.dask }}" >> $GITHUB_OUTPUT
 
   test-all-unidist:
-    needs: [lint-flake8, lint-black, execution-filter]
+    needs: [lint-flake8, lint-black-isort, execution-filter]
     if: github.event_name == 'push' || needs.execution-filter.outputs.unidist == 'true'
     runs-on: ubuntu-latest
     defaults:
@@ -387,7 +388,7 @@ jobs:
       - uses: ./.github/actions/upload-coverage
 
   test-all:
-    needs: [lint-flake8, lint-black, execution-filter]
+    needs: [lint-flake8, lint-black-isort, execution-filter]
     strategy:
       matrix:
         os:
@@ -521,7 +522,7 @@ jobs:
         if: matrix.os == 'windows'
 
   test-sanity:
-    needs: [lint-flake8, lint-black, execution-filter]
+    needs: [lint-flake8, lint-black-isort, execution-filter]
     if: github.event_name == 'pull_request'
     strategy:
       matrix:
@@ -644,7 +645,7 @@ jobs:
       - uses: ./.github/actions/upload-coverage
 
   test-experimental:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -673,7 +674,7 @@ jobs:
       - uses: ./.github/actions/upload-coverage
 
   test-pyarrow:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -703,7 +704,7 @@ jobs:
       - run: python -m pytest modin/pandas/test/test_io.py::TestCsv --verbose
 
   test-spreadsheet:
-    needs: [lint-flake8, lint-black]
+    needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest
     defaults:
       run:

@@ -19,23 +19,24 @@
 # define `MODIN_ASV_USE_IMPL` env var to choose library for using in performance
 # measurements
 
+import math
+
 import numpy as np
 import pandas._testing as tm
-import math
 
 from .utils import (
-    generate_dataframe,
-    gen_nan_data,
-    RAND_LOW,
-    RAND_HIGH,
-    random_string,
-    random_columns,
-    random_booleans,
     GROUPBY_NGROUPS,
     IMPL,
+    RAND_HIGH,
+    RAND_LOW,
     execute,
-    translator_groupby_ngroups,
+    gen_nan_data,
+    generate_dataframe,
     get_benchmark_shapes,
+    random_booleans,
+    random_columns,
+    random_string,
+    translator_groupby_ngroups,
     trigger_import,
 )
 

@@ -13,26 +13,24 @@
 
 """General Modin on HDK storage format benchmarks."""
 
+import numpy as np
+import pandas
+
+from ..benchmarks import TimeIndexing as TimeIndexingPandasExecution
+from ..benchmarks import TimeIndexingColumns as TimeIndexingColumnsPandasExecution
 from ..utils import (
-    generate_dataframe,
-    gen_nan_data,
-    RAND_LOW,
-    RAND_HIGH,
     GROUPBY_NGROUPS,
     IMPL,
+    RAND_HIGH,
+    RAND_LOW,
     execute,
-    translator_groupby_ngroups,
-    random_columns,
+    gen_nan_data,
+    generate_dataframe,
+    get_benchmark_shapes,
     random_booleans,
+    random_columns,
+    translator_groupby_ngroups,
     trigger_import,
-    get_benchmark_shapes,
-)
-import numpy as np
-import pandas
-
-from ..benchmarks import (
-    TimeIndexing as TimeIndexingPandasExecution,
-    TimeIndexingColumns as TimeIndexingColumnsPandasExecution,
 )
 
 

@@ -13,19 +13,18 @@
 
 """IO Modin on HDK storage format benchmarks."""
 
+from ..io.csv import TimeReadCsvTrueFalseValues  # noqa: F401
 from ..utils import (
-    generate_dataframe,
-    RAND_LOW,
-    RAND_HIGH,
     ASV_USE_IMPL,
     IMPL,
+    RAND_HIGH,
+    RAND_LOW,
+    generate_dataframe,
+    get_benchmark_shapes,
     get_shape_id,
     trigger_import,
-    get_benchmark_shapes,
 )
 
-from ..io.csv import TimeReadCsvTrueFalseValues  # noqa: F401
-
 
 class TimeReadCsvNames:
     shapes = get_benchmark_shapes("hdk.TimeReadCsvNames")

@@ -14,16 +14,16 @@
 import numpy as np
 
 from ..utils import (
-    generate_dataframe,
-    RAND_LOW,
-    RAND_HIGH,
     ASV_USE_IMPL,
     ASV_USE_STORAGE_FORMAT,
     IMPL,
+    RAND_HIGH,
+    RAND_LOW,
     execute,
+    generate_dataframe,
+    get_benchmark_shapes,
     get_shape_id,
     prepare_io_data,
-    get_benchmark_shapes,
 )
 
 

@@ -15,9 +15,9 @@
     ASV_USE_IMPL,
     IMPL,
     execute,
+    get_benchmark_shapes,
     get_shape_id,
     prepare_io_data_parquet,
-    get_benchmark_shapes,
 )
 
 

@@ -17,18 +17,19 @@
 from modin.pandas.utils import from_pandas
 
 try:
-    from modin.utils import to_pandas, to_numpy
+    from modin.utils import to_numpy, to_pandas
 except ImportError:
     # This provides compatibility with older versions of the Modin, allowing us to test old commits.
     from modin.pandas.utils import to_pandas
+
 import pandas
 
 from ..utils import (
-    gen_data,
-    generate_dataframe,
-    RAND_LOW,
     RAND_HIGH,
+    RAND_LOW,
     execute,
+    gen_data,
+    generate_dataframe,
     get_benchmark_shapes,
 )
 

@@ -13,27 +13,24 @@
 
 """Modin benchmarks utils."""
 
-from .compatibility import (
-    ASV_USE_IMPL,
-    ASV_USE_STORAGE_FORMAT,
-)
-from .data_shapes import RAND_LOW, RAND_HIGH, GROUPBY_NGROUPS, get_benchmark_shapes
 from .common import (
     IMPL,
     execute,
-    get_shape_id,
     gen_data,
     gen_nan_data,
     generate_dataframe,
+    get_shape_id,
     prepare_io_data,
     prepare_io_data_parquet,
-    random_string,
-    random_columns,
     random_booleans,
+    random_columns,
+    random_string,
+    setup,
     translator_groupby_ngroups,
     trigger_import,
-    setup,
 )
+from .compatibility import ASV_USE_IMPL, ASV_USE_STORAGE_FORMAT
+from .data_shapes import GROUPBY_NGROUPS, RAND_HIGH, RAND_LOW, get_benchmark_shapes
 
 __all__ = [
     "ASV_USE_IMPL",

@@ -20,19 +20,21 @@
 """
 
 import logging
-import modin.pandas
-import pandas
-import numpy as np
 import uuid
 from typing import Optional, Union
 
+import numpy as np
+import pandas
+
+import modin.pandas
+
 from .compatibility import (
-    ASV_USE_IMPL,
     ASV_DATASET_SIZE,
     ASV_USE_ENGINE,
+    ASV_USE_IMPL,
     ASV_USE_STORAGE_FORMAT,
 )
-from .data_shapes import RAND_LOW, RAND_HIGH
+from .data_shapes import RAND_HIGH, RAND_LOW
 
 POSSIBLE_IMPL = {
     "modin": modin.pandas,

@@ -14,6 +14,7 @@
 """Compatibility layer for parameters used by ASV."""
 
 import os
+
 import modin.pandas as pd
 
 try:
@@ -24,7 +25,7 @@
     NPARTITIONS = pd.DEFAULT_NPARTITIONS
 
 try:
-    from modin.config import TestDatasetSize, AsvImplementation, Engine, StorageFormat
+    from modin.config import AsvImplementation, Engine, StorageFormat, TestDatasetSize
 
     ASV_USE_IMPL = AsvImplementation.get()
     ASV_DATASET_SIZE = TestDatasetSize.get() or "Small"

@@ -13,10 +13,10 @@
 
 """Define data shapes."""
 
-import os
 import json
+import os
 
-from .compatibility import ASV_USE_STORAGE_FORMAT, ASV_DATASET_SIZE
+from .compatibility import ASV_DATASET_SIZE, ASV_USE_STORAGE_FORMAT
 
 RAND_LOW = 0
 RAND_HIGH = 1_000_000_000 if ASV_USE_STORAGE_FORMAT == "hdk" else 100

@@ -11,14 +11,14 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
-import pytest
-from unittest.mock import patch, mock_open, Mock
-import numpy as np
+from unittest.mock import Mock, mock_open, patch
 
-from benchmarks.utils import data_shapes, get_benchmark_shapes, execute
+import numpy as np
+import pytest
+from benchmarks.utils import data_shapes, execute, get_benchmark_shapes
 
-from modin.config import AsvDataSizeConfig
 import modin.pandas as pd
+from modin.config import AsvDataSizeConfig
 
 
 @pytest.mark.parametrize(

@@ -7,10 +7,11 @@
 ```
 """
 
-from github import Github
 import os
 import sys
 
+from github import Github
+
 # Check if this is a pull request or not based on the environment variable
 try:
     pr_id = int(os.environ["GITHUB_PR_NUMBER"].split("/")[-1])