Skip to content

Commit

Permalink
pre-commit: spell, whitespace, and mypy check (#1091)
Browse files Browse the repository at this point in the history
close #1077

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Peter Andreas Entschev (https://github.com/pentschev)

URL: #1091
  • Loading branch information
madsbk authored Jan 30, 2023
1 parent 4f0922c commit 43969d7
Show file tree
Hide file tree
Showing 18 changed files with 57 additions and 33 deletions.
23 changes: 23 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
Expand All @@ -11,5 +16,23 @@ repos:
rev: 3.8.3
hooks:
- id: flake8
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
exclude: |
(?x)^(
.*test.*|
^CHANGELOG.md$|
^.*versioneer.py$
)
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.991'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
args: ["--module=dask_cuda", "--ignore-missing-imports"]
pass_filenames: false

default_language_version:
python: python3
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ sphinx:
configuration: rtd/conf.py

formats:
- htmlzip
- htmlzip
2 changes: 1 addition & 1 deletion dask_cuda/benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def bandwidth_statistics(
logs:
the ``dask_worker.incoming_transfer_log`` object
ignore_size: int (optional)
ignore messsages whose total byte count is smaller than this
ignore messages whose total byte count is smaller than this
value (if provided)
Returns
Expand Down
2 changes: 1 addition & 1 deletion dask_cuda/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def cuda():
"--rmm-async/--no-rmm-async",
default=False,
show_default=True,
help="""Initialize each worker withh RMM and set it to use RMM's asynchronous
help="""Initialize each worker with RMM and set it to use RMM's asynchronous
allocator. See ``rmm.mr.CudaAsyncMemoryResource`` for more info.
.. warning::
Expand Down
4 changes: 2 additions & 2 deletions dask_cuda/disk_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ class SpillToDiskProperties:
def __init__(
self,
root_dir: Union[str, os.PathLike],
shared_filesystem: bool = None,
gds: bool = None,
shared_filesystem: Optional[bool] = None,
gds: Optional[bool] = None,
):
"""
Parameters
Expand Down
6 changes: 3 additions & 3 deletions dask_cuda/explicit_comms/dataframe/shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ async def send_recv_partitions(
myrank
The rank of this worker.
rank_to_out_part_ids
dict that for each worker rank specifices a set of output partition IDs.
dict that for each worker rank specifies a set of output partition IDs.
If the worker shouldn't return any partitions, it is excluded from the
dict. Partition IDs are global integers `0..npartitions` and correspond
to the dict keys returned by `group_split_dispatch`.
Expand Down Expand Up @@ -332,9 +332,9 @@ async def shuffle_task(
stage_name: str
Name of the stage to retrieve the input keys from.
rank_to_inkeys: dict
dict that for each worker rank specifices the set of staged input keys.
dict that for each worker rank specifies the set of staged input keys.
rank_to_out_part_ids: dict
dict that for each worker rank specifices a set of output partition IDs.
dict that for each worker rank specifies a set of output partition IDs.
If the worker shouldn't return any partitions, it is excluded from the
dict. Partition IDs are global integers `0..npartitions` and correspond
to the dict keys returned by `group_split_dispatch`.
Expand Down
2 changes: 1 addition & 1 deletion dask_cuda/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _create_cuda_context():
try:
distributed.comm.ucx.init_once()
except ModuleNotFoundError:
# UCX intialization has to be delegated to Distributed, it will take care
# UCX initialization has to be delegated to Distributed, it will take care
# of setting correct environment variables and importing `ucp` after that.
# Therefore if ``import ucp`` fails we can just continue here.
pass
Expand Down
2 changes: 1 addition & 1 deletion dask_cuda/is_spillable_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def is_device_object_cudf_index(s):


def cudf_spilling_status() -> Optional[bool]:
"""Check the status of cudf's build-in spilling
"""Check the status of cudf's built-in spilling
Returns:
- True if cudf's internal spilling is enabled, or
Expand Down
10 changes: 6 additions & 4 deletions dask_cuda/proxify_device_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _register_incompatible_types():
"""Lazy register types that ProxifyHostFile should unproxify on retrieval.
It reads the config key "jit-unspill-incompatible"
(DASK_JIT_UNSPILL_INCOMPATIBLE), which should be a comma seperated
(DASK_JIT_UNSPILL_INCOMPATIBLE), which should be a comma separated
list of types. The default value is:
DASK_JIT_UNSPILL_INCOMPATIBLE="cupy.ndarray"
"""
Expand Down Expand Up @@ -51,8 +51,8 @@ def f(paths):

def proxify_device_objects(
obj: T,
proxied_id_to_proxy: MutableMapping[int, ProxyObject] = None,
found_proxies: List[ProxyObject] = None,
proxied_id_to_proxy: Optional[MutableMapping[int, ProxyObject]] = None,
found_proxies: Optional[List[ProxyObject]] = None,
excl_proxies: bool = False,
mark_as_explicit_proxies: bool = False,
) -> T:
Expand Down Expand Up @@ -135,7 +135,9 @@ def unproxify_device_objects(
pxy = obj._pxy_get(copy=True)
if only_incompatible_types:
if incompatible_types and isinstance(obj, incompatible_types):
obj = obj._pxy_deserialize(maybe_evict=False, proxy_detail=pxy)
obj = obj._pxy_deserialize( # type: ignore
maybe_evict=False, proxy_detail=pxy
)
elif not skip_explicit_proxies or not pxy.explicit_proxy:
pxy.explicit_proxy = False
obj = obj._pxy_deserialize(maybe_evict=False, proxy_detail=pxy)
Expand Down
14 changes: 7 additions & 7 deletions dask_cuda/proxify_host_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class ProxiesOnDevice(Proxies):
In this case the tally of the total device memory usage is incorrect.
"""

def __init__(self):
def __init__(self) -> None:
super().__init__()
self.proxy_id_to_dev_mems: Dict[int, Set[DeviceMemoryId]] = {}
self.dev_mem_to_proxy_ids: DefaultDict[DeviceMemoryId, Set[int]] = defaultdict(
Expand Down Expand Up @@ -477,7 +477,7 @@ class ProxifyHostFile(MutableMapping):
spill_on_demand: bool or None, default None
Enables spilling when the RMM memory pool goes out of memory. If ``None``,
the "spill-on-demand" config value is used, which defaults to True.
Notice, enabling this does nothing when RMM isn't availabe or not used.
Notice, enabling this does nothing when RMM isn't available or not used.
gds_spilling: bool
Enable GPUDirect Storage spilling. If ``None``, the "gds-spilling" config
value is used, which defaults to ``False``.
Expand All @@ -497,10 +497,10 @@ def __init__(
*,
device_memory_limit: int,
memory_limit: int,
shared_filesystem: bool = None,
compatibility_mode: bool = None,
spill_on_demand: bool = None,
gds_spilling: bool = None,
shared_filesystem: Optional[bool] = None,
compatibility_mode: Optional[bool] = None,
spill_on_demand: Optional[bool] = None,
gds_spilling: Optional[bool] = None,
):
if cudf_spilling_status():
warnings.warn(
Expand Down Expand Up @@ -635,7 +635,7 @@ def evict(self) -> int:
def fast(self):
"""Alternative access to `.evict()` used by Dask
Dask expects `.fast.evict()` to be availabe for manually triggering
Dask expects `.fast.evict()` to be available for manually triggering
of CPU-to-Disk spilling.
"""
if len(self.manager._host) == 0:
Expand Down
10 changes: 6 additions & 4 deletions dask_cuda/proxy_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@


def asproxy(
obj: object, serializers: Iterable[str] = None, subclass: Type["ProxyObject"] = None
obj: object,
serializers: Optional[Iterable[str]] = None,
subclass: Optional[Type["ProxyObject"]] = None,
) -> "ProxyObject":
"""Wrap `obj` in a ProxyObject object if it isn't already.
Expand Down Expand Up @@ -344,7 +346,7 @@ class ProxyObject:
Attributes
----------
_pxy: ProxyDetail
Details of all proxy information of the underlaying proxied object.
Details of all proxy information of the underlying proxied object.
Access to _pxy is not pass-through to the proxied object, which is
the case for most other access to the ProxyObject.
Expand Down Expand Up @@ -380,7 +382,7 @@ def __del__(self):
def _pxy_serialize(
self,
serializers: Iterable[str],
proxy_detail: ProxyDetail = None,
proxy_detail: Optional[ProxyDetail] = None,
) -> None:
"""Inplace serialization of the proxied object using the `serializers`
Expand Down Expand Up @@ -410,7 +412,7 @@ def _pxy_serialize(
self._pxy_cache.pop("device_memory_objects", None)

def _pxy_deserialize(
self, maybe_evict: bool = True, proxy_detail: ProxyDetail = None
self, maybe_evict: bool = True, proxy_detail: Optional[ProxyDetail] = None
):
"""Inplace deserialization of the proxied object
Expand Down
2 changes: 1 addition & 1 deletion dask_cuda/tests/test_cudf_builtin_spilling.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

@pytest.fixture
def manager(request):
"""Fixture to enable and make a spilling manager availabe"""
"""Fixture to enable and make a spilling manager available"""
kwargs = dict(getattr(request, "param", {}))
set_global_manager(manager=SpillManager(**kwargs))
yield get_global_manager()
Expand Down
2 changes: 1 addition & 1 deletion dask_cuda/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ def get_gpu_uuid_from_index(device_index=0):
def get_worker_config(dask_worker):
from .proxify_host_file import ProxifyHostFile

# assume homogenous cluster
# assume homogeneous cluster
plugin_vals = dask_worker.plugins.values()
ret = {}

Expand Down
2 changes: 1 addition & 1 deletion docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
1 change: 0 additions & 1 deletion docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,3 @@ Explicit-comms
.. currentmodule:: dask_cuda.explicit_comms.comms
.. autoclass:: CommsContext
:members:

1 change: 0 additions & 1 deletion docs/source/examples/best-practices.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,3 @@ With UCX and NVLink, we greatly reduced the wall clock time to: ``347.43 ms +/-
0 | ucx://127.0.0.1:35954
1 | ucx://127.0.0.1:53584
================================================================================

3 changes: 1 addition & 2 deletions docs/source/ucx.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,7 @@ therefore do something like the following:

.. note::

To confirm that no bad fork calls are occuring, start jobs with
To confirm that no bad fork calls are occurring, start jobs with
``UCX_IB_FORK_INIT=n``. UCX will produce a warning ``UCX WARN IB:
ibv_fork_init() was disabled or failed, yet a fork() has been
issued.`` if the application calls ``fork()``.

2 changes: 1 addition & 1 deletion rtd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

0 comments on commit 43969d7

Please sign in to comment.