Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize buffer utility functions #546

Merged
merged 33 commits into from
Aug 12, 2020
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
376df5c
Space out code from docstring
jakirkham Aug 11, 2020
96d0802
Space out license header and directives
jakirkham Aug 11, 2020
7d196b5
Include 2020 in copyright
jakirkham Aug 11, 2020
0e34465
Use double quotes
jakirkham Aug 11, 2020
468e5ae
Drop `None` from `default` in `.get(...)`
jakirkham Aug 11, 2020
79d7673
Just use `getattr` instead of `hasattr`
jakirkham Aug 11, 2020
8b11ecb
Assign `strides` from `iface`
jakirkham Aug 11, 2020
4eff637
Include `itemsize` in `nbytes` reduction
jakirkham Aug 11, 2020
64d874e
Inline message into `ValueError`
jakirkham Aug 11, 2020
2b0d514
Drop old Numba workaround
jakirkham Aug 11, 2020
64ef5ec
Let Cython coerce `uintptr_t` to a Python `int`
jakirkham Aug 11, 2020
1115256
Get `readonly` from the buffer protocol too
jakirkham Aug 11, 2020
60b448c
Inline error message
jakirkham Aug 11, 2020
b840202
Drop unused import
jakirkham Aug 11, 2020
7fec7d7
Assign `len(shape)` to `ndim` for simplicity
jakirkham Aug 11, 2020
7a01b19
Assign `min_size` if `check_min_size` has a value
jakirkham Aug 11, 2020
5888c3a
Drop unused local imports
jakirkham Aug 11, 2020
90bef34
Type variables and switch to `for`-loops
jakirkham Aug 11, 2020
dd614ab
Enable some Cython directives for faster indexing
jakirkham Aug 11, 2020
113e679
Check `strides is not None` first
jakirkham Aug 11, 2020
ecf870b
Try to make isort and flake8 both happy
jakirkham Aug 11, 2020
84eedca
Just use `memoryview` for NumPy as well
jakirkham Aug 11, 2020
686ff3e
Check `memoryview` is C-contiguous
jakirkham Aug 11, 2020
55ce3d2
Define `min_size` near usage
jakirkham Aug 11, 2020
920c3b8
Add `.pxd` file for Cython `utils`
jakirkham Aug 11, 2020
840ad9b
Add `__init__.pxd` for `cimport`s
jakirkham Aug 11, 2020
345ccab
Use relative `cimport` for `ucx_api_dep`
jakirkham Aug 11, 2020
bcb7aea
Use `cimport` for `get_buffer_data`
jakirkham Aug 11, 2020
9825388
Drop `uintptr_t` cast with `get_buffer_data`
jakirkham Aug 11, 2020
4c09768
Use relative `topological_distance_dep` `cimport`
jakirkham Aug 11, 2020
e8d164e
Run `isort` on `ucp/_libs/ucx_api.pyx`
jakirkham Aug 11, 2020
3021780
Split out directives from license comment
jakirkham Aug 11, 2020
a701d64
Add 2020 copyright header
jakirkham Aug 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ucp/_libs/__init__.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3
2 changes: 1 addition & 1 deletion ucp/_libs/topological_distance.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# cython: language_level=3

import pynvml
from topological_distance_dep cimport *
from .topological_distance_dep cimport *


cdef class TopologicalDistance:
Expand Down
20 changes: 7 additions & 13 deletions ucp/_libs/ucx_api.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ from libc.stdint cimport uintptr_t
from libc.stdio cimport FILE, fclose, fflush
from libc.stdlib cimport free
from libc.string cimport memset
from ucx_api_dep cimport *

from .ucx_api_dep cimport *
from .utils cimport get_buffer_data

from ..exceptions import (
UCXCanceled,
Expand All @@ -22,7 +24,6 @@ from ..exceptions import (
log_errors,
)
from ..utils import nvtx_annotate
from .utils import get_buffer_data


# Struct used as requests by UCX
Expand Down Expand Up @@ -722,9 +723,7 @@ def tag_send_nb(
name: str, optional
Descriptive name of the operation
"""
cdef void *data = <void*><uintptr_t>(
get_buffer_data(buffer, check_writable=False)
)
cdef void *data = <void*>get_buffer_data(buffer, check_writable=False)
cdef ucp_send_callback_t _send_cb = <ucp_send_callback_t>_send_callback
cdef ucs_status_ptr_t status = ucp_tag_send_nb(
ep._handle,
Expand Down Expand Up @@ -836,9 +835,7 @@ def tag_recv_nb(
when the `worker` closes.
"""

cdef void *data = <void*><uintptr_t>(
get_buffer_data(buffer, check_writable=True)
)
cdef void *data = <void*>get_buffer_data(buffer, check_writable=True)
cdef ucp_tag_recv_callback_t _tag_recv_cb = (
<ucp_tag_recv_callback_t>_tag_recv_callback
)
Expand Down Expand Up @@ -904,8 +901,7 @@ def stream_send_nb(
name: str, optional
Descriptive name of the operation
"""
cdef void *data = <void*><uintptr_t>(get_buffer_data(buffer,
check_writable=False))
cdef void *data = <void*>get_buffer_data(buffer, check_writable=False)
cdef ucp_send_callback_t _send_cb = <ucp_send_callback_t>_send_callback
cdef ucs_status_ptr_t status = ucp_stream_send_nb(
ep._handle,
Expand Down Expand Up @@ -999,9 +995,7 @@ def stream_recv_nb(
Descriptive name of the operation
"""

cdef void *data = <void*><uintptr_t>(
get_buffer_data(buffer, check_writable=True)
)
cdef void *data = <void*>get_buffer_data(buffer, check_writable=True)
cdef size_t length
cdef ucp_stream_recv_callback_t _stream_recv_cb = (
<ucp_stream_recv_callback_t>_stream_recv_callback
Expand Down
11 changes: 11 additions & 0 deletions ucp/_libs/utils.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3


from libc.stdint cimport uintptr_t


cpdef uintptr_t get_buffer_data(buffer, bint check_writable=*) except *
cpdef Py_ssize_t get_buffer_nbytes(buffer, check_min_size, bint cuda_support) except *
102 changes: 52 additions & 50 deletions ucp/_libs/utils.pyx
Original file line number Diff line number Diff line change
@@ -1,38 +1,30 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3
import asyncio
import operator
from functools import reduce


from cpython.memoryview cimport PyMemoryView_GET_BUFFER
from cython cimport boundscheck, wraparound
from libc.stdint cimport uintptr_t

from ..exceptions import UCXCloseError, UCXError


def get_buffer_data(buffer, check_writable=False):
cpdef uintptr_t get_buffer_data(buffer, bint check_writable=False) except *:
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved
"""
Returns the data pointer of the buffer. Raises ValueError if the buffer
is read-only and check_writable=True is set.
"""
iface = None
if hasattr(buffer, "__cuda_array_interface__"):
iface = buffer.__cuda_array_interface__
elif hasattr(buffer, "__array_interface__"):
iface = buffer.__array_interface__

cdef dict iface = getattr(buffer, "__cuda_array_interface__", None)

cdef uintptr_t data_ptr
cdef bint data_readonly
if iface is not None:
data_ptr, data_readonly = iface['data']
data_ptr, data_readonly = iface["data"]
else:
mview = memoryview(buffer)
data_ptr = int(<uintptr_t>PyMemoryView_GET_BUFFER(mview).buf)
data_readonly = mview.readonly

# Workaround for numba giving None, rather than an 0.
# https://github.com/cupy/cupy/issues/2104 for more info.
if data_ptr is None:
data_ptr = 0
jakirkham marked this conversation as resolved.
Show resolved Hide resolved
data_ptr = <uintptr_t>PyMemoryView_GET_BUFFER(mview).buf
data_readonly = <bint>PyMemoryView_GET_BUFFER(mview).readonly
kkraus14 marked this conversation as resolved.
Show resolved Hide resolved

if data_ptr == 0:
raise NotImplementedError("zero-sized buffers isn't supported")
Expand All @@ -43,51 +35,61 @@ def get_buffer_data(buffer, check_writable=False):
return data_ptr


def get_buffer_nbytes(buffer, check_min_size, cuda_support):
@boundscheck(False)
@wraparound(False)
cpdef Py_ssize_t get_buffer_nbytes(buffer, check_min_size, bint cuda_support) except *:
"""
Returns the size of the buffer in bytes. Raises ValueError
if `check_min_size` is greater than the size of the buffer.
"""

iface = None
if hasattr(buffer, "__cuda_array_interface__"):
iface = buffer.__cuda_array_interface__
if not cuda_support:
msg = "UCX is not configured with CUDA support, please add " \
"`cuda_copy` and/or `cuda_ipc` to the UCX_TLS environment" \
"variable and that the ucx-proc=*=gpu package is " \
"installed. See " \
"https://ucx-py.readthedocs.io/en/latest/install.html for " \
"more information."
raise ValueError(msg)
elif hasattr(buffer, "__array_interface__"):
iface = buffer.__array_interface__
cdef dict iface = getattr(buffer, "__cuda_array_interface__", None)
if not cuda_support and iface is not None:
raise ValueError(
"UCX is not configured with CUDA support, please add "
"`cuda_copy` and/or `cuda_ipc` to the UCX_TLS environment"
"variable and that the ucx-proc=*=gpu package is "
"installed. See "
"https://ucx-py.readthedocs.io/en/latest/install.html for "
"more information."
)

cdef tuple shape, strides
cdef Py_ssize_t i, s, itemsize, ndim, nbytes
if iface is not None:
import numpy
itemsize = int(numpy.dtype(iface['typestr']).itemsize)
itemsize = numpy.dtype(iface["typestr"]).itemsize
# Making sure that the elements in shape are integers
shape = [int(s) for s in iface['shape']]
nbytes = reduce(operator.mul, shape, 1) * itemsize
shape = iface["shape"]
ndim = len(shape)
nbytes = itemsize
for i in range(ndim):
nbytes *= <Py_ssize_t>shape[i]
# Check that data is contiguous
if len(shape) > 0 and iface.get("strides", None) is not None:
strides = [int(s) for s in iface['strides']]
if len(strides) != len(shape):
msg = "The length of shape and strides must be equal"
raise ValueError(msg)
strides = iface.get("strides")
if strides is not None and ndim > 0:
if len(strides) != ndim:
raise ValueError(
"The length of shape and strides must be equal"
)
s = itemsize
for i in reversed(range(len(shape))):
if s != strides[i]:
for i from ndim > i >= 0 by 1:
if s != <Py_ssize_t>strides[i]:
raise ValueError("Array must be contiguous")
s *= shape[i]
if iface.get("mask", None) is not None:
s *= <Py_ssize_t>shape[i]
if iface.get("mask") is not None:
raise NotImplementedError("mask attribute not supported")
else:
mview = memoryview(buffer)
nbytes = mview.nbytes
if not mview.contiguous:
raise ValueError("buffer must be contiguous")
if not mview.c_contiguous:
raise ValueError("buffer must be C-contiguous")

if check_min_size is not None and nbytes < check_min_size:
raise ValueError("the nbytes is greater than the size of the buffer!")
cdef Py_ssize_t min_size
if check_min_size is not None:
min_size = check_min_size
if nbytes < min_size:
raise ValueError(
"the nbytes is greater than the size of the buffer!"
)
return nbytes