Add support for numpy2 #1969

Merged: 2 commits, May 20, 2024
6 changes: 6 additions & 0 deletions .github/workflows/daily-test-build-numpy.yml
@@ -21,10 +21,16 @@ jobs:
# https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
- python-version: "3.12"
numpy-version: "1.26.4"
- python-version: "3.12"
numpy-version: "2.0.0rc2"
- python-version: "3.11"
numpy-version: "1.23.2"
- python-version: "3.11"
numpy-version: "2.0.0rc2"
- python-version: "3.10"
numpy-version: "1.21.6"
- python-version: "3.10"
numpy-version: "2.0.0rc2"
- python-version: "3.9"
numpy-version: "1.19.3"
- python-version: "3.8"
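The matrix above now pairs Python 3.10, 3.11, and 3.12 with both the oldest supported NumPy release and the 2.0.0rc2 pre-release, so the daily build exercises both ends of the supported range. As a hedged sketch (not code from this PR), tests can gate NumPy-2-only assertions on the installed major version:

    import numpy as np
    import pytest

    # Parsing the major version also handles pre-releases such as "2.0.0rc2".
    IS_NUMPY_2 = int(np.__version__.split(".")[0]) >= 2

    @pytest.mark.skipif(not IS_NUMPY_2, reason="requires NumPy >= 2.0")
    def test_removed_aliases_are_gone():
        # np.unicode_ and np.string_ were removed from the main namespace in 2.0.
        assert not hasattr(np, "unicode_")
        assert not hasattr(np, "string_")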
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -4,6 +4,6 @@ repos:
hooks:
- id: black
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.284
rev: v0.4.4
hooks:
- id: ruff
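Bumping the ruff hook to v0.4.4 brings in the NPY201 rule that the pyproject.toml change below enables. NPY201 flags uses of names removed from the main NumPy namespace in 2.0 and suggests the replacements applied throughout this PR. A small sketch of those renames; the new names already exist on NumPy 1.x, so the code stays compatible with both major versions:

    import numpy as np

    # Removed alias   ->  replacement used in this PR
    #   np.unicode_   ->  np.str_
    #   np.string_    ->  np.bytes_
    #   np.float_     ->  np.float64
    assert np.issubdtype(np.dtype("U8"), np.str_)    # unicode string dtypes
    assert np.issubdtype(np.dtype("S4"), np.bytes_)  # fixed-width bytes dtypes
    assert np.dtype(float) == np.float64             # default float dtype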
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -72,5 +72,8 @@ extend-select = ["I001"]
extend-exclude = ["doc"]
fix = true

[tool.ruff.lint]
select = ["NPY201"]

[tool.ruff.per-file-ignores]
"tiledb/__init__.py" = ["F401"]
12 changes: 2 additions & 10 deletions setup.py
@@ -6,7 +6,7 @@
import sys
from ctypes import CDLL, POINTER, Structure, byref, c_char_p, c_int, c_void_p

from pkg_resources import resource_filename
import numpy as np
from pybind11.setup_helpers import Pybind11Extension
from setuptools import Extension, find_packages, setup

@@ -478,16 +478,8 @@ class build_ext(cython_build_ext):
"""

def build_extensions(self):
"""
Lazily append numpy's include directory to Extension includes.

This is done here rather than at module scope because setup.py
may be run before numpy has been installed, in which case
importing numpy and calling `numpy.get_include()` will fail.
"""
numpy_incl = resource_filename("numpy", "core/include")
for ext in self.extensions:
ext.include_dirs.append(numpy_incl)
ext.include_dirs.append(np.get_include())

find_or_install_libtiledb(self)

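np.get_include() is NumPy's supported way to locate its C headers, and it keeps working after NumPy 2 reorganized the package layout, whereas the hard-coded "numpy/core/include" resource path no longer resolves and pkg_resources itself is deprecated. The concern in the removed docstring (importing numpy before it is installed) does not apply when NumPy is declared as a build requirement, which is the assumption here. A minimal sketch of the pattern, with placeholder names:

    import numpy as np
    from setuptools import Extension

    # "example._native" and "example/native.c" are placeholders, not files from this PR.
    ext = Extension("example._native", sources=["example/native.c"])
    ext.include_dirs.append(np.get_include())  # resolves numpy/arrayobject.h on 1.x and 2.x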
2 changes: 1 addition & 1 deletion tiledb/highlevel.py
@@ -224,7 +224,7 @@ def is_ndarray_like(arr):
elif shape and dtype:
if np.issubdtype(np.bytes_, dtype):
dtype = np.dtype("S")
elif np.issubdtype(dtype, np.unicode_):
elif np.issubdtype(dtype, np.str_):
dtype = np.dtype("U")

ndim = len(shape)
35 changes: 17 additions & 18 deletions tiledb/libtiledb.pyx
@@ -12,8 +12,7 @@ import io
import warnings
import collections.abc
from collections import OrderedDict
from json import dumps as json_dumps
from json import loads as json_loads
from json import dumps as json_dumps, loads as json_loads

from ._generated_version import version_tuple as tiledbpy_version
from .array_schema import ArraySchema
@@ -35,10 +34,7 @@ np.import_array()

# Integer types supported by Python / System
_inttypes = (int, np.integer)

# Numpy initialization code (critical)
# https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array
np.import_array()
np.set_printoptions(legacy='1.21') # use unified numpy printing


cdef tiledb_ctx_t* safe_ctx_ptr(object ctx):
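np.set_printoptions(legacy='1.21') pins repr formatting to the pre-2.0 style, so printed output (and any comparisons against it in tests or doctests) stays identical whichever major version is installed; by default NumPy 2 prints scalars with their type. A short illustration, as an aside rather than code from this PR:

    import numpy as np

    np.set_printoptions(legacy="1.21")
    print(repr(np.float64(0.5)))  # "0.5" in legacy mode; "np.float64(0.5)" under NumPy 2 defaults
    print(np.array([0.1, 2.0]))   # "[0.1 2. ]" either way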
@@ -145,8 +141,7 @@ cdef _write_array(
if attr.isvar:
try:
if attr.isnullable:
if(np.issubdtype(attr.dtype, np.unicode_)
or np.issubdtype(attr.dtype, np.string_)
if(np.issubdtype(attr.dtype, np.str_)
or np.issubdtype(attr.dtype, np.bytes_)):
attr_val = np.array(["" if v is None else v for v in values[i]])
else:
@@ -601,7 +596,7 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
dim = dom.dim(r)
dim_dtype = dim.dtype

if array.mode == 'r' and (np.issubdtype(dim_dtype, np.unicode_) or np.issubdtype(dim_dtype, np.bytes_)):
if array.mode == 'r' and (np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)):
# NED can only be retrieved in read mode
ned = array.nonempty_domain()
(dim_lb, dim_ub) = ned[r] if ned else (None, None)
@@ -612,7 +607,11 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
if not isinstance(dim_slice, slice):
raise IndexError("invalid index type: {!r}".format(type(dim_slice)))

# numpy2 doesn't allow addition between int and np.int64 - NEP 50
start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step
start = np.int64(start) if isinstance(start, int) else start
stop = np.int64(stop) if isinstance(stop, int) else stop
step = np.int64(step) if isinstance(step, int) else step

if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_):
if start is None or stop is None:
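The added lines normalize plain Python slice bounds to np.int64 before they are combined with the dimension's NumPy integer bounds. Under NEP 50, NumPy 2 drops value-based promotion: a Python int mixed with a NumPy scalar takes the NumPy operand's dtype and raises if it does not fit, so coercing up front keeps the later arithmetic on one predictable dtype. A hedged sketch of the pattern as a standalone helper (the helper name is illustrative, not from the PR):

    import numpy as np

    def coerce_slice_bounds(dim_slice):
        # Promote Python int bounds to np.int64; leave None (open-ended slices)
        # and non-integer bounds (e.g. strings for string dimensions) untouched.
        start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step
        start = np.int64(start) if isinstance(start, int) else start
        stop = np.int64(stop) if isinstance(stop, int) else stop
        step = np.int64(step) if isinstance(step, int) else step
        return start, stop, step

    start, stop, step = coerce_slice_bounds(slice(1, 10))
    assert isinstance(start, np.int64) and isinstance(stop, np.int64) and step is None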
Expand Down Expand Up @@ -1489,7 +1488,7 @@ cdef class Array(object):

cdef _ndarray_is_varlen(self, np.ndarray array):
return (np.issubdtype(array.dtype, np.bytes_) or
np.issubdtype(array.dtype, np.unicode_) or
np.issubdtype(array.dtype, np.str_) or
array.dtype == object)

@property
@@ -2503,8 +2502,8 @@ cdef class DenseArrayImpl(Array):
dtype=np.uint8
)
else:
if (np.issubdtype(attr.dtype, np.string_) and not
(np.issubdtype(attr_val.dtype, np.string_) or attr_val.dtype == np.dtype('O'))):
if (np.issubdtype(attr.dtype, np.bytes_) and not
(np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))):
raise ValueError("Cannot write a string value to non-string "
"typed attribute '{}'!".format(name))

@@ -2518,7 +2517,7 @@
dtype=np.uint8
)

if np.issubdtype(attr.dtype, np.string_):
if np.issubdtype(attr.dtype, np.bytes_):
attr_val = np.array(
["" if v is None else v for v in attr_val])
else:
@@ -2552,8 +2551,8 @@
if attr.isnullable and name not in nullmaps:
nullmaps[name] = np.array([int(v is None) for v in val], dtype=np.uint8)
else:
if (np.issubdtype(attr.dtype, np.string_) and not
(np.issubdtype(val.dtype, np.string_) or val.dtype == np.dtype('O'))):
if (np.issubdtype(attr.dtype, np.bytes_) and not
(np.issubdtype(val.dtype, np.bytes_) or val.dtype == np.dtype('O'))):
raise ValueError("Cannot write a string value to non-string "
"typed attribute '{}'!".format(name))

@@ -3040,8 +3039,8 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
nullmaps[name] = np.array(
[int(v is not None) for v in attr_val], dtype=np.uint8)
else:
if (np.issubdtype(attr.dtype, np.string_)
and not (np.issubdtype(attr_val.dtype, np.string_)
if (np.issubdtype(attr.dtype, np.bytes_)
and not (np.issubdtype(attr_val.dtype, np.bytes_)
or attr_val.dtype == np.dtype('O'))):
raise ValueError("Cannot write a string value to non-string "
"typed attribute '{}'!".format(name))
@@ -3053,7 +3052,7 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
nullmaps[name] = np.array(
[int(v is not None) for v in attr_val], dtype=np.uint8)

if np.issubdtype(attr.dtype, np.string_):
if np.issubdtype(attr.dtype, np.bytes_):
attr_val = np.array(["" if v is None else v for v in attr_val])
else:
attr_val = np.nan_to_num(attr_val)
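Several hunks above share one pattern for nullable string attributes: a uint8 validity bytemap records which cells are non-null, and None entries are replaced with an empty string so the data buffer stays a plain variable-length string array. A condensed sketch of that pattern outside the Cython code:

    import numpy as np

    values = ["a", None, "bc", None]
    validity = np.array([int(v is not None) for v in values], dtype=np.uint8)
    data = np.array(["" if v is None else v for v in values])

    assert validity.tolist() == [1, 0, 1, 0]
    assert data.dtype.kind == "U"  # unicode buffer, with "" standing in for nulls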
2 changes: 1 addition & 1 deletion tiledb/multirange_indexing.py
@@ -422,7 +422,7 @@ def __init__(
# Until list attributes are supported in core, error with a clear message.
if use_arrow and any(
(attr.isvar or len(attr.dtype) > 1)
and attr.dtype not in (np.unicode_, np.bytes_)
and attr.dtype not in (np.str_, np.bytes_)
for attr in map(array.attr, query.attrs or ())
):
raise TileDBError(
2 changes: 1 addition & 1 deletion tiledb/tests/test_attribute.py
@@ -16,7 +16,7 @@ def test_minimal_attribute(self):
self.assertEqual(attr, attr)
self.assertTrue(attr.isanon)
self.assertEqual(attr.name, "")
self.assertEqual(attr.dtype, np.float_)
self.assertEqual(attr.dtype, np.float64)
self.assertFalse(attr.isvar)
self.assertFalse(attr.isnullable)

20 changes: 10 additions & 10 deletions tiledb/tests/test_libtiledb.py
@@ -1230,10 +1230,10 @@ def test_reopen_dense_array(self, use_timestamps):

def test_data_begins_with_null_chars(self):
path = self.path("test_data_begins_with_null_chars")
data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.unicode_)
data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.str_)

dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)
schema = tiledb.ArraySchema(dom, (att,))
tiledb.Array.create(path, schema)

@@ -1325,12 +1325,12 @@ def test_varlen_write_unicode(self):
"",
"hhhhhhhhhh",
],
dtype=np.unicode_,
dtype=np.str_,
)

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)

schema = tiledb.ArraySchema(dom, (att,))

@@ -1487,7 +1487,7 @@ def test_varlen_write_fixedunicode(self):

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A)))
att = tiledb.Attr(dtype=np.unicode_)
att = tiledb.Attr(dtype=np.str_)

schema = tiledb.ArraySchema(dom, (att,))

@@ -1991,7 +1991,7 @@ def test_sparse_bytes(self, fx_sparse_cell_order):

def test_sparse_unicode(self, fx_sparse_cell_order):
dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int))
att = tiledb.Attr("", var=True, dtype=np.unicode_)
att = tiledb.Attr("", var=True, dtype=np.str_)
schema = tiledb.ArraySchema(
domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order
)
@@ -3514,11 +3514,11 @@ def test_incomplete_dense_varlen(self, non_overlapping_ranges):
ncells = 10
path = self.path("incomplete_dense_varlen")
str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)]
data = np.array(str_data, dtype=np.unicode_)
data = np.array(str_data, dtype=np.str_)

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)

schema = tiledb.ArraySchema(dom, (att,))

@@ -3556,12 +3556,12 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range

path = self.path("incomplete_sparse_varlen")
str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)]
data = np.array(str_data, dtype=np.unicode_)
data = np.array(str_data, dtype=np.str_)
coords = np.arange(ncells)

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)

schema = tiledb.ArraySchema(
dom, (att,), sparse=True, allows_duplicates=allows_duplicates
6 changes: 4 additions & 2 deletions tiledb/tests/test_pandas_dataframe.py
@@ -1324,8 +1324,10 @@ def test_incomplete_df(self, allows_duplicates, non_overlapping_ranges):
data[validity_idx] = None

# TODO - not supported
# str_data = np.array([rand_utf8(random.randint(0, n)) for n in range(ncells)],
# dtype=np.unicode_)
# str_data = np.array(
# [rand_utf8(random.randint(0, n)) for n in range(ncells)],
# dtype=np.str_,
# )
# str_data[validity_idx] = None

df = pd.DataFrame({"int64": pd.Series(data, dtype=pd.Int64Dtype())})