Add support for numpy2 #1969

Merged: 2 commits, May 20, 2024
6 changes: 6 additions & 0 deletions .github/workflows/daily-test-build-numpy.yml
@@ -21,10 +21,16 @@ jobs:
# https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
- python-version: "3.12"
numpy-version: "1.26.4"
- python-version: "3.12"
numpy-version: "2.0.0rc2"
- python-version: "3.11"
numpy-version: "1.23.2"
- python-version: "3.11"
numpy-version: "2.0.0rc2"
- python-version: "3.10"
numpy-version: "1.21.6"
- python-version: "3.10"
numpy-version: "2.0.0rc2"
- python-version: "3.9"
numpy-version: "1.19.3"
- python-version: "3.8"
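The matrix above now pairs Python 3.10, 3.11, and 3.12 with both the oldest supported NumPy release and the 2.0.0rc2 pre-release, so the daily build exercises both ends of the supported range. As a hedged sketch (not code from this PR), tests can gate NumPy-2-only assertions on the installed major version:

    import numpy as np
    import pytest

    # Parsing the major version also handles pre-releases such as "2.0.0rc2".
    IS_NUMPY_2 = int(np.__version__.split(".")[0]) >= 2

    @pytest.mark.skipif(not IS_NUMPY_2, reason="requires NumPy >= 2.0")
    def test_removed_aliases_are_gone():
        # np.unicode_ and np.string_ were removed from the main namespace in 2.0.
        assert not hasattr(np, "unicode_")
        assert not hasattr(np, "string_")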
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -4,6 +4,6 @@ repos:
hooks:
- id: black
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.284
rev: v0.4.4
hooks:
- id: ruff
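Bumping the ruff hook to v0.4.4 brings in the NPY201 rule that the pyproject.toml change below enables. NPY201 flags uses of names removed from the main NumPy namespace in 2.0 and suggests the replacements applied throughout this PR. A small sketch of those renames; the new names already exist on NumPy 1.x, so the code stays compatible with both major versions:

    import numpy as np

    # Removed alias   ->  replacement used in this PR
    #   np.unicode_   ->  np.str_
    #   np.string_    ->  np.bytes_
    #   np.float_     ->  np.float64
    assert np.issubdtype(np.dtype("U8"), np.str_)    # unicode string dtypes
    assert np.issubdtype(np.dtype("S4"), np.bytes_)  # fixed-width bytes dtypes
    assert np.dtype(float) == np.float64             # default float dtype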
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -72,5 +72,8 @@ extend-select = ["I001"]
extend-exclude = ["doc"]
fix = true

[tool.ruff.lint]
select = ["NPY201"]

[tool.ruff.per-file-ignores]
"tiledb/__init__.py" = ["F401"]
12 changes: 2 additions & 10 deletions setup.py
@@ -6,7 +6,7 @@
import sys
from ctypes import CDLL, POINTER, Structure, byref, c_char_p, c_int, c_void_p

from pkg_resources import resource_filename
import numpy as np
from pybind11.setup_helpers import Pybind11Extension
from setuptools import Extension, find_packages, setup

@@ -478,16 +478,8 @@ class build_ext(cython_build_ext):
"""

def build_extensions(self):
"""
Lazily append numpy's include directory to Extension includes.

This is done here rather than at module scope because setup.py
may be run before numpy has been installed, in which case
importing numpy and calling `numpy.get_include()` will fail.
"""
numpy_incl = resource_filename("numpy", "core/include")
for ext in self.extensions:
ext.include_dirs.append(numpy_incl)
ext.include_dirs.append(np.get_include())

find_or_install_libtiledb(self)

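np.get_include() is NumPy's supported way to locate its C headers, and it keeps working after NumPy 2 reorganized the package layout, whereas the hard-coded "numpy/core/include" resource path no longer resolves and pkg_resources itself is deprecated. The concern in the removed docstring (importing numpy before it is installed) does not apply when NumPy is declared as a build requirement, which is the assumption here. A minimal sketch of the pattern, with placeholder names:

    import numpy as np
    from setuptools import Extension

    # "example._native" and "example/native.c" are placeholders, not files from this PR.
    ext = Extension("example._native", sources=["example/native.c"])
    ext.include_dirs.append(np.get_include())  # resolves numpy/arrayobject.h on 1.x and 2.x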
2 changes: 1 addition & 1 deletion tiledb/highlevel.py
@@ -224,7 +224,7 @@ def is_ndarray_like(arr):
elif shape and dtype:
if np.issubdtype(np.bytes_, dtype):
dtype = np.dtype("S")
elif np.issubdtype(dtype, np.unicode_):
elif np.issubdtype(dtype, np.str_):
dtype = np.dtype("U")

ndim = len(shape)
35 changes: 17 additions & 18 deletions tiledb/libtiledb.pyx
@@ -12,8 +12,7 @@ import io
import warnings
import collections.abc
from collections import OrderedDict
from json import dumps as json_dumps
from json import loads as json_loads
from json import dumps as json_dumps, loads as json_loads

from ._generated_version import version_tuple as tiledbpy_version
from .array_schema import ArraySchema
@@ -35,10 +34,7 @@ np.import_array()

# Integer types supported by Python / System
_inttypes = (int, np.integer)

# Numpy initialization code (critical)
# https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array
np.import_array()
np.set_printoptions(legacy='1.21') # use unified numpy printing


cdef tiledb_ctx_t* safe_ctx_ptr(object ctx):
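np.set_printoptions(legacy='1.21') pins repr formatting to the pre-2.0 style, so printed output (and any comparisons against it in tests or doctests) stays identical whichever major version is installed; by default NumPy 2 prints scalars with their type. A short illustration, as an aside rather than code from this PR:

    import numpy as np

    np.set_printoptions(legacy="1.21")
    print(repr(np.float64(0.5)))  # "0.5" in legacy mode; "np.float64(0.5)" under NumPy 2 defaults
    print(np.array([0.1, 2.0]))   # "[0.1 2. ]" either way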
@@ -145,8 +141,7 @@ cdef _write_array(
if attr.isvar:
try:
if attr.isnullable:
if(np.issubdtype(attr.dtype, np.unicode_)
or np.issubdtype(attr.dtype, np.string_)
if(np.issubdtype(attr.dtype, np.str_)
or np.issubdtype(attr.dtype, np.bytes_)):
attr_val = np.array(["" if v is None else v for v in values[i]])
else:
@@ -601,7 +596,7 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
dim = dom.dim(r)
dim_dtype = dim.dtype

if array.mode == 'r' and (np.issubdtype(dim_dtype, np.unicode_) or np.issubdtype(dim_dtype, np.bytes_)):
if array.mode == 'r' and (np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)):
# NED can only be retrieved in read mode
ned = array.nonempty_domain()
(dim_lb, dim_ub) = ned[r] if ned else (None, None)
@@ -612,7 +607,11 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
if not isinstance(dim_slice, slice):
raise IndexError("invalid index type: {!r}".format(type(dim_slice)))

# numpy2 doesn't allow addition between int and np.int64 - NEP 50
start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step
start = np.int64(start) if isinstance(start, int) else start
stop = np.int64(stop) if isinstance(stop, int) else stop
step = np.int64(step) if isinstance(step, int) else step

if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_):
if start is None or stop is None:
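The added lines normalize plain Python slice bounds to np.int64 before they are combined with the dimension's NumPy integer bounds. Under NEP 50, NumPy 2 drops value-based promotion: a Python int mixed with a NumPy scalar takes the NumPy operand's dtype and raises if it does not fit, so coercing up front keeps the later arithmetic on one predictable dtype. A hedged sketch of the pattern as a standalone helper (the helper name is illustrative, not from the PR):

    import numpy as np

    def coerce_slice_bounds(dim_slice):
        # Promote Python int bounds to np.int64; leave None (open-ended slices)
        # and non-integer bounds (e.g. strings for string dimensions) untouched.
        start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step
        start = np.int64(start) if isinstance(start, int) else start
        stop = np.int64(stop) if isinstance(stop, int) else stop
        step = np.int64(step) if isinstance(step, int) else step
        return start, stop, step

    start, stop, step = coerce_slice_bounds(slice(1, 10))
    assert isinstance(start, np.int64) and isinstance(stop, np.int64) and step is None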
Expand Down Expand Up @@ -1489,7 +1488,7 @@ cdef class Array(object):

cdef _ndarray_is_varlen(self, np.ndarray array):
return (np.issubdtype(array.dtype, np.bytes_) or
np.issubdtype(array.dtype, np.unicode_) or
np.issubdtype(array.dtype, np.str_) or
array.dtype == object)

@property
@@ -2503,8 +2502,8 @@ cdef class DenseArrayImpl(Array):
dtype=np.uint8
)
else:
if (np.issubdtype(attr.dtype, np.string_) and not
(np.issubdtype(attr_val.dtype, np.string_) or attr_val.dtype == np.dtype('O'))):
if (np.issubdtype(attr.dtype, np.bytes_) and not
(np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))):
raise ValueError("Cannot write a string value to non-string "
"typed attribute '{}'!".format(name))

@@ -2518,7 +2517,7 @@
dtype=np.uint8
)

if np.issubdtype(attr.dtype, np.string_):
if np.issubdtype(attr.dtype, np.bytes_):
attr_val = np.array(
["" if v is None else v for v in attr_val])
else:
@@ -2552,8 +2551,8 @@
if attr.isnullable and name not in nullmaps:
nullmaps[name] = np.array([int(v is None) for v in val], dtype=np.uint8)
else:
if (np.issubdtype(attr.dtype, np.string_) and not
(np.issubdtype(val.dtype, np.string_) or val.dtype == np.dtype('O'))):
if (np.issubdtype(attr.dtype, np.bytes_) and not
(np.issubdtype(val.dtype, np.bytes_) or val.dtype == np.dtype('O'))):
raise ValueError("Cannot write a string value to non-string "
"typed attribute '{}'!".format(name))

@@ -3040,8 +3039,8 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
nullmaps[name] = np.array(
[int(v is not None) for v in attr_val], dtype=np.uint8)
else:
if (np.issubdtype(attr.dtype, np.string_)
and not (np.issubdtype(attr_val.dtype, np.string_)
if (np.issubdtype(attr.dtype, np.bytes_)
and not (np.issubdtype(attr_val.dtype, np.bytes_)
or attr_val.dtype == np.dtype('O'))):
raise ValueError("Cannot write a string value to non-string "
"typed attribute '{}'!".format(name))
@@ -3053,7 +3052,7 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
nullmaps[name] = np.array(
[int(v is not None) for v in attr_val], dtype=np.uint8)

if np.issubdtype(attr.dtype, np.string_):
if np.issubdtype(attr.dtype, np.bytes_):
attr_val = np.array(["" if v is None else v for v in attr_val])
else:
attr_val = np.nan_to_num(attr_val)
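Several hunks above share one pattern for nullable string attributes: a uint8 validity bytemap records which cells are non-null, and None entries are replaced with an empty string so the data buffer stays a plain variable-length string array. A condensed sketch of that pattern outside the Cython code:

    import numpy as np

    values = ["a", None, "bc", None]
    validity = np.array([int(v is not None) for v in values], dtype=np.uint8)
    data = np.array(["" if v is None else v for v in values])

    assert validity.tolist() == [1, 0, 1, 0]
    assert data.dtype.kind == "U"  # unicode buffer, with "" standing in for nulls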
2 changes: 1 addition & 1 deletion tiledb/multirange_indexing.py
@@ -422,7 +422,7 @@ def __init__(
# Until list attributes are supported in core, error with a clear message.
if use_arrow and any(
(attr.isvar or len(attr.dtype) > 1)
and attr.dtype not in (np.unicode_, np.bytes_)
and attr.dtype not in (np.str_, np.bytes_)
for attr in map(array.attr, query.attrs or ())
):
raise TileDBError(
2 changes: 1 addition & 1 deletion tiledb/tests/test_attribute.py
@@ -16,7 +16,7 @@ def test_minimal_attribute(self):
self.assertEqual(attr, attr)
self.assertTrue(attr.isanon)
self.assertEqual(attr.name, "")
self.assertEqual(attr.dtype, np.float_)
self.assertEqual(attr.dtype, np.float64)
self.assertFalse(attr.isvar)
self.assertFalse(attr.isnullable)

20 changes: 10 additions & 10 deletions tiledb/tests/test_libtiledb.py
@@ -1230,10 +1230,10 @@ def test_reopen_dense_array(self, use_timestamps):

def test_data_begins_with_null_chars(self):
path = self.path("test_data_begins_with_null_chars")
data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.unicode_)
data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.str_)

dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)
schema = tiledb.ArraySchema(dom, (att,))
tiledb.Array.create(path, schema)

@@ -1325,12 +1325,12 @@ def test_varlen_write_unicode(self):
"",
"hhhhhhhhhh",
],
dtype=np.unicode_,
dtype=np.str_,
)

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)

schema = tiledb.ArraySchema(dom, (att,))

@@ -1487,7 +1487,7 @@ def test_varlen_write_fixedunicode(self):

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A)))
att = tiledb.Attr(dtype=np.unicode_)
att = tiledb.Attr(dtype=np.str_)

schema = tiledb.ArraySchema(dom, (att,))

@@ -1991,7 +1991,7 @@ def test_sparse_bytes(self, fx_sparse_cell_order):

def test_sparse_unicode(self, fx_sparse_cell_order):
dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int))
att = tiledb.Attr("", var=True, dtype=np.unicode_)
att = tiledb.Attr("", var=True, dtype=np.str_)
schema = tiledb.ArraySchema(
domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order
)
@@ -3514,11 +3514,11 @@ def test_incomplete_dense_varlen(self, non_overlapping_ranges):
ncells = 10
path = self.path("incomplete_dense_varlen")
str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)]
data = np.array(str_data, dtype=np.unicode_)
data = np.array(str_data, dtype=np.str_)

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)

schema = tiledb.ArraySchema(dom, (att,))

@@ -3556,12 +3556,12 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range

path = self.path("incomplete_sparse_varlen")
str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)]
data = np.array(str_data, dtype=np.unicode_)
data = np.array(str_data, dtype=np.str_)
coords = np.arange(ncells)

# basic write
dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data)))
att = tiledb.Attr(dtype=np.unicode_, var=True)
att = tiledb.Attr(dtype=np.str_, var=True)

schema = tiledb.ArraySchema(
dom, (att,), sparse=True, allows_duplicates=allows_duplicates
6 changes: 4 additions & 2 deletions tiledb/tests/test_pandas_dataframe.py
@@ -1324,8 +1324,10 @@ def test_incomplete_df(self, allows_duplicates, non_overlapping_ranges):
data[validity_idx] = None

# TODO - not supported
# str_data = np.array([rand_utf8(random.randint(0, n)) for n in range(ncells)],
# dtype=np.unicode_)
# str_data = np.array(
# [rand_utf8(random.randint(0, n)) for n in range(ncells)],
# dtype=np.str_,
# )
# str_data[validity_idx] = None

df = pd.DataFrame({"int64": pd.Series(data, dtype=pd.Int64Dtype())})