From 9430e9abcc3cd8d31e62b786b75242e4f2899822 Mon Sep 17 00:00:00 2001 From: Isaiah Norton Date: Fri, 17 May 2024 08:17:25 -0400 Subject: [PATCH 1/8] Fix syntax error in nightly build workflow (#1970) * Fix syntax error in nightly build workflow * Update test for https://github.com/TileDB-Inc/TileDB/pull/4973 --- .github/workflows/daily-test-build-numpy.yml | 2 +- tiledb/tests/cc/test_cc.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/daily-test-build-numpy.yml b/.github/workflows/daily-test-build-numpy.yml index 3fbfcc4cb7..58bc0402eb 100644 --- a/.github/workflows/daily-test-build-numpy.yml +++ b/.github/workflows/daily-test-build-numpy.yml @@ -34,7 +34,7 @@ jobs: TILEDB_VERSION: ${{ github.event.inputs.libtiledb_version }} # 11.7 necessary due to: https://github.com/actions/setup-python/issues/682#issuecomment-1604261330 #MACOSX_DEPLOYMENT_TARGET: "10.15" - MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os == 'macos-12' && matrix.python-version == '3.8' ? '11.7' : '11' }} + MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os == 'macos-12' && matrix.python-version == '3.8' && '11.7' || '11' }} VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite' steps: - name: Checkout TileDB-Py `dev` diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py index 2aed7e104b..0f441907e0 100644 --- a/tiledb/tests/cc/test_cc.py +++ b/tiledb/tests/cc/test_cc.py @@ -275,8 +275,11 @@ def test_schema(): with pytest.raises(lt.TileDBError): schema._tile_order = lt.LayoutType.HILBERT - schema._tile_order = lt.LayoutType.UNORDERED - assert schema._tile_order == lt.LayoutType.UNORDERED + if tiledb.libtiledb.version() >= (2, 24, 0): + with pytest.raises(lt.TileDBError): + schema._tile_order = lt.LayoutType.UNORDERED + schema._tile_order = lt.LayoutType.ROW_MAJOR + assert schema._tile_order == lt.LayoutType.ROW_MAJOR # TODO schema._set_coords_filter_list(...) 
# TODO assert schema._coords_filter_list() == lt.FilterListType.NONE From 5b1f60ec8ea4957e51add6120d6b59522519a67e Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Sat, 18 May 2024 06:25:09 +0300 Subject: [PATCH 2/8] Add Array.query in docs and improve docs in general (#1965) - Added `Array.query` in docs, both `SparseArray` and `DenseArray`. - Fixed some minor errors/warnings while building docs like importing modules from `tiledb.libtiledb` instead of `tiledb`. - Setting default language for docs - it was causing warning. - Fixed some typos, like double 'a' - Addressed the issue with new `__repr__` of `OrderedDict` in Python 3.12 using [np.testing.assert_equal](https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_equal.html). --- doc/source/conf.py | 2 +- doc/source/python-api.rst | 21 ++++++----- tiledb/__init__.py | 17 ++++++--- tiledb/array_schema.py | 2 +- tiledb/ctx.py | 2 +- tiledb/filter.py | 6 ++-- tiledb/libtiledb.pyx | 76 +++++++++++++++++++++++---------------- tiledb/vfs.py | 11 +++--- 8 files changed, 78 insertions(+), 59 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index dc89f4d73e..a13dc2032b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -75,7 +75,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/doc/source/python-api.rst b/doc/source/python-api.rst index c2dc44e379..994e368f9c 100644 --- a/doc/source/python-api.rst +++ b/doc/source/python-api.rst @@ -68,7 +68,7 @@ Filters .. automethod:: __getitem__(idx) .. automethod:: __len__ -.. autoclass:: tiledb.libtiledb.CompressionFilter +.. autoclass:: tiledb.CompressionFilter :members: .. 
autoclass:: tiledb.GzipFilter :members: @@ -116,22 +116,21 @@ Dense Array ----------- .. autoclass:: tiledb.DenseArray - :members: - - .. automethod:: __getitem__(selection) - .. automethod:: __setitem__(selection, value) - .. automethod:: query - .. automethod:: from_numpy(uri, array, ctx=None, **kwargs) + :members: query + :special-members: __getitem__, __setitem__ Sparse Array ------------ .. autoclass:: tiledb.SparseArray - :members: + :members: query + :special-members: __getitem__, __setitem__ - .. automethod:: __getitem__(selection) - .. automethod:: __setitem__(selection, value) - .. automethod:: query +Query +--------------- + +.. autoclass:: tiledb.libtiledb.Query + :members: Query Condition --------------- diff --git a/tiledb/__init__.py b/tiledb/__init__.py index ec61839fdb..d3ae82b81c 100644 --- a/tiledb/__init__.py +++ b/tiledb/__init__.py @@ -55,6 +55,7 @@ Bzip2Filter, ChecksumMD5Filter, ChecksumSHA256Filter, + CompressionFilter, DeltaFilter, DictionaryFilter, DoubleDeltaFilter, @@ -89,6 +90,8 @@ ) from .libtiledb import ( Array, + DenseArrayImpl, + SparseArrayImpl, consolidate, ls, move, @@ -101,8 +104,6 @@ vacuum, walk, ) -from .libtiledb import DenseArrayImpl as DenseArray -from .libtiledb import SparseArrayImpl as SparseArray from .multirange_indexing import EmptyRange from .object import Object from .parquet_ import from_parquet @@ -132,13 +133,19 @@ try: from tiledb.cloud.cloudarray import CloudArray except ImportError: - pass + + class DenseArray(DenseArrayImpl): + pass + + class SparseArray(SparseArrayImpl): + pass + else: - class DenseArray(DenseArray, CloudArray): + class DenseArray(DenseArrayImpl, CloudArray): pass - class SparseArray(SparseArray, CloudArray): + class SparseArray(SparseArrayImpl, CloudArray): pass del CloudArray diff --git a/tiledb/array_schema.py b/tiledb/array_schema.py index 260a0f337f..4e399c81b6 100644 --- a/tiledb/array_schema.py +++ b/tiledb/array_schema.py @@ -322,7 +322,7 @@ def version(self) -> int: """The 
array's schema (storage) version. :rtype: int - :raises :py:exc:`tiledb.TileDBError` + :raises: :py:exc:`tiledb.TileDBError` """ return self._version diff --git a/tiledb/ctx.py b/tiledb/ctx.py index 441f054e2a..05d2b85b5e 100644 --- a/tiledb/ctx.py +++ b/tiledb/ctx.py @@ -209,7 +209,7 @@ def dict(self, prefix: str = ""): :param str prefix: return only parameters with a given prefix :rtype: dict - :return: Config parameter / values as a a Python dict + :return: Config parameter / values as a Python dict """ return dict(ConfigItems(self, prefix=prefix)) diff --git a/tiledb/filter.py b/tiledb/filter.py index 871a8cf0a5..cf3351076f 100644 --- a/tiledb/filter.py +++ b/tiledb/filter.py @@ -326,8 +326,7 @@ class DoubleDeltaFilter(CompressionFilter): :param level: -1 (default) sets the compressor level to the default level as specified in TileDB core. Otherwise, sets the compressor level to the given value. :type level: int - :param reinterp_dtype: (optional) sets the compressor to compress the data treating - as the new datatype. + :param reinterp_dtype: (optional) sets the compressor to compress the data treating as the new datatype. **Example:** @@ -501,7 +500,8 @@ class PositiveDeltaFilter(Filter): :param ctx: A TileDB Context :type ctx: tiledb.Ctx :param window: -1 (default) sets the max window size for the filter to the default window size as specified in TileDB core. Otherwise, sets the compressor level to the given value. 
- :type window: int + :type window: int + **Example:** >>> import tiledb, numpy as np, tempfile diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index 29a33de467..380d1e1c03 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -1525,8 +1525,8 @@ cdef class Array(object): ** Example ** - >>> import tiledb, numpy as np - >>> + >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> dim1 = tiledb.Dim("d1", domain=(1, 4)) >>> dim2 = tiledb.Dim("d2", domain=(1, 3)) >>> dom = tiledb.Domain(dim1, dim2) @@ -1551,21 +1551,30 @@ cdef class Array(object): ... A[:] = {"a1": a1_data, "l1": l1_data, "l2": l2_data, "l3": l3_data} ... ... with tiledb.open(tmp, "r") as A: - ... A.label_index(["l1"])[3:4] # doctest: +ELLIPSIS - ... A.label_index(["l1", "l3"])[2, 0.5:1.0] # doctest: +ELLIPSIS - ... A.label_index(["l2"])[:, -1:0] # doctest: +ELLIPSIS - ... A.label_index(["l3"])[:, 0.5:1.0] # doctest: +ELLIPSIS - OrderedDict(...'l1'... array([4, 3])..., ...'a1'... array([[1, 2, 3], - [4, 5, 6]])...) - OrderedDict(...'l3'... array([0.5, 1. ])..., ...'l1'... array([2])..., ...'a1'... array([[8, 9]])...) - OrderedDict(...'l2'... array([-1, 0])..., ...'a1'... array([[ 1, 2], - [ 4, 5], - [ 7, 8], - [10, 11]])...) - OrderedDict(...'l3'... array([0.5, 1. ])..., ...'a1'... array([[ 2, 3], - [ 5, 6], - [ 8, 9], - [11, 12]])...) + ... np.testing.assert_equal( + ... A.label_index(["l1"])[3:4], + ... OrderedDict({"l1": [4, 3], "a1": [[1, 2, 3], [4, 5, 6]]}), + ... ) + ... np.testing.assert_equal( + ... A.label_index(["l1", "l3"])[2, 0.5:1.0], + ... OrderedDict( + ... {"l3": [0.5, 1.0], "l1": [2], "a1": [[8, 9]]} + ... ), + ... ) + ... np.testing.assert_equal( + ... A.label_index(["l2"])[:, -1:0], + ... OrderedDict( + ... {"l2": [-1, 0], + ... "a1": [[1, 2], [4, 5], [7, 8], [10, 11]]}, + ... ), + ... ) + ... np.testing.assert_equal( + ... A.label_index(["l3"])[:, 0.5:1.0], + ... OrderedDict( + ... {"l3": [0.5, 1.], + ... 
"a1": [[2, 3], [5, 6], [8, 9], [11, 12]]}, + ... ), + ... ) :param labels: List of labels to use when querying. Can only use at most one label per dimension. @@ -1574,6 +1583,7 @@ cdef class Array(object): query the array on the corresponding dimension. :returns: dict of {'label/attribute': result}. :raises: :py:exc:`tiledb.TileDBError` + """ # Delayed to avoid circular import from .multirange_indexing import LabelIndexer @@ -2158,8 +2168,7 @@ cdef class DenseArrayImpl(Array): def query(self, attrs=None, attr_cond=None, cond=None, dims=None, coords=False, order='C', use_arrow=None, return_arrow=False, return_incomplete=False): - """ - Construct a proxy Query object for easy subarray queries of cells + """Construct a proxy Query object for easy subarray queries of cells for an item or region of the array across one or more attributes. Optionally subselect over attributes, return dense result coordinate values, @@ -2202,8 +2211,8 @@ cdef class DenseArrayImpl(Array): ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # Access specific attributes individually. - ... A.query(attrs=("a1",))[0:5] # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([0, 0, 0, 0, 0])...) + ... np.testing.assert_equal(A.query(attrs=("a1",))[0:5], + ... {"a1": np.zeros(5)}) """ if not self.isopen or self.mode != 'r': @@ -2257,8 +2266,8 @@ cdef class DenseArrayImpl(Array): ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # A[0:5], attribute a1, row-major without coordinates - ... A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C') # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([0, 0, 0, 0, 0])...) + ... np.testing.assert_equal(A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C'), + ... 
OrderedDict({'a1': np.zeros(5)})) """ from .subarray import Subarray @@ -3178,6 +3187,7 @@ cdef class SparseArrayImpl(Array): **Example:** >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( @@ -3195,10 +3205,12 @@ cdef class SparseArrayImpl(Array): ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... # Return an OrderedDict with values and coordinates - ... A[0:3, 0:10] # doctest: +ELLIPSIS + ... np.testing.assert_equal(A[0:3, 0:10], OrderedDict({'a1': np.array([1, 2]), + ... 'a2': np.array([3, 4]), 'y': np.array([0, 2], dtype=np.uint64), + ... 'x': np.array([0, 3], dtype=np.uint64)})) ... # Return just the "x" coordinates values - ... A[0:3, 0:10]["x"] # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([1, 2])..., ...'a2'... array([3, 4])..., ...'y'... array([0, 2], dtype=uint64)..., ...'x'... array([0, 3], dtype=uint64)...) + ... A[0:3, 0:10]["x"] + array([0, 3], dtype=uint64) With a floating-point array domain, index bounds are inclusive, e.g.: @@ -3255,6 +3267,7 @@ cdef class SparseArrayImpl(Array): **Example:** >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( @@ -3271,8 +3284,8 @@ cdef class SparseArrayImpl(Array): ... A[I, J] = {"a1": np.array([1, 2]), ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: - ... A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10] # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([1, 2])...) + ... np.testing.assert_equal(A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10], + ... 
OrderedDict({'a1': np.array([1, 2])})) """ if not self.isopen or self.mode not in ('r', 'd'): @@ -3364,6 +3377,7 @@ cdef class SparseArrayImpl(Array): **Example:** >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( @@ -3381,8 +3395,10 @@ cdef class SparseArrayImpl(Array): ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... # A[0:3, 0:10], attribute a1, row-major without coordinates - ... A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G') # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([1, 2])...) + ... np.testing.assert_equal( + ... A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G'), + ... OrderedDict({'a1': np.array([1, 2])}) + ... ) """ from .subarray import Subarray diff --git a/tiledb/vfs.py b/tiledb/vfs.py index 456232c782..105fc842cf 100644 --- a/tiledb/vfs.py +++ b/tiledb/vfs.py @@ -95,7 +95,7 @@ def write(self, file: lt.FileHandle, buff: Union[str, bytes]): """ if isinstance(file, FileIO): raise lt.TileDBError( - "`tiledb.VFS().open` now returns a a FileIO object. Use " + "`tiledb.VFS().open` now returns a FileIO object. Use " "`FileIO.write`. This message will be removed in 0.21.0.", ) if isinstance(buff, str): @@ -115,7 +115,7 @@ def read(self, file: lt.FileHandle, offset: int, nbytes: int) -> bytes: """ if isinstance(file, FileIO): raise lt.TileDBError( - "`tiledb.VFS().open` now returns a a FileIO object. Use " + "`tiledb.VFS().open` now returns a FileIO object. Use " "`FileIO.seek` and `FileIO.read`. This message will be removed " "in 0.21.0." ) @@ -436,9 +436,7 @@ def flush(self): def seek(self, offset: int, whence: int = 0): """ :param int offset: Byte position to set the file pointer - :param int whence: Reference point. 
A whence value of 0 measures from the - beginning of the file, 1 uses the current file position, and 2 uses the - end of the file as the reference point. whence can be omitted and defaults to 0. + :param int whence: Reference point. A whence value of 0 measures from the beginning of the file, 1 uses the current file position, and 2 uses the end of the file as the reference point. whence can be omitted and defaults to 0. """ if not np.issubdtype(type(offset), np.integer): raise TypeError( @@ -475,8 +473,7 @@ def read(self, size: int = -1) -> bytes: """ Read the file from the current pointer position. - :param int size: Number of bytes to read. By default, size is set to -1 - which will read until the end of the file. + :param int size: Number of bytes to read. By default, size is set to -1 which will read until the end of the file. :rtype: bytes :return: The bytes in the file From caa99067acd5dbfc26b3336b52e78907bd6ae38b Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Mon, 20 May 2024 12:10:10 +0300 Subject: [PATCH 3/8] Add support for numpy2 (#1969) --- .github/workflows/daily-test-build-numpy.yml | 6 ++++ .pre-commit-config.yaml | 2 +- pyproject.toml | 3 ++ setup.py | 12 ++----- tiledb/highlevel.py | 2 +- tiledb/libtiledb.pyx | 35 ++++++++++---------- tiledb/multirange_indexing.py | 2 +- tiledb/tests/test_attribute.py | 2 +- tiledb/tests/test_libtiledb.py | 20 +++++------ tiledb/tests/test_pandas_dataframe.py | 6 ++-- 10 files changed, 46 insertions(+), 44 deletions(-) diff --git a/.github/workflows/daily-test-build-numpy.yml b/.github/workflows/daily-test-build-numpy.yml index 58bc0402eb..37b01afbbe 100644 --- a/.github/workflows/daily-test-build-numpy.yml +++ b/.github/workflows/daily-test-build-numpy.yml @@ -21,10 +21,16 @@ jobs: # https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg - python-version: "3.12" numpy-version: "1.26.4" + - python-version: "3.12" + numpy-version: "2.0.0rc2" - 
python-version: "3.11" numpy-version: "1.23.2" + - python-version: "3.11" + numpy-version: "2.0.0rc2" - python-version: "3.10" numpy-version: "1.21.6" + - python-version: "3.10" + numpy-version: "2.0.0rc2" - python-version: "3.9" numpy-version: "1.19.3" - python-version: "3.8" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5239d37dc3..f3fd8e17ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,6 @@ repos: hooks: - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.284 + rev: v0.4.4 hooks: - id: ruff diff --git a/pyproject.toml b/pyproject.toml index e3ec53a8f2..da864ddd41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,5 +72,8 @@ extend-select = ["I001"] extend-exclude = ["doc"] fix = true +[tool.ruff.lint] +select = ["NPY201"] + [tool.ruff.per-file-ignores] "tiledb/__init__.py" = ["F401"] diff --git a/setup.py b/setup.py index 99a119e4c5..2b0f6c5858 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ import sys from ctypes import CDLL, POINTER, Structure, byref, c_char_p, c_int, c_void_p -from pkg_resources import resource_filename +import numpy as np from pybind11.setup_helpers import Pybind11Extension from setuptools import Extension, find_packages, setup @@ -478,16 +478,8 @@ class build_ext(cython_build_ext): """ def build_extensions(self): - """ - Lazily append numpy's include directory to Extension includes. - - This is done here rather than at module scope because setup.py - may be run before numpy has been installed, in which case - importing numpy and calling `numpy.get_include()` will fail. 
- """ - numpy_incl = resource_filename("numpy", "core/include") for ext in self.extensions: - ext.include_dirs.append(numpy_incl) + ext.include_dirs.append(np.get_include()) find_or_install_libtiledb(self) diff --git a/tiledb/highlevel.py b/tiledb/highlevel.py index 6bc4ab1cea..5a1b11b6b3 100644 --- a/tiledb/highlevel.py +++ b/tiledb/highlevel.py @@ -224,7 +224,7 @@ def is_ndarray_like(arr): elif shape and dtype: if np.issubdtype(np.bytes_, dtype): dtype = np.dtype("S") - elif np.issubdtype(dtype, np.unicode_): + elif np.issubdtype(dtype, np.str_): dtype = np.dtype("U") ndim = len(shape) diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index 380d1e1c03..0b919b7031 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -12,8 +12,7 @@ import io import warnings import collections.abc from collections import OrderedDict -from json import dumps as json_dumps -from json import loads as json_loads +from json import dumps as json_dumps, loads as json_loads from ._generated_version import version_tuple as tiledbpy_version from .array_schema import ArraySchema @@ -35,10 +34,7 @@ np.import_array() # Integer types supported by Python / System _inttypes = (int, np.integer) - -# Numpy initialization code (critical) -# https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array -np.import_array() +np.set_printoptions(legacy='1.21') # use unified numpy printing cdef tiledb_ctx_t* safe_ctx_ptr(object ctx): @@ -145,8 +141,7 @@ cdef _write_array( if attr.isvar: try: if attr.isnullable: - if(np.issubdtype(attr.dtype, np.unicode_) - or np.issubdtype(attr.dtype, np.string_) + if(np.issubdtype(attr.dtype, np.str_) or np.issubdtype(attr.dtype, np.bytes_)): attr_val = np.array(["" if v is None else v for v in values[i]]) else: @@ -601,7 +596,7 @@ def index_domain_subarray(array: Array, dom, idx: tuple): dim = dom.dim(r) dim_dtype = dim.dtype - if array.mode == 'r' and (np.issubdtype(dim_dtype, np.unicode_) or np.issubdtype(dim_dtype, np.bytes_)): + if 
array.mode == 'r' and (np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)): # NED can only be retrieved in read mode ned = array.nonempty_domain() (dim_lb, dim_ub) = ned[r] if ned else (None, None) @@ -612,7 +607,11 @@ def index_domain_subarray(array: Array, dom, idx: tuple): if not isinstance(dim_slice, slice): raise IndexError("invalid index type: {!r}".format(type(dim_slice))) + # numpy2 doesn't allow addition between int and np.int64 - NEP 50 start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step + start = np.int64(start) if isinstance(start, int) else start + stop = np.int64(stop) if isinstance(stop, int) else stop + step = np.int64(step) if isinstance(step, int) else step if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_): if start is None or stop is None: @@ -1503,7 +1502,7 @@ cdef class Array(object): cdef _ndarray_is_varlen(self, np.ndarray array): return (np.issubdtype(array.dtype, np.bytes_) or - np.issubdtype(array.dtype, np.unicode_) or + np.issubdtype(array.dtype, np.str_) or array.dtype == object) @property @@ -2526,8 +2525,8 @@ cdef class DenseArrayImpl(Array): dtype=np.uint8 ) else: - if (np.issubdtype(attr.dtype, np.string_) and not - (np.issubdtype(attr_val.dtype, np.string_) or attr_val.dtype == np.dtype('O'))): + if (np.issubdtype(attr.dtype, np.bytes_) and not + (np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) @@ -2541,7 +2540,7 @@ cdef class DenseArrayImpl(Array): dtype=np.uint8 ) - if np.issubdtype(attr.dtype, np.string_): + if np.issubdtype(attr.dtype, np.bytes_): attr_val = np.array( ["" if v is None else v for v in attr_val]) else: @@ -2575,8 +2574,8 @@ cdef class DenseArrayImpl(Array): if attr.isnullable and name not in nullmaps: nullmaps[name] = np.array([int(v is None) for v in val], dtype=np.uint8) else: - if (np.issubdtype(attr.dtype, np.string_) 
and not - (np.issubdtype(val.dtype, np.string_) or val.dtype == np.dtype('O'))): + if (np.issubdtype(attr.dtype, np.bytes_) and not + (np.issubdtype(val.dtype, np.bytes_) or val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) @@ -3063,8 +3062,8 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps): nullmaps[name] = np.array( [int(v is not None) for v in attr_val], dtype=np.uint8) else: - if (np.issubdtype(attr.dtype, np.string_) - and not (np.issubdtype(attr_val.dtype, np.string_) + if (np.issubdtype(attr.dtype, np.bytes_) + and not (np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) @@ -3076,7 +3075,7 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps): nullmaps[name] = np.array( [int(v is not None) for v in attr_val], dtype=np.uint8) - if np.issubdtype(attr.dtype, np.string_): + if np.issubdtype(attr.dtype, np.bytes_): attr_val = np.array(["" if v is None else v for v in attr_val]) else: attr_val = np.nan_to_num(attr_val) diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py index 010836f18f..5509435bad 100644 --- a/tiledb/multirange_indexing.py +++ b/tiledb/multirange_indexing.py @@ -422,7 +422,7 @@ def __init__( # Until list attributes are supported in core, error with a clear message. 
if use_arrow and any( (attr.isvar or len(attr.dtype) > 1) - and attr.dtype not in (np.unicode_, np.bytes_) + and attr.dtype not in (np.str_, np.bytes_) for attr in map(array.attr, query.attrs or ()) ): raise TileDBError( diff --git a/tiledb/tests/test_attribute.py b/tiledb/tests/test_attribute.py index d79c8c683a..4f91211589 100644 --- a/tiledb/tests/test_attribute.py +++ b/tiledb/tests/test_attribute.py @@ -16,7 +16,7 @@ def test_minimal_attribute(self): self.assertEqual(attr, attr) self.assertTrue(attr.isanon) self.assertEqual(attr.name, "") - self.assertEqual(attr.dtype, np.float_) + self.assertEqual(attr.dtype, np.float64) self.assertFalse(attr.isvar) self.assertFalse(attr.isnullable) diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index cd13ad9cde..7e936c86bc 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -1230,10 +1230,10 @@ def test_reopen_dense_array(self, use_timestamps): def test_data_begins_with_null_chars(self): path = self.path("test_data_begins_with_null_chars") - data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.unicode_) + data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.str_) dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.Array.create(path, schema) @@ -1325,12 +1325,12 @@ def test_varlen_write_unicode(self): "", "hhhhhhhhhh", ], - dtype=np.unicode_, + dtype=np.str_, ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,)) @@ -1487,7 +1487,7 @@ def test_varlen_write_fixedunicode(self): # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.unicode_) + att = 
tiledb.Attr(dtype=np.str_) schema = tiledb.ArraySchema(dom, (att,)) @@ -1991,7 +1991,7 @@ def test_sparse_bytes(self, fx_sparse_cell_order): def test_sparse_unicode(self, fx_sparse_cell_order): dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) - att = tiledb.Attr("", var=True, dtype=np.unicode_) + att = tiledb.Attr("", var=True, dtype=np.str_) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order ) @@ -3514,11 +3514,11 @@ def test_incomplete_dense_varlen(self, non_overlapping_ranges): ncells = 10 path = self.path("incomplete_dense_varlen") str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] - data = np.array(str_data, dtype=np.unicode_) + data = np.array(str_data, dtype=np.str_) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,)) @@ -3556,12 +3556,12 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range path = self.path("incomplete_sparse_varlen") str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] - data = np.array(str_data, dtype=np.unicode_) + data = np.array(str_data, dtype=np.str_) coords = np.arange(ncells) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema( dom, (att,), sparse=True, allows_duplicates=allows_duplicates diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py index 51649e7bfc..30f56eff29 100644 --- a/tiledb/tests/test_pandas_dataframe.py +++ b/tiledb/tests/test_pandas_dataframe.py @@ -1324,8 +1324,10 @@ def test_incomplete_df(self, allows_duplicates, non_overlapping_ranges): data[validity_idx] = None # TODO - not supported - # str_data = 
np.array([rand_utf8(random.randint(0, n)) for n in range(ncells)], - # dtype=np.unicode_) + # str_data = np.array( + # [rand_utf8(random.randint(0, n)) for n in range(ncells)], + # dtype=np.str_, + # ) # str_data[validity_idx] = None df = pd.DataFrame({"int64": pd.Series(data, dtype=pd.Int64Dtype())}) From 6af0b5c832de28c6d188c214f7aad68e6384dbd7 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Mon, 20 May 2024 17:14:45 +0300 Subject: [PATCH 4/8] Expose WebP enums (#1974) --- tiledb/filter.py | 3 +++ tiledb/tests/test_webp.py | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tiledb/filter.py b/tiledb/filter.py index cf3351076f..dd9403c439 100644 --- a/tiledb/filter.py +++ b/tiledb/filter.py @@ -754,6 +754,9 @@ class WebpFilter(Filter): lt.FilterOption.WEBP_LOSSLESS, ) + # Expose WebP enums at the top level + WebpInputFormat = lt.WebpInputFormat + def __init__( self, input_format: lt.WebpInputFormat = None, diff --git a/tiledb/tests/test_webp.py b/tiledb/tests/test_webp.py index 08bdef51a3..a7c9eb0eb1 100644 --- a/tiledb/tests/test_webp.py +++ b/tiledb/tests/test_webp.py @@ -17,12 +17,12 @@ "format, quality, lossless", [ ( - tiledb.filter.lt.WebpInputFormat.WEBP_RGB, + tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGB, 100.0, False, ), # Test setting format with enum values - (tiledb.filter.lt.WebpInputFormat.WEBP_BGR, 50.0, True), - (tiledb.filter.lt.WebpInputFormat.WEBP_RGBA, 25.5, False), + (tiledb.filter.WebpFilter.WebpInputFormat.WEBP_BGR, 50.0, True), + (tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGBA, 25.5, False), (4, 0.0, True), # Test setting format with integral type ], ) @@ -31,7 +31,7 @@ def test_webp_ctor(format, quality, lossless): input_format=format, quality=quality, lossless=lossless ) np.testing.assert_equal( - webp_filter.input_format, tiledb.filter.lt.WebpInputFormat(format) + webp_filter.input_format, 
tiledb.filter.WebpFilter.WebpInputFormat(format) ) np.testing.assert_equal(webp_filter.quality, quality) np.testing.assert_equal(webp_filter.lossless, lossless) @@ -117,16 +117,18 @@ def make_image_data(width, height, pixel_depth): @pytest.mark.parametrize( "colorspace", [ - tiledb.filter.lt.WebpInputFormat.WEBP_RGB, - tiledb.filter.lt.WebpInputFormat.WEBP_BGR, - tiledb.filter.lt.WebpInputFormat.WEBP_RGBA, - tiledb.filter.lt.WebpInputFormat.WEBP_BGRA, + tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGB, + tiledb.filter.WebpFilter.WebpInputFormat.WEBP_BGR, + tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGBA, + tiledb.filter.WebpFilter.WebpInputFormat.WEBP_BGRA, ], ) @pytest.mark.parametrize("lossless", [True, False]) def test_webp_filter(width, height, colorspace, lossless): pixel_depth = ( - 3 if int(colorspace) < int(tiledb.filter.lt.WebpInputFormat.WEBP_RGBA) else 4 + 3 + if int(colorspace) < int(tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGBA) + else 4 ) data = make_image_data(width, height, pixel_depth) data = np.array(data, dtype=np.uint8).reshape(height, width * pixel_depth) From b55f628443c440ba2ad09d59a0fb227f91cfd0a9 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Tue, 21 May 2024 11:50:51 +0300 Subject: [PATCH 5/8] Update HISTORY and version for 0.29.1 (#1978) --- HISTORY.md | 16 ++++++++++++++++ misc/azure-release.yml | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 121ee4cb59..c45e618182 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,19 @@ +# Release 0.29.1 + +## Improvements + +* Expose WebP enums by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1974 +* Add Array.query in docs and improve docs in general by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1965 +* Add support for creating WKB/WKT attributes by @jp-dark in https://github.com/TileDB-Inc/TileDB-Py/pull/1912 +* Add wrapping for ls recursive by 
@kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1968 +* Fix compatibility for delete_fragments by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1966 + +## Build system changes + +* Add support for numpy2 by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1969 +* Fix syntax error in nightly build workflow by @ihnorton in https://github.com/TileDB-Inc/TileDB-Py/pull/1970 +* Set an upper bound for numpy to dodge 2.0 by @sgillies in https://github.com/TileDB-Inc/TileDB-Py/pull/1963 + # Release 0.29.0 * TileDB-Py 0.29.0 includes TileDB Embedded [2.23.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.23.0) diff --git a/misc/azure-release.yml b/misc/azure-release.yml index b884b89983..4634648bb5 100644 --- a/misc/azure-release.yml +++ b/misc/azure-release.yml @@ -6,7 +6,7 @@ stages: LIBTILEDB_VERSION: dev LIBTILEDB_SHA: dev ${{ else }}: - TILEDBPY_VERSION: 0.29.0 + TILEDBPY_VERSION: 0.29.1 # NOTE: *must* update both LIBTILEDB_VERSION and LIBTILEDB_SHA LIBTILEDB_VERSION: "2.23.0" # NOTE: *must* update both LIBTILEDB_VERSION and LIBTILEDB_SHA From 3f5b4c9c2baad293acd7496285965763aa6de01c Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 22 May 2024 13:06:48 +0300 Subject: [PATCH 6/8] Fix Query constructor to return error for dense arrays with return_incomplete=True (#1976) --- tiledb/libtiledb.pyx | 4 ++++ tiledb/tests/test_libtiledb.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index 0b919b7031..02373596b4 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -1895,6 +1895,10 @@ cdef class Query(object): if not use_arrow: raise TileDBError("Cannot initialize return_arrow with use_arrow=False") self.use_arrow = use_arrow + + if return_incomplete and not array.schema.sparse: + raise TileDBError("Incomplete queries are only supported for sparse arrays at this time") + self.return_incomplete 
= return_incomplete self.domain_index = DomainIndexer(array, query=self) diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index 7e936c86bc..e8736b1ca7 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -3598,6 +3598,22 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range T2.multi_index[101:105][""], np.array([], dtype=np.dtype(" Date: Wed, 22 May 2024 13:09:50 +0300 Subject: [PATCH 7/8] Fix CompressionFilter docstring example (#1973) --- tiledb/filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tiledb/filter.py b/tiledb/filter.py index dd9403c439..5bf646f393 100644 --- a/tiledb/filter.py +++ b/tiledb/filter.py @@ -79,7 +79,7 @@ class CompressionFilter(Filter): >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, - ... filters=tiledb.FilterList([tiledb.GzipFilter(level=10)])) + ... filters=tiledb.FilterList([tiledb.CompressionFilter(level=10)])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... 
tiledb.DenseArray.create(tmp + "/array", schema) From 299bbbd8fa43b746ebac92c2aed4b2ff2c784da8 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Thu, 23 May 2024 21:02:20 +0300 Subject: [PATCH 8/8] Fix ModuleNotFoundError: No module named 'numpy' on build (#1979) Co-authored-by: Isaiah Norton --- HISTORY.md | 1 + pyproject.toml | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index c45e618182..518f5909f3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,7 @@ ## Build system changes +* Fix ModuleNotFoundError: No module named 'numpy' on build by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1979 * Add support for numpy2 by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1969 * Fix syntax error in nightly build workflow by @ihnorton in https://github.com/TileDB-Inc/TileDB-Py/pull/1970 * Set an upper bound for numpy to dodge 2.0 by @sgillies in https://github.com/TileDB-Inc/TileDB-Py/pull/1963 diff --git a/pyproject.toml b/pyproject.toml index da864ddd41..d53cc7b2f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,12 @@ [build-system] -requires = ["setuptools>=64", "wheel", "pybind11", "Cython"] +requires = [ + "setuptools>=64", + "wheel", + "pybind11", + "Cython", + "numpy== 1.17.*,<2.0 ; python_version == '3.8' and platform_machine != 'aarch64'", + "numpy>=1.25 ; python_version >= '3.9'", +] build-backend = "setuptools.build_meta" [project]