From cce501a92a56d5ce22bd6cf5d71a72b2ae9d18fe Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 31 Oct 2023 11:09:17 +0000 Subject: [PATCH] Add links to numcodecs docs in tutorial (#1535) * Fix numcodecs links * Add release note --- docs/conf.py | 1 + docs/release.rst | 8 ++++++++ docs/tutorial.rst | 17 +++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1ffaeddef4..e33d10b2f6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -331,6 +331,7 @@ def setup(app): intersphinx_mapping = { "python": ("https://docs.python.org/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "numcodecs": ("https://numcodecs.readthedocs.io/en/stable/", None), } diff --git a/docs/release.rst b/docs/release.rst index 2f9b93a361..02552a8f93 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,12 @@ Release notes Unreleased ---------- +Docs +~~~~ + +* Add links to ``numcodecs`` docs in the tutorial. + By :user:`David Stansby <dstansby>` :issue:`1535`. + Maintenance ~~~~~~~~~~~ @@ -33,6 +39,8 @@ Maintenance * Allow ``black`` code formatter to be run with any Python version. By :user:`David Stansby <dstansby>` :issue:`1549`. + + .. _release_2.16.1: 2.16.1 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index f335db18d0..e563c16040 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1175,8 +1175,9 @@ A fixed-length unicode dtype is also available, e.g.:: For variable-length strings, the ``object`` dtype can be used, but a codec must be provided to encode the data (see also :ref:`tutorial_objects` below). At the time of writing there are four codecs available that can encode variable length string -objects: :class:`numcodecs.VLenUTF8`, :class:`numcodecs.JSON`, :class:`numcodecs.MsgPack`. -and :class:`numcodecs.Pickle`. E.g. using ``VLenUTF8``:: +objects: :class:`numcodecs.vlen.VLenUTF8`, :class:`numcodecs.json.JSON`, +:class:`numcodecs.msgpacks.MsgPack`, and :class:`numcodecs.pickles.Pickle`. +E.g. 
using ``VLenUTF8``:: >>> import numcodecs >>> z = zarr.array(text_data, dtype=object, object_codec=numcodecs.VLenUTF8()) @@ -1201,8 +1202,8 @@ is a short-hand for ``dtype=object, object_codec=numcodecs.VLenUTF8()``, e.g.:: 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) Variable-length byte strings are also supported via ``dtype=object``. Again an -``object_codec`` is required, which can be one of :class:`numcodecs.VLenBytes` or -:class:`numcodecs.Pickle`. For convenience, ``dtype=bytes`` (or ``dtype=str`` on Python +``object_codec`` is required, which can be one of :class:`numcodecs.vlen.VLenBytes` or +:class:`numcodecs.pickles.Pickle`. For convenience, ``dtype=bytes`` (or ``dtype=str`` on Python 2.7) can be used as a short-hand for ``dtype=object, object_codec=numcodecs.VLenBytes()``, e.g.:: @@ -1218,7 +1219,7 @@ e.g.:: b'\xe0\xb9\x80\xe0\xb8\xae\xe0\xb8\xa5\xe0\xb9\x82\xe0\xb8\xa5\xe0\xb9\x80\xe0\xb8\xa7\xe0\xb8\xb4\xe0\xb8\xa5\xe0\xb8\x94\xe0\xb9\x8c'], dtype=object) If you know ahead of time all the possible string values that can occur, you could -also use the :class:`numcodecs.Categorize` codec to encode each unique string value as an +also use the :class:`numcodecs.categorize.Categorize` codec to encode each unique string value as an integer. E.g.:: >>> categorize = numcodecs.Categorize(greetings, dtype=object) @@ -1245,7 +1246,7 @@ The best codec to use will depend on what type of objects are present in the arr At the time of writing there are three codecs available that can serve as a general purpose object codec and support encoding of a mixture of object types: -:class:`numcodecs.JSON`, :class:`numcodecs.MsgPack`. and :class:`numcodecs.Pickle`. +:class:`numcodecs.json.JSON`, :class:`numcodecs.msgpacks.MsgPack`, and :class:`numcodecs.pickles.Pickle`. 
For example, using the JSON codec:: @@ -1258,7 +1259,7 @@ For example, using the JSON codec:: array([42, 'foo', list(['bar', 'baz', 'qux']), {'a': 1, 'b': 2.2}, None], dtype=object) Not all codecs support encoding of all object types. The -:class:`numcodecs.Pickle` codec is the most flexible, supporting encoding any type +:class:`numcodecs.pickles.Pickle` codec is the most flexible, supporting encoding any type of Python object. However, if you are sharing data with anyone other than yourself, then Pickle is not recommended as it is a potential security risk. This is because malicious code can be embedded within pickled data. The JSON and MsgPack codecs do not have any @@ -1270,7 +1271,7 @@ Ragged arrays If you need to store an array of arrays, where each member array can be of any length and stores the same primitive type (a.k.a. a ragged array), the -:class:`numcodecs.VLenArray` codec can be used, e.g.:: +:class:`numcodecs.vlen.VLenArray` codec can be used, e.g.:: >>> z = zarr.empty(4, dtype=object, object_codec=numcodecs.VLenArray(int)) >>> z