From a43c8ed29f903d2ba7010c56daaa6c609019a772 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Fri, 8 Sep 2023 23:09:32 +0100
Subject: [PATCH 01/41] Avoid pandas 2.1.0 due to timestamp bug

Pandas 2.1.0 has a DataFrame constructor bug causing timestamps to have
inconsistent units (https://github.com/pandas-dev/pandas/issues/55014).
---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index ad41e42d..beaef34a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,7 +9,7 @@ dependencies:
   # Currently dask and numpy==1.16.0 clash
   # TODO: add support for numpy>=1.23
   - numpy!=1.15.0,!=1.16.0
-  - pandas>=0.23.0,!=1.0.0
+  - pandas>=0.23.0,!=1.0.0,!=2.1.0
   - pyarrow>=0.17.1,!=1.0.0
   - simplejson
   - minimalkv>=1.4.2
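A short background sketch (illustrative only, not part of the patch series):
pandas 2 keeps non-nanosecond datetime64 resolutions instead of silently
coercing everything to nanoseconds the way pandas 1.x did, which is what makes
the unit handling in the following patches necessary. Assuming pandas>=2.0 and
numpy are installed:

    import numpy as np
    import pandas as pd

    # pandas 1.x coerced this to datetime64[ns]; pandas 2 keeps the [s] unit.
    s = pd.Series(np.array(["2023-09-08"], dtype="datetime64[s]"))
    print(s.dtype)  # datetime64[s] under pandas 2

Code that assumes nanosecond timestamps while data is held at another
resolution is the source of the inconsistent-unit problems addressed below.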
""" - df = schema.internal().empty_table().to_pandas(date_as_object=date_as_object) + df = ( + schema.internal() + .empty_table() + .to_pandas( + date_as_object=date_as_object, + coerce_temporal_nanoseconds=coerce_temporal_nanoseconds, + ) + ) df.columns = df.columns.map(ensure_string_type) if columns is not None: diff --git a/plateau/core/index.py b/plateau/core/index.py index 6a8631fc..3cfaa759 100644 --- a/plateau/core/index.py +++ b/plateau/core/index.py @@ -136,11 +136,18 @@ def __repr__(self) -> str: class_=type(self).__name__, attrs=", ".join(repr_str) ) - def observed_values(self, date_as_object=True) -> np.ndarray: + def observed_values( + self, date_as_object=True, coerce_temporal_nanoseconds=True + ) -> np.ndarray: """Return an array of all observed values.""" keys = np.array(list(self.index_dct.keys())) labeled_array = pa.array(keys, type=self.dtype) - return np.array(labeled_array.to_pandas(date_as_object=date_as_object)) + return np.array( + labeled_array.to_pandas( + date_as_object=date_as_object, + coerce_temporal_nanoseconds=coerce_temporal_nanoseconds, + ) + ) @staticmethod def normalize_value(dtype: pa.DataType, value: Any) -> Any: @@ -476,7 +483,9 @@ def as_flat_series( table = _index_dct_to_table( self.index_dct, column=self.column, dtype=self.dtype ) - df = table.to_pandas(date_as_object=date_as_object) + df = table.to_pandas( + date_as_object=date_as_object, coerce_temporal_nanoseconds=True + ) if predicates is not None: # If there is a conjunction without any reference to the index @@ -862,7 +871,7 @@ def _parquet_bytes_to_dict(column: str, index_buffer: bytes): if column_type == pa.timestamp("us"): column_type = pa.timestamp("ns") - df = table.to_pandas() + df = table.to_pandas(coerce_temporal_nanoseconds=True) index_dct = dict( zip(df[column].values, (list(x) for x in df[_PARTITION_COLUMN_NAME].values)) diff --git a/plateau/serialization/_csv.py b/plateau/serialization/_csv.py index 5560d4b9..0cab490e 100644 --- a/plateau/serialization/_csv.py +++ b/plateau/serialization/_csv.py @@ -85,7 +85,7 @@ def restore_dataframe( def store(self, store, key_prefix, df): if isinstance(df, pa.Table): - df = df.to_pandas() + df = df.to_pandas(coerce_temporal_nanoseconds=True) key = f"{key_prefix}.csv" result_stream = BytesIO() iostream: BufferedIOBase diff --git a/plateau/serialization/_parquet.py b/plateau/serialization/_parquet.py index 953571e3..c557a381 100644 --- a/plateau/serialization/_parquet.py +++ b/plateau/serialization/_parquet.py @@ -259,7 +259,10 @@ def _restore_dataframe( df = ( parquet_file.schema.to_arrow_schema() .empty_table() - .to_pandas(date_as_object=date_as_object) + .to_pandas( + date_as_object=date_as_object, + coerce_temporal_nanoseconds=True, + ) ) index = pd.Index( pd.RangeIndex(start=0, stop=parquet_file.metadata.num_rows), @@ -284,7 +287,9 @@ def _restore_dataframe( table = _reset_dictionary_columns(table, exclude=categories) - df = table.to_pandas(date_as_object=date_as_object) + df = table.to_pandas( + date_as_object=date_as_object, coerce_temporal_nanoseconds=True + ) # XXX: Patch until Pyarrow bug is resolved: https://issues.apache.org/jira/browse/ARROW-18099?filter=-2 if categories: From fa15a47ed828304f66951a26e5db53d784aba37a Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Mon, 18 Sep 2023 13:10:21 +0100 Subject: [PATCH 03/41] Prevent dask from converting objects to strings --- plateau/io/dask/compression.py | 11 +- plateau/io/dask/dataframe.py | 149 +++++++++++--------- tests/io/dask/dataframe/test_compression.py | 11 +- 
From fa15a47ed828304f66951a26e5db53d784aba37a Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Mon, 18 Sep 2023 13:10:21 +0100
Subject: [PATCH 03/41] Prevent dask from converting objects to strings

---
 plateau/io/dask/compression.py              |  11 +-
 plateau/io/dask/dataframe.py                | 149 +++++++++++---------
 tests/io/dask/dataframe/test_compression.py |  11 +-
 tests/io/dask/dataframe/test_update.py      |  15 +-
 4 files changed, 102 insertions(+), 84 deletions(-)

diff --git a/plateau/io/dask/compression.py b/plateau/io/dask/compression.py
index e179ea41..eab81b05 100644
--- a/plateau/io/dask/compression.py
+++ b/plateau/io/dask/compression.py
@@ -2,6 +2,7 @@
 from functools import partial
 from typing import List, Union
 
+import dask
 import dask.dataframe as dd
 import pandas as pd
 
@@ -109,7 +110,8 @@ def pack_payload(df: dd.DataFrame, group_key: Union[List[str], str]) -> dd.DataF
 
     _pack_payload = partial(pack_payload_pandas, group_key=group_key)
 
-    return df.map_partitions(_pack_payload, meta=packed_meta)
+    with dask.config.set({"dataframe.convert-string": False}):
+        return df.map_partitions(_pack_payload, meta=packed_meta)
 
 
 def unpack_payload_pandas(
@@ -154,6 +156,7 @@ def unpack_payload(df: dd.DataFrame, unpack_meta: pd.DataFrame) -> dd.DataFrame:
         )
         return df
 
-    return df.map_partitions(
-        unpack_payload_pandas, unpack_meta=unpack_meta, meta=unpack_meta
-    )
+    with dask.config.set({"dataframe.convert-string": False}):
+        return df.map_partitions(
+            unpack_payload_pandas, unpack_meta=unpack_meta, meta=unpack_meta
+        )
diff --git a/plateau/io/dask/dataframe.py b/plateau/io/dask/dataframe.py
index 36fda4be..e0aea8ef 100644
--- a/plateau/io/dask/dataframe.py
+++ b/plateau/io/dask/dataframe.py
@@ -150,18 +150,20 @@ def read_dataset_as_ddf(
         divisions.sort()
         divisions_lst = list(divisions)
         divisions_lst.append(divisions[-1])
-    ddf = from_map(
-        ReadPlateauPartition(columns=columns),
-        mps,
-        meta=meta,
-        label="read-plateau",
-        divisions=divisions_lst,
-        store=ds_factory.store_factory,
-        categoricals=categoricals,
-        predicate_pushdown_to_io=predicate_pushdown_to_io,
-        dates_as_object=dates_as_object,
-        predicates=predicates,
-    )
+
+    with dask.config.set({"dataframe.convert-string": False}):
+        ddf = from_map(
+            ReadPlateauPartition(columns=columns),
+            mps,
+            meta=meta,
+            label="read-plateau",
+            divisions=divisions_lst,
+            store=ds_factory.store_factory,
+            categoricals=categoricals,
+            predicate_pushdown_to_io=predicate_pushdown_to_io,
+            dates_as_object=dates_as_object,
+            predicates=predicates,
+        )
     if dask_index_on:
         return ddf.set_index(dask_index_on, divisions=divisions_lst, sorted=True)
     else:
@@ -329,21 +331,24 @@ def store_dataset_from_ddf(
     if not overwrite:
         raise_if_dataset_exists(dataset_uuid=dataset_uuid, store=store)
 
-    mp_ser = _write_dataframe_partitions(
-        ddf=ddf,
-        store=ds_factory.store_factory,
-        dataset_uuid=dataset_uuid,
-        table=table,
-        secondary_indices=secondary_indices,
-        shuffle=shuffle,
-        repartition_ratio=repartition_ratio,
-        num_buckets=num_buckets,
-        sort_partitions_by=sort_partitions_by,
-        df_serializer=df_serializer,
-        metadata_version=metadata_version,
-        partition_on=partition_on,
-        bucket_by=bucket_by,
-    )
+
+    with dask.config.set({"dataframe.convert-string": False}):
+        mp_ser = _write_dataframe_partitions(
+            ddf=ddf,
+            store=ds_factory.store_factory,
+            dataset_uuid=dataset_uuid,
+            table=table,
+            secondary_indices=secondary_indices,
+            shuffle=shuffle,
+            repartition_ratio=repartition_ratio,
+            num_buckets=num_buckets,
+            sort_partitions_by=sort_partitions_by,
+            df_serializer=df_serializer,
+            metadata_version=metadata_version,
+            partition_on=partition_on,
+            bucket_by=bucket_by,
+        )
+
     return mp_ser.reduction(
         chunk=_id,
         aggregate=_commit_store_from_reduction,
@@ -471,21 +476,22 @@ def update_dataset_from_ddf(
     inferred_indices = _ensure_compatible_indices(ds_factory, secondary_indices)
     del secondary_indices
 
-    mp_ser = _write_dataframe_partitions(
-        ddf=ddf,
-        store=ds_factory.store_factory if ds_factory else store,
-        dataset_uuid=dataset_uuid or ds_factory.dataset_uuid,
-        table=table,
-        secondary_indices=inferred_indices,
-        shuffle=shuffle,
-        repartition_ratio=repartition_ratio,
-        num_buckets=num_buckets,
-        sort_partitions_by=sort_partitions_by,
-        df_serializer=df_serializer,
-        metadata_version=metadata_version,
-        partition_on=cast(List[str], partition_on),
-        bucket_by=bucket_by,
-    )
+    with dask.config.set({"dataframe.convert-string": False}):
+        mp_ser = _write_dataframe_partitions(
+            ddf=ddf,
+            store=ds_factory.store_factory if ds_factory else store,
+            dataset_uuid=dataset_uuid or ds_factory.dataset_uuid,
+            table=table,
+            secondary_indices=inferred_indices,
+            shuffle=shuffle,
+            repartition_ratio=repartition_ratio,
+            num_buckets=num_buckets,
+            sort_partitions_by=sort_partitions_by,
+            df_serializer=df_serializer,
+            metadata_version=metadata_version,
+            partition_on=cast(List[str], partition_on),
+            bucket_by=bucket_by,
+        )
 
     return mp_ser.reduction(
         chunk=_id,
@@ -567,24 +573,26 @@ def collect_dataset_metadata(
     mps = list(
         dispatch_metapartitions_from_factory(dataset_factory, predicates=predicates)
     )
-    if mps:
-        random.shuffle(mps)
-        # ensure that even with sampling at least one metapartition is returned
-        cutoff_index = max(1, int(len(mps) * frac))
-        mps = mps[:cutoff_index]
-        ddf = dd.from_delayed(
-            [
-                dask.delayed(MetaPartition.get_parquet_metadata)(
-                    mp, store=dataset_factory.store_factory
-                )
-                for mp in mps
-            ],
-            meta=_METADATA_SCHEMA,
-        )
-    else:
-        df = pd.DataFrame(columns=_METADATA_SCHEMA.keys())
-        df = df.astype(_METADATA_SCHEMA)
-        ddf = dd.from_pandas(df, npartitions=1)
+
+    with dask.config.set({"dataframe.convert-string": False}):
+        if mps:
+            random.shuffle(mps)
+            # ensure that even with sampling at least one metapartition is returned
+            cutoff_index = max(1, int(len(mps) * frac))
+            mps = mps[:cutoff_index]
+            ddf = dd.from_delayed(
+                [
+                    dask.delayed(MetaPartition.get_parquet_metadata)(
+                        mp, store=dataset_factory.store_factory
+                    )
+                    for mp in mps
+                ],
+                meta=_METADATA_SCHEMA,
+            )
+        else:
+            df = pd.DataFrame(columns=_METADATA_SCHEMA.keys())
+            df = df.astype(_METADATA_SCHEMA)
+            ddf = dd.from_pandas(df, npartitions=1)
 
     return ddf
@@ -651,12 +659,15 @@ def hash_dataset(
         columns=columns,
         dates_as_object=True,
     )
-    if not group_key:
-        return ddf.map_partitions(_hash_partition, meta="uint64").astype("uint64")
-    else:
-        ddf2 = pack_payload(ddf, group_key=group_key)
-        return (
-            ddf2.groupby(group_key)
-            .apply(_unpack_hash, unpack_meta=ddf._meta, subset=subset, meta="uint64")
-            .astype("uint64")
-        )
+    with dask.config.set({"dataframe.convert-string": False}):
+        if not group_key:
+            return ddf.map_partitions(_hash_partition, meta="uint64").astype("uint64")
+        else:
+            ddf2 = pack_payload(ddf, group_key=group_key)
+            return (
+                ddf2.groupby(group_key)
+                .apply(
+                    _unpack_hash, unpack_meta=ddf._meta, subset=subset, meta="uint64"
+                )
+                .astype("uint64")
+            )
diff --git a/tests/io/dask/dataframe/test_compression.py b/tests/io/dask/dataframe/test_compression.py
index c18fcb98..2ca358cd 100644
--- a/tests/io/dask/dataframe/test_compression.py
+++ b/tests/io/dask/dataframe/test_compression.py
@@ -1,3 +1,4 @@
+import dask
 import dask.dataframe as dd
 import pandas as pd
 import pandas.testing as pdt
@@ -13,9 +14,10 @@ def test_pack_payload(df_all_types):
     # For a single row dataframe the packing actually has a few more bytes
-    df = dd.from_pandas(
-        pd.concat([df_all_types] * 10, ignore_index=True), npartitions=3
-    )
+    with dask.config.set({"dataframe.convert-string": False}):
+        df = dd.from_pandas(
+            pd.concat([df_all_types] * 10, ignore_index=True), npartitions=3
+        )
 
     size_before = df.memory_usage(deep=True).sum()
 
     packed_df = pack_payload(df, group_key=list(df.columns[-2:]))
@@ -66,7 +68,8 @@ def test_pack_payload_pandas_empty(df_all_types):
 @pytest.mark.parametrize("num_group_cols", [1, 4])
 def test_pack_payload_roundtrip(df_all_types, num_group_cols):
     group_key = list(df_all_types.columns[-num_group_cols:])
-    df_all_types = dd.from_pandas(df_all_types, npartitions=2)
+    with dask.config.set({"dataframe.convert-string": False}):
+        df_all_types = dd.from_pandas(df_all_types, npartitions=2)
     pdt.assert_frame_equal(
         df_all_types.compute(),
         unpack_payload(
diff --git a/tests/io/dask/dataframe/test_update.py b/tests/io/dask/dataframe/test_update.py
index 3bb7c827..fe0c190f 100644
--- a/tests/io/dask/dataframe/test_update.py
+++ b/tests/io/dask/dataframe/test_update.py
@@ -24,13 +24,14 @@ def _id(part):
 
 def _update_dataset(partitions, *args, **kwargs):
     # TODO: Simplify once parse_input_to_metapartition is removed / obsolete
-    if isinstance(partitions, pd.DataFrame):
-        partitions = dd.from_pandas(partitions, npartitions=1)
-    elif partitions is not None:
-        delayed_partitions = [dask.delayed(_id)(part) for part in partitions]
-        partitions = dd.from_delayed(delayed_partitions)
-    else:
-        partitions = None
+    with dask.config.set({"dataframe.convert-string": False}):
+        if isinstance(partitions, pd.DataFrame):
+            partitions = dd.from_pandas(partitions, npartitions=1)
+        elif partitions is not None:
+            delayed_partitions = [dask.delayed(_id)(part) for part in partitions]
+            partitions = dd.from_delayed(delayed_partitions)
+        else:
+            partitions = None
 
     # Replace `table_name` with `table` keyword argument to enable shared test code
     # via `bound_update_dataset` fixture
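A minimal sketch of the dask option the patch above pins (illustrative, not
part of the patch; assumes a dask version that knows the
"dataframe.convert-string" setting, roughly 2023.7 onwards):

    import dask
    import dask.dataframe as dd
    import pandas as pd

    pdf = pd.DataFrame({"a": ["x", "y"]})  # object-dtype strings

    # Without the override, newer dask releases may convert object columns
    # to Arrow-backed string[pyarrow], which breaks plateau's schema checks.
    with dask.config.set({"dataframe.convert-string": False}):
        ddf = dd.from_pandas(pdf, npartitions=1)

    print(ddf.dtypes)  # "a" stays object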
From c182ffbf6f2cb712bab5f5922f3921fb7ed7386d Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Mon, 18 Sep 2023 13:16:40 +0100
Subject: [PATCH 04/41] Avoid dask 2023.9.2 due to failing tests

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index beaef34a..5ad2f3c8 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - nodefaults
 dependencies:
-  - dask!=2021.5.1,!=2021.6.0 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions
+  - dask!=2021.5.1,!=2021.6.0, !=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions
   - decorator
   - msgpack-python>=0.5.2
   # Currently dask and numpy==1.16.0 clash
From 8b1e0af898a55bcf25fe5c8c494f85db56cac2c8 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Mon, 18 Sep 2023 18:41:29 +0100
Subject: [PATCH 05/41] Cast metadata bytes to object to get around pandas bug

---
 plateau/core/common_metadata.py   | 13 ++++++-------
 plateau/serialization/_parquet.py |  5 ++++-
 plateau/serialization/_util.py    |  8 ++++++++
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/plateau/core/common_metadata.py b/plateau/core/common_metadata.py
index 9dcc3f54..9cc6224d 100644
--- a/plateau/core/common_metadata.py
+++ b/plateau/core/common_metadata.py
@@ -16,6 +16,7 @@
 from plateau.core.naming import SINGLE_TABLE
 from plateau.core.utils import ensure_string_type
 from plateau.serialization._parquet import PARQUET_VERSION
+from plateau.serialization._util import schema_metadata_bytes_to_object
 
 _logger = logging.getLogger()
 
@@ -758,14 +759,12 @@ def empty_dataframe_from_schema(
     DataFrame
         Empty DataFrame with requested columns and types.
     """
+    # HACK: Cast bytes to object in metadata until Pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/50127
+    schema = schema_metadata_bytes_to_object(schema.internal())
 
-    df = (
-        schema.internal()
-        .empty_table()
-        .to_pandas(
-            date_as_object=date_as_object,
-            coerce_temporal_nanoseconds=coerce_temporal_nanoseconds,
-        )
+    df = schema.empty_table().to_pandas(
+        date_as_object=date_as_object,
+        coerce_temporal_nanoseconds=coerce_temporal_nanoseconds,
     )
     df.columns = df.columns.map(ensure_string_type)
 
     if columns is not None:
diff --git a/plateau/serialization/_parquet.py b/plateau/serialization/_parquet.py
index c557a381..553547dd 100644
--- a/plateau/serialization/_parquet.py
+++ b/plateau/serialization/_parquet.py
@@ -23,7 +23,7 @@
     filter_df_from_predicates,
 )
 from ._io_buffer import BlockBuffer
-from ._util import ensure_unicode_string_type
+from ._util import ensure_unicode_string_type, schema_metadata_bytes_to_object
 
 try:
     # Only check for BotoStore instance if boto is really installed
@@ -287,6 +287,9 @@ def _restore_dataframe(
 
         table = _reset_dictionary_columns(table, exclude=categories)
 
+        # HACK: Cast bytes to object in metadata until Pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/50127
+        table = table.cast(schema_metadata_bytes_to_object(table.schema))
+
         df = table.to_pandas(
             date_as_object=date_as_object, coerce_temporal_nanoseconds=True
         )
diff --git a/plateau/serialization/_util.py b/plateau/serialization/_util.py
index f556c981..223eca6d 100644
--- a/plateau/serialization/_util.py
+++ b/plateau/serialization/_util.py
@@ -1,3 +1,6 @@
+from pyarrow import Schema
+
+
 def _check_contains_null(val):
     if isinstance(val, bytes):
         for byte in val:
@@ -16,3 +19,8 @@ def ensure_unicode_string_type(obj):
         return obj.decode("utf8")
     else:
         return str(obj)
+
+
+def schema_metadata_bytes_to_object(schema: Schema) -> Schema:
+    meta = schema.metadata[b"pandas"].decode().replace("bytes", "object").encode()
+    return schema.with_metadata({b"pandas": meta})

From d35b3e044b7dbbd14da0f78d513965b524195959 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Tue, 19 Sep 2023 09:44:30 +0100
Subject: [PATCH 06/41] Generate arrow-compat reference data for 13.0.0

---
 reference-data/arrow-compat/13.0.0.parquet | Bin 0 -> 18661 bytes
 tests/serialization/test_arrow_compat.py   |   1 +
 2 files changed, 1 insertion(+)
 create mode 100644 reference-data/arrow-compat/13.0.0.parquet

diff --git a/reference-data/arrow-compat/13.0.0.parquet b/reference-data/arrow-compat/13.0.0.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2c97acf946bb59bb072778a371a1ee64131ff9ce
GIT binary patch
literal 18661
[... 18661 bytes of base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/tests/serialization/test_arrow_compat.py b/tests/serialization/test_arrow_compat.py
index f5a99467..10e0dedc 100644
--- a/tests/serialization/test_arrow_compat.py
+++ b/tests/serialization/test_arrow_compat.py
@@ -27,6 +27,7 @@
     "9.0.0",
     "10.0.1",
     "11.0.0",
+    "13.0.0",
 ]
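A sketch of what the new helper in patch 05 rewrites (illustrative, assuming
pyarrow is installed; note the workaround is a plain string replacement, so it
relies on "bytes" appearing in the pandas metadata JSON only as a dtype name):

    import pyarrow as pa

    schema = pa.schema([pa.field("col", pa.binary())]).with_metadata(
        {b"pandas": b'{"columns": [{"name": "col", "pandas_type": "bytes"}]}'}
    )
    meta = schema.metadata[b"pandas"].decode().replace("bytes", "object").encode()
    patched = schema.with_metadata({b"pandas": meta})
    print(patched.metadata[b"pandas"])  # pandas_type is now "object"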
From 5715758e4dc0b51d604c96963eacf8dfa96a788c Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Tue, 19 Sep 2023 10:57:47 +0100
Subject: [PATCH 07/41] Change package version in docs to match environment.yml

---
 docs/environment-docs.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml
index c130e24e..6cdcc4d3 100644
--- a/docs/environment-docs.yml
+++ b/docs/environment-docs.yml
@@ -3,12 +3,12 @@ channels:
   - conda-forge
 dependencies:
   - python>=3.8
-  - dask[dataframe]
+  - dask[dataframe]!=2023.9.2
   - decorator
   - msgpack-python>=0.5.2
   # Currently dask and numpy==1.16.0 clash
   - numpy!=1.15.0,!=1.16.0
-  - pandas>=0.23.0, !=1.0.0
+  - pandas>=0.23.0, !=1.0.0,!=2.1.0
   - pyarrow>=0.17.1,!=1.0.0
   - simplejson
   - minimalkv
From 1dbe22951d8ee0eb2cc95d04882f6c04478d537a Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Tue, 19 Sep 2023 17:08:15 +0100
Subject: [PATCH 08/41] Only use coerce timestamps arg on pyarrow>=13

---
 plateau/core/common_metadata.py   | 17 +++++++++++----
 plateau/core/index.py             | 28 ++++++++++++++++++++-----
 plateau/serialization/_csv.py     | 10 ++++++++-
 plateau/serialization/_parquet.py | 35 ++++++++++++++++++++++---------
 4 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/plateau/core/common_metadata.py b/plateau/core/common_metadata.py
index 9cc6224d..c5433f6c 100644
--- a/plateau/core/common_metadata.py
+++ b/plateau/core/common_metadata.py
@@ -10,6 +10,7 @@
 import pyarrow.parquet as pq
 import simplejson
 from minimalkv import KeyValueStore
+from packaging import version
 
 from plateau.core import naming
 from plateau.core._compat import load_json
@@ -29,6 +30,8 @@
     "normalize_column_order",
 )
 
+PYARROW_LT_13 = version.parse(pa.__version__) < version.parse("13")
+
 
 class SchemaWrapper:
     """Wrapper object for pyarrow.Schema to handle forwards and backwards
@@ -753,6 +756,7 @@ def empty_dataframe_from_schema(
         Cast dates to objects.
     coerce_temporal_nanoseconds: bool
         Coerce date32, date64, duration and timestamp units to nanoseconds to retain behaviour of pandas 1.x.
+        Only applicable to pandas version >= 2.0 and PyArrow version >= 13.0.0.
 
     Returns
     -------
@@ -762,10 +766,15 @@ def empty_dataframe_from_schema(
     # HACK: Cast bytes to object in metadata until Pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/50127
     schema = schema_metadata_bytes_to_object(schema.internal())
 
-    df = schema.empty_table().to_pandas(
-        date_as_object=date_as_object,
-        coerce_temporal_nanoseconds=coerce_temporal_nanoseconds,
-    )
+    if PYARROW_LT_13:
+        # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+        # as it was introduced for backwards compatibility with pandas 1.x
+        df = schema.empty_table().to_pandas(date_as_object=date_as_object)
+    else:
+        df = schema.empty_table().to_pandas(
+            date_as_object=date_as_object,
+            coerce_temporal_nanoseconds=coerce_temporal_nanoseconds,
+        )
     df.columns = df.columns.map(ensure_string_type)
 
     if columns is not None:
diff --git a/plateau/core/index.py b/plateau/core/index.py
index 3cfaa759..d6a79aeb 100644
--- a/plateau/core/index.py
+++ b/plateau/core/index.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.parquet as pq
+from packaging import version
 from toolz.itertoolz import partition_all
 
 import plateau.core._time
@@ -37,6 +38,8 @@
     "PartitionIndex",
 )
 
+PYARROW_LT_13 = version.parse(pa.__version__) < version.parse("13")
+
 
 class IndexBase(CopyMixin):
     """Initialize an IndexBase.
@@ -142,8 +145,13 @@ def observed_values(
         """Return an array of all observed values."""
         keys = np.array(list(self.index_dct.keys()))
         labeled_array = pa.array(keys, type=self.dtype)
+
+        # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+        # as it was introduced for backwards compatibility with pandas 1.x
         return np.array(
-            labeled_array.to_pandas(
+            labeled_array.to_pandas(date_as_object=date_as_object)
+            if PYARROW_LT_13
+            else labeled_array.to_pandas(
                 date_as_object=date_as_object,
                 coerce_temporal_nanoseconds=coerce_temporal_nanoseconds,
             )
         )
 
     @staticmethod
     def normalize_value(dtype: pa.DataType, value: Any) -> Any:
@@ -483,9 +491,14 @@ def as_flat_series(
             table = _index_dct_to_table(
                 self.index_dct, column=self.column, dtype=self.dtype
             )
-            df = table.to_pandas(
-                date_as_object=date_as_object, coerce_temporal_nanoseconds=True
-            )
+            if PYARROW_LT_13:
+                # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+                # as it was introduced for backwards compatibility with pandas 1.x
+                df = table.to_pandas(date_as_object=date_as_object)
+            else:
+                df = table.to_pandas(
+                    date_as_object=date_as_object, coerce_temporal_nanoseconds=True
+                )
 
         if predicates is not None:
             # If there is a conjunction without any reference to the index
@@ -871,7 +884,12 @@ def _parquet_bytes_to_dict(column: str, index_buffer: bytes):
     if column_type == pa.timestamp("us"):
         column_type = pa.timestamp("ns")
 
-    df = table.to_pandas(coerce_temporal_nanoseconds=True)
+    if PYARROW_LT_13:
+        # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+        # as it was introduced for backwards compatibility with pandas 1.x
+        df = table.to_pandas()
+    else:
+        df = table.to_pandas(coerce_temporal_nanoseconds=True)
 
     index_dct = dict(
         zip(df[column].values, (list(x) for x in df[_PARTITION_COLUMN_NAME].values))
     )
diff --git a/plateau/serialization/_csv.py b/plateau/serialization/_csv.py
index 0cab490e..f8be37ab 100644
--- a/plateau/serialization/_csv.py
+++ b/plateau/serialization/_csv.py
@@ -8,6 +8,7 @@
 import pandas as pd
 import pyarrow as pa
 from minimalkv import KeyValueStore
+from packaging import version
 from pandas.errors import EmptyDataError
 
 from ._generic import (
@@ -18,6 +19,8 @@
     filter_df_from_predicates,
 )
 
+PYARROW_LT_13 = version.parse(pa.__version__) < version.parse("13")
+
 
 class CsvSerializer(DataFrameSerializer):
     def __init__(self, compress=True):
@@ -85,7 +88,12 @@ def restore_dataframe(
 
     def store(self, store, key_prefix, df):
         if isinstance(df, pa.Table):
-            df = df.to_pandas(coerce_temporal_nanoseconds=True)
+            if PYARROW_LT_13:
+                # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+                # as it was introduced for backwards compatibility with pandas 1.x
+                df = df.to_pandas()
+            else:
+                df = df.to_pandas(coerce_temporal_nanoseconds=True)
         key = f"{key_prefix}.csv"
         result_stream = BytesIO()
         iostream: BufferedIOBase
diff --git a/plateau/serialization/_parquet.py b/plateau/serialization/_parquet.py
index 553547dd..e70b9cee 100644
--- a/plateau/serialization/_parquet.py
+++ b/plateau/serialization/_parquet.py
@@ -41,6 +41,7 @@
 BACKOFF_TIME = 0.01  # 10 ms
 PYARROW_LT_6 = version.parse(pa.__version__) < version.parse("6")
 PYARROW_LT_8 = version.parse(pa.__version__) < version.parse("8")
+PYARROW_LT_13 = version.parse(pa.__version__) < version.parse("13")
 
 # Since pyarrow 6, the Parquet version/features can be selected more granular.
 # Version 2.0 is equal to 2.4 but 2.4 doesn't trigger deprecation warnings.
@@ -256,14 +257,23 @@ def _restore_dataframe(
         # ARROW-5139 Column projection with empty columns returns a table w/out index
         if columns == []:
             # Create an arrow table with expected index length.
-            df = (
-                parquet_file.schema.to_arrow_schema()
-                .empty_table()
-                .to_pandas(
-                    date_as_object=date_as_object,
-                    coerce_temporal_nanoseconds=True,
+            if PYARROW_LT_13:
+                # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+                # as it was introduced for backwards compatibility with pandas 1.x
+                df = (
+                    parquet_file.schema.to_arrow_schema()
+                    .empty_table()
+                    .to_pandas(date_as_object=date_as_object)
+                )
+            else:
+                df = (
+                    parquet_file.schema.to_arrow_schema()
+                    .empty_table()
+                    .to_pandas(
+                        date_as_object=date_as_object,
+                        coerce_temporal_nanoseconds=True,
+                    )
                 )
-            )
             index = pd.Index(
                 pd.RangeIndex(start=0, stop=parquet_file.metadata.num_rows),
                 dtype="int64",
@@ -290,9 +300,14 @@ def _restore_dataframe(
         # HACK: Cast bytes to object in metadata until Pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/50127
         table = table.cast(schema_metadata_bytes_to_object(table.schema))
 
-        df = table.to_pandas(
-            date_as_object=date_as_object, coerce_temporal_nanoseconds=True
-        )
+        if PYARROW_LT_13:
+            # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist
+            # as it was introduced for backwards compatibility with pandas 1.x
+            df = table.to_pandas(date_as_object=date_as_object)
+        else:
+            df = table.to_pandas(
+                date_as_object=date_as_object, coerce_temporal_nanoseconds=True
+            )
 
         # XXX: Patch until Pyarrow bug is resolved: https://issues.apache.org/jira/browse/ARROW-18099?filter=-2
         if categories:
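The same version gate is now repeated in four modules. One possible
consolidation, shown here only as an editorial sketch and not as part of this
series, is a shared helper that drops the keyword on older pyarrow:

    import pyarrow as pa
    from packaging import version

    PYARROW_LT_13 = version.parse(pa.__version__) < version.parse("13")

    def to_pandas_compat(data, **kwargs):
        # coerce_temporal_nanoseconds only exists from pyarrow 13 onwards,
        # where it restores the pandas 1.x nanosecond behaviour.
        if PYARROW_LT_13:
            kwargs.pop("coerce_temporal_nanoseconds", None)
        return data.to_pandas(**kwargs)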
From 062f28375b79d1a5f623bc267b09438bebd2221b Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 09:43:25 +0100
Subject: [PATCH 09/41] Remove pyarrow<8 tests from ci.yml

---
 .github/workflows/ci.yml | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 52a302f4..124630d4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,25 +25,13 @@ jobs:
       matrix:
         numfocus_nightly: [false]
         os: ["ubuntu-latest"]
-        pyarrow: ["3.0.0", "4.0.1", "nightly"]
+        pyarrow: ["nightly"]
         python: ["3.8"]
         include:
           - numfocus_nightly: true
             os: "ubuntu-latest"
-            pyarrow: "4.0.1"
-            python: "3.10"
-          - numfocus_nightly: false
-            os: "ubuntu-latest"
-            pyarrow: "5.0.0"
-            python: "3.9"
-          - numfocus_nightly: false
-            os: "ubuntu-latest"
-            pyarrow: "6.0.1"
-            python: "3.9"
-          - numfocus_nightly: false
-            os: "ubuntu-latest"
-            pyarrow: "7.0.0"
-            python: "3.10"
+            pyarrow: "13.0.0"
+            python: "3.11"
           - numfocus_nightly: false
             os: "ubuntu-latest"
             pyarrow: "8.0.1"
@@ -60,10 +48,18 @@ jobs:
             os: "ubuntu-latest"
             pyarrow: "11.0.0"
             python: "3.11"
+          - numfocus_nightly: false
+            os: "ubuntu-latest"
+            pyarrow: "12.0.0"
+            python: "3.11"
+          - numfocus_nightly: false
+            os: "ubuntu-latest"
+            pyarrow: "13.0.0"
+            python: "3.11"
           - numfocus_nightly: false
             os: "macos-latest"
-            pyarrow: "4.0.1"
-            python: "3.8"
+            pyarrow: "13.0.0"
+            python: "3.11"
 
     continue-on-error: ${{ matrix.numfocus_nightly || matrix.pyarrow == 'nightly' }}
     runs-on: ${{ matrix.os }}
From 3c56634e8ac13d626716a4f0aea1e7870c2b3fa7 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 10:02:55 +0100
Subject: [PATCH 10/41] Generate arrow-compat reference data for 12.0.0

---
 reference-data/arrow-compat/12.0.0.parquet | Bin 0 -> 18562 bytes
 tests/serialization/test_arrow_compat.py   |   1 +
 2 files changed, 1 insertion(+)
 create mode 100644 reference-data/arrow-compat/12.0.0.parquet

diff --git a/reference-data/arrow-compat/12.0.0.parquet b/reference-data/arrow-compat/12.0.0.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..33c0b3b4de07c72b1e9e76e975b435dac1e59256
GIT binary patch
literal 18562
[... 18562 bytes of base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/tests/serialization/test_arrow_compat.py b/tests/serialization/test_arrow_compat.py
index 10e0dedc..087d64ca 100644
--- a/tests/serialization/test_arrow_compat.py
+++ b/tests/serialization/test_arrow_compat.py
@@ -27,6 +27,7 @@
     "9.0.0",
     "10.0.1",
     "11.0.0",
+    "12.0.0",
     "13.0.0",
 ]
From f0f06622ef79cf696209abf14d474fff7cadc868 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 10:09:46 +0100
Subject: [PATCH 11/41] Avoid pandas 2.1.0 in numfocus nightly ci test

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 124630d4..657ca4b8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,6 +30,7 @@ jobs:
         include:
           - numfocus_nightly: true
             os: "ubuntu-latest"
+            pandas: "2.0.3" # Avoid due to bug in Pandas 2.1.0 (#55014)
             pyarrow: "13.0.0"
             python: "3.11"
           - numfocus_nightly: false

From 3c56634e8ac13d626716a4f0aea1e7870c2b3fa7 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 10:55:31 +0100
Subject: [PATCH 12/41] Avoid pandas 2.1.0.* in numfocus_nightly pip install

---
 .github/workflows/ci.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 657ca4b8..c5b4f7ae 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,7 +30,6 @@ jobs:
         include:
           - numfocus_nightly: true
             os: "ubuntu-latest"
-            pandas: "2.0.3" # Avoid due to bug in Pandas 2.1.0 (#55014)
             pyarrow: "13.0.0"
             python: "3.11"
           - numfocus_nightly: false
@@ -96,10 +95,11 @@ jobs:
       # nightlies and the latest release would otherwise work together.
       run: micromamba update -c arrow-nightlies -c conda-forge arrow-cpp pyarrow
       if: matrix.pyarrow == 'nightly'
-    - name: Pip Instal NumFOCUS nightly
+    - name: Pip Install NumFOCUS nightly
       # NumFOCUS nightly wheels, contains numpy and pandas
       # TODO(gh-45): Re-add numpy
-      run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas
+      # TODO: Remove pandas version stipulation once https://github.com/pandas-dev/pandas/issues/55014 is fixed
+      run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas!=2.1.0.*"
       if: matrix.numfocus_nightly
     - name: Test import
       run: |

From 60079ee98dc63817d2e2dedd3e1590197be95491 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 14:57:10 +0100
Subject: [PATCH 13/41] Add changelog entry and update setup.cfg

---
 CHANGES.rst | 6 ++++++
 setup.cfg   | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 82747f66..f781b7d3 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,12 @@
 Changelog
 =========
 
+Plateau 4.2.0 (unreleased)
+==========================
+
+* Support pandas 2
+* No longer test for pyarrow < 8
+
 Plateau 4.1.5 (2023-03-14)
 ==========================
diff --git a/setup.cfg b/setup.cfg
index 8c54f0e8..d822c11f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,12 +13,12 @@ classifiers =
 [options]
 include_package_data = true
 install_requires =
-    dask[dataframe]!=2021.5.1,!=2021.6.0 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions
+    dask[dataframe]!=2021.5.1,!=2021.6.0,!=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions
     decorator
     msgpack>=0.5.2
     # Currently dask and numpy==1.16.0 clash
     numpy!=1.15.0,!=1.16.0
-    pandas>=0.23.0, !=1.0.0
+    pandas>=0.23.0,!=1.0.0,!=2.1.0
     pyarrow>=0.17.1,!=1.0.0
     simplejson
     minimalkv>=1.4.2
From 2ccb2feedab16aac2cbb3f681760f56a87e3ece7 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 15:25:32 +0100
Subject: [PATCH 14/41] Shrink PR

---
 .github/workflows/ci.yml                   |  12 ++----------
 reference-data/arrow-compat/12.0.0.parquet | Bin 18562 -> 0 bytes
 tests/serialization/test_arrow_compat.py   |   1 -
 3 files changed, 2 insertions(+), 11 deletions(-)
 delete mode 100644 reference-data/arrow-compat/12.0.0.parquet

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c5b4f7ae..351cbb35 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,7 +30,7 @@ jobs:
         include:
           - numfocus_nightly: true
             os: "ubuntu-latest"
-            pyarrow: "13.0.0"
+            pyarrow: "11.0.0"
             python: "3.11"
           - numfocus_nightly: false
             os: "ubuntu-latest"
@@ -48,17 +48,9 @@ jobs:
             os: "ubuntu-latest"
             pyarrow: "11.0.0"
             python: "3.11"
-          - numfocus_nightly: false
-            os: "ubuntu-latest"
-            pyarrow: "12.0.0"
-            python: "3.11"
-          - numfocus_nightly: false
-            os: "ubuntu-latest"
-            pyarrow: "13.0.0"
-            python: "3.11"
           - numfocus_nightly: false
             os: "macos-latest"
-            pyarrow: "13.0.0"
+            pyarrow: "11.0.0"
             python: "3.11"
 
     continue-on-error: ${{ matrix.numfocus_nightly || matrix.pyarrow == 'nightly' }}
diff --git a/reference-data/arrow-compat/12.0.0.parquet b/reference-data/arrow-compat/12.0.0.parquet
deleted file mode 100644
index 33c0b3b4de07c72b1e9e76e975b435dac1e59256..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 18562
[... 18562 bytes of base85-encoded binary data omitted ...]

diff --git a/tests/serialization/test_arrow_compat.py b/tests/serialization/test_arrow_compat.py
index 087d64ca..10e0dedc 100644
--- a/tests/serialization/test_arrow_compat.py
+++ b/tests/serialization/test_arrow_compat.py
@@ -27,7 +27,6 @@
     "9.0.0",
     "10.0.1",
     "11.0.0",
-    "12.0.0",
     "13.0.0",
 ]
From d52f76cfea307b7edb67eda1b561b605d3a34649 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Wed, 20 Sep 2023 16:32:03 +0100
Subject: [PATCH 15/41] Add dask tests for lines marked as uncovered by codecov

---
 tests/io/dask/dataframe/test_read.py | 40 ++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/tests/io/dask/dataframe/test_read.py b/tests/io/dask/dataframe/test_read.py
index 348e091f..e6480d50 100644
--- a/tests/io/dask/dataframe/test_read.py
+++ b/tests/io/dask/dataframe/test_read.py
@@ -277,3 +277,43 @@ def restore_dataframe(cls, store, key, filter_query, columns, *args, **kwargs):
     )["colA"]
     assert_dask_eq(ddf_auto, ddf_manual)
     assert fake_called
+
+
+def test_dask_index_on_non_string_raises(store_factory):
+    dataset_uuid = "dataset_uuid"
+    colA = 1
+    df1 = pd.DataFrame({colA: [1, 2]})
+    store_dataframes_as_dataset(
+        store=store_factory, dataset_uuid=dataset_uuid, dfs=[df1]
+    )
+    with pytest.raises(
+        TypeError,
+        match=f"The paramter `dask_index_on` must be a string but got {type(colA)}",
+    ):
+        read_dataset_as_ddf(
+            dataset_uuid=dataset_uuid,
+            store=store_factory,
+            table="table",
+            dask_index_on=colA,
+        )
+
+
+def test_dask_dispatch_by_raises_if_index_on_not_none(store_factory):
+    dataset_uuid = "dataset_uuid"
+    colA = "ColumnA"
+    df1 = pd.DataFrame({colA: [1, 2]})
+    store_dataframes_as_dataset(
+        store=store_factory, dataset_uuid=dataset_uuid, dfs=[df1]
+    )
+    with pytest.raises(
+        ValueError,
+        match="`read_dataset_as_ddf` got parameters `dask_index_on` and `dispatch_by`. "
+        "Note that `dispatch_by` can only be used if `dask_index_on` is None.",
+    ):
+        read_dataset_as_ddf(
+            dataset_uuid=dataset_uuid,
+            store=store_factory,
+            table="table",
+            dask_index_on=colA,
+            dispatch_by=[colA],
+        )

From fae951d1db1c36f0c64eafda57805fea48c8f1a7 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Thu, 21 Sep 2023 12:45:37 +0100
Subject: [PATCH 16/41] Check conda env before verbose import

---
 .github/workflows/ci.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 351cbb35..8a7c3203 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -94,12 +94,15 @@ jobs:
      # TODO: Remove pandas version stipulation once https://github.com/pandas-dev/pandas/issues/55014 is fixed
       run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas!=2.1.0.*"
       if: matrix.numfocus_nightly
+    # TODO: Remove check conda env stage and -vvv from import
+    - name: Check conda env
+      run: mamba list
     - name: Test import
       run: |
         python -c "import plateau"
         python -c "import plateau.api"
         python -c "import plateau.api.dataset"
-        python -c "import plateau.api.serialization"
+        python -vvv -c "import plateau.api.serialization"
         python -c "import plateau.core"
         python -c "import plateau.io"
         python -c "import plateau.io_components"
From 91ecebfd05f7148a91faee1a816ef24c55c0dc61 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Thu, 21 Sep 2023 12:51:43 +0100
Subject: [PATCH 17/41] Use micromamba instead of mamba

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8a7c3203..401d63bb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -96,7 +96,7 @@ jobs:
       if: matrix.numfocus_nightly
     # TODO: Remove check conda env stage and -vvv from import
     - name: Check conda env
-      run: mamba list
+      run: micromamba list
     - name: Test import
       run: |

From 62481925aacf614ebfd3a25530b3b082fcbb4524 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Thu, 21 Sep 2023 13:03:37 +0100
Subject: [PATCH 18/41] Pin pandas<2.1.0 due to bug in 2.1.0 and 2.1.1

---
 .github/workflows/ci.yml  | 2 +-
 docs/environment-docs.yml | 2 +-
 environment.yml           | 2 +-
 setup.cfg                 | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 401d63bb..bdf5a8f3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -92,7 +92,7 @@ jobs:
       # NumFOCUS nightly wheels, contains numpy and pandas
       # TODO(gh-45): Re-add numpy
       # TODO: Remove pandas version stipulation once https://github.com/pandas-dev/pandas/issues/55014 is fixed
-      run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas!=2.1.0.*"
+      run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas<2.1.0"
       if: matrix.numfocus_nightly
diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml
index 6cdcc4d3..5c1f8528 100644
--- a/docs/environment-docs.yml
+++ b/docs/environment-docs.yml
@@ -8,7 +8,7 @@ dependencies:
   - msgpack-python>=0.5.2
   # Currently dask and numpy==1.16.0 clash
   - numpy!=1.15.0,!=1.16.0
-  - pandas>=0.23.0, !=1.0.0,!=2.1.0
+  - pandas>=0.23.0, !=1.0.0, <2.1.0
   - pyarrow>=0.17.1,!=1.0.0
   - simplejson
   - minimalkv
diff --git a/environment.yml b/environment.yml
index 5ad2f3c8..6724228f 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,7 +9,7 @@ dependencies:
   # Currently dask and numpy==1.16.0 clash
   # TODO: add support for numpy>=1.23
   - numpy!=1.15.0,!=1.16.0
-  - pandas>=0.23.0,!=1.0.0,!=2.1.0
+  - pandas>=0.23.0,!=1.0.0,<2.1.0
   - pyarrow>=0.17.1,!=1.0.0
   - simplejson
   - minimalkv>=1.4.2
diff --git a/setup.cfg b/setup.cfg
index d822c11f..3bedce64 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -18,7 +18,7 @@ install_requires =
     msgpack>=0.5.2
     # Currently dask and numpy==1.16.0 clash
     numpy!=1.15.0,!=1.16.0
-    pandas>=0.23.0,!=1.0.0,!=2.1.0
+    pandas>=0.23.0,!=1.0.0,<2.1.0
     pyarrow>=0.17.1,!=1.0.0
     simplejson
     minimalkv>=1.4.2
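A quick check of the pin chosen above (illustrative; it uses the packaging
library that the version-gating code already depends on):

    from packaging.specifiers import SpecifierSet

    pins = SpecifierSet(">=0.23.0,!=1.0.0,<2.1.0")
    # "<2.1.0" excludes both broken releases, unlike "!=2.1.0",
    # which would still admit 2.1.1.
    print("2.1.0" in pins)  # False
    print("2.1.1" in pins)  # False
    print("2.0.3" in pins)  # True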
From 807650c684bb4ba250d7fee8001370b941f3195a Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Mon, 25 Sep 2023 13:40:57 +0100
Subject: [PATCH 19/41] Check if adding pyarrow 13 tests improves coverage

---
 .github/workflows/ci.yml                   |  16 ++++++++++++++++
 reference-data/arrow-compat/12.0.0.parquet | Bin 0 -> 18562 bytes
 tests/serialization/test_arrow_compat.py   |   1 +
 3 files changed, 17 insertions(+)
 create mode 100644 reference-data/arrow-compat/12.0.0.parquet

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bdf5a8f3..ed2f8874 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -32,6 +32,10 @@ jobs:
             os: "ubuntu-latest"
             pyarrow: "11.0.0"
             python: "3.11"
+          - numfocus_nightly: true
+            os: "ubuntu-latest"
+            pyarrow: "13.0.0"
+            python: "3.11"
           - numfocus_nightly: false
             os: "ubuntu-latest"
             pyarrow: "8.0.1"
@@ -48,10 +52,22 @@ jobs:
             os: "ubuntu-latest"
             pyarrow: "11.0.0"
             python: "3.11"
+          - numfocus_nightly: false
+            os: "ubuntu-latest"
+            pyarrow: "12.0.0"
+            python: "3.11"
+          - numfocus_nightly: false
+            os: "ubuntu-latest"
+            pyarrow: "13.0.0"
+            python: "3.11"
           - numfocus_nightly: false
             os: "macos-latest"
             pyarrow: "11.0.0"
             python: "3.11"
+          - numfocus_nightly: false
+            os: "macos-latest"
+            pyarrow: "13.0.0"
+            python: "3.11"
 
     continue-on-error: ${{ matrix.numfocus_nightly || matrix.pyarrow == 'nightly' }}
     runs-on: ${{ matrix.os }}
diff --git a/reference-data/arrow-compat/12.0.0.parquet b/reference-data/arrow-compat/12.0.0.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..33c0b3b4de07c72b1e9e76e975b435dac1e59256
GIT binary patch
literal 18562
[... 18562 bytes of base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/tests/serialization/test_arrow_compat.py b/tests/serialization/test_arrow_compat.py
index 10e0dedc..087d64ca 100644
--- a/tests/serialization/test_arrow_compat.py
+++ b/tests/serialization/test_arrow_compat.py
@@ -27,6 +27,7 @@
     "9.0.0",
     "10.0.1",
     "11.0.0",
+    "12.0.0",
     "13.0.0",
 ]

From d45d1609d48399b47eb58ebeaee6439e46e95f19 Mon Sep 17 00:00:00 2001
From: Izer Onadim
Date: Mon, 25 Sep 2023 13:51:13 +0100
Subject: [PATCH 20/41] Fix yaml error

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ed2f8874..e330da0b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -64,7 +64,7 @@ jobs:
             os: "macos-latest"
             pyarrow: "11.0.0"
             python: "3.11"
-           - numfocus_nightly: false
+          - numfocus_nightly: false
             os: "macos-latest"
             pyarrow: "13.0.0"
             python: "3.11"
@@ jobs: os: "ubuntu-latest" pyarrow: "13.0.0" python: "3.11" + - numfocus_nightly: false + os: "macos-latest" + pyarrow: "4.0.1" + python: "3.8" - numfocus_nightly: false os: "macos-latest" pyarrow: "11.0.0" @@ -96,7 +116,7 @@ jobs: - name: Install repository run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . - name: Install Pyarrow (non-nightly) - run: micromamba install pyarrow==${{ matrix.pyarrow }} + run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" if: matrix.pyarrow != 'nightly' - name: Install Pyarrow (nightly) # Install both arrow-cpp and pyarrow to make sure that we have the @@ -110,15 +130,12 @@ jobs: # TODO: Remove pandas version stipulation once https://github.com/pandas-dev/pandas/issues/55014 is fixed run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas<2.1.0" if: matrix.numfocus_nightly - # TODO: Remove check conda env stage and -vvv from import - - name: Check conda env - run: micromamba list - name: Test import run: | python -c "import plateau" python -c "import plateau.api" python -c "import plateau.api.dataset" - python -vvv -c "import plateau.api.serialization" + python -c "import plateau.api.serialization" python -c "import plateau.core" python -c "import plateau.io" python -c "import plateau.io_components" From 0414b7854e66bc3fe6c75b89934df4b9bd6069e7 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Mon, 25 Sep 2023 18:19:45 +0100 Subject: [PATCH 22/41] Experiment with tests for backwards compatibility --- .github/workflows/ci.yml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a8bc60a8..75708758 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,67 +24,93 @@ jobs: fail-fast: false matrix: numfocus_nightly: [false] + backwards_compat: [false] os: ["ubuntu-latest"] pyarrow: ["nightly"] python: ["3.8"] include: - numfocus_nightly: true + backwards_compat: false os: "ubuntu-latest" pyarrow: "4.0.1" python: "3.10" - numfocus_nightly: true + backwards_compat: false os: "ubuntu-latest" pyarrow: "11.0.0" python: "3.11" - numfocus_nightly: true + backwards_compat: false os: "ubuntu-latest" pyarrow: "13.0.0" python: "3.11" - numfocus_nightly: false + backwards_compat: true + os: "ubuntu-latest" + pyarrow: "11.0.0" + python: "3.11" + - numfocus_nightly: false + backwards_compat: true + os: "ubuntu-latest" + pyarrow: "13.0.0" + python: "3.11" + - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "5.0.0" python: "3.9" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "6.0.1" python: "3.9" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "7.0.0" python: "3.10" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "8.0.1" python: "3.10" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "9.0.0" python: "3.10" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "10.0.1" python: "3.11" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "11.0.0" python: "3.11" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "12.0.0" python: "3.11" - numfocus_nightly: false + backwards_compat: false os: "ubuntu-latest" pyarrow: "13.0.0" python: "3.11" - numfocus_nightly: 
false + backwards_compat: false os: "macos-latest" pyarrow: "4.0.1" python: "3.8" - numfocus_nightly: false + backwards_compat: false os: "macos-latest" pyarrow: "11.0.0" python: "3.11" - numfocus_nightly: false + backwards_compat: false os: "macos-latest" pyarrow: "13.0.0" python: "3.11" @@ -117,7 +143,10 @@ jobs: run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . - name: Install Pyarrow (non-nightly) run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" - if: matrix.pyarrow != 'nightly' + if: matrix.pyarrow != 'nightly' && !matrix.backwards_compat + - name: Downgrade pandas<2 to test backwards compatibility + run: micromamba install -y "pandas<2" + if: matrix.backwards_compat - name: Install Pyarrow (nightly) # Install both arrow-cpp and pyarrow to make sure that we have the # latest nightly of both packages. It is sadly not guaranteed that the From 1c2352ddc11e6fceb7a16db75d9c3864d28568e6 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 10:59:49 +0100 Subject: [PATCH 23/41] Allow install of specific pandas version --- .github/workflows/ci.yml | 59 ++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75708758..30d00d7d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,94 +24,74 @@ jobs: fail-fast: false matrix: numfocus_nightly: [false] - backwards_compat: [false] os: ["ubuntu-latest"] + pandas: [""] pyarrow: ["nightly"] python: ["3.8"] include: - numfocus_nightly: true - backwards_compat: false - os: "ubuntu-latest" - pyarrow: "4.0.1" - python: "3.10" - - numfocus_nightly: true - backwards_compat: false - os: "ubuntu-latest" - pyarrow: "11.0.0" - python: "3.11" - - numfocus_nightly: true - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "13.0.0" python: "3.11" - numfocus_nightly: false - backwards_compat: true os: "ubuntu-latest" + pandas: "1.5.3" pyarrow: "11.0.0" python: "3.11" - numfocus_nightly: false - backwards_compat: true os: "ubuntu-latest" + pandas: "1.5.3" pyarrow: "13.0.0" python: "3.11" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "5.0.0" python: "3.9" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "6.0.1" python: "3.9" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "7.0.0" python: "3.10" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "8.0.1" python: "3.10" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "9.0.0" python: "3.10" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "10.0.1" python: "3.11" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "11.0.0" python: "3.11" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "12.0.0" python: "3.11" - numfocus_nightly: false - backwards_compat: false os: "ubuntu-latest" + pandas: "" pyarrow: "13.0.0" python: "3.11" - numfocus_nightly: false - backwards_compat: false - os: "macos-latest" - pyarrow: "4.0.1" - python: "3.8" - - numfocus_nightly: false - backwards_compat: false - os: "macos-latest" - pyarrow: "11.0.0" - python: "3.11" - - numfocus_nightly: false - backwards_compat: false os: "macos-latest" + pandas: "" pyarrow: 
"13.0.0" python: "3.11" continue-on-error: ${{ matrix.numfocus_nightly || matrix.pyarrow == 'nightly' }} @@ -142,21 +122,22 @@ jobs: - name: Install repository run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . - name: Install Pyarrow (non-nightly) + # Don't pin python as older versions of pyarrow require older versions of python run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" - if: matrix.pyarrow != 'nightly' && !matrix.backwards_compat - - name: Downgrade pandas<2 to test backwards compatibility - run: micromamba install -y "pandas<2" - if: matrix.backwards_compat + if: matrix.pyarrow != 'nightly' && matrix.pandas = '' - name: Install Pyarrow (nightly) # Install both arrow-cpp and pyarrow to make sure that we have the # latest nightly of both packages. It is sadly not guaranteed that the # nightlies and the latest release would otherwise work together. run: micromamba update -c arrow-nightlies -c conda-forge arrow-cpp pyarrow if: matrix.pyarrow == 'nightly' + - name: Install Pyarrow (downgrade pandas) + run: micromamba install -y pyarrow==${{ matrix.pyarrow }} pandas==${{ matrix.pandas }} + if: matrix.pyarrow != 'nightly' && matrix.pandas != '' - name: Pip Install NumFOCUS nightly # NumFOCUS nightly wheels, contains numpy and pandas # TODO(gh-45): Re-add numpy - # TODO: Remove pandas version stipulation once https://github.com/pandas-dev/pandas/issues/55014 is fixed + # TODO: Remove pandas version pin once https://github.com/pandas-dev/pandas/issues/55014 is fixed run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas<2.1.0" if: matrix.numfocus_nightly - name: Test import From 84cdbdb136629e5f360ab9304152e8b3a830b3e3 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 11:09:47 +0100 Subject: [PATCH 24/41] Fix yaml error --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30d00d7d..4735cf91 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -124,7 +124,7 @@ jobs: - name: Install Pyarrow (non-nightly) # Don't pin python as older versions of pyarrow require older versions of python run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" - if: matrix.pyarrow != 'nightly' && matrix.pandas = '' + if: matrix.pyarrow != 'nightly' && matrix.pandas == '' - name: Install Pyarrow (nightly) # Install both arrow-cpp and pyarrow to make sure that we have the # latest nightly of both packages. 
It is sadly not guaranteed that the From c14edda8c6f9afb6efec2ce59ae304e7039fc37c Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 11:32:02 +0100 Subject: [PATCH 25/41] Update changelog and re-add pyarrow 4.0.1 to ci.yml --- .github/workflows/ci.yml | 12 ++++++------ CHANGES.rst | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4735cf91..2358f186 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,18 +26,18 @@ jobs: numfocus_nightly: [false] os: ["ubuntu-latest"] pandas: [""] - pyarrow: ["nightly"] + pyarrow: ["3.0.0", "4.0.1", "nightly"] python: ["3.8"] include: - numfocus_nightly: true os: "ubuntu-latest" pandas: "" - pyarrow: "13.0.0" - python: "3.11" + pyarrow: "4.0.1" + python: "3.10" - numfocus_nightly: false os: "ubuntu-latest" pandas: "1.5.3" - pyarrow: "11.0.0" + pyarrow: "4.0.1" python: "3.11" - numfocus_nightly: false os: "ubuntu-latest" @@ -92,8 +92,8 @@ jobs: - numfocus_nightly: false os: "macos-latest" pandas: "" - pyarrow: "13.0.0" - python: "3.11" + pyarrow: "4.0.1" + python: "3.8" continue-on-error: ${{ matrix.numfocus_nightly || matrix.pyarrow == 'nightly' }} runs-on: ${{ matrix.os }} diff --git a/CHANGES.rst b/CHANGES.rst index f781b7d3..45820f55 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,7 +6,8 @@ Plateau 4.2.0 (unreleased) ========================== * Support pandas 2 -* No longer test for pyarrow < 8 +* Test pyarrow 12 and 13 +* Prevent dask from casting all object dtypes to strings Plateau 4.1.5 (2023-03-14) ========================== From 2c80c10b09a158e588d44ed30d5677a2b903ccc1 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 11:54:39 +0100 Subject: [PATCH 26/41] Remove test for pyarrow==3.0.0 as incompatible with pandas 2 --- .github/workflows/ci.yml | 2 +- CHANGES.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2358f186..ab59f764 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: numfocus_nightly: [false] os: ["ubuntu-latest"] pandas: [""] - pyarrow: ["3.0.0", "4.0.1", "nightly"] + pyarrow: ["4.0.1", "nightly"] python: ["3.8"] include: - numfocus_nightly: true diff --git a/CHANGES.rst b/CHANGES.rst index 45820f55..952a02d7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,7 @@ Plateau 4.2.0 (unreleased) * Support pandas 2 * Test pyarrow 12 and 13 * Prevent dask from casting all object dtypes to strings +* Remove tests for pyarrow<=3 as they fail with pandas>=2 Plateau 4.1.5 (2023-03-14) ========================== From 657704a390df6ebb74f4faf94160e3b751df4a76 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 11:55:06 +0100 Subject: [PATCH 27/41] asv no longer supports dev, use run instead --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab59f764..329cfdc2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -160,7 +160,7 @@ jobs: - name: Running benchmarks run: | asv --config ./asv_bench/asv.conf.json machine --machine github --os unknown --arch unknown --cpu unknown --ram unknown - asv --config ./asv_bench/asv.conf.json dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log + asv --config ./asv_bench/asv.conf.json run | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log if grep "failed" benchmarks.log > /dev/null ; then exit 1 fi From 
1f9720716a3a7d11d53510dbdffc058b62d4dfdb Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 12:27:56 +0100 Subject: [PATCH 28/41] Add environment arg to asv run --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 329cfdc2..abe63090 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -160,7 +160,7 @@ jobs: - name: Running benchmarks run: | asv --config ./asv_bench/asv.conf.json machine --machine github --os unknown --arch unknown --cpu unknown --ram unknown - asv --config ./asv_bench/asv.conf.json run | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log + asv --config ./asv_bench/asv.conf.json run -E existing:same | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log if grep "failed" benchmarks.log > /dev/null ; then exit 1 fi From c0218c339c8e18b96a003c76893c5ac86ab75ae8 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 14:16:59 +0100 Subject: [PATCH 29/41] Use astype when setting Series type due to change in pandas behaviour --- plateau/serialization/testing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/plateau/serialization/testing.py b/plateau/serialization/testing.py index b3cd33f2..ad1bf995 100644 --- a/plateau/serialization/testing.py +++ b/plateau/serialization/testing.py @@ -36,16 +36,16 @@ def get_dataframe_not_nested(n): "bool": pd.Series( [1] * int(np.floor(n / 2)) + [0] * int(np.ceil(n / 2)), dtype=np.bool_ ), - "int8": pd.Series(range(n), dtype=np.int8), - "int16": pd.Series(range(n), dtype=np.int16), - "int32": pd.Series(range(n), dtype=np.int32), - "int64": pd.Series(range(n), dtype=np.int64), - "uint8": pd.Series(range(n), dtype=np.uint8), - "uint16": pd.Series(range(n), dtype=np.uint16), - "uint32": pd.Series(range(n), dtype=np.uint32), - "uint64": pd.Series(range(n), dtype=np.uint64), - "float32": pd.Series([float(x) for x in range(n)], dtype=np.float32), - "float64": pd.Series([float(x) for x in range(n)], dtype=np.float64), + "int8": pd.Series(range(n)).astype(np.int8), + "int16": pd.Series(range(n)).astype(np.int16), + "int32": pd.Series(range(n)).astype(np.int32), + "int64": pd.Series(range(n)).astype(np.int64), + "uint8": pd.Series(range(n)).astype(np.uint8), + "uint16": pd.Series(range(n)).astype(np.uint16), + "uint32": pd.Series(range(n)).astype(np.uint32), + "uint64": pd.Series(range(n)).astype(np.uint64), + "float32": pd.Series([float(x) for x in range(n)]).astype(np.float32), + "float64": pd.Series([float(x) for x in range(n)]).astype(np.float64), "date": pd.Series( [date(2018, 1, x % 31 + 1) for x in range(1, n + 1)], dtype=object ), From efb827fb880a712b4d5e3305757f04767e59d778 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 15:03:19 +0100 Subject: [PATCH 30/41] Pin dask<2023.9.2 --- docs/environment-docs.yml | 4 ++-- environment.yml | 3 ++- setup.cfg | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml index 5c1f8528..21ad1776 100644 --- a/docs/environment-docs.yml +++ b/docs/environment-docs.yml @@ -3,12 +3,12 @@ channels: - conda-forge dependencies: - python>=3.8 - - dask[dataframe]!=2023.9.2 + - dask[dataframe]<2023.9.2 - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash - numpy!=1.15.0,!=1.16.0 - - pandas>=0.23.0, !=1.0.0, <2.1.0 + - pandas>=0.23.0,!=1.0.0,<2.1.0 - pyarrow>=0.17.1,!=1.0.0 - simplejson - minimalkv diff --git a/environment.yml
b/environment.yml index 6724228f..5b88d959 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,8 @@ channels: - conda-forge - nodefaults dependencies: - - dask!=2021.5.1,!=2021.6.0, !=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + # TODO: Investigate issue with dask 2023.9.2 + - dask!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash diff --git a/setup.cfg b/setup.cfg index 3bedce64..00579a8b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ classifiers = [options] include_package_data = true install_requires = - dask[dataframe]!=2021.5.1,!=2021.6.0,!=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + dask[dataframe]!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions decorator msgpack>=0.5.2 # Currently dask and numpy==1.16.0 clash From 61feb7490e5b0d8fe5b020fe25c506220a7bb27e Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 15:35:01 +0100 Subject: [PATCH 31/41] Add no-py-pin to pandas downgrade step --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abe63090..6fdfa8d3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -132,7 +132,7 @@ jobs: run: micromamba update -c arrow-nightlies -c conda-forge arrow-cpp pyarrow if: matrix.pyarrow == 'nightly' - name: Install Pyarrow (downgrade pandas) - run: micromamba install -y pyarrow==${{ matrix.pyarrow }} pandas==${{ matrix.pandas }} + run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} pandas==${{ matrix.pandas }} if: matrix.pyarrow != 'nightly' && matrix.pandas != '' - name: Pip Install NumFOCUS nightly # NumFOCUS nightly wheels, contains numpy and pandas From ff4793f54e0630311bc90463ee59aba602abac9b Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 16:26:39 +0100 Subject: [PATCH 32/41] Return to !=2023.9.2 due to broken CI --- docs/environment-docs.yml | 2 +- environment.yml | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml index 21ad1776..91aa41f6 100644 --- a/docs/environment-docs.yml +++ b/docs/environment-docs.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - python>=3.8 - - dask[dataframe]<2023.9.2 + - dask[dataframe]!=2023.9.2 - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash diff --git a/environment.yml b/environment.yml index 5b88d959..fd9f04dc 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: # TODO: Investigate issue with dask 2023.9.2 - - dask!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + - dask!=2021.5.1,!=2021.6.0,!=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash diff --git a/setup.cfg b/setup.cfg index 00579a8b..3bedce64 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ classifiers = [options] include_package_data = true install_requires = - dask[dataframe]!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + dask[dataframe]!=2021.5.1,!=2021.6.0,!=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions decorator msgpack>=0.5.2 # Currently dask and 
numpy==1.16.0 clash From f2d4a644b26df5681fdf474236ea286716f02850 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 16:33:44 +0100 Subject: [PATCH 33/41] Switch CI operation order --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6fdfa8d3..ec436927 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,8 +119,6 @@ jobs: cache-env: true extra-specs: | python=${{ matrix.PYTHON_VERSION }} - - name: Install repository - run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . - name: Install Pyarrow (non-nightly) # Don't pin python as older versions of pyarrow require older versions of python run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" @@ -140,6 +138,8 @@ jobs: # TODO: Remove pandas version pin once https://github.com/pandas-dev/pandas/issues/55014 is fixed run: python -m pip install --pre --upgrade --timeout=60 --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple "pandas<2.1.0" if: matrix.numfocus_nightly + - name: Install repository + run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . - name: Test import run: | python -c "import plateau" From d088f3adafab3eeb1cf3df53274a88ad6148886d Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Tue, 26 Sep 2023 16:46:18 +0100 Subject: [PATCH 34/41] Test whether <2023.9.2 breaks CI --- docs/environment-docs.yml | 2 +- environment.yml | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml index 91aa41f6..21ad1776 100644 --- a/docs/environment-docs.yml +++ b/docs/environment-docs.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - python>=3.8 - - dask[dataframe]!=2023.9.2 + - dask[dataframe]<2023.9.2 - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash diff --git a/environment.yml b/environment.yml index fd9f04dc..5b88d959 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: # TODO: Investigate issue with dask 2023.9.2 - - dask!=2021.5.1,!=2021.6.0,!=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + - dask!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash diff --git a/setup.cfg b/setup.cfg index 3bedce64..00579a8b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ classifiers = [options] include_package_data = true install_requires = - dask[dataframe]!=2021.5.1,!=2021.6.0,!=2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + dask[dataframe]!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions decorator msgpack>=0.5.2 # Currently dask and numpy==1.16.0 clash From db6509212df832e6584534e398570f47ff03ab82 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Wed, 27 Sep 2023 09:08:36 +0100 Subject: [PATCH 35/41] Pin asv<0.6 due to API change --- .github/workflows/ci.yml | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec436927..a063842c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -160,7 +160,7 @@ jobs: - name: Running benchmarks run: | asv --config ./asv_bench/asv.conf.json machine 
--machine github --os unknown --arch unknown --cpu unknown --ram unknown - asv --config ./asv_bench/asv.conf.json run -E existing:same | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log + asv --config ./asv_bench/asv.conf.json dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log if grep "failed" benchmarks.log > /dev/null ; then exit 1 fi diff --git a/environment.yml b/environment.yml index 5b88d959..2773e990 100644 --- a/environment.yml +++ b/environment.yml @@ -37,6 +37,6 @@ dependencies: # CLI - ipython # ASV // Benchmark - - asv + - asv<0.6 # Packaging infrastructure - python-build From 7a7d1b3daa7782ae0c1e229ee044bf9a95f274d2 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Wed, 27 Sep 2023 09:12:29 +0100 Subject: [PATCH 36/41] Pin pyarrow>=4 due to pandas 2 incompatibility --- docs/environment-docs.yml | 2 +- environment.yml | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml index 21ad1776..5fe39c58 100644 --- a/docs/environment-docs.yml +++ b/docs/environment-docs.yml @@ -9,7 +9,7 @@ dependencies: # Currently dask and numpy==1.16.0 clash - numpy!=1.15.0,!=1.16.0 - pandas>=0.23.0,!=1.0.0,<2.1.0 - - pyarrow>=0.17.1,!=1.0.0 + - pyarrow>=4 - simplejson - minimalkv - toolz diff --git a/environment.yml b/environment.yml index 2773e990..99a00260 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: # TODO: add support for numpy>=1.23 - numpy!=1.15.0,!=1.16.0 - pandas>=0.23.0,!=1.0.0,<2.1.0 - - pyarrow>=0.17.1,!=1.0.0 + - pyarrow>=4 - simplejson - minimalkv>=1.4.2 - toolz diff --git a/setup.cfg b/setup.cfg index 00579a8b..c3581723 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,7 +19,7 @@ install_requires = # Currently dask and numpy==1.16.0 clash numpy!=1.15.0,!=1.16.0 pandas>=0.23.0,!=1.0.0,<2.1.0 - pyarrow>=0.17.1,!=1.0.0 + pyarrow>=4 simplejson minimalkv>=1.4.2 toolz From 9b627d2b7663d9aeae9f64c84a5a1b5f96601e61 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Wed, 27 Sep 2023 09:33:57 +0100 Subject: [PATCH 37/41] Pin asv during micromamba install --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a063842c..fbbe51bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -121,7 +121,8 @@ jobs: python=${{ matrix.PYTHON_VERSION }} - name: Install Pyarrow (non-nightly) # Don't pin python as older versions of pyarrow require older versions of python - run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" + # Pin asv so it doesn't get updated before the benchmarks are run + run: micromamba install -y --no-py-pin pyarrow==${{ matrix.pyarrow }} "pandas<2.1.0" "asv<0.6" if: matrix.pyarrow != 'nightly' && matrix.pandas == '' - name: Install Pyarrow (nightly) # Install both arrow-cpp and pyarrow to make sure that we have the From 5c1b49feb70c4b2b2f3279fe7a10c6c0eaeae324 Mon Sep 17 00:00:00 2001 From: Izer Onadim <143251429+IzerOnadimQC@users.noreply.github.com> Date: Thu, 28 Sep 2023 10:03:07 +0100 Subject: [PATCH 38/41] Remove square bracket notation from environment-docs.yml Co-authored-by: Jan Tilly --- docs/environment-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml index 5fe39c58..db25109f 100644 --- a/docs/environment-docs.yml +++ b/docs/environment-docs.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - python>=3.8 - - 
dask[dataframe]<2023.9.2 + - dask<2023.9.2 - decorator - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash From 66bae069e8bdb7e20ae9b20ee1a5d0205e62d45e Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Thu, 28 Sep 2023 10:04:22 +0100 Subject: [PATCH 39/41] Remove square bracket notation for dask in setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c3581723..42f82803 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ classifiers = [options] include_package_data = true install_requires = - dask[dataframe]!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + dask!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions decorator msgpack>=0.5.2 # Currently dask and numpy==1.16.0 clash From d27900d9b35ecbf7350eb0a545ed81290ce5c900 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Thu, 28 Sep 2023 11:12:35 +0100 Subject: [PATCH 40/41] Refactor PYARROW_LT_13 condition to remove repeated code --- plateau/core/common_metadata.py | 15 +++++------- plateau/core/index.py | 32 ++++++++++--------------- plateau/serialization/_csv.py | 11 ++++----- plateau/serialization/_parquet.py | 39 +++++++++++-------------------- 4 files changed, 37 insertions(+), 60 deletions(-) diff --git a/plateau/core/common_metadata.py b/plateau/core/common_metadata.py index c5433f6c..efc72849 100644 --- a/plateau/core/common_metadata.py +++ b/plateau/core/common_metadata.py @@ -766,15 +766,12 @@ def empty_dataframe_from_schema( # HACK: Cast bytes to object in metadata until Pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/50127 schema = schema_metadata_bytes_to_object(schema.internal()) - if PYARROW_LT_13: - # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist - # as it was introduced for backwards compatibility with pandas 1.x - df = schema.empty_table().to_pandas(date_as_object=date_as_object) - else: - df = schema.empty_table().to_pandas( - date_as_object=date_as_object, - coerce_temporal_nanoseconds=coerce_temporal_nanoseconds, - ) + # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist + # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} + if not PYARROW_LT_13: + _coerce["coerce_temporal_nanoseconds"] = coerce_temporal_nanoseconds + df = schema.empty_table().to_pandas(date_as_object=date_as_object, **_coerce) df.columns = df.columns.map(ensure_string_type) if columns is not None: diff --git a/plateau/core/index.py b/plateau/core/index.py index d6a79aeb..53aa904a 100644 --- a/plateau/core/index.py +++ b/plateau/core/index.py @@ -148,13 +148,11 @@ def observed_values( # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} + if not PYARROW_LT_13: + _coerce["coerce_temporal_nanoseconds"] = coerce_temporal_nanoseconds return np.array( - labeled_array.to_pandas(date_as_object=date_as_object) - if PYARROW_LT_13 - else labeled_array.to_pandas( - date_as_object=date_as_object, - coerce_temporal_nanoseconds=coerce_temporal_nanoseconds, - ) + labeled_array.to_pandas(date_as_object=date_as_object, **_coerce) ) @staticmethod @@ -491,14 +489,10 @@ def as_flat_series( table = _index_dct_to_table( self.index_dct, column=self.column, dtype=self.dtype ) - if PYARROW_LT_13: - # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist - # as it was introduced for backwards compatibility with pandas 
1.x - df = table.to_pandas(date_as_object=date_as_object) - else: - df = table.to_pandas( - date_as_object=date_as_object, coerce_temporal_nanoseconds=True - ) + # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist + # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} if PYARROW_LT_13 else {"coerce_temporal_nanoseconds": True} + df = table.to_pandas(date_as_object=date_as_object, **_coerce) if predicates is not None: # If there is a conjunction without any reference to the index @@ -884,12 +878,10 @@ def _parquet_bytes_to_dict(column: str, index_buffer: bytes): if column_type == pa.timestamp("us"): column_type = pa.timestamp("ns") - if PYARROW_LT_13: - # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist - # as it was introduced for backwards compatibility with pandas 1.x - df = table.to_pandas() - else: - df = table.to_pandas(coerce_temporal_nanoseconds=True) + # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist + # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} if PYARROW_LT_13 else {"coerce_temporal_nanoseconds": True} + df = table.to_pandas(**_coerce) index_dct = dict( zip(df[column].values, (list(x) for x in df[_PARTITION_COLUMN_NAME].values)) diff --git a/plateau/serialization/_csv.py b/plateau/serialization/_csv.py index f8be37ab..ac4926cc 100644 --- a/plateau/serialization/_csv.py +++ b/plateau/serialization/_csv.py @@ -88,12 +88,11 @@ def restore_dataframe( def store(self, store, key_prefix, df): if isinstance(df, pa.Table): - if PYARROW_LT_13: - # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist - # as it was introduced for backwards compatibility with pandas 1.x - df = df.to_pandas() - else: - df = df.to_pandas(coerce_temporal_nanoseconds=True) + # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist + # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} if PYARROW_LT_13 else {"coerce_temporal_nanoseconds": True} + df = df.to_pandas(**_coerce) + key = f"{key_prefix}.csv" result_stream = BytesIO() iostream: BufferedIOBase diff --git a/plateau/serialization/_parquet.py b/plateau/serialization/_parquet.py index e70b9cee..b7391d27 100644 --- a/plateau/serialization/_parquet.py +++ b/plateau/serialization/_parquet.py @@ -256,24 +256,17 @@ def _restore_dataframe( else: # ARROW-5139 Column projection with empty columns returns a table w/out index if columns == []: + # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist + # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} + if not PYARROW_LT_13: + _coerce["coerce_temporal_nanoseconds"] = True # Create an arrow table with expected index length. 
- if PYARROW_LT_13: - # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist - # as it was introduced for backwards compatibility with pandas 1.x - df = ( - parquet_file.schema.to_arrow_schema() - .empty_table() - .to_pandas(date_as_object=date_as_object) - ) - else: - df = ( - parquet_file.schema.to_arrow_schema() - .empty_table() - .to_pandas( - date_as_object=date_as_object, - coerce_temporal_nanoseconds=True, - ) - ) + df = ( + parquet_file.schema.to_arrow_schema() + .empty_table() + .to_pandas(date_as_object=date_as_object, **_coerce) + ) index = pd.Index( pd.RangeIndex(start=0, stop=parquet_file.metadata.num_rows), dtype="int64", @@ -300,14 +293,10 @@ def _restore_dataframe( # HACK: Cast bytes to object in metadata until Pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/50127 table = table.cast(schema_metadata_bytes_to_object(table.schema)) - if PYARROW_LT_13: - # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist - # as it was introduced for backwards compatibility with pandas 1.x - df = table.to_pandas(date_as_object=date_as_object) - else: - df = table.to_pandas( - date_as_object=date_as_object, coerce_temporal_nanoseconds=True - ) + # Prior to pyarrow 13.0.0 coerce_temporal_nanoseconds didn't exist + # as it was introduced for backwards compatibility with pandas 1.x + _coerce = {} if PYARROW_LT_13 else {"coerce_temporal_nanoseconds": True} + df = table.to_pandas(date_as_object=date_as_object, **_coerce) # XXX: Patch until Pyarrow bug is resolved: https://issues.apache.org/jira/browse/ARROW-18099?filter=-2 if categories: From 41509b2008afb5d4bc4b98ec4c77914534ad7aa6 Mon Sep 17 00:00:00 2001 From: Izer Onadim Date: Thu, 28 Sep 2023 11:58:33 +0100 Subject: [PATCH 41/41] Add square brackets back to setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 42f82803..c3581723 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,7 +13,7 @@ classifiers = [options] include_package_data = true install_requires = - dask!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions + dask[dataframe]!=2021.5.1,!=2021.6.0,<2023.9.2 # gh475 - 2021.5.1 and 2021.6.0 broke ci, omit those versions decorator msgpack>=0.5.2 # Currently dask and numpy==1.16.0 clash
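
For reference, the version-gated keyword pattern that [PATCH 40/41] settles on can be exercised on its own. The sketch below is not part of the patch series: it assumes the packaging library is available, recomputes a local PYARROW_LT_13 flag instead of importing plateau's internal constant, and the helper name table_to_pandas_compat is purely illustrative.

import pandas as pd
import pyarrow as pa
from packaging.version import parse as parse_version

# pyarrow 13 stopped coercing temporal columns to nanoseconds by default; the
# coerce_temporal_nanoseconds flag restores the pre-13 (pandas 1.x) behaviour.
PYARROW_LT_13 = parse_version(pa.__version__) < parse_version("13.0.0")


def table_to_pandas_compat(table: pa.Table, date_as_object: bool = False) -> pd.DataFrame:
    # Build the extra kwargs once: empty on pyarrow < 13 (where the keyword
    # does not exist and coercion happens anyway), explicit on pyarrow >= 13.
    _coerce = {} if PYARROW_LT_13 else {"coerce_temporal_nanoseconds": True}
    return table.to_pandas(date_as_object=date_as_object, **_coerce)


if __name__ == "__main__":
    # Microsecond timestamps should come back as datetime64[ns] on every
    # supported pyarrow version when run through the shim.
    t = pa.table({"ts": pa.array([1, 2, 3], type=pa.timestamp("us"))})
    print(table_to_pandas_compat(t).dtypes)

On pyarrow >= 13 the explicit flag reproduces the nanosecond units that older releases produced implicitly, which is why the refactor can collapse the duplicated if/else branches into a single **kwargs splat at each to_pandas call site.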