From 563fcfba3ec0cbfa498d46d82ae81a3bd0b0deb3 Mon Sep 17 00:00:00 2001 From: "Bradford (Lynch) Levy" Date: Sun, 15 Sep 2024 10:51:55 -0700 Subject: [PATCH] Clarify required schema format in `read_csv` The `read_csv` function expects that the order of the columns in a provided schema match the order of the columns in the CSV file being read. This was not documented and led to unexpected behavior. --- py-polars/polars/io/csv/functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/io/csv/functions.py b/py-polars/polars/io/csv/functions.py index ceba49391560..71f71ef6ce68 100644 --- a/py-polars/polars/io/csv/functions.py +++ b/py-polars/polars/io/csv/functions.py @@ -111,7 +111,8 @@ def read_csv( schema Provide the schema. This means that polars doesn't do schema inference. This argument expects the complete schema, whereas `schema_overrides` can be - used to partially overwrite a schema. + used to partially overwrite a schema. Note that the order of the columns in + the provided `schema` must match the order of the columns in the CSV being read. schema_overrides Overwrite dtypes for specific or all columns during schema inference. null_values