From 56026d5f3a1a92c7f7c73e571cffbfdc823bce52 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 13 Sep 2024 08:03:20 -0300 Subject: [PATCH] Update Polars to v0.43.1 (#985) The most relevant change is internal. Now most of the strings are represented by the PlSmallStr type. Changes upstream: - https://github.com/pola-rs/polars/releases/tag/rs-0.43.0 - Version 0.43.0 - https://github.com/pola-rs/polars/releases/tag/rs-0.43.1 - Version 0.43.1 --- native/explorer/Cargo.lock | 226 +++++++++++---------- native/explorer/Cargo.toml | 10 +- native/explorer/src/dataframe.rs | 20 +- native/explorer/src/dataframe/io.rs | 24 ++- native/explorer/src/datatypes.rs | 2 +- native/explorer/src/datatypes/ex_dtypes.rs | 7 +- native/explorer/src/expressions.rs | 2 +- native/explorer/src/lazyframe.rs | 17 +- native/explorer/src/lazyframe/io.rs | 8 +- native/explorer/src/series.rs | 76 +++---- native/explorer/src/series/from_list.rs | 33 +-- 11 files changed, 230 insertions(+), 195 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index be8eeacfa..c9259f2b1 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -4,19 +4,13 @@ version = 3 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "adler2" version = "2.0.0" @@ -83,9 +77,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "4e1496f8fb1fbf272686b8d37f523dab3e4a7443300055e74cdaa449f3114356" [[package]] name = "argminmax" @@ -176,17 +170,17 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide 0.7.4", + "miniz_oxide", "object", "rustc-demangle", + "windows-targets", ] [[package]] @@ -285,11 +279,20 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" -version = "1.1.16" +version = "1.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9d013ecb737093c0e86b151a7b837993cf9ec6c502946cfb44bedc392421e0b" +checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" dependencies = [ "jobserver", "libc", @@ -370,6 +373,21 @@ dependencies = [ "cc", ] +[[package]] +name = "compact_str" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -527,7 +545,6 @@ dependencies = [ "mimalloc", "object_store", "polars", - "polars-json", "polars-ops", "rand", "rand_pcg", @@ -556,7 +573,7 @@ checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", "libz-ng-sys", - "miniz_oxide 0.8.0", + "miniz_oxide", ] [[package]] @@ -707,9 +724,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" [[package]] name = "glob" @@ -877,9 +894,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba" dependencies = [ "bytes", "futures-channel", @@ -947,9 +964,9 @@ checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +checksum = "187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4" [[package]] name = "itertools" @@ -1179,15 +1196,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "miniz_oxide" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" -dependencies = [ - "adler", -] - [[package]] name = "miniz_oxide" version = "0.8.0" @@ -1445,9 +1453,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad002eb9c541b4f7e0c7c759cefe884a0350e15d241231ac4be31c5568c15070" +checksum = "0e248cf2f0069277f8fe80d413cfb9240c7dd1cfa382b5674c1b4afa57222747" dependencies = [ "getrandom", "polars-arrow", @@ -1465,9 +1473,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d19c6db79cb6a3c55af3b5a3976276edaab64cbf7f69b392617c2af30d7742" +checksum = "2981d5b2f34c84069a39fceca0d36dffeb97db8cadba101e7ea6605c8d42294d" dependencies = [ "ahash", "atoi", @@ -1490,6 +1498,7 @@ dependencies = [ "parking_lot", "polars-arrow-format", "polars-error", + "polars-schema", "polars-utils", "ryu", "serde", @@ -1512,9 +1521,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30194a5ff325f61d6fcb62dc215c9210f308fc4fc85a493ef777dbcd938cba24" +checksum = "5a97b2a5c9b880ab7e52553c40a336fdb6e3244bf896b4d4917700defe8085d5" dependencies = [ "bytemuck", "either", @@ -1528,9 +1537,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ba2a3b736d55b92a12889672d0197dc25ad321ab23eba4168a3b6316a6b6349" +checksum = "d5bc2cadcca904a9dc4d2c2b437c346712806e9a678bf17c7e94ebf622faae76" dependencies = [ "ahash", "bitflags", @@ -1546,13 +1555,13 @@ dependencies = [ "polars-compute", "polars-error", "polars-row", + "polars-schema", "polars-utils", "rand", "rand_distr", "rayon", "regex", "serde", - "smartstring", "thiserror", "version_check", "xxhash-rust", @@ -1560,9 +1569,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07101d1803ca2046cdb3a8adb1523ddcc879229860f0ac56a853034269dec1e1" +checksum = "56b0a8eb9b1e56a4640de6887d613cb4de73c4e09d491f3b779855d4c3bcb9ba" dependencies = [ "object_store", "polars-arrow-format", @@ -1573,14 +1582,15 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5c69634ddbb0f44186cd1c42d166963fc756f9cc994438e941bc2703ddbbab" +checksum = "34e9c0e8c7ba93aac64051b92dc68eac5a0e9543cf44ca784467db2c035821fe" dependencies = [ "ahash", "bitflags", "once_cell", "polars-arrow", + "polars-compute", "polars-core", "polars-io", "polars-ops", @@ -1588,14 +1598,13 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", ] [[package]] name = "polars-io" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a48ddf416ae185336c3d7880d2e05b7e55686e3e0da1014e5e7325eff9c7d722" +checksum = "454ebbebe1cb8cb4768adca44b8fc9431abc3c91d5927f6824e73f916bced911" dependencies = [ "ahash", "async-trait", @@ -1609,6 +1618,7 @@ dependencies = [ "fs4", "futures", "glob", + "hashbrown", "home", "itoa", "memchr", @@ -1622,6 +1632,7 @@ dependencies = [ "polars-error", "polars-json", "polars-parquet", + "polars-schema", "polars-time", "polars-utils", "rayon", @@ -1632,7 +1643,6 @@ dependencies = [ "serde_json", "simd-json", "simdutf8", - "smartstring", "tokio", "tokio-util", "url", @@ -1641,9 +1651,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0a43388585a922524e8bbaa1ed1391c9c4b0768a644585609afa9a2fd5fc702" +checksum = "4ca086fbbff6e46efbc97032e93f92690c1fc9c662fd5e1f13a42922bd7d3aa4" dependencies = [ "ahash", "chrono", @@ -1663,9 +1673,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a514a85df9e7d501c71c96f094861d0608b05a3f533447b1c0ea9cf714162fcb" +checksum = "7e61c062e833d2376de0a4cf745504449215cbf499cea293cb592e674ffb39ca" dependencies = [ "ahash", "bitflags", @@ -1684,16 +1694,15 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", "tokio", "version_check", ] [[package]] name = "polars-mem-engine" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d057df81b17b4f0ea0e4424ee34f755e6b9ccfba432ecb2fe57dc4da6da2713" +checksum = "c0643812829cc990e1533a5bf48c21a1b3eaa46aabf2967b0f53f99097cbc74c" dependencies = [ "futures", "memmap2", @@ -1713,9 +1722,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ba44233249b7937491b5d2bdbf14e4ad534c0a65d06548c3bc418fc3e60791" +checksum = "5ac14a136d87bea798f3db51d5987556deb2293da34bfc8b105ebffa05f6e810" dependencies = [ "ahash", "argminmax", @@ -1735,22 +1744,22 @@ dependencies = [ "polars-core", "polars-error", "polars-json", + "polars-schema", "polars-utils", "rand", "rand_distr", "rayon", "regex", "serde_json", - "smartstring", "unicode-reverse", "version_check", ] [[package]] name = "polars-parquet" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2993265079ffa07dd16277189444424f8d787b00b01c6f6e001f58bab543ce" +checksum = "491f5af321169259d5b1294c9fe8ed89faaeac34b4dec4abcedc0d1b3d11013a" dependencies = [ "ahash", "async-stream", @@ -1760,6 +1769,7 @@ dependencies = [ "ethnum", "flate2", "futures", + "hashbrown", "lz4", "num-traits", "parquet-format-safe", @@ -1776,9 +1786,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ccba94c4fa9fded0f41730f7649574c72d6d938a840731c7e4eea4e7ed5cecf" +checksum = "29215c31f599295cc0f803c42fc812cc518db6d5ed4d6c7cc03daf3976a0add5" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1796,7 +1806,6 @@ dependencies = [ "polars-row", "polars-utils", "rayon", - "smartstring", "tokio", "uuid", "version_check", @@ -1804,13 +1813,14 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6b29cc53d6c086c09b11050b01c25c28f6a91339036ba1fb1250fcf0d89e74" +checksum = "e3f728df4bc643492a2057a0a125c7e550cbcfe35b391444653ad294be9ab190" dependencies = [ "ahash", "bitflags", "bytemuck", + "bytes", "chrono", "chrono-tz 0.8.6", "either", @@ -1830,16 +1840,15 @@ dependencies = [ "rayon", "recursive", "regex", - "smartstring", "strum_macros", "version_check", ] [[package]] name = "polars-row" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e11f43f48466c4b1caa6dc61c381dc10c2d67b87fcb74bc996e21c4f7b0a311" +checksum = "4eb931f0929ca7498b3ed5056357d2d364cad42cce95383a7e3983dbceb4bed1" dependencies = [ "bytemuck", "polars-arrow", @@ -1847,11 +1856,24 @@ dependencies = [ "polars-utils", ] +[[package]] +name = "polars-schema" +version = "0.43.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7e1234b942d3244024ecbac9c7f5a48a52a815f8ca4b9d075fbba16afb1a39" +dependencies = [ + "indexmap", + "polars-error", + "polars-utils", + "serde", + "version_check", +] + [[package]] name = "polars-sql" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e9338806e7254618eb819cc632c34b75b71d462222a913f9c1035ed81911ddc" +checksum = "ce52bfd2ef1e2e18ac26d7d7ea3f9132b199cff06d975156703fa5badcfae187" dependencies = [ "hex", "once_cell", @@ -1862,6 +1884,7 @@ dependencies = [ "polars-ops", "polars-plan", "polars-time", + "polars-utils", "rand", "serde", "serde_json", @@ -1870,9 +1893,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a601ab9a62e733b8b560b37642321cb1933faa194864739f6a59d6dfc4d686" +checksum = "9925ab75e1d859ae2283ca09d7683198b0b9ff5afecd03f2c9180f3e36e35056" dependencies = [ "atoi", "bytemuck", @@ -1886,27 +1909,28 @@ dependencies = [ "polars-ops", "polars-utils", "regex", - "smartstring", ] [[package]] name = "polars-utils" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19dd73207bd15efb0ae5c9c3ece3227927ed6a16ad63578acec342378e6bdcb4" +checksum = "b44846e1fc6ae1dfdc7f65a37af7d270d0a6a17a58fff76716561f5b887a8ad7" dependencies = [ "ahash", "bytemuck", "bytes", + "compact_str", "hashbrown", "indexmap", + "libc", "memmap2", "num-traits", "once_cell", "polars-error", "raw-cpuid", "rayon", - "smartstring", + "serde", "stacker", "sysinfo", "version_check", @@ -2106,9 +2130,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853" dependencies = [ "bitflags", ] @@ -2236,9 +2260,9 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustix" -version = "0.38.36" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ "bitflags", "errno", @@ -2283,9 +2307,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.12" +version = "0.23.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" +checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8" dependencies = [ "once_cell", "ring", @@ -2339,9 +2363,9 @@ checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" -version = "0.102.7" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ "ring", "rustls-pki-types", @@ -2371,11 +2395,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2409,18 +2433,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", @@ -2503,18 +2527,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "serde", - "static_assertions", - "version_check", -] - [[package]] name = "snafu" version = "0.7.5" @@ -2841,9 +2853,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index a46289c2a..1abe37d3e 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -33,7 +33,7 @@ object_store = { version = "0.10", default-features = false, optional = true } mimalloc = { version = "*", default-features = false } [dependencies.polars] -version = "0.42" +version = "0.43" default-features = false features = [ "abs", @@ -81,15 +81,9 @@ features = [ ] [dependencies.polars-ops] -version = "0.42" +version = "0.43" features = ["abs", "ewma", "cum_agg", "cov"] -# This dep is only needed to activate "timezones" feature -# for the polars-json crate. We should remove when Polars fixes it. -[dependencies.polars-json] -version = "*" -features = ["timezones", "chrono-tz"] - [features] default = ["ndjson", "cloud", "nif_version_2_15"] diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index 1ca0d8663..4b80591e8 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -29,7 +29,11 @@ pub fn df_transpose( #[rustler::nif] pub fn df_names(df: ExDataFrame) -> Result, ExplorerError> { - let names = to_string_names(df.get_column_names()); + let names = df + .get_column_names() + .iter() + .map(|name| name.to_string()) + .collect(); Ok(names) } @@ -73,10 +77,10 @@ pub fn df_concat_columns(dfs: Vec) -> Result, groups: Vec<&str>, ) -> Result { - let idx = UInt32Chunked::from_vec("idx", indices); + let idx = UInt32Chunked::from_vec("idx".into(), indices); let new_df = if groups.is_empty() { df.take(&idx)? } else { @@ -167,7 +171,7 @@ pub fn df_sample_n( seed: Option, groups: Vec, ) -> Result { - let n_s = Series::new("n", &[n]); + let n_s = Series::new("n".into(), &[n]); let new_df = if groups.is_empty() { df.sample_n(&n_s, replace, shuffle, seed)? } else { @@ -187,7 +191,7 @@ pub fn df_sample_frac( seed: Option, groups: Vec, ) -> Result { - let frac_s = Series::new("frac", &[frac]); + let frac_s = Series::new("frac".into(), &[frac]); let new_df = if groups.is_empty() { df.sample_frac(&frac_s, replace, shuffle, seed)? } else { @@ -396,7 +400,7 @@ pub fn df_pivot_wider( .collect(); for (id_name, new_name) in id_columns.iter().zip(&temp_id_names) { - df.rename(id_name, new_name)?; + df.rename(id_name, new_name.into())?; } let mut new_df = pivot_stable( @@ -465,7 +469,7 @@ pub fn df_lazy(df: ExDataFrame) -> Result { #[rustler::nif(schedule = "DirtyCpu")] pub fn df_re_dtype(pattern: &str) -> Result { - let s = Series::new("dummy", [""]) + let s = Series::new("dummy".into(), [""]) .into_frame() .lazy() .with_column(col("dummy").str().extract_groups(pattern)?.alias("dummy")) diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index a1051997e..8a6d5617e 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -56,7 +56,12 @@ pub fn df_from_csv( .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_columns(column_names.map(Arc::from)) + .with_columns(column_names.map(|names| { + names + .iter() + .map(|name| PlSmallStr::from_string(name.clone())) + .collect() + })) .with_parse_options( CsvParseOptions::default() .with_encoding(encoding) @@ -64,7 +69,9 @@ pub fn df_from_csv( .with_try_parse_dates(parse_dates) .with_separator(delimiter_as_byte) .with_eol_char(eol_delimiter.unwrap_or(b'\n')) - .with_null_values(Some(NullValues::AllColumns(null_vals))), + .with_null_values(Some(NullValues::AllColumns( + null_vals.iter().map(|val| val.into()).collect(), + ))), ) .try_into_reader_with_file_path(Some(filename.into()))? .finish(); @@ -79,7 +86,7 @@ pub fn schema_from_dtypes_pairs( return Ok(None); } - let mut schema = Schema::new(); + let mut schema = Schema::with_capacity(dtypes.len()); for (name, ex_dtype) in dtypes { let dtype = DataType::try_from(&ex_dtype)?; schema.with_column(name.into(), dtype); @@ -174,7 +181,12 @@ pub fn df_load_csv( .with_has_header(has_header) .with_infer_schema_length(infer_schema_length) .with_n_rows(stop_after_n_rows) - .with_columns(column_names.map(Arc::from)) + .with_columns(column_names.map(|names| { + names + .iter() + .map(|name| PlSmallStr::from_string(name.clone())) + .collect() + })) .with_skip_rows(skip_rows) .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) @@ -183,7 +195,9 @@ pub fn df_load_csv( CsvParseOptions::default() .with_separator(delimiter_as_byte) .with_encoding(encoding) - .with_null_values(Some(NullValues::AllColumns(null_vals))) + .with_null_values(Some(NullValues::AllColumns( + null_vals.iter().map(|x| x.into()).collect(), + ))) .with_try_parse_dates(parse_dates) .with_eol_char(eol_delimiter.unwrap_or(b'\n')), ) diff --git a/native/explorer/src/datatypes.rs b/native/explorer/src/datatypes.rs index 059ea0caf..4ddb84e36 100644 --- a/native/explorer/src/datatypes.rs +++ b/native/explorer/src/datatypes.rs @@ -493,7 +493,7 @@ impl<'tz> Literal for ExDateTime<'tz> { Expr::Literal(LiteralValue::DateTime( ndt.and_utc().timestamp_micros(), TimeUnit::Microseconds, - Some(time_zone), + Some(time_zone.into()), )) } } diff --git a/native/explorer/src/datatypes/ex_dtypes.rs b/native/explorer/src/datatypes/ex_dtypes.rs index 87249bfb5..c93220127 100644 --- a/native/explorer/src/datatypes/ex_dtypes.rs +++ b/native/explorer/src/datatypes/ex_dtypes.rs @@ -107,8 +107,7 @@ impl TryFrom<&DataType> for ExSeriesDtype { let mut struct_fields = Vec::new(); for field in fields { - struct_fields - .push((field.name().to_string(), Self::try_from(field.data_type())?)); + struct_fields.push((field.name().to_string(), Self::try_from(field.dtype())?)); } Ok(ExSeriesDtype::Struct(struct_fields)) @@ -160,7 +159,7 @@ impl TryFrom<&ExSeriesDtype> for DataType { } ExSeriesDtype::Datetime(ex_timeunit, tz_option) => Ok(DataType::Datetime( ex_timeunit.try_into()?, - Some(tz_option.clone()), + Some(tz_option.into()), )), ExSeriesDtype::Duration(ex_timeunit) => Ok(DataType::Duration(ex_timeunit.try_into()?)), ExSeriesDtype::List(inner) => { @@ -169,7 +168,7 @@ impl TryFrom<&ExSeriesDtype> for DataType { ExSeriesDtype::Struct(fields) => Ok(DataType::Struct( fields .iter() - .map(|(k, v)| Ok(Field::new(k.as_str(), v.try_into()?))) + .map(|(k, v)| Ok(Field::new(k.into(), v.try_into()?))) .collect::, Self::Error>>()?, )), } diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index a30b29e5f..9d6fb6104 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -980,7 +980,7 @@ pub fn expr_atan(expr: ExExpr) -> ExExpr { #[rustler::nif] pub fn expr_strptime(expr: ExExpr, format_string: &str) -> ExExpr { let options = StrptimeOptions { - format: Some(format_string.to_string()), + format: Some(format_string.into()), strict: false, exact: true, cache: true, diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index 4f6675f41..348f5ce0e 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -65,7 +65,7 @@ pub fn lf_tail( pub fn lf_names(data: ExLazyFrame) -> Result, ExplorerError> { let mut lf = data.clone_inner(); let names = lf - .schema()? + .collect_schema()? .iter_names() .map(|smart_string| smart_string.to_string()) .collect(); @@ -76,9 +76,9 @@ pub fn lf_names(data: ExLazyFrame) -> Result, ExplorerError> { #[rustler::nif] pub fn lf_dtypes(data: ExLazyFrame) -> Result, ExplorerError> { let mut dtypes: Vec = vec![]; - let schema = data.clone_inner().schema()?; + let schema = data.clone_inner().collect_schema()?; - for dtype in schema.iter_dtypes() { + for (_name, dtype) in schema.iter_names_and_dtypes() { dtypes.push(ExSeriesDtype::try_from(dtype)?) } @@ -108,7 +108,7 @@ pub fn lf_slice( let result_lf = if groups.is_empty() { lf.slice(offset, length) } else { - let groups_exprs: Vec = groups.iter().map(|group| col(group)).collect(); + let groups_exprs: Vec = groups.iter().map(col).collect(); lf.group_by_stable(groups_exprs) .agg([col("*").slice(offset, length)]) .explode([col("*").exclude(groups)]) @@ -181,6 +181,7 @@ pub fn lf_distinct( columns_to_keep: Option>, ) -> Result { let df = data.clone_inner(); + let subset = subset.iter().map(|x| x.into()).collect::>(); let new_df = df.unique_stable(Some(subset), UniqueKeepStrategy::First); match columns_to_keep { @@ -219,11 +220,9 @@ pub fn lf_summarise_with( // We do add a "shadow" column to be able to group by it. // This is going to force some aggregations like "mode" to be always inside // a "list". - let s = Series::new_null("__explorer_null_for_group__", 1); - ldf.with_column(s.lit()) - .group_by_stable(["__explorer_null_for_group__"]) + ldf.group_by_stable([1.lit().alias("__explorer_literal_for_group__")]) .agg(aggs) - .select(&[col("*").exclude(["__explorer_null_for_group__"])]) + .select(&[col("*").exclude(["__explorer_literal_for_group__"])]) } else { ldf.group_by_stable(groups).agg(aggs) }; @@ -344,7 +343,7 @@ pub fn lf_concat_columns(ldfs: Vec) -> Result = ldf - .schema() + .collect_schema() .expect("should be able to get schema") .iter_names() .map(|smart_string| smart_string.to_string()) diff --git a/native/explorer/src/lazyframe/io.rs b/native/explorer/src/lazyframe/io.rs index 9183f49a0..bfb00cdc4 100644 --- a/native/explorer/src/lazyframe/io.rs +++ b/native/explorer/src/lazyframe/io.rs @@ -19,7 +19,7 @@ pub fn lf_from_parquet( }; let cols: Vec = if let Some(cols) = columns { - cols.iter().map(|column| col(column)).collect() + cols.iter().map(col).collect() } else { vec![all()] }; @@ -43,7 +43,7 @@ pub fn lf_from_parquet_cloud( ..Default::default() }; let cols: Vec = if let Some(cols) = columns { - cols.iter().map(|column| col(column)).collect() + cols.iter().map(col).collect() } else { vec![all()] }; @@ -247,7 +247,9 @@ pub fn lf_from_csv( .with_rechunk(do_rechunk) .with_encoding(encoding) .with_dtype_overwrite(schema_from_dtypes_pairs(dtypes)?) - .with_null_values(Some(NullValues::AllColumns(null_vals))) + .with_null_values(Some(NullValues::AllColumns( + null_vals.iter().map(|x| x.into()).collect(), + ))) .with_eol_char(eol_delimiter.unwrap_or(b'\n')) .finish()?; diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index b29988126..ad4b9aa9b 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -7,9 +7,8 @@ use crate::{ }; use encoding::encode_naive_datetime; -// use encoding::encode_datetime; -use polars::prelude::*; //{lazy::dsl::Expr, }; +use polars::prelude::*; use polars_ops::chunked_array::cov::{cov, pearson_corr}; use polars_ops::prelude::peaks::*; use rustler::{Binary, Encoder, Env, Term}; @@ -30,7 +29,7 @@ pub fn s_name(data: ExSeries) -> Result { #[rustler::nif] pub fn s_rename(data: ExSeries, name: &str) -> Result { let mut s = data.clone_inner(); - s.rename(name); + s.rename(name.into()); Ok(ExSeries::new(s)) } @@ -121,7 +120,7 @@ fn checked_div(data: ExSeries, other: ExSeries) -> Result 1 => { let num = data.i64()?.get(0).unwrap(); Ok(Series::new( - data.name(), + data.name().clone(), other.i64()?.apply(|v| v.and_then(|v| num.checked_div(v))), )) } @@ -196,7 +195,7 @@ pub fn s_unordered_distinct(series: ExSeries) -> Result #[rustler::nif(schedule = "DirtyCpu")] pub fn s_frequencies(series: ExSeries) -> Result { - let df = series.value_counts(true, true, "counts".to_string(), false)?; + let df = series.value_counts(true, true, "counts".into(), false)?; Ok(ExDataFrame::new(df)) } @@ -212,12 +211,18 @@ pub fn s_cut( let left_close = false; // Cut is going to return a Series of a Struct. We need to convert it to a DF. - let cut_series = cut(&series, bins, labels, left_close, true)?; + let cut_series = cut( + &series, + bins, + labels.map(|vec| vec.iter().map(|label| label.into()).collect()), + left_close, + true, + )?; let mut cut_df = DataFrame::new(cut_series.struct_()?.fields_as_series())?; let cut_df = cut_df.insert_column(0, series)?; - cut_df.set_column_names(&[ + cut_df.set_column_names([ "values", break_point_label.unwrap_or("break_point"), category_label.unwrap_or("category"), @@ -241,7 +246,7 @@ pub fn s_qcut( let qcut_series: Series = qcut( &series, quantiles, - labels, + labels.map(|vec| vec.iter().map(|label| label.into()).collect()), left_close, allow_duplicates, true, @@ -250,7 +255,7 @@ pub fn s_qcut( let mut qcut_df = DataFrame::new(qcut_series.struct_()?.fields_as_series())?; let qcut_df = qcut_df.insert_column(0, series)?; - qcut_df.set_column_names(&[ + qcut_df.set_column_names([ "values", break_point_label.unwrap_or("break_point"), category_label.unwrap_or("category"), @@ -261,7 +266,7 @@ pub fn s_qcut( #[rustler::nif(schedule = "DirtyCpu")] pub fn s_slice_by_indices(series: ExSeries, indices: Vec) -> Result { - let idx = UInt32Chunked::from_vec("idx", indices); + let idx = UInt32Chunked::from_vec("idx".into(), indices); let s1 = series.take(&idx)?; Ok(ExSeries::new(s1)) } @@ -410,7 +415,7 @@ pub fn s_in(s: ExSeries, rhs: ExSeries) -> Result { } } - let r_logical = Series::new("r_logical", r_ids); + let r_logical = Series::new("r_logical".into(), r_ids); is_in(&l_logical.clone().into_series(), &r_logical)? } @@ -593,7 +598,7 @@ pub fn s_window_median( .clone_inner() .into_frame() .lazy() - .select([col(series.name()).rolling_median(opts)]) + .select([col(series.name().clone()).rolling_median(opts)]) .collect()? .column(series.name())? .clone(); @@ -845,7 +850,7 @@ pub fn s_product(s: ExSeries) -> Result { .clone_inner() .into_frame() .lazy() - .select([col(s.name()).product()]) + .select([col(s.name().clone()).product()]) .collect()? .column(s.name())? .clone(); @@ -863,7 +868,7 @@ pub fn s_variance(s: ExSeries, ddof: u8) -> Result { .clone_inner() .into_frame() .lazy() - .select([col(s.name()).var(ddof)]) + .select([col(s.name().clone()).var(ddof)]) .collect()? .column(s.name())? .clone(); @@ -881,7 +886,7 @@ pub fn s_standard_deviation(s: ExSeries, ddof: u8) -> Result( }, _ => encoding::term_from_value( s.quantile_reduce(quantile, strategy)? - .into_series("quantile") + .into_series("quantile".into()) .cast(dtype)? .get(0)?, env, @@ -1103,7 +1108,7 @@ pub fn s_categories(s: ExSeries) -> Result { DataType::Categorical(Some(mapping), _) => { let size = mapping.len() as u32; let categories: Vec<&str> = (0..size).map(|id| mapping.get(id)).collect(); - let series = Series::new("categories", &categories); + let series = Series::new("categories".into(), &categories); Ok(ExSeries::new(series)) } _ => panic!("Cannot get categories from non categorical series"), @@ -1355,7 +1360,7 @@ pub fn s_substring( .clone_inner() .into_frame() .lazy() - .select([col(s.name()).str().slice(offset.lit(), length)]) + .select([col(s.name().clone()).str().slice(offset.lit(), length)]) .collect()? .column(s.name())? .clone(); @@ -1366,7 +1371,7 @@ pub fn s_substring( pub fn s_split(s1: ExSeries, by: &str) -> Result { let s2 = s1 .str()? - .split(&ChunkedArray::new("a", &[by])) + .split(&ChunkedArray::new("a".into(), &[by])) .into_series(); Ok(ExSeries::new(s2)) @@ -1378,12 +1383,12 @@ pub fn s_split_into(s1: ExSeries, by: &str, names: Vec) -> Result Result Result { let s1 = clip( &s, - &Series::new("min_clip", &[min]), - &Series::new("max_clip", &[max]), + &Series::new("min_clip".into(), &[min]), + &Series::new("max_clip".into(), &[max]), )?; Ok(ExSeries::new(s1)) @@ -1513,8 +1518,8 @@ pub fn s_clip_integer(s: ExSeries, min: i64, max: i64) -> Result Result { let s1 = clip( &s, - &Series::new("min_clip", &[min]), - &Series::new("max_clip", &[max]), + &Series::new("min_clip".into(), &[min]), + &Series::new("max_clip".into(), &[max]), )?; Ok(ExSeries::new(s1)) @@ -1560,7 +1565,7 @@ pub fn s_atan(s: ExSeries) -> Result { pub fn s_join(s1: ExSeries, separator: &str) -> Result { let s2 = s1 .list()? - .lst_join(&ChunkedArray::new("a", &[separator]), true)? + .lst_join(&ChunkedArray::new("a".into(), &[separator]), true)? .into_series(); Ok(ExSeries::new(s2)) @@ -1586,7 +1591,7 @@ fn s_member( .clone_inner() .into_frame() .lazy() - .select([col(s.name()).list().contains(value_expr)]) + .select([col(s.name().clone()).list().contains(value_expr)]) .collect()? .column(s.name())? .clone(); @@ -1600,7 +1605,10 @@ pub fn s_field(s: ExSeries, name: &str) -> Result { .clone_inner() .into_frame() .lazy() - .select([col(s.name()).struct_().field_by_name(name).alias(name)]) + .select([col(s.name().clone()) + .struct_() + .field_by_name(name) + .alias(name)]) .collect()? .column(name)? .clone(); @@ -1614,10 +1622,10 @@ pub fn s_json_decode(s: ExSeries, ex_dtype: ExSeriesDtype) -> Result Result Result Result { let len = u32::try_from(series.len())?; - let s = Series::new("row_index", 0..len); + let s = Series::new("row_index".into(), 0..len); Ok(ExSeries::new(s)) } @@ -1672,10 +1680,10 @@ pub fn s_re_named_captures(s1: ExSeries, pattern: &str) -> Result Result>, ExplorerError>>()?; - Series::new(name, values) + Series::new(name.into(), values) .cast(&DataType::Date) .map(ExSeries::new) .map_err(|error| { @@ -76,7 +76,7 @@ pub fn s_from_list_naive_datetime( }) .collect::>, ExplorerError>>()?; - Series::new(name, values) + Series::new(name.into(), values) .cast(&DataType::Datetime(timeunit, None)) .map(ExSeries::new) .map_err(|error| { @@ -119,8 +119,11 @@ pub fn s_from_list_datetime( }) .collect::>, ExplorerError>>()?; - Series::new(name, values) - .cast(&DataType::Datetime(timeunit, time_zone)) + Series::new(name.into(), values) + .cast(&DataType::Datetime( + timeunit, + time_zone.map(|value| value.into()), + )) .map(ExSeries::new) .map_err(|error| { ExplorerError::Other(format!( @@ -160,7 +163,7 @@ pub fn s_from_list_duration( }) .collect::>, ExplorerError>>()?; - Series::new(name, values) + Series::new(name.into(), values) .cast(&DataType::Duration(timeunit)) .map(ExSeries::new) .map_err(|error| { @@ -196,7 +199,7 @@ pub fn s_from_list_time(name: &str, val: Term) -> Result>, ExplorerError>>()?; - Series::new(name, values) + Series::new(name.into(), values) .cast(&DataType::Time) .map(ExSeries::new) .map_err(|error| { @@ -208,8 +211,8 @@ pub fn s_from_list_time(name: &str, val: Term) -> Result ExSeries { - let s = Series::new_null(name, length); - ExSeries::new(Series::new(name, s)) + let s = Series::new_null(name.into(), length); + ExSeries::new(Series::new(name.into(), s)) } macro_rules! from_list { @@ -217,7 +220,7 @@ macro_rules! from_list { #[rustler::nif(schedule = "DirtyCpu")] pub fn $name(name: &str, val: Term) -> NifResult { val.decode::>>() - .map(|values| ExSeries::new(Series::new(name, values.as_slice()))) + .map(|values| ExSeries::new(Series::new(name.into(), values.as_slice()))) } }; } @@ -274,7 +277,7 @@ macro_rules! from_list_float { }) .collect::>>>() .map(|values| { - ExSeries::new(Series::new(name, values)) + ExSeries::new(Series::new(name.into(), values)) }) } }; @@ -291,14 +294,14 @@ pub fn s_from_list_binary(name: &str, val: Term) -> NifResult { .map(|maybe_bin| maybe_bin.map(|bin| bin.as_slice())) }) .collect::>>>() - .map(|values| ExSeries::new(Series::new(name, values))) + .map(|values| ExSeries::new(Series::new(name.into(), values))) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_from_list_categories(name: &str, val: Term) -> NifResult { let decoded = val.decode::>>()?; Ok(ExSeries::new( - Series::new(name, decoded.as_slice()) + Series::new(name.into(), decoded.as_slice()) .cast(&DataType::Categorical(None, CategoricalOrdering::default())) .map_err(|err| { let message = format!( @@ -329,7 +332,7 @@ pub fn s_from_list_of_series( }) .collect(); - Series::new(name, lists).cast(&dtype).map_err(|err| { + Series::new(name.into(), lists).cast(&dtype).map_err(|err| { let message = format!("from_list/2 cannot create series of lists: {err:?}"); Error::RaiseTerm(Box::new(message)) }) @@ -347,7 +350,7 @@ pub fn s_from_list_of_series_as_structs( let series_vec = series_term.decode::>()?; StructChunked::from_series( - name, + name.into(), series_vec .into_iter() .map(|s| s.clone_inner()) @@ -371,7 +374,7 @@ macro_rules! from_binary { let transmuted = unsafe { slice::from_raw_parts(slice.as_ptr() as *const $type, slice.len() / $bytes) }; - ExSeries::new(Series::new(name, transmuted)) + ExSeries::new(Series::new(name.into(), transmuted)) } }; }