diff --git a/poetry.lock b/poetry.lock index 16f24b3..1dcbc07 100644 --- a/poetry.lock +++ b/poetry.lock @@ -150,6 +150,10 @@ files = [ {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"}, {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"}, {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"}, {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"}, @@ -162,8 +166,14 @@ files = [ {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"}, {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, @@ -174,8 +184,24 @@ files = [ {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"}, {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"}, + {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"}, + {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"}, {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"}, {file = 
"Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"}, @@ -185,6 +211,10 @@ files = [ {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"}, {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"}, {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"}, {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"}, @@ -196,6 +226,10 @@ files = [ {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"}, {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"}, {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"}, {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"}, @@ -208,6 +242,10 @@ files = [ {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"}, + {file = 
"Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"}, {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"}, {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"}, {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"}, @@ -220,6 +258,10 @@ files = [ {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"}, {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"}, {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"}, {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, @@ -462,6 +504,28 @@ files = [ {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, ] +[[package]] +name = "fastexcel" +version = "0.12.0" +description = "A fast excel file reader for Python, written in Rust" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fastexcel-0.12.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d40b2c8ccb122e15cf89c2b972a679a937eca3e90b3e69c6db24f3666b11cff9"}, + {file = "fastexcel-0.12.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2aade78706bc3f7a5861083267a038a49e809f3ee1abe6cceda7b8420092e61e"}, + {file = "fastexcel-0.12.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:993b905e61b98eb45a33409ac78b8a14b28bd3a3bcf9a4f36c1dae3e65c3dafb"}, + {file = "fastexcel-0.12.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de72203c67702010931bc4730ddb35904841b0ad0d8b6654c69b62c3d7b19eca"}, + {file = "fastexcel-0.12.0-cp38-abi3-win_amd64.whl", hash = "sha256:e5326fae6c28e2239dfdc19bc2cbb121b509e6f0aefa4e6e43b0cf84bd33dea6"}, + {file = "fastexcel-0.12.0.tar.gz", hash = 
"sha256:1624e2c6385fe08d5ac21392c3a5bd91156fbeebaf6986e6e7f684adc0e0ecbe"}, +] + +[package.dependencies] +pyarrow = ">=8.0.0" + +[package.extras] +pandas = ["pandas (>=1.4.4)"] +polars = ["polars (>=0.16.14)"] + [[package]] name = "fonttools" version = "4.54.1" @@ -946,6 +1010,47 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "polars" +version = "1.12.0" +description = "Blazingly fast DataFrame library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "polars-1.12.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f3c4e4e423c373dda07b4c8a7ff12aa02094b524767d0ca306b1eba67f2d99e"}, + {file = "polars-1.12.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa6f9862f0cec6353243920d9b8d858c21ec8f25f91af203dea6ff91980e140d"}, + {file = "polars-1.12.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afb03647b5160737d2119532ee8ffe825de1d19d87f81bbbb005131786f7d59b"}, + {file = "polars-1.12.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ea96aba5eb3dab8f0e6abf05ab3fc2136b329261860ef8661d20f5456a2d78e0"}, + {file = "polars-1.12.0-cp39-abi3-win_amd64.whl", hash = "sha256:a228a4b320a36d03a9ec9dfe7241b6d80a2f119b2dceb1da953166655e4cf43c"}, + {file = "polars-1.12.0.tar.gz", hash = "sha256:fb5c92de1a8f7d0a3f923fe48ea89eb518bdf55315ae917012350fa072bd64f4"}, +] + +[package.extras] +adbc = ["adbc-driver-manager[dbapi]", "adbc-driver-sqlite[dbapi]"] +all = ["polars[async,cloudpickle,database,deltalake,excel,fsspec,graph,iceberg,numpy,pandas,plot,pyarrow,pydantic,style,timezone]"] +async = ["gevent"] +calamine = ["fastexcel (>=0.9)"] +cloudpickle = ["cloudpickle"] +connectorx = ["connectorx (>=0.3.2)"] +database = ["nest-asyncio", "polars[adbc,connectorx,sqlalchemy]"] +deltalake = ["deltalake (>=0.15.0)"] +excel = ["polars[calamine,openpyxl,xlsx2csv,xlsxwriter]"] +fsspec = ["fsspec"] +gpu = ["cudf-polars-cu12"] +graph = ["matplotlib"] +iceberg = ["pyiceberg (>=0.5.0)"] +numpy = ["numpy (>=1.16.0)"] +openpyxl = ["openpyxl (>=3.0.0)"] +pandas = ["pandas", "polars[pyarrow]"] +plot = ["altair (>=5.4.0)"] +pyarrow = ["pyarrow (>=7.0.0)"] +pydantic = ["pydantic"] +sqlalchemy = ["polars[pandas]", "sqlalchemy"] +style = ["great-tables (>=0.8.0)"] +timezone = ["backports-zoneinfo", "tzdata"] +xlsx2csv = ["xlsx2csv (>=0.8.0)"] +xlsxwriter = ["xlsxwriter"] + [[package]] name = "psycopg" version = "3.2.3" @@ -983,6 +1088,60 @@ files = [ [package.dependencies] typing-extensions = ">=4.6" +[[package]] +name = "pyarrow" +version = "18.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"}, + {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310"}, + {file = "pyarrow-18.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2"}, + {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280"}, + {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954"}, + {file = "pyarrow-18.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad"}, + {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802"}, + {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7"}, + {file = "pyarrow-18.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8"}, + {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f"}, + {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b"}, + {file = "pyarrow-18.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a"}, + {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = 
"sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea"}, + {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420"}, + {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8"}, + {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03"}, + {file = "pyarrow-18.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2"}, + {file = "pyarrow-18.0.0.tar.gz", hash = "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5"}, +] + +[package.extras] +test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] + [[package]] name = "pycparser" version = "2.22" @@ -1251,6 +1410,17 @@ files = [ {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, ] +[[package]] +name = "xlsxwriter" +version = "3.2.0" +description = "A Python module for creating Excel XLSX files." 
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "XlsxWriter-3.2.0-py3-none-any.whl", hash = "sha256:ecfd5405b3e0e228219bcaf24c2ca0915e012ca9464a14048021d21a995d490e"},
+    {file = "XlsxWriter-3.2.0.tar.gz", hash = "sha256:9977d0c661a72866a61f9f7a809e25ebbb0fb7036baa3b9fe74afcfca6b3cb8c"},
+]
+
 [[package]]
 name = "zopfli"
 version = "0.2.3"
@@ -1327,4 +1497,4 @@ test = ["pytest"]

 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "24c0c9c9e3462e85c0c4602cd63e9c1e64a4fb7a14bdfe78c03431b8ba5f1a2a"
+content-hash = "56bd7950c5ab8346d2a66f88c21e6f84ffce9e5eea33423f65f5c6506b49ed34"
diff --git a/pyproject.toml b/pyproject.toml
index c4dd27f..fa9688a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,9 +16,12 @@ weasyprint = "62.3"
 paramiko = "3.4.1,<3.5.0"
 boto3 = "^1.35.34"
 pandas = "^2.2.3"
+polars = "^1.12.0"
 psycopg = "^3.2.3"
 psycopg-pool = "^3.2.3"
 openpyxl = "^3.1.5"
+XlsxWriter = "^3.2.0"
+fastexcel = "^0.12.0"

 [tool.poetry.group.dev.dependencies]
 pylint = "^3.3.1"
diff --git a/quipus/data_sources/csv_data_source.py b/quipus/data_sources/csv_data_source.py
index 70bdd9a..3f18959 100644
--- a/quipus/data_sources/csv_data_source.py
+++ b/quipus/data_sources/csv_data_source.py
@@ -1,6 +1,7 @@
-from typing import Optional, List
+from pathlib import Path
+from typing import Union, Optional, List

-import pandas as pd
+import polars as pl


 class CSVDataSource:
@@ -8,53 +9,63 @@ class CSVDataSource:
     CSV DataSource class to manage data retrieval from CSV files.

     Attributes:
-        file_path (str): Path to the CSV file.
+        file_path (Union[Path, str]): Path to the CSV file.
         delimiter (str): Delimiter used in the CSV file.
         encoding (str): Encoding of the CSV file.
-        dataframe (Optional[pd.DataFrame]): Loaded data as a pandas DataFrame.
+        dataframe (Optional[pl.DataFrame]): Loaded data as a polars DataFrame.
     """

-    def __init__(self, file_path: str, delimiter: str = ",", encoding: str = "utf-8"):
+    def __init__(
+        self,
+        file_path: Union[Path, str],
+        delimiter: str = ",",
+        encoding: str = "utf8"
+    ):
         self.file_path = file_path
         self.delimiter = delimiter
         self.encoding = encoding
-        self.dataframe: Optional[pd.DataFrame] = None
+        self.dataframe: Optional[pl.DataFrame] = None
         self.__load_data()

     def __load_data(self) -> None:
         """
-        Load data from the CSV file into a pandas DataFrame.
+        Load data from the CSV file into a polars DataFrame.
         """
-        self.dataframe = pd.read_csv(
-            self.file_path, delimiter=self.delimiter, encoding=self.encoding
+        self.dataframe = pl.read_csv(
+            source=self.file_path,
+            separator=self.delimiter,
+            encoding=self.encoding
         )

     @property
-    def file_path(self) -> str:
+    def file_path(self) -> Union[Path, str]:
         """
         Get the path to the CSV file.

         Returns:
-            str: Path to the CSV file.
+            Union[Path, str]: Path to the CSV file.
         """
         return self.__file_path

     @file_path.setter
-    def file_path(self, file_path: str) -> None:
+    def file_path(self, file_path: Union[Path, str]) -> None:
         """
         Set the path to the CSV file.

         Args:
-            file_path (str): Path to the CSV file.
+            file_path (Union[Path, str]): Path to the CSV file.

         Raises:
-            TypeError: If 'file_path' is not a string.
-            ValueError: If 'file_path' is an empty string.
+            TypeError: If 'file_path' is not a string or a 'Path' object.
+            FileNotFoundError: If the path does not exist or is not a file.
""" - if not isinstance(file_path, str): - raise TypeError("'file_path' must be a string.") - if not file_path.strip(): - raise ValueError("'file_path' cannot be an empty string.") + if not isinstance(file_path, (Path, str)): + raise TypeError("'file_path' must be either a string or 'Path' object.") + + # Ensure that path exists + file_path = Path(file_path) if isinstance(file_path, str) else file_path + if not file_path.exists() or file_path.is_dir(): + raise FileNotFoundError(f"'{file_path}' does not exist.") self.__file_path = file_path @property @@ -98,12 +109,12 @@ def encoding(self, encoding: str) -> None: raise TypeError("'encoding' must be a string.") self.__encoding = encoding - def fetch_data(self) -> pd.DataFrame: + def fetch_data(self) -> pl.DataFrame: """ - Fetch all data from the CSV file as a pandas DataFrame. + Fetch all data from the CSV file as a polars DataFrame. Returns: - pd.DataFrame: Data loaded from the CSV file. + pl.DataFrame: Data loaded from the CSV file. """ if self.dataframe is None: raise RuntimeError("No data loaded from the CSV file.") @@ -120,15 +131,15 @@ def get_columns(self) -> List[str]: raise RuntimeError("No data loaded from the CSV file.") return list(self.dataframe.columns) - def filter_data(self, query: str) -> pd.DataFrame: + def filter_data(self, query: str) -> pl.DataFrame: """ - Filter the CSV data using a pandas query string. + Filter the CSV data using a polars query string. Args: query (str): Query string to filter the data. Returns: - pd.DataFrame: Filtered data based on the query. + pl.DataFrame: Filtered data based on the query. Raises: RuntimeError: If no data is loaded. @@ -138,7 +149,7 @@ def filter_data(self, query: str) -> pd.DataFrame: raise RuntimeError("No data loaded from the CSV file.") try: - return self.dataframe.query(query) + return self.dataframe.sql(query=query) except Exception as e: raise ValueError(f"Invalid query: {query}") from e diff --git a/quipus/data_sources/dataframe_data_source.py b/quipus/data_sources/dataframe_data_source.py index 1b59670..c5bbcb3 100644 --- a/quipus/data_sources/dataframe_data_source.py +++ b/quipus/data_sources/dataframe_data_source.py @@ -1,50 +1,50 @@ from typing import List -import pandas as pd +import polars as pl class DataFrameDataSource: """ - Pandas DataFrame DataSource to manage data retrieval from DataFrames. + polars DataFrame DataSource to manage data retrieval from DataFrames. Attributes: - dataframe (pd.DataFrame): DataFrame containing the data. + dataframe (pl.DataFrame): DataFrame containing the data. """ - def __init__(self, dataframe: pd.DataFrame): + def __init__(self, dataframe: pl.DataFrame): self.dataframe = dataframe @property - def dataframe(self) -> pd.DataFrame: + def dataframe(self) -> pl.DataFrame: """ Get the DataFrame containing the data. Returns: - pd.DataFrame: DataFrame containing the data. + pl.DataFrame: DataFrame containing the data. """ return self.__dataframe @dataframe.setter - def dataframe(self, dataframe: pd.DataFrame) -> None: + def dataframe(self, dataframe: pl.DataFrame) -> None: """ Set the DataFrame containing the data. Args: - dataframe (pd.DataFrame): DataFrame containing the data. + dataframe (pl.DataFrame): DataFrame containing the data. Raises: - TypeError: If 'dataframe' is not a pandas DataFrame. + TypeError: If 'dataframe' is not a polars DataFrame. 
""" - if not isinstance(dataframe, pd.DataFrame): - raise TypeError("'dataframe' must be a pandas DataFrame.") + if not isinstance(dataframe, pl.DataFrame): + raise TypeError("'dataframe' must be a polars DataFrame.") self.__dataframe = dataframe - def fetch_data(self) -> pd.DataFrame: + def fetch_data(self) -> pl.DataFrame: """ Fetch data from the DataFrame. Returns: - pd.DataFrame: DataFrame containing the data. + pl.DataFrame: DataFrame containing the data. """ if self.dataframe is None: raise RuntimeError("No data loaded in the DataFrame.") @@ -61,7 +61,7 @@ def get_columns(self) -> List[str]: raise RuntimeError("No data loaded in the DataFrame.") return list(self.dataframe.columns) - def filter_data(self, query: str) -> pd.DataFrame: + def filter_data(self, query: str) -> pl.DataFrame: """ Filter the data in the DataFrame using a query. @@ -69,7 +69,7 @@ def filter_data(self, query: str) -> pd.DataFrame: query (str): Query to filter the data. Returns: - pd.DataFrame: Filtered DataFrame. + pl.DataFrame: Filtered DataFrame. Raises: RuntimeError: If no data is loaded in the DataFrame. @@ -86,7 +86,7 @@ def filter_data(self, query: str) -> pd.DataFrame: if query.strip() == "": raise ValueError("Query cannot be an empty string.") - return self.dataframe.query(query) + return self.dataframe.sql(query) def __str__(self) -> str: """ diff --git a/quipus/data_sources/xlsx_data_source.py b/quipus/data_sources/xlsx_data_source.py index 64fb0dc..33c1cfb 100644 --- a/quipus/data_sources/xlsx_data_source.py +++ b/quipus/data_sources/xlsx_data_source.py @@ -1,6 +1,7 @@ -from typing import Optional, List +from pathlib import Path +from typing import Union, Optional, List -import pandas as pd +import polars as pl class XLSXDataSource: @@ -8,35 +9,35 @@ class XLSXDataSource: XLSX DataSource class to manage data retrieval from Excel (.xlsx) files. Attributes: - file_path (str): Path to the Excel file. + file_path (Union[Path, str]): Path to the Excel file. sheet_name (str): Name of the sheet to load from the Excel file. - dataframe (Optional[pd.DataFrame]): Loaded data as a pandas DataFrame. + dataframe (Optional[pl.DataFrame]): Loaded data as a polars DataFrame. """ - def __init__(self, file_path: str, sheet_name: str): + def __init__(self, file_path: Union[Path, str], sheet_name: str): self.file_path = file_path self.sheet_name = sheet_name - self.dataframe: Optional[pd.DataFrame] = None + self.dataframe: Optional[pl.DataFrame] = None self.__load_data() def __load_data(self) -> None: """ - Load data from the Excel file into a pandas DataFrame. + Load data from the Excel file into a polars DataFrame. """ - self.dataframe = pd.read_excel(self.file_path, sheet_name=self.sheet_name) + self.dataframe = pl.read_excel(self.file_path, sheet_name=self.sheet_name) @property - def file_path(self) -> str: + def file_path(self) -> Union[Path, str]: """ Get the path to the Excel file. Returns: - str: Path to the Excel file. + Union[Path, str]: Path to the Excel file. """ return self.__file_path @file_path.setter - def file_path(self, file_path: str) -> None: + def file_path(self, file_path: Union[Path, str]) -> None: """ Set the path to the Excel file. @@ -47,11 +48,14 @@ def file_path(self, file_path: str) -> None: TypeError: If 'file_path' is not a string. ValueError: If 'file_path' is an empty string. 
""" - if not isinstance(file_path, str): - raise TypeError("'file_path' must be a string.") - if not file_path.strip(): - raise ValueError("'file_path' cannot be an empty string.") - self.__file_path = file_path + if not isinstance(file_path, (Path, str)): + raise TypeError("'file_path' must be either a string or 'Path' object.") + + # Ensure if path exists + path = Path(file_path) if isinstance(file_path, str) else file_path + if not path.exists() or path.is_dir(): + raise FileNotFoundError(f"'{file_path}' does not exist.") + self.__file_path = path @property def sheet_name(self) -> str: @@ -77,12 +81,12 @@ def sheet_name(self, sheet_name: str) -> None: raise TypeError("'sheet_name' must be a string.") self.__sheet_name = sheet_name - def fetch_data(self) -> pd.DataFrame: + def fetch_data(self) -> pl.DataFrame: """ - Fetch all data from the Excel sheet as a pandas DataFrame. + Fetch all data from the Excel sheet as a polars DataFrame. Returns: - pd.DataFrame: Data loaded from the Excel sheet. + pl.DataFrame: Data loaded from the Excel sheet. """ if self.dataframe is None: raise RuntimeError("No data loaded from the Excel file.") @@ -99,15 +103,15 @@ def get_columns(self) -> List[str]: raise RuntimeError("No data loaded from the Excel file.") return list(self.dataframe.columns) - def filter_data(self, query: str) -> pd.DataFrame: + def filter_data(self, query: str) -> pl.DataFrame: """ - Filter the Excel data using a pandas query string. + Filter the Excel data using a polars query string. Args: query (str): Query string to filter the data. Returns: - pd.DataFrame: Filtered data based on the query. + pl.DataFrame: Filtered data based on the query. Raises: RuntimeError: If no data is loaded. @@ -117,7 +121,7 @@ def filter_data(self, query: str) -> pd.DataFrame: raise RuntimeError("No data loaded from the Excel file.") try: - return self.dataframe.query(query) + return self.dataframe.sql(query) except Exception: raise ValueError("Invalid query provided.") diff --git a/quipus/models/certificate_factory.py b/quipus/models/certificate_factory.py deleted file mode 100644 index b25f12b..0000000 --- a/quipus/models/certificate_factory.py +++ /dev/null @@ -1,47 +0,0 @@ -import pandas as pd - -from .certificate import Certificate - - -class CertificateFactory: - """ - Factory class to create Certificate objects - - Methods: - - create_one_certificate: create a single Certificate object from a pd.Series - - create_certificates: create a list of Certificate objects from a DataFrame - """ - @staticmethod - def create_one_certificate(row: pd.Series) -> Certificate: - """ - Create a single Certificate object from a row in a DataFrame - - Args: - row (pd.Series): a row in a DataFrame containing the certificate data - - Returns: - Certificate: a Certificate object created from the row - """ - return Certificate( - completion_date=row["completion_date"], - content=row["content"], - entity=row["entity"], - name=row["name"], - duration=row.get("duration", None), - validity_checker=row.get("validity_checker", None), - ) - - @staticmethod - def create_certificates(df: pd.DataFrame) -> list[Certificate]: - """ - Create a list of Certificate objects from a DataFrame - - Args: - df (pd.DataFrame): a DataFrame containing the certificate data - - Returns: - list[Certificate]: a list of Certificate objects created from the DataFrame - """ - return [ - CertificateFactory.create_one_certificate(row) for _, row in df.iterrows() - ] diff --git a/quipus/services/template_manager.py b/quipus/services/template_manager.py 
index fd9da96..d499f1e 100644
--- a/quipus/services/template_manager.py
+++ b/quipus/services/template_manager.py
@@ -93,8 +93,10 @@ def from_source(self, source_type: Literal["csv"], **kwargs) -> Self:

     def from_csv(self, path_to_file: str) -> Self:
         csv_data_source = CSVDataSource(file_path=path_to_file)
-        self.data = csv_data_source.fetch_data().to_dict(orient="records")
-
+        fetched_data = csv_data_source.fetch_data()
+        # iter_rows(named=True) yields one dict per row, matching the shape
+        # previously produced by to_dict(orient="records")
+        self.data = list(fetched_data.iter_rows(named=True))
         return self

     def with_multiple_templates(self, templates: list[Template]):
diff --git a/tests/test_certificate_factory.py b/tests/test_certificate_factory.py
deleted file mode 100644
index b7add70..0000000
--- a/tests/test_certificate_factory.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import pytest
-import pandas as pd
-
-from quipus import CertificateFactory, Certificate
-
-
-@pytest.fixture
-def sample_row():
-    return pd.Series(
-        {
-            "completion_date": "2024-10-27",
-            "content": "Test content 1",
-            "entity": "Test entity 1",
-            "name": "Test name 1",
-            "duration": "Test duration 1",
-            "validity_checker": "https://example.com/check",
-        }
-    )
-
-
-@pytest.fixture
-def sample_dataframe():
-    data = {
-        "completion_date": ["2024-10-27", "2024-10-25"],
-        "content": ["Test content 1", "Test content 2"],
-        "entity": ["Test entity 1", "Test entity 2"],
-        "name": ["Test name 1", "Test name 2"],
-        "duration": ["Test duration 1", "Test duration 2"],
-        "validity_checker": [
-            "https://example.com/check1",
-            "https://example.com/check2",
-        ],
-    }
-    return pd.DataFrame(data)
-
-
-def test_create_one_certificate(sample_row):
-    certificate = CertificateFactory.create_one_certificate(sample_row)
-    assert isinstance(certificate, Certificate)
-    assert certificate.completion_date == "2024-10-27"
-    assert certificate.content == "Test content 1"
-    assert certificate.entity == "Test entity 1"
-    assert certificate.name == "Test name 1"
-    assert certificate.duration == "Test duration 1"
-    assert certificate.validity_checker == "https://example.com/check"
-
-
-def test_create_certificates(sample_dataframe):
-    certificates = CertificateFactory.create_certificates(sample_dataframe)
-
-    assert isinstance(certificates, list)
-    assert len(certificates) == len(sample_dataframe)
-    assert all(isinstance(cert, Certificate) for cert in certificates)
-
-    assert certificates[0].completion_date == "2024-10-27"
-    assert certificates[0].content == "Test content 1"
-    assert certificates[0].entity == "Test entity 1"
-    assert certificates[0].name == "Test name 1"
-    assert certificates[0].duration == "Test duration 1"
-    assert certificates[0].validity_checker == "https://example.com/check1"
-
-    assert certificates[1].completion_date == "2024-10-25"
-    assert certificates[1].content == "Test content 2"
-    assert certificates[1].entity == "Test entity 2"
-    assert certificates[1].name == "Test name 2"
-    assert certificates[1].duration == "Test duration 2"
-    assert certificates[1].validity_checker == "https://example.com/check2"
diff --git a/tests/test_csv_source.py b/tests/test_csv_source.py
index 2c6e323..ee66dd3 100644
--- a/tests/test_csv_source.py
+++ b/tests/test_csv_source.py
@@ -1,4 +1,5 @@
 import pytest
+import polars as pl
 import pandas as pd

 from quipus import CSVDataSource
@@ -8,11 +11,11 @@ def test_csv_data_source_valid_initialization(tmp_path):
     csv_file = tmp_path / "test.csv"
     csv_file.write_text("col1,col2\n1,2\n3,4")

-    data_source = CSVDataSource(file_path=str(csv_file))
+    data_source = CSVDataSource(file_path=csv_file)

-    assert data_source.file_path == str(csv_file)
+    assert str(data_source.file_path) == str(csv_file)
     assert data_source.delimiter == ","
-    assert data_source.encoding == "utf-8"
+    assert data_source.encoding == "utf8"
     assert data_source.dataframe is not None
@@ -22,7 +25,7 @@ def test_csv_data_source_invalid_file_path_type():

 def test_csv_data_source_empty_file_path():
-    with pytest.raises(ValueError):
+    with pytest.raises(FileNotFoundError):
         CSVDataSource(file_path="")
@@ -40,8 +43,8 @@ def test_csv_data_source_fetch_data(tmp_path):

     df = data_source.fetch_data()

-    expected_df = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})
-    pd.testing.assert_frame_equal(df.reset_index(drop=True), expected_df)
+    expected_df = pl.DataFrame({"col1": [1, 3], "col2": [2, 4]})
+    assert df.equals(expected_df)


 def test_csv_data_source_fetch_data_no_data(tmp_path):
@@ -86,10 +89,10 @@ def test_csv_data_source_filter_data(tmp_path):

     data_source = CSVDataSource(file_path=str(csv_file))

-    filtered_df = data_source.filter_data("col1 > 2")
+    filtered_df = data_source.filter_data("SELECT * FROM self WHERE col1 > 2")

-    expected_df = pd.DataFrame({"col1": [3, 5], "col2": [4, 6]}, index=[1, 2])
-    pd.testing.assert_frame_equal(filtered_df, expected_df)
+    expected_df = pl.DataFrame({"col1": [3, 5], "col2": [4, 6]})
+    assert filtered_df.equals(expected_df)


 def test_csv_data_source_filter_data_invalid_query(tmp_path):
@@ -112,7 +115,7 @@ def test_csv_data_source_filter_data_no_data(tmp_path):
     data_source.dataframe = None

     with pytest.raises(RuntimeError, match="No data loaded from the CSV file."):
-        data_source.filter_data("col1 > 2")
+        data_source.filter_data("SELECT * FROM self WHERE col1 > 2")


 def test_csv_data_source_str(tmp_path):
@@ -146,14 +149,16 @@ def test_csv_data_source_invalid_encoding(tmp_path):
         data_source.encoding = 123


-def test_csv_data_source_invalid_delimiter_init():
-    csv_file = "test.csv"
+def test_csv_data_source_invalid_delimiter_init(tmp_path):
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text("col1,col2\n1,2\n3,4")
     with pytest.raises(TypeError):
         CSVDataSource(file_path=csv_file, delimiter=123)


-def test_csv_data_source_invalid_encoding_init():
-    csv_file = "test.csv"
+def test_csv_data_source_invalid_encoding_init(tmp_path):
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text("col1,col2\n1,2\n3,4")
     with pytest.raises(TypeError):
         CSVDataSource(file_path=csv_file, encoding=123)
@@ -162,7 +167,7 @@ def test_csv_data_source_empty_csv(tmp_path):
     csv_file = tmp_path / "empty.csv"
     csv_file.write_text("")

-    with pytest.raises(pd.errors.EmptyDataError):
+    with pytest.raises(pl.exceptions.NoDataError):
         CSVDataSource(file_path=str(csv_file))
@@ -176,15 +181,5 @@ def test_csv_data_source_invalid_csv(tmp_path):
     df = data_source.fetch_data()

     assert df is not None
-    assert not df.empty
+    assert not df.is_empty()
     assert df.shape == (1, 7)
-
-
-def test_csv_data_source_read_csv_exception(monkeypatch):
-    def mock_read_csv(*args, **kwargs):
-        raise pd.errors.ParserError("Mocked parser error")
-
-    monkeypatch.setattr(pd, "read_csv", mock_read_csv)
-
-    with pytest.raises(pd.errors.ParserError, match="Mocked parser error"):
-        CSVDataSource(file_path="any.csv")
diff --git a/tests/test_dataframe_source.py b/tests/test_dataframe_source.py
index 1ebe2bd..d9d31a2 100644
--- a/tests/test_dataframe_source.py
+++ b/tests/test_dataframe_source.py
@@ -1,4 +1,5 @@
 import pytest
+import polars as pl
 import pandas as pd

 from quipus.data_sources.dataframe_data_source import DataFrameDataSource
@@ -6,7 +7,7 @@
 @pytest.fixture
 def sample_dataframe():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "A": [1, 2, 3, 4],
             "B": [10, 20, 30, 40],
@@ -22,12 +23,12 @@ def dataframe_source(sample_dataframe):

 @pytest.fixture
 def dataframe_source_empty():
-    return DataFrameDataSource(pd.DataFrame())
+    return DataFrameDataSource(pl.DataFrame())


 def test_fetch_data(dataframe_source, sample_dataframe):
     fetched_data = dataframe_source.fetch_data()
-    pd.testing.assert_frame_equal(fetched_data, sample_dataframe)
+    assert fetched_data.equals(sample_dataframe)


 def test_get_columns(dataframe_source):
@@ -35,8 +36,8 @@


 def test_filter_data(dataframe_source):
-    filtered_data = dataframe_source.filter_data("A > 2")
-    expected_filtered_data = pd.DataFrame(
+    filtered_data = dataframe_source.filter_data("SELECT * FROM self WHERE A > 2")
+    expected_filtered_data = pl.DataFrame(
         {
             "A": [3, 4],
             "B": [30, 40],
@@ -44,9 +45,7 @@
         }
     )

-    pd.testing.assert_frame_equal(
-        filtered_data.reset_index(drop=True), expected_filtered_data
-    )
+    assert filtered_data.equals(expected_filtered_data)


 def test_filter_data_invalid_query(dataframe_source):
diff --git a/tests/test_xlsx_source.py b/tests/test_xlsx_source.py
index 1b9a8b3..5aede65 100644
--- a/tests/test_xlsx_source.py
+++ b/tests/test_xlsx_source.py
@@ -1,16 +1,17 @@
 import pytest
+import polars as pl
 import pandas as pd

 from quipus import XLSXDataSource

 def test_xlsx_data_source_valid_initialization(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})
-    data.to_excel(xlsx_file, index=False)
+    data = pl.DataFrame({"col1": [1, 3], "col2": [2, 4]})
+    data.write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")

-    assert data_source.file_path == str(xlsx_file)
+    assert str(data_source.file_path) == str(xlsx_file)
     assert data_source.sheet_name == "Sheet1"
     assert data_source.dataframe is not None
@@ -19,7 +20,7 @@
         XLSXDataSource(file_path=123, sheet_name="Sheet1")

 def test_xlsx_data_source_empty_file_path():
-    with pytest.raises(ValueError):
+    with pytest.raises(FileNotFoundError):
         XLSXDataSource(file_path="", sheet_name="Sheet1")

 def test_xlsx_data_source_file_not_found():
@@ -28,18 +29,18 @@
 def test_xlsx_data_source_fetch_data(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})
-    data.to_excel(xlsx_file, index=False)
+    data = pl.DataFrame({"col1": [1, 3], "col2": [2, 4]})
+    data.write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")
     df = data_source.fetch_data()

-    pd.testing.assert_frame_equal(df.reset_index(drop=True), data)
+    assert df.equals(data)

 def test_xlsx_data_source_get_columns(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
-    data.to_excel(xlsx_file, index=False)
+    data = pl.DataFrame({"col1": [1, 2], "col2": [3, 4]})
+    data.write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")
     columns = data_source.get_columns()
@@ -48,26 +49,26 @@
 def test_xlsx_data_source_filter_data(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1, 3, 5], "col2": [2, 4, 6]})
-    data.to_excel(xlsx_file, index=False)
+    data = pl.DataFrame({"col1": [1, 3, 5], "col2": [2, 4, 6]})
+    data.write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")

-    filtered_df = data_source.filter_data("col1 > 2")
+    filtered_df = data_source.filter_data("SELECT * FROM self WHERE col1 > 2")

-    expected_df = pd.DataFrame({"col1": [3, 5], "col2": [4, 6]}, index=[1, 2])
-    pd.testing.assert_frame_equal(filtered_df, expected_df)
+    expected_df = pl.DataFrame({"col1": [3, 5], "col2": [4, 6]})
+    assert filtered_df.equals(expected_df)

 def test_xlsx_data_source_invalid_sheet_name(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1], "col2": [2]})
-    data.to_excel(xlsx_file, index=False, sheet_name="Sheet1")
+    data = pl.DataFrame({"col1": [1], "col2": [2]})
+    data.write_excel(xlsx_file, worksheet="Sheet1")

     with pytest.raises(ValueError):
         XLSXDataSource(file_path=str(xlsx_file), sheet_name="InvalidSheet")

 def test_xlsx_data_source_no_data_loaded(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    pd.DataFrame({"col1": [1], "col2": [2]}).to_excel(xlsx_file, index=False)
+    pl.DataFrame({"col1": [1], "col2": [2]}).write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")
     data_source.dataframe = None
@@ -77,8 +78,8 @@
 def test_xlsx_data_source_invalid_query(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
-    data.to_excel(xlsx_file, index=False)
+    data = pl.DataFrame({"col1": [1, 2], "col2": [3, 4]})
+    data.write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")
@@ -87,8 +88,8 @@
 def test_xlsx_data_source_str(tmp_path):
     xlsx_file = tmp_path / "test.xlsx"
-    data = pd.DataFrame({"col1": [1], "col2": [2]})
-    data.to_excel(xlsx_file, index=False)
+    data = pl.DataFrame({"col1": [1], "col2": [2]})
+    data.write_excel(xlsx_file)

     data_source = XLSXDataSource(file_path=str(xlsx_file), sheet_name="Sheet1")
     expected_str = f"XLSXDataSource(file_path={str(xlsx_file)}, sheet_name=Sheet1)"
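
Reviewer note: polars has no direct equivalent of pandas `DataFrame.query`, so `filter_data` now takes a full SQL statement that `DataFrame.sql` runs against the frame, addressable under the table name `self` (this is why the tests switch from `"col1 > 2"` to `"SELECT * FROM self WHERE col1 > 2"`). A minimal sketch of the new calling convention, with illustrative column names:

```python
import polars as pl

df = pl.DataFrame({"col1": [1, 3, 5], "col2": [2, 4, 6]})

# pandas (before): df.query("col1 > 2")
# polars (after):  a SQL statement; the frame is addressable as "self"
filtered = df.sql("SELECT * FROM self WHERE col1 > 2")

assert filtered.equals(pl.DataFrame({"col1": [3, 5], "col2": [4, 6]}))
```

Callers that passed pandas-style boolean expressions to `filter_data` will need to migrate their query strings; the SQL form is not backward compatible.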
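Likewise, `to_dict(orient="records")` does not exist on a polars frame; `TemplateManager.from_csv` now relies on `DataFrame.iter_rows(named=True)`, which yields one `{column: value}` dict per row and so preserves the record-list shape the templates consume. A quick sketch, assuming a simple two-column frame (the column names are illustrative):

```python
import polars as pl

df = pl.DataFrame({"name": ["Ada", "Grace"], "entity": ["X", "Y"]})

# pandas (before): df.to_dict(orient="records")
# polars (after):  iter_rows(named=True) yields one dict per row
records = list(df.iter_rows(named=True))

assert records == [
    {"name": "Ada", "entity": "X"},
    {"name": "Grace", "entity": "Y"},
]
```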