From 51070e307facc4e0ded15dd09e3148177c2f96b3 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Tue, 27 Aug 2024 16:00:45 +0200 Subject: [PATCH 01/34] Draft builder --- Cargo.lock | 2250 +++++++++++++++++++++- swiftide-integrations/Cargo.toml | 2 + swiftide-integrations/src/lancedb/mod.rs | 126 ++ swiftide-integrations/src/lib.rs | 2 + swiftide/tests/lancedb.rs | 57 + 5 files changed, 2397 insertions(+), 40 deletions(-) create mode 100644 swiftide-integrations/src/lancedb/mod.rs create mode 100644 swiftide/tests/lancedb.rs diff --git a/Cargo.lock b/Cargo.lock index 3d604680..1276350d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "const-random", "getrandom", "once_cell", "version_check", @@ -142,6 +143,227 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arrow" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", + "zstd", +] + +[[package]] +name = "arrow-json" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.2.6", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "arrow-select" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "52.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax 0.8.4", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -177,6 +399,35 @@ dependencies = [ "async-trait", ] +[[package]] +name = "async-io" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +dependencies = [ + "async-lock", + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-lite", + "log", + "parking", + "polling", + "rustix 0.37.27", + "slab", + "socket2 0.4.10", + "waker-fn", +] + +[[package]] +name = "async-lock" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" +dependencies = [ + "event-listener 2.5.3", +] + [[package]] name = "async-openai" version = "0.23.4" @@ -203,6 +454,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "async-priority-channel" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acde96f444d31031f760c5c43dc786b97d3e1cb2ee49dd06898383fe9a999758" +dependencies = [ + "event-listener 4.0.3", +] + +[[package]] +name = "async-recursion" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "async-stream" version = "0.3.5" @@ -236,6 +507,21 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "async_cell" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "834eee9ce518130a3b4d5af09ecc43e9d6b57ee76613f227a1ddd6b77c7a62bc" + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -279,7 +565,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.1.0", "hex", "http 0.2.12", "ring", @@ -316,7 +602,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.1.0", "http 0.2.12", "http-body 0.4.6", "once_cell", @@ -349,6 +635,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-sdk-dynamodb" +version = "1.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c15099eaa98164c0ebcd3a3c1bf981b23796bade0dc8e156c843b750444e7a14" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand 2.1.0", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-sso" version = "1.37.0" @@ -512,7 +821,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.1.0", "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", @@ -723,6 +1032,27 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bitpacking" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" +dependencies = [ + "crunchy", +] + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -819,6 +1149,18 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +[[package]] +name = "bytecount" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + +[[package]] +name = "bytemuck" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31" + [[package]] name = "byteorder" version = "1.5.0" @@ -844,6 +1186,37 @@ dependencies = [ "either", ] +[[package]] +name = "camino" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", +] + [[package]] name = "case_insensitive_string" version = "0.2.4" @@ -880,6 +1253,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + [[package]] name = "cfg-if" version = "1.0.0" @@ -901,6 +1280,28 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "chrono-tz" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf 0.11.2", +] + +[[package]] +name = "chrono-tz-build" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +dependencies = [ + "parse-zoneinfo", + "phf 0.11.2", + "phf_codegen 0.11.2", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -973,6 +1374,17 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + [[package]] name = "compact_str" version = "0.8.0" @@ -987,6 +1399,15 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "console" version = "0.15.8" @@ -1000,6 +1421,26 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "cookie" version = "0.18.1" @@ -1100,6 +1541,15 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -1120,11 +1570,20 @@ dependencies = [ ] [[package]] -name = "crossbeam-utils" -version = "0.8.20" +name = "crossbeam-queue" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" - +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "crunchy" version = "0.2.2" @@ -1164,6 +1623,27 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "darling" version = "0.20.10" @@ -1199,12 +1679,318 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "data-encoding" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" +[[package]] +name = "datafusion" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab9d55a9cd2634818953809f75ebe5248b00dd43c3227efb2a51a2d5feaad54e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-array", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "num_cpus", + "object_store", + "parking_lot", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", +] + +[[package]] +name = "datafusion-common" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "def66b642959e7f96f5d2da22e1f43d3bd35598f821e5ce351a0553e0f1b7367" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "instant", + "libc", + "num_cpus", + "object_store", + "sqlparser", +] + +[[package]] +name = "datafusion-common-runtime" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f104bb9cb44c06c9badf8a0d7e0855e5f7fa5e395b887d7f835e8a9457dc1352" +dependencies = [ + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ac0fd8b5d80bbca3fc3b6f40da4e9f6907354824ec3b18bbd83fee8cf5c3c3e" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2103d2cc16fb11ef1fa993a6cac57ed5cb028601db4b97566c90e5fa77aa1e68" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-functions" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a369332afd0ef5bd565f6db2139fb9f1dfdd0afa75a7f70f000b74208d76994f" +dependencies = [ + "arrow", + "base64 0.22.1", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools 0.12.1", + "log", + "rand", + "regex", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92718db1aff70c47e5abf9fc975768530097059e5db7c7b78cd64b5e9a11fc77" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", + "paste", + "sqlparser", +] + +[[package]] +name = "datafusion-functions-array" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30bb80f46ff3dcf4bb4510209c2ba9b8ce1b716ac8b7bf70c6bf7dca6260c831" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "itertools 0.12.1", + "log", + "paste", +] + +[[package]] +name = "datafusion-optimizer" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82f34692011bec4fdd6fc18c264bf8037b8625d801e6dd8f5111af15cb6d71d3" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "paste", + "regex-syntax 0.8.4", +] + +[[package]] +name = "datafusion-physical-expr" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45538630defedb553771434a437f7ca8f04b9b3e834344aafacecb27dc65d5e5" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "base64 0.22.1", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "hex", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "paste", + "petgraph", + "regex", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d8a72b0ca908e074aaeca52c14ddf5c28d22361e9cb6bc79bb733cd6661b536" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-plan" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b504eae6107a342775e22e323e9103f7f42db593ec6103b28605b7b7b1405c4a" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.2.6", + "itertools 0.12.1", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "40.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5db33f323f41b95ae201318ba654a9bf11113e58a51a1dff977b1a836d3d889" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "log", + "regex", + "sqlparser", + "strum", +] + [[package]] name = "deadpool" version = "0.10.0" @@ -1223,6 +2009,26 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" +[[package]] +name = "deepsize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cdb987ec36f6bf7bfbea3f928b75590b736fc42af8e54d97592481351b2b96c" +dependencies = [ + "deepsize_derive", +] + +[[package]] +name = "deepsize_derive" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990101d41f3bc8c1a45641024377ee284ecc338e5ecf3ea0f0e236d897c72796" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "deranged" version = "0.3.11" @@ -1318,6 +2124,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "docker_credential" version = "1.3.1" @@ -1335,6 +2147,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + [[package]] name = "dtoa" version = "1.0.9" @@ -1426,12 +2244,38 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "version_check", +] + [[package]] name = "esaxx-rs" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "event-listener" +version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b215c49b2b248c855fb73579eb1f4f26c38ffdc12973e20e07b91d78d5646e" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + [[package]] name = "eventsource-stream" version = "0.2.3" @@ -1483,6 +2327,12 @@ dependencies = [ "tendril", ] +[[package]] +name = "fastdivide" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59668941c55e5c186b8b58c391629af56774ec768f73c08bbcd56f09348eb00b" + [[package]] name = "fastembed" version = "3.14.1" @@ -1498,6 +2348,15 @@ dependencies = [ "tokenizers", ] +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.1.0" @@ -1516,6 +2375,22 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + [[package]] name = "flate2" version = "1.0.30" @@ -1563,8 +2438,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] -name = "futf" -version = "0.1.5" +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix 0.38.34", + "windows-sys 0.52.0", +] + +[[package]] +name = "fsst" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9178c033f667093b96b8f8e26e62dc5e96b81fa45ee8703933a33892fbb96991" +dependencies = [ + "rand", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futf" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" dependencies = [ @@ -1620,6 +2520,21 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-macro" version = "0.3.30" @@ -1693,8 +2608,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1703,6 +2620,12 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "globset" version = "0.4.14" @@ -1773,6 +2696,7 @@ checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", + "num-traits", ] [[package]] @@ -1930,6 +2854,12 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "0.2.12" @@ -1998,6 +2928,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.30" @@ -2015,7 +2951,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.5.7", "tokio", "tower-service", "tracing", @@ -2154,7 +3090,7 @@ dependencies = [ "http-body 1.0.1", "hyper 1.4.1", "pin-project-lite", - "socket2", + "socket2 0.5.7", "tokio", "tower", "tower-service", @@ -2176,6 +3112,15 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -2312,6 +3257,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.48.0", ] [[package]] @@ -2320,7 +3279,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" dependencies = [ - "socket2", + "socket2 0.5.7", "widestring", "windows-sys 0.48.0", "winreg 0.50.0", @@ -2392,28 +3351,523 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] -name = "jobserver" -version = "0.1.31" +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lance" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7215c36aa9b0319b11369c0955e1e6bf42b47a7b7266279e0742ae1638df1284" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "async_cell", + "aws-credential-types", + "aws-sdk-dynamodb", + "byteorder", + "bytes", + "chrono", + "dashmap", + "datafusion", + "datafusion-functions", + "datafusion-physical-expr", + "deepsize", + "futures", + "half", + "itertools 0.12.1", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-encoding", + "lance-file", + "lance-index", + "lance-io", + "lance-linalg", + "lance-table", + "lazy_static", + "log", + "moka", + "num_cpus", + "object_store", + "pin-project", + "prost 0.12.6", + "prost-build", + "rand", + "roaring", + "serde", + "serde_json", + "snafu", + "tantivy", + "tempfile", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "lance-arrow" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73975ef4393a50a24cfc827e751633038c1f456f4fb16b1c1c2aa0ca827db1b0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "getrandom", + "half", + "num-traits", + "rand", +] + +[[package]] +name = "lance-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfe010f4bdab8e6484540c7c03d01e3c263cd664080a946cab0961a166cb8da9" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "byteorder", + "bytes", + "chrono", + "datafusion-common", + "datafusion-sql", + "deepsize", + "futures", + "lance-arrow", + "lazy_static", + "libc", + "log", + "mock_instant", + "moka", + "num_cpus", + "object_store", + "pin-project", + "prost 0.12.6", + "rand", + "roaring", + "serde_json", + "snafu", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", +] + +[[package]] +name = "lance-datafusion" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4789165245fe0f4588c514faa3dfd727229a6b022272c6ef72c378682f71faa7" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-trait", + "datafusion", + "datafusion-common", + "datafusion-functions", + "datafusion-physical-expr", + "futures", + "lance-arrow", + "lance-core", + "log", + "prost 0.12.6", + "snafu", + "tokio", +] + +[[package]] +name = "lance-encoding" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37853a7f2903ec77add0a2f4cca23890ca4e8b2bf01a27dc6d111b4b1116f2b" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-schema", + "arrow-select", + "bytes", + "fsst", + "futures", + "hyperloglogplus", + "lance-arrow", + "lance-core", + "log", + "num-traits", + "num_cpus", + "prost 0.12.6", + "prost-build", + "prost-types 0.12.6", + "rand", + "snafu", + "tokio", + "tracing", + "zstd", +] + +[[package]] +name = "lance-file" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6fa4833bd13f45ea2c8868cad9b02b8de19ffeec0e62342f4bbc1f678abcac7" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "byteorder", + "bytes", + "datafusion-common", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lance-encoding", + "lance-io", + "log", + "num-traits", + "num_cpus", + "object_store", + "prost 0.12.6", + "prost-build", + "prost-types 0.12.6", + "roaring", + "snafu", + "tempfile", + "tokio", + "tracing", +] + +[[package]] +name = "lance-index" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "948ac3698a815d2f217441c34cb59e852e0a1c715164ca4cb1686e584262ffbf" +dependencies = [ + "arrow", + "arrow-array", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "bitvec", + "bytes", + "crossbeam-queue", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-sql", + "deepsize", + "futures", + "half", + "itertools 0.12.1", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-encoding", + "lance-file", + "lance-io", + "lance-linalg", + "lance-table", + "lazy_static", + "log", + "moka", + "num-traits", + "num_cpus", + "object_store", + "prost 0.12.6", + "prost-build", + "rand", + "rayon", + "roaring", + "serde", + "serde_json", + "snafu", + "tantivy", + "tempfile", + "tokio", + "tracing", +] + +[[package]] +name = "lance-io" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "169f7fc1f472b3ad8c38bc3aff3c5bb4e978813fd3bfc4aedd8bee48355c5423" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-priority-channel", + "async-recursion", + "async-trait", + "aws-config", + "aws-credential-types", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lazy_static", + "log", + "num_cpus", + "object_store", + "path_abs", + "pin-project", + "prost 0.12.6", + "prost-build", + "rand", + "shellexpand", + "snafu", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "lance-linalg" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08820921b613efec6b8909dfbbb2056ccd79baffa3d813014fb6c610c62c3bdb" +dependencies = [ + "arrow-array", + "arrow-ord", + "arrow-schema", + "bitvec", + "cc", + "deepsize", + "futures", + "half", + "lance-arrow", + "lance-core", + "lazy_static", + "log", + "num-traits", + "num_cpus", + "rand", + "rayon", + "tokio", + "tracing", +] + +[[package]] +name = "lance-table" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a367224fcce42d88c0193151577d70c8b8dc7179ea5136c82d0cfd0d91768f17" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", + "async-trait", + "aws-credential-types", + "aws-sdk-dynamodb", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lance-file", + "lance-io", + "lazy_static", + "log", + "object_store", + "prost 0.12.6", + "prost-build", + "prost-types 0.12.6", + "rand", + "rangemap", + "roaring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "lance-testing" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40fd0bc5dedee83c040bf31be9618129d0e189ce41f6010fd1922c4079895763" +dependencies = [ + "arrow-array", + "arrow-schema", + "lance-arrow", + "num-traits", + "rand", +] + +[[package]] +name = "lancedb" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66550022ce988360e73508e58e4b89491ec67acf59111287e401cf9a426903" +dependencies = [ + "arrow", + "arrow-array", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-ord", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "datafusion-physical-plan", + "futures", + "half", + "lance", + "lance-datafusion", + "lance-encoding", + "lance-index", + "lance-linalg", + "lance-testing", + "lazy_static", + "log", + "num-traits", + "object_store", + "pin-project", + "regex", + "serde", + "serde_json", + "serde_with", + "snafu", + "tokio", + "url", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" dependencies = [ - "libc", + "static_assertions", ] [[package]] -name = "js-sys" -version = "0.3.69" +name = "lexical-write-float" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "wasm-bindgen", + "lexical-util", + "lexical-write-integer", + "static_assertions", ] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "lexical-write-integer" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] [[package]] name = "libc" @@ -2421,6 +3875,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.1.3" @@ -2437,6 +3897,12 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -2459,6 +3925,15 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lru" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "lru-cache" version = "0.1.2" @@ -2468,12 +3943,30 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + [[package]] name = "mac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mach2" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" +dependencies = [ + "libc", +] + [[package]] name = "macro_rules_attribute" version = "0.2.0" @@ -2547,12 +4040,41 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "measure_time" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +dependencies = [ + "instant", + "log", +] + [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -2596,6 +4118,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mock_instant" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9366861eb2a2c436c20b12c8dbec5f798cea6b47ad99216be0282942e2c81ea0" +dependencies = [ + "once_cell", +] + [[package]] name = "mockall" version = "0.13.0" @@ -2622,6 +4153,31 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "moka" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa6e72583bf6830c956235bff0d5afec8cf2952f579ebad18ae7821a917d950f" +dependencies = [ + "async-io", + "async-lock", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "futures-util", + "once_cell", + "parking_lot", + "quanta", + "rustc_version", + "scheduled-thread-pool", + "skeptic", + "smallvec", + "tagptr", + "thiserror", + "triomphe", + "uuid", +] + [[package]] name = "monostate" version = "0.1.13" @@ -2643,6 +4199,18 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "multimap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + [[package]] name = "native-tls" version = "0.2.12" @@ -2699,6 +4267,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -2733,6 +4315,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -2740,6 +4344,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -2767,6 +4372,37 @@ dependencies = [ "memchr", ] +[[package]] +name = "object_store" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "chrono", + "futures", + "humantime", + "hyper 1.4.1", + "itertools 0.13.0", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand", + "reqwest", + "ring", + "rustls-pemfile 2.1.2", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + [[package]] name = "ollama-rs" version = "0.2.0" @@ -2787,6 +4423,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oneshot" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e296cf87e61c9cfc1a61c3c63a0f7f286ed4554e0e22be84e8a38e1d264a2a29" + [[package]] name = "onig" version = "6.4.0" @@ -2904,6 +4546,21 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "ownedbytes" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "parking" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" + [[package]] name = "parking_lot" version = "0.12.3" @@ -2952,12 +4609,33 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path_abs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ef02f6342ac01d8a93b65f96db53fe68a92a15f41144f97fb00a9e669633c3" +dependencies = [ + "serde", + "serde_derive", + "std_prelude", + "stfu8", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -3009,6 +4687,16 @@ dependencies = [ "sha2", ] +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap 2.2.6", +] + [[package]] name = "phf" version = "0.10.1" @@ -3165,6 +4853,22 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "polling" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "concurrent-queue", + "libc", + "log", + "pin-project-lite", + "windows-sys 0.48.0", +] + [[package]] name = "portable-atomic" version = "1.7.0" @@ -3215,6 +4919,16 @@ dependencies = [ "termtree", ] +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn 2.0.74", +] + [[package]] name = "proc-macro2" version = "1.0.86" @@ -3224,6 +4938,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive 0.12.6", +] + [[package]] name = "prost" version = "0.13.1" @@ -3231,7 +4955,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.1", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.12.1", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost 0.12.6", + "prost-types 0.12.6", + "regex", + "syn 2.0.74", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.74", ] [[package]] @@ -3241,19 +4999,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.74", ] +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost 0.12.6", +] + [[package]] name = "prost-types" version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" dependencies = [ - "prost", + "prost 0.13.1", ] [[package]] @@ -3272,6 +5039,17 @@ dependencies = [ "psl-types", ] +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.6.0", + "memchr", + "unicase", +] + [[package]] name = "pulldown-cmark" version = "0.11.0" @@ -3292,8 +5070,8 @@ dependencies = [ "anyhow", "derive_builder", "futures-util", - "prost", - "prost-types", + "prost 0.13.1", + "prost-types 0.13.1", "reqwest", "serde", "serde_json", @@ -3301,6 +5079,22 @@ dependencies = [ "tonic", ] +[[package]] +name = "quanta" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" +dependencies = [ + "crossbeam-utils", + "libc", + "mach2", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -3360,7 +5154,7 @@ checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" dependencies = [ "libc", "once_cell", - "socket2", + "socket2 0.5.7", "windows-sys 0.52.0", ] @@ -3373,6 +5167,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -3403,6 +5203,31 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rangemap" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684" + +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -3462,7 +5287,7 @@ dependencies = [ "rustls-pki-types", "ryu", "sha1_smol", - "socket2", + "socket2 0.5.7", "tokio", "tokio-retry", "tokio-rustls 0.26.0", @@ -3646,6 +5471,26 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "roaring" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3667,6 +5512,20 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.37.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + [[package]] name = "rustix" version = "0.38.34" @@ -3676,7 +5535,7 @@ dependencies = [ "bitflags 2.6.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.14", "windows-sys 0.52.0", ] @@ -3822,6 +5681,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "scheduled-thread-pool" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" +dependencies = [ + "parking_lot", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3895,6 +5763,9 @@ name = "semver" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +dependencies = [ + "serde", +] [[package]] name = "serde" @@ -4016,6 +5887,15 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shellexpand" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da03fa3b94cc19e3ebfc88c4229c49d8f08cdbd1228870a45f0ffdf84988e14b" +dependencies = [ + "dirs", +] + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -4037,6 +5917,30 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +[[package]] +name = "skeptic" +version = "0.13.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" +dependencies = [ + "bytecount", + "cargo_metadata", + "error-chain", + "glob", + "pulldown-cmark 0.9.6", + "tempfile", + "walkdir", +] + +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = "0.4.9" @@ -4052,6 +5956,38 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "snafu" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "socket2" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "socket2" version = "0.5.7" @@ -4123,6 +6059,27 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "sqlparser" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -4135,6 +6092,18 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "std_prelude" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" + +[[package]] +name = "stfu8" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978" + [[package]] name = "string_cache" version = "0.8.7" @@ -4317,6 +6286,7 @@ dependencies = [ "indoc", "insta", "itertools 0.13.0", + "lancedb", "mockall", "ollama-rs", "qdrant-client", @@ -4454,6 +6424,159 @@ dependencies = [ "libc", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "tantivy" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8d0582f186c0a6d55655d24543f15e43607299425c5ad8352c242b914b31856" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64 0.22.1", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "itertools 0.12.1", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "num_cpus", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools 0.12.1", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax 0.8.4", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" +dependencies = [ + "nom", +] + +[[package]] +name = "tantivy-sstable" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" +dependencies = [ + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" +dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" +dependencies = [ + "serde", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tar" version = "0.4.41" @@ -4478,8 +6601,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", - "fastrand", - "rustix", + "fastrand 2.1.0", + "rustix 0.38.34", "windows-sys 0.52.0", ] @@ -4611,7 +6734,7 @@ dependencies = [ "either", "itertools 0.13.0", "once_cell", - "pulldown-cmark", + "pulldown-cmark 0.11.0", "regex", "strum", "thiserror", @@ -4679,6 +6802,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -4748,7 +6880,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.5.7", "tokio-macros", "windows-sys 0.52.0", ] @@ -4862,10 +6994,10 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.1", "rustls-native-certs 0.7.1", "rustls-pemfile 2.1.2", - "socket2", + "socket2 0.5.7", "tokio", "tokio-rustls 0.26.0", "tokio-stream", @@ -5029,12 +7161,28 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "triomphe" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6631e42e10b40c0690bf92f404ebcfe6e1fdb480391d15f17cc8e96eeed5369" + [[package]] name = "try-lock" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.17.0" @@ -5047,7 +7195,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20093f826c4866dc7d4808ecd9ed351e3021b94194f25f490f0ea9ea030e1f87" dependencies = [ - "fastrand", + "fastrand 2.1.0", "serde", "serde_json", "ureq", @@ -5218,6 +7366,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8parse" version = "0.2.2" @@ -5231,6 +7385,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", + "serde", ] [[package]] @@ -5257,6 +7412,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.5.0" @@ -5609,6 +7770,15 @@ dependencies = [ "url", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xattr" version = "1.3.1" @@ -5616,8 +7786,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", - "linux-raw-sys", - "rustix", + "linux-raw-sys 0.4.14", + "rustix 0.38.34", ] [[package]] diff --git a/swiftide-integrations/Cargo.toml b/swiftide-integrations/Cargo.toml index b4654ac0..49b24ecc 100644 --- a/swiftide-integrations/Cargo.toml +++ b/swiftide-integrations/Cargo.toml @@ -60,6 +60,7 @@ aws-sdk-bedrockruntime = { version = "1.37", features = [ secrecy = { version = "0.8.0", optional = true } reqwest = { version = "0.12.5", optional = true, default-features = false } ollama-rs = { version = "0.2.0", optional = true } +lancedb = { version = "0.9.0", optional = true } [dev-dependencies] swiftide-core = { path = "../swiftide-core", features = ["test-utils"] } @@ -105,6 +106,7 @@ aws-bedrock = [ "dep:aws-credential-types", "dep:aws-sdk-bedrockruntime", ] +lancedb = ["dep:lancedb"] [lints] workspace = true diff --git a/swiftide-integrations/src/lancedb/mod.rs b/swiftide-integrations/src/lancedb/mod.rs new file mode 100644 index 00000000..bf83cfbb --- /dev/null +++ b/swiftide-integrations/src/lancedb/mod.rs @@ -0,0 +1,126 @@ +use std::sync::Arc; + +use derive_builder::Builder; +use lancedb::arrow::arrow_schema::{DataType, Field, Schema}; +use swiftide_core::indexing::EmbeddedField; + +#[derive(Builder, Clone)] +#[builder(setter(into))] +pub struct LanceDB { + client: Arc, + #[builder(default = "self.default_schema_from_fields()")] + schema: Arc, + + table_name: String, + vector_size: i32, + uri: String, + #[builder(private, default)] + fields: Vec, +} + +impl std::fmt::Debug for LanceDB { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("LanceDB") + .field("schema", &self.schema) + .finish() + } +} + +impl LanceDB { + pub fn builder() -> LanceDBBuilder { + LanceDBBuilder::default() + } +} + +impl LanceDBBuilder { + pub fn with_vector(&mut self, config: impl Into) -> &mut Self { + if self.fields.is_none() { + self.fields(Vec::default()); + } + + self.fields + .as_mut() + .expect("Fields should be initialized") + .push(FieldConfig::Vector(config.into())); + + self + } + + pub fn with_sparse_vector(&mut self, config: impl Into) -> &mut Self { + if self.fields.is_none() { + self.fields(Vec::default()); + } + self.fields + .as_mut() + .expect("Fields should be initialized") + .push(FieldConfig::SparseVector(config.into())); + + self + } + + fn default_schema_from_fields(&self) -> Arc { + let mut fields = Vec::new(); + let vector_size = self.vector_size; + + for ref field in self.fields.clone().unwrap_or_default() { + match field { + FieldConfig::Vector(config) => { + let vector_size = config.vector_size.or(vector_size).expect( + "Vector size should be set either in the field or in the LanceDB builder", + ); + + fields.push(Field::new( + config.embedded_field.to_string(), + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::Float32, false)), + vector_size, + ), + false, + )); + } + FieldConfig::SparseVector(config) => { + fields.push(Field::new( + format!("{}_sparse", config.embedded_field), + DataType::Float64, + true, + )); + } + } + } + Arc::new(Schema::new(fields)) + } +} + +#[derive(Clone)] +pub enum FieldConfig { + Vector(VectorConfig), + SparseVector(SparseVectorConfig), +} + +#[derive(Clone)] +pub struct VectorConfig { + embedded_field: EmbeddedField, + vector_size: Option, +} + +impl Into for EmbeddedField { + fn into(self) -> VectorConfig { + VectorConfig { + embedded_field: self, + vector_size: None, + } + } +} + +#[derive(Clone)] +pub struct SparseVectorConfig { + embedded_field: EmbeddedField, +} + +impl Into for EmbeddedField { + fn into(self) -> SparseVectorConfig { + SparseVectorConfig { + embedded_field: self, + } + } +} diff --git a/swiftide-integrations/src/lib.rs b/swiftide-integrations/src/lib.rs index dbb1f4a3..18498341 100644 --- a/swiftide-integrations/src/lib.rs +++ b/swiftide-integrations/src/lib.rs @@ -6,6 +6,8 @@ pub mod aws_bedrock; pub mod fastembed; #[cfg(feature = "groq")] pub mod groq; +#[cfg(feature = "lancedb")] +pub mod lancedb; #[cfg(feature = "ollama")] pub mod ollama; #[cfg(feature = "openai")] diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs new file mode 100644 index 00000000..3ee24f51 --- /dev/null +++ b/swiftide/tests/lancedb.rs @@ -0,0 +1,57 @@ +use swiftide::indexing::{ + transformers::{ChunkCode, MetadataQACode}, + EmbeddedField, +}; +use swiftide_indexing::{loaders, persist, transformers, Pipeline}; +use swiftide_integrations::{fastembed::FastEmbed, lancedb::LanceDB}; +use swiftide_test_utils::{mock_chat_completions, mock_embeddings, openai_client}; +use temp_dir::TempDir; +use wiremock::MockServer; + +#[test_log::test(tokio::test)] +async fn test_sparse_indexing_pipeline() { + // Setup temporary directory and file for testing + let tempdir = TempDir::new().unwrap(); + let codefile = tempdir.child("main.rs"); + std::fs::write(&codefile, "fn main() { println!(\"Hello, World!\"); }").unwrap(); + + // Setup mock servers to simulate API responses + let mock_server = MockServer::start().await; + mock_chat_completions(&mock_server).await; + + let openai_client = openai_client(&mock_server.uri(), "text-embedding-3-small", "gpt-4o"); + + let fastembed_sparse = FastEmbed::try_default_sparse().unwrap(); + let fastembed = FastEmbed::try_default().unwrap(); + let memory_storage = persist::MemoryStorage::default(); + + let result = + Pipeline::from_loader(loaders::FileLoader::new(tempdir.path()).with_extensions(&["rs"])) + .then_chunk(ChunkCode::try_for_language("rust").unwrap()) + .then(MetadataQACode::new(openai_client)) + .then_in_batch(20, transformers::SparseEmbed::new(fastembed_sparse)) + .then_in_batch(20, transformers::Embed::new(fastembed)) + .log_nodes() + .then_store_with( + LanceDB::builder() + .uri(tempdir.child("lancedb").to_str().unwrap()) + .vector_size(384) + .with_vector(EmbeddedField::Combined) + .with_sparse_vector(EmbeddedField::Combined) + .table_name("swiftide_test") + .build() + .unwrap(), + ) + .then_store_with(memory_storage.clone()) + .run() + .await; + + let node = memory_storage + .get_all_values() + .await + .first() + .unwrap() + .clone(); + + result.expect("Indexing pipeline failed"); +} From a1d5b0c9c2caf6db7d8c64f3f53c2c45de954c82 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 09:55:07 +0200 Subject: [PATCH 02/34] Add tests and dedicated methods for embedded field names --- swiftide-core/Cargo.toml | 1 + swiftide-core/src/node.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/swiftide-core/Cargo.toml b/swiftide-core/Cargo.toml index 92da1f71..46dc63c0 100644 --- a/swiftide-core/Cargo.toml +++ b/swiftide-core/Cargo.toml @@ -31,6 +31,7 @@ tera = { version = "1.20", default-features = false } uuid = { version = "1.10", features = ["v4"] } [dev-dependencies] +test-case = { workspace = true } [features] test-utils = ["dep:mockall"] diff --git a/swiftide-core/src/node.rs b/swiftide-core/src/node.rs index 6fef7160..6ada45fe 100644 --- a/swiftide-core/src/node.rs +++ b/swiftide-core/src/node.rs @@ -215,9 +215,35 @@ pub enum EmbeddedField { Metadata(String), } +impl EmbeddedField { + /// Returns the name of the field when it would be a sparse vector + pub fn sparse_field_name(&self) -> String { + format!("{self}_sparse") + } + + /// Returns the name of the field when it would be a dense vector + pub fn field_name(&self) -> String { + format!("{self}") + } +} + #[allow(clippy::from_over_into)] impl Into for EmbeddedField { fn into(self) -> String { self.to_string() } } + +#[cfg(test)] +mod tests { + use super::*; + use test_case::test_case; + + #[test_case(&EmbeddedField::Combined, ["Combined", "Combined_sparse"])] + #[test_case(&EmbeddedField::Chunk, ["Chunk", "Chunk_sparse"])] + #[test_case(&EmbeddedField::Metadata("test".into()), ["Metadata: test", "Metadata: test_sparse"])] + fn field_name_tests(embedded_field: &EmbeddedField, expected: [&str; 2]) { + assert_eq!(embedded_field.field_name(), expected[0]); + assert_eq!(embedded_field.sparse_field_name(), expected[1]); + } +} From 51dce0c9213ae4ec3a0280ca08baa1b5c226e7bf Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 16:11:58 +0200 Subject: [PATCH 03/34] Persist compiles --- Cargo.lock | 19 ++- swiftide-core/src/indexing_stream.rs | 16 ++ swiftide-integrations/Cargo.toml | 7 +- .../src/lancedb/connection_pool.rs | 68 +++++++++ swiftide-integrations/src/lancedb/mod.rs | 143 +++++++++++++++--- swiftide-integrations/src/lancedb/persist.rs | 140 +++++++++++++++++ swiftide/tests/lancedb.rs | 3 +- 7 files changed, 369 insertions(+), 27 deletions(-) create mode 100644 swiftide-integrations/src/lancedb/connection_pool.rs create mode 100644 swiftide-integrations/src/lancedb/persist.rs diff --git a/Cargo.lock b/Cargo.lock index 1276350d..6cfb1e9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2003,11 +2003,25 @@ dependencies = [ "tokio", ] +[[package]] +name = "deadpool" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6541a3916932fe57768d4be0b1ffb5ec7cbf74ca8c903fdfd5c0fe8aa958f0ed" +dependencies = [ + "deadpool-runtime", + "num_cpus", + "tokio", +] + [[package]] name = "deadpool-runtime" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" +dependencies = [ + "tokio", +] [[package]] name = "deepsize" @@ -6234,6 +6248,7 @@ dependencies = [ "strum", "strum_macros", "tera", + "test-case", "tokio", "tokio-stream", "tracing", @@ -6273,12 +6288,14 @@ name = "swiftide-integrations" version = "0.9.0" dependencies = [ "anyhow", + "arrow-array", "async-openai", "async-trait", "aws-config", "aws-credential-types", "aws-sdk-bedrockruntime", "chrono", + "deadpool 0.12.1", "derive_builder", "fastembed", "futures-util", @@ -7755,7 +7772,7 @@ dependencies = [ "assert-json-diff", "async-trait", "base64 0.21.7", - "deadpool", + "deadpool 0.10.0", "futures", "http 1.1.0", "http-body-util", diff --git a/swiftide-core/src/indexing_stream.rs b/swiftide-core/src/indexing_stream.rs index 32a8153f..5b814ef3 100644 --- a/swiftide-core/src/indexing_stream.rs +++ b/swiftide-core/src/indexing_stream.rs @@ -42,6 +42,18 @@ impl Into for Vec> { } } +impl Into for Vec { + fn into(self) -> IndexingStream { + IndexingStream::from_nodes(self) + } +} + +impl Into for anyhow::Error { + fn into(self) -> IndexingStream { + IndexingStream::iter(vec![Err(self)]) + } +} + impl Into for Result> { fn into(self) -> IndexingStream { match self { @@ -81,4 +93,8 @@ impl IndexingStream { inner: stream::iter(iter).boxed(), } } + + pub fn from_nodes(nodes: Vec) -> Self { + IndexingStream::iter(nodes.into_iter().map(Ok)) + } } diff --git a/swiftide-integrations/Cargo.toml b/swiftide-integrations/Cargo.toml index 49b24ecc..da2f5ca5 100644 --- a/swiftide-integrations/Cargo.toml +++ b/swiftide-integrations/Cargo.toml @@ -61,6 +61,11 @@ secrecy = { version = "0.8.0", optional = true } reqwest = { version = "0.12.5", optional = true, default-features = false } ollama-rs = { version = "0.2.0", optional = true } lancedb = { version = "0.9.0", optional = true } +deadpool = { version = "0.12", optional = true, features = [ + "managed", + "rt_tokio_1", +] } +arrow-array = { version = "52.2", optional = true } [dev-dependencies] swiftide-core = { path = "../swiftide-core", features = ["test-utils"] } @@ -106,7 +111,7 @@ aws-bedrock = [ "dep:aws-credential-types", "dep:aws-sdk-bedrockruntime", ] -lancedb = ["dep:lancedb"] +lancedb = ["dep:lancedb", "dep:deadpool", "dep:arrow-array"] [lints] workspace = true diff --git a/swiftide-integrations/src/lancedb/connection_pool.rs b/swiftide-integrations/src/lancedb/connection_pool.rs new file mode 100644 index 00000000..aa4c0cd2 --- /dev/null +++ b/swiftide-integrations/src/lancedb/connection_pool.rs @@ -0,0 +1,68 @@ +use anyhow::Context as _; +use anyhow::Result; +use deadpool::managed::Manager; +use derive_builder::Builder; +use lancedb::connection::ConnectBuilder; + +#[derive(Builder, Debug, Clone)] +#[builder(setter(into), build_fn(error = "anyhow::Error"))] +pub struct LanceDBPoolManager { + uri: String, + api_key: Option, + region: Option, + storage_options: Vec<(String, String)>, +} + +pub type LanceDBConnectionPool = deadpool::managed::Pool; + +impl LanceDBPoolManager { + pub fn builder() -> LanceDBPoolManagerBuilder { + LanceDBPoolManagerBuilder::default() + } +} + +// impl LanceDBConnectionPoolBuilder { +// fn default_pool_manager(&self) -> Result { +// LanceDBPoolManagerBuilder::default() +// .uri(self.uri.clone().context("Expected URI")?) +// .api_key(self.api_key.clone().flatten()) +// .region(self.region.clone().flatten()) +// .storage_options(self.storage_options.clone().unwrap_or_default()) +// .build() +// } +// } + +impl Manager for LanceDBPoolManager { + type Type = lancedb::Connection; + type Error = anyhow::Error; + + async fn create(&self) -> Result { + let mut builder = ConnectBuilder::new(&self.uri); + + if let Some(api_key) = &self.api_key { + builder = builder.api_key(api_key); + } + + if let Some(region) = &self.region { + builder = builder.region(region); + } + + for (key, value) in &self.storage_options { + builder = builder.storage_option(key, value); + } + + builder + .execute() + .await + .context("Failed to create LanceDB connection") + } + + async fn recycle( + &self, + _obj: &mut Self::Type, + _metrics: &deadpool::managed::Metrics, + ) -> deadpool::managed::RecycleResult { + // NOTE: Should work fine with drop + Ok(()) + } +} diff --git a/swiftide-integrations/src/lancedb/mod.rs b/swiftide-integrations/src/lancedb/mod.rs index bf83cfbb..f72b1179 100644 --- a/swiftide-integrations/src/lancedb/mod.rs +++ b/swiftide-integrations/src/lancedb/mod.rs @@ -1,20 +1,58 @@ +use std::pin::Pin; use std::sync::Arc; +use anyhow::Context as _; +use anyhow::Result; +use connection_pool::LanceDBConnectionPool; +use connection_pool::LanceDBPoolManager; +use deadpool::managed::Object; use derive_builder::Builder; use lancedb::arrow::arrow_schema::{DataType, Field, Schema}; use swiftide_core::indexing::EmbeddedField; +pub mod connection_pool; +pub mod persist; #[derive(Builder, Clone)] -#[builder(setter(into))] +#[builder(setter(into, strip_option), build_fn(error = "anyhow::Error"))] pub struct LanceDB { - client: Arc, - #[builder(default = "self.default_schema_from_fields()")] + /// Connection pool for LanceDB + /// By default will use settings provided when creating the LanceDB instance. + #[builder(default = "self.default_connection_pool()?")] + connection_pool: Arc, + + /// Set the URI for the LanceDB instance. Required unless a connection pool is provided. + uri: Option, + #[builder(default = "Some(10)")] + /// The maximum number of connections to LanceDB, defaults to 10. + pool_size: Option, + + /// API key for LanceDB + api_key: Option, + /// Region for LanceDB + region: Option, + /// Storage options + storage_options: Vec<(String, String)>, + + #[builder(private, default = "self.default_schema_from_fields()")] schema: Arc, + /// The name of the table to store the data + /// By default will use `swiftide` + #[builder(default = "\"swiftide\".into()")] table_name: String, - vector_size: i32, - uri: String, - #[builder(private, default)] + + /// Default sizes of vectors. Vectors can also be of different + /// sizes by specifying the size in the vector configuration. + vector_size: Option, + + /// Batch size for storing nodes in LanceDB. Default is 256. + #[builder(default = "256")] + batch_size: usize, + + /// Field configuration for LanceDB, will result in the eventual schema. + /// + /// Supports multiple field types, see [`FieldConfig`] for more details. + #[builder(default)] fields: Vec, } @@ -30,6 +68,17 @@ impl LanceDB { pub fn builder() -> LanceDBBuilder { LanceDBBuilder::default() } + + /// Get a connection to LanceDB from the connection pool + /// + /// # Errors + /// + /// Returns an error if the connection cannot be retrieved. + pub async fn get_connection(&self) -> Result> { + Box::pin(self.connection_pool.get()) + .await + .map_err(|e| anyhow::anyhow!(e)) + } } impl LanceDBBuilder { @@ -46,15 +95,14 @@ impl LanceDBBuilder { self } - pub fn with_sparse_vector(&mut self, config: impl Into) -> &mut Self { + pub fn with_metadata(&mut self, config: impl Into) -> &mut Self { if self.fields.is_none() { self.fields(Vec::default()); } self.fields .as_mut() .expect("Fields should be initialized") - .push(FieldConfig::SparseVector(config.into())); - + .push(FieldConfig::Metadata(config.into())); self } @@ -65,12 +113,12 @@ impl LanceDBBuilder { for ref field in self.fields.clone().unwrap_or_default() { match field { FieldConfig::Vector(config) => { - let vector_size = config.vector_size.or(vector_size).expect( + let vector_size = config.vector_size.or(vector_size.flatten()).expect( "Vector size should be set either in the field or in the LanceDB builder", ); fields.push(Field::new( - config.embedded_field.to_string(), + config.field_name(), DataType::FixedSizeList( Arc::new(Field::new("item", DataType::Float32, false)), vector_size, @@ -78,23 +126,42 @@ impl LanceDBBuilder { false, )); } - FieldConfig::SparseVector(config) => { - fields.push(Field::new( - format!("{}_sparse", config.embedded_field), - DataType::Float64, - true, - )); + FieldConfig::Chunk => { + fields.push(Field::new("chunk", DataType::Utf8, true)); + } + FieldConfig::Metadata(config) => { + fields.push(Field::new(&config.field, DataType::Utf8, true)); } } } Arc::new(Schema::new(fields)) } + + fn default_connection_pool(&self) -> Result> { + let mgr = LanceDBPoolManager::builder() + .uri(self.uri.clone().flatten().context("URI should be set")?) + .api_key(self.api_key.clone().flatten()) + .region(self.region.clone().flatten()) + .storage_options(self.storage_options.clone().unwrap_or_default()) + .build()?; + + LanceDBConnectionPool::builder(mgr) + .max_size( + self.pool_size + .flatten() + .context("Pool size should be set")?, + ) + .build() + .map(Arc::new) + .map_err(Into::into) + } } #[derive(Clone)] pub enum FieldConfig { Vector(VectorConfig), - SparseVector(SparseVectorConfig), + Metadata(MetadataConfig), + Chunk, } #[derive(Clone)] @@ -103,10 +170,19 @@ pub struct VectorConfig { vector_size: Option, } -impl Into for EmbeddedField { - fn into(self) -> VectorConfig { +impl VectorConfig { + pub fn field_name(&self) -> String { + format!( + "vector_{}", + normalize_field_name(&self.embedded_field.to_string()) + ) + } +} + +impl From for VectorConfig { + fn from(val: EmbeddedField) -> Self { VectorConfig { - embedded_field: self, + embedded_field: val, vector_size: None, } } @@ -117,10 +193,29 @@ pub struct SparseVectorConfig { embedded_field: EmbeddedField, } -impl Into for EmbeddedField { - fn into(self) -> SparseVectorConfig { +impl From for SparseVectorConfig { + fn from(val: EmbeddedField) -> Self { SparseVectorConfig { - embedded_field: self, + embedded_field: val, + } + } +} + +#[derive(Clone)] +pub struct MetadataConfig { + field: String, +} + +impl> From for MetadataConfig { + fn from(val: T) -> Self { + MetadataConfig { + field: normalize_field_name(val.as_ref()), } } } + +fn normalize_field_name(field: &str) -> String { + field + .to_lowercase() + .replace(|c: char| !c.is_alphanumeric(), "_") +} diff --git a/swiftide-integrations/src/lancedb/persist.rs b/swiftide-integrations/src/lancedb/persist.rs new file mode 100644 index 00000000..fddeb32d --- /dev/null +++ b/swiftide-integrations/src/lancedb/persist.rs @@ -0,0 +1,140 @@ +use std::sync::Arc; + +use anyhow::Context as _; +use anyhow::Result; +use arrow_array::types::Float32Type; +use arrow_array::types::Utf8Type; +use arrow_array::Array; +use arrow_array::FixedSizeListArray; +use arrow_array::GenericByteArray; +use arrow_array::RecordBatch; +use arrow_array::RecordBatchIterator; +use async_trait::async_trait; +use lancedb::arrow::arrow_schema::ArrowError; +use swiftide_core::indexing::IndexingStream; +use swiftide_core::indexing::Node; +use swiftide_core::Persist; + +use super::FieldConfig; +use super::LanceDB; + +#[async_trait] +impl Persist for LanceDB { + async fn setup(&self) -> Result<()> { + let conn = self.get_connection().await?; + let schema = self.schema.clone(); + + conn.create_empty_table(&self.table_name, schema) + .execute() + .await + .map(|_| ()) + .map_err(Into::into) + } + + async fn store(&self, node: Node) -> Result { + let mut nodes = vec![node; 1]; + self.store_nodes(&nodes).await?; + + let node = nodes.swap_remove(0); + + Ok(node) + } + + async fn batch_store(&self, nodes: Vec) -> IndexingStream { + self.store_nodes(&nodes).await.map(|_| nodes).into() + } + + fn batch_size(&self) -> Option { + Some(self.batch_size) + } +} + +impl LanceDB { + async fn store_nodes(&self, nodes: &[Node]) -> Result<()> { + let schema = self.schema.clone(); + + let batches = self.extract_arrow_batches_from_nodes(nodes)?; + + let data = RecordBatchIterator::new( + vec![RecordBatch::try_new(schema.clone(), batches) + .context("Could not create batches")?] + .into_iter() + .map(Ok), + schema.clone(), + ); + + let conn = self.get_connection().await?; + let table = conn.open_table(&self.table_name).execute().await?; + let mut merge_insert = table.merge_insert(&["id"]); + + merge_insert + .when_matched_update_all(None) + .when_not_matched_insert_all(); + + merge_insert.execute(Box::new(data)).await?; + + Ok(()) + } + + fn extract_arrow_batches_from_nodes( + &self, + nodes: &[Node], + ) -> core::result::Result>, anyhow::Error> { + let fields = self.fields.as_slice(); + let mut batches: Vec> = Vec::with_capacity(fields.len()); + + for field in fields { + match field { + FieldConfig::Vector(config) => { + let mut row = Vec::with_capacity(nodes.len()); + let vector_size = config + .vector_size + .or(self.vector_size) + .context("Expected vector size to be set for field")?; + + for node in nodes { + let data = node + .vectors + .as_ref() + // TODO: verify compiler optimizes the double loops away + .map(|v| v.get(&config.embedded_field)) + .flatten() + .map(|v| v.into_iter().map(|f| Some(*f))); + + row.push(data) + } + batches.push(Arc::new(FixedSizeListArray::from_iter_primitive::< + Float32Type, + _, + _, + >(row, vector_size))); + } + FieldConfig::Metadata(config) => { + let mut row = Vec::with_capacity(nodes.len()); + + for node in nodes { + let data = node + .metadata + .get(&config.field) + // TODO: Verify this gives the correct data + .map(|v| v.as_str()) + .flatten(); + + row.push(data) + } + batches.push(Arc::new(GenericByteArray::::from_iter(row))); + } + FieldConfig::Chunk => { + let mut row = Vec::with_capacity(nodes.len()); + + for node in nodes { + let data = Some(node.chunk.as_str()); + row.push(data); + } + batches.push(Arc::new(GenericByteArray::::from_iter(row))); + } + } + } + Ok(batches) + } +} diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index 3ee24f51..f53ddf5c 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -1,4 +1,5 @@ use swiftide::indexing::{ + transformers::metadata_qa_code::NAME as METADATA_QA_CODE_NAME, transformers::{ChunkCode, MetadataQACode}, EmbeddedField, }; @@ -37,7 +38,7 @@ async fn test_sparse_indexing_pipeline() { .uri(tempdir.child("lancedb").to_str().unwrap()) .vector_size(384) .with_vector(EmbeddedField::Combined) - .with_sparse_vector(EmbeddedField::Combined) + .with_metadata(METADATA_QA_CODE_NAME) .table_name("swiftide_test") .build() .unwrap(), From c98579d4a39cecd33ce001cb6a63f9daa7f50030 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 19:17:50 +0200 Subject: [PATCH 04/34] Fix tests --- Cargo.lock | 2 + swiftide-core/src/node.rs | 1 + .../src/lancedb/connection_pool.rs | 14 +-- swiftide-integrations/src/lancedb/mod.rs | 79 ++++++------ swiftide-integrations/src/lancedb/persist.rs | 26 ++-- swiftide/Cargo.toml | 30 +++-- swiftide/tests/lancedb.rs | 115 +++++++++++++----- 7 files changed, 162 insertions(+), 105 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6cfb1e9f..b3f42b2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6212,8 +6212,10 @@ name = "swiftide" version = "0.9.0" dependencies = [ "anyhow", + "arrow-array", "async-openai", "insta", + "lancedb", "mockall", "qdrant-client", "serde", diff --git a/swiftide-core/src/node.rs b/swiftide-core/src/node.rs index 6ada45fe..76fa8701 100644 --- a/swiftide-core/src/node.rs +++ b/swiftide-core/src/node.rs @@ -116,6 +116,7 @@ impl Node { /// /// Embeddable data mapped to their `EmbeddedField`. pub fn as_embeddables(&self) -> Vec<(EmbeddedField, String)> { + // TODO: Figure out a clever way to do zero copy let mut embeddables = Vec::new(); if self.embed_mode == EmbedMode::SingleWithMetadata || self.embed_mode == EmbedMode::Both { diff --git a/swiftide-integrations/src/lancedb/connection_pool.rs b/swiftide-integrations/src/lancedb/connection_pool.rs index aa4c0cd2..670638aa 100644 --- a/swiftide-integrations/src/lancedb/connection_pool.rs +++ b/swiftide-integrations/src/lancedb/connection_pool.rs @@ -8,8 +8,11 @@ use lancedb::connection::ConnectBuilder; #[builder(setter(into), build_fn(error = "anyhow::Error"))] pub struct LanceDBPoolManager { uri: String, + #[builder(default)] api_key: Option, + #[builder(default)] region: Option, + #[builder(default)] storage_options: Vec<(String, String)>, } @@ -21,17 +24,6 @@ impl LanceDBPoolManager { } } -// impl LanceDBConnectionPoolBuilder { -// fn default_pool_manager(&self) -> Result { -// LanceDBPoolManagerBuilder::default() -// .uri(self.uri.clone().context("Expected URI")?) -// .api_key(self.api_key.clone().flatten()) -// .region(self.region.clone().flatten()) -// .storage_options(self.storage_options.clone().unwrap_or_default()) -// .build() -// } -// } - impl Manager for LanceDBPoolManager { type Type = lancedb::Connection; type Error = anyhow::Error; diff --git a/swiftide-integrations/src/lancedb/mod.rs b/swiftide-integrations/src/lancedb/mod.rs index f72b1179..a26b407c 100644 --- a/swiftide-integrations/src/lancedb/mod.rs +++ b/swiftide-integrations/src/lancedb/mod.rs @@ -1,4 +1,3 @@ -use std::pin::Pin; use std::sync::Arc; use anyhow::Context as _; @@ -14,23 +13,29 @@ pub mod persist; #[derive(Builder, Clone)] #[builder(setter(into, strip_option), build_fn(error = "anyhow::Error"))] +#[allow(dead_code)] pub struct LanceDB { - /// Connection pool for LanceDB - /// By default will use settings provided when creating the LanceDB instance. + /** + Connection pool for `LanceDB` + By default will use settings provided when creating the instance. + */ #[builder(default = "self.default_connection_pool()?")] connection_pool: Arc, - /// Set the URI for the LanceDB instance. Required unless a connection pool is provided. + /// Set the URI. Required unless a connection pool is provided. uri: Option, + /// The maximum number of connections, defaults to 10. #[builder(default = "Some(10)")] - /// The maximum number of connections to LanceDB, defaults to 10. pool_size: Option, - /// API key for LanceDB + /// Optional API key + #[builder(default)] api_key: Option, - /// Region for LanceDB + /// Optional Region + #[builder(default)] region: Option, /// Storage options + #[builder(default)] storage_options: Vec<(String, String)>, #[builder(private, default = "self.default_schema_from_fields()")] @@ -45,14 +50,14 @@ pub struct LanceDB { /// sizes by specifying the size in the vector configuration. vector_size: Option, - /// Batch size for storing nodes in LanceDB. Default is 256. + /// Batch size for storing nodes in `LanceDB`. Default is 256. #[builder(default = "256")] batch_size: usize, - /// Field configuration for LanceDB, will result in the eventual schema. + /// Field configuration for `LanceDB`, will result in the eventual schema. /// /// Supports multiple field types, see [`FieldConfig`] for more details. - #[builder(default)] + #[builder(default = "self.default_fields()")] fields: Vec, } @@ -69,11 +74,13 @@ impl LanceDB { LanceDBBuilder::default() } - /// Get a connection to LanceDB from the connection pool - /// - /// # Errors - /// - /// Returns an error if the connection cannot be retrieved. + /** + Get a connection to `LanceDB` from the connection pool + + # Errors + + Returns an error if the connection cannot be retrieved. + */ pub async fn get_connection(&self) -> Result> { Box::pin(self.connection_pool.get()) .await @@ -84,7 +91,7 @@ impl LanceDB { impl LanceDBBuilder { pub fn with_vector(&mut self, config: impl Into) -> &mut Self { if self.fields.is_none() { - self.fields(Vec::default()); + self.fields(self.default_fields()); } self.fields @@ -97,7 +104,7 @@ impl LanceDBBuilder { pub fn with_metadata(&mut self, config: impl Into) -> &mut Self { if self.fields.is_none() { - self.fields(Vec::default()); + self.fields(self.default_fields()); } self.fields .as_mut() @@ -106,11 +113,16 @@ impl LanceDBBuilder { self } + #[allow(clippy::unused_self)] + fn default_fields(&self) -> Vec { + vec![FieldConfig::ID, FieldConfig::Chunk] + } + fn default_schema_from_fields(&self) -> Arc { let mut fields = Vec::new(); let vector_size = self.vector_size; - for ref field in self.fields.clone().unwrap_or_default() { + for field in self.fields.as_deref().unwrap_or(&self.default_fields()) { match field { FieldConfig::Vector(config) => { let vector_size = config.vector_size.or(vector_size.flatten()).expect( @@ -120,18 +132,21 @@ impl LanceDBBuilder { fields.push(Field::new( config.field_name(), DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Float32, false)), + Arc::new(Field::new("item", DataType::Float32, true)), vector_size, ), - false, + true, )); } FieldConfig::Chunk => { - fields.push(Field::new("chunk", DataType::Utf8, true)); + fields.push(Field::new("chunk", DataType::Utf8, false)); } FieldConfig::Metadata(config) => { fields.push(Field::new(&config.field, DataType::Utf8, true)); } + FieldConfig::ID => { + fields.push(Field::new("id", DataType::UInt64, false)); + } } } Arc::new(Schema::new(fields)) @@ -146,11 +161,7 @@ impl LanceDBBuilder { .build()?; LanceDBConnectionPool::builder(mgr) - .max_size( - self.pool_size - .flatten() - .context("Pool size should be set")?, - ) + .max_size(self.pool_size.flatten().unwrap_or(10)) .build() .map(Arc::new) .map_err(Into::into) @@ -162,6 +173,7 @@ pub enum FieldConfig { Vector(VectorConfig), Metadata(MetadataConfig), Chunk, + ID, } #[derive(Clone)] @@ -188,19 +200,6 @@ impl From for VectorConfig { } } -#[derive(Clone)] -pub struct SparseVectorConfig { - embedded_field: EmbeddedField, -} - -impl From for SparseVectorConfig { - fn from(val: EmbeddedField) -> Self { - SparseVectorConfig { - embedded_field: val, - } - } -} - #[derive(Clone)] pub struct MetadataConfig { field: String, @@ -214,7 +213,7 @@ impl> From for MetadataConfig { } } -fn normalize_field_name(field: &str) -> String { +pub(crate) fn normalize_field_name(field: &str) -> String { field .to_lowercase() .replace(|c: char| !c.is_alphanumeric(), "_") diff --git a/swiftide-integrations/src/lancedb/persist.rs b/swiftide-integrations/src/lancedb/persist.rs index fddeb32d..4cbbfbc6 100644 --- a/swiftide-integrations/src/lancedb/persist.rs +++ b/swiftide-integrations/src/lancedb/persist.rs @@ -3,18 +3,20 @@ use std::sync::Arc; use anyhow::Context as _; use anyhow::Result; use arrow_array::types::Float32Type; +use arrow_array::types::UInt64Type; use arrow_array::types::Utf8Type; use arrow_array::Array; use arrow_array::FixedSizeListArray; use arrow_array::GenericByteArray; +use arrow_array::PrimitiveArray; use arrow_array::RecordBatch; use arrow_array::RecordBatchIterator; use async_trait::async_trait; -use lancedb::arrow::arrow_schema::ArrowError; use swiftide_core::indexing::IndexingStream; use swiftide_core::indexing::Node; use swiftide_core::Persist; +use super::normalize_field_name; use super::FieldConfig; use super::LanceDB; @@ -41,7 +43,7 @@ impl Persist for LanceDB { } async fn batch_store(&self, nodes: Vec) -> IndexingStream { - self.store_nodes(&nodes).await.map(|_| nodes).into() + self.store_nodes(&nodes).await.map(|()| nodes).into() } fn batch_size(&self) -> Option { @@ -97,11 +99,10 @@ impl LanceDB { .vectors .as_ref() // TODO: verify compiler optimizes the double loops away - .map(|v| v.get(&config.embedded_field)) - .flatten() - .map(|v| v.into_iter().map(|f| Some(*f))); + .and_then(|v| v.get(&config.embedded_field)) + .map(|v| v.iter().map(|f| Some(*f))); - row.push(data) + row.push(data); } batches.push(Arc::new(FixedSizeListArray::from_iter_primitive::< Float32Type, @@ -117,10 +118,9 @@ impl LanceDB { .metadata .get(&config.field) // TODO: Verify this gives the correct data - .map(|v| v.as_str()) - .flatten(); + .and_then(|v| v.as_str().map(normalize_field_name)); - row.push(data) + row.push(data); } batches.push(Arc::new(GenericByteArray::::from_iter(row))); } @@ -133,6 +133,14 @@ impl LanceDB { } batches.push(Arc::new(GenericByteArray::::from_iter(row))); } + FieldConfig::ID => { + let mut row = Vec::with_capacity(nodes.len()); + for node in nodes { + let data = Some(node.calculate_hash()); + row.push(data); + } + batches.push(Arc::new(PrimitiveArray::::from_iter(row))); + } } } Ok(batches) diff --git a/swiftide/Cargo.toml b/swiftide/Cargo.toml index 6e955231..0a9c7f4c 100644 --- a/swiftide/Cargo.toml +++ b/swiftide/Cargo.toml @@ -22,15 +22,15 @@ swiftide-query = { path = "../swiftide-query", version = "0.9" } [features] default = [] all = [ - "qdrant", - "redis", - "tree-sitter", - "openai", - "fastembed", - "scraping", - "aws-bedrock", - "groq", - "ollama", + "qdrant", + "redis", + "tree-sitter", + "openai", + "fastembed", + "scraping", + "aws-bedrock", + "groq", + "ollama", ] # Qdrant for storage qdrant = ["swiftide-integrations/qdrant"] @@ -38,8 +38,8 @@ qdrant = ["swiftide-integrations/qdrant"] redis = ["swiftide-integrations/redis"] # Tree-sitter for code operations and chunking tree-sitter = [ - "swiftide-integrations/tree-sitter", - "swiftide-indexing/tree-sitter", + "swiftide-integrations/tree-sitter", + "swiftide-indexing/tree-sitter", ] # OpenAI for embedding and prompting openai = ["swiftide-integrations/openai"] @@ -53,6 +53,8 @@ fastembed = ["swiftide-integrations/fastembed"] scraping = ["swiftide-integrations/scraping"] # AWS Bedrock for prompting aws-bedrock = ["swiftide-integrations/aws-bedrock"] +# Lancdb +lancedb = ["swiftide-integrations/lancedb"] # Testing, internal only test-utils = [] @@ -63,7 +65,7 @@ swiftide-test-utils = { path = "../swiftide-test-utils" } async-openai = { workspace = true } qdrant-client = { workspace = true, default-features = false, features = [ - "serde", + "serde", ] } anyhow = { workspace = true } @@ -78,6 +80,10 @@ serde = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } +# TODO: Remove me when lancedb tests use query pipeline +lancedb = { version = "0.9" } +arrow-array = { version = "52.2" } + [lints] workspace = true diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index f53ddf5c..0684169d 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -1,7 +1,16 @@ -use swiftide::indexing::{ - transformers::metadata_qa_code::NAME as METADATA_QA_CODE_NAME, - transformers::{ChunkCode, MetadataQACode}, - EmbeddedField, +use arrow_array::*; +use arrow_array::{ + cast::AsArray, types::Utf8Type, AnyDictionaryArray, Array, RecordBatch, StringArray, +}; +use lancedb::query::ExecutableQuery; +use swiftide::{ + indexing::{ + transformers::{ + metadata_qa_code::NAME as METADATA_QA_CODE_NAME, ChunkCode, MetadataQACode, + }, + EmbeddedField, + }, + query::TryStreamExt as _, }; use swiftide_indexing::{loaders, persist, transformers, Pipeline}; use swiftide_integrations::{fastembed::FastEmbed, lancedb::LanceDB}; @@ -10,11 +19,12 @@ use temp_dir::TempDir; use wiremock::MockServer; #[test_log::test(tokio::test)] -async fn test_sparse_indexing_pipeline() { +async fn test_lancedb() { // Setup temporary directory and file for testing let tempdir = TempDir::new().unwrap(); let codefile = tempdir.child("main.rs"); - std::fs::write(&codefile, "fn main() { println!(\"Hello, World!\"); }").unwrap(); + let code = "fn main() { println!(\"Hello, World!\"); }"; + std::fs::write(&codefile, code).unwrap(); // Setup mock servers to simulate API responses let mock_server = MockServer::start().await; @@ -22,37 +32,76 @@ async fn test_sparse_indexing_pipeline() { let openai_client = openai_client(&mock_server.uri(), "text-embedding-3-small", "gpt-4o"); - let fastembed_sparse = FastEmbed::try_default_sparse().unwrap(); let fastembed = FastEmbed::try_default().unwrap(); - let memory_storage = persist::MemoryStorage::default(); - - let result = - Pipeline::from_loader(loaders::FileLoader::new(tempdir.path()).with_extensions(&["rs"])) - .then_chunk(ChunkCode::try_for_language("rust").unwrap()) - .then(MetadataQACode::new(openai_client)) - .then_in_batch(20, transformers::SparseEmbed::new(fastembed_sparse)) - .then_in_batch(20, transformers::Embed::new(fastembed)) - .log_nodes() - .then_store_with( - LanceDB::builder() - .uri(tempdir.child("lancedb").to_str().unwrap()) - .vector_size(384) - .with_vector(EmbeddedField::Combined) - .with_metadata(METADATA_QA_CODE_NAME) - .table_name("swiftide_test") - .build() - .unwrap(), - ) - .then_store_with(memory_storage.clone()) - .run() - .await; - - let node = memory_storage - .get_all_values() + + let lancedb = LanceDB::builder() + .uri(tempdir.child("lancedb").to_str().unwrap()) + .vector_size(384) + .with_vector(EmbeddedField::Combined) + .with_metadata(METADATA_QA_CODE_NAME) + .table_name("swiftide_test") + .build() + .unwrap(); + + Pipeline::from_loader(loaders::FileLoader::new(tempdir.path()).with_extensions(&["rs"])) + .then_chunk(ChunkCode::try_for_language("rust").unwrap()) + .then(MetadataQACode::new(openai_client)) + .then_in_batch(20, transformers::Embed::new(fastembed)) + .log_nodes() + .then_store_with(lancedb.clone()) + .run() + .await + .unwrap(); + + // Assert that + // * Vector got persisted + // * Metadata field got persisted + // * Indirectly correct table got created + + // Temporary tests to check before query pipeline + let conn = lancedb.get_connection().await.unwrap(); + let table = conn.open_table("swiftide_test").execute().await.unwrap(); + + let result: RecordBatch = table + .query() + .execute() .await + .unwrap() + .try_collect::>() + .await + .unwrap() .first() .unwrap() .clone(); - result.expect("Indexing pipeline failed"); + assert_eq!(result.num_rows(), 1); + assert_eq!(result.num_columns(), 4); + assert!(result.column_by_name("id").is_some()); + assert_eq!( + result + .column_by_name("chunk") + .unwrap() + .as_any() + .downcast_ref::() // as_string() doesn't work, wtf + .unwrap() + .value(0), + code + ); + // assert_eq!( + // result + // .column_by_name("questions_and_answers__code_") + // .unwrap() + // .as_string_view() + // .value(0), + // "\n\nHello there, how may I assist you today?" + // ); + assert_eq!( + result + .column_by_name("vector_combined") + .unwrap() + .as_fixed_size_list() + .value(0) + .len(), + 384 + ); } From 2952160aed96f5f1382e43b714749c9fac90ffe3 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 19:24:04 +0200 Subject: [PATCH 05/34] Clippy --- swiftide/tests/lancedb.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index 0684169d..8e89959e 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -1,6 +1,6 @@ use arrow_array::*; use arrow_array::{ - cast::AsArray, types::Utf8Type, AnyDictionaryArray, Array, RecordBatch, StringArray, + cast::AsArray, Array, RecordBatch, StringArray, }; use lancedb::query::ExecutableQuery; use swiftide::{ @@ -12,9 +12,9 @@ use swiftide::{ }, query::TryStreamExt as _, }; -use swiftide_indexing::{loaders, persist, transformers, Pipeline}; +use swiftide_indexing::{loaders, transformers, Pipeline}; use swiftide_integrations::{fastembed::FastEmbed, lancedb::LanceDB}; -use swiftide_test_utils::{mock_chat_completions, mock_embeddings, openai_client}; +use swiftide_test_utils::{mock_chat_completions, openai_client}; use temp_dir::TempDir; use wiremock::MockServer; From 3e568eba2a730c6d39b4b9d420ec86e1f953b143 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 19:24:13 +0200 Subject: [PATCH 06/34] fmt --- swiftide/tests/lancedb.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index 8e89959e..e71a121b 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -1,7 +1,5 @@ use arrow_array::*; -use arrow_array::{ - cast::AsArray, Array, RecordBatch, StringArray, -}; +use arrow_array::{cast::AsArray, Array, RecordBatch, StringArray}; use lancedb::query::ExecutableQuery; use swiftide::{ indexing::{ From d80952d731cbde861b8e6c6c6dd8d377e53ece76 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 19:36:19 +0200 Subject: [PATCH 07/34] Fix metadata --- swiftide-integrations/src/lancedb/mod.rs | 2 ++ swiftide-integrations/src/lancedb/persist.rs | 4 ++-- swiftide/tests/lancedb.rs | 20 ++++++++++++-------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/swiftide-integrations/src/lancedb/mod.rs b/swiftide-integrations/src/lancedb/mod.rs index a26b407c..958f1e21 100644 --- a/swiftide-integrations/src/lancedb/mod.rs +++ b/swiftide-integrations/src/lancedb/mod.rs @@ -203,12 +203,14 @@ impl From for VectorConfig { #[derive(Clone)] pub struct MetadataConfig { field: String, + original_field: String, } impl> From for MetadataConfig { fn from(val: T) -> Self { MetadataConfig { field: normalize_field_name(val.as_ref()), + original_field: val.as_ref().to_string(), } } } diff --git a/swiftide-integrations/src/lancedb/persist.rs b/swiftide-integrations/src/lancedb/persist.rs index 4cbbfbc6..9266a096 100644 --- a/swiftide-integrations/src/lancedb/persist.rs +++ b/swiftide-integrations/src/lancedb/persist.rs @@ -116,9 +116,9 @@ impl LanceDB { for node in nodes { let data = node .metadata - .get(&config.field) + .get(&config.original_field) // TODO: Verify this gives the correct data - .and_then(|v| v.as_str().map(normalize_field_name)); + .and_then(|v| v.as_str()); row.push(data); } diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index e71a121b..02c539fc 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -74,6 +74,7 @@ async fn test_lancedb() { assert_eq!(result.num_rows(), 1); assert_eq!(result.num_columns(), 4); + dbg!(result.columns()); assert!(result.column_by_name("id").is_some()); assert_eq!( result @@ -85,14 +86,17 @@ async fn test_lancedb() { .value(0), code ); - // assert_eq!( - // result - // .column_by_name("questions_and_answers__code_") - // .unwrap() - // .as_string_view() - // .value(0), - // "\n\nHello there, how may I assist you today?" - // ); + assert_eq!( + result + .column_by_name("questions_and_answers__code_") + .unwrap() + .as_any() + .downcast_ref::() // as_string() doesn't work, wtf + .unwrap() + .value(0), + "\n\nHello there, how may I assist you today?" + ); + assert_eq!( result .column_by_name("vector_combined") From 759e670068a1bbe2852626b552f8579235379469 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Wed, 28 Aug 2024 19:38:45 +0200 Subject: [PATCH 08/34] Add protoc to workflows --- .github/workflows/bench.yml | 2 ++ .github/workflows/coverage.yml | 2 ++ .github/workflows/release.yml | 2 ++ .github/workflows/test.yml | 6 ++++++ 4 files changed, 12 insertions(+) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 8eda59ee..b776afaf 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -19,6 +19,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable + - name: Install Protoc + uses: arduino/setup-protoc@v3 - name: Run benchmark run: cargo bench -p benchmarks -- --output-format bencher | tee benchmarks/output.txt diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 955057ab..a813a97a 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -25,6 +25,8 @@ jobs: - uses: dtolnay/rust-toolchain@stable with: components: llvm-tools + - name: Install Protoc + uses: arduino/setup-protoc@v3 - name: Install cargo-llvm-cov uses: taiki-e/install-action@v2 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a2f4b3d1..b2974d90 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,6 +24,8 @@ jobs: fetch-depth: 0 - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable + - name: Install Protoc + uses: arduino/setup-protoc@v3 - name: Run release-plz uses: MarcoIeni/release-plz-action@v0.5 env: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 25ad9ff2..da83127d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,6 +28,8 @@ jobs: - uses: dtolnay/rust-toolchain@stable with: components: rustfmt + - name: Install Protoc + uses: arduino/setup-protoc@v3 - uses: r7kamura/rust-problem-matchers@v1 - name: Check typos uses: crate-ci/typos@master @@ -45,6 +47,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable + - name: Install Protoc + uses: arduino/setup-protoc@v3 - uses: r7kamura/rust-problem-matchers@v1 - name: "Test" run: cargo test --verbose --workspace --all-features @@ -60,6 +64,8 @@ jobs: - uses: dtolnay/rust-toolchain@stable with: components: clippy,rustfmt + - name: Install Protoc + uses: arduino/setup-protoc@v3 - name: Cache Cargo dependencies uses: Swatinem/rust-cache@v2 - uses: r7kamura/rust-problem-matchers@v1 From bd6a591a10b4749afdaebbc0fdc6733ecaecd006 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Fri, 30 Aug 2024 18:48:08 +0200 Subject: [PATCH 09/34] Implement retrieve --- swiftide-core/src/query.rs | 4 +- swiftide-integrations/src/lancedb/mod.rs | 39 ++++++++--- swiftide-integrations/src/lancedb/persist.rs | 4 +- swiftide-integrations/src/lancedb/retrieve.rs | 69 +++++++++++++++++++ swiftide/tests/lancedb.rs | 55 ++++++++++++--- 5 files changed, 152 insertions(+), 19 deletions(-) create mode 100644 swiftide-integrations/src/lancedb/retrieve.rs diff --git a/swiftide-core/src/query.rs b/swiftide-core/src/query.rs index 851ef58c..ccd114cf 100644 --- a/swiftide-core/src/query.rs +++ b/swiftide-core/src/query.rs @@ -54,7 +54,7 @@ impl Query { } #[allow(dead_code)] - fn history(&self) -> &Vec { + pub fn history(&self) -> &Vec { &self.transformation_history } } @@ -153,7 +153,7 @@ impl> From for Query { } #[allow(dead_code)] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum TransformationEvent { Transformed { before: String, diff --git a/swiftide-integrations/src/lancedb/mod.rs b/swiftide-integrations/src/lancedb/mod.rs index 958f1e21..4715fcf3 100644 --- a/swiftide-integrations/src/lancedb/mod.rs +++ b/swiftide-integrations/src/lancedb/mod.rs @@ -10,7 +10,20 @@ use lancedb::arrow::arrow_schema::{DataType, Field, Schema}; use swiftide_core::indexing::EmbeddedField; pub mod connection_pool; pub mod persist; +pub mod retrieve; +/** +`LanceDB` is a columnar database that separates data and compute. + +This enables local, embedded databases, or storing in a cloud storage. + +See examples for more information. + +Implements `Persist` and `Retrieve`. + +Note: For querying large tables you manually need to create an index. You can get an +active connection via `get_connection`. +*/ #[derive(Builder, Clone)] #[builder(setter(into, strip_option), build_fn(error = "anyhow::Error"))] #[allow(dead_code)] @@ -89,6 +102,7 @@ impl LanceDB { } impl LanceDBBuilder { + #[allow(clippy::missing_panics_doc)] pub fn with_vector(&mut self, config: impl Into) -> &mut Self { if self.fields.is_none() { self.fields(self.default_fields()); @@ -96,19 +110,20 @@ impl LanceDBBuilder { self.fields .as_mut() - .expect("Fields should be initialized") + .unwrap() .push(FieldConfig::Vector(config.into())); self } + #[allow(clippy::missing_panics_doc)] pub fn with_metadata(&mut self, config: impl Into) -> &mut Self { if self.fields.is_none() { self.fields(self.default_fields()); } self.fields .as_mut() - .expect("Fields should be initialized") + .unwrap() .push(FieldConfig::Metadata(config.into())); self } @@ -138,14 +153,11 @@ impl LanceDBBuilder { true, )); } - FieldConfig::Chunk => { - fields.push(Field::new("chunk", DataType::Utf8, false)); - } - FieldConfig::Metadata(config) => { - fields.push(Field::new(&config.field, DataType::Utf8, true)); + FieldConfig::Chunk | FieldConfig::Metadata(_) => { + fields.push(Field::new(field.field_name(), DataType::Utf8, false)); } FieldConfig::ID => { - fields.push(Field::new("id", DataType::UInt64, false)); + fields.push(Field::new(field.field_name(), DataType::UInt64, false)); } } } @@ -176,6 +188,17 @@ pub enum FieldConfig { ID, } +impl FieldConfig { + pub fn field_name(&self) -> String { + match self { + FieldConfig::Vector(config) => config.field_name(), + FieldConfig::Metadata(config) => config.field.clone(), + FieldConfig::Chunk => "chunk".into(), + FieldConfig::ID => "id".into(), + } + } +} + #[derive(Clone)] pub struct VectorConfig { embedded_field: EmbeddedField, diff --git a/swiftide-integrations/src/lancedb/persist.rs b/swiftide-integrations/src/lancedb/persist.rs index 9266a096..323b4cec 100644 --- a/swiftide-integrations/src/lancedb/persist.rs +++ b/swiftide-integrations/src/lancedb/persist.rs @@ -16,12 +16,12 @@ use swiftide_core::indexing::IndexingStream; use swiftide_core::indexing::Node; use swiftide_core::Persist; -use super::normalize_field_name; use super::FieldConfig; use super::LanceDB; #[async_trait] impl Persist for LanceDB { + #[tracing::instrument(skip_all)] async fn setup(&self) -> Result<()> { let conn = self.get_connection().await?; let schema = self.schema.clone(); @@ -33,6 +33,7 @@ impl Persist for LanceDB { .map_err(Into::into) } + #[tracing::instrument(skip_all)] async fn store(&self, node: Node) -> Result { let mut nodes = vec![node; 1]; self.store_nodes(&nodes).await?; @@ -42,6 +43,7 @@ impl Persist for LanceDB { Ok(node) } + #[tracing::instrument(skip_all)] async fn batch_store(&self, nodes: Vec) -> IndexingStream { self.store_nodes(&nodes).await.map(|()| nodes).into() } diff --git a/swiftide-integrations/src/lancedb/retrieve.rs b/swiftide-integrations/src/lancedb/retrieve.rs new file mode 100644 index 00000000..53d67f84 --- /dev/null +++ b/swiftide-integrations/src/lancedb/retrieve.rs @@ -0,0 +1,69 @@ +use anyhow::{Context as _, Result}; +use arrow_array::StringArray; +use async_trait::async_trait; +use futures_util::TryStreamExt; +use itertools::Itertools; +use lancedb::query::{ExecutableQuery, QueryBase as _}; +use swiftide_core::{ + querying::{search_strategies::SimilaritySingleEmbedding, states, Query}, + Retrieve, +}; + +use super::{FieldConfig, LanceDB}; + +#[async_trait] +impl Retrieve for LanceDB { + #[tracing::instrument] + async fn retrieve( + &self, + search_strategy: &SimilaritySingleEmbedding, + query: Query, + ) -> Result> { + let Some(embedding) = &query.embedding else { + anyhow::bail!("No embedding for query") + }; + + let table = self + .get_connection() + .await? + .open_table(&self.table_name) + .execute() + .await?; + + let vector_fields = self + .fields + .iter() + .filter(|field| matches!(field, FieldConfig::Vector(_))) + .collect_vec(); + + if vector_fields.is_empty() || vector_fields.len() > 1 { + anyhow::bail!("Zero or multiple vector fields configured in schema") + } + + let column_name = vector_fields.first().map(|v| v.field_name()).unwrap(); + + let result = table + .query() + .nearest_to(embedding.as_slice())? + .column(&column_name) + .limit(usize::try_from(search_strategy.top_k())?) + .execute() + .await? + .try_collect::>() + .await? + .first() + .context("Failed to retrieve documents")? + .to_owned(); + + let documents: Vec = result + .column_by_name("chunk") + .and_then(|raw_array| raw_array.as_any().downcast_ref::()) + .context("Could not cast documents to strings")? + .iter() + .flatten() + .map_into() + .collect(); + + Ok(query.retrieved_documents(documents)) + } +} diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index 02c539fc..ca0604dc 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -1,6 +1,8 @@ use arrow_array::*; use arrow_array::{cast::AsArray, Array, RecordBatch, StringArray}; +use lancedb::index::Index; use lancedb::query::ExecutableQuery; +use swiftide::query::{self, states, Query, TransformationEvent}; use swiftide::{ indexing::{ transformers::{ @@ -12,6 +14,7 @@ use swiftide::{ }; use swiftide_indexing::{loaders, transformers, Pipeline}; use swiftide_integrations::{fastembed::FastEmbed, lancedb::LanceDB}; +use swiftide_query::{answers, query_transformers, response_transformers}; use swiftide_test_utils::{mock_chat_completions, openai_client}; use temp_dir::TempDir; use wiremock::MockServer; @@ -43,20 +46,56 @@ async fn test_lancedb() { Pipeline::from_loader(loaders::FileLoader::new(tempdir.path()).with_extensions(&["rs"])) .then_chunk(ChunkCode::try_for_language("rust").unwrap()) - .then(MetadataQACode::new(openai_client)) - .then_in_batch(20, transformers::Embed::new(fastembed)) + .then(MetadataQACode::new(openai_client.clone())) + .then_in_batch(20, transformers::Embed::new(fastembed.clone())) .log_nodes() .then_store_with(lancedb.clone()) .run() .await .unwrap(); - // Assert that - // * Vector got persisted - // * Metadata field got persisted - // * Indirectly correct table got created + let conn = lancedb.get_connection().await.unwrap(); + let tbl = conn.open_table("swiftide_test").execute().await.unwrap(); + + // tbl.create_index(&["vector_combined"], Index::Auto) + // .execute() + // .await + // .unwrap(); + + let query_pipeline = query::Pipeline::default() + .then_transform_query(query_transformers::GenerateSubquestions::from_client( + openai_client.clone(), + )) + .then_transform_query(query_transformers::Embed::from_client(fastembed.clone())) + .then_retrieve(lancedb.clone()) + .then_transform_response(response_transformers::Summary::from_client( + openai_client.clone(), + )) + .then_answer(answers::Simple::from_client(openai_client.clone())); + + let result: Query = query_pipeline.query("What is swiftide?").await.unwrap(); + + dbg!(&result); + + assert_eq!( + result.answer(), + "\n\nHello there, how may I assist you today?" + ); + let TransformationEvent::Retrieved { documents, .. } = result + .history() + .iter() + .find(|e| matches!(e, TransformationEvent::Retrieved { .. })) + .unwrap() + else { + panic!("No documents found") + }; - // Temporary tests to check before query pipeline + assert_eq!( + documents.first().unwrap(), + "fn main() { println!(\"Hello, World!\"); }" + ); + + // Manually assert everything was stored as expected let conn = lancedb.get_connection().await.unwrap(); let table = conn.open_table("swiftide_test").execute().await.unwrap(); @@ -96,7 +135,7 @@ async fn test_lancedb() { .value(0), "\n\nHello there, how may I assist you today?" ); - + assert_eq!( result .column_by_name("vector_combined") From 18ebbc20ab653afce67b00d84cb36fc0b0a7bb9a Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Fri, 30 Aug 2024 18:49:08 +0200 Subject: [PATCH 10/34] Clean up test --- swiftide/tests/lancedb.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/swiftide/tests/lancedb.rs b/swiftide/tests/lancedb.rs index ca0604dc..84119a0a 100644 --- a/swiftide/tests/lancedb.rs +++ b/swiftide/tests/lancedb.rs @@ -1,6 +1,4 @@ -use arrow_array::*; use arrow_array::{cast::AsArray, Array, RecordBatch, StringArray}; -use lancedb::index::Index; use lancedb::query::ExecutableQuery; use swiftide::query::{self, states, Query, TransformationEvent}; use swiftide::{ @@ -54,14 +52,6 @@ async fn test_lancedb() { .await .unwrap(); - let conn = lancedb.get_connection().await.unwrap(); - let tbl = conn.open_table("swiftide_test").execute().await.unwrap(); - - // tbl.create_index(&["vector_combined"], Index::Auto) - // .execute() - // .await - // .unwrap(); - let query_pipeline = query::Pipeline::default() .then_transform_query(query_transformers::GenerateSubquestions::from_client( openai_client.clone(), From bafaf8fbb269fa36434f085efec14245027e5ee0 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Fri, 30 Aug 2024 21:24:35 +0200 Subject: [PATCH 11/34] Add example --- Cargo.lock | 1 + examples/Cargo.toml | 6 ++++ examples/lancedb.rs | 75 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 examples/lancedb.rs diff --git a/Cargo.lock b/Cargo.lock index e292ace7..809f35c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2597,6 +2597,7 @@ dependencies = [ "serde_json", "spider", "swiftide", + "temp-dir", "tokio", "tracing-subscriber", ] diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 1d9750fe..3e6bbbeb 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -20,12 +20,14 @@ swiftide = { path = "../swiftide/", features = [ "groq", "ollama", "fluvio", + "lancedb", ] } tracing-subscriber = "0.3" serde_json = "1.0" spider = "1.98" qdrant-client = "1.10.3" fluvio = "0.23.1" +temp-dir = "0.1.13" [[example]] doc-scrape-examples = true @@ -85,3 +87,7 @@ path = "hybrid_search.rs" [[example]] name = "fluvio" path = "fluvio.rs" + +[[example]] +name = "lancedb" +path = "lancedb.rs" diff --git a/examples/lancedb.rs b/examples/lancedb.rs new file mode 100644 index 00000000..75a38f4f --- /dev/null +++ b/examples/lancedb.rs @@ -0,0 +1,75 @@ +/** +* This example demonstrates how to use the LanceDB integration with Swiftide +*/ +use swiftide::{ + indexing::{ + self, + loaders::FileLoader, + transformers::{ + metadata_qa_text::NAME as METADATA_QA_TEXT_NAME, ChunkMarkdown, Embed, MetadataQAText, + }, + EmbeddedField, + }, + integrations::{self, lancedb::LanceDB, qdrant::Qdrant}, + query::{self, answers, query_transformers, response_transformers}, +}; +use temp_dir::TempDir; + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt::init(); + + let openai_client = integrations::openai::OpenAI::builder() + .default_embed_model("text-embedding-3-small") + .default_prompt_model("gpt-4o-mini") + .build()?; + + let tempdir = TempDir::new().unwrap(); + + // Configure lancedb with a default vector size, a single embedding + // and in addition to embedding the text metadata, also store it in a field + let lancedb = LanceDB::builder() + .uri(tempdir.child("lancedb").to_str().unwrap()) + .vector_size(1536) + .with_vector(EmbeddedField::Combined) + .with_metadata(METADATA_QA_TEXT_NAME) + .table_name("swiftide_test") + .build() + .unwrap(); + + indexing::Pipeline::from_loader(FileLoader::new("README.md")) + .then_chunk(ChunkMarkdown::from_chunk_range(10..2048)) + .then(MetadataQAText::new(openai_client.clone())) + .then_in_batch(10, Embed::new(openai_client.clone())) + .then_store_with(lancedb.clone()) + .run() + .await?; + + // By default the search strategy is SimilaritySingleEmbedding + // which takes the latest query, embeds it, and does a similarity search + // + // LanceDB will return an error if multiple embeddings are set + // + // The pipeline generates subquestions to increase semantic coverage, embeds these in a single + // embedding, retrieves the default top_k documents, summarizes them and uses that as context + // for the final answer. + let pipeline = query::Pipeline::default() + .then_transform_query(query_transformers::GenerateSubquestions::from_client( + openai_client.clone(), + )) + .then_transform_query(query_transformers::Embed::from_client( + openai_client.clone(), + )) + .then_retrieve(lancedb.clone()) + .then_transform_response(response_transformers::Summary::from_client( + openai_client.clone(), + )) + .then_answer(answers::Simple::from_client(openai_client.clone())); + + let result = pipeline + .query("What is swiftide? Please provide an elaborate explanation") + .await?; + + println!("{:?}", result.answer()); + Ok(()) +} From 096628bf6648d885a29973af1e48636c55954221 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 12:13:48 +0200 Subject: [PATCH 12/34] Clippy --- examples/lancedb.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lancedb.rs b/examples/lancedb.rs index 75a38f4f..026a7404 100644 --- a/examples/lancedb.rs +++ b/examples/lancedb.rs @@ -10,7 +10,7 @@ use swiftide::{ }, EmbeddedField, }, - integrations::{self, lancedb::LanceDB, qdrant::Qdrant}, + integrations::{self, lancedb::LanceDB}, query::{self, answers, query_transformers, response_transformers}, }; use temp_dir::TempDir; From 193fb78ef8bdb2034cb56272c22c0a8bf44dcf6e Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 13:11:56 +0200 Subject: [PATCH 13/34] Add CC0-1.0 to allowed licenses --- deny.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/deny.toml b/deny.toml index d7567e78..335dc7e0 100644 --- a/deny.toml +++ b/deny.toml @@ -14,6 +14,7 @@ allow = [ "MPL-2.0", "Apache-2.0 WITH LLVM-exception", "Unlicense", + "CC0-1.0", ] exceptions = [{ allow = ["OpenSSL"], crate = "ring" }] From 694bc856ac65e82715d8eddccc570c29dfd46e46 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 13:22:00 +0200 Subject: [PATCH 14/34] Add rust cache to all ci actions --- .github/workflows/coverage.yml | 2 ++ .github/workflows/test.yml | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index a813a97a..8982d797 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -27,6 +27,8 @@ jobs: components: llvm-tools - name: Install Protoc uses: arduino/setup-protoc@v3 + - name: Cache Cargo dependencies + uses: Swatinem/rust-cache@v2 - name: Install cargo-llvm-cov uses: taiki-e/install-action@v2 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index da83127d..82d0a1a6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,6 +30,8 @@ jobs: components: rustfmt - name: Install Protoc uses: arduino/setup-protoc@v3 + - name: Cache Cargo dependencies + uses: Swatinem/rust-cache@v2 - uses: r7kamura/rust-problem-matchers@v1 - name: Check typos uses: crate-ci/typos@master @@ -49,6 +51,8 @@ jobs: - uses: dtolnay/rust-toolchain@stable - name: Install Protoc uses: arduino/setup-protoc@v3 + - name: Cache Cargo dependencies + uses: Swatinem/rust-cache@v2 - uses: r7kamura/rust-problem-matchers@v1 - name: "Test" run: cargo test --verbose --workspace --all-features From 797469b62a84dd72313c160fd35f0eba4c1a3cc0 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 14:49:58 +0200 Subject: [PATCH 15/34] Add other two licenses --- deny.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deny.toml b/deny.toml index 335dc7e0..d414541a 100644 --- a/deny.toml +++ b/deny.toml @@ -15,6 +15,8 @@ allow = [ "Apache-2.0 WITH LLVM-exception", "Unlicense", "CC0-1.0", + "zlib-acknowledgement", + "0BSD", ] exceptions = [{ allow = ["OpenSSL"], crate = "ring" }] From 95455b107539c0fe82e916de68b0c4b296ef1312 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 15:16:17 +0200 Subject: [PATCH 16/34] Cargo update --- Cargo.lock | 233 ++++++++++++++++++++++++++++------------------------- 1 file changed, 123 insertions(+), 110 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e0aaa9b5..6d575bf1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -169,7 +169,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -323,7 +323,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.4.0", + "indexmap 2.5.0", "lexical-core", "num", "serde", @@ -615,7 +615,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -682,7 +682,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -699,7 +699,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -734,6 +734,19 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "auto_encoder" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cfd944c56af79853532412fe19a63faa0cca5e0872ce3de7970dd28fd63dcc1" +dependencies = [ + "chardetng", + "encoding_rs", + "percent-encoding", + "phf 0.11.2", + "phf_codegen 0.11.2", +] + [[package]] name = "auto_enums" version = "0.8.6" @@ -743,7 +756,7 @@ dependencies = [ "derive_utils", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -807,9 +820,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16838e6c9e12125face1c1eff1343c75e3ff540de98ff7ebd61874a89bcfeb9" +checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -819,14 +832,15 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f42c2d4218de4dcd890a109461e2f799a1a2ba3bcd2cde9af88360f5df9266c6" +checksum = "2424565416eef55906f9f8cece2072b6b6a76075e3ff81483ebe938a89a4c05f" dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", "aws-smithy-http", + "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", @@ -843,9 +857,9 @@ dependencies = [ [[package]] name = "aws-sdk-bedrockruntime" -version = "1.46.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b82c4d7551f64fa61954705cd7b29e0e56cf9f2635528213bd3a5c5396bdb1f3" +checksum = "f57609ec26f8f5c18623ab1110659421e5d2aa21e037fa160a8a3b69d27a488e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -866,9 +880,9 @@ dependencies = [ [[package]] name = "aws-sdk-dynamodb" -version = "1.42.0" +version = "1.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aadafd673822026e7ae6be7900c7886f609514b620874c9e3054f4ae38ab82f" +checksum = "6befba9ce7b81b58b1249c854da754e2236dbee548a736b96230216ebf9bcadc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -889,9 +903,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.39.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11822090cf501c316c6f75711d77b96fba30658e3867a7762e5e2f5d32d31e81" +checksum = "e5879bec6e74b648ce12f6085e7245417bc5f6d672781028384d2e494be3eb6d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -911,9 +925,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78a2a06ff89176123945d1bbe865603c4d7101bea216a550bb4d2e4e9ba74d74" +checksum = "4ef4cd9362f638c22a3b959fd8df292e7e47fdf170270f86246b97109b5f2f7d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -933,9 +947,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.39.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a20a91795850826a6f456f4a48eff1dfa59a0e69bdbf5b8c50518fd372106574" +checksum = "0b1e2735d2ab28b35ecbb5496c9d41857f52a0d6a0075bbf6a8af306045ea6f6" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1947,7 +1961,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -1992,7 +2006,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2003,7 +2017,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2056,7 +2070,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.5", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "num_cpus", @@ -2216,7 +2230,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "paste", @@ -2245,7 +2259,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "hex", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "paste", @@ -2291,7 +2305,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "once_cell", @@ -2382,33 +2396,33 @@ dependencies = [ [[package]] name = "derive_builder" -version = "0.20.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" +checksum = "cd33f37ee6a119146a1781d3356a7c26028f83d779b2e04ecd45fdc75c76877b" dependencies = [ "derive_builder_macro", ] [[package]] name = "derive_builder_core" -version = "0.20.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" +checksum = "7431fa049613920234f22c47fdc33e6cf3ee83067091ea4277a3f8c4587aae38" dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "derive_builder_macro" -version = "0.20.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" +checksum = "4abae7035bf79b9877b779505d8cf3749285b80c43941eda66604841889451dc" dependencies = [ "derive_builder_core", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2419,7 +2433,7 @@ checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2430,7 +2444,7 @@ checksum = "65f152f4b8559c4da5d574bafc7af85454d706b4c5fe8b530d508cacbb6807ea" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2518,7 +2532,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2557,7 +2571,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2577,7 +2591,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -3034,7 +3048,7 @@ checksum = "c36cb221d72fb3b4bdc9b53d448859a979b5f87660d41f7155994bb2f33d0c70" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -3315,7 +3329,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -3456,7 +3470,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.4.0", + "indexmap 2.5.0", "slab", "tokio", "tokio-util", @@ -3475,7 +3489,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.4.0", + "indexmap 2.5.0", "slab", "tokio", "tokio-util", @@ -3651,7 +3665,7 @@ dependencies = [ "markup5ever", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -4064,9 +4078,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -4125,7 +4139,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5119,7 +5133,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5165,7 +5179,7 @@ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5303,7 +5317,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5365,9 +5379,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.36.3" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "memchr", ] @@ -5480,7 +5494,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5639,7 +5653,7 @@ dependencies = [ "regex", "regex-syntax 0.8.4", "structmeta", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5706,7 +5720,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5727,7 +5741,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.4.0", + "indexmap 2.5.0", ] [[package]] @@ -5809,7 +5823,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -5847,7 +5861,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -6020,7 +6034,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -6048,7 +6062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" dependencies = [ "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -6063,12 +6077,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" +checksum = "3b2ecbe40f08db5c006b5764a2645f7f3f141ce756412ac9e1dd6087e6d32995" dependencies = [ "bytes", - "prost-derive 0.13.1", + "prost-derive 0.13.2", ] [[package]] @@ -6088,7 +6102,7 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.76", + "syn 2.0.77", "tempfile", ] @@ -6102,20 +6116,20 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "prost-derive" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" +checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -6129,11 +6143,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" +checksum = "60caa6738c7369b940c3d49246a8d1749323674c65cb13010134f5c9bad5b519" dependencies = [ - "prost 0.13.1", + "prost 0.13.2", ] [[package]] @@ -6176,15 +6190,15 @@ dependencies = [ [[package]] name = "qdrant-client" -version = "1.11.1" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26955138c3afae5f9d36decd09f1e4e02e266132359b8631093e9cddb1662600" +checksum = "0ad523a9b4633360e81fbb9affb54ee42ca09a873130e173d90c1cf5dc2e158a" dependencies = [ "anyhow", "derive_builder", "futures-util", - "prost 0.13.1", - "prost-types 0.13.1", + "prost 0.13.2", + "prost-types 0.13.2", "reqwest", "serde", "serde_json", @@ -6637,9 +6651,9 @@ dependencies = [ [[package]] name = "rgb" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f86ae463694029097b846d8f99fd5536740602ae00022c0c50c5600720b2f71" +checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" dependencies = [ "bytemuck", ] @@ -6984,7 +6998,7 @@ checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7007,7 +7021,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7041,7 +7055,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.4.0", + "indexmap 2.5.0", "serde", "serde_derive", "serde_json", @@ -7058,7 +7072,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7067,7 +7081,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.4.0", + "indexmap 2.5.0", "itoa", "ryu", "serde", @@ -7260,17 +7274,16 @@ dependencies = [ [[package]] name = "spider" -version = "2.2.4" +version = "2.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f00760dffa7861915e6363263a3d6c7543c4c1237feae0205fb40a5b8d208b" +checksum = "5269634df56f83019cfa5713877b18c95f567608edc5d59d56e54b63d916c2d1" dependencies = [ "ahash", + "auto_encoder", "bytes", "case_insensitive_string", - "chardetng", "cssparser", "ego-tree", - "encoding_rs", "fast_html5ever", "hashbrown 0.14.5", "lazy_static", @@ -7331,7 +7344,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7405,7 +7418,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7416,7 +7429,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7438,7 +7451,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7582,7 +7595,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7645,9 +7658,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.76" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -7943,7 +7956,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7954,7 +7967,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", "test-case-core", ] @@ -7977,7 +7990,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -8044,7 +8057,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -8166,9 +8179,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.39.3" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", @@ -8190,7 +8203,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -8277,7 +8290,7 @@ version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ - "indexmap 2.4.0", + "indexmap 2.5.0", "serde", "serde_spanned", "toml_datetime", @@ -8299,7 +8312,7 @@ version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ - "indexmap 2.4.0", + "indexmap 2.5.0", "serde", "serde_spanned", "toml_datetime", @@ -8327,7 +8340,7 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.1", + "prost 0.13.2", "rustls-native-certs 0.7.3", "rustls-pemfile 2.1.3", "socket2 0.5.7", @@ -8392,7 +8405,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -8827,7 +8840,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", "wasm-bindgen-shared", ] @@ -8861,7 +8874,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -8897,9 +8910,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.3" +version = "0.26.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +checksum = "0bd24728e5af82c6c4ec1b66ac4844bdf8156257fccda846ec58b42cd0cdbe6a" dependencies = [ "rustls-pki-types", ] @@ -9251,7 +9264,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] From 4f68b575a73ca3c9aac8983253f857e3beb93fd5 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 17:49:05 +0200 Subject: [PATCH 17/34] Clean up deps --- Cargo.lock | 162 +++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 42 ++++++------ examples/Cargo.toml | 10 +-- 3 files changed, 188 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d575bf1..125e263a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -830,6 +830,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-lc-rs" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae74d9bd0a7530e8afd1770739ad34b36838829d6ad61818f9230f683f5ad77" +dependencies = [ + "aws-lc-sys", + "mirai-annotations", + "paste", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f0e249228c6ad2d240c2dc94b714d711629d52bad946075d8e9b2f5391f0703" +dependencies = [ + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", + "libc", + "paste", +] + [[package]] name = "aws-runtime" version = "1.4.2" @@ -1263,6 +1290,29 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.6.0", + "cexpr", + "clang-sys", + "itertools 0.10.5", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn 2.0.77", + "which", +] + [[package]] name = "bit_field" version = "0.10.2" @@ -1548,6 +1598,15 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-expr" version = "0.15.8" @@ -1645,6 +1704,17 @@ dependencies = [ "half", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.16" @@ -1670,6 +1740,15 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "cmake" +version = "0.1.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +dependencies = [ + "cc", +] + [[package]] name = "color_quant" version = "1.1.0" @@ -2523,6 +2602,12 @@ dependencies = [ "dtoa", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "educe" version = "0.6.0" @@ -2964,10 +3049,12 @@ dependencies = [ "cfg-if", "fluvio-wasm-timer", "futures-lite 2.3.0", + "futures-rustls", "futures-util", "openssl", "openssl-sys", "pin-project", + "rustls-pemfile 2.1.3", "socket2 0.5.7", "thiserror", "tracing", @@ -3220,6 +3307,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "fsst" version = "0.15.1" @@ -3332,6 +3425,17 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "futures-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f2f12607f92c69b12ed746fabf9ca4f5c482cba46679c1a75b874ed7c26adb" +dependencies = [ + "futures-io", + "rustls 0.23.12", + "rustls-pki-types", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -4704,6 +4808,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "lebe" version = "0.5.2" @@ -4826,6 +4936,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "libloading" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +dependencies = [ + "cfg-if", + "windows-targets 0.48.5", +] + [[package]] name = "libm" version = "0.2.8" @@ -5101,6 +5221,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mirai-annotations" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" + [[package]] name = "mock_instant" version = "0.3.2" @@ -6196,10 +6322,8 @@ checksum = "0ad523a9b4633360e81fbb9affb54ee42ca09a873130e173d90c1cf5dc2e158a" dependencies = [ "anyhow", "derive_builder", - "futures-util", "prost 0.13.2", "prost-types 0.13.2", - "reqwest", "serde", "serde_json", "thiserror", @@ -6779,6 +6903,7 @@ version = "0.23.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", @@ -6854,6 +6979,7 @@ version = "0.102.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -7132,6 +7258,12 @@ dependencies = [ "dirs", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -8923,6 +9055,18 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.35", +] + [[package]] name = "widestring" version = "1.1.0" @@ -9272,6 +9416,20 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] [[package]] name = "zstd" diff --git a/Cargo.toml b/Cargo.toml index a6f34401..379b25f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,28 +28,28 @@ insta.opt-level = 3 similar.opt-level = 3 [workspace.dependencies] -anyhow = { version = "1.0" } -async-trait = "0.1" -derive_builder = "0.20" -futures-util = "0.3" -tokio = { version = "1.38", features = ["full"] } -tokio-stream = "0.1" -tracing = { version = "0.1", features = ["log"] } -num_cpus = "1.16" -pin-project-lite = "0.2" -itertools = { version = "0.13" } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -strum = "0.26" -strum_macros = "0.26" -lazy_static = { version = "1.5.0" } -chrono = { version = "0.4" } -indoc = "2.0" -regex = "1.10.5" +anyhow = { version = "1.0", default-features = false } +async-trait = { version = "0.1", default-features = false } +derive_builder = { version = "0.20", default-features = false } +futures-util = { version = "0.3", default-features = false } +tokio = { version = "1.38", default-features = false } +tokio-stream = { version = "0.1", default-features = false } +tracing = { version = "0.1", features = ["log"], default-features = false } +num_cpus = { version = "1.16", default-features = false } +pin-project-lite = { version = "0.2", default-features = false } +itertools = { version = "0.13", default-features = false } +serde = { version = "1.0", features = ["derive"], default-features = false } +serde_json = { version = "1.0", default-features = false } +strum = { version = "0.26", default-features = false } +strum_macros = { version = "0.26", default-features = false } +lazy_static = { version = "1.5.0", default-features = false } +chrono = { version = "0.4", default-features = false } +indoc = { version = "2.0", default-features = false } +regex = { version = "1.10.6", default-features = false } # Integrations spider = { version = "2.2" } -async-openai = { version = "0.24" } +async-openai = { version = "0.24", default-features = false } qdrant-client = { version = "1.10", default-features = false, features = [ "serde", ] } @@ -62,6 +62,10 @@ temp-dir = "0.1.13" wiremock = "0.6.0" test-case = "3.3.1" insta = { version = "1.39.0", features = ["yaml"] } +fluvio = { version = "0.23", default-features = false, features = [ + "compress", + "rustls", +] } [workspace.lints.rust] unsafe_code = "forbid" diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 42804acd..c0addb06 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -12,7 +12,7 @@ repository.workspace = true homepage.workspace = true [dependencies] -tokio = { version = "1.0", features = ["full"] } +tokio = { workspace = true, features = ["full"] } swiftide = { path = "../swiftide/", features = [ "all", "scraping", @@ -23,11 +23,11 @@ swiftide = { path = "../swiftide/", features = [ "lancedb", ] } tracing-subscriber = "0.3" -serde_json = "1.0" +serde_json = { workspace = true } spider = { workspace = true } -qdrant-client = "1.10.3" -fluvio = "0.23.1" -temp-dir = "0.1.13" +qdrant-client = { workspace = true } +fluvio = { workspace = true } +temp-dir = { workspace = true } [[example]] doc-scrape-examples = true From a1f3b95ed8473e1da36f756bda53b1c043ee9269 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 17:49:29 +0200 Subject: [PATCH 18/34] And then update --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 125e263a..41593329 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1299,7 +1299,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -4943,7 +4943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] From 0adb25e60f5807f4e3053aa42317f7940e2e10b6 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 18:05:11 +0200 Subject: [PATCH 19/34] Further clean up deps --- Cargo.lock | 182 ------------------------------- Cargo.toml | 7 +- benchmarks/Cargo.toml | 10 +- swiftide-integrations/Cargo.toml | 11 +- swiftide/Cargo.toml | 6 +- 5 files changed, 13 insertions(+), 203 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41593329..614a7c49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -830,33 +830,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-lc-rs" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae74d9bd0a7530e8afd1770739ad34b36838829d6ad61818f9230f683f5ad77" -dependencies = [ - "aws-lc-sys", - "mirai-annotations", - "paste", - "zeroize", -] - -[[package]] -name = "aws-lc-sys" -version = "0.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f0e249228c6ad2d240c2dc94b714d711629d52bad946075d8e9b2f5391f0703" -dependencies = [ - "bindgen", - "cc", - "cmake", - "dunce", - "fs_extra", - "libc", - "paste", -] - [[package]] name = "aws-runtime" version = "1.4.2" @@ -1290,29 +1263,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "bindgen" -version = "0.69.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" -dependencies = [ - "bitflags 2.6.0", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.77", - "which", -] - [[package]] name = "bit_field" version = "0.10.2" @@ -1598,15 +1548,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-expr" version = "0.15.8" @@ -1704,17 +1645,6 @@ dependencies = [ "half", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" version = "4.5.16" @@ -1740,15 +1670,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" -[[package]] -name = "cmake" -version = "0.1.51" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" -dependencies = [ - "cc", -] - [[package]] name = "color_quant" version = "1.1.0" @@ -2602,12 +2523,6 @@ dependencies = [ "dtoa", ] -[[package]] -name = "dunce" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" - [[package]] name = "educe" version = "0.6.0" @@ -2999,12 +2914,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94399c0eb4224a511515fdf134f5ccd05b3c7ea1de566b9ab3cac63e6a4bc8f2" dependencies = [ "bytes", - "flate2", - "lz4_flex", "serde", - "snap", "thiserror", - "zstd", ] [[package]] @@ -3049,12 +2960,8 @@ dependencies = [ "cfg-if", "fluvio-wasm-timer", "futures-lite 2.3.0", - "futures-rustls", "futures-util", - "openssl", - "openssl-sys", "pin-project", - "rustls-pemfile 2.1.3", "socket2 0.5.7", "thiserror", "tracing", @@ -3307,12 +3214,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "fsst" version = "0.15.1" @@ -3425,17 +3326,6 @@ dependencies = [ "syn 2.0.77", ] -[[package]] -name = "futures-rustls" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f2f12607f92c69b12ed746fabf9ca4f5c482cba46679c1a75b874ed7c26adb" -dependencies = [ - "futures-io", - "rustls 0.23.12", - "rustls-pki-types", -] - [[package]] name = "futures-sink" version = "0.3.30" @@ -4808,12 +4698,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "lebe" version = "0.5.2" @@ -4936,16 +4820,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "libloading" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" -dependencies = [ - "cfg-if", - "windows-targets 0.52.6", -] - [[package]] name = "libm" version = "0.2.8" @@ -5221,12 +5095,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "mirai-annotations" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" - [[package]] name = "mock_instant" version = "0.3.2" @@ -5629,15 +5497,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-src" -version = "300.3.1+3.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" -dependencies = [ - "cc", -] - [[package]] name = "openssl-sys" version = "0.9.103" @@ -5646,7 +5505,6 @@ checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", - "openssl-src", "pkg-config", "vcpkg", ] @@ -6903,7 +6761,6 @@ version = "0.23.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ - "aws-lc-rs", "log", "once_cell", "ring", @@ -6979,7 +6836,6 @@ version = "0.102.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" dependencies = [ - "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -7258,12 +7114,6 @@ dependencies = [ "dirs", ] -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -7367,12 +7217,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - [[package]] name = "socket2" version = "0.4.10" @@ -9055,18 +8899,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.35", -] - [[package]] name = "widestring" version = "1.1.0" @@ -9416,20 +9248,6 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" -dependencies = [ - "zeroize_derive", -] - -[[package]] -name = "zeroize_derive" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.77", -] [[package]] name = "zstd" diff --git a/Cargo.toml b/Cargo.toml index 379b25f3..af2e303b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,9 @@ async-openai = { version = "0.24", default-features = false } qdrant-client = { version = "1.10", default-features = false, features = [ "serde", ] } +fluvio = { version = "0.23", default-features = false } +lancedb = { version = "0.9" } +arrow-array = { version = "52.2" } # Testing test-log = "0.2.16" @@ -62,10 +65,6 @@ temp-dir = "0.1.13" wiremock = "0.6.0" test-case = "3.3.1" insta = { version = "1.39.0", features = ["yaml"] } -fluvio = { version = "0.23", default-features = false, features = [ - "compress", - "rustls", -] } [workspace.lints.rust] unsafe_code = "forbid" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 00fce76f..8f229d56 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -12,13 +12,13 @@ repository.workspace = true homepage.workspace = true [dev-dependencies] -tokio = { version = "1.0", features = ["full"] } -swiftide = { path = "../swiftide/", features = ["all"] } +tokio = { workspace = true, features = ["full"] } +swiftide = { path = "../swiftide", features = ["all"] } tracing-subscriber = "0.3" -serde_json = "1.0" +serde_json = { workspace = true } criterion = { version = "0.5.1", features = ["html_reports", "async_tokio"] } -anyhow = "1.0" -futures-util = "0.3" +anyhow = { workspace = true } +futures-util = { workspace = true } [[bench]] name = "fileloader" diff --git a/swiftide-integrations/Cargo.toml b/swiftide-integrations/Cargo.toml index 029d9ed6..be975559 100644 --- a/swiftide-integrations/Cargo.toml +++ b/swiftide-integrations/Cargo.toml @@ -61,18 +61,13 @@ aws-sdk-bedrockruntime = { version = "1.37", features = [ secrecy = { version = "0.8.0", optional = true } reqwest = { version = "0.12.5", optional = true, default-features = false } ollama-rs = { version = "0.2.0", optional = true } -lancedb = { version = "0.9.0", optional = true } deadpool = { version = "0.12", optional = true, features = [ "managed", "rt_tokio_1", ] } -arrow-array = { version = "52.2", optional = true } -# Unfortunately their rustls version fails to compile -# fluvio = { version = "0.23", default-features = false, features = [ -# "compress", -# "rustls", -# ], optional = true } -fluvio = { version = "0.23", optional = true } +fluvio = { workspace = true, optional = true } +arrow-array = { workspace = true, optional = true } +lancedb = { workspace = true, optional = true } [dev-dependencies] swiftide-core = { path = "../swiftide-core", features = ["test-utils"] } diff --git a/swiftide/Cargo.toml b/swiftide/Cargo.toml index b077ca02..eca93a8e 100644 --- a/swiftide/Cargo.toml +++ b/swiftide/Cargo.toml @@ -81,10 +81,8 @@ insta = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } - -# TODO: Remove me when lancedb tests use query pipeline -lancedb = { version = "0.9" } -arrow-array = { version = "52.2" } +lancedb = { workspace = true } +arrow-array = { workspace = true } [lints] workspace = true From 3ca3e0f3bc00e91f9ac3d4f753e0f913e4c13e7c Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 18:53:46 +0200 Subject: [PATCH 20/34] Only run test targets --- .github/workflows/coverage.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 8982d797..7a5dcb53 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -35,7 +35,7 @@ jobs: tool: cargo-llvm-cov - name: Generate code coverage run: | - cargo llvm-cov --lcov --output-path target/lcov.info --all-features + cargo llvm-cov --lcov --output-path target/lcov.info --all-features --tests - name: Coveralls uses: coverallsapp/github-action@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 82d0a1a6..cd38aa7f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -55,7 +55,7 @@ jobs: uses: Swatinem/rust-cache@v2 - uses: r7kamura/rust-problem-matchers@v1 - name: "Test" - run: cargo test --verbose --workspace --all-features + run: cargo test --all-features --tests clippy: name: Clippy From 96c86f76010c32fa2147d201901b6a8b032b2e5f Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 21:30:52 +0200 Subject: [PATCH 21/34] Try this --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 7a5dcb53..f34473fc 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -35,7 +35,7 @@ jobs: tool: cargo-llvm-cov - name: Generate code coverage run: | - cargo llvm-cov --lcov --output-path target/lcov.info --all-features --tests + cargo llvm-cov --lcov --output-path target/lcov.info --all-features --tests --frozen --locked --workspace - name: Coveralls uses: coverallsapp/github-action@v2 From 9278d3ad364e343a03587bfa554f38ee4c786aeb Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 21:36:48 +0200 Subject: [PATCH 22/34] No frozen --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index f34473fc..48def7fd 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -35,7 +35,7 @@ jobs: tool: cargo-llvm-cov - name: Generate code coverage run: | - cargo llvm-cov --lcov --output-path target/lcov.info --all-features --tests --frozen --locked --workspace + cargo llvm-cov --lcov --output-path target/lcov.info --all-features --tests --locked --workspace - name: Coveralls uses: coverallsapp/github-action@v2 From d5acd79b00be6c390a2aa9522889be480ce24df2 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sat, 31 Aug 2024 22:52:03 +0200 Subject: [PATCH 23/34] More cleanup --- .github/workflows/coverage.yml | 2 +- benchmarks/Cargo.toml | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 48def7fd..bb2cd146 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -35,7 +35,7 @@ jobs: tool: cargo-llvm-cov - name: Generate code coverage run: | - cargo llvm-cov --lcov --output-path target/lcov.info --all-features --tests --locked --workspace + cargo llvm-cov --lcov --output-path target/lcov.info --all-features --workspace - name: Coveralls uses: coverallsapp/github-action@v2 diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 8f229d56..f316da07 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -16,7 +16,10 @@ tokio = { workspace = true, features = ["full"] } swiftide = { path = "../swiftide", features = ["all"] } tracing-subscriber = "0.3" serde_json = { workspace = true } -criterion = { version = "0.5.1", features = ["html_reports", "async_tokio"] } +criterion = { version = "0.5.1", features = [ + "html_reports", + "async_tokio", +], default_features = false } anyhow = { workspace = true } futures-util = { workspace = true } From 63680cd111d8316278ce03b22d685265d5aa3724 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 10:36:46 +0200 Subject: [PATCH 24/34] Try with tarpaulin nightly --- .github/workflows/coverage.yml | 9 +++------ .github/workflows/test.yml | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index bb2cd146..93399ccf 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -7,16 +7,13 @@ on: - master concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-coverage cancel-in-progress: true jobs: test: name: coverage runs-on: ubuntu-latest - env: - RUST_LOG: swiftide=debug - RUST_BACKTRACE: 1 steps: - name: Checkout repository uses: actions/checkout@v4 @@ -32,10 +29,10 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@v2 with: - tool: cargo-llvm-cov + tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo llvm-cov --lcov --output-path target/lcov.info --all-features --workspace + cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 240 --coveralls - name: Coveralls uses: coverallsapp/github-action@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cd38aa7f..08cdfd06 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ on: - master concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-test cancel-in-progress: true env: From 96d5fb335073d5935ccb9366401810465cf39a9a Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 10:40:29 +0200 Subject: [PATCH 25/34] Nightly with llvm preview --- .github/workflows/coverage.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 93399ccf..87405164 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -19,9 +19,9 @@ jobs: uses: actions/checkout@v4 - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable + - uses: dtolnay/rust-toolchain@nightly with: - components: llvm-tools + components: llvm-tools-preview - name: Install Protoc uses: arduino/setup-protoc@v3 - name: Cache Cargo dependencies From bb7943d2bf95a84a4c4ce715c4d8a1d82d69fccb Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 11:59:16 +0200 Subject: [PATCH 26/34] Add coveralls repo token to tarpaulin --- .github/workflows/coverage.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 87405164..a1210b2b 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -32,7 +32,7 @@ jobs: tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 240 --coveralls + cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} - - name: Coveralls - uses: coverallsapp/github-action@v2 + # - name: Coveralls + # uses: coverallsapp/github-action@v2 From 6bfa61ad09691e1a92e42e4af4addc1b12f7c25c Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 12:20:31 +0200 Subject: [PATCH 27/34] Focus on tests and use llvm --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index a1210b2b..7bf027a6 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -32,7 +32,7 @@ jobs: tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} + cargo +nightly tarpaulin --verbose --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} --engine llvm # - name: Coveralls # uses: coverallsapp/github-action@v2 From 370b51c851ab83fc546879c8bf8f6ab623193be6 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 13:03:18 +0200 Subject: [PATCH 28/34] Try stable instead of nightly --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 7bf027a6..d6e99442 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -32,7 +32,7 @@ jobs: tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo +nightly tarpaulin --verbose --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} --engine llvm + cargo tarpaulin --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} --engine llvm # - name: Coveralls # uses: coverallsapp/github-action@v2 From 7adbaf2d7a8e333aa4c0f3e03e328cf4d503286a Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 13:51:23 +0200 Subject: [PATCH 29/34] Disable llvm --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index d6e99442..db7dcdde 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -32,7 +32,7 @@ jobs: tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo tarpaulin --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} --engine llvm + cargo tarpaulin --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} # - name: Coveralls # uses: coverallsapp/github-action@v2 From b08f0d2b22637bbd41f4b6a84d2deb3f33e4a9f7 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 14:13:15 +0200 Subject: [PATCH 30/34] Enable verbose --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index db7dcdde..7477c8ee 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -32,7 +32,7 @@ jobs: tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo tarpaulin --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} + cargo tarpaulin --verbose --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} # - name: Coveralls # uses: coverallsapp/github-action@v2 From 8ac06055cd07e42310d2f97ead8fcd609a2905b1 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 14:59:59 +0200 Subject: [PATCH 31/34] Increase timeout --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 7477c8ee..ff8aaf1f 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -32,7 +32,7 @@ jobs: tool: cargo-tarpaulin - name: Generate code coverage run: | - cargo tarpaulin --verbose --all-features --tests --timeout 240 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} + cargo tarpaulin --verbose --all-features --tests --timeout 1200 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} # - name: Coveralls # uses: coverallsapp/github-action@v2 From 8d0673cf3a37042b6388865f37ea796e93797fbd Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 15:03:04 +0200 Subject: [PATCH 32/34] Stable tool chain --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index ff8aaf1f..eb9dcf23 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@v4 - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@nightly + - uses: dtolnay/rust-toolchain@stable with: components: llvm-tools-preview - name: Install Protoc From 9a9916b6661f550b78790230d7857623589d9c92 Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 15:05:01 +0200 Subject: [PATCH 33/34] Update crates --- Cargo.lock | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 614a7c49..0e4a6b79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1861,8 +1861,6 @@ dependencies = [ "num-traits", "once_cell", "oorandom", - "plotters", - "rayon", "regex", "serde", "serde_derive", @@ -5877,34 +5875,6 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" -[[package]] -name = "plotters" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" - -[[package]] -name = "plotters-svg" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" -dependencies = [ - "plotters-backend", -] - [[package]] name = "png" version = "0.17.13" @@ -7250,9 +7220,9 @@ dependencies = [ [[package]] name = "spider" -version = "2.2.7" +version = "2.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5269634df56f83019cfa5713877b18c95f567608edc5d59d56e54b63d916c2d1" +checksum = "c87ce3b37cd12435be0f504f6fcafed8d77fd315941c345ef8974b48e3759320" dependencies = [ "ahash", "auto_encoder", From 9d7932e9741a4409aaea8a7c841cef1a73084b5d Mon Sep 17 00:00:00 2001 From: Timon Vonk Date: Sun, 1 Sep 2024 15:57:57 +0200 Subject: [PATCH 34/34] Disable coverage for now --- .github/workflows/coverage.yml | 76 +++++++++++++++++----------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index eb9dcf23..e8b4e000 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -1,38 +1,38 @@ -name: Coverage - -on: - pull_request: - push: - branches: - - master - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-coverage - cancel-in-progress: true - -jobs: - test: - name: coverage - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - with: - components: llvm-tools-preview - - name: Install Protoc - uses: arduino/setup-protoc@v3 - - name: Cache Cargo dependencies - uses: Swatinem/rust-cache@v2 - - name: Install cargo-llvm-cov - uses: taiki-e/install-action@v2 - with: - tool: cargo-tarpaulin - - name: Generate code coverage - run: | - cargo tarpaulin --verbose --all-features --tests --timeout 1200 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} - - # - name: Coveralls - # uses: coverallsapp/github-action@v2 +# name: Coverage +# +# on: +# pull_request: +# push: +# branches: +# - master +# +# concurrency: +# group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-coverage +# cancel-in-progress: true +# +# jobs: +# test: +# name: coverage +# runs-on: ubuntu-latest +# steps: +# - name: Checkout repository +# uses: actions/checkout@v4 +# +# - uses: actions/checkout@v4 +# - uses: dtolnay/rust-toolchain@stable +# with: +# components: llvm-tools-preview +# - name: Install Protoc +# uses: arduino/setup-protoc@v3 +# - name: Cache Cargo dependencies +# uses: Swatinem/rust-cache@v2 +# - name: Install cargo-llvm-cov +# uses: taiki-e/install-action@v2 +# with: +# tool: cargo-tarpaulin +# - name: Generate code coverage +# run: | +# cargo tarpaulin --verbose --all-features --tests --timeout 1200 --coveralls ${{ secrets.COVERALLS_REPO_TOKEN }} +# +# # - name: Coveralls +# # uses: coverallsapp/github-action@v2