From 32b9975b25fc31154fd65165cc882334bacd8880 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Wed, 18 Oct 2023 11:01:02 +0200 Subject: [PATCH 01/22] wip --- Cargo.lock | 3124 ++++++++++++++++++++++++++++---- crates/llm-ls/Cargo.toml | 3 + crates/llm-ls/src/main.rs | 1 + crates/llm-ls/src/retrieval.rs | 62 + 4 files changed, 2837 insertions(+), 353 deletions(-) create mode 100644 crates/llm-ls/src/retrieval.rs diff --git a/Cargo.lock b/Cargo.lock index c7185c2..fdeec02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,20 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.2" @@ -37,6 +51,42 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.4" @@ -72,7 +122,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" dependencies = [ - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -82,43 +132,632 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "arrow" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" +dependencies = [ + "ahash", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + 
+[[package]] +name = "arrow-array" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", +] + +[[package]] +name = "arrow-csv" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43ef855dc6b126dc197f43e061d4de46b9d4c033aa51c2587657f7508242cef1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "zstd 0.12.4", +] + +[[package]] +name 
= "arrow-json" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03d7e3b04dd688ccec354fe449aed56b831679f03e44ee2c1cfc4045067b69c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown", +] + +[[package]] +name = "arrow-schema" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" + +[[package]] +name = "arrow-select" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cebbb282d6b9244895f4a9a912e55e57bce112554c7fa91fcec5459cb421ab" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num", + "regex", + "regex-syntax 0.7.5", +] + +[[package]] +name = "async-recursion" +version = "1.0.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "async-trait" +version = "0.1.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "async_cell" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "834eee9ce518130a3b4d5af09ecc43e9d6b57ee76613f227a1ddd6b77c7a62bc" + +[[package]] +name = "auto_impl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fee3da8ef1276b0bee5dd1c7258010d8fffd31801447323115a25560e1327b89" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "aws-config" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6b3804dca60326e07205179847f17a4fce45af3a1106939177ad41ac08a6de" +dependencies = [ + "aws-credential-types", + "aws-http", + "aws-sdk-sso", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-json", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http", + "hyper", + "ring 0.16.20", + "time", + "tokio", + "tower", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a66ac8ef5fa9cf01c2d999f39d16812e90ec1467bd382cbbb74ba23ea86201" +dependencies = [ + "aws-smithy-async", + 
"aws-smithy-types", + "fastrand", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-http" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e626370f9ba806ae4c439e49675fd871f5767b093075cdf4fef16cac42ba900" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-types", + "aws-types", + "bytes", + "http", + "http-body", + "lazy_static", + "percent-encoding", + "pin-project-lite", + "tracing", +] + +[[package]] +name = "aws-runtime" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ac5cf0ff19c1bca0cea7932e11b239d1025a45696a4f44f72ea86e2b8bdd07" +dependencies = [ + "aws-credential-types", + "aws-http", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "fastrand", + "http", + "percent-encoding", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-dynamodb" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10aef6843bfc2dabfccad27f7e1ab303942bbda19f7ea7777d0d74388d073db4" +dependencies = [ + "aws-credential-types", + "aws-http", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http", + "regex", + "tokio-stream", + "tracing", +] + +[[package]] +name = "aws-sdk-sso" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "903f888ff190e64f6f5c83fb0f8d54f9c20481f1dc26359bb8896f5d99908949" +dependencies = [ + "aws-credential-types", + "aws-http", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http", + "regex", + "tokio-stream", + "tracing", +] 
+ +[[package]] +name = "aws-sdk-sts" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47ad6bf01afc00423d781d464220bf69fb6a674ad6629cbbcb06d88cdc2be82" +dependencies = [ + "aws-credential-types", + "aws-http", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "http", + "regex", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b28f4910bb956b7ab320b62e98096402354eca976c587d1eeccd523d9bac03" +dependencies = [ + "aws-smithy-http", + "form_urlencoded", + "hex", + "hmac", + "http", + "once_cell", + "percent-encoding", + "regex", + "sha2", + "time", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cdb73f85528b9d19c23a496034ac53703955a59323d581c06aa27b4e4e247af" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", + "tokio-stream", +] + +[[package]] +name = "aws-smithy-client" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c27b2756264c82f830a91cb4d2d485b2d19ad5bea476d9a966e03d27f27ba59a" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-types", + "bytes", + "fastrand", + "http", + "http-body", + "hyper", + "hyper-rustls", + "lazy_static", + "pin-project-lite", + "rustls", + "tokio", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-http" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54cdcf365d8eee60686885f750a34c190e513677db58bbc466c44c588abf4199" +dependencies = [ + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http", + 
"http-body", + "hyper", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "aws-smithy-http-tower" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "822de399d0ce62829a69dfa8c5cd08efdbe61a7426b953e2268f8b8b52a607bd" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "http", + "http-body", + "pin-project-lite", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1e7ab8fa7ad10c193af7ae56d2420989e9f4758bf03601a342573333ea34f" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-query" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28556a3902091c1f768a34f6c998028921bdab8d47d92586f363f14a4a32d047" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "0.56.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "745e096b3553e7e0f40622aa04971ce52765af82bebdeeac53aa6fc82fe801e6" +dependencies = [ + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http", + "http-body", + "once_cell", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", ] [[package]] -name = "anyhow" -version = "1.0.75" +name = "aws-smithy-runtime-api" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "93d0ae0c9cfd57944e9711ea610b48a963fb174a53aabacc08c5794a594b1d02" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "http", + "tokio", + "tracing", +] [[package]] -name = "async-trait" -version = "0.1.73" 
+name = "aws-smithy-types" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "d90dbc8da2f6be461fa3c1906b20af8f79d14968fe47f2b7d29d086f62a51728" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", + "base64-simd", + "itoa", + "num-integer", + "ryu", + "serde", + "time", ] [[package]] -name = "auto_impl" -version = "1.1.0" +name = "aws-smithy-xml" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fee3da8ef1276b0bee5dd1c7258010d8fffd31801447323115a25560e1327b89" +checksum = "e01d2dedcdd8023043716cfeeb3c6c59f2d447fce365d8e194838891794b23b6" dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", + "xmlparser", ] [[package]] -name = "autocfg" -version = "1.1.0" +name = "aws-types" +version = "0.56.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "85aa0451bf8af1bf22a4f028d5d28054507a14be43cb8ac0597a8471fba9edfe" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-types", + "http", + "rustc_version", + "tracing", +] [[package]] name = "axum" @@ -192,9 +831,19 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.3" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" + +[[package]] +name = "base64-simd" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] [[package]] name = 
"base64ct" @@ -214,6 +863,28 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq 0.3.0", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -223,23 +894,66 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "bytecount" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" + +[[package]] +name = "bytemuck" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "bytes-utils" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] [[package]] name = "bzip2" @@ -262,6 +976,37 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e34637b3140142bdf929fb439e8aa4ebad7651ebf7b1080b3930aa16ac1459ff" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", +] + [[package]] name = "cc" version = "1.0.83" @@ -278,6 +1023,43 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version 
= "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets 0.48.5", +] + +[[package]] +name = "chrono-tz" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "cipher" version = "0.4.4" @@ -290,9 +1072,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.7" +version = "4.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" +checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272" dependencies = [ "clap_builder", "clap_derive", @@ -300,9 +1082,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.7" +version = "4.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" +checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1" dependencies = [ "anstream", "anstyle", @@ -319,7 +1101,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] @@ -334,12 +1116,49 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "comfy-table" +version = "7.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "constant_time_eq" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation" version = "0.9.3" @@ -358,9 +1177,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" dependencies = [ "libc", ] @@ -417,69 +1236,294 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7014432223f4d721cb9786cd88bb89e7464e0ba984d4a7f49db7787f5f268674" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "glob", + "half", + "hashbrown", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "percent-encoding", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", +] + +[[package]] +name = "datafusion-common" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3903ed8f102892f17b48efa437f3542159241d41c564f0d1e78efdc5e663aa" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "num_cpus", + "object_store", + "parquet", + "sqlparser", +] + +[[package]] +name = "datafusion-execution" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "780b73b2407050e53f51a9781868593f694102c59e622de9a8aafc0343c4f237" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24c382676338d8caba6c027ba0da47260f65ffedab38fda78f6d8043f607557c" dependencies = [ - "generic-array", - "typenum", + "ahash", + "arrow", + "arrow-array", + "datafusion-common", + "sqlparser", + "strum", + "strum_macros", ] [[package]] -name = "darling" -version = "0.14.4" +name = "datafusion-optimizer" +version = "32.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "3f2904a432f795484fd45e29ded4537152adb60f636c05691db34fcd94c92c96" dependencies = [ - "darling_core", - "darling_macro", + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown", + "itertools", + "log", + "regex-syntax 0.7.5", ] [[package]] -name = "darling_core" -version = "0.14.4" +name = "datafusion-physical-expr" +version = "32.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "57b4968e9a998dc0476c4db7a82f280e2026b25f464e4aa0c3bb9807ee63ddfd" dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 1.0.109", + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "base64 0.21.5", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-expr", + "half", + "hashbrown", + "hex", + "indexmap", + "itertools", + "libc", + "log", + "md-5", + "paste", + "petgraph", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", ] [[package]] -name = "darling_macro" -version = "0.14.4" +name = "datafusion-physical-plan" +version = "32.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +checksum = "efd0d1fe54e37a47a2d58a1232c22786f2c28ad35805fdcd08f0253a8b0aaa90" dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "half", + "hashbrown", + "indexmap", + "itertools", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", + "uuid", ] [[package]] -name = "dashmap" -version = "5.5.3" +name = 
"datafusion-sql" +version = "32.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +checksum = "b568d44c87ead99604d704f942e257c8a236ee1bbf890ee3e034ad659dcb2c21" dependencies = [ - "cfg-if", - "hashbrown 0.14.0", - "lock_api", - "once_cell", - "parking_lot_core", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "log", + "sqlparser", ] [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] [[package]] name = "derive_builder" @@ -523,6 +1567,33 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "either" version = "1.9.0" @@ -546,12 +1617,21 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.5" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = 
"a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "version_check", ] [[package]] @@ -566,11 +1646,27 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -599,18 +1695,18 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] [[package]] name = "futures" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" dependencies = [ "futures-channel", "futures-core", @@ -623,9 +1719,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" dependencies = [ "futures-core", "futures-sink", @@ -633,15 +1729,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" dependencies = [ "futures-core", "futures-task", @@ -650,38 +1746,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] name = 
"futures-sink" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" dependencies = [ "futures-channel", "futures-core", @@ -707,9 +1803,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", @@ -718,15 +1814,21 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.0" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "glob" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" dependencies = [ "bytes", "fnv", @@ -734,7 +1836,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap", "slab", "tokio", "tokio-util", @@ -742,16 +1844,25 @@ dependencies = [ ] [[package]] -name = "hashbrown" -version = "0.12.3" +name = "half" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] [[package]] name = "hashbrown" -version = "0.14.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "heck" @@ -761,9 +1872,15 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "hex" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "hmac" @@ -780,14 +1897,14 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" dependencies = [ - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "http" -version = 
"0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" dependencies = [ "bytes", "fnv", @@ -817,6 +1934,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.27" @@ -834,7 +1957,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -842,138 +1965,481 @@ dependencies = [ ] [[package]] -name = "hyper-rustls" -version = "0.24.1" +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http", + "hyper", + "log", + "rustls", + "rustls-native-certs", + "tokio", + "tokio-rustls", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = 
"0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "jobserver" +version 
= "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lance" +version = "0.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de45410d4b9564d1479aa9ce5098c1777ded4fa368ba46f80b06a5aa69e10ee0" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "async_cell", + "aws-config", + "aws-credential-types", + "aws-sdk-dynamodb", + "base64 0.21.5", + "byteorder", + "bytes", + "chrono", + "dashmap", + "datafusion", + "futures", + "half", + "http", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-datagen", + "lance-index", + "lance-linalg", + "lazy_static", + "log", + "lru_time_cache", + "moka", + "nohash-hasher", + "num-traits", + "num_cpus", + "object_store", + "ordered-float 3.9.2", + "pin-project", + "prost", + "prost-build", + "prost-types", + "rand", + "roaring", + "serde", + "serde_json", + "shellexpand", + "snafu", + "tempfile", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "lance-arrow" +version = "0.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "492886fc285f0fbcc114141cb0a214babf4c6e623391f36a5e95a9c65b9b8a86" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num-traits", + "rand", + "serde", +] + +[[package]] +name = "lance-core" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +checksum = "8fa37b4c270d49c0cbb1d0c826f85057aafc7b8dc560888fcbc9c6d168d3b950" dependencies = [ - "futures-util", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-dynamodb", + "byteorder", + "bytes", + "chrono", + "datafusion-common", + "datafusion-sql", + "futures", "http", - "hyper", - "rustls", + "lance-arrow", + "lazy_static", + "log", + "mock_instant", + "moka", + "num-traits", + "num_cpus", + "object_store", + "pin-project", + "prost", + "prost-build", + "prost-types", + "rand", + "roaring", + "serde", + "serde_json", + "shellexpand", + "snafu", "tokio", - "tokio-rustls", + "tracing", + "url", + "uuid", ] [[package]] -name = "hyper-tls" -version = "0.5.0" +name = "lance-datafusion" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +checksum = "88c5f299e03fa65505d8d0a206ad9fe51871fbd7d9ee9859e3691677ee2b2104" dependencies = [ - "bytes", - "hyper", - "native-tls", - "tokio", - "tokio-native-tls", + "arrow", + "arrow-array", + "arrow-schema", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "lance-arrow", + "lance-core", ] [[package]] -name = "ident_case" -version = "1.0.1" +name = "lance-datagen" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +checksum = "b673feb8202b44bf740f495aeb26e902d172dc12af7a93e0083b9d87e1a82980" +dependencies = [ + "arrow", + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "rand", + "rand_xoshiro", +] [[package]] -name = "idna" -version = "0.4.0" +name = "lance-index" +version = "0.8.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "e16e3b195987247db991ca339320b5c0c2d6a6aa0b800503e8b0c9fd0144edcc" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "arrow", + "arrow-arith", + "arrow-array", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "half", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-linalg", + "log", + "nohash-hasher", + "num-traits", + "num_cpus", + "object_store", + "pin-project", + "prost", + "prost-build", + "rand", + "roaring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", ] [[package]] -name = "indexmap" -version = "1.9.3" +name = "lance-linalg" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "a9303c6c271355dae3963f06b74671f4de4e9356ae1fef2b9ae0fe7dff2ffabc" dependencies = [ - "autocfg", - "hashbrown 0.12.3", + "arrow-array", + "arrow-schema", + "arrow-select", + "cc", + "futures", + "half", + "lance-arrow", + "log", + "num-traits", + "num_cpus", + "rand", + "tokio", + "tracing", ] [[package]] -name = "indexmap" -version = "2.0.1" +name = "lance-testing" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e" +checksum = "a54bf786b0fe66d134ab8705b191a057ed2f8a656988cd5950793f2a8b1727a8" dependencies = [ - "equivalent", - "hashbrown 0.14.0", + "arrow-array", + "arrow-schema", + "lance-arrow", + "num-traits", + "rand", ] [[package]] -name = "inout" -version = "0.1.3" +name = "lazy_static" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "generic-array", + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", ] [[package]] -name = "ipnet" -version = "2.8.0" +name = "lexical-parse-float" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] [[package]] -name = "itertools" -version = "0.11.0" +name = "lexical-parse-integer" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "either", + "lexical-util", + "static_assertions", ] [[package]] -name = "itoa" -version = "1.0.9" +name = "lexical-util" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] [[package]] -name = "jobserver" -version = "0.1.27" +name = "lexical-write-float" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" 
dependencies = [ - "libc", + "lexical-util", + "lexical-write-integer", + "static_assertions", ] [[package]] -name = "js-sys" -version = "0.3.64" +name = "lexical-write-integer" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "wasm-bindgen", + "lexical-util", + "static_assertions", ] [[package]] -name = "lazy_static" -version = "1.4.0" +name = "libc" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] -name = "libc" -version = "0.2.147" +name = "libm" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.1", + "libc", + "redox_syscall", +] [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" [[package]] name = "llm-ls" version = "0.4.0" dependencies = [ + "arrow-array", + "arrow-schema", "home", "reqwest", "ropey", @@ -1009,13 +2475,14 @@ dependencies = [ "tree-sitter-swift", "tree-sitter-typescript", "uuid", + "vectordb", ] [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -1027,6 +2494,12 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "lru_time_cache" +version = "0.11.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9106e1d747ffd48e6be5bb2d97fa706ed25b144fbee4d5c02eae110cd8d6badd" + [[package]] name = "lsp-client" version = "0.1.0" @@ -1051,6 +2524,35 @@ dependencies = [ "url", ] +[[package]] +name = "lz4" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "mach2" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8" +dependencies = [ + "libc", +] + [[package]] name = "macro_rules_attribute" version = "0.2.0" @@ -1078,15 +2580,25 @@ dependencies = [ [[package]] name = "matchit" -version = "0.7.2" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed1202b2a6f884ae56f04cff409ab315c5ce26b5e58d7412e484f01fd52f52ef" +checksum = 
"d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] [[package]] name = "memchr" -version = "2.6.3" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -1120,13 +2632,22 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ "libc", "wasi", - "windows-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "mock_instant" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c1a54de846c4006b88b1516731cc1f6026eb5dc4bcb186aa071ef66d40524ec" +dependencies = [ + "once_cell", ] [[package]] @@ -1138,11 +2659,33 @@ dependencies = [ "tokio", ] +[[package]] +name = "moka" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa6e72583bf6830c956235bff0d5afec8cf2952f579ebad18ae7821a917d950f" +dependencies = [ + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "once_cell", + "parking_lot", + "quanta", + "rustc_version", + "scheduled-thread-pool", + "skeptic", + "smallvec", + "tagptr", + "thiserror", + "triomphe", + "uuid", +] + [[package]] name = "monostate" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f370ae88093ec6b11a710dec51321a61d420fafd1bad6e30d01bd9c920e8ee" +checksum = "e404e13820ea0df0eda93aa294e0c80de76a0daa6bec590d376fbec6d7810394" dependencies = [ "monostate-impl", "serde", @@ -1150,15 +2693,21 @@ dependencies = [ [[package]] name = "monostate-impl" 
-version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce" +checksum = "531c82a934da419bed3da09bd87d6e98c72f8d4aa755427b3b009c2b8b8c433c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] +[[package]] +name = "multimap" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" + [[package]] name = "native-tls" version = "0.2.11" @@ -1177,6 +2726,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -1197,6 +2752,83 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + 
"num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -1216,6 +2848,36 @@ dependencies = [ "memchr", ] +[[package]] +name = "object_store" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" +dependencies = [ + "async-trait", + "base64 0.21.5", + "bytes", + "chrono", + "futures", + "humantime", + "hyper", + "itertools", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand", + "reqwest", + "ring 0.16.20", + "rustls-pemfile", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + [[package]] name = "once_cell" version = "1.18.0" @@ -1246,9 +2908,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" dependencies = [ "bitflags 2.4.1", "cfg-if", @@ -1267,27 +2929,57 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", ] [[package]] -name = "openssl-probe" -version = "0.1.5" +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "ordered-float" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] [[package]] -name = "openssl-sys" -version = "0.9.93" +name = "ordered-float" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", + "num-traits", ] +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" + [[package]] name = "overload" version = "0.1.1" @@ -1306,15 +2998,58 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" 
+checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "parquet" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.21.5", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "hashbrown", + "lz4", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd 0.12.4", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", ] [[package]] @@ -1348,9 +3083,57 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] [[package]] name = "pin-project" @@ -1369,7 +3152,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] @@ -1390,12 +3173,28 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "prettyplease" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +dependencies = [ + "proc-macro2", + "syn 2.0.39", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1422,13 +3221,104 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = 
"39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" +dependencies = [ + "bytes", + "heck", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 2.0.39", + "tempfile", + "which", +] + +[[package]] +name = "prost-derive" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "prost-types" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" +dependencies = [ + "prost", +] + +[[package]] +name = "pulldown-cmark" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a1a2f1f0a7ecff9c31abbe177637be0e97a0aef46cf8738ece09327985d998" +dependencies = [ + "bitflags 1.3.2", + "memchr", + "unicase", +] + +[[package]] +name = "quanta" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" +dependencies = [ + "crossbeam-utils", + "libc", + "mach2", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + +[[package]] +name = "quick-xml" +version = "0.30.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.33" @@ -1468,6 +3358,24 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core", +] + +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rayon" version = "1.8.0" @@ -1501,23 +3409,34 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.3.5" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_users" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" -version = "1.9.5" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.3.8", - "regex-syntax 0.7.5", + "regex-automata 0.4.3", + "regex-syntax 0.8.2", ] [[package]] @@ -1531,13 +3450,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.8" +version = 
"0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] @@ -1552,13 +3471,19 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "reqwest" -version = "0.11.20" +version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" +checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" dependencies = [ - "base64 0.21.3", + "base64 0.21.5", "bytes", "encoding_rs", "futures-core", @@ -1582,6 +3507,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "system-configuration", "tokio", "tokio-native-tls", "tokio-rustls", @@ -1596,6 +3522,12 @@ dependencies = [ "winreg", ] +[[package]] +name = "retain_mut" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" + [[package]] name = "ring" version = "0.16.20" @@ -1605,17 +3537,42 @@ dependencies = [ "cc", "libc", "once_cell", - "spin", - "untrusted", + "spin 0.5.2", + "untrusted 0.7.1", "web-sys", "winapi", ] +[[package]] +name = "ring" +version = "0.17.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin 0.9.8", + "untrusted 0.9.0", + 
"windows-sys 0.48.0", +] + +[[package]] +name = "roaring" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" +dependencies = [ + "bytemuck", + "byteorder", + "retain_mut", +] + [[package]] name = "ropey" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ce7a2c43a32e50d666e33c5a80251b31147bb4b49024bcab11fb6f20c671ed" +checksum = "93411e420bcd1a75ddd1dc3caf18c23155eda2c090631a85af21ba19e97093b5" dependencies = [ "smallvec", "str_indices", @@ -1627,48 +3584,69 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rustix" -version = "0.38.13" +version = "0.38.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" +checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" dependencies = [ "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "rustls" -version = "0.21.7" +version = "0.21.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" dependencies = [ "log", - "ring", + "ring 0.17.5", "rustls-webpki", "sct", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64 0.21.3", + "base64 0.21.5", ] [[package]] name = "rustls-webpki" -version = "0.101.4" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring", - "untrusted", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -1683,13 +3661,31 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" dependencies = [ - "windows-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "scheduled-thread-pool" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" +dependencies = [ + "parking_lot", ] [[package]] @@ -1700,12 +3696,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" 
+version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring", - "untrusted", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -1731,31 +3727,46 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +dependencies = [ + "serde", +] + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + [[package]] name = "serde" -version = "1.0.188" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] name = "serde_json" -version = "1.0.105" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -1774,13 +3785,13 @@ dependencies = [ [[package]] name = "serde_repr" -version = "0.1.16" +version = "0.1.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" +checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] @@ -1797,11 +3808,11 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.25" +version = "0.9.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" +checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" dependencies = [ - "indexmap 2.0.1", + "indexmap", "itoa", "ryu", "serde", @@ -1832,42 +3843,100 @@ dependencies = [ [[package]] name = "sharded-slab" -version = "0.1.4" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" dependencies = [ "lazy_static", ] [[package]] -name = "signal-hook-registry" -version = "1.4.1" +name = "shellexpand" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da03fa3b94cc19e3ebfc88c4229c49d8f08cdbd1228870a45f0ffdf84988e14b" +dependencies = [ + "dirs", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "skeptic" +version = "0.13.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" +dependencies = [ + "bytecount", + "cargo_metadata", + "error-chain", + "glob", + "pulldown-cmark", + "tempfile", + "walkdir", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" + +[[package]] +name = "snafu" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ - "libc", + "doc-comment", + "snafu-derive", ] [[package]] -name = "slab" -version = "0.4.9" +name = "snafu-derive" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" dependencies = [ - "autocfg", + "heck", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] -name = "smallvec" -version = "1.11.0" +name = "snap" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" dependencies = [ "libc", "winapi", @@ 
-1875,12 +3944,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.3" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1889,6 +3958,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "spm_precompiled" version = "0.1.4" @@ -1901,11 +3976,38 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "sqlparser" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "str_indices" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f026164926842ec52deb1938fae44f83dfdb82d0a5b0270c5bd5935ab74d6dd" +checksum = "e9557cb6521e8d009c51a8666f09356f4b817ba9ba0981a305bd86aee47bd35c" [[package]] name = "strsim" @@ -1913,6 +4015,28 @@ version = 
"0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.39", +] + [[package]] name = "subtle" version = "2.5.0" @@ -1932,9 +4056,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.31" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2", "quote", @@ -1947,17 +4071,44 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if", "fastrand", "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -2004,7 +4155,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] @@ -2017,14 +4168,26 @@ dependencies = [ "once_cell", ] +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float 2.10.1", +] + [[package]] name = "time" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", "itoa", + "powerfmt", "serde", "time-core", "time-macros", @@ -2032,19 +4195,28 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +checksum = 
"4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -2093,9 +4265,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.32.0" +version = "1.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" dependencies = [ "backtrace", "bytes", @@ -2105,20 +4277,20 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.3", + "socket2 0.5.5", "tokio-macros", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] @@ -2141,11 +4313,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ 
-2209,7 +4392,7 @@ checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] @@ -2220,11 +4403,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -2233,31 +4415,32 @@ dependencies = [ [[package]] name = "tracing-appender" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d48f71a791638519505cefafe162606f706c25592e4bde4d97600c0195312e" +checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" dependencies = [ "crossbeam-channel", + "thiserror", "time", "tracing-subscriber", ] [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", ] [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", "valuable", @@ -2265,12 +4448,12 @@ dependencies = [ [[package]] name = "tracing-log" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ - "lazy_static", "log", + "once_cell", "tracing-core", ] @@ -2286,9 +4469,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ "matchers", "nu-ansi-term", @@ -2447,9 +4630,9 @@ dependencies = [ [[package]] name = "tree-sitter-md" -version = "0.1.5" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a237fa10f6b466b76c783c79b08cc172581e547ef1dbb6ddf1f8b4e230157e1" +checksum = "3c20d3ef8d202430b644a307e6299d84bf8ed87fa1b796e4638f8805a595060c" dependencies = [ "cc", "tree-sitter", @@ -2535,18 +4718,43 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "triomphe" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0c5a71827ac326072b6405552093e2ad2accd25a32fd78d4edc82d98c7f2409" + [[package]] name = "try-lock" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" @@ -2555,9 +4763,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" @@ -2583,6 +4791,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -2601,11 +4815,17 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -2613,6 +4833,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8parse" version = "0.2.1" @@ -2621,9 +4847,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.5.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" dependencies = [ "getrandom", "rand", @@ -2642,12 +4868,57 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vectordb" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "975a735e82c92c7a8597b837ad62fdb1b60c0d0ac5a139a2f07a03a484cebf77" +dependencies = [ + "arrow", + "arrow-array", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "futures", + "half", + "lance", + "lance-index", + "lance-linalg", + "lance-testing", + "log", + "num-traits", + "object_store", + "snafu", + "tokio", + "url", +] + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2665,9 +4936,9 @@ checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2675,24 +4946,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12" dependencies = [ "cfg-if", "js-sys", @@ -2702,9 +4973,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2712,22 +4983,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.39", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" [[package]] name = "wasm-streams" @@ -2744,9 +5015,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" dependencies = [ "js-sys", "wasm-bindgen", @@ -2754,9 +5025,21 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.2" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" + +[[package]] +name = "which" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] [[package]] name = "winapi" @@ -2774,19 +5057,46 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] @@ -2795,13 +5105,28 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] @@ -2810,42 +5135,84 @@ version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + 
[[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "winreg" version = "0.50.0" @@ -2853,7 +5220,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ "cfg-if", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -2877,6 +5244,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f58e7b3ca8977093aae6b87b6a7730216fc4c53a6530bab5c43a783cd810c1a8" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "xshell" version = "0.2.5" @@ -2905,6 +5278,32 @@ dependencies = [ "zip", ] +[[package]] +name = "zerocopy" +version = "0.7.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" + [[package]] name = "zip" version = "0.6.6" @@ -2914,7 +5313,7 @@ dependencies = [ "aes", "byteorder", "bzip2", - "constant_time_eq", + "constant_time_eq 0.1.5", "crc32fast", "crossbeam-utils", "flate2", @@ -2922,7 +5321,7 @@ dependencies = [ "pbkdf2", "sha1", "time", - "zstd", + "zstd 0.11.2+zstd.1.5.2", ] [[package]] @@ -2931,7 +5330,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe 6.0.6", ] [[package]] @@ -2944,6 +5352,16 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.9+zstd.1.5.5" diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index 7ac7cec..2df062f 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" name = "llm-ls" [dependencies] +arrow-array = "47" +arrow-schema = "47" home = "0.5" ropey = { version = "1.6", default-features = false, features = [ "simd", @@ -53,6 +55,7 @@ tree-sitter-rust = "0.20" tree-sitter-scala = "0.20" tree-sitter-swift = "0.3" tree-sitter-typescript = "0.20" 
+vectordb = "0.3" [dependencies.uuid] version = "1.4" diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 318be3b..320519b 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -22,6 +22,7 @@ use uuid::Uuid; mod adaptors; mod document; mod language_id; +mod retrieval; const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600); pub const NAME: &str = "llm-ls"; diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs new file mode 100644 index 0000000..f207a8f --- /dev/null +++ b/crates/llm-ls/src/retrieval.rs @@ -0,0 +1,62 @@ +use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt32Array}; +use arrow_schema::{DataType, Field, Schema}; +use std::{path::PathBuf, sync::Arc}; +use vectordb::{Database, Table}; + +async fn initialse_database(cache_path: PathBuf) -> Table { + let uri = cache_path.join("database"); + let db = Database::connect(uri.to_str().expect("path should be utf8")) + .await + .expect("failed to open database"); + let table = match db.open_table("code-slices").await { + Ok(table) => table, + Err(vectordb::error::Error::TableNotFound { .. 
}) => { + let schema = Schema::new(vec![ + Field::new("workspace_root", DataType::Utf8, false), + Field::new("file_url", DataType::Utf8, false), + Field::new("start_line_no", DataType::UInt32, false), + Field::new("end_line_no", DataType::UInt32, false), + Field::new("window_size", DataType::UInt32, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(StringArray::from(Vec::<&str>::new())), + Arc::new(StringArray::from(Vec::<&str>::new())), + Arc::new(UInt32Array::from(Vec::<u32>::new())), + Arc::new(UInt32Array::from(Vec::<u32>::new())), + Arc::new(UInt32Array::from(Vec::<u32>::new())), + ], + ) + .expect("failure while defining schema"); + db.create_table( + "code-slices", + RecordBatchIterator::new(vec![batch.clone()].into_iter().map(Ok), batch.schema()), + None, + ) + .await + .expect("failed to create table") + } + Err(err) => panic!("error while opening table: {}", err), + }; + table +} + +pub(crate) struct SnippetRetriever { + db: Table, +} + +impl SnippetRetriever { + /// # Panics + /// + /// Panics if the database cannot be initialised. 
+ pub(crate) async fn new(cache_path: PathBuf) -> Self { + Self { + db: initialse_database(cache_path).await, + } + } + + pub(crate) async fn build_workspace_snippets(workspace_root: String) {} + + pub(crate) async fn update_document(file_url: String) {} +} From 407be56bbfe7422772e78c163cf640880a113f3a Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Mon, 11 Dec 2023 10:57:41 +0100 Subject: [PATCH 02/22] wip: add starencoder model to generate embeddings --- Cargo.lock | 648 +++++++++++++++++++++++++++++++-- crates/gitignore/Cargo.toml | 15 + crates/gitignore/src/lib.rs | 212 +++++++++++ crates/llm-ls/Cargo.toml | 13 +- crates/llm-ls/src/adaptors.rs | 4 + crates/llm-ls/src/error.rs | 65 ++++ crates/llm-ls/src/main.rs | 218 +++++++---- crates/llm-ls/src/retrieval.rs | 379 ++++++++++++++++++- 8 files changed, 1446 insertions(+), 108 deletions(-) create mode 100644 crates/gitignore/Cargo.toml create mode 100644 crates/gitignore/src/lib.rs create mode 100644 crates/llm-ls/src/error.rs diff --git a/Cargo.lock b/Cargo.lock index fdeec02..6eda475 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -932,6 +932,20 @@ name = "bytemuck" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] [[package]] name = "byteorder" @@ -985,6 +999,61 @@ dependencies = [ "serde", ] +[[package]] +name = "candle-core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60d9b91c73bc662dc45aff607f5ffe79724b7cf7d7c8dc12a72b25921683b67" +dependencies = [ + "byteorder", + "gemm", + "half", + "memmap2", + "num-traits", + "num_cpus", + "rand 0.8.5", 
+ "rand_distr", + "rayon", + "safetensors", + "thiserror", + "yoke", + "zip", +] + +[[package]] +name = "candle-nn" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb6e13e7076439309786482d4d4c1b4e1f2b102ca93513372d5419ffcf5df25" +dependencies = [ + "candle-core", + "half", + "num-traits", + "rayon", + "safetensors", + "serde", + "thiserror", +] + +[[package]] +name = "candle-transformers" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0169336de9dc62dd84e19af1408e53ef8ad07eef8fc103cfebf5b6c3e3f23c2c" +dependencies = [ + "byteorder", + "candle-core", + "candle-nn", + "num-traits", + "rand 0.8.5", + "rayon", + "serde", + "serde_json", + "serde_plain", + "tracing", + "wav", +] + [[package]] name = "cargo-platform" version = "0.1.5" @@ -1127,6 +1196,19 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "console" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.45.0", +] + [[package]] name = "const-random" version = "0.1.17" @@ -1355,7 +1437,7 @@ dependencies = [ "parquet", "percent-encoding", "pin-project-lite", - "rand", + "rand 0.8.5", "sqlparser", "tempfile", "tokio", @@ -1399,7 +1481,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.8.5", "tempfile", "url", ] @@ -1464,7 +1546,7 @@ dependencies = [ "md-5", "paste", "petgraph", - "rand", + "rand 0.8.5", "regex", "sha2", "unicode-segmentation", @@ -1497,7 +1579,7 @@ dependencies = [ "once_cell", "parking_lot", "pin-project-lite", - "rand", + "rand 0.8.5", "tokio", "uuid", ] @@ -1594,12 +1676,28 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] 
+name = "dyn-stack" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" +dependencies = [ + "bytemuck", + "reborrow", +] + [[package]] name = "either" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding_rs" version = "0.8.33" @@ -1702,6 +1800,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "futures" version = "0.3.29" @@ -1791,6 +1895,123 @@ dependencies = [ "slab", ] +[[package]] +name = "gemm" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3afa707040531a7527477fd63a81ea4f6f3d26037a2f96776e57fb843b258e" +dependencies = [ + "dyn-stack", + "gemm-c32", + "gemm-c64", + "gemm-common", + "gemm-f16", + "gemm-f32", + "gemm-f64", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3973a4c30c73f26a099113953d0c772bb17ee2e07976c0a06b8fe1f38a57d" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30362894b93dada374442cb2edf4512ddf19513c9bec88e06a445bcb6b22e64f" +dependencies = [ + "dyn-stack", + 
"gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "988499faa80566b046b4fee2c5f15af55b5a20c1fe8486b112ebb34efa045ad6" +dependencies = [ + "bytemuck", + "dyn-stack", + "half", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f16" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6cf2854a12371684c38d9a865063a27661812a3ff5803454c5742e8f5a388ce" +dependencies = [ + "dyn-stack", + "gemm-common", + "gemm-f32", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bc84003cf6d950a7c7ca714ad6db281b6cef5c7d462f5cd9ad90ea2409c7227" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.16.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35187ef101a71eed0ecd26fb4a6255b4192a12f1c5335f3a795698f2d9b6cf33" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1818,6 +2039,15 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "gitignore" +version = "0.1.0" +dependencies = [ + "glob", + "tempdir", + "thiserror", +] + [[package]] name = "glob" version = "0.3.1" @@ -1849,9 +2079,12 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" dependencies = [ + "bytemuck", "cfg-if", "crunchy", "num-traits", + "rand 0.8.5", + "rand_distr", ] [[package]] @@ -1882,6 +2115,27 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hf-hub" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732" +dependencies = [ + "dirs", + "futures", + "indicatif", + "log", + "native-tls", + "num_cpus", + "rand 0.8.5", + "reqwest", + "serde", + "serde_json", + "thiserror", + "tokio", + "ureq", +] + [[package]] name = "hmac" version = "0.12.1" @@ -2042,6 +2296,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + [[package]] name = "inout" version = "0.1.3" @@ -2051,6 +2318,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2147,7 +2423,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand", + "rand 0.8.5", "roaring", "serde", "serde_json", @@ -2173,7 +2449,7 @@ dependencies = [ "arrow-select", "half", "num-traits", - "rand", + "rand 0.8.5", "serde", ] @@ -2215,7 +2491,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand", + "rand 0.8.5", "roaring", "serde", "serde_json", @@ -2256,7 +2532,7 @@ dependencies = [ "arrow-cast", 
"arrow-schema", "chrono", - "rand", + "rand 0.8.5", "rand_xoshiro", ] @@ -2292,7 +2568,7 @@ dependencies = [ "pin-project", "prost", "prost-build", - "rand", + "rand 0.8.5", "roaring", "serde", "serde_json", @@ -2317,7 +2593,7 @@ dependencies = [ "log", "num-traits", "num_cpus", - "rand", + "rand 0.8.5", "tokio", "tracing", ] @@ -2332,7 +2608,7 @@ dependencies = [ "arrow-schema", "lance-arrow", "num-traits", - "rand", + "rand 0.8.5", ] [[package]] @@ -2440,11 +2716,18 @@ version = "0.4.0" dependencies = [ "arrow-array", "arrow-schema", + "candle-core", + "candle-nn", + "candle-transformers", + "futures-util", + "gitignore", + "hf-hub", "home", "reqwest", "ropey", "serde", "serde_json", + "thiserror", "tokenizers", "tokio", "tower-lsp", @@ -2456,6 +2739,7 @@ dependencies = [ "tree-sitter-c", "tree-sitter-c-sharp", "tree-sitter-cpp", + "tree-sitter-css", "tree-sitter-elixir", "tree-sitter-erlang", "tree-sitter-go", @@ -2467,6 +2751,7 @@ dependencies = [ "tree-sitter-lua", "tree-sitter-md", "tree-sitter-objc", + "tree-sitter-php", "tree-sitter-python", "tree-sitter-r", "tree-sitter-ruby", @@ -2600,6 +2885,16 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "memmap2" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +dependencies = [ + "libc", + "stable_deref_trait", +] + [[package]] name = "memoffset" version = "0.9.0" @@ -2783,6 +3078,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" dependencies = [ + "bytemuck", "num-traits", ] @@ -2839,6 +3135,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.32.1" @@ -2865,7 +3167,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml", - "rand", + "rand 0.8.5", "reqwest", "ring 0.16.20", "rustls-pemfile", @@ -3059,7 +3361,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", - "rand_core", + "rand_core 0.6.4", "subtle", ] @@ -3123,7 +3425,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -3173,6 +3475,12 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "portable-atomic" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b" + [[package]] name = "powerfmt" version = "0.2.0" @@ -3293,6 +3601,18 @@ dependencies = [ "unicase", ] +[[package]] +name = "pulp" +version = "0.18.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16785ee69419641c75affff7c9fdbdb7c0ab26dc9a5fb5218c2a2e9e4ef2087d" +dependencies = [ + "bytemuck", + "libm", + "num-complex", + "reborrow", +] + [[package]] name = "quanta" version = "0.11.1" @@ -3328,6 +3648,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.8.5" @@ -3336,7 +3669,7 @@ checksum = 
"34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -3346,9 +3679,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", ] +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.6.4" @@ -3358,13 +3706,23 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" dependencies = [ - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -3407,6 +3765,21 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + [[package]] name = "redox_syscall" version = 
"0.4.1" @@ -3477,6 +3850,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + [[package]] name = "reqwest" version = "0.11.22" @@ -3528,6 +3910,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" +[[package]] +name = "riff" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" + [[package]] name = "ring" version = "0.16.20" @@ -3661,6 +4049,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +[[package]] +name = "safetensors" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93279b86b3de76f820a8854dd06cbc33cfa57a417b19c47f6a25280112fb1df" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "same-file" version = "1.0.6" @@ -3783,6 +4181,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_plain" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50" +dependencies = [ + "serde", +] + [[package]] name = "serde_repr" version = "0.1.17" @@ -3997,6 +4404,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] 
name = "static_assertions" version = "1.1.0" @@ -4071,6 +4484,18 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "synstructure" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", + "unicode-xid", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -4098,6 +4523,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" version = "3.8.1" @@ -4122,7 +4557,7 @@ dependencies = [ "home", "lsp-client", "lsp-types", - "rand", + "rand 0.8.5", "reqwest", "ropey", "serde", @@ -4234,9 +4669,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokenizers" -version = "0.14.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9be88c795d8b9f9c4002b3a8f26a6d0876103a6f523b32ea3bac52d8560c17c" +checksum = "062b8a9613d6017633b80fb55fbb33f1aff006c36225a3025630753398034b3c" dependencies = [ "aho-corasick", "derive_builder", @@ -4249,7 +4684,7 @@ dependencies = [ "monostate", "onig", "paste", - "rand", + "rand 0.8.5", "rayon", "rayon-cond", "regex", @@ -4538,6 +4973,16 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-css" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3306ddefa1d2681adda2613d11974ffabfbeb215e23235da6c862f3493a04fd" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-elixir" version = "0.1.0" @@ -4550,9 +4995,9 @@ dependencies = [ [[package]] name = "tree-sitter-erlang" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d110d62a7ae35b985d8cfbc4de6e9281c7cbf268c466e30ebb31c2d3f861141" +checksum = "e812f0b7cf3ee07049dd4433c420dec32e421467084456fcd4c6151bf6817b07" dependencies = [ "cc", "tree-sitter", @@ -4648,6 +5093,16 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-php" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18b689aaa57bd1f0707e5c0728004e7f737b16768644a7e745d23021330797de" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-python" version = "0.20.4" @@ -4797,6 +5252,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -4821,6 +5282,25 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cdd25c339e200129fe4de81451814e5228c9b771d57378817d6117cc2b3f97" +dependencies = [ + "base64 0.21.5", + "flate2", + "log", + "native-tls", + "once_cell", + "rustls", + "rustls-webpki", + "serde", + "serde_json", + "url", + "webpki-roots", +] + [[package]] name = "url" version = "2.5.0" @@ -4852,7 +5332,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" dependencies = [ "getrandom", - "rand", + "rand 0.8.5", "serde", ] @@ -5013,6 +5493,15 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wav" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609" +dependencies = [ + "riff", +] + [[package]] name = "web-sys" version = "0.3.66" @@ -5081,6 +5570,15 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -5099,6 +5597,21 @@ dependencies = [ "windows-targets 0.52.0", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -5129,6 +5642,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -5141,6 +5660,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5153,6 +5678,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5165,6 +5696,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5177,6 +5714,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5189,6 +5732,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -5201,6 +5750,12 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -5278,6 +5833,30 @@ dependencies = [ "zip", ] +[[package]] +name = "yoke" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65e71b2e4f287f467794c671e2b8f8a5f3716b3c829079a1c44740148eff07e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.26" @@ -5298,6 +5877,27 @@ dependencies = [ "syn 2.0.39", ] +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", + "synstructure", +] + [[package]] name = "zeroize" version = "1.7.0" diff --git a/crates/gitignore/Cargo.toml 
b/crates/gitignore/Cargo.toml new file mode 100644 index 0000000..cc6e292 --- /dev/null +++ b/crates/gitignore/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "gitignore" +version = "0.1.0" +edition.workspace = true +license.workspace = true +authors.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +glob = "0.3" +thiserror = "1" + +[dev-dependencies] +tempdir = "0.3" diff --git a/crates/gitignore/src/lib.rs b/crates/gitignore/src/lib.rs new file mode 100644 index 0000000..3046765 --- /dev/null +++ b/crates/gitignore/src/lib.rs @@ -0,0 +1,212 @@ +use std::{ + fmt::Debug, + fs::{canonicalize, File}, + io::{BufRead, BufReader}, + path::{Path, PathBuf}, +}; + +use glob::{MatchOptions, Pattern}; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("non utf8 path")] + NonUtf8Path, + #[error("glob pattern error: {0}")] + Pattern(#[from] glob::PatternError), +} + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub struct Rule { + negate: bool, + pattern: Pattern, + _source_line: usize, +} + +impl Rule { + pub fn parse( + mut pattern: String, + base_path: impl AsRef, + _source_line: usize, + ) -> Result> { + if pattern.trim().is_empty() || pattern.starts_with('#') { + return Ok(None); + } + let negate = if pattern.starts_with('!') { + pattern.remove(0); + true + } else { + false + }; + let directory = if pattern.ends_with('/') { + pattern.pop(); + true + } else { + false + }; + let anchored = pattern.contains('/'); + let pattern = if anchored { + let base = format!("{}/{pattern}", base_path.as_ref().to_str().unwrap()); + if directory { + format!("{base}/**") + } else { + base + } + } else if !pattern.starts_with("**") { + let base = format!("**/{pattern}"); + if directory { + format!("{base}/**") + } else { + base + } + } else { + pattern + }; + Ok(Some(Self { + negate, + pattern: Pattern::new(&pattern)?, + 
_source_line, + })) + } +} + +#[derive(Debug)] +pub struct Gitignore { + rules: Vec, + _source_file: PathBuf, +} + +impl Gitignore { + /// Parses a `.gitignore` file at `path`. + /// + /// If `path` is a directory, attempts to read `{dir}/.gitignore`. + pub fn parse(path: impl AsRef) -> Result { + let mut path = canonicalize(path)?; + if path.is_dir() { + path = path.join(".gitignore"); + } + let reader = BufReader::new(File::open(&path)?); + let mut rules = Vec::new(); + for (line_nb, line) in reader.lines().enumerate() { + let line = line?; + if let Some(rule) = Rule::parse(line, path.parent().unwrap(), line_nb + 1)? { + rules.push(rule); + } + } + Ok(Self { + rules, + _source_file: path, + }) + } + + pub fn ignored(&self, path: impl AsRef) -> Result { + let path = canonicalize(path)?; + let match_opts = MatchOptions { + case_sensitive: true, + require_literal_separator: true, + require_literal_leading_dot: false, + }; + for rule in &self.rules { + println!("matching {} to {rule:?}", path.to_str().unwrap()); + let path_str = path.to_str().ok_or(Error::NonUtf8Path)?; + let to_match = if path.is_dir() { + format!("{path_str}/") + } else { + path_str.to_owned() + }; + if rule.pattern.matches_with(&to_match, match_opts) { + return Ok(!rule.negate); + } + } + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Once; + + use super::*; + + static INIT: Once = Once::new(); + + fn create_gitignore(rules: &str, name: &str) -> Gitignore { + INIT.call_once(|| { + std::env::set_current_dir(canonicalize("../..").unwrap()).unwrap(); + }); + std::fs::write(name, rules).unwrap(); + let gitignore = Gitignore::parse(name).unwrap(); + std::fs::remove_file(name).unwrap(); + gitignore + } + + #[test] + fn test_regular_pattern() { + let gitignore = create_gitignore("Cargo.toml", "regular_pattern"); + assert!(gitignore.ignored("Cargo.toml").unwrap()); + assert!(!gitignore.ignored("LICENSE").unwrap()); + } + + #[test] + fn test_glob_pattern() { + let gitignore = 
create_gitignore("crates/**/Cargo.toml", "glob_pattern"); + assert!(gitignore.ignored("crates/gitignore/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/lsp-client/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/mock_server/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/testbed/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/src/main.rs").unwrap()); + assert!(!gitignore.ignored("crates/lsp-client/src/lib.rs").unwrap()); + assert!(!gitignore.ignored("crates/testbed/src/main.rs").unwrap()); + } + + #[test] + fn test_negate_glob_pattern() { + let gitignore = create_gitignore("!crates/**/Cargo.toml", "negate_glob_pattern"); + assert!(!gitignore.ignored("crates/gitignore/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/lsp-client/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/mock_server/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/testbed/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/src/main.rs").unwrap()); + assert!(!gitignore.ignored("crates/lsp-client/src/lib.rs").unwrap()); + assert!(!gitignore.ignored("crates/testbed/src/main.rs").unwrap()); + } + + #[test] + fn test_start_glob_pattern() { + let gitignore = create_gitignore("**/crates/", "start_glob_pattern"); + assert!(gitignore.ignored("crates/").unwrap()); + assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(gitignore + .ignored("crates/testbed/repositories/simple/src/main.rs") + .unwrap()); + assert!(!gitignore.ignored("xtask/").unwrap()); + assert!(!gitignore.ignored("README.md").unwrap()); + } + + #[test] + fn test_relative_path() { + let gitignore = create_gitignore("crates/", "relative_path"); + assert!(gitignore.ignored("crates/").unwrap()); + assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(gitignore + 
.ignored("crates/testbed/repositories/simple/src/main.rs") + .unwrap()); + assert!(!gitignore.ignored("xtask/").unwrap()); + assert!(!gitignore.ignored("README.md").unwrap()); + } + + #[test] + fn test_negate_pattern() { + let gitignore = create_gitignore( + "!Cargo.toml\n\ + Cargo.toml", + "negate_pattern", + ); + assert!(!gitignore.ignored("Cargo.toml").unwrap()); + } +} diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index 2df062f..dd12938 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -9,6 +9,12 @@ name = "llm-ls" [dependencies] arrow-array = "47" arrow-schema = "47" +candle = { version = "0.3", package = "candle-core", default-features = false } +candle-nn = "0.3" +candle-transformers = "0.3" +futures-util = "0.3" +gitignore = { path = "../gitignore" } +hf-hub = { version = "0.3", features = ["tokio"] } home = "0.5" ropey = { version = "1.6", default-features = false, features = [ "simd", @@ -20,7 +26,8 @@ reqwest = { version = "0.11", default-features = false, features = [ ] } serde = { version = "1", features = ["derive"] } serde_json = "1" -tokenizers = { version = "0.14", default-features = false, features = ["onig"] } +thiserror = "1" +tokenizers = { version = "0.15", default-features = false, features = ["onig"] } tokio = { version = "1", features = [ "fs", "io-std", @@ -37,8 +44,9 @@ tree-sitter-bash = "0.20" tree-sitter-c = "0.20" tree-sitter-cpp = "0.20" tree-sitter-c-sharp = "0.20" +tree-sitter-css = "0.20" tree-sitter-elixir = "0.1" -tree-sitter-erlang = "0.2" +tree-sitter-erlang = "0.3" tree-sitter-go = "0.20" tree-sitter-html = "0.19" tree-sitter-java = "0.20" @@ -48,6 +56,7 @@ tree-sitter-kotlin = "0.3.1" tree-sitter-lua = "0.0.19" tree-sitter-md = "0.1" tree-sitter-objc = "3" +tree-sitter-php = "0.20.0" tree-sitter-python = "0.20" tree-sitter-r = "0.19" tree-sitter-ruby = "0.20" diff --git a/crates/llm-ls/src/adaptors.rs b/crates/llm-ls/src/adaptors.rs index 553fc87..3564235 100644 --- 
a/crates/llm-ls/src/adaptors.rs +++ b/crates/llm-ls/src/adaptors.rs @@ -62,6 +62,10 @@ fn build_api_headers(api_token: Option<&String>, ide: Ide) -> Result Result, jsonrpc::Error> { + // TODO: + // APIResponse::Generation(gen) => Ok(vec![gen]), + // APIResponse::Generations(gens) => Ok(gens), + // APIResponse::Error(err) => Err(err), let generations = match serde_json::from_str(text).map_err(internal_error)? { APIResponse::Generation(gen) => vec![gen], APIResponse::Generations(gens) => gens, diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs new file mode 100644 index 0000000..b6cbef3 --- /dev/null +++ b/crates/llm-ls/src/error.rs @@ -0,0 +1,65 @@ +use std::fmt::Display; + +use hf_hub::api::tokio::ApiError; +use tower_lsp::jsonrpc::Error as LspError; +use tracing::error; + +use crate::APIError; + +pub fn internal_error(err: E) -> LspError { + let err_msg = err.to_string(); + error!(err_msg); + LspError { + code: tower_lsp::jsonrpc::ErrorCode::InternalError, + message: err_msg.into(), + data: None, + } +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("backend api error: {0}")] + Api(#[from] APIError), + #[error("arrow error: {0}")] + Arrow(#[from] arrow_schema::ArrowError), + #[error("candle error: {0}")] + Candle(#[from] candle::Error), + #[error("gitignore error: {0}")] + Gitignore(#[from] gitignore::Error), + #[error("hugging face api error: {0}")] + HfApi(#[from] ApiError), + #[error("http error: {0}")] + Http(#[from] reqwest::Error), + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("invalid header value: {0}")] + InvalidHeaderValue(#[from] reqwest::header::InvalidHeaderValue), + #[error("invalid repository id")] + InvalidRepositoryId, + #[error("invalid tokenizer path")] + InvalidTokenizerPath, + #[error("index out of bounds: {0}")] + OutOfBoundIndexing(usize), + #[error("line out of bounds: {0}")] + OutOfBoundLine(usize), + #[error("slice out of bounds: {0}..{1}")] + OutOfBoundSlice(usize, usize), + 
#[error("rope error: {0}")] + Rope(#[from] ropey::Error), + #[error("serde json error: {0}")] + SerdeJson(#[from] serde_json::Error), + #[error("tokenizer error: {0}")] + Tokenizer(#[from] tokenizers::Error), + #[error("tokio join error: {0}")] + TokioJoin(#[from] tokio::task::JoinError), + #[error("vector db error: {0}")] + VectorDb(#[from] vectordb::error::Error), +} + +pub type Result = std::result::Result; + +impl From for LspError { + fn from(err: Error) -> Self { + internal_error(err) + } +} diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 320519b..2d0679b 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -1,6 +1,8 @@ use adaptors::{adapt_body, adapt_headers, parse_generations}; use document::Document; +use error::{Error, Result}; use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, USER_AGENT}; +use retrieval::SnippetRetriever; use ropey::Rope; use serde::{Deserialize, Deserializer, Serialize}; use std::collections::HashMap; @@ -10,8 +12,10 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokenizers::Tokenizer; use tokio::io::AsyncWriteExt; -use tokio::sync::RwLock; -use tower_lsp::jsonrpc::{Error, Result}; +use tokio::sync::{mpsc, RwLock}; +use tower_lsp::jsonrpc::Result as LspResult; +use tower_lsp::lsp_types::notification::Progress; +use tower_lsp::lsp_types::request::WorkDoneProgressCreate; use tower_lsp::lsp_types::*; use tower_lsp::{Client, LanguageServer, LspService, Server}; use tracing::{debug, error, info, info_span, warn, Instrument}; @@ -19,8 +23,11 @@ use tracing_appender::rolling; use tracing_subscriber::EnvFilter; use uuid::Uuid; +use crate::error::internal_error; + mod adaptors; mod document; +mod error; mod language_id; mod retrieval; @@ -30,10 +37,10 @@ pub const VERSION: &str = env!("CARGO_PKG_VERSION"); const HF_INFERENCE_API_HOSTNAME: &str = "api-inference.huggingface.co"; fn get_position_idx(rope: &Rope, row: usize, col: usize) -> Result { - 
Ok(rope.try_line_to_char(row).map_err(internal_error)? + Ok(rope.try_line_to_char(row)? + col.min( rope.get_line(row.min(rope.len_lines().saturating_sub(1))) - .ok_or_else(|| internal_error(format!("failed to find line at {row}")))? + .ok_or(Error::OutOfBoundLine(row))? .len_chars() .saturating_sub(1), )) @@ -82,15 +89,11 @@ fn should_complete(document: &Document, position: Position) -> Result Result &str { + &self.error + } +} + impl Display for APIError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.error) @@ -219,6 +218,10 @@ struct Backend { workspace_folders: Arc>>>, tokenizer_map: Arc>>>, unauthenticated_warn_at: Arc>, + snippet_retriever: Arc>, + supports_progress_bar: Arc>, + cancel_snippet_build_tx: mpsc::Sender<()>, + cancel_snippet_build_rx: Arc>>, } #[derive(Debug, Deserialize, Serialize)] @@ -294,16 +297,6 @@ struct CompletionResult { completions: Vec, } -pub fn internal_error(err: E) -> Error { - let err_msg = err.to_string(); - error!(err_msg); - Error { - code: tower_lsp::jsonrpc::ErrorCode::InternalError, - message: err_msg.into(), - data: None, - } -} - fn build_prompt( pos: Position, text: &Rope, @@ -332,10 +325,7 @@ fn build_prompt( if let Some(before_line) = before_line { let before_line = before_line.to_string(); let tokens = if let Some(tokenizer) = tokenizer.clone() { - tokenizer - .encode(before_line.clone(), false) - .map_err(internal_error)? - .len() + tokenizer.encode(before_line.clone(), false)?.len() } else { before_line.len() }; @@ -348,10 +338,7 @@ fn build_prompt( if let Some(after_line) = after_line { let after_line = after_line.to_string(); let tokens = if let Some(tokenizer) = tokenizer.clone() { - tokenizer - .encode(after_line.clone(), false) - .map_err(internal_error)? 
- .len() + tokenizer.encode(after_line.clone(), false)?.len() } else { after_line.len() }; @@ -387,10 +374,7 @@ fn build_prompt( } let line = line.to_string(); let tokens = if let Some(tokenizer) = tokenizer.clone() { - tokenizer - .encode(line.clone(), false) - .map_err(internal_error)? - .len() + tokenizer.encode(line.clone(), false)?.len() } else { line.len() }; @@ -425,8 +409,7 @@ async fn request_completion( .json(&json) .headers(headers) .send() - .await - .map_err(internal_error)?; + .await?; let model = ¶ms.model; let generations = parse_generations( @@ -437,7 +420,7 @@ async fn request_completion( info!( model, compute_generations_ms = time, - generations = serde_json::to_string(&generations).map_err(internal_error)?, + generations = serde_json::to_string(&generations)?, "{model} computed generations in {time} ms" ); generations @@ -483,20 +466,13 @@ async fn download_tokenizer_file( if to.as_ref().exists() { return Ok(()); } - tokio::fs::create_dir_all( - to.as_ref() - .parent() - .ok_or_else(|| internal_error("invalid tokenizer path"))?, - ) - .await - .map_err(internal_error)?; + tokio::fs::create_dir_all(to.as_ref().parent().ok_or(Error::InvalidTokenizerPath)?).await?; let headers = build_headers(api_token, ide)?; let mut file = tokio::fs::OpenOptions::new() .write(true) .create(true) .open(to) - .await - .map_err(internal_error)?; + .await?; let http_client = http_client.clone(); let url = url.to_owned(); tokio::spawn(async move { @@ -528,8 +504,7 @@ async fn download_tokenizer_file( } }; }) - .await - .map_err(internal_error)?; + .await?; Ok(()) } @@ -555,7 +530,15 @@ async fn get_tokenizer( } }, TokenizerConfig::HuggingFace { repository } => { - let path = cache_dir.as_ref().join(repository).join("tokenizer.json"); + let (org, repo) = repository + .split_once('/') + .map(|(org, repo)| (org, repo)) + .ok_or(Error::InvalidRepositoryId)?; + let path = cache_dir + .as_ref() + .join(org) + .join(repo) + .join("tokenizer.json"); let url = 
format!("https://huggingface.co/{repository}/resolve/main/tokenizer.json"); download_tokenizer_file(http_client, &url, api_token, &path, ide).await?; @@ -596,7 +579,7 @@ fn build_url(model: &str) -> String { } impl Backend { - async fn get_completions(&self, params: CompletionParams) -> Result { + async fn get_completions(&self, params: CompletionParams) -> LspResult { let request_id = Uuid::new_v4(); let span = info_span!("completion_request", %request_id); async move { @@ -673,7 +656,7 @@ impl Backend { }.instrument(span).await } - async fn accept_completion(&self, accepted: AcceptedCompletion) -> Result<()> { + async fn accept_completion(&self, accepted: AcceptedCompletion) -> LspResult<()> { info!( request_id = %accepted.request_id, accepted_position = accepted.accepted_completion, @@ -683,7 +666,7 @@ impl Backend { Ok(()) } - async fn reject_completion(&self, rejected: RejectedCompletion) -> Result<()> { + async fn reject_completion(&self, rejected: RejectedCompletion) -> LspResult<()> { info!( request_id = %rejected.request_id, shown_completions = serde_json::to_string(&rejected.shown_completions).map_err(internal_error)?, @@ -695,8 +678,23 @@ impl Backend { #[tower_lsp::async_trait] impl LanguageServer for Backend { - async fn initialize(&self, params: InitializeParams) -> Result { - *self.workspace_folders.write().await = params.workspace_folders; + async fn initialize(&self, params: InitializeParams) -> LspResult { + *self.workspace_folders.write().await = params.workspace_folders.clone(); + *self.supports_progress_bar.write().await = params + .capabilities + .window + .map(|window| window.work_done_progress.unwrap_or(false)) + .unwrap_or(false); + let position_encoding = params.capabilities.general.and_then(|general_cap| { + general_cap.position_encodings.and_then(|encodings| { + if encodings.contains(&PositionEncodingKind::UTF8) { + Some(PositionEncodingKind::UTF8) + } else { + // self.client.show_message(MessageType::WARNING, "llm-ls only supports UTF-8 
position encoding, defaulting to UTF-16 which might cause offsetting errors").await; + None + } + }) + }); Ok(InitializeResult { server_info: Some(ServerInfo { name: "llm-ls".to_owned(), @@ -706,12 +704,78 @@ impl LanguageServer for Backend { text_document_sync: Some(TextDocumentSyncCapability::Kind( TextDocumentSyncKind::INCREMENTAL, )), + position_encoding, ..Default::default() }, }) } async fn initialized(&self, _: InitializedParams) { + let client = self.client.clone(); + let snippet_retriever = self.snippet_retriever.clone(); + let supports_progress_bar = self.supports_progress_bar.clone(); + let workspace_folders = self.workspace_folders.clone(); + let token = NumberOrString::Number(42); + + let token_copy = NumberOrString::Number(42); + let handle = tokio::spawn(async move { + let guard = workspace_folders.read().await; + if let Some(workspace_folders) = guard.as_ref() { + if *supports_progress_bar.read().await { + match client + .send_request::(WorkDoneProgressCreateParams { + token: token.clone(), + }) + .await + { + Ok(_) => (), + Err(err) => { + error!("err: {err}"); + return; + } + }; + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( + WorkDoneProgressBegin { + title: "creating workspace embeddings".to_owned(), + ..Default::default() + }, + )), + }) + .await; + } + snippet_retriever + .write() + .await + .build_workspace_snippets( + client.clone(), + token.clone(), + workspace_folders[0].uri.path(), + ) + .await + .expect("failed to build workspace snippets"); + } + }); + let mut guard = self.cancel_snippet_build_rx.write().await; + tokio::select! 
{ + _ = handle => { + if *self.supports_progress_bar.read().await { + self.client + .send_notification::(ProgressParams { + token: token_copy, + value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( + WorkDoneProgressEnd { + ..Default::default() + }, + )), + }) + .await; + } + }, + _ = guard.recv() => return, + } self.client .log_message(MessageType::INFO, "llm-ls initialized") .await; @@ -759,6 +823,7 @@ impl LanguageServer for Backend { if let Some(doc) = doc { for change in ¶ms.content_changes { if let Some(range) = change.range { + // TODO: self.snippet_retriever.write().await.update_document(uri).await?; match doc.change(range, &change.text).await { Ok(()) => info!("{uri} changed"), Err(err) => error!("error when changing {uri}: {err}"), @@ -790,8 +855,12 @@ impl LanguageServer for Backend { info!("{uri} closed"); } - async fn shutdown(&self) -> Result<()> { + async fn shutdown(&self) -> LspResult<()> { debug!("shutdown"); + self.cancel_snippet_build_tx + .send(()) + .await + .map_err(internal_error)?; Ok(()) } } @@ -799,15 +868,12 @@ impl LanguageServer for Backend { fn build_headers(api_token: Option<&String>, ide: Ide) -> Result { let mut headers = HeaderMap::new(); let user_agent = format!("{NAME}/{VERSION}; rust/unknown; ide/{ide:?}"); - headers.insert( - USER_AGENT, - HeaderValue::from_str(&user_agent).map_err(internal_error)?, - ); + headers.insert(USER_AGENT, HeaderValue::from_str(&user_agent)?); if let Some(api_token) = api_token { headers.insert( AUTHORIZATION, - HeaderValue::from_str(&format!("Bearer {api_token}")).map_err(internal_error)?, + HeaderValue::from_str(&format!("Bearer {api_token}"))?, ); } @@ -847,6 +913,12 @@ async fn main() { .build() .expect("failed to build reqwest unsafe client"); + let snippet_retriever = Arc::new(RwLock::new( + SnippetRetriever::new(cache_dir.clone(), 20, 10) + .await + .expect("failed to initialise snippet retriever"), + )); + let (cancel_snippet_build_tx, rx) = mpsc::channel(1); let (service, socket) = 
LspService::build(|client| Backend { cache_dir, client, @@ -860,6 +932,10 @@ async fn main() { .checked_sub(MAX_WARNING_REPEAT) .expect("instant to be in bounds"), )), + snippet_retriever, + supports_progress_bar: Arc::new(RwLock::new(false)), + cancel_snippet_build_tx, + cancel_snippet_build_rx: Arc::new(RwLock::new(rx)), }) .custom_method("llm-ls/getCompletions", Backend::get_completions) .custom_method("llm-ls/acceptCompletion", Backend::accept_completion) diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index f207a8f..e7d6da6 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -1,31 +1,217 @@ -use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt32Array}; +use crate::error::Result; +use arrow_array::builder::ListBuilder; +use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, StringArray, UInt32Array}; use arrow_schema::{DataType, Field, Schema}; +use candle::utils::{cuda_is_available, metal_is_available}; +use candle::{Device, Tensor}; +use candle_nn::VarBuilder; +use candle_transformers::models::bert::{BertModel, Config, DTYPE}; +use futures_util::StreamExt; +use gitignore::Gitignore; +use hf_hub::{api::tokio::Api, Repo, RepoType}; +use std::collections::VecDeque; +use std::path::Path; use std::{path::PathBuf, sync::Arc}; +use tokenizers::Tokenizer; +use tokio::io::AsyncReadExt; +use tokio::task::spawn_blocking; +use tower_lsp::lsp_types::notification::Progress; +use tower_lsp::lsp_types::{ + NumberOrString, ProgressParams, ProgressParamsValue, Range, WorkDoneProgress, + WorkDoneProgressReport, +}; +use tower_lsp::Client; +use tracing::{error, info, warn}; use vectordb::{Database, Table}; +// TODO: +// - create sliding window and splitting of files logic +// - handle ipynb +// - handle updates + +async fn file_is_empty(file_path: impl AsRef) -> Result { + let mut content = String::new(); + tokio::fs::File::open(&file_path) + .await? 
+ .read_to_string(&mut content) + .await?; + Ok(content.trim().is_empty()) +} + +fn is_code_file(file_name: &Path) -> bool { + let code_extensions = [ + "ada", + "adb", + "ads", + "c", + "h", + "cpp", + "hpp", + "cc", + "cxx", + "hxx", + "cs", + "css", + "scss", + "sass", + "less", + "java", + "js", + "jsx", + "ts", + "tsx", + "php", + "phtml", + "html", + "xml", + "json", + "yaml", + "yml", + "ini", + "toml", + "cfg", + "conf", + "sh", + "bash", + "zsh", + "ps1", + "psm1", + "bat", + "cmd", + "py", + "rb", + "swift", + "pl", + "pm", + "t", + "r", + "rs", + "go", + "kt", + "kts", + "sql", + "md", + "markdown", + "txt", + "lua", + "ex", + "exs", + "erl", + "rb", + "scala", + "sc", + "ml", + "mli", + "zig", + "clj", + "cljs", + "cljc", + "cljx", + "cr", + "Dockerfile", + "fs", + "fsi", + "fsx", + "hs", + "lhs", + "groovy", + "jsonnet", + "jl", + "nim", + "rkt", + "scm", + "tf", + "nix", + "vue", + "svelte", + "lisp", + "lsp", + "el", + "elc", + "eln", + ]; + + let extension = file_name.extension().and_then(|ext| ext.to_str()); + + if let Some(ext) = extension { + code_extensions.contains(&ext.to_lowercase().as_str()) + } else { + false + } +} + +async fn build_model_and_tokenizer() -> Result<(BertModel, Tokenizer)> { + let device = device(false)?; + let model_id = "bigcode/starencoder".to_string(); + let revision = "main".to_string(); + let repo = Repo::with_revision(model_id, RepoType::Model, revision); + let (config_filename, tokenizer_filename, weights_filename) = { + let api = Api::new()?; + let api = api.repo(repo); + let config = api.get("config.json").await?; + let tokenizer = api.get("tokenizer.json").await?; + let weights = api.get("pytorch_model.bin").await?; + (config, tokenizer, weights) + }; + let config = tokio::fs::read_to_string(config_filename).await?; + let config: Config = serde_json::from_str(&config)?; + let tokenizer = Tokenizer::from_file(tokenizer_filename)?; + + let vb = VarBuilder::from_pth(&weights_filename, DTYPE, &device)?; + let model = 
BertModel::load(vb, &config)?; + Ok((model, tokenizer)) +} + +fn device(cpu: bool) -> Result { + if cpu { + Ok(Device::Cpu) + } else if cuda_is_available() { + Ok(Device::new_cuda(0)?) + } else if metal_is_available() { + Ok(Device::new_metal(0)?) + } else { + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + { + warn!("Running on CPU, to run on GPU(metal), use the `-metal` binary"); + } + #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))] + { + warn!("Running on CPU, to run on GPU, use the `-cuda` binary"); + } + Ok(Device::Cpu) + } +} + async fn initialse_database(cache_path: PathBuf) -> Table { let uri = cache_path.join("database"); let db = Database::connect(uri.to_str().expect("path should be utf8")) .await .expect("failed to open database"); - let table = match db.open_table("code-slices").await { + match db.open_table("code-slices").await { Ok(table) => table, Err(vectordb::error::Error::TableNotFound { .. }) => { let schema = Schema::new(vec![ - Field::new("workspace_root", DataType::Utf8, false), + Field::new( + "vector", + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::Float32, true)), + 768, + ), + false, + ), + Field::new("content", DataType::Utf8, false), Field::new("file_url", DataType::Utf8, false), Field::new("start_line_no", DataType::UInt32, false), Field::new("end_line_no", DataType::UInt32, false), - Field::new("window_size", DataType::UInt32, false), ]); let batch = RecordBatch::try_new( Arc::new(schema), vec![ + Arc::new(ListBuilder::new(Float32Array::builder(768)).finish()), Arc::new(StringArray::from(Vec::<&str>::new())), Arc::new(StringArray::from(Vec::<&str>::new())), Arc::new(UInt32Array::from(Vec::::new())), Arc::new(UInt32Array::from(Vec::::new())), - Arc::new(UInt32Array::from(Vec::::new())), ], ) .expect("failure while defining schema"); @@ -38,25 +224,196 @@ async fn initialse_database(cache_path: PathBuf) -> Table { .expect("failed to create table") } Err(err) => panic!("error while opening 
table: {}", err), - }; - table + } } pub(crate) struct SnippetRetriever { db: Table, + model: Arc, + tokenizer: Tokenizer, + window_size: usize, + window_step: usize, } impl SnippetRetriever { /// # Panics /// /// Panics if the database cannot be initialised. - pub(crate) async fn new(cache_path: PathBuf) -> Self { - Self { + pub(crate) async fn new( + cache_path: PathBuf, + window_size: usize, + window_step: usize, + ) -> Result { + let (model, tokenizer) = build_model_and_tokenizer().await?; + Ok(Self { db: initialse_database(cache_path).await, + model: Arc::new(model), + tokenizer, + window_size, + window_step, + }) + } + + pub(crate) async fn build_workspace_snippets( + &mut self, + client: Client, + token: NumberOrString, + workspace_root: &str, + ) -> Result<()> { + let workspace_root = PathBuf::from(workspace_root); + let mut files = Vec::new(); + let gitignore = Gitignore::parse(&workspace_root)?; + + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( + WorkDoneProgressReport { + message: Some("listing workspace files".to_owned()), + ..Default::default() + }, + )), + }) + .await; + let mut stack = VecDeque::new(); + stack.push_back(workspace_root); + while let Some(src) = stack.pop_back() { + let mut entries = tokio::fs::read_dir(&src).await?; + while let Some(entry) = entries.next_entry().await? { + let entry_type = entry.file_type().await?; + + let src_path = entry.path(); + + if gitignore.ignored(&src_path)? { + continue; + } + + if entry_type.is_dir() { + stack.push_back(src_path); + } else if entry_type.is_file() + && is_code_file(&src_path) + && !file_is_empty(&src_path).await? 
+ { + files.push(src_path); + } + } + } + for (i, file) in files.iter().enumerate() { + let file_url = file.to_str().expect("file path should be utf8").to_string(); + info!("adding {file_url} to embeddings"); + self.add_document(file_url).await?; + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( + WorkDoneProgressReport { + message: Some(format!("({i}/{}) done", files.len())), + ..Default::default() + }, + )), + }) + .await; } + + Ok(()) } - pub(crate) async fn build_workspace_snippets(workspace_root: String) {} + pub(crate) async fn add_document(&mut self, file_url: String) -> Result<()> { + let file = tokio::fs::read_to_string(&file_url).await?; + let lines = file.split('\n').collect::>(); + let mut embeddings = ListBuilder::new(Float32Array::builder(768)); + let mut snippets = Vec::new(); + let mut file_urls = Vec::new(); + let mut start_line_no = Vec::new(); + let mut end_line_no = Vec::new(); + for start_line in (0..lines.len()).step_by(self.window_step) { + let end_line = (start_line + self.window_size - 1).min(lines.len()); + if self + .exists(format!( + "file_url = {file_url} AND start_line_no = {start_line} AND end_line_no = {end_line}" + )) + .await? + { + continue; + } + let window = lines[start_line..end_line].to_vec(); + let snippet = window.join("\n"); + let tokenizer = self + .tokenizer + .with_padding(None) + .with_truncation(None)? + .clone(); + let model = self.model.clone(); + let snippet_clone = snippet.clone(); + let result = spawn_blocking(move || -> Result> { + let tokens = tokenizer.encode(snippet_clone, true)?.get_ids().to_vec(); + let token_ids = Tensor::new(&tokens[..], &model.device)?.unsqueeze(0)?; + let token_type_ids = token_ids.zeros_like()?; + let embedding = model.forward(&token_ids, &token_type_ids)?; + let (_n_sentence, n_tokens, _hidden_size) = embedding.dims3()?; + let embedding = (embedding.sum(1)? 
/ (n_tokens as f64))?; + let embedding = embedding.get(0)?.to_vec1::()?; + Ok(embedding) + }) + .await?; + let embedding = match result { + Ok(e) => e, + Err(err) => { + error!( + "error generating embedding for {file_url}[{start_line}, {end_line}]: {err}", + ); + continue; + } + }; + embeddings.values().append_slice(&embedding); + embeddings.append(true); + snippets.push(snippet.clone()); + file_urls.push(file_url.clone()); + start_line_no.push(start_line as u32); + end_line_no.push(end_line as u32); + } - pub(crate) async fn update_document(file_url: String) {} + let batch = RecordBatch::try_new( + self.db.schema(), + vec![ + Arc::new(embeddings.finish()), + Arc::new(StringArray::from(snippets)), + Arc::new(StringArray::from(file_urls)), + Arc::new(UInt32Array::from(start_line_no)), + Arc::new(UInt32Array::from(end_line_no)), + ], + )?; + self.db + .add( + RecordBatchIterator::new(vec![batch].into_iter().map(Ok), self.db.schema()), + None, + ) + .await?; + Ok(()) + } + + pub(crate) async fn update_document(&mut self, file_url: String, range: Range) { + // TODO: + // - delete elements matching Range + // - keep the smallest start line to create new windows from + // - build new windows based on range + // - insert them into table + } + + pub(crate) async fn exists(&self, filter: String) -> Result { + let mut results = self + .db + .search(Float32Array::from(vec![0.0; 768])) + .filter(Some(filter)) + .execute() + .await?; + let first = results.next().await; + let exists = first.is_some(); + info!("exists: {exists}"); + Ok(exists) + } + + pub(crate) async fn search(&self, snippet: String, filter: &str) -> Result { + Ok("toto".to_string()) + } } From c8b77a9b511678bf2998ccdf0a1f951b3935f92b Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Sun, 28 Jan 2024 22:32:37 +0100 Subject: [PATCH 03/22] feat: add suppot for tcp connections to enable debugging --- Cargo.lock | 541 +++++++++++++++++---------------- crates/gitignore/src/lib.rs | 1 - crates/llm-ls/Cargo.toml | 17 
+- crates/llm-ls/src/main.rs | 37 ++- crates/llm-ls/src/retrieval.rs | 82 +++-- 5 files changed, 388 insertions(+), 290 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6eda475..e3d7358 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,21 +51,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - [[package]] name = "allocator-api2" version = "0.2.16" @@ -155,9 +140,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" +checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" dependencies = [ "ahash", "arrow-arith", @@ -177,9 +162,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956" +checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -192,9 +177,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" +checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" dependencies = [ "ahash", "arrow-buffer", @@ -209,9 +194,9 @@ 
dependencies = [ [[package]] name = "arrow-buffer" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195" +checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" dependencies = [ "bytes", "half", @@ -220,15 +205,16 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee" +checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "base64 0.21.5", "chrono", "comfy-table", "half", @@ -238,9 +224,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ef855dc6b126dc197f43e061d4de46b9d4c033aa51c2587657f7508242cef1" +checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" dependencies = [ "arrow-array", "arrow-buffer", @@ -257,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a" +checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" dependencies = [ "arrow-buffer", "arrow-schema", @@ -269,9 +255,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a" +checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" dependencies = [ "arrow-array", "arrow-buffer", @@ -279,14 +265,14 @@ 
dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", - "zstd 0.12.4", + "zstd 0.13.0", ] [[package]] name = "arrow-json" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03d7e3b04dd688ccec354fe449aed56b831679f03e44ee2c1cfc4045067b69c" +checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" dependencies = [ "arrow-array", "arrow-buffer", @@ -304,9 +290,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4" +checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" dependencies = [ "arrow-array", "arrow-buffer", @@ -319,9 +305,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" +checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" dependencies = [ "ahash", "arrow-array", @@ -334,15 +320,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" +checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" [[package]] name = "arrow-select" -version = "47.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" +checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" dependencies = [ "ahash", "arrow-array", @@ -354,9 +340,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "47.0.0" +version = "49.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cebbb282d6b9244895f4a9a912e55e57bce112554c7fa91fcec5459cb421ab" +checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" dependencies = [ "arrow-array", "arrow-buffer", @@ -365,7 +351,7 @@ dependencies = [ "arrow-select", "num", "regex", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] @@ -894,27 +880,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "brotli" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - [[package]] name = "bumpalo" version = "3.14.0" @@ -1405,9 +1370,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7014432223f4d721cb9786cd88bb89e7464e0ba984d4a7f49db7787f5f268674" +checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0" dependencies = [ "ahash", "arrow", @@ -1429,13 +1394,11 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools", + "itertools 0.12.0", "log", "num_cpus", - "object_store", + "object_store 0.8.0", "parking_lot", - "parquet", - "percent-encoding", "pin-project-lite", "rand 0.8.5", "sqlparser", @@ -1448,9 +1411,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3903ed8f102892f17b48efa437f3542159241d41c564f0d1e78efdc5e663aa" +checksum = 
"548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613" dependencies = [ "ahash", "arrow", @@ -1459,17 +1422,17 @@ dependencies = [ "arrow-schema", "chrono", "half", + "libc", "num_cpus", - "object_store", - "parquet", + "object_store 0.8.0", "sqlparser", ] [[package]] name = "datafusion-execution" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "780b73b2407050e53f51a9781868593f694102c59e622de9a8aafc0343c4f237" +checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d" dependencies = [ "arrow", "chrono", @@ -1479,7 +1442,7 @@ dependencies = [ "futures", "hashbrown", "log", - "object_store", + "object_store 0.8.0", "parking_lot", "rand 0.8.5", "tempfile", @@ -1488,14 +1451,15 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24c382676338d8caba6c027ba0da47260f65ffedab38fda78f6d8043f607557c" +checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e" dependencies = [ "ahash", "arrow", "arrow-array", "datafusion-common", + "paste", "sqlparser", "strum", "strum_macros", @@ -1503,9 +1467,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f2904a432f795484fd45e29ded4537152adb60f636c05691db34fcd94c92c96" +checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618" dependencies = [ "arrow", "async-trait", @@ -1514,21 +1478,22 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown", - "itertools", + "itertools 0.12.0", "log", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] name = "datafusion-physical-expr" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"57b4968e9a998dc0476c4db7a82f280e2026b25f464e4aa0c3bb9807ee63ddfd" +checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", + "arrow-ord", "arrow-schema", "base64 0.21.5", "blake2", @@ -1540,8 +1505,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools", - "libc", + "itertools 0.12.0", "log", "md-5", "paste", @@ -1555,9 +1519,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd0d1fe54e37a47a2d58a1232c22786f2c28ad35805fdcd08f0253a8b0aaa90" +checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631" dependencies = [ "ahash", "arrow", @@ -1574,7 +1538,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools", + "itertools 0.12.0", "log", "once_cell", "parking_lot", @@ -1586,9 +1550,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "32.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b568d44c87ead99604d704f942e257c8a236ee1bbf890ee3e034ad659dcb2c21" +checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5" dependencies = [ "arrow", "arrow-schema", @@ -2029,8 +1993,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -2327,12 +2293,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "ipnet" version = "2.9.0" @@ -2348,6 +2308,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -2374,9 +2343,9 @@ dependencies = [ [[package]] name = "lance" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de45410d4b9564d1479aa9ce5098c1777ded4fa368ba46f80b06a5aa69e10ee0" +checksum = "9ec22c8a248c4d43df70cdb9cfb37dbf9ef52a78fe22f35e52f18ec4ad071fae" dependencies = [ "arrow", "arrow-arith", @@ -2401,24 +2370,28 @@ dependencies = [ "chrono", "dashmap", "datafusion", + "datafusion-physical-expr", "futures", "half", "http", + "itertools 0.12.0", "lance-arrow", "lance-core", "lance-datafusion", "lance-datagen", + "lance-file", "lance-index", + "lance-io", "lance-linalg", + "lance-table", "lazy_static", "log", "lru_time_cache", "moka", - "nohash-hasher", "num-traits", "num_cpus", - "object_store", - "ordered-float 3.9.2", + "object_store 0.9.0", + "ordered-float", "pin-project", "prost", "prost-build", @@ -2438,15 +2411,16 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "492886fc285f0fbcc114141cb0a214babf4c6e623391f36a5e95a9c65b9b8a86" +checksum = "2c1fa4216265df12914512e27f69e31ca75f1d05380150e1b66552f85da55553" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "getrandom", "half", "num-traits", "rand 0.8.5", @@ -2455,9 +2429,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fa37b4c270d49c0cbb1d0c826f85057aafc7b8dc560888fcbc9c6d168d3b950" +checksum = "66139c54511551e35f44ddea0b38a5863f5945896f462fbc696ff6edcff7da84" dependencies = [ "arrow-arith", "arrow-array", @@ -2469,9 +2443,6 @@ dependencies = [ 
"arrow-select", "async-recursion", "async-trait", - "aws-config", - "aws-credential-types", - "aws-sdk-dynamodb", "byteorder", "bytes", "chrono", @@ -2486,7 +2457,7 @@ dependencies = [ "moka", "num-traits", "num_cpus", - "object_store", + "object_store 0.9.0", "pin-project", "prost", "prost-build", @@ -2505,13 +2476,15 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88c5f299e03fa65505d8d0a206ad9fe51871fbd7d9ee9859e3691677ee2b2104" +checksum = "49ce73fa8ab31b108522e5c5867d11619fef9c8355f419754b7d8d190c07b9f4" dependencies = [ "arrow", "arrow-array", + "arrow-ord", "arrow-schema", + "async-trait", "datafusion", "datafusion-common", "datafusion-expr", @@ -2519,13 +2492,14 @@ dependencies = [ "futures", "lance-arrow", "lance-core", + "tokio", ] [[package]] name = "lance-datagen" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b673feb8202b44bf740f495aeb26e902d172dc12af7a93e0083b9d87e1a82980" +checksum = "dc8a9ccb2cdddf3b721b9a431e5b5069b3a5dfbaa3a2baa4bc1c7c0498326bc7" dependencies = [ "arrow", "arrow-array", @@ -2536,11 +2510,44 @@ dependencies = [ "rand_xoshiro", ] +[[package]] +name = "lance-file" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98387578b9b9841120fea2a574842ca5c3cce4db687b745d6a9b07452d3a0484" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "byteorder", + "bytes", + "datafusion-common", + "futures", + "lance-arrow", + "lance-core", + "lance-io", + "num-traits", + "num_cpus", + "object_store 0.9.0", + "prost", + "prost-build", + "roaring", + "snafu", + "tokio", + "tracing", +] + [[package]] name = "lance-index" -version = "0.8.17" +version = "0.9.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16e3b195987247db991ca339320b5c0c2d6a6aa0b800503e8b0c9fd0144edcc" +checksum = "495973c420530d8673ac1a0aa287e8986345a76f45b5a363513032040d5c6e74" dependencies = [ "arrow", "arrow-arith", @@ -2559,31 +2566,72 @@ dependencies = [ "lance-arrow", "lance-core", "lance-datafusion", + "lance-file", + "lance-io", "lance-linalg", + "lance-table", "log", - "nohash-hasher", "num-traits", "num_cpus", - "object_store", + "object_store 0.9.0", "pin-project", "prost", "prost-build", "rand 0.8.5", "roaring", + "rustc_version", "serde", "serde_json", "snafu", + "tempfile", "tokio", "tracing", ] +[[package]] +name = "lance-io" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d332fc360fd44fa141416ad7ed039a7529ffff045c3c23427b9b38fac19e8f7e" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "aws-config", + "aws-credential-types", + "byteorder", + "bytes", + "chrono", + "futures", + "lance-arrow", + "lance-core", + "lazy_static", + "num_cpus", + "object_store 0.9.0", + "pin-project", + "prost", + "prost-build", + "shellexpand", + "snafu", + "tokio", + "tracing", + "url", +] + [[package]] name = "lance-linalg" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9303c6c271355dae3963f06b74671f4de4e9356ae1fef2b9ae0fe7dff2ffabc" +checksum = "f7110faad69079b4abad8cf2e754458ad2fde7b3860b0ba59ea6f85b54c60280" dependencies = [ "arrow-array", + "arrow-ord", "arrow-schema", "arrow-select", "cc", @@ -2598,11 +2646,49 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-table" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b729632272b766e84447a70a56e7fec4a3b8ad4035367ab2f41cfcad44dc6cd4" +dependencies = [ + "arrow-array", + 
"arrow-buffer", + "arrow-ipc", + "arrow-schema", + "async-trait", + "aws-credential-types", + "aws-sdk-dynamodb", + "byteorder", + "bytes", + "chrono", + "futures", + "lance-arrow", + "lance-core", + "lance-file", + "lance-io", + "lazy_static", + "log", + "object_store 0.9.0", + "prost", + "prost-build", + "prost-types", + "rand 0.8.5", + "roaring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "uuid", +] + [[package]] name = "lance-testing" -version = "0.8.17" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a54bf786b0fe66d134ab8705b191a057ed2f8a656988cd5950793f2a8b1727a8" +checksum = "b71d1b344dd4f25b4f2e3279c4abfeee13c5e88d52fe6818dce57e59c0d29c42" dependencies = [ "arrow-array", "arrow-schema", @@ -2719,10 +2805,13 @@ dependencies = [ "candle-core", "candle-nn", "candle-transformers", + "clap", "futures-util", "gitignore", "hf-hub", "home", + "lance-linalg", + "lsp-client", "reqwest", "ropey", "serde", @@ -2809,26 +2898,6 @@ dependencies = [ "url", ] -[[package]] -name = "lz4" -version = "1.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "mach2" version = "0.4.1" @@ -3021,12 +3090,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "nohash-hasher" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" - [[package]] name = "nom" version = "7.1.3" @@ -3152,9 +3215,30 @@ dependencies = [ [[package]] name = "object_store" -version = "0.7.1" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.11.0", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" +checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" dependencies = [ "async-trait", "base64 0.21.5", @@ -3163,14 +3247,14 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools", + "itertools 0.12.0", "parking_lot", "percent-encoding", "quick-xml", "rand 0.8.5", "reqwest", - "ring 0.16.20", - "rustls-pemfile", + "ring 0.17.5", + "rustls-pemfile 2.0.0", "serde", "serde_json", "snafu", @@ -3258,15 +3342,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - [[package]] name = "ordered-float" version = "3.9.2" @@ -3311,40 +3386,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "parquet" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.21.5", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "hashbrown", - "lz4", - "num", - "num-bigint", - 
"object_store", - "paste", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd 0.12.4", -] - [[package]] name = "parse-zoneinfo" version = "0.3.0" @@ -3554,7 +3595,7 @@ checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" dependencies = [ "bytes", "heck", - "itertools", + "itertools 0.11.0", "log", "multimap", "once_cell", @@ -3575,7 +3616,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" dependencies = [ "anyhow", - "itertools", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.39", @@ -3631,9 +3672,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" dependencies = [ "memchr", "serde", @@ -3751,7 +3792,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" dependencies = [ "either", - "itertools", + "itertools 0.11.0", "rayon", ] @@ -3885,7 +3926,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile", + "rustls-native-certs", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", @@ -4013,7 +4055,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "schannel", "security-framework", ] @@ -4027,6 +4069,22 @@ dependencies = [ "base64 0.21.5", ] +[[package]] +name = "rustls-pemfile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" 
+dependencies = [ + "base64 0.21.5", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -4333,12 +4391,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "snap" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" - [[package]] name = "socket2" version = "0.4.10" @@ -4385,9 +4437,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" dependencies = [ "log", "sqlparser_derive", @@ -4395,13 +4447,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.1.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.39", ] [[package]] @@ -4603,17 +4655,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float 2.10.1", -] - [[package]] name = "time" version = "0.3.30" @@ -4677,7 +4718,7 @@ dependencies = [ "derive_builder", "esaxx-rs", "getrandom", - "itertools", + "itertools 0.11.0", "lazy_static", "log", "macro_rules_attribute", @@ -4995,9 +5036,9 @@ 
dependencies = [ [[package]] name = "tree-sitter-erlang" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e812f0b7cf3ee07049dd4433c420dec32e421467084456fcd4c6151bf6817b07" +checksum = "93ced5145ebb17f83243bf055b74e108da7cc129e12faab4166df03f59b287f4" dependencies = [ "cc", "tree-sitter", @@ -5095,9 +5136,9 @@ dependencies = [ [[package]] name = "tree-sitter-php" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b689aaa57bd1f0707e5c0728004e7f737b16768644a7e745d23021330797de" +checksum = "0db3788e709a5adfb583683a4b686a084e41a0f9e5a2fcb9a8e358f11481036a" dependencies = [ "cc", "tree-sitter", @@ -5155,9 +5196,9 @@ dependencies = [ [[package]] name = "tree-sitter-swift" -version = "0.3.6" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eee2dbeb101a88a1d9e4883e3fbda6c799cf676f6a1cf59e4fc3862e67e70118" +checksum = "452e6ee0a14b82a0dcd93400b8d3fe3784fdbd775191a89ef84586e5ccec6be7" dependencies = [ "cc", "tree-sitter", @@ -5185,16 +5226,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - [[package]] name = "typenum" version = "1.17.0" @@ -5350,14 +5381,15 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vectordb" -version = "0.3.8" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "975a735e82c92c7a8597b837ad62fdb1b60c0d0ac5a139a2f07a03a484cebf77" +checksum = "8933a0e6f7862f994ef34221d767a511941155b30f72d0dc112ada028a1e7f74" 
dependencies = [ "arrow", "arrow-array", "arrow-cast", "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "async-trait", @@ -5371,7 +5403,9 @@ dependencies = [ "lance-testing", "log", "num-traits", - "object_store", + "object_store 0.9.0", + "serde", + "serde_json", "snafu", "tokio", "url", @@ -5935,11 +5969,11 @@ dependencies = [ [[package]] name = "zstd" -version = "0.12.4" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ - "zstd-safe 6.0.6", + "zstd-safe 7.0.0", ] [[package]] @@ -5954,11 +5988,10 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.6" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" dependencies = [ - "libc", "zstd-sys", ] diff --git a/crates/gitignore/src/lib.rs b/crates/gitignore/src/lib.rs index 3046765..805ddbe 100644 --- a/crates/gitignore/src/lib.rs +++ b/crates/gitignore/src/lib.rs @@ -110,7 +110,6 @@ impl Gitignore { require_literal_leading_dot: false, }; for rule in &self.rules { - println!("matching {} to {rule:?}", path.to_str().unwrap()); let path_str = path.to_str().ok_or(Error::NonUtf8Path)?; let to_match = if path.is_dir() { format!("{path_str}/") diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index dd12938..8011649 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -7,15 +7,17 @@ edition = "2021" name = "llm-ls" [dependencies] -arrow-array = "47" -arrow-schema = "47" +arrow-array = "49" +arrow-schema = "49" candle = { version = "0.3", package = "candle-core", default-features = false } candle-nn = "0.3" candle-transformers = "0.3" +clap = { version = "4", features = 
["derive"] } futures-util = "0.3" gitignore = { path = "../gitignore" } hf-hub = { version = "0.3", features = ["tokio"] } home = "0.5" +lance-linalg = "0.9.9" ropey = { version = "1.6", default-features = false, features = [ "simd", "cr_lines", @@ -46,7 +48,7 @@ tree-sitter-cpp = "0.20" tree-sitter-c-sharp = "0.20" tree-sitter-css = "0.20" tree-sitter-elixir = "0.1" -tree-sitter-erlang = "0.3" +tree-sitter-erlang = "0.4" tree-sitter-go = "0.20" tree-sitter-html = "0.19" tree-sitter-java = "0.20" @@ -56,16 +58,19 @@ tree-sitter-kotlin = "0.3.1" tree-sitter-lua = "0.0.19" tree-sitter-md = "0.1" tree-sitter-objc = "3" -tree-sitter-php = "0.20.0" +tree-sitter-php = "0.21" tree-sitter-python = "0.20" tree-sitter-r = "0.19" tree-sitter-ruby = "0.20" tree-sitter-rust = "0.20" tree-sitter-scala = "0.20" -tree-sitter-swift = "0.3" +tree-sitter-swift = "0.4" tree-sitter-typescript = "0.20" -vectordb = "0.3" +vectordb = "0.4" [dependencies.uuid] version = "1.4" features = ["v4", "fast-rng", "serde"] + +[dev-dependencies] +lsp-client = { path = "../lsp-client" } diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 2d0679b..54fd565 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -1,4 +1,5 @@ use adaptors::{adapt_body, adapt_headers, parse_generations}; +use clap::Parser; use document::Document; use error::{Error, Result}; use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, USER_AGENT}; @@ -12,6 +13,7 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokenizers::Tokenizer; use tokio::io::AsyncWriteExt; +use tokio::net::TcpListener; use tokio::sync::{mpsc, RwLock}; use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::notification::Progress; @@ -532,7 +534,6 @@ async fn get_tokenizer( TokenizerConfig::HuggingFace { repository } => { let (org, repo) = repository .split_once('/') - .map(|(org, repo)| (org, repo)) .ok_or(Error::InvalidRepositoryId)?; let path = cache_dir .as_ref() @@ -751,7 +752,7 
@@ impl LanguageServer for Backend { .await .build_workspace_snippets( client.clone(), - token.clone(), + token, workspace_folders[0].uri.path(), ) .await @@ -880,10 +881,22 @@ fn build_headers(api_token: Option<&String>, ide: Ide) -> Result { Ok(headers) } +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Wether to use a tcp socket for data transfer + #[arg(long = "port")] + socket: Option, + + /// Wether to use stdio transport for data transfer, ignored because it is the default + /// behaviour + #[arg(short, long, default_value_t = true)] + stdio: bool, +} + #[tokio::main] async fn main() { - let stdin = tokio::io::stdin(); - let stdout = tokio::io::stdout(); + let args = Args::parse(); let home_dir = home::home_dir().ok_or(()).expect("failed to find home dir"); let cache_dir = home_dir.join(".cache/llm_ls"); @@ -942,5 +955,19 @@ async fn main() { .custom_method("llm-ls/rejectCompletion", Backend::reject_completion) .finish(); - Server::new(stdin, stdout, socket).serve(service).await; + if let Some(port) = args.socket { + let addr = format!("127.0.0.1:{port}"); + let listener = TcpListener::bind(&addr) + .await + .unwrap_or_else(|_| panic!("failed to bind tcp listener to {addr}")); + let (stream, _) = listener + .accept() + .await + .unwrap_or_else(|_| panic!("failed to accept new connections on {addr}")); + let (read, write) = tokio::io::split(stream); + Server::new(read, write, socket).serve(service).await; + } else { + let (stdin, stdout) = (tokio::io::stdin(), tokio::io::stdout()); + Server::new(stdin, stdout, socket).serve(service).await; + } } diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index e7d6da6..c20c633 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -1,6 +1,6 @@ use crate::error::Result; -use arrow_array::builder::ListBuilder; -use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, StringArray, UInt32Array}; +use 
arrow_array::builder::{FixedSizeListBuilder, Float32Builder}; +use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt32Array}; use arrow_schema::{DataType, Field, Schema}; use candle::utils::{cuda_is_available, metal_is_available}; use candle::{Device, Tensor}; @@ -9,6 +9,7 @@ use candle_transformers::models::bert::{BertModel, Config, DTYPE}; use futures_util::StreamExt; use gitignore::Gitignore; use hf_hub::{api::tokio::Api, Repo, RepoType}; +use lance_linalg::distance::MetricType; use std::collections::VecDeque; use std::path::Path; use std::{path::PathBuf, sync::Arc}; @@ -22,7 +23,9 @@ use tower_lsp::lsp_types::{ }; use tower_lsp::Client; use tracing::{error, info, warn}; -use vectordb::{Database, Table}; +use vectordb::error::Error; +use vectordb::table::ReadParams; +use vectordb::{Connection, Database, Table}; // TODO: // - create sliding window and splitting of files logic @@ -182,15 +185,18 @@ fn device(cpu: bool) -> Result { } } -async fn initialse_database(cache_path: PathBuf) -> Table { +async fn initialse_database(cache_path: PathBuf) -> Arc { let uri = cache_path.join("database"); let db = Database::connect(uri.to_str().expect("path should be utf8")) .await .expect("failed to open database"); - match db.open_table("code-slices").await { + match db + .open_table_with_params("code-slices", ReadParams::default()) + .await + { Ok(table) => table, Err(vectordb::error::Error::TableNotFound { .. 
}) => { - let schema = Schema::new(vec![ + let schema = Arc::new(Schema::new(vec![ Field::new( "vector", DataType::FixedSizeList( @@ -203,11 +209,11 @@ async fn initialse_database(cache_path: PathBuf) -> Table { Field::new("file_url", DataType::Utf8, false), Field::new("start_line_no", DataType::UInt32, false), Field::new("end_line_no", DataType::UInt32, false), - ]); + ])); let batch = RecordBatch::try_new( - Arc::new(schema), + schema.clone(), vec![ - Arc::new(ListBuilder::new(Float32Array::builder(768)).finish()), + Arc::new(FixedSizeListBuilder::new(Float32Builder::new(), 768).finish()), Arc::new(StringArray::from(Vec::<&str>::new())), Arc::new(StringArray::from(Vec::<&str>::new())), Arc::new(UInt32Array::from(Vec::::new())), @@ -217,7 +223,10 @@ async fn initialse_database(cache_path: PathBuf) -> Table { .expect("failure while defining schema"); db.create_table( "code-slices", - RecordBatchIterator::new(vec![batch.clone()].into_iter().map(Ok), batch.schema()), + Box::new(RecordBatchIterator::new( + vec![batch].into_iter().map(Ok), + schema, + )), None, ) .await @@ -228,7 +237,7 @@ async fn initialse_database(cache_path: PathBuf) -> Table { } pub(crate) struct SnippetRetriever { - db: Table, + db: Arc, model: Arc, tokenizer: Tokenizer, window_size: usize, @@ -260,9 +269,10 @@ impl SnippetRetriever { token: NumberOrString, workspace_root: &str, ) -> Result<()> { + info!("building workspace snippets"); let workspace_root = PathBuf::from(workspace_root); let mut files = Vec::new(); - let gitignore = Gitignore::parse(&workspace_root)?; + let gitignore = Gitignore::parse(&workspace_root).ok(); client .send_notification::(ProgressParams { @@ -284,8 +294,10 @@ impl SnippetRetriever { let src_path = entry.path(); - if gitignore.ignored(&src_path)? { - continue; + if let Some(gitignore) = &gitignore { + if gitignore.ignored(&src_path)? 
{ + continue; + } } if entry_type.is_dir() { @@ -300,7 +312,6 @@ impl SnippetRetriever { } for (i, file) in files.iter().enumerate() { let file_url = file.to_str().expect("file path should be utf8").to_string(); - info!("adding {file_url} to embeddings"); self.add_document(file_url).await?; client .send_notification::(ProgressParams { @@ -321,7 +332,7 @@ impl SnippetRetriever { pub(crate) async fn add_document(&mut self, file_url: String) -> Result<()> { let file = tokio::fs::read_to_string(&file_url).await?; let lines = file.split('\n').collect::>(); - let mut embeddings = ListBuilder::new(Float32Array::builder(768)); + let mut embeddings = FixedSizeListBuilder::new(Float32Builder::new(), 768); let mut snippets = Vec::new(); let mut file_urls = Vec::new(); let mut start_line_no = Vec::new(); @@ -330,14 +341,22 @@ impl SnippetRetriever { let end_line = (start_line + self.window_size - 1).min(lines.len()); if self .exists(format!( - "file_url = {file_url} AND start_line_no = {start_line} AND end_line_no = {end_line}" + "file_url = '{file_url}' AND start_line_no = {start_line} AND end_line_no = {end_line}" )) .await? 
{ + info!("snippet {file_url}[{start_line}, {end_line}] already indexed"); continue; } let window = lines[start_line..end_line].to_vec(); let snippet = window.join("\n"); + if snippet.is_empty() { + continue; + } + if snippet.len() > 1024 { + warn!("snippet {file_url}[{start_line}, {end_line}] is too big to be indexed"); + continue; + } let tokenizer = self .tokenizer .with_padding(None) @@ -385,7 +404,10 @@ impl SnippetRetriever { )?; self.db .add( - RecordBatchIterator::new(vec![batch].into_iter().map(Ok), self.db.schema()), + Box::new(RecordBatchIterator::new( + vec![batch].into_iter().map(Ok), + self.db.schema(), + )), None, ) .await?; @@ -403,13 +425,25 @@ impl SnippetRetriever { pub(crate) async fn exists(&self, filter: String) -> Result { let mut results = self .db - .search(Float32Array::from(vec![0.0; 768])) - .filter(Some(filter)) - .execute() + .search(&[0.]) + .metric_type(MetricType::Cosine) + .filter(&filter) + .execute_stream() .await?; - let first = results.next().await; - let exists = first.is_some(); - info!("exists: {exists}"); + let exists = if let Some(record_batch) = results.next().await { + let record_batch = record_batch.map_err(Into::::into)?; + if record_batch.num_rows() > 0 { + true + } else { + info!("record batch: {record_batch:?}"); + false + } + } else { + false + }; + if !exists { + info!("filter: {filter}"); + } Ok(exists) } From 15859bc7951fd474f77be6f6460a8e413f1c9b39 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Sun, 28 Jan 2024 23:02:44 +0100 Subject: [PATCH 04/22] refactor: error handling --- crates/llm-ls/src/adaptors.rs | 92 ++++++++++++++--------------------- crates/llm-ls/src/error.rs | 14 +++++- crates/llm-ls/src/main.rs | 9 ++-- 3 files changed, 52 insertions(+), 63 deletions(-) diff --git a/crates/llm-ls/src/adaptors.rs b/crates/llm-ls/src/adaptors.rs index 3564235..ba03ae4 100644 --- a/crates/llm-ls/src/adaptors.rs +++ b/crates/llm-ls/src/adaptors.rs @@ -6,7 +6,8 @@ use reqwest::header::{HeaderMap, HeaderValue, 
AUTHORIZATION, USER_AGENT}; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::fmt::Display; -use tower_lsp::jsonrpc; + +use crate::error::{Error, Result}; fn build_tgi_body(prompt: String, params: &RequestParams) -> Value { serde_json::json!({ @@ -21,57 +22,43 @@ fn build_tgi_body(prompt: String, params: &RequestParams) -> Value { }) } -fn build_tgi_headers(api_token: Option<&String>, ide: Ide) -> Result { +fn build_tgi_headers(api_token: Option<&String>, ide: Ide) -> Result { let mut headers = HeaderMap::new(); let user_agent = format!("{NAME}/{VERSION}; rust/unknown; ide/{ide:?}"); - headers.insert( - USER_AGENT, - HeaderValue::from_str(&user_agent).map_err(internal_error)?, - ); + headers.insert(USER_AGENT, HeaderValue::from_str(&user_agent)?); if let Some(api_token) = api_token { headers.insert( AUTHORIZATION, - HeaderValue::from_str(&format!("Bearer {api_token}")).map_err(internal_error)?, + HeaderValue::from_str(&format!("Bearer {api_token}"))?, ); } Ok(headers) } -fn parse_tgi_text(text: &str) -> Result, jsonrpc::Error> { - let generations = - match serde_json::from_str(text).map_err(internal_error)? { - APIResponse::Generation(gen) => vec![gen], - APIResponse::Generations(_) => { - return Err(internal_error( - "You are attempting to parse a result in the API inference format when using the `tgi` adaptor", - )) - } - APIResponse::Error(err) => return Err(internal_error(err)), - }; - Ok(generations) +fn parse_tgi_text(text: &str) -> Result> { + match serde_json::from_str(text)? 
{ + APIResponse::Generation(gen) => Ok(vec![gen]), + APIResponse::Generations(_) => Err(Error::InvalidAdaptor), + APIResponse::Error(err) => Err(Error::Tgi(err)), + } } fn build_api_body(prompt: String, params: &RequestParams) -> Value { build_tgi_body(prompt, params) } -fn build_api_headers(api_token: Option<&String>, ide: Ide) -> Result { +fn build_api_headers(api_token: Option<&String>, ide: Ide) -> Result { build_tgi_headers(api_token, ide) } -fn parse_api_text(text: &str) -> Result, jsonrpc::Error> { - // TODO: - // APIResponse::Generation(gen) => Ok(vec![gen]), - // APIResponse::Generations(gens) => Ok(gens), - // APIResponse::Error(err) => Err(err), - let generations = match serde_json::from_str(text).map_err(internal_error)? { - APIResponse::Generation(gen) => vec![gen], - APIResponse::Generations(gens) => gens, - APIResponse::Error(err) => return Err(internal_error(err)), - }; - Ok(generations) +fn parse_api_text(text: &str) -> Result> { + match serde_json::from_str(text)? { + APIResponse::Generation(gen) => Ok(vec![gen]), + APIResponse::Generations(gens) => Ok(gens), + APIResponse::Error(err) => Err(Error::InferenceApi(err)), + } } fn build_ollama_body(prompt: String, params: &CompletionParams) -> Value { @@ -88,7 +75,7 @@ fn build_ollama_body(prompt: String, params: &CompletionParams) -> Value { } }) } -fn build_ollama_headers() -> Result { +fn build_ollama_headers() -> Result { Ok(HeaderMap::new()) } @@ -112,12 +99,11 @@ enum OllamaAPIResponse { Error(APIError), } -fn parse_ollama_text(text: &str) -> Result, jsonrpc::Error> { - let generations = match serde_json::from_str(text).map_err(internal_error)? { - OllamaAPIResponse::Generation(gen) => vec![gen.into()], - OllamaAPIResponse::Error(err) => return Err(internal_error(err)), - }; - Ok(generations) +fn parse_ollama_text(text: &str) -> Result> { + match serde_json::from_str(text)? 
{ + OllamaAPIResponse::Generation(gen) => Ok(vec![gen.into()]), + OllamaAPIResponse::Error(err) => Err(Error::Ollama(err)), + } } fn build_openai_body(prompt: String, params: &CompletionParams) -> Value { @@ -131,7 +117,7 @@ fn build_openai_body(prompt: String, params: &CompletionParams) -> Value { }) } -fn build_openai_headers(api_token: Option<&String>, ide: Ide) -> Result { +fn build_openai_headers(api_token: Option<&String>, ide: Ide) -> Result { build_api_headers(api_token, ide) } @@ -177,7 +163,7 @@ struct OpenAIErrorDetail { } #[derive(Debug, Deserialize)] -struct OpenAIError { +pub struct OpenAIError { detail: Vec, } @@ -200,13 +186,13 @@ enum OpenAIAPIResponse { Error(OpenAIError), } -fn parse_openai_text(text: &str) -> Result, jsonrpc::Error> { - match serde_json::from_str(text).map_err(internal_error) { - Ok(OpenAIAPIResponse::Generation(completion)) => { +fn parse_openai_text(text: &str) -> Result> { + let open_ai_response = serde_json::from_str(text)?; + match open_ai_response { + OpenAIAPIResponse::Generation(completion) => { Ok(completion.choices.into_iter().map(|x| x.into()).collect()) } - Ok(OpenAIAPIResponse::Error(err)) => Err(internal_error(err)), - Err(err) => Err(internal_error(err)), + OpenAIAPIResponse::Error(err) => Err(Error::OpenAI(err)), } } @@ -216,11 +202,7 @@ pub(crate) const OLLAMA: &str = "ollama"; pub(crate) const OPENAI: &str = "openai"; pub(crate) const DEFAULT_ADAPTOR: &str = HUGGING_FACE; -fn unknown_adaptor_error(adaptor: Option<&String>) -> jsonrpc::Error { - internal_error(format!("Unknown adaptor {:?}", adaptor)) -} - -pub fn adapt_body(prompt: String, params: &CompletionParams) -> Result { +pub fn adapt_body(prompt: String, params: &CompletionParams) -> Result { match params .adaptor .as_ref() @@ -231,7 +213,7 @@ pub fn adapt_body(prompt: String, params: &CompletionParams) -> Result Ok(build_api_body(prompt, ¶ms.request_params)), OLLAMA => Ok(build_ollama_body(prompt, params)), OPENAI => Ok(build_openai_body(prompt, 
params)), - _ => Err(unknown_adaptor_error(params.adaptor.as_ref())), + adaptor => Err(Error::UnknownAdaptor(adaptor.to_owned())), } } @@ -239,22 +221,22 @@ pub fn adapt_headers( adaptor: Option<&String>, api_token: Option<&String>, ide: Ide, -) -> Result { +) -> Result { match adaptor.unwrap_or(&DEFAULT_ADAPTOR.to_string()).as_str() { TGI => build_tgi_headers(api_token, ide), HUGGING_FACE => build_api_headers(api_token, ide), OLLAMA => build_ollama_headers(), OPENAI => build_openai_headers(api_token, ide), - _ => Err(unknown_adaptor_error(adaptor)), + adaptor => Err(Error::UnknownAdaptor(adaptor.to_owned())), } } -pub fn parse_generations(adaptor: Option<&String>, text: &str) -> jsonrpc::Result> { +pub fn parse_generations(adaptor: Option<&String>, text: &str) -> Result> { match adaptor.unwrap_or(&DEFAULT_ADAPTOR.to_string()).as_str() { TGI => parse_tgi_text(text), HUGGING_FACE => parse_api_text(text), OLLAMA => parse_ollama_text(text), OPENAI => parse_openai_text(text), - _ => Err(unknown_adaptor_error(adaptor)), + adaptor => Err(Error::UnknownAdaptor(adaptor.to_owned())), } } diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index b6cbef3..98641ab 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -18,8 +18,6 @@ pub fn internal_error(err: E) -> LspError { #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("backend api error: {0}")] - Api(#[from] APIError), #[error("arrow error: {0}")] Arrow(#[from] arrow_schema::ArrowError), #[error("candle error: {0}")] @@ -32,12 +30,20 @@ pub enum Error { Http(#[from] reqwest::Error), #[error("io error: {0}")] Io(#[from] std::io::Error), + #[error("inference api error: {0}")] + InferenceApi(APIError), + #[error("You are attempting to parse a result in the API inference format when using the `tgi` adaptor")] + InvalidAdaptor, #[error("invalid header value: {0}")] InvalidHeaderValue(#[from] reqwest::header::InvalidHeaderValue), #[error("invalid repository id")] 
InvalidRepositoryId, #[error("invalid tokenizer path")] InvalidTokenizerPath, + #[error("ollama error: {0}")] + Ollama(APIError), + #[error("openai error: {0}")] + OpenAI(crate::adaptors::OpenAIError), #[error("index out of bounds: {0}")] OutOfBoundIndexing(usize), #[error("line out of bounds: {0}")] @@ -48,10 +54,14 @@ pub enum Error { Rope(#[from] ropey::Error), #[error("serde json error: {0}")] SerdeJson(#[from] serde_json::Error), + #[error("tgi error: {0}")] + Tgi(APIError), #[error("tokenizer error: {0}")] Tokenizer(#[from] tokenizers::Error), #[error("tokio join error: {0}")] TokioJoin(#[from] tokio::task::JoinError), + #[error("unknown adaptor: {0}")] + UnknownAdaptor(String), #[error("vector db error: {0}")] VectorDb(#[from] vectordb::error::Error), } diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 54fd565..07ccbe7 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -400,7 +400,7 @@ async fn request_completion( ) -> Result> { let t = Instant::now(); - let json = adapt_body(prompt, params).map_err(internal_error)?; + let json = adapt_body(prompt, params)?; let headers = adapt_headers( params.adaptor.as_ref(), params.api_token.as_ref(), @@ -414,10 +414,7 @@ async fn request_completion( .await?; let model = ¶ms.model; - let generations = parse_generations( - params.adaptor.as_ref(), - res.text().await.map_err(internal_error)?.as_str(), - ); + let generations = parse_generations(params.adaptor.as_ref(), res.text().await?.as_str())?; let time = t.elapsed().as_millis(); info!( model, @@ -425,7 +422,7 @@ async fn request_completion( generations = serde_json::to_string(&generations)?, "{model} computed generations in {time} ms" ); - generations + Ok(generations) } fn format_generations( From d0a02eb0ae3ce9162e258f8d09f36b97b5dbe639 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Sun, 28 Jan 2024 23:30:53 +0100 Subject: [PATCH 05/22] refactor: remove unnecessary comment --- crates/llm-ls/src/document.rs | 6 +++++- 
crates/llm-ls/src/main.rs | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/llm-ls/src/document.rs b/crates/llm-ls/src/document.rs index 689e33b..fbea2ba 100644 --- a/crates/llm-ls/src/document.rs +++ b/crates/llm-ls/src/document.rs @@ -237,7 +237,11 @@ impl Document { }, ) } else { - let removal_idx = self.text.try_line_to_char(range.end.line as usize).map_err(internal_error)? + (range.end.character as usize); + let removal_idx = self + .text + .try_line_to_char(range.end.line as usize) + .map_err(internal_error)? + + (range.end.character as usize); let slice_size = removal_idx - start_idx; self.text .try_remove(start_idx..removal_idx) diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 07ccbe7..568d851 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -808,8 +808,7 @@ impl LanguageServer for Backend { return; } - // ignore the output scheme - if uri.starts_with("output:") { + if params.text_document.uri.scheme() == "output" { return; } From 11d2906a3db6ff21dce989e642a5c4fb84da2162 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 6 Feb 2024 10:01:01 +0100 Subject: [PATCH 06/22] feat: add tinyvec-embed --- Cargo.lock | 1435 ++++++++++++++---------- crates/llm-ls/Cargo.toml | 8 +- crates/llm-ls/src/retrieval.rs | 25 +- crates/tinyvec-embed/Cargo.toml | 24 + crates/tinyvec-embed/README.md | 5 + crates/tinyvec-embed/src/db.rs | 272 +++++ crates/tinyvec-embed/src/error.rs | 29 + crates/tinyvec-embed/src/lib.rs | 3 + crates/tinyvec-embed/src/similarity.rs | 51 + 9 files changed, 1248 insertions(+), 604 deletions(-) create mode 100644 crates/tinyvec-embed/Cargo.toml create mode 100644 crates/tinyvec-embed/README.md create mode 100644 crates/tinyvec-embed/src/db.rs create mode 100644 crates/tinyvec-embed/src/error.rs create mode 100644 crates/tinyvec-embed/src/lib.rs create mode 100644 crates/tinyvec-embed/src/similarity.rs diff --git a/Cargo.lock b/Cargo.lock index e3d7358..f09c1fb 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -30,9 +30,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", "const-random", @@ -51,6 +51,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "allocator-api2" version = "0.2.16" @@ -74,9 +89,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.4" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -88,43 +103,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220" [[package]] name = "anstyle-parse" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = 
"anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.1" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" [[package]] name = "arrayref" @@ -140,11 +155,10 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "ahash", "arrow-arith", "arrow-array", "arrow-buffer", @@ -162,9 +176,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ "arrow-array", "arrow-buffer", @@ -177,9 +191,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "49.0.0" +version = "50.0.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ "ahash", "arrow-buffer", @@ -188,15 +202,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown", + "hashbrown 0.14.3", "num", ] [[package]] name = "arrow-buffer" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half", @@ -205,16 +219,16 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "base64 0.21.5", + "base64 0.21.7", "chrono", "comfy-table", "half", @@ -224,9 +238,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" dependencies = [ "arrow-array", "arrow-buffer", @@ -243,9 +257,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", @@ -255,9 +269,9 @@ 
dependencies = [ [[package]] name = "arrow-ipc" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,14 +279,15 @@ dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", + "lz4_flex", "zstd 0.13.0", ] [[package]] name = "arrow-json" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -290,9 +305,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ "arrow-array", "arrow-buffer", @@ -305,9 +320,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash", "arrow-array", @@ -315,20 +330,20 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown", + "hashbrown 0.14.3", ] [[package]] name = "arrow-schema" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" [[package]] name = 
"arrow-select" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash", "arrow-array", @@ -340,9 +355,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -354,6 +369,24 @@ dependencies = [ "regex-syntax 0.8.2", ] +[[package]] +name = "async-compression" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd 0.13.0", + "zstd-safe 7.0.0", +] + [[package]] name = "async-recursion" version = "1.0.5" @@ -362,18 +395,18 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] name = "async-trait" -version = "0.1.74" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -384,14 +417,13 @@ checksum = "834eee9ce518130a3b4d5af09ecc43e9d6b57ee76613f227a1ddd6b77c7a62bc" [[package]] name = "auto_impl" -version = "1.1.0" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fee3da8ef1276b0bee5dd1c7258010d8fffd31801447323115a25560e1327b89" +checksum = "823b8bb275161044e2ac7a25879cb3e2480cb403e3943022c7c769c599b756aa" dependencies = [ - "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.48", ] [[package]] @@ -817,9 +849,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64-simd" @@ -837,6 +869,15 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -845,9 +886,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "blake2" @@ -880,6 +921,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.14.0" @@ -894,9 +956,9 @@ checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" dependencies = [ "bytemuck_derive", ] @@ -909,7 +971,7 @@ checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -966,9 +1028,9 @@ dependencies = [ [[package]] name = "candle-core" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60d9b91c73bc662dc45aff607f5ffe79724b7cf7d7c8dc12a72b25921683b67" +checksum = "6db8659ea87ee8197d2fc627348916cce0561330ee7ae3874e771691d3cecb2f" dependencies = [ "byteorder", "gemm", @@ -987,9 +1049,9 @@ dependencies = [ [[package]] name = "candle-nn" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb6e13e7076439309786482d4d4c1b4e1f2b102ca93513372d5419ffcf5df25" +checksum = "7ddce8312032760a6791d6adc9c56dc54fd7c1be38d85dcc4862f1c75228bbc7" dependencies = [ "candle-core", "half", @@ -1002,9 +1064,9 @@ dependencies = [ [[package]] name = "candle-transformers" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0169336de9dc62dd84e19af1408e53ef8ad07eef8fc103cfebf5b6c3e3f23c2c" +checksum = "68834a0cacb7e002d1f4abfe26a7cd1237e2ba342fddcf2e30913c4edb96409d" dependencies = [ "byteorder", "candle-core", @@ -1021,9 
+1083,9 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e34637b3140142bdf929fb439e8aa4ebad7651ebf7b1080b3930aa16ac1459ff" +checksum = "ceed8ef69d8518a5dda55c07425450b58a4e1946f4951eab6d7191ee86c2443d" dependencies = [ "serde", ] @@ -1059,9 +1121,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" dependencies = [ "android-tzdata", "iana-time-zone", @@ -1069,14 +1131,14 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.48.5", + "windows-targets 0.52.0", ] [[package]] name = "chrono-tz" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" +checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7" dependencies = [ "chrono", "chrono-tz-build", @@ -1106,9 +1168,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.10" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" dependencies = [ "clap_builder", "clap_derive", @@ -1116,9 +1178,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.9" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" dependencies = 
[ "anstream", "anstyle", @@ -1135,7 +1197,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -1163,15 +1225,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] @@ -1208,9 +1270,9 @@ checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ "core-foundation-sys", "libc", @@ -1218,15 +1280,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "cpufeatures" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] @@ -1242,46 +1304,37 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = 
"176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b" dependencies = [ - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crunchy" @@ -1362,7 +1415,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown", + "hashbrown 0.14.3", "lock_api", "once_cell", "parking_lot_core", @@ -1370,16 +1423,19 @@ dependencies = [ [[package]] name = "datafusion" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0" +checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" dependencies = [ "ahash", "arrow", "arrow-array", + "arrow-ipc", "arrow-schema", + "async-compression", "async-trait", "bytes", + "bzip2", "chrono", 
"dashmap", "datafusion-common", @@ -1389,16 +1445,18 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-sql", + "flate2", "futures", "glob", "half", - "hashbrown", + "hashbrown 0.14.3", "indexmap", - "itertools 0.12.0", + "itertools 0.12.1", "log", "num_cpus", - "object_store 0.8.0", + "object_store", "parking_lot", + "parquet", "pin-project-lite", "rand 0.8.5", "sqlparser", @@ -1407,13 +1465,15 @@ dependencies = [ "tokio-util", "url", "uuid", + "xz2", + "zstd 0.13.0", ] [[package]] name = "datafusion-common" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613" +checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" dependencies = [ "ahash", "arrow", @@ -1424,15 +1484,16 @@ dependencies = [ "half", "libc", "num_cpus", - "object_store 0.8.0", + "object_store", + "parquet", "sqlparser", ] [[package]] name = "datafusion-execution" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d" +checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" dependencies = [ "arrow", "chrono", @@ -1440,9 +1501,9 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown", + "hashbrown 0.14.3", "log", - "object_store 0.8.0", + "object_store", "parking_lot", "rand 0.8.5", "tempfile", @@ -1451,9 +1512,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e" +checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" dependencies = [ "ahash", "arrow", @@ -1467,9 +1528,9 @@ dependencies = [ [[package]] name = 
"datafusion-optimizer" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618" +checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" dependencies = [ "arrow", "async-trait", @@ -1477,17 +1538,17 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown", - "itertools 0.12.0", + "hashbrown 0.14.3", + "itertools 0.12.1", "log", "regex-syntax 0.8.2", ] [[package]] name = "datafusion-physical-expr" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735" +checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" dependencies = [ "ahash", "arrow", @@ -1495,17 +1556,17 @@ dependencies = [ "arrow-buffer", "arrow-ord", "arrow-schema", - "base64 0.21.5", + "base64 0.21.7", "blake2", "blake3", "chrono", "datafusion-common", "datafusion-expr", "half", - "hashbrown", + "hashbrown 0.14.3", "hex", "indexmap", - "itertools 0.12.0", + "itertools 0.12.1", "log", "md-5", "paste", @@ -1519,9 +1580,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631" +checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" dependencies = [ "ahash", "arrow", @@ -1536,9 +1597,9 @@ dependencies = [ "datafusion-physical-expr", "futures", "half", - "hashbrown", + "hashbrown 0.14.3", "indexmap", - "itertools 0.12.0", + "itertools 0.12.1", "log", "once_cell", "parking_lot", @@ -1550,9 +1611,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5" +checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" dependencies = [ "arrow", "arrow-schema", @@ -1562,11 +1623,27 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-substrait" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad6bef77af3d8a697ae63ffbcb5aa66b74cd08ea93a31e2e757da75b2f1452f" +dependencies = [ + "async-recursion", + "chrono", + "datafusion", + "itertools 0.12.1", + "object_store", + "prost", + "prost-types", + "substrait", +] + [[package]] name = "deranged" -version = "0.3.9" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ "powerfmt", ] @@ -1640,6 +1717,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dyn-clone" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" + [[package]] name = "dyn-stack" version = "0.10.0" @@ -1671,6 +1754,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -1772,9 +1867,9 @@ checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" [[package]] name = "futures" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -1787,9 +1882,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -1797,15 +1892,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -1814,38 +1909,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] 
[[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -1861,9 +1956,9 @@ dependencies = [ [[package]] name = "gemm" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b3afa707040531a7527477fd63a81ea4f6f3d26037a2f96776e57fb843b258e" +checksum = "e97d506c68f4fb12325b52a638e7d54cc87e3593a4ded0de60218b6dfd65f645" dependencies = [ "dyn-stack", "gemm-c32", @@ -1881,9 +1976,9 @@ dependencies = [ [[package]] name = "gemm-c32" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc3973a4c30c73f26a099113953d0c772bb17ee2e07976c0a06b8fe1f38a57d" +checksum = "0dd16f26e8f34661edc906d8c9522b59ec1655c865a98a58950d0246eeaca9da" dependencies = [ "dyn-stack", "gemm-common", @@ -1896,9 +1991,9 @@ dependencies = [ [[package]] name = "gemm-c64" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30362894b93dada374442cb2edf4512ddf19513c9bec88e06a445bcb6b22e64f" +checksum = 
"a8e34381bc060b47fbd25522a281799ef763cd27f43bbd1783d935774659242a" dependencies = [ "dyn-stack", "gemm-common", @@ -1911,9 +2006,9 @@ dependencies = [ [[package]] name = "gemm-common" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "988499faa80566b046b4fee2c5f15af55b5a20c1fe8486b112ebb34efa045ad6" +checksum = "22518a76339b09276f77c3166c44262e55f633712fe8a44fd0573505887feeab" dependencies = [ "bytemuck", "dyn-stack", @@ -1926,13 +2021,14 @@ dependencies = [ "raw-cpuid", "rayon", "seq-macro", + "sysctl", ] [[package]] name = "gemm-f16" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6cf2854a12371684c38d9a865063a27661812a3ff5803454c5742e8f5a388ce" +checksum = "70409bbf3ef83b38cbe4a58cd4b797c1c27902505bdd926a588ea61b6c550a84" dependencies = [ "dyn-stack", "gemm-common", @@ -1948,9 +2044,9 @@ dependencies = [ [[package]] name = "gemm-f32" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bc84003cf6d950a7c7ca714ad6db281b6cef5c7d462f5cd9ad90ea2409c7227" +checksum = "5ea3068edca27f100964157211782eba19e961aa4d0d2bdac3e1775a51aa7680" dependencies = [ "dyn-stack", "gemm-common", @@ -1963,9 +2059,9 @@ dependencies = [ [[package]] name = "gemm-f64" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35187ef101a71eed0ecd26fb4a6255b4192a12f1c5335f3a795698f2d9b6cf33" +checksum = "5fd41e8f5a60dce8d8acd852a3f4b22f8e18be957e1937731be692c037652510" dependencies = [ "dyn-stack", "gemm-common", @@ -1988,9 +2084,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = 
"190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "js-sys", @@ -2005,6 +2101,19 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "git2" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" +dependencies = [ + "bitflags 2.4.2", + "libc", + "libgit2-sys", + "log", + "url", +] + [[package]] name = "gitignore" version = "0.1.0" @@ -2022,9 +2131,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -2053,6 +2162,15 @@ dependencies = [ "rand_distr", ] +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.14.3" @@ -2071,9 +2189,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" [[package]] name = "hex" @@ -2113,11 +2231,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2133,9 +2251,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http", @@ -2162,9 +2280,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ "bytes", "futures-channel", @@ -2177,7 +2295,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.10", + "socket2", "tokio", "tower-service", "tracing", @@ -2215,9 +2333,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.58" +version = "0.1.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2254,12 +2372,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.1.0" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.3", ] [[package]] @@ 
-2293,6 +2411,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "ipnet" version = "2.9.0" @@ -2310,18 +2434,18 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" @@ -2334,26 +2458,23 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" dependencies = [ "wasm-bindgen", ] [[package]] name = "lance" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ec22c8a248c4d43df70cdb9cfb37dbf9ef52a78fe22f35e52f18ec4ad071fae" +checksum = "11fcea8ebf582235a9bd02c84a5176308adbd58e8f96924f3bceedeca91edffb" dependencies = [ "arrow", "arrow-arith", "arrow-array", "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", "arrow-ord", "arrow-row", "arrow-schema", @@ -2361,10 +2482,8 @@ dependencies = [ "async-recursion", "async-trait", "async_cell", - "aws-config", "aws-credential-types", "aws-sdk-dynamodb", - "base64 0.21.5", "byteorder", "bytes", "chrono", 
@@ -2373,8 +2492,7 @@ dependencies = [ "datafusion-physical-expr", "futures", "half", - "http", - "itertools 0.12.0", + "itertools 0.12.1", "lance-arrow", "lance-core", "lance-datafusion", @@ -2388,19 +2506,16 @@ dependencies = [ "log", "lru_time_cache", "moka", - "num-traits", "num_cpus", - "object_store 0.9.0", - "ordered-float", + "object_store", + "ordered-float 3.9.2", "pin-project", "prost", "prost-build", - "prost-types", "rand 0.8.5", "roaring", "serde", "serde_json", - "shellexpand", "snafu", "tempfile", "tokio", @@ -2411,9 +2526,9 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c1fa4216265df12914512e27f69e31ca75f1d05380150e1b66552f85da55553" +checksum = "157efb4b07f65f06bec19c3ed35359e7f779b78c8c9a9cfd23271f7ea7193b5b" dependencies = [ "arrow-array", "arrow-buffer", @@ -2429,19 +2544,13 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66139c54511551e35f44ddea0b38a5863f5945896f462fbc696ff6edcff7da84" +checksum = "7c98504213ced58921ce058255c262dc35fa0d2c6ad493beffc98a696e46b4de" dependencies = [ - "arrow-arith", "arrow-array", "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", "arrow-schema", - "arrow-select", - "async-recursion", "async-trait", "byteorder", "bytes", @@ -2449,36 +2558,27 @@ dependencies = [ "datafusion-common", "datafusion-sql", "futures", - "http", "lance-arrow", "lazy_static", - "log", "mock_instant", "moka", - "num-traits", - "num_cpus", - "object_store 0.9.0", + "object_store", "pin-project", "prost", - "prost-build", - "prost-types", "rand 0.8.5", "roaring", - "serde", "serde_json", - "shellexpand", "snafu", "tokio", "tracing", "url", - "uuid", ] [[package]] name = "lance-datafusion" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "49ce73fa8ab31b108522e5c5867d11619fef9c8355f419754b7d8d190c07b9f4" +checksum = "09d8e782ede7bdacc40c40556705267a18fdf54e882b957837228c15f7492a8a" dependencies = [ "arrow", "arrow-array", @@ -2487,19 +2587,22 @@ dependencies = [ "async-trait", "datafusion", "datafusion-common", - "datafusion-expr", "datafusion-physical-expr", + "datafusion-substrait", "futures", "lance-arrow", "lance-core", + "prost", + "snafu", + "substrait", "tokio", ] [[package]] name = "lance-datagen" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc8a9ccb2cdddf3b721b9a431e5b5069b3a5dfbaa3a2baa4bc1c7c0498326bc7" +checksum = "9e8e393d8efb16bb1af6cc05bec6674c299343e45cbbd5f287d663e33f038dfa" dependencies = [ "arrow", "arrow-array", @@ -2512,21 +2615,17 @@ dependencies = [ [[package]] name = "lance-file" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98387578b9b9841120fea2a574842ca5c3cce4db687b745d6a9b07452d3a0484" +checksum = "708bfb033ceaab8c711bb6384636936f7a02469f05f898346aeee3d85c1188d3" dependencies = [ "arrow-arith", "arrow-array", "arrow-buffer", - "arrow-cast", - "arrow-data", "arrow-schema", "arrow-select", "async-recursion", "async-trait", - "byteorder", - "bytes", "datafusion-common", "futures", "lance-arrow", @@ -2534,7 +2633,7 @@ dependencies = [ "lance-io", "num-traits", "num_cpus", - "object_store 0.9.0", + "object_store", "prost", "prost-build", "roaring", @@ -2545,12 +2644,11 @@ dependencies = [ [[package]] name = "lance-index" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "495973c420530d8673ac1a0aa287e8986345a76f45b5a363513032040d5c6e74" +checksum = "1087c33f6f4945c7b9a2d5fae664bb2c1c649adbf7ba6014d375134705c8874f" dependencies = [ "arrow", - "arrow-arith", "arrow-array", "arrow-ord", "arrow-schema", @@ -2561,6 +2659,7 @@ dependencies = [ "datafusion-common", 
"datafusion-expr", "datafusion-physical-expr", + "datafusion-sql", "futures", "half", "lance-arrow", @@ -2573,8 +2672,7 @@ dependencies = [ "log", "num-traits", "num_cpus", - "object_store 0.9.0", - "pin-project", + "object_store", "prost", "prost-build", "rand 0.8.5", @@ -2590,9 +2688,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d332fc360fd44fa141416ad7ed039a7529ffff045c3c23427b9b38fac19e8f7e" +checksum = "3b62513c79e3b012e010e08a4b8b5bc9ff7834fc5bd19c3584ce059bda42517b" dependencies = [ "arrow-arith", "arrow-array", @@ -2613,7 +2711,7 @@ dependencies = [ "lance-core", "lazy_static", "num_cpus", - "object_store 0.9.0", + "object_store", "pin-project", "prost", "prost-build", @@ -2626,18 +2724,18 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7110faad69079b4abad8cf2e754458ad2fde7b3860b0ba59ea6f85b54c60280" +checksum = "0949ea77e3968bac761d340b63d57569c6900c46b3ae754bad59d14bd30d8a32" dependencies = [ "arrow-array", "arrow-ord", "arrow-schema", - "arrow-select", "cc", "futures", "half", "lance-arrow", + "lance-core", "log", "num-traits", "num_cpus", @@ -2648,9 +2746,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b729632272b766e84447a70a56e7fec4a3b8ad4035367ab2f41cfcad44dc6cd4" +checksum = "3d8c893cff71df46e0a01a3c009af0a6791305a3c31abcb9840657aa02ede0d7" dependencies = [ "arrow-array", "arrow-buffer", @@ -2669,7 +2767,7 @@ dependencies = [ "lance-io", "lazy_static", "log", - "object_store 0.9.0", + "object_store", "prost", "prost-build", "prost-types", @@ -2686,9 +2784,9 @@ dependencies = [ [[package]] name = "lance-testing" -version = "0.9.9" +version = "0.9.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71d1b344dd4f25b4f2e3279c4abfeee13c5e88d52fe6818dce57e59c0d29c42" +checksum = "444783792dfad257b7ae1f2cc4bb048ea92fcb2999a4150e778819f62a379fb6" dependencies = [ "arrow-array", "arrow-schema", @@ -2769,9 +2867,21 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libgit2-sys" +version = "0.16.1+1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] [[package]] name = "libm" @@ -2785,16 +2895,28 @@ version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "libc", "redox_syscall", ] +[[package]] +name = "libz-sys" +version = "1.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "llm-ls" @@ -2898,11 +3020,31 @@ dependencies = [ "url", ] +[[package]] +name = "lz4_flex" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "mach2" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8" +checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" dependencies = [ "libc", ] @@ -2950,29 +3092,20 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memmap2" -version = "0.7.1" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" dependencies = [ "libc", "stable_deref_trait", ] -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -2996,9 +3129,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "wasi", @@ -3047,9 +3180,9 
@@ dependencies = [ [[package]] name = "monostate" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e404e13820ea0df0eda93aa294e0c80de76a0daa6bec590d376fbec6d7810394" +checksum = "878c2a1f1c70e5724fa28f101ca787b6a7e8ad5c5e4ae4ca3b0fa4a419fa9075" dependencies = [ "monostate-impl", "serde", @@ -3057,13 +3190,13 @@ dependencies = [ [[package]] name = "monostate-impl" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531c82a934da419bed3da09bd87d6e98c72f8d4aa755427b3b009c2b8b8c433c" +checksum = "f686d68a09079e63b1d2c64aa305095887ce50565f00a922ebfaeeee0d9ba6ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -3145,6 +3278,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" version = "0.1.45" @@ -3206,34 +3345,13 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "humantime", - "itertools 0.11.0", - "parking_lot", - "percent-encoding", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "object_store" version = "0.9.0" @@ -3241,19 +3359,19 @@ 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" dependencies = [ "async-trait", - "base64 0.21.5", + "base64 0.21.7", "bytes", "chrono", "futures", "humantime", "hyper", - "itertools 0.12.0", + "itertools 0.12.1", "parking_lot", "percent-encoding", "quick-xml", "rand 0.8.5", "reqwest", - "ring 0.17.5", + "ring 0.17.7", "rustls-pemfile 2.0.0", "serde", "serde_json", @@ -3266,9 +3384,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "onig" @@ -3294,11 +3412,11 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.60" +version = "0.10.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" +checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "cfg-if", "foreign-types", "libc", @@ -3315,7 +3433,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -3326,9 +3444,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.96" +version = "0.9.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" +checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae" dependencies = [ "cc", "libc", @@ -3342,6 +3460,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-float" version = "3.9.2" @@ -3386,6 +3513,41 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "parquet" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.21.7", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.14.3", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd 0.13.0", +] + [[package]] name = "parse-zoneinfo" version = "0.3.0" @@ -3480,22 +3642,22 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -3512,15 +3674,15 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" 
-version = "0.3.27" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "portable-atomic" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" [[package]] name = "powerfmt" @@ -3536,43 +3698,19 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" -dependencies = [ - "proc-macro2", - "syn 2.0.39", -] - -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "quote", - "version_check", + "syn 2.0.48", ] [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -3604,7 
+3742,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.39", + "syn 2.0.48", "tempfile", "which", ] @@ -3619,7 +3757,7 @@ dependencies = [ "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -3633,11 +3771,11 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.9.3" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a1a2f1f0a7ecff9c31abbe177637be0e97a0aef46cf8738ece09327985d998" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.2", "memchr", "unicase", ] @@ -3682,9 +3820,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -3777,9 +3915,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -3798,9 +3936,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -3843,13 +3981,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", + "regex-automata 0.4.5", "regex-syntax 0.8.2", ] @@ -3864,9 +4002,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -3891,6 +4029,16 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "regress" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ed9969cad8051328011596bf549629f1b800cf1731e7964b1eef8dfc480d2c2" +dependencies = [ + "hashbrown 0.13.2", + "memchr", +] + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -3902,11 +4050,11 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.22" +version = "0.11.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" dependencies = [ - "base64 0.21.5", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -3931,6 +4079,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "sync_wrapper", "system-configuration", "tokio", "tokio-native-tls", @@ -3975,9 +4124,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.5" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +checksum = 
"688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", "getrandom", @@ -4025,25 +4174,25 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.25" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "rustls" -version = "0.21.9" +version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring 0.17.5", + "ring 0.17.7", "rustls-webpki", "sct", ] @@ -4066,7 +4215,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64 0.21.5", + "base64 0.21.7", ] [[package]] @@ -4075,15 +4224,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" dependencies = [ - "base64 0.21.5", + "base64 0.21.7", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" +checksum = "0a716eb65e3158e90e17cd93d855216e27bde02745ab842f2cab4a39dba1bacf" [[package]] name = "rustls-webpki" @@ -4091,7 +4240,7 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies 
= [ - "ring 0.17.5", + "ring 0.17.7", "untrusted 0.9.0", ] @@ -4103,15 +4252,15 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "safetensors" -version = "0.3.3" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93279b86b3de76f820a8854dd06cbc33cfa57a417b19c47f6a25280112fb1df" +checksum = "8d980e6bfb34436fb0a81e42bc41af43f11805bbbca443e7f68e9faaabe669ed" dependencies = [ "serde", "serde_json", @@ -4128,11 +4277,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -4144,6 +4293,30 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "schemars" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45a28f4c49489add4ce10783f7911893516f15afe45d015608d41faca6bc4d29" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c767fd6fa65d9ccf9cf026122c1b555f2ef9a4f0cea69da4d7dbc3e258d30967" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 1.0.109", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -4156,7 +4329,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.5", + "ring 0.17.7", "untrusted 0.9.0", ] @@ -4185,9 +4358,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" dependencies = [ "serde", ] @@ -4200,29 +4373,40 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.193" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", +] + +[[package]] +name = "serde_derive_internals" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", @@ -4231,9 +4415,9 @@ dependencies = [ [[package]] name = "serde_path_to_error" -version 
= "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335" +checksum = "ebd154a240de39fdebcf5775d2675c204d7c13cf39a4c697be6493c8e734337c" dependencies = [ "itoa", "serde", @@ -4250,13 +4434,25 @@ dependencies = [ [[package]] name = "serde_repr" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145" +checksum = "0b2e6b945e9d3df726b65d6ee24060aff8e3533d431f677a9695db04eff9dfdb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", +] + +[[package]] +name = "serde_tokenstream" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a00ffd23fd882d096f09fcaae2a9de8329a328628e86027e049ee051dc1621f" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.48", ] [[package]] @@ -4273,9 +4469,9 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.27" +version = "0.9.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" +checksum = "adf8a49373e98a4c5f0ceb5d05aa7c648d75f63774981ed95b7c7443bbd50c6e" dependencies = [ "indexmap", "itoa", @@ -4365,9 +4561,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "snafu" @@ -4392,14 +4588,10 @@ dependencies = [ ] [[package]] -name = "socket2" -version = "0.4.10" +name = "snap" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" 
-dependencies = [ - "libc", - "winapi", -] +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" @@ -4437,9 +4629,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.40.0" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" +checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" dependencies = [ "log", "sqlparser_derive", @@ -4453,7 +4645,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -4499,7 +4691,29 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.39", + "syn 2.0.48", +] + +[[package]] +name = "substrait" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5478fbd0313a9b0915a1c0e7ebf15b5fed7d7c6dd7229b4f5e32ce75b10f256a" +dependencies = [ + "git2", + "heck", + "prettyplease", + "prost", + "prost-build", + "prost-types", + "schemars", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.48", + "typify", + "walkdir", ] [[package]] @@ -4521,9 +4735,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -4538,14 +4752,27 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "synstructure" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +checksum = 
"c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", - "unicode-xid", + "syn 2.0.48", +] + +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.4.2", + "byteorder", + "enum-as-inner", + "libc", + "thiserror", + "walkdir", ] [[package]] @@ -4587,15 +4814,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.1" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", "redox_syscall", "rustix", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -4627,22 +4854,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -4655,14 +4882,26 @@ dependencies = [ "once_cell", ] +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + 
"integer-encoding", + "ordered-float 2.10.1", +] + [[package]] name = "time" -version = "0.3.30" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +checksum = "00b24b79b7a07f10209f19e683ca1e289d80b1e76ffa8c2b779718566a083679" dependencies = [ "deranged", "itoa", + "num-conv", "powerfmt", "serde", "time-core", @@ -4677,10 +4916,11 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.15" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" dependencies = [ + "num-conv", "time-core", ] @@ -4702,6 +4942,18 @@ dependencies = [ "tinyvec_macros", ] +[[package]] +name = "tinyvec-embed" +version = "0.1.0" +dependencies = [ + "bincode", + "serde", + "thiserror", + "tokio", + "tracing", + "uuid", +] + [[package]] name = "tinyvec_macros" version = "0.1.1" @@ -4710,9 +4962,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokenizers" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062b8a9613d6017633b80fb55fbb33f1aff006c36225a3025630753398034b3c" +checksum = "6db445cceba5dfeb0f9702be7d6bfd91801ddcbe8fe8722defe7f2e96da75812" dependencies = [ "aho-corasick", "derive_builder", @@ -4741,9 +4993,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.34.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", @@ -4753,7 +5005,7 @@ dependencies = [ 
"parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.5", + "socket2", "tokio-macros", "windows-sys 0.48.0", ] @@ -4766,7 +5018,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -4868,7 +5120,7 @@ checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -4909,7 +5161,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -4986,9 +5238,9 @@ dependencies = [ [[package]] name = "tree-sitter-c" -version = "0.20.6" +version = "0.20.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b03bdf218020057abee831581a74bff8c298323d6c6cd1a70556430ded9f4b" +checksum = "9a578ec34a18175c5a3922db2bceb132c82391f5b232375d6bb6531d1bfcf497" dependencies = [ "cc", "tree-sitter", @@ -5026,9 +5278,9 @@ dependencies = [ [[package]] name = "tree-sitter-elixir" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e" +checksum = "1bc0b1f3e6d9f12ca22ae5171f32fd154e3aea29dff565d05ef785c28931415b" dependencies = [ "cc", "tree-sitter", @@ -5056,9 +5308,9 @@ dependencies = [ [[package]] name = "tree-sitter-html" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "184e6b77953a354303dc87bf5fe36558c83569ce92606e7b382a0dc1b7443443" +checksum = "017822b6bd42843c4bd67fabb834f61ce23254e866282dd93871350fd6b7fa1d" dependencies = [ "cc", "tree-sitter", @@ -5076,9 +5328,9 @@ dependencies = [ [[package]] name = "tree-sitter-javascript" -version = "0.20.1" +version = "0.20.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7" +checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0" dependencies = [ "cc", "tree-sitter", @@ -5086,9 +5338,9 @@ dependencies = [ [[package]] name = "tree-sitter-json" -version = "0.20.1" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d82d2e33ee675dc71289e2ace4f8f9cf96d36d81400e9dae5ea61edaf5dea6" +checksum = "5a9a38a9c679b55cc8d17350381ec08d69fa1a17a53fcf197f344516e485ed4d" dependencies = [ "cc", "tree-sitter", @@ -5136,9 +5388,9 @@ dependencies = [ [[package]] name = "tree-sitter-php" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0db3788e709a5adfb583683a4b686a084e41a0f9e5a2fcb9a8e358f11481036a" +checksum = "4a7d882bc077192da2995bbe25c293cc596f036129b32c6c94752109dc482b8b" dependencies = [ "cc", "tree-sitter", @@ -5206,9 +5458,9 @@ dependencies = [ [[package]] name = "tree-sitter-typescript" -version = "0.20.3" +version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75049f0aafabb2aac205d7bb24da162b53dcd0cfb326785f25a2f32efa8071a" +checksum = "c8bc1d2c24276a48ef097a71b56888ac9db63717e8f8d0b324668a27fd619670" dependencies = [ "cc", "tree-sitter", @@ -5216,15 +5468,25 @@ dependencies = [ [[package]] name = "triomphe" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c5a71827ac326072b6405552093e2ad2accd25a32fd78d4edc82d98c7f2409" +checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] [[package]] name = "typenum" @@ -5232,6 +5494,50 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "typify" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63ed4d717aa95e598e2f9183376b060e95669ef8f444701ea6afb990fde1cf69" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89057244dfade7c58af9e62beccbcbeb7a7e7701697a33b06dbe0b7331fb79cf" +dependencies = [ + "heck", + "log", + "proc-macro2", + "quote", + "regress", + "schemars", + "serde_json", + "syn 2.0.48", + "thiserror", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ddade397f5957d2cd7fb27f905a9a569db20e8e1e3ea589edce40be07b92825" +dependencies = [ + "proc-macro2", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.48", + "typify-impl", +] + [[package]] name = "unicase" version = "2.7.0" @@ -5243,9 +5549,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -5283,12 +5589,6 @@ version = "0.1.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - [[package]] name = "unicode_categories" version = "0.1.1" @@ -5297,9 +5597,9 @@ checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" [[package]] name = "unsafe-libyaml" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" +checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" [[package]] name = "untrusted" @@ -5319,7 +5619,7 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8cdd25c339e200129fe4de81451814e5228c9b771d57378817d6117cc2b3f97" dependencies = [ - "base64 0.21.5", + "base64 0.21.7", "flate2", "log", "native-tls", @@ -5358,13 +5658,25 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", "rand 0.8.5", "serde", + "uuid-macro-internal", +] + +[[package]] +name = "uuid-macro-internal" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7abb14ae1a50dad63eaa768a458ef43d298cd1bd44951677bd10b732a9ba2a2d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", ] [[package]] @@ -5381,9 +5693,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vectordb" -version = "0.4.6" 
+version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8933a0e6f7862f994ef34221d767a511941155b30f72d0dc112ada028a1e7f74" +checksum = "f60c3fef1748959f034ed1ec53f5c113e58a8018c6999874f4356269754f7ee2" dependencies = [ "arrow", "arrow-array", @@ -5403,7 +5715,7 @@ dependencies = [ "lance-testing", "log", "num-traits", - "object_store 0.9.0", + "object_store", "serde", "serde_json", "snafu", @@ -5450,9 +5762,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -5460,24 +5772,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12" +checksum = "bde2032aeb86bdfaecc8b261eef3cba735cc426c1f3a3416d1e0791be95fc461" dependencies = [ "cfg-if", "js-sys", @@ -5487,9 +5799,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +checksum = 
"3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5497,28 +5809,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" [[package]] name = "wasm-streams" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" dependencies = [ "futures-util", "js-sys", @@ -5538,9 +5850,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" +checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" dependencies = [ "js-sys", "wasm-bindgen", @@ -5548,9 +5860,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.3" +version = "0.25.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" +checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" [[package]] name = "which" @@ -5597,20 +5909,11 @@ 
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.51.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.45.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.42.2", + "windows-targets 0.52.0", ] [[package]] @@ -5631,21 +5934,6 @@ dependencies = [ "windows-targets 0.52.0", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.5" @@ -5676,12 +5964,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -5694,12 +5976,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5712,12 +5988,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5730,12 +6000,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5748,12 +6012,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5766,12 +6024,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -5784,12 +6036,6 @@ version = "0.52.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -5814,24 +6060,24 @@ dependencies = [ [[package]] name = "write-json" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06069a848f95fceae3e5e03c0ddc8cb78452b56654ee0c8e68f938cf790fb9e3" +checksum = "23f6174b2566cc4a74f95e1367ec343e7fa80c93cc8087f5c4a3d6a1088b2118" [[package]] name = "xflags" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4554b580522d0ca238369c16b8f6ce34524d61dafe7244993754bbd05f2c2ea" +checksum = "7d9e15fbb3de55454b0106e314b28e671279009b363e6f1d8e39fdc3bf048944" dependencies = [ "xflags-macros", ] [[package]] name = "xflags-macros" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58e7b3ca8977093aae6b87b6a7730216fc4c53a6530bab5c43a783cd810c1a8" +checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155" [[package]] name = "xmlparser" @@ -5867,6 +6113,15 @@ dependencies = [ "zip", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.7.3" @@ -5887,28 +6142,28 @@ checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", "synstructure", ] [[package]] name = "zerocopy" -version = "0.7.26" +version = "0.7.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.26" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", ] [[package]] @@ -5928,7 +6183,7 @@ checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.48", "synstructure", ] diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index 8011649..d026834 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -7,8 +7,8 @@ edition = "2021" name = "llm-ls" [dependencies] -arrow-array = "49" -arrow-schema = "49" +arrow-array = "50" +arrow-schema = "50" candle = { version = "0.3", package = "candle-core", default-features = false } candle-nn = "0.3" candle-transformers = "0.3" @@ -50,7 +50,7 @@ tree-sitter-css = "0.20" tree-sitter-elixir = "0.1" tree-sitter-erlang = "0.4" tree-sitter-go = "0.20" -tree-sitter-html = "0.19" +tree-sitter-html = "0.20" tree-sitter-java = "0.20" tree-sitter-javascript = "0.20" tree-sitter-json = "0.20" @@ -58,7 +58,7 @@ tree-sitter-kotlin = "0.3.1" tree-sitter-lua = "0.0.19" tree-sitter-md = "0.1" tree-sitter-objc = "3" -tree-sitter-php = "0.21" +tree-sitter-php = "0.22" tree-sitter-python = "0.20" tree-sitter-r = "0.19" tree-sitter-ruby = "0.20" diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index c20c633..9476a3f 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -221,16 +221,21 @@ async fn 
initialse_database(cache_path: PathBuf) -> Arc { ], ) .expect("failure while defining schema"); - db.create_table( - "code-slices", - Box::new(RecordBatchIterator::new( - vec![batch].into_iter().map(Ok), - schema, - )), - None, - ) - .await - .expect("failed to create table") + let tbl = db + .create_table( + "code-slices", + Box::new(RecordBatchIterator::new(vec![].into_iter().map(Ok), schema)), + None, + ) + .await + .expect("failed to create table"); + tbl.create_index(&["vector"]) + .ivf_pq() + .num_partitions(256) + .build() + .await + .expect("failed to create index"); + tbl } Err(err) => panic!("error while opening table: {}", err), } diff --git a/crates/tinyvec-embed/Cargo.toml b/crates/tinyvec-embed/Cargo.toml new file mode 100644 index 0000000..ba6ffdf --- /dev/null +++ b/crates/tinyvec-embed/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "tinyvec-embed" +version = "0.1.0" +edition.workspace = true +license.workspace = true +authors.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bincode = "1" +serde = "1" +thiserror = "1" +tokio = { version = "1", features = [ + "fs", + "macros", + "rt-multi-thread", + "sync", +] } +tracing = "0.1" + +[dependencies.uuid] +version = "1.7.0" +features = ["v4", "fast-rng", "macro-diagnostics"] diff --git a/crates/tinyvec-embed/README.md b/crates/tinyvec-embed/README.md new file mode 100644 index 0000000..9846ef5 --- /dev/null +++ b/crates/tinyvec-embed/README.md @@ -0,0 +1,5 @@ +# tinyvec-embed + +Tiny embedded vector database. + +Inspired by [tinyvector](https://github.com/m1guelpf/tinyvector). 
diff --git a/crates/tinyvec-embed/src/db.rs b/crates/tinyvec-embed/src/db.rs new file mode 100644 index 0000000..ea4e3a5 --- /dev/null +++ b/crates/tinyvec-embed/src/db.rs @@ -0,0 +1,272 @@ +use serde::{Deserialize, Serialize}; +use std::{ + collections::{BinaryHeap, HashMap}, + fs, + path::{Path, PathBuf}, + sync::Arc, +}; +use tokio::{sync::Semaphore, task::JoinSet}; +use tracing::debug; +use uuid::Uuid; + +use crate::{ + error::{Collection as Error, Result}, + similarity::{Distance, ScoreIndex}, +}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Db { + pub collections: HashMap, + pub location: PathBuf, +} + +impl Db { + pub fn open(path: impl AsRef) -> Result { + let path = path.as_ref(); + if !path.exists() { + debug!("Creating database store"); + fs::create_dir_all( + path.parent() + .ok_or(Error::InvalidPath(path.to_path_buf()))?, + ) + .map_err(Into::::into)?; + + return Ok(Self { + collections: HashMap::new(), + location: path.to_path_buf(), + }); + } + debug!("Loading database from store"); + let db = fs::read(path).map_err(Into::::into)?; + Ok(bincode::deserialize(&db[..]).map_err(Into::::into)?) 
+ } + + pub fn create_collection( + &mut self, + name: String, + dimension: usize, + distance: Distance, + ) -> Result { + if self.collections.contains_key(&name) { + return Err(Error::UniqueViolation.into()); + } + + let collection = Collection { + dimension, + distance, + embeddings: Vec::new(), + }; + + self.collections.insert(name, collection.clone()); + + Ok(collection) + } + + pub fn delete_collection(&mut self, name: &str) { + self.collections.remove(name); + } + + pub fn get_collection(&self, name: &str) -> Result<&Collection> { + self.collections.get(name).ok_or(Error::NotFound.into()) + } + + fn save_to_store(&self) -> Result<()> { + let db = bincode::serialize(self).map_err(Into::::into)?; + + fs::write(self.location.as_path(), db).map_err(Into::::into)?; + + Ok(()) + } +} + +impl Drop for Db { + fn drop(&mut self) { + debug!("Saving database to store"); + let _ = self.save_to_store(); + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SimilarityResult { + score: f32, + embedding: Embedding, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Collection { + /// Dimension of the vectors in the collection + pub dimension: usize, + /// Distance metric used for querying + pub distance: Distance, + /// Embeddings in the collection + #[serde(default)] + pub embeddings: Vec, +} + +impl Collection { + pub fn filter(&self) -> FilterBuilder { + FilterBuilder::new() + } + + pub async fn get( + &self, + query: &[f32], + k: usize, + filter: Option bool>, + ) -> Result> { + let embeddings = if let Some(filter) = filter { + self.embeddings.iter().filter(filter).collect::>() + } else { + self.embeddings.iter().collect::>() + }; + get_similarity(self.distance, &embeddings, query, k).await + } + + pub fn insert(&mut self, embedding: Embedding) -> Result<()> { + if embedding.vector.len() != self.dimension { + return Err(Error::DimensionMismatch.into()); + } + + self.embeddings.push(embedding); + + Ok(()) + } +} + +#[derive(Debug, Clone, 
Serialize, Deserialize)] +pub struct Embedding { + pub id: Uuid, + pub metadata: Option>, + pub vector: Vec, +} + +impl Embedding { + pub fn new(vector: Vec, metadata: Option>) -> Self { + Self { + id: Uuid::new_v4(), + metadata, + vector, + } + } +} + +pub enum Compare { + Eq, + Neq, + Gt, + Lt, +} + +#[derive(Clone)] +enum Chain { + And, + Or, +} + +pub struct FilterBuilder { + filter: Vec<(String, Compare, String, Option)>, +} + +impl FilterBuilder { + pub fn new() -> Self { + Self { filter: Vec::new() } + } + + pub fn and(mut self) -> Self { + self.filter + .last_mut() + .map(|c| c.3.as_mut().map(|c| *c = Chain::And)); + self + } + + pub fn or(mut self) -> Self { + self.filter + .last_mut() + .map(|c| c.3.as_mut().map(|c| *c = Chain::Or)); + self + } + + pub fn condtion(mut self, lhs: String, op: Compare, rhs: String) -> Self { + self.filter.push((lhs, op, rhs, None)); + self + } + + pub fn build(self) -> impl Fn(&&Embedding) -> bool { + move |e| { + let mut ret = true; + let mut prev = None; + for condition in &self.filter { + let cond_res = match condition.1 { + Compare::Eq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) == Some(&condition.2)) + .unwrap_or(false), + Compare::Neq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) != Some(&condition.2)) + .unwrap_or(false), + Compare::Gt => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) > Some(&condition.2)) + .unwrap_or(false), + Compare::Lt => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) < Some(&condition.2)) + .unwrap_or(false), + }; + if let Some(prev) = prev { + match prev { + Chain::And => ret = ret && cond_res, + Chain::Or => ret = ret || cond_res, + } + } + prev = condition.3.clone(); + } + ret + } + } +} + +async fn get_similarity( + distance: Distance, + embeddings: &[&Embedding], + query: &[f32], + k: usize, +) -> Result> { + let semaphore = Arc::new(Semaphore::new(8)); + let mut set = JoinSet::new(); + for (index, embedding) in 
embeddings.into_iter().enumerate() { + let embedding = (*embedding).clone(); + let query = query.to_owned(); + let permit = semaphore.clone().acquire_owned().await.unwrap(); + set.spawn_blocking(move || { + let score = distance.compute(&embedding.vector, &query); + drop(permit); + ScoreIndex { score, index } + }); + } + + let mut heap = BinaryHeap::new(); + while let Some(res) = set.join_next().await { + let score_index = res.map_err(Into::::into)?; + if heap.len() < k || score_index < *heap.peek().unwrap() { + heap.push(score_index); + + if heap.len() > k { + heap.pop(); + } + } + } + Ok(heap + .into_sorted_vec() + .into_iter() + .map(|ScoreIndex { score, index }| SimilarityResult { + score, + embedding: embeddings[index].clone(), + }) + .collect()) +} diff --git a/crates/tinyvec-embed/src/error.rs b/crates/tinyvec-embed/src/error.rs new file mode 100644 index 0000000..0569a53 --- /dev/null +++ b/crates/tinyvec-embed/src/error.rs @@ -0,0 +1,29 @@ +use std::path::PathBuf; + +#[derive(Debug, thiserror::Error)] +pub enum Collection { + #[error("bincode error: {0}")] + Bincode(#[from] bincode::Error), + #[error("The dimension of the vector doesn't match the dimension of the collection")] + DimensionMismatch, + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("invalid path: {0}")] + InvalidPath(PathBuf), + #[error("join error: {0}")] + Join(#[from] tokio::task::JoinError), + #[error("Collection doesn't exist")] + NotFound, + #[error("error sending message in channel")] + Send, + #[error("Collection already exists")] + UniqueViolation, +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("collection error: {0}")] + Collection(#[from] Collection), +} + +pub type Result = std::result::Result; diff --git a/crates/tinyvec-embed/src/lib.rs b/crates/tinyvec-embed/src/lib.rs new file mode 100644 index 0000000..97a06b3 --- /dev/null +++ b/crates/tinyvec-embed/src/lib.rs @@ -0,0 +1,3 @@ +mod db; +mod error; +mod similarity; diff --git 
a/crates/tinyvec-embed/src/similarity.rs b/crates/tinyvec-embed/src/similarity.rs new file mode 100644 index 0000000..4966644 --- /dev/null +++ b/crates/tinyvec-embed/src/similarity.rs @@ -0,0 +1,51 @@ +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum Distance { + Cosine, +} + +impl Distance { + pub fn compute(&self, a: &[f32], b: &[f32]) -> f32 { + match self { + Distance::Cosine => { + let magnitude_a = a.iter().fold(0.0, |acc, &val| val.mul_add(val, acc)); + let magnitude_b = b.iter().fold(0.0, |acc, &val| val.mul_add(val, acc)); + dot_product(a, b) / (magnitude_a * magnitude_b).sqrt() + } + } + } +} + +fn dot_product(a: &[f32], b: &[f32]) -> f32 { + a.iter().zip(b).fold(0.0, |acc, (x, y)| acc + x * y) +} + +pub struct ScoreIndex { + pub score: f32, + pub index: usize, +} + +impl PartialEq for ScoreIndex { + fn eq(&self, other: &Self) -> bool { + self.score.eq(&other.score) + } +} + +impl Eq for ScoreIndex {} + +#[allow(clippy::non_canonical_partial_ord_impl)] +impl PartialOrd for ScoreIndex { + fn partial_cmp(&self, other: &Self) -> Option { + // The comparison is intentionally reversed here to make the heap a min-heap + other.score.partial_cmp(&self.score) + } +} + +impl Ord for ScoreIndex { + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} From 307ee39632cac53b5a138d7934438861390fc7a2 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 6 Feb 2024 16:16:10 +0100 Subject: [PATCH 07/22] refactor: replace adaptor strings with enum --- .gitignore | 1 - crates/llm-ls/src/adaptors.rs | 67 +++++++++++++++++++---------------- crates/llm-ls/src/main.rs | 6 ++-- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 0a9bc3e..57bc708 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -.vscode/ dist/ target/ .DS_Store diff --git 
a/crates/llm-ls/src/adaptors.rs b/crates/llm-ls/src/adaptors.rs index ba03ae4..75f3f90 100644 --- a/crates/llm-ls/src/adaptors.rs +++ b/crates/llm-ls/src/adaptors.rs @@ -196,47 +196,54 @@ fn parse_openai_text(text: &str) -> Result> { } } -pub(crate) const TGI: &str = "tgi"; -pub(crate) const HUGGING_FACE: &str = "huggingface"; -pub(crate) const OLLAMA: &str = "ollama"; -pub(crate) const OPENAI: &str = "openai"; -pub(crate) const DEFAULT_ADAPTOR: &str = HUGGING_FACE; +#[derive(Debug, Default, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub(crate) enum Adaptor { + #[default] + HuggingFace, + Ollama, + OpenAi, + Tgi, +} + +impl Display for Adaptor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::HuggingFace => write!(f, "huggingface"), + Self::Ollama => write!(f, "ollama"), + Self::OpenAi => write!(f, "openai"), + Self::Tgi => write!(f, "tgi"), + } + } +} pub fn adapt_body(prompt: String, params: &CompletionParams) -> Result { - match params - .adaptor - .as_ref() - .unwrap_or(&DEFAULT_ADAPTOR.to_string()) - .as_str() - { - TGI => Ok(build_tgi_body(prompt, ¶ms.request_params)), - HUGGING_FACE => Ok(build_api_body(prompt, ¶ms.request_params)), - OLLAMA => Ok(build_ollama_body(prompt, params)), - OPENAI => Ok(build_openai_body(prompt, params)), - adaptor => Err(Error::UnknownAdaptor(adaptor.to_owned())), + match params.adaptor.as_ref().unwrap_or(&Adaptor::default()) { + Adaptor::HuggingFace => Ok(build_api_body(prompt, ¶ms.request_params)), + Adaptor::Ollama => Ok(build_ollama_body(prompt, params)), + Adaptor::OpenAi => Ok(build_openai_body(prompt, params)), + Adaptor::Tgi => Ok(build_tgi_body(prompt, ¶ms.request_params)), } } pub fn adapt_headers( - adaptor: Option<&String>, + adaptor: Option<&Adaptor>, api_token: Option<&String>, ide: Ide, ) -> Result { - match adaptor.unwrap_or(&DEFAULT_ADAPTOR.to_string()).as_str() { - TGI => build_tgi_headers(api_token, ide), - HUGGING_FACE => 
build_api_headers(api_token, ide), - OLLAMA => build_ollama_headers(), - OPENAI => build_openai_headers(api_token, ide), - adaptor => Err(Error::UnknownAdaptor(adaptor.to_owned())), + match adaptor.unwrap_or(&Adaptor::default()) { + Adaptor::HuggingFace => build_api_headers(api_token, ide), + Adaptor::Ollama => build_ollama_headers(), + Adaptor::OpenAi => build_openai_headers(api_token, ide), + Adaptor::Tgi => build_tgi_headers(api_token, ide), } } -pub fn parse_generations(adaptor: Option<&String>, text: &str) -> Result> { - match adaptor.unwrap_or(&DEFAULT_ADAPTOR.to_string()).as_str() { - TGI => parse_tgi_text(text), - HUGGING_FACE => parse_api_text(text), - OLLAMA => parse_ollama_text(text), - OPENAI => parse_openai_text(text), - adaptor => Err(Error::UnknownAdaptor(adaptor.to_owned())), +pub fn parse_generations(adaptor: Option<&Adaptor>, text: &str) -> Result> { + match adaptor.unwrap_or(&Adaptor::default()) { + Adaptor::HuggingFace => parse_api_text(text), + Adaptor::Ollama => parse_ollama_text(text), + Adaptor::OpenAi => parse_openai_text(text), + Adaptor::Tgi => parse_tgi_text(text), } } diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 568d851..4bd58ae 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -1,4 +1,4 @@ -use adaptors::{adapt_body, adapt_headers, parse_generations}; +use adaptors::{adapt_body, adapt_headers, parse_generations, Adaptor}; use clap::Parser; use document::Document; use error::{Error, Result}; @@ -285,7 +285,7 @@ pub struct CompletionParams { fim: FimParams, api_token: Option, model: String, - adaptor: Option, + adaptor: Option, tokens_to_clear: Vec, tokenizer_config: Option, context_window: usize, @@ -601,7 +601,7 @@ impl Backend { "received completion request for {}", params.text_document_position.text_document.uri ); - let is_using_inference_api = params.adaptor.as_ref().unwrap_or(&adaptors::DEFAULT_ADAPTOR.to_owned()).as_str() == adaptors::HUGGING_FACE; + let is_using_inference_api = 
matches!(params.adaptor.as_ref().unwrap_or(&Adaptor::default()), Adaptor::HuggingFace); if params.api_token.is_none() && is_using_inference_api { let now = Instant::now(); let unauthenticated_warn_at = self.unauthenticated_warn_at.read().await; From 453b4778ed7eff64228650f5359c7a041e0366ef Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Mon, 19 Feb 2024 22:56:01 +0100 Subject: [PATCH 08/22] test: add similarity & filter test to tinyvec-embed --- .gitignore | 1 + Cargo.lock | 2755 ++----------------------------- crates/llm-ls/Cargo.toml | 4 - crates/llm-ls/src/error.rs | 4 - crates/tinyvec-embed/Cargo.toml | 7 +- crates/tinyvec-embed/src/db.rs | 193 ++- 6 files changed, 314 insertions(+), 2650 deletions(-) diff --git a/.gitignore b/.gitignore index 57bc708..0a9bc3e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist/ target/ .DS_Store diff --git a/Cargo.lock b/Cargo.lock index f52fdfc..539aab9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,20 +28,6 @@ dependencies = [ "cpufeatures", ] -[[package]] -name = "ahash" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" -dependencies = [ - "cfg-if", - "const-random", - "getrandom", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.2" @@ -51,42 +37,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anstream" version = "0.6.12" @@ -142,641 +92,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - -[[package]] -name = "arrayvec" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" - -[[package]] -name = "arrow" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "num", -] - -[[package]] -name = "arrow-array" -version = 
"50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" -dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown 0.14.3", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" -dependencies = [ - "bytes", - "half", - "num", -] - -[[package]] -name = "arrow-cast" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "base64 0.21.7", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num", -] - -[[package]] -name = "arrow-csv" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num", -] - -[[package]] -name = "arrow-ipc" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", - "lz4_flex", - "zstd 0.13.0", -] - -[[package]] -name = 
"arrow-json" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "indexmap", - "lexical-core", - "num", - "serde", - "serde_json", -] - -[[package]] -name = "arrow-ord" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "half", - "num", -] - -[[package]] -name = "arrow-row" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", - "hashbrown 0.14.3", -] - -[[package]] -name = "arrow-schema" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" - -[[package]] -name = "arrow-select" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num", -] - -[[package]] -name = "arrow-string" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "num", - "regex", - "regex-syntax 0.8.2", -] - -[[package]] -name = "async-compression" -version = "0.4.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" -dependencies = [ - "bzip2", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd 0.13.0", - "zstd-safe 7.0.0", -] - -[[package]] -name = "async-recursion" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.49", -] - -[[package]] -name = "async-trait" -version = "0.1.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.49", -] - -[[package]] -name = "async_cell" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "834eee9ce518130a3b4d5af09ecc43e9d6b57ee76613f227a1ddd6b77c7a62bc" - -[[package]] -name = "auto_impl" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "823b8bb275161044e2ac7a25879cb3e2480cb403e3943022c7c769c599b756aa" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.49", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "aws-config" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc6b3804dca60326e07205179847f17a4fce45af3a1106939177ad41ac08a6de" -dependencies = [ - "aws-credential-types", - "aws-http", - "aws-sdk-sso", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-json", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "hex", - "http", - 
"hyper", - "ring 0.16.20", - "time", - "tokio", - "tower", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-credential-types" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a66ac8ef5fa9cf01c2d999f39d16812e90ec1467bd382cbbb74ba23ea86201" -dependencies = [ - "aws-smithy-async", - "aws-smithy-types", - "fastrand", - "tokio", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-http" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e626370f9ba806ae4c439e49675fd871f5767b093075cdf4fef16cac42ba900" -dependencies = [ - "aws-credential-types", - "aws-smithy-http", - "aws-smithy-types", - "aws-types", - "bytes", - "http", - "http-body", - "lazy_static", - "percent-encoding", - "pin-project-lite", - "tracing", -] - -[[package]] -name = "aws-runtime" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ac5cf0ff19c1bca0cea7932e11b239d1025a45696a4f44f72ea86e2b8bdd07" -dependencies = [ - "aws-credential-types", - "aws-http", - "aws-sigv4", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "fastrand", - "http", - "percent-encoding", - "tracing", - "uuid", -] - -[[package]] -name = "aws-sdk-dynamodb" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10aef6843bfc2dabfccad27f7e1ab303942bbda19f7ea7777d0d74388d073db4" -dependencies = [ - "aws-credential-types", - "aws-http", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http", - "regex", - "tokio-stream", - "tracing", -] - -[[package]] -name = "aws-sdk-sso" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"903f888ff190e64f6f5c83fb0f8d54f9c20481f1dc26359bb8896f5d99908949" -dependencies = [ - "aws-credential-types", - "aws-http", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "http", - "regex", - "tokio-stream", - "tracing", -] - -[[package]] -name = "aws-sdk-sts" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47ad6bf01afc00423d781d464220bf69fb6a674ad6629cbbcb06d88cdc2be82" -dependencies = [ - "aws-credential-types", - "aws-http", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "http", - "regex", - "tracing", -] - -[[package]] -name = "aws-sigv4" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b28f4910bb956b7ab320b62e98096402354eca976c587d1eeccd523d9bac03" -dependencies = [ - "aws-smithy-http", - "form_urlencoded", - "hex", - "hmac", - "http", - "once_cell", - "percent-encoding", - "regex", - "sha2", - "time", - "tracing", -] - -[[package]] -name = "aws-smithy-async" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cdb73f85528b9d19c23a496034ac53703955a59323d581c06aa27b4e4e247af" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - -[[package]] -name = "aws-smithy-client" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c27b2756264c82f830a91cb4d2d485b2d19ad5bea476d9a966e03d27f27ba59a" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", - "bytes", - "fastrand", - "http", - "http-body", - "hyper", - "hyper-rustls", - "lazy_static", 
- "pin-project-lite", - "rustls 0.21.10", - "tokio", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-http" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54cdcf365d8eee60686885f750a34c190e513677db58bbc466c44c588abf4199" -dependencies = [ - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "http", - "http-body", - "hyper", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "aws-smithy-http-tower" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822de399d0ce62829a69dfa8c5cd08efdbe61a7426b953e2268f8b8b52a607bd" -dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "bytes", - "http", - "http-body", - "pin-project-lite", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1e7ab8fa7ad10c193af7ae56d2420989e9f4758bf03601a342573333ea34f" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-query" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28556a3902091c1f768a34f6c998028921bdab8d47d92586f363f14a4a32d047" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - -[[package]] -name = "aws-smithy-runtime" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "745e096b3553e7e0f40622aa04971ce52765af82bebdeeac53aa6fc82fe801e6" -dependencies = [ - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "fastrand", - "http", - "http-body", - "once_cell", - "pin-project-lite", - "pin-utils", - "tokio", - "tracing", -] - -[[package]] -name = "aws-smithy-runtime-api" -version = "0.56.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d0ae0c9cfd57944e9711ea610b48a963fb174a53aabacc08c5794a594b1d02" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-types", - "bytes", - "http", - "tokio", - "tracing", -] - -[[package]] -name = "aws-smithy-types" -version = "0.56.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d90dbc8da2f6be461fa3c1906b20af8f79d14968fe47f2b7d29d086f62a51728" -dependencies = [ - "base64-simd", - "itoa", - "num-integer", - "ryu", - "serde", - "time", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.56.1" +name = "async-trait" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e01d2dedcdd8023043716cfeeb3c6c59f2d447fce365d8e194838891794b23b6" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ - "xmlparser", + "proc-macro2", + "quote", + "syn 2.0.49", ] [[package]] -name = "aws-types" -version = "0.56.1" +name = "auto_impl" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85aa0451bf8af1bf22a4f028d5d28054507a14be43cb8ac0597a8471fba9edfe" +checksum = "823b8bb275161044e2ac7a25879cb3e2480cb403e3943022c7c769c599b756aa" dependencies = [ - "aws-credential-types", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-types", - "http", - "rustc_version", - "tracing", + "proc-macro2", + "quote", + "syn 2.0.49", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "axum" version = "0.6.20" @@ -853,16 +195,6 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" -[[package]] -name = "base64-simd" -version = "0.8.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" -dependencies = [ - "outref", - "vsimd", -] - [[package]] name = "base64ct" version = "1.6.0" @@ -890,28 +222,6 @@ version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq 0.3.0", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -921,39 +231,12 @@ dependencies = [ "generic-array", ] -[[package]] -name = "brotli" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - [[package]] name = "bumpalo" version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d32a994c2b3ca201d9b263612a374263f05e7adde37c4707f693dcd375076d1f" -[[package]] -name = "bytecount" -version = "0.6.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" - [[package]] 
name = "bytemuck" version = "1.14.3" @@ -986,16 +269,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" -[[package]] -name = "bytes-utils" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" -dependencies = [ - "bytes", - "either", -] - [[package]] name = "bzip2" version = "0.4.4" @@ -1017,15 +290,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "camino" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" -dependencies = [ - "serde", -] - [[package]] name = "candle-core" version = "0.3.3" @@ -1081,28 +345,6 @@ dependencies = [ "wav", ] -[[package]] -name = "cargo-platform" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694c8807f2ae16faecc43dc17d74b3eb042482789fd0eb64b39a2e04e087053f" -dependencies = [ - "serde", -] - -[[package]] -name = "cargo_metadata" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" -dependencies = [ - "camino", - "cargo-platform", - "semver", - "serde", - "serde_json", -] - [[package]] name = "cc" version = "1.0.83" @@ -1119,43 +361,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - "wasm-bindgen", - "windows-targets 0.52.0", -] - 
-[[package]] -name = "chrono-tz" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" -dependencies = [ - "parse-zoneinfo", - "phf", - "phf_codegen", -] - [[package]] name = "cipher" version = "0.4.4" @@ -1212,17 +417,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "comfy-table" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" -dependencies = [ - "strum", - "strum_macros", - "unicode-width", -] - [[package]] name = "console" version = "0.15.8" @@ -1236,38 +430,12 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "const-random" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom", - "once_cell", - "tiny-keccak", -] - [[package]] name = "constant_time_eq" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" -[[package]] -name = "constant_time_eq" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" - [[package]] name = "core-foundation" version = "0.9.4" @@ -1352,27 +520,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "csv" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" -dependencies = [ - "memchr", -] - [[package]] name = "custom-types" version = "0.1.0" @@ -1425,230 +572,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.3", + "hashbrown", "lock_api", "once_cell", "parking_lot_core", ] -[[package]] -name = "datafusion" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", - "flate2", - "futures", - "glob", - "half", - "hashbrown 0.14.3", - "indexmap", - "itertools 0.12.1", - "log", - "num_cpus", - "object_store", - "parking_lot", - "parquet", - "pin-project-lite", - "rand 0.8.5", - "sqlparser", - "tempfile", - "tokio", - "tokio-util", - "url", - "uuid", - "xz2", - "zstd 0.13.0", -] - -[[package]] -name = "datafusion-common" -version = "35.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "chrono", - "half", - "libc", - "num_cpus", - "object_store", - "parquet", - "sqlparser", -] - -[[package]] -name = "datafusion-execution" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" -dependencies = [ - "arrow", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures", - "hashbrown 0.14.3", - "log", - "object_store", - "parking_lot", - "rand 0.8.5", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "datafusion-common", - "paste", - "sqlparser", - "strum", - "strum_macros", -] - -[[package]] -name = "datafusion-optimizer" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown 0.14.3", - "itertools 0.12.1", - "log", - "regex-syntax 0.8.2", -] - -[[package]] -name = "datafusion-physical-expr" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "base64 0.21.7", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "half", - "hashbrown 0.14.3", - "hex", - "indexmap", - 
"itertools 0.12.1", - "log", - "md-5", - "paste", - "petgraph", - "rand 0.8.5", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] - -[[package]] -name = "datafusion-physical-plan" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "futures", - "half", - "hashbrown 0.14.3", - "indexmap", - "itertools 0.12.1", - "log", - "once_cell", - "parking_lot", - "pin-project-lite", - "rand 0.8.5", - "tokio", - "uuid", -] - -[[package]] -name = "datafusion-sql" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" -dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-expr", - "log", - "sqlparser", -] - -[[package]] -name = "datafusion-substrait" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad6bef77af3d8a697ae63ffbcb5aa66b74cd08ea93a31e2e757da75b2f1452f" -dependencies = [ - "async-recursion", - "chrono", - "datafusion", - "itertools 0.12.1", - "object_store", - "prost", - "prost-types", - "substrait", -] - [[package]] name = "deranged" version = "0.3.11" @@ -1718,20 +647,8 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.48.0", -] - -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - -[[package]] -name = "dyn-clone" -version = "1.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" + "windows-sys 0.48.0", +] [[package]] name = "dyn-stack" @@ -1792,15 +709,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "error-chain" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" -dependencies = [ - "version_check", -] - [[package]] name = "esaxx-rs" version = "0.1.10" @@ -1813,22 +721,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - [[package]] name = "flate2" version = "1.0.28" @@ -2099,10 +991,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi", - "wasm-bindgen", ] [[package]] @@ -2111,19 +1001,6 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" -[[package]] -name = "git2" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd" -dependencies = [ - "bitflags 2.4.2", - "libc", - "libgit2-sys", - "log", - "url", -] - [[package]] name = "gitignore" version = "0.1.0" @@ -2172,24 +1049,11 @@ dependencies = [ "rand_distr", ] 
-[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash", -] - [[package]] name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "heck" @@ -2203,12 +1067,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - [[package]] name = "hf-hub" version = "0.3.2" @@ -2277,604 +1135,172 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" [[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "hyper" -version = "0.14.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http", - "hyper", - "log", - "rustls 0.21.10", - "rustls-native-certs", - "tokio", - "tokio-rustls", -] - -[[package]] -name = "hyper-tls" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" -dependencies = [ - "bytes", - "hyper", - "native-tls", - "tokio", - "tokio-native-tls", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "2.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" -dependencies = [ - "equivalent", - "hashbrown 0.14.3", -] - -[[package]] -name = "indicatif" -version = "0.17.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" -dependencies = [ - "console", - "instant", - "number_prefix", - "portable-atomic", - "unicode-width", -] - -[[package]] -name = "inout" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" -dependencies = [ - "generic-array", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "ipnet" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" - -[[package]] -name = "jobserver" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" -dependencies = [ - "libc", -] 
- -[[package]] -name = "js-sys" -version = "0.3.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lance" -version = "0.9.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fc899b783c767084aab6c416f762c066d55ab28d3a3f4f2f9b37b1cf92d9be5" -dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "async-recursion", - "async-trait", - "async_cell", - "aws-credential-types", - "aws-sdk-dynamodb", - "byteorder", - "bytes", - "chrono", - "dashmap", - "datafusion", - "datafusion-physical-expr", - "futures", - "half", - "itertools 0.12.1", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-datagen", - "lance-file", - "lance-index", - "lance-io", - "lance-linalg", - "lance-table", - "lazy_static", - "log", - "lru_time_cache", - "lzma-sys", - "moka", - "num_cpus", - "object_store", - "ordered-float 3.9.2", - "pin-project", - "prost", - "prost-build", - "rand 0.8.5", - "roaring", - "serde", - "serde_json", - "snafu", - "tempfile", - "tokio", - "tracing", - "url", - "uuid", -] - -[[package]] -name = "lance-arrow" -version = "0.9.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15bd560b201db5ab6785bb5dc9a13b4f2ed202328cdfebbc61cd17148babc1a9" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "getrandom", - "half", - "num-traits", - "rand 0.8.5", - "serde", -] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] -name = "lance-core" -version = "0.9.15" +name = "hyper" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3fc7f1ea751bedef2ee185f35ec030a9e76bf3add6944fc652202acb917c529a" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", - "async-trait", - "byteorder", "bytes", - "chrono", - "datafusion-common", - "datafusion-sql", - "futures", - "lance-arrow", - "lazy_static", - "mock_instant", - "moka", - "object_store", - "pin-project", - "prost", - "rand 0.8.5", - "roaring", - "serde_json", - "snafu", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", "tokio", + "tower-service", "tracing", - "url", + "want", ] [[package]] -name = "lance-datafusion" -version = "0.9.15" +name = "hyper-rustls" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2809b09155b1867a82d0e92e4754268c75412421a6acf70a01ea043222845b28" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ - "arrow", - "arrow-array", - "arrow-ord", - "arrow-schema", - "async-trait", - "datafusion", - "datafusion-common", - "datafusion-physical-expr", - "datafusion-substrait", - "futures", - "lance-arrow", - "lance-core", - "prost", - "snafu", - "substrait", + "futures-util", + "http", + "hyper", + "rustls 0.21.10", "tokio", + "tokio-rustls", ] [[package]] -name = "lance-datagen" -version = "0.9.15" +name = "hyper-tls" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2812c1d9b08a3dfdd7f006d2cdbffdfba3db0f1b56ce01e11cfbcf9e3afefe98" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ - "arrow", - "arrow-array", - "arrow-cast", - "arrow-schema", - "chrono", - "rand 0.8.5", - "rand_xoshiro", + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", ] [[package]] -name = "lance-file" -version = "0.9.15" +name = "ident_case" 
+version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ea440571db38d0f9fe5a592a77deba16aba3575474d2deaf752103c61360c2" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "arrow-select", - "async-recursion", - "async-trait", - "datafusion-common", - "futures", - "lance-arrow", - "lance-core", - "lance-io", - "num-traits", - "num_cpus", - "object_store", - "prost", - "prost-build", - "roaring", - "snafu", - "tokio", - "tracing", -] +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] -name = "lance-index" -version = "0.9.15" +name = "idna" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090b15c514e6547973b3a41fefbd32b00c58cbc6dcd8d7ab835b1e56f93ca2cf" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ - "arrow", - "arrow-array", - "arrow-ord", - "arrow-schema", - "arrow-select", - "async-recursion", - "async-trait", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-sql", - "futures", - "half", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-file", - "lance-io", - "lance-linalg", - "lance-table", - "log", - "num-traits", - "num_cpus", - "object_store", - "prost", - "prost-build", - "rand 0.8.5", - "roaring", - "rustc_version", - "serde", - "serde_json", - "snafu", - "tempfile", - "tokio", - "tracing", + "unicode-bidi", + "unicode-normalization", ] [[package]] -name = "lance-io" -version = "0.9.15" +name = "indexmap" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e3ac4aab6ce0b19be9fddf0b7fde6fb6a4133c3dad38bbda658e2413544da40" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", 
- "async-recursion", - "async-trait", - "aws-config", - "aws-credential-types", - "byteorder", - "bytes", - "chrono", - "futures", - "lance-arrow", - "lance-core", - "lazy_static", - "num_cpus", - "object_store", - "pin-project", - "prost", - "prost-build", - "shellexpand", - "snafu", - "tokio", - "tracing", - "url", + "equivalent", + "hashbrown", ] [[package]] -name = "lance-linalg" -version = "0.9.15" +name = "indicatif" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "811c661ca43a3d192b9a0cc13f2bca62ee6a0ee2db021266a48ac91e243dfa27" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" dependencies = [ - "arrow-array", - "arrow-ord", - "arrow-schema", - "cc", - "futures", - "half", - "lance-arrow", - "lance-core", - "log", - "num-traits", - "num_cpus", - "rand 0.8.5", - "tokio", - "tracing", + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", ] [[package]] -name = "lance-table" -version = "0.9.15" +name = "inout" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea98b16bfd2f87ba49879a107388f62c6f6aa57c5b93115ace62cd85186ededd" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", - "async-trait", - "aws-credential-types", - "aws-sdk-dynamodb", - "byteorder", - "bytes", - "chrono", - "futures", - "lance-arrow", - "lance-core", - "lance-file", - "lance-io", - "lazy_static", - "log", - "object_store", - "prost", - "prost-build", - "prost-types", - "rand 0.8.5", - "roaring", - "serde", - "serde_json", - "snafu", - "tokio", - "tracing", - "url", - "uuid", + "generic-array", ] [[package]] -name = "lance-testing" -version = "0.9.15" +name = "instant" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fd44f068d692b035784b1074b806a2cb1131b2ba614869f98727c2dc3df4fe96" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ - "arrow-array", - "arrow-schema", - "lance-arrow", - "num-traits", - "rand 0.8.5", + "cfg-if", ] [[package]] -name = "lazy_static" -version = "1.4.0" +name = "ipnet" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] -name = "lexical-core" -version = "0.8.5" +name = "itertools" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "either", ] [[package]] -name = "lexical-parse-float" -version = "0.8.5" +name = "itertools" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", + "either", ] [[package]] -name = "lexical-parse-integer" -version = "0.8.6" +name = "itoa" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] -name = "lexical-util" -version = "0.8.5" +name = "jobserver" +version = "0.1.28" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ - "static_assertions", + "libc", ] [[package]] -name = "lexical-write-float" -version = "0.8.5" +name = "js-sys" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", + "wasm-bindgen", ] [[package]] -name = "lexical-write-integer" -version = "0.8.5" +name = "lazy_static" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" @@ -2882,18 +1308,6 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" -[[package]] -name = "libgit2-sys" -version = "0.16.2+1.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" -dependencies = [ - "cc", - "libc", - "libz-sys", - "pkg-config", -] - [[package]] name = "libm" version = "0.2.8" @@ -2911,18 +1325,6 @@ dependencies = [ "redox_syscall", ] -[[package]] -name = "libz-sys" -version = "1.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "linux-raw-sys" version = "0.4.13" @@ 
-2933,8 +1335,6 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" name = "llm-ls" version = "0.5.2" dependencies = [ - "arrow-array", - "arrow-schema", "candle-core", "candle-nn", "candle-transformers", @@ -2944,7 +1344,6 @@ dependencies = [ "gitignore", "hf-hub", "home", - "lance-linalg", "lsp-client", "reqwest", "ropey", @@ -2983,7 +1382,6 @@ dependencies = [ "tree-sitter-swift", "tree-sitter-typescript", "uuid", - "vectordb", ] [[package]] @@ -3002,12 +1400,6 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" -[[package]] -name = "lru_time_cache" -version = "0.11.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9106e1d747ffd48e6be5bb2d97fa706ed25b144fbee4d5c02eae110cd8d6badd" - [[package]] name = "lsp-client" version = "0.1.0" @@ -3032,35 +1424,6 @@ dependencies = [ "url", ] -[[package]] -name = "lz4_flex" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "mach2" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" -dependencies = [ - "libc", -] - [[package]] name = "macro_rules_attribute" version = "0.2.0" @@ -3092,16 +1455,6 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" -[[package]] -name = "md-5" -version = "0.10.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - [[package]] name = "memchr" version = "2.7.1" @@ -3150,15 +1503,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "mock_instant" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9366861eb2a2c436c20b12c8dbec5f798cea6b47ad99216be0282942e2c81ea0" -dependencies = [ - "once_cell", -] - [[package]] name = "mock_server" version = "0.1.0" @@ -3168,28 +1512,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "moka" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa6e72583bf6830c956235bff0d5afec8cf2952f579ebad18ae7821a917d950f" -dependencies = [ - "crossbeam-channel", - "crossbeam-epoch", - "crossbeam-utils", - "once_cell", - "parking_lot", - "quanta", - "rustc_version", - "scheduled-thread-pool", - "skeptic", - "smallvec", - "tagptr", - "thiserror", - "triomphe", - "uuid", -] - [[package]] name = "monostate" version = "0.1.11" @@ -3211,12 +1533,6 @@ dependencies = [ "syn 2.0.49", ] -[[package]] -name = "multimap" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" - [[package]] name = "native-tls" version = "0.2.11" @@ -3247,37 +1563,12 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - 
"num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.4" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" dependencies = [ - "autocfg", - "num-integer", - "num-traits", + "overload", + "winapi", ] [[package]] @@ -3296,38 +1587,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.18" @@ -3363,36 +1622,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" -dependencies = [ - "async-trait", - "base64 0.21.7", - "bytes", - "chrono", - "futures", - "humantime", - "hyper", - "itertools 0.12.1", - "parking_lot", - "percent-encoding", - "quick-xml", - "rand 0.8.5", - "reqwest", - "ring 0.17.8", - "rustls-pemfile 2.1.0", - "serde", - 
"serde_json", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "once_cell" version = "1.19.0" @@ -3471,30 +1700,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "3.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" -dependencies = [ - "num-traits", -] - -[[package]] -name = "outref" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" - [[package]] name = "overload" version = "0.1.1" @@ -3524,50 +1729,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "parquet" -version = "50.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.21.7", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown 0.14.3", - "lz4_flex", - "num", - "num-bigint", - "object_store", - "paste", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd 0.13.0", -] - -[[package]] -name = "parse-zoneinfo" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" -dependencies = [ - "regex", -] - [[package]] name = "password-hash" version = "0.4.2" 
@@ -3603,54 +1764,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "petgraph" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "phf" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - -[[package]] -name = "phf_shared" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" -dependencies = [ - "siphasher", -] - [[package]] name = "pin-project" version = "1.1.4" @@ -3707,16 +1820,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "prettyplease" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" -dependencies = [ - "proc-macro2", - "syn 2.0.49", -] - [[package]] name = "proc-macro2" version = "1.0.78" @@ -3726,71 +1829,6 @@ dependencies = 
[ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" -dependencies = [ - "bytes", - "heck", - "itertools 0.11.0", - "log", - "multimap", - "once_cell", - "petgraph", - "prettyplease", - "prost", - "prost-types", - "regex", - "syn 2.0.49", - "tempfile", - "which", -] - -[[package]] -name = "prost-derive" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" -dependencies = [ - "anyhow", - "itertools 0.11.0", - "proc-macro2", - "quote", - "syn 2.0.49", -] - -[[package]] -name = "prost-types" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" -dependencies = [ - "prost", -] - -[[package]] -name = "pulldown-cmark" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" -dependencies = [ - "bitflags 2.4.2", - "memchr", - "unicase", -] - [[package]] name = "pulp" version = "0.18.8" @@ -3803,32 +1841,6 @@ dependencies = [ "reborrow", ] -[[package]] -name = "quanta" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" -dependencies = [ - "crossbeam-utils", - "libc", - "mach2", - "once_cell", - "raw-cpuid", - "wasi", - "web-sys", - "winapi", -] - -[[package]] -name = "quick-xml" -version = "0.31.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "quote" version = "1.0.35" @@ -3906,15 +1918,6 @@ dependencies = [ "rand 0.8.5", ] -[[package]] -name = "rand_xoshiro" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" -dependencies = [ - "rand_core 0.6.4", -] - [[package]] name = "raw-cpuid" version = "10.7.0" @@ -4034,16 +2037,6 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" -[[package]] -name = "regress" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ed9969cad8051328011596bf549629f1b800cf1731e7964b1eef8dfc480d2c2" -dependencies = [ - "hashbrown 0.13.2", - "memchr", -] - [[package]] name = "remove_dir_all" version = "0.5.3" @@ -4079,8 +2072,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls 0.21.10", - "rustls-native-certs", - "rustls-pemfile 1.0.4", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", @@ -4106,21 +2098,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - [[package]] name = "ring" version = "0.17.8" @@ -4131,21 +2108,11 @@ dependencies = [ "cfg-if", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.52.0", ] -[[package]] -name = 
"roaring" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" -dependencies = [ - "bytemuck", - "byteorder", -] - [[package]] name = "ropey" version = "1.6.1" @@ -4162,15 +2129,6 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustix" version = "0.38.31" @@ -4191,7 +2149,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring 0.17.8", + "ring", "rustls-webpki 0.101.7", "sct", ] @@ -4203,25 +2161,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" dependencies = [ "log", - "ring 0.17.8", + "ring", "rustls-pki-types", "rustls-webpki 0.102.2", "subtle", "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework", -] - [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -4231,16 +2177,6 @@ dependencies = [ "base64 0.21.7", ] -[[package]] -name = "rustls-pemfile" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c333bb734fcdedcea57de1602543590f545f127dc8b533324318fd492c5c70b" -dependencies = [ - "base64 0.21.7", - "rustls-pki-types", -] - [[package]] name = 
"rustls-pki-types" version = "1.3.0" @@ -4253,8 +2189,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -4263,9 +2199,9 @@ version = "0.102.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" dependencies = [ - "ring 0.17.8", + "ring", "rustls-pki-types", - "untrusted 0.9.0", + "untrusted", ] [[package]] @@ -4305,40 +2241,7 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "scheduled-thread-pool" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot", -] - -[[package]] -name = "schemars" -version = "0.8.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45a28f4c49489add4ce10783f7911893516f15afe45d015608d41faca6bc4d29" -dependencies = [ - "dyn-clone", - "schemars_derive", - "serde", - "serde_json", -] - -[[package]] -name = "schemars_derive" -version = "0.8.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c767fd6fa65d9ccf9cf026122c1b555f2ef9a4f0cea69da4d7dbc3e258d30967" -dependencies = [ - "proc-macro2", - "quote", - "serde_derive_internals", - "syn 1.0.109", + "windows-sys 0.52.0", ] [[package]] @@ -4353,8 +2256,8 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -4380,15 
+2283,6 @@ dependencies = [ "libc", ] -[[package]] -name = "semver" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" -dependencies = [ - "serde", -] - [[package]] name = "seq-macro" version = "0.3.5" @@ -4415,17 +2309,6 @@ dependencies = [ "syn 2.0.49", ] -[[package]] -name = "serde_derive_internals" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "serde_json" version = "1.0.113" @@ -4467,18 +2350,6 @@ dependencies = [ "syn 2.0.49", ] -[[package]] -name = "serde_tokenstream" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a00ffd23fd882d096f09fcaae2a9de8329a328628e86027e049ee051dc1621f" -dependencies = [ - "proc-macro2", - "quote", - "serde", - "syn 2.0.49", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -4535,15 +2406,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "shellexpand" -version = "3.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da03fa3b94cc19e3ebfc88c4229c49d8f08cdbd1228870a45f0ffdf84988e14b" -dependencies = [ - "dirs", -] - [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -4553,27 +2415,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - -[[package]] -name = "skeptic" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" -dependencies = [ - "bytecount", - "cargo_metadata", - "error-chain", - "glob", - "pulldown-cmark", - 
"tempfile", - "walkdir", -] - [[package]] name = "slab" version = "0.4.9" @@ -4589,34 +2430,6 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" -[[package]] -name = "snafu" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" -dependencies = [ - "doc-comment", - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - [[package]] name = "socket2" version = "0.5.5" @@ -4627,12 +2440,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -4651,39 +2458,12 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "sqlparser" -version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" -dependencies = [ - "log", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.49", -] - [[package]] name = "stable_deref_trait" version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "str_indices" version = "0.4.3" @@ -4702,50 +2482,6 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" -[[package]] -name = "strum" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.49", -] - -[[package]] -name = "substrait" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5478fbd0313a9b0915a1c0e7ebf15b5fed7d7c6dd7229b4f5e32ce75b10f256a" -dependencies = [ - "git2", - "heck", - "prettyplease", - "prost", - "prost-build", - "prost-types", - "schemars", - "semver", - "serde", - "serde_json", - "serde_yaml", - "syn 2.0.49", - "typify", - "walkdir", -] - [[package]] name = "subtle" version = "2.5.0" @@ -4826,12 +2562,6 @@ dependencies = [ "libc", ] -[[package]] -name = "tagptr" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" - [[package]] name = "tempdir" version = "0.3.7" @@ -4912,17 +2642,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "thrift" -version = "0.17.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float 2.10.1", -] - [[package]] name = "time" version = "0.3.34" @@ -4954,15 +2673,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinyvec" version = "1.6.0" @@ -4978,6 +2688,7 @@ version = "0.1.0" dependencies = [ "bincode", "serde", + "tempfile", "thiserror", "tokio", "tracing", @@ -5071,17 +2782,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-stream" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-util" version = "0.7.10" @@ -5496,87 +3196,18 @@ dependencies = [ "tree-sitter", ] -[[package]] -name = "triomphe" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" - [[package]] name = "try-lock" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - [[package]] name = "typenum" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" 
-[[package]] -name = "typify" -version = "0.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63ed4d717aa95e598e2f9183376b060e95669ef8f444701ea6afb990fde1cf69" -dependencies = [ - "typify-impl", - "typify-macro", -] - -[[package]] -name = "typify-impl" -version = "0.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89057244dfade7c58af9e62beccbcbeb7a7e7701697a33b06dbe0b7331fb79cf" -dependencies = [ - "heck", - "log", - "proc-macro2", - "quote", - "regress", - "schemars", - "serde_json", - "syn 2.0.49", - "thiserror", - "unicode-ident", -] - -[[package]] -name = "typify-macro" -version = "0.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddade397f5957d2cd7fb27f905a9a569db20e8e1e3ea589edce40be07b92825" -dependencies = [ - "proc-macro2", - "quote", - "schemars", - "serde", - "serde_json", - "serde_tokenstream", - "syn 2.0.49", - "typify-impl", -] - -[[package]] -name = "unicase" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - [[package]] name = "unicode-bidi" version = "0.3.15" @@ -5631,12 +3262,6 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -5675,12 +3300,6 @@ dependencies = [ "serde", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8parse" version = "0.2.1" @@ 
-5722,50 +3341,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vectordb" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fd0e3e2df01b04b1a9d64c14d98aa629e8074efefad1f2de1291c7b0d28bbe" -dependencies = [ - "arrow", - "arrow-array", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-ord", - "arrow-schema", - "async-trait", - "bytes", - "chrono", - "futures", - "half", - "lance", - "lance-index", - "lance-linalg", - "lance-testing", - "log", - "num-traits", - "object_store", - "serde", - "serde_json", - "snafu", - "tokio", - "url", -] - [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "vsimd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" - [[package]] name = "walkdir" version = "2.4.0" @@ -5904,18 +3485,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi" version = "0.3.9" @@ -5947,15 +3516,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.0", -] - [[package]] 
name = "windows-sys" version = "0.48.0" @@ -6119,12 +3679,6 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155" -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "xshell" version = "0.2.5" @@ -6153,15 +3707,6 @@ dependencies = [ "zip", ] -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yoke" version = "0.7.3" @@ -6186,26 +3731,6 @@ dependencies = [ "synstructure", ] -[[package]] -name = "zerocopy" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.49", -] - [[package]] name = "zerofrom" version = "0.1.3" @@ -6242,7 +3767,7 @@ dependencies = [ "aes", "byteorder", "bzip2", - "constant_time_eq 0.1.5", + "constant_time_eq", "crc32fast", "crossbeam-utils", "flate2", @@ -6250,7 +3775,7 @@ dependencies = [ "pbkdf2", "sha1", "time", - "zstd 0.11.2+zstd.1.5.2", + "zstd", ] [[package]] @@ -6259,16 +3784,7 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", -] - -[[package]] -name = "zstd" -version = "0.13.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" -dependencies = [ - "zstd-safe 7.0.0", + "zstd-safe", ] [[package]] @@ -6281,15 +3797,6 @@ dependencies = [ "zstd-sys", ] -[[package]] -name = "zstd-safe" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" -dependencies = [ - "zstd-sys", -] - [[package]] name = "zstd-sys" version = "2.0.9+zstd.1.5.5" diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index c1fe39f..b1664b3 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -7,8 +7,6 @@ edition = "2021" name = "llm-ls" [dependencies] -arrow-array = "50" -arrow-schema = "50" candle = { version = "0.3", package = "candle-core", default-features = false } candle-nn = "0.3" candle-transformers = "0.3" @@ -18,7 +16,6 @@ futures-util = "0.3" gitignore = { path = "../gitignore" } hf-hub = { version = "0.3", features = ["tokio"] } home = "0.5" -lance-linalg = "0.9.9" ropey = { version = "1.6", default-features = false, features = [ "simd", "cr_lines", @@ -67,7 +64,6 @@ tree-sitter-rust = "0.20" tree-sitter-scala = "0.20" tree-sitter-swift = "0.4" tree-sitter-typescript = "0.20" -vectordb = "0.4" [dependencies.uuid] version = "1.4" diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index 076baed..a20daff 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -15,8 +15,6 @@ pub(crate) fn internal_error(err: E) -> LspError { #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("arrow error: {0}")] - Arrow(#[from] arrow_schema::ArrowError), #[error("candle error: {0}")] Candle(#[from] candle::Error), #[error("gitignore error: {0}")] @@ -59,8 +57,6 @@ pub enum Error { TokioJoin(#[from] tokio::task::JoinError), #[error("unknown backend: {0}")] UnknownBackend(String), - #[error("vector db error: {0}")] - 
VectorDb(#[from] vectordb::error::Error), } pub(crate) type Result = std::result::Result; diff --git a/crates/tinyvec-embed/Cargo.toml b/crates/tinyvec-embed/Cargo.toml index ba6ffdf..f9e0349 100644 --- a/crates/tinyvec-embed/Cargo.toml +++ b/crates/tinyvec-embed/Cargo.toml @@ -9,7 +9,7 @@ authors.workspace = true [dependencies] bincode = "1" -serde = "1" +serde = { version = "1", features = ["derive"] } thiserror = "1" tokio = { version = "1", features = [ "fs", @@ -21,4 +21,7 @@ tracing = "0.1" [dependencies.uuid] version = "1.7.0" -features = ["v4", "fast-rng", "macro-diagnostics"] +features = ["v4", "fast-rng", "macro-diagnostics", "serde"] + +[dev-dependencies] +tempfile = "3" diff --git a/crates/tinyvec-embed/src/db.rs b/crates/tinyvec-embed/src/db.rs index ea4e3a5..9316692 100644 --- a/crates/tinyvec-embed/src/db.rs +++ b/crates/tinyvec-embed/src/db.rs @@ -112,10 +112,13 @@ impl Collection { &self, query: &[f32], k: usize, - filter: Option bool>, + filter: Option, ) -> Result> { let embeddings = if let Some(filter) = filter { - self.embeddings.iter().filter(filter).collect::>() + self.embeddings + .iter() + .filter(filter.build()) + .collect::>() } else { self.embeddings.iter().collect::>() }; @@ -136,12 +139,12 @@ impl Collection { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Embedding { pub id: Uuid, - pub metadata: Option>, + pub metadata: Option>, pub vector: Vec, } impl Embedding { - pub fn new(vector: Vec, metadata: Option>) -> Self { + pub fn new(vector: Vec, metadata: Option>) -> Self { Self { id: Uuid::new_v4(), metadata, @@ -150,6 +153,39 @@ impl Embedding { } } +impl PartialEq for Embedding { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Eq for Embedding {} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize)] +pub enum Value { + String(String), + Number(f32), +} + +impl From for Value { + fn from(value: f32) -> Self { + Self::Number(value) + } +} + +impl From<&str> for Value { + fn 
from(value: &str) -> Self { + Self::String(value.to_owned()) + } +} + +impl From for Value { + fn from(value: String) -> Self { + Self::String(value) + } +} + +#[derive(Debug)] pub enum Compare { Eq, Neq, @@ -157,14 +193,14 @@ pub enum Compare { Lt, } -#[derive(Clone)] +#[derive(Clone, Debug)] enum Chain { And, Or, } pub struct FilterBuilder { - filter: Vec<(String, Compare, String, Option)>, + filter: Vec<(String, Compare, Value, Option)>, } impl FilterBuilder { @@ -173,25 +209,30 @@ impl FilterBuilder { } pub fn and(mut self) -> Self { - self.filter - .last_mut() - .map(|c| c.3.as_mut().map(|c| *c = Chain::And)); + if let Some(c) = self.filter.last_mut() { + c.3 = Some(Chain::And); + }; self } pub fn or(mut self) -> Self { - self.filter - .last_mut() - .map(|c| c.3.as_mut().map(|c| *c = Chain::Or)); + if let Some(c) = self.filter.last_mut() { + c.3 = Some(Chain::Or); + } self } - pub fn condtion(mut self, lhs: String, op: Compare, rhs: String) -> Self { - self.filter.push((lhs, op, rhs, None)); + pub fn comparison(mut self, key: String, op: Compare, value: Value) -> Self { + assert!( + self.filter.last().map(|c| c.3.is_some()).unwrap_or(true), + "Missing chain operator in filter" + ); + self.filter.push((key, op, value, None)); self } - pub fn build(self) -> impl Fn(&&Embedding) -> bool { + // XXX: we assume the user will chain filters correctly + fn build(self) -> impl Fn(&&Embedding) -> bool { move |e| { let mut ret = true; let mut prev = None; @@ -223,6 +264,8 @@ impl FilterBuilder { Chain::And => ret = ret && cond_res, Chain::Or => ret = ret || cond_res, } + } else { + ret = cond_res; } prev = condition.3.clone(); } @@ -239,7 +282,7 @@ async fn get_similarity( ) -> Result> { let semaphore = Arc::new(Semaphore::new(8)); let mut set = JoinSet::new(); - for (index, embedding) in embeddings.into_iter().enumerate() { + for (index, embedding) in embeddings.iter().enumerate() { let embedding = (*embedding).clone(); let query = query.to_owned(); let permit = 
semaphore.clone().acquire_owned().await.unwrap(); @@ -270,3 +313,121 @@ async fn get_similarity( }) .collect()) } + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::*; + + #[tokio::test] + async fn simple_similarity() { + let temp_dir = TempDir::new().expect("failed to create tempt dir"); + let db_path = temp_dir.path().join("embeddings.db"); + let mut db = match Db::open(db_path) { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + let mut col = match db.create_collection("test".to_owned(), 5, Distance::Cosine) { + Ok(col) => col, + Err(err) => panic!("{}", err.to_string()), + }; + let embedding = Embedding::new( + vec![0.9999695, 0.76456239, 0.86767905, 0.17577756, 0.9949882], + None, + ); + col.insert(embedding.clone()) + .expect("faield to insert embedding"); + + let expected = SimilarityResult { + score: 0.7449362, + embedding, + }; + let results = col + .get( + &[0.5902804, 0.516834, 0.12403694, 0.8444756, 0.4672038], + 1, + None, + ) + .await + .expect("failed to get most similar embeddings"); + let actual = results + .first() + .expect("missing embedding in similarity result"); + assert!((expected.score - actual.score).abs() <= f32::EPSILON); + assert_eq!(expected.embedding.id, actual.embedding.id); + } + + #[tokio::test] + async fn filter() { + let temp_dir = TempDir::new().expect("failed to create tempt dir"); + let db_path = temp_dir.path().join("embeddings.db"); + let mut db = match Db::open(db_path) { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + let mut col = match db.create_collection("test".to_owned(), 5, Distance::Cosine) { + Ok(col) => col, + Err(err) => panic!("{}", err.to_string()), + }; + let embedding1 = Embedding::new( + vec![0.5880849, 0.25781349, 0.32253786, 0.80958734, 0.8591076], + Some(HashMap::from([ + ("i".to_owned(), 32.0.into()), + ("j".to_owned(), 10.0.into()), + ])), + ); + col.insert(embedding1.clone()) + .expect("faield to insert embedding"); + let embedding2 = 
Embedding::new( + vec![0.43717385, 0.21100248, 0.5068433, 0.9626808, 0.6763327], + Some(HashMap::from([ + ("i".to_owned(), 7.0.into()), + ("j".to_owned(), 100.0.into()), + ])), + ); + col.insert(embedding2.clone()) + .expect("faield to insert embedding"); + let embedding3 = Embedding::new( + vec![0.2630481, 0.24888718, 0.3375401, 0.92770165, 0.44944693], + Some(HashMap::from([ + ("i".to_owned(), 29.0.into()), + ("j".to_owned(), 16.0.into()), + ])), + ); + col.insert(embedding3.clone()) + .expect("faield to insert embedding"); + let embedding4 = Embedding::new( + vec![0.7642892, 0.47043378, 0.9035855, 0.31120034, 0.5757918], + Some(HashMap::from([ + ("i".to_owned(), 3.0.into()), + ("j".to_owned(), 110.0.into()), + ])), + ); + col.insert(embedding4.clone()) + .expect("faield to insert embedding"); + + let results = col + .get( + &[0.09537213, 0.5104327, 0.69980987, 0.13146928, 0.30541683], + 4, + Some( + col.filter() + .comparison("i".to_owned(), Compare::Lt, 25.0.into()) + .and() + .comparison("j".to_owned(), Compare::Gt, 50.0.into()), + ), + ) + .await + .expect("failed to get most similar embeddings"); + let actual_scores: Vec = results.iter().map(|r| r.score).collect(); + let expected_scores: Vec = vec![0.8701641, 0.6552329]; + assert!(expected_scores + .iter() + .zip(actual_scores.iter()) + .all(|(e, a)| { (e - a).abs() <= f32::EPSILON })); + let expected_embeddings = vec![embedding4, embedding2]; + let actual_embeddings: Vec = results.into_iter().map(|r| r.embedding).collect(); + assert_eq!(expected_embeddings, actual_embeddings); + } +} From 10601b112db92758599bee56d236f2f35169b0de Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 20 Feb 2024 12:25:32 +0100 Subject: [PATCH 09/22] feat: make tinyvec-embed thread safe --- Cargo.lock | 1 + crates/llm-ls/Cargo.toml | 1 + crates/llm-ls/src/retrieval.rs | 21 ++- crates/tinyvec-embed/src/db.rs | 218 +++++++++++++++++++++++------- crates/tinyvec-embed/src/error.rs | 10 +- crates/tinyvec-embed/src/lib.rs | 6 +- 6 
files changed, 186 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 539aab9..1123807 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1350,6 +1350,7 @@ dependencies = [ "serde", "serde_json", "thiserror", + "tinyvec-embed", "tokenizers", "tokio", "tower-lsp", diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index b1664b3..d0e9707 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -27,6 +27,7 @@ reqwest = { version = "0.11", default-features = false, features = [ serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror = "1" +tinyvec-embed = { path = "../tinyvec-embed" } tokenizers = { version = "0.15", default-features = false, features = ["onig"] } tokio = { version = "1", features = [ "fs", diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 9476a3f..60aac70 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -1,7 +1,4 @@ use crate::error::Result; -use arrow_array::builder::{FixedSizeListBuilder, Float32Builder}; -use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt32Array}; -use arrow_schema::{DataType, Field, Schema}; use candle::utils::{cuda_is_available, metal_is_available}; use candle::{Device, Tensor}; use candle_nn::VarBuilder; @@ -9,23 +6,21 @@ use candle_transformers::models::bert::{BertModel, Config, DTYPE}; use futures_util::StreamExt; use gitignore::Gitignore; use hf_hub::{api::tokio::Api, Repo, RepoType}; -use lance_linalg::distance::MetricType; use std::collections::VecDeque; use std::path::Path; use std::{path::PathBuf, sync::Arc}; +use tinyvec_embed::db::Db; use tokenizers::Tokenizer; use tokio::io::AsyncReadExt; use tokio::task::spawn_blocking; +use tokio::time::Instant; use tower_lsp::lsp_types::notification::Progress; use tower_lsp::lsp_types::{ NumberOrString, ProgressParams, ProgressParamsValue, Range, WorkDoneProgress, WorkDoneProgressReport, }; use tower_lsp::Client; -use 
tracing::{error, info, warn}; -use vectordb::error::Error; -use vectordb::table::ReadParams; -use vectordb::{Connection, Database, Table}; +use tracing::{debug, error, info, warn}; // TODO: // - create sliding window and splitting of files logic @@ -144,6 +139,7 @@ fn is_code_file(file_name: &Path) -> bool { } async fn build_model_and_tokenizer() -> Result<(BertModel, Tokenizer)> { + let start = Instant::now(); let device = device(false)?; let model_id = "bigcode/starencoder".to_string(); let revision = "main".to_string(); @@ -162,6 +158,7 @@ async fn build_model_and_tokenizer() -> Result<(BertModel, Tokenizer)> { let vb = VarBuilder::from_pth(&weights_filename, DTYPE, &device)?; let model = BertModel::load(vb, &config)?; + debug!("loaded model in {:.2?}", start.elapsed()); Ok((model, tokenizer)) } @@ -185,9 +182,9 @@ fn device(cpu: bool) -> Result { } } -async fn initialse_database(cache_path: PathBuf) -> Arc { +async fn initialse_database(cache_path: PathBuf) -> Arc { let uri = cache_path.join("database"); - let db = Database::connect(uri.to_str().expect("path should be utf8")) + let db = Db::open(uri.to_str().expect("path should be utf8")) .await .expect("failed to open database"); match db @@ -242,7 +239,7 @@ async fn initialse_database(cache_path: PathBuf) -> Arc { } pub(crate) struct SnippetRetriever { - db: Arc, + db: Arc, model: Arc, tokenizer: Tokenizer, window_size: usize, @@ -431,7 +428,7 @@ impl SnippetRetriever { let mut results = self .db .search(&[0.]) - .metric_type(MetricType::Cosine) + .metric_type(Distance::Cosine) .filter(&filter) .execute_stream() .await?; diff --git a/crates/tinyvec-embed/src/db.rs b/crates/tinyvec-embed/src/db.rs index 9316692..53facbf 100644 --- a/crates/tinyvec-embed/src/db.rs +++ b/crates/tinyvec-embed/src/db.rs @@ -1,91 +1,128 @@ use serde::{Deserialize, Serialize}; use std::{ collections::{BinaryHeap, HashMap}, - fs, path::{Path, PathBuf}, sync::Arc, }; -use tokio::{sync::Semaphore, task::JoinSet}; +use tokio::{ + fs, + 
sync::{RwLock, Semaphore}, + task::JoinSet, +}; use tracing::debug; use uuid::Uuid; use crate::{ - error::{Collection as Error, Result}, + error::{Collection as CollectionError, Error, Result}, similarity::{Distance, ScoreIndex}, }; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Clone, Debug)] pub struct Db { - pub collections: HashMap, - pub location: PathBuf, + inner: Arc>, +} + +#[derive(Clone, Debug)] +pub struct DbInner { + collections: HashMap>>, + location: PathBuf, } impl Db { - pub fn open(path: impl AsRef) -> Result { + /// Opens a database from disk or creates a new one if it doesn't exist + pub async fn open(path: impl AsRef) -> Result { let path = path.as_ref(); + let mut inner = DbInner { + collections: HashMap::new(), + location: path.to_path_buf(), + }; if !path.exists() { debug!("Creating database store"); - fs::create_dir_all( - path.parent() - .ok_or(Error::InvalidPath(path.to_path_buf()))?, - ) - .map_err(Into::::into)?; + fs::create_dir_all(path).await?; return Ok(Self { - collections: HashMap::new(), - location: path.to_path_buf(), + inner: Arc::new(RwLock::new(inner)), }); } debug!("Loading database from store"); - let db = fs::read(path).map_err(Into::::into)?; - Ok(bincode::deserialize(&db[..]).map_err(Into::::into)?) + + let mut entries = fs::read_dir(path).await?; + while let Some(entry) = entries.next_entry().await? { + let entry_type = entry.file_type().await?; + if entry_type.is_file() { + let col = fs::read(entry.path()).await?; + let col = bincode::deserialize(&col[..])?; + let name = entry + .file_name() + .to_str() + .ok_or(Error::InvalidFileName)? + .to_owned(); + inner.collections.insert(name, Arc::new(RwLock::new(col))); + } else { + // warning? 
+ } + } + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) } - pub fn create_collection( + pub async fn create_collection( &mut self, name: String, dimension: usize, distance: Distance, - ) -> Result { - if self.collections.contains_key(&name) { - return Err(Error::UniqueViolation.into()); + ) -> Result>> { + if self.inner.read().await.collections.contains_key(&name) { + return Err(CollectionError::UniqueViolation.into()); } - let collection = Collection { + let collection = Arc::new(RwLock::new(Collection { dimension, distance, embeddings: Vec::new(), - }; + })); - self.collections.insert(name, collection.clone()); + self.inner + .write() + .await + .collections + .insert(name, collection.clone()); Ok(collection) } - pub fn delete_collection(&mut self, name: &str) { - self.collections.remove(name); + /// Removes a collection from [`Db`]. + /// + /// The [`Collection`] will still exist in memory for as long as you hold a copy, given it is + /// wrapped in an `Arc`. + pub async fn delete_collection(&mut self, name: &str) { + self.inner.write().await.collections.remove(name); } - pub fn get_collection(&self, name: &str) -> Result<&Collection> { - self.collections.get(name).ok_or(Error::NotFound.into()) + pub async fn get_collection(&self, name: &str) -> Result>> { + self.inner + .read() + .await + .collections + .get(name) + .ok_or(CollectionError::NotFound.into()) + .cloned() } - fn save_to_store(&self) -> Result<()> { - let db = bincode::serialize(self).map_err(Into::::into)?; + /// Save database to disk + pub async fn save(&self) -> Result<()> { + let inner = self.inner.read().await; + for (name, collection) in inner.collections.iter() { + let db = bincode::serialize(&*collection.read().await)?; - fs::write(self.location.as_path(), db).map_err(Into::::into)?; + fs::write(inner.location.as_path().join(name), db).await?; + } Ok(()) } } -impl Drop for Db { - fn drop(&mut self) { - debug!("Saving database to store"); - let _ = self.save_to_store(); - } -} - 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SimilarityResult { score: f32, @@ -104,7 +141,7 @@ pub struct Collection { } impl Collection { - pub fn filter(&self) -> FilterBuilder { + pub fn filter() -> FilterBuilder { FilterBuilder::new() } @@ -127,13 +164,21 @@ impl Collection { pub fn insert(&mut self, embedding: Embedding) -> Result<()> { if embedding.vector.len() != self.dimension { - return Err(Error::DimensionMismatch.into()); + return Err(CollectionError::DimensionMismatch.into()); } self.embeddings.push(embedding); Ok(()) } + + pub fn len(&self) -> usize { + self.embeddings.len() + } + + pub fn is_empty(&self) -> bool { + self.embeddings.is_empty() + } } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -274,6 +319,12 @@ impl FilterBuilder { } } +impl Default for FilterBuilder { + fn default() -> Self { + Self::new() + } +} + async fn get_similarity( distance: Distance, embeddings: &[&Embedding], @@ -295,7 +346,7 @@ async fn get_similarity( let mut heap = BinaryHeap::new(); while let Some(res) = set.join_next().await { - let score_index = res.map_err(Into::::into)?; + let score_index = res.map_err(Into::::into)?; if heap.len() < k || score_index < *heap.peek().unwrap() { heap.push(score_index); @@ -323,12 +374,15 @@ mod tests { #[tokio::test] async fn simple_similarity() { let temp_dir = TempDir::new().expect("failed to create tempt dir"); - let db_path = temp_dir.path().join("embeddings.db"); - let mut db = match Db::open(db_path) { + let db_path = temp_dir.path().join("embeddings"); + let mut db = match Db::open(db_path).await { Ok(db) => db, Err(err) => panic!("{}", err.to_string()), }; - let mut col = match db.create_collection("test".to_owned(), 5, Distance::Cosine) { + let col = match db + .create_collection("test".to_owned(), 5, Distance::Cosine) + .await + { Ok(col) => col, Err(err) => panic!("{}", err.to_string()), }; @@ -336,7 +390,9 @@ mod tests { vec![0.9999695, 0.76456239, 0.86767905, 0.17577756, 0.9949882], None, ); - 
col.insert(embedding.clone()) + col.write() + .await + .insert(embedding.clone()) .expect("faield to insert embedding"); let expected = SimilarityResult { @@ -344,6 +400,8 @@ mod tests { embedding, }; let results = col + .read() + .await .get( &[0.5902804, 0.516834, 0.12403694, 0.8444756, 0.4672038], 1, @@ -361,12 +419,15 @@ mod tests { #[tokio::test] async fn filter() { let temp_dir = TempDir::new().expect("failed to create tempt dir"); - let db_path = temp_dir.path().join("embeddings.db"); - let mut db = match Db::open(db_path) { + let db_path = temp_dir.path().join("embeddings"); + let mut db = match Db::open(db_path).await { Ok(db) => db, Err(err) => panic!("{}", err.to_string()), }; - let mut col = match db.create_collection("test".to_owned(), 5, Distance::Cosine) { + let col = match db + .create_collection("test".to_owned(), 5, Distance::Cosine) + .await + { Ok(col) => col, Err(err) => panic!("{}", err.to_string()), }; @@ -377,7 +438,9 @@ mod tests { ("j".to_owned(), 10.0.into()), ])), ); - col.insert(embedding1.clone()) + col.write() + .await + .insert(embedding1.clone()) .expect("faield to insert embedding"); let embedding2 = Embedding::new( vec![0.43717385, 0.21100248, 0.5068433, 0.9626808, 0.6763327], @@ -386,7 +449,9 @@ mod tests { ("j".to_owned(), 100.0.into()), ])), ); - col.insert(embedding2.clone()) + col.write() + .await + .insert(embedding2.clone()) .expect("faield to insert embedding"); let embedding3 = Embedding::new( vec![0.2630481, 0.24888718, 0.3375401, 0.92770165, 0.44944693], @@ -395,7 +460,9 @@ mod tests { ("j".to_owned(), 16.0.into()), ])), ); - col.insert(embedding3.clone()) + col.write() + .await + .insert(embedding3.clone()) .expect("faield to insert embedding"); let embedding4 = Embedding::new( vec![0.7642892, 0.47043378, 0.9035855, 0.31120034, 0.5757918], @@ -404,15 +471,19 @@ mod tests { ("j".to_owned(), 110.0.into()), ])), ); - col.insert(embedding4.clone()) + col.write() + .await + .insert(embedding4.clone()) .expect("faield to 
insert embedding"); let results = col + .read() + .await .get( &[0.09537213, 0.5104327, 0.69980987, 0.13146928, 0.30541683], 4, Some( - col.filter() + Collection::filter() .comparison("i".to_owned(), Compare::Lt, 25.0.into()) .and() .comparison("j".to_owned(), Compare::Gt, 50.0.into()), @@ -430,4 +501,47 @@ mod tests { let actual_embeddings: Vec = results.into_iter().map(|r| r.embedding).collect(); assert_eq!(expected_embeddings, actual_embeddings); } + + #[tokio::test] + async fn storage() { + let temp_dir = TempDir::new().expect("failed to create tempt dir"); + let db_path = temp_dir.path().join("embeddings"); + + let mut db = match Db::open(db_path.as_path()).await { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + assert!(db.inner.read().await.collections.is_empty()); + assert_eq!(db.inner.read().await.location, db_path); + + let col = match db + .create_collection("test".to_owned(), 5, Distance::Cosine) + .await + { + Ok(col) => col, + Err(err) => panic!("{}", err.to_string()), + }; + let embedding = Embedding::new( + vec![0.9999695, 0.76456239, 0.86767905, 0.17577756, 0.9949882], + None, + ); + col.write() + .await + .insert(embedding.clone()) + .expect("faield to insert embedding"); + + db.save().await.expect("failed to save to disk"); + let db = match Db::open(db_path).await { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + assert_eq!(db.inner.read().await.collections.len(), 1); + let col = db + .get_collection("test") + .await + .expect("failed to get collection"); + assert_eq!(col.read().await.len(), 1); + assert_eq!(col.read().await.distance, Distance::Cosine); + assert_eq!(col.read().await.dimension, 5); + } } diff --git a/crates/tinyvec-embed/src/error.rs b/crates/tinyvec-embed/src/error.rs index 0569a53..3f781c3 100644 --- a/crates/tinyvec-embed/src/error.rs +++ b/crates/tinyvec-embed/src/error.rs @@ -2,12 +2,8 @@ use std::path::PathBuf; #[derive(Debug, thiserror::Error)] pub enum Collection { - #[error("bincode 
error: {0}")] - Bincode(#[from] bincode::Error), #[error("The dimension of the vector doesn't match the dimension of the collection")] DimensionMismatch, - #[error("io error: {0}")] - Io(#[from] std::io::Error), #[error("invalid path: {0}")] InvalidPath(PathBuf), #[error("join error: {0}")] @@ -22,8 +18,14 @@ pub enum Collection { #[derive(Debug, thiserror::Error)] pub enum Error { + #[error("bincode error: {0}")] + Bincode(#[from] bincode::Error), #[error("collection error: {0}")] Collection(#[from] Collection), + #[error("a file with an invalid name was found in the database directory")] + InvalidFileName, + #[error("io error: {0}")] + Io(#[from] std::io::Error), } pub type Result = std::result::Result; diff --git a/crates/tinyvec-embed/src/lib.rs b/crates/tinyvec-embed/src/lib.rs index 97a06b3..32e288f 100644 --- a/crates/tinyvec-embed/src/lib.rs +++ b/crates/tinyvec-embed/src/lib.rs @@ -1,3 +1,3 @@ -mod db; -mod error; -mod similarity; +pub mod db; +pub mod error; +pub mod similarity; From 1cd0a05a1a3a4a7764331a77c96532dc2ee76fb0 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 20 Feb 2024 20:00:05 +0100 Subject: [PATCH 10/22] feat: replace lancedb with tinyvec-embed --- crates/llm-ls/src/document.rs | 12 +- crates/llm-ls/src/error.rs | 10 + crates/llm-ls/src/main.rs | 38 +++- crates/llm-ls/src/retrieval.rs | 331 ++++++++++++++++-------------- crates/tinyvec-embed/src/db.rs | 127 +++++++++++- crates/tinyvec-embed/src/error.rs | 8 + 6 files changed, 360 insertions(+), 166 deletions(-) diff --git a/crates/llm-ls/src/document.rs b/crates/llm-ls/src/document.rs index 7ac6af4..04ef614 100644 --- a/crates/llm-ls/src/document.rs +++ b/crates/llm-ls/src/document.rs @@ -148,7 +148,11 @@ impl Document { }) } - pub(crate) async fn change(&mut self, range: Range, text: &str) -> Result<()> { + pub(crate) async fn change( + &mut self, + range: Range, + text: &str, + ) -> Result<(usize, usize, usize)> { let start_idx = get_position_idx( &self.text, range.start.line as 
usize, @@ -215,6 +219,10 @@ impl Document { tree.edit(&edit); } self.tree = self.parser.parse(self.text.to_string(), self.tree.as_ref()); - Ok(()) + Ok(( + start_position.row, + old_end_position.row, + new_end_position.row, + )) } } diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index a20daff..ac9a77d 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -19,6 +19,8 @@ pub enum Error { Candle(#[from] candle::Error), #[error("gitignore error: {0}")] Gitignore(#[from] gitignore::Error), + #[error("huggingface api error: {0}")] + HfApi(#[from] hf_hub::api::tokio::ApiError), #[error("http error: {0}")] Http(#[from] reqwest::Error), #[error("io error: {0}")] @@ -33,6 +35,10 @@ pub enum Error { InvalidRepositoryId, #[error("invalid tokenizer path")] InvalidTokenizerPath, + #[error("malformatted embedding metadata, missing {0} field")] + MalformattedEmbeddingMetadata(String), + #[error("embedding has no metadata")] + MissingMetadata, #[error("ollama error: {0}")] Ollama(crate::backend::APIError), #[error("openai error: {0}")] @@ -47,8 +53,12 @@ pub enum Error { Rope(#[from] ropey::Error), #[error("serde json error: {0}")] SerdeJson(#[from] serde_json::Error), + #[error("strip prefix error: {0}")] + StripPrefix(#[from] std::path::StripPrefixError), #[error("tgi error: {0}")] Tgi(crate::backend::APIError), + #[error("tinyvec-embed error: {0}")] + TinyVecEmbed(#[from] tinyvec_embed::error::Error), #[error("tree-sitter language error: {0}")] TreeSitterLanguage(#[from] tree_sitter::LanguageError), #[error("tokenizer error: {0}")] diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index f870a2e..c2853f5 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -669,7 +669,7 @@ impl LanguageServer for LlmService { async fn did_change(&self, params: DidChangeTextDocumentParams) { let uri = params.text_document.uri.to_string(); - if uri == "file:///" { + if params.text_document.uri.scheme() == "file" && 
Path::new(&uri).exists() { return; } if params.content_changes.is_empty() { @@ -689,9 +689,35 @@ impl LanguageServer for LlmService { if let Some(doc) = doc { for change in ¶ms.content_changes { if let Some(range) = change.range { - // TODO: self.snippet_retriever.write().await.update_document(uri).await?; match doc.change(range, &change.text).await { - Ok(()) => info!("{uri} changed"), + Ok((start, old_end, new_end)) => { + let start = Position::new(start as u32, 0); + if let Err(err) = self + .snippet_retriever + .write() + .await + .remove( + uri.clone(), + Range::new(start, Position::new(old_end as u32, 0)), + ) + .await + { + error!("error while removing embeddings: {err}"); + } + if let Err(err) = self + .snippet_retriever + .write() + .await + .update_document( + uri.clone(), + Range::new(start, Position::new(new_end as u32, 0)), + ) + .await + { + error!("error while updating embeddings: {err}"); + } + info!("{uri} changed"); + } Err(err) => error!("error when changing {uri}: {err}"), } } else { @@ -727,6 +753,12 @@ impl LanguageServer for LlmService { .send(()) .await .map_err(internal_error)?; + self.snippet_retriever + .read() + .await + .stop() + .await + .map_err(internal_error)?; Ok(()) } } diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 60aac70..888c271 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -1,15 +1,15 @@ -use crate::error::Result; +use crate::error::{Error, Result}; use candle::utils::{cuda_is_available, metal_is_available}; use candle::{Device, Tensor}; use candle_nn::VarBuilder; use candle_transformers::models::bert::{BertModel, Config, DTYPE}; -use futures_util::StreamExt; use gitignore::Gitignore; use hf_hub::{api::tokio::Api, Repo, RepoType}; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use std::path::Path; use std::{path::PathBuf, sync::Arc}; -use tinyvec_embed::db::Db; +use tinyvec_embed::db::{Collection, Compare, Db, Embedding, 
FilterBuilder, SimilarityResult}; +use tinyvec_embed::similarity::Distance; use tokenizers::Tokenizer; use tokio::io::AsyncReadExt; use tokio::task::spawn_blocking; @@ -65,7 +65,6 @@ fn is_code_file(file_name: &Path) -> bool { "json", "yaml", "yml", - "ini", "toml", "cfg", "conf", @@ -154,11 +153,16 @@ async fn build_model_and_tokenizer() -> Result<(BertModel, Tokenizer)> { }; let config = tokio::fs::read_to_string(config_filename).await?; let config: Config = serde_json::from_str(&config)?; - let tokenizer = Tokenizer::from_file(tokenizer_filename)?; + let mut tokenizer: Tokenizer = Tokenizer::from_file(tokenizer_filename)?; + tokenizer.with_padding(None); + tokenizer.with_truncation(None)?; let vb = VarBuilder::from_pth(&weights_filename, DTYPE, &device)?; let model = BertModel::load(vb, &config)?; - debug!("loaded model in {:.2?}", start.elapsed()); + debug!( + "loaded model and tokenizer in {} ms", + start.elapsed().as_millis() + ); Ok((model, tokenizer)) } @@ -182,64 +186,50 @@ fn device(cpu: bool) -> Result { } } -async fn initialse_database(cache_path: PathBuf) -> Arc { +async fn initialse_database(cache_path: PathBuf) -> Db { let uri = cache_path.join("database"); - let db = Db::open(uri.to_str().expect("path should be utf8")) - .await - .expect("failed to open database"); + let mut db = Db::open(uri).await.expect("failed to open database"); match db - .open_table_with_params("code-slices", ReadParams::default()) + .create_collection("code-slices".to_owned(), 768, Distance::Cosine) .await { - Ok(table) => table, - Err(vectordb::error::Error::TableNotFound { .. 
}) => { - let schema = Arc::new(Schema::new(vec![ - Field::new( - "vector", - DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Float32, true)), - 768, - ), - false, - ), - Field::new("content", DataType::Utf8, false), - Field::new("file_url", DataType::Utf8, false), - Field::new("start_line_no", DataType::UInt32, false), - Field::new("end_line_no", DataType::UInt32, false), - ])); - let batch = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(FixedSizeListBuilder::new(Float32Builder::new(), 768).finish()), - Arc::new(StringArray::from(Vec::<&str>::new())), - Arc::new(StringArray::from(Vec::<&str>::new())), - Arc::new(UInt32Array::from(Vec::::new())), - Arc::new(UInt32Array::from(Vec::::new())), - ], - ) - .expect("failure while defining schema"); - let tbl = db - .create_table( - "code-slices", - Box::new(RecordBatchIterator::new(vec![].into_iter().map(Ok), schema)), - None, - ) - .await - .expect("failed to create table"); - tbl.create_index(&["vector"]) - .ivf_pq() - .num_partitions(256) - .build() - .await - .expect("failed to create index"); - tbl - } - Err(err) => panic!("error while opening table: {}", err), + Ok(_) + | Err(tinyvec_embed::error::Error::Collection( + tinyvec_embed::error::Collection::UniqueViolation, + )) => (), + Err(err) => panic!("failed to create collection: {err}"), + } + db +} + +pub(crate) struct Snippet { + file_url: String, + code: String, +} + +impl TryFrom<&SimilarityResult> for Snippet { + type Error = Error; + + fn try_from(value: &SimilarityResult) -> Result { + let meta = value + .embedding + .metadata + .as_ref() + .ok_or(Error::MissingMetadata)?; + let file_url = meta + .get("file_url") + .ok_or_else(|| Error::MalformattedEmbeddingMetadata("file_url".to_owned()))? + .inner_string()?; + let code = meta + .get("snippet") + .ok_or_else(|| Error::MalformattedEmbeddingMetadata("snippet".to_owned()))? 
+ .inner_string()?; + Ok(Snippet { file_url, code }) } } pub(crate) struct SnippetRetriever { - db: Arc, + db: Db, model: Arc, tokenizer: Tokenizer, window_size: usize, @@ -288,7 +278,7 @@ impl SnippetRetriever { }) .await; let mut stack = VecDeque::new(); - stack.push_back(workspace_root); + stack.push_back(workspace_root.clone()); while let Some(src) = stack.pop_back() { let mut entries = tokio::fs::read_dir(&src).await?; while let Some(entry) = entries.next_entry().await? { @@ -320,7 +310,13 @@ impl SnippetRetriever { token: token.clone(), value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( WorkDoneProgressReport { - message: Some(format!("({i}/{}) done", files.len())), + message: Some(format!( + "{i}/{} ({})", + files.len(), + file.strip_prefix(workspace_root.as_path())? + .to_str() + .expect("expect file name to be valid unicode") + )), ..Default::default() }, )), @@ -331,23 +327,122 @@ impl SnippetRetriever { Ok(()) } - pub(crate) async fn add_document(&mut self, file_url: String) -> Result<()> { + pub(crate) async fn add_document(&self, file_url: String) -> Result<()> { + self.build_and_add_snippets(file_url, 0, None).await?; + Ok(()) + } + + pub(crate) async fn update_document(&mut self, file_url: String, range: Range) -> Result<()> { + self.build_and_add_snippets( + file_url, + range.start.line as usize, + Some(range.end.line as usize), + ) + .await?; + Ok(()) + } + + pub(crate) async fn search( + &self, + snippet: String, + filter: Option, + ) -> Result> { + let col = self.db.get_collection("code-slices").await?; + let query = self + .generate_embedding(self.model.clone(), snippet, self.tokenizer.clone()) + .await?; + let result = col + .read() + .await + .get(&query, 5, filter) + .await? 
+ .iter() + .map(TryInto::try_into) + .collect::>>()?; + Ok(result) + } + + pub(crate) async fn stop(&self) -> Result<()> { + self.db.save().await?; + Ok(()) + } + + pub(crate) async fn remove(&self, file_url: String, range: Range) -> Result<()> { + let col = self.db.get_collection("code-slices").await?; + col.write().await.remove(Some( + Collection::filter() + .comparison( + "start_line_no".to_owned(), + Compare::GtEq, + range.start.line.into(), + ) + .and() + .comparison( + "end_line_no".to_owned(), + Compare::LtEq, + range.end.line.into(), + ) + .and() + .comparison("file_url".to_owned(), Compare::Eq, file_url.into()), + ))?; + Ok(()) + } +} + +impl SnippetRetriever { + async fn generate_embedding( + &self, + model: Arc, + snippet: String, + tokenizer: Tokenizer, + ) -> Result> { + let start = Instant::now(); + let embedding = spawn_blocking(move || -> Result> { + let tokens = tokenizer.encode(snippet, true)?.get_ids().to_vec(); + let token_ids = Tensor::new(&tokens[..], &model.device)?.unsqueeze(0)?; + let token_type_ids = token_ids.zeros_like()?; + let embedding = model.forward(&token_ids, &token_type_ids)?; + let (_n_sentence, n_tokens, _hidden_size) = embedding.dims3()?; + let embedding = (embedding.sum(1)? 
/ (n_tokens as f64))?; + let embedding = embedding.get(0)?.to_vec1::()?; + Ok(embedding) + }) + .await?; + debug!("embedding generated in {}", start.elapsed().as_millis()); + embedding + } + + async fn build_and_add_snippets( + &self, + file_url: String, + start: usize, + end: Option, + ) -> Result<()> { + let col = self.db.get_collection("code-slices").await?; let file = tokio::fs::read_to_string(&file_url).await?; let lines = file.split('\n').collect::>(); - let mut embeddings = FixedSizeListBuilder::new(Float32Builder::new(), 768); - let mut snippets = Vec::new(); - let mut file_urls = Vec::new(); - let mut start_line_no = Vec::new(); - let mut end_line_no = Vec::new(); - for start_line in (0..lines.len()).step_by(self.window_step) { + let end = end.unwrap_or(lines.len()).min(lines.len()); + for start_line in (start..end).step_by(self.window_step) { let end_line = (start_line + self.window_size - 1).min(lines.len()); - if self - .exists(format!( - "file_url = '{file_url}' AND start_line_no = {start_line} AND end_line_no = {end_line}" - )) + if !col + .read() + .await + .get( + &[], + 1, + Some( + Collection::filter() + .comparison("file_url".to_owned(), Compare::Eq, file_url.clone().into()) + .and() + .comparison("start_line_no".to_owned(), Compare::Eq, start_line.into()) + .and() + .comparison("end_line_no".to_owned(), Compare::Eq, end_line.into()), + ), + ) .await? + .is_empty() { - info!("snippet {file_url}[{start_line}, {end_line}] already indexed"); + debug!("snippet {file_url}[{start_line}, {end_line}] already indexed"); continue; } let window = lines[start_line..end_line].to_vec(); @@ -356,27 +451,15 @@ impl SnippetRetriever { continue; } if snippet.len() > 1024 { - warn!("snippet {file_url}[{start_line}, {end_line}] is too big to be indexed"); + debug!("snippet {file_url}[{start_line}, {end_line}] is too big to be indexed"); continue; } - let tokenizer = self - .tokenizer - .with_padding(None) - .with_truncation(None)? 
- .clone(); let model = self.model.clone(); let snippet_clone = snippet.clone(); - let result = spawn_blocking(move || -> Result> { - let tokens = tokenizer.encode(snippet_clone, true)?.get_ids().to_vec(); - let token_ids = Tensor::new(&tokens[..], &model.device)?.unsqueeze(0)?; - let token_type_ids = token_ids.zeros_like()?; - let embedding = model.forward(&token_ids, &token_type_ids)?; - let (_n_sentence, n_tokens, _hidden_size) = embedding.dims3()?; - let embedding = (embedding.sum(1)? / (n_tokens as f64))?; - let embedding = embedding.get(0)?.to_vec1::()?; - Ok(embedding) - }) - .await?; + let tokenizer = self.tokenizer.clone(); + let result = self + .generate_embedding(model, snippet_clone, tokenizer) + .await; let embedding = match result { Ok(e) => e, Err(err) => { @@ -386,70 +469,16 @@ impl SnippetRetriever { continue; } }; - embeddings.values().append_slice(&embedding); - embeddings.append(true); - snippets.push(snippet.clone()); - file_urls.push(file_url.clone()); - start_line_no.push(start_line as u32); - end_line_no.push(end_line as u32); + col.write().await.insert(Embedding::new( + embedding, + Some(HashMap::from([ + ("file_url".to_owned(), file_url.clone().into()), + ("start_line_no".to_owned(), start_line.into()), + ("end_line_no".to_owned(), end_line.into()), + ("snippet".to_owned(), snippet.clone().into()), + ])), + ))?; } - - let batch = RecordBatch::try_new( - self.db.schema(), - vec![ - Arc::new(embeddings.finish()), - Arc::new(StringArray::from(snippets)), - Arc::new(StringArray::from(file_urls)), - Arc::new(UInt32Array::from(start_line_no)), - Arc::new(UInt32Array::from(end_line_no)), - ], - )?; - self.db - .add( - Box::new(RecordBatchIterator::new( - vec![batch].into_iter().map(Ok), - self.db.schema(), - )), - None, - ) - .await?; Ok(()) } - - pub(crate) async fn update_document(&mut self, file_url: String, range: Range) { - // TODO: - // - delete elements matching Range - // - keep the smallest start line to create new windows from - // - 
build new windows based on range - // - insert them into table - } - - pub(crate) async fn exists(&self, filter: String) -> Result { - let mut results = self - .db - .search(&[0.]) - .metric_type(Distance::Cosine) - .filter(&filter) - .execute_stream() - .await?; - let exists = if let Some(record_batch) = results.next().await { - let record_batch = record_batch.map_err(Into::::into)?; - if record_batch.num_rows() > 0 { - true - } else { - info!("record batch: {record_batch:?}"); - false - } - } else { - false - }; - if !exists { - info!("filter: {filter}"); - } - Ok(exists) - } - - pub(crate) async fn search(&self, snippet: String, filter: &str) -> Result { - Ok("toto".to_string()) - } } diff --git a/crates/tinyvec-embed/src/db.rs b/crates/tinyvec-embed/src/db.rs index 53facbf..8267d2b 100644 --- a/crates/tinyvec-embed/src/db.rs +++ b/crates/tinyvec-embed/src/db.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use std::{ collections::{BinaryHeap, HashMap}, + fmt::Display, path::{Path, PathBuf}, sync::Arc, }; @@ -123,13 +124,13 @@ impl Db { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct SimilarityResult { - score: f32, - embedding: Embedding, + pub score: f32, + pub embedding: Embedding, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct Collection { /// Dimension of the vectors in the collection pub dimension: usize, @@ -154,7 +155,7 @@ impl Collection { let embeddings = if let Some(filter) = filter { self.embeddings .iter() - .filter(filter.build()) + .filter(filter.fn_ref_closure()) .collect::>() } else { self.embeddings.iter().collect::>() @@ -172,6 +173,19 @@ impl Collection { Ok(()) } + /// Remove values matching filter. + /// + /// Empties the collection when `filter` is `None`. 
+ pub fn remove(&mut self, filter: Option) -> Result<()> { + if let Some(filter) = filter { + let mut closure = filter.fn_mut_closure(); + self.embeddings.retain(|e| !closure(e)); + } else { + self.embeddings.clear(); + } + Ok(()) + } + pub fn len(&self) -> usize { self.embeddings.len() } @@ -181,7 +195,7 @@ impl Collection { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct Embedding { pub id: Uuid, pub metadata: Option>, @@ -206,12 +220,42 @@ impl PartialEq for Embedding { impl Eq for Embedding {} -#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, PartialOrd, Serialize, Deserialize)] pub enum Value { String(String), Number(f32), } +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(s) => write!(f, "{s}"), + Self::Number(n) => write!(f, "{n}"), + } + } +} + +impl Value { + pub fn inner_string(&self) -> Result { + match self { + Self::String(s) => Ok(s.to_owned()), + _ => Err(Error::ValueNotString(self.to_owned())), + } + } +} + +impl From for Value { + fn from(value: usize) -> Self { + Self::Number(value as f32) + } +} + +impl From for Value { + fn from(value: u32) -> Self { + Self::Number(value as f32) + } +} + impl From for Value { fn from(value: f32) -> Self { Self::Number(value) @@ -235,7 +279,9 @@ pub enum Compare { Eq, Neq, Gt, + GtEq, Lt, + LtEq, } #[derive(Clone, Debug)] @@ -276,8 +322,59 @@ impl FilterBuilder { self } + fn fn_mut_closure(self) -> impl FnMut(&Embedding) -> bool { + move |e| { + let mut ret = true; + let mut prev = None; + for condition in &self.filter { + let cond_res = match condition.1 { + Compare::Eq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) == Some(&condition.2)) + .unwrap_or(false), + Compare::Neq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) != Some(&condition.2)) + .unwrap_or(false), + Compare::Gt => e + 
.metadata + .as_ref() + .map(|f| f.get(&condition.0) > Some(&condition.2)) + .unwrap_or(false), + Compare::GtEq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) >= Some(&condition.2)) + .unwrap_or(false), + Compare::Lt => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) < Some(&condition.2)) + .unwrap_or(false), + Compare::LtEq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) <= Some(&condition.2)) + .unwrap_or(false), + }; + if let Some(prev) = prev { + match prev { + Chain::And => ret = ret && cond_res, + Chain::Or => ret = ret || cond_res, + } + } else { + ret = cond_res; + } + prev = condition.3.clone(); + } + ret + } + } + // XXX: we assume the user will chain filters correctly - fn build(self) -> impl Fn(&&Embedding) -> bool { + fn fn_ref_closure(self) -> impl Fn(&&Embedding) -> bool { move |e| { let mut ret = true; let mut prev = None; @@ -298,11 +395,21 @@ impl FilterBuilder { .as_ref() .map(|f| f.get(&condition.0) > Some(&condition.2)) .unwrap_or(false), + Compare::GtEq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) >= Some(&condition.2)) + .unwrap_or(false), Compare::Lt => e .metadata .as_ref() .map(|f| f.get(&condition.0) < Some(&condition.2)) .unwrap_or(false), + Compare::LtEq => e + .metadata + .as_ref() + .map(|f| f.get(&condition.0) <= Some(&condition.2)) + .unwrap_or(false), }; if let Some(prev) = prev { match prev { @@ -336,7 +443,7 @@ async fn get_similarity( for (index, embedding) in embeddings.iter().enumerate() { let embedding = (*embedding).clone(); let query = query.to_owned(); - let permit = semaphore.clone().acquire_owned().await.unwrap(); + let permit = semaphore.clone().acquire_owned().await?; set.spawn_blocking(move || { let score = distance.compute(&embedding.vector, &query); drop(permit); @@ -347,7 +454,7 @@ async fn get_similarity( let mut heap = BinaryHeap::new(); while let Some(res) = set.join_next().await { let score_index = res.map_err(Into::::into)?; - if heap.len() < k || score_index < 
*heap.peek().unwrap() { + if heap.len() < k || score_index < *heap.peek().ok_or(CollectionError::EmptyBinaryHeap)? { heap.push(score_index); if heap.len() > k { diff --git a/crates/tinyvec-embed/src/error.rs b/crates/tinyvec-embed/src/error.rs index 3f781c3..6a31c86 100644 --- a/crates/tinyvec-embed/src/error.rs +++ b/crates/tinyvec-embed/src/error.rs @@ -1,9 +1,13 @@ use std::path::PathBuf; +use crate::db::Value; + #[derive(Debug, thiserror::Error)] pub enum Collection { #[error("The dimension of the vector doesn't match the dimension of the collection")] DimensionMismatch, + #[error("attempt to peek an empty binary heap")] + EmptyBinaryHeap, #[error("invalid path: {0}")] InvalidPath(PathBuf), #[error("join error: {0}")] @@ -18,6 +22,8 @@ pub enum Collection { #[derive(Debug, thiserror::Error)] pub enum Error { + #[error("acquire error: {0}")] + Acquire(#[from] tokio::sync::AcquireError), #[error("bincode error: {0}")] Bincode(#[from] bincode::Error), #[error("collection error: {0}")] @@ -26,6 +32,8 @@ pub enum Error { InvalidFileName, #[error("io error: {0}")] Io(#[from] std::io::Error), + #[error("expected value to be string, got: {0}")] + ValueNotString(Value), } pub type Result = std::result::Result; From 9d643f5e975af5f9ff0e0b370f405e178350431f Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 20 Feb 2024 23:16:50 +0100 Subject: [PATCH 11/22] feat: ignore specfic files --- crates/llm-ls/src/main.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index c2853f5..aecb6fe 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -534,6 +534,14 @@ impl LlmService { ); Ok(()) } + + fn ignore_file(&self, uri: Url) -> bool { + let uri_str = uri.to_string(); + let path = uri.path(); + uri.scheme() == "output" + || uri.scheme() == "term" + || uri.scheme() == "file" && (uri_str == "file:///" || !Path::new(&path).exists()) + } } 
#[tower_lsp::async_trait] @@ -644,9 +652,10 @@ impl LanguageServer for LlmService { async fn did_open(&self, params: DidOpenTextDocumentParams) { let uri = params.text_document.uri.to_string(); - if uri == "file:///" { + if self.ignore_file(params.text_document.uri) { return; } + match Document::open( ¶ms.text_document.language_id, ¶ms.text_document.text, @@ -669,15 +678,8 @@ impl LanguageServer for LlmService { async fn did_change(&self, params: DidChangeTextDocumentParams) { let uri = params.text_document.uri.to_string(); - if params.text_document.uri.scheme() == "file" && Path::new(&uri).exists() { - return; - } - if params.content_changes.is_empty() { - return; - } - - // ignore the output scheme - if params.text_document.uri.scheme() == "output" { + let path = params.text_document.uri.path(); + if self.ignore_file(params.text_document.uri.clone()) { return; } @@ -697,7 +699,7 @@ impl LanguageServer for LlmService { .write() .await .remove( - uri.clone(), + path.to_owned(), Range::new(start, Position::new(old_end as u32, 0)), ) .await @@ -709,7 +711,7 @@ impl LanguageServer for LlmService { .write() .await .update_document( - uri.clone(), + path.to_owned(), Range::new(start, Position::new(new_end as u32, 0)), ) .await From 660eeba1ec6912a3c9a069fc3d87dea0ae635306 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 20 Feb 2024 23:17:17 +0100 Subject: [PATCH 12/22] feat: replace bigcode/starencoder with intfloat/multilingual-e5-small --- crates/llm-ls/src/retrieval.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 888c271..28850c3 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -140,7 +140,7 @@ fn is_code_file(file_name: &Path) -> bool { async fn build_model_and_tokenizer() -> Result<(BertModel, Tokenizer)> { let start = Instant::now(); let device = device(false)?; - let model_id = "bigcode/starencoder".to_string(); + let model_id 
= "intfloat/multilingual-e5-small".to_string(); let revision = "main".to_string(); let repo = Repo::with_revision(model_id, RepoType::Model, revision); let (config_filename, tokenizer_filename, weights_filename) = { @@ -190,7 +190,7 @@ async fn initialse_database(cache_path: PathBuf) -> Db { let uri = cache_path.join("database"); let mut db = Db::open(uri).await.expect("failed to open database"); match db - .create_collection("code-slices".to_owned(), 768, Distance::Cosine) + .create_collection("code-slices".to_owned(), 384, Distance::Cosine) .await { Ok(_) @@ -303,7 +303,7 @@ impl SnippetRetriever { } } for (i, file) in files.iter().enumerate() { - let file_url = file.to_str().expect("file path should be utf8").to_string(); + let file_url = file.to_str().expect("file path should be utf8").to_owned(); self.add_document(file_url).await?; client .send_notification::(ProgressParams { @@ -408,7 +408,7 @@ impl SnippetRetriever { Ok(embedding) }) .await?; - debug!("embedding generated in {}", start.elapsed().as_millis()); + debug!("embedding generated in {} ms", start.elapsed().as_millis()); embedding } From 3756feebfdd916899e89f10d0ec1235c3b82cbf8 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Wed, 21 Feb 2024 14:59:37 +0100 Subject: [PATCH 13/22] feat: add similar snippets to prompt --- crates/llm-ls/src/document.rs | 2 +- crates/llm-ls/src/error.rs | 2 + crates/llm-ls/src/language_id.rs | 361 +++++++++++++++++++++++++++++++ crates/llm-ls/src/main.rs | 81 ++++++- crates/llm-ls/src/retrieval.rs | 30 ++- 5 files changed, 447 insertions(+), 29 deletions(-) diff --git a/crates/llm-ls/src/document.rs b/crates/llm-ls/src/document.rs index 04ef614..16d3c7d 100644 --- a/crates/llm-ls/src/document.rs +++ b/crates/llm-ls/src/document.rs @@ -123,7 +123,7 @@ fn get_parser(language_id: LanguageId) -> Result { parser.set_language(tree_sitter_typescript::language_tsx())?; Ok(parser) } - LanguageId::Unknown => Ok(Parser::new()), + _ => Ok(Parser::new()), } } diff --git 
a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index ac9a77d..5d27085 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -53,6 +53,8 @@ pub enum Error { Rope(#[from] ropey::Error), #[error("serde json error: {0}")] SerdeJson(#[from] serde_json::Error), + #[error("snippet is too larger to be converted to an embedding: {0} > {1}")] + SnippetTooLarge(usize, usize), #[error("strip prefix error: {0}")] StripPrefix(#[from] std::path::StripPrefixError), #[error("tgi error: {0}")] diff --git a/crates/llm-ls/src/language_id.rs b/crates/llm-ls/src/language_id.rs index 31ce3d9..9ed62c3 100644 --- a/crates/llm-ls/src/language_id.rs +++ b/crates/llm-ls/src/language_id.rs @@ -3,60 +3,128 @@ use std::fmt; #[derive(Clone, Copy, Serialize, Deserialize)] pub(crate) enum LanguageId { + Abap, + Bat, + BibTeX, Bash, + Clojure, + CoffeeScript, C, Cpp, CSharp, + Css, + Diff, + Dart, + Dockerfile, Elixir, Erlang, + FSharp, + GitCommit, + GitRebase, Go, + Groovy, + Handlebars, Html, + Ini, Java, JavaScript, JavaScriptReact, Json, Kotlin, + LaTeX, + Less, Lua, + Makefile, Markdown, ObjectiveC, + ObjectiveCpp, + Perl, + Perl6, + Php, + Powershell, + Pug, Python, R, + Razor, Ruby, Rust, + Scss, Scala, + ShaderLab, + Sql, Swift, + Toml, TypeScript, TypeScriptReact, + TeX, + VisualBasic, + Xml, + Xsl, + Yaml, Unknown, } impl fmt::Display for LanguageId { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Self::Abap => write!(f, "abap"), Self::Bash => write!(f, "shellscript"), + Self::Bat => write!(f, "bat"), + Self::BibTeX => write!(f, "bibtex"), + Self::Clojure => write!(f, "clojure"), + Self::CoffeeScript => write!(f, "coffeescript"), Self::C => write!(f, "c"), Self::Cpp => write!(f, "cpp"), Self::CSharp => write!(f, "csharp"), + Self::Css => write!(f, "css"), + Self::Diff => write!(f, "diff"), + Self::Dart => write!(f, "dart"), + Self::Dockerfile => write!(f, "dockerfile"), Self::Elixir => write!(f, "elixir"), Self::Erlang => write!(f, 
"erlang"), + Self::FSharp => write!(f, "fsharp"), + Self::GitCommit => write!(f, "git-commit"), + Self::GitRebase => write!(f, "git-rebase"), Self::Go => write!(f, "go"), + Self::Groovy => write!(f, "groovy"), + Self::Handlebars => write!(f, "handlebars"), Self::Html => write!(f, "html"), + Self::Ini => write!(f, "ini"), Self::Java => write!(f, "java"), Self::JavaScript => write!(f, "javascript"), Self::JavaScriptReact => write!(f, "javascriptreact"), Self::Json => write!(f, "json"), Self::Kotlin => write!(f, "kotlin"), + Self::LaTeX => write!(f, "latex"), + Self::Less => write!(f, "less"), Self::Lua => write!(f, "lua"), + Self::Makefile => write!(f, "makefile"), Self::Markdown => write!(f, "markdown"), Self::ObjectiveC => write!(f, "objective-c"), + Self::ObjectiveCpp => write!(f, "objective-cpp"), + Self::Perl => write!(f, "perl"), + Self::Perl6 => write!(f, "perl6"), + Self::Php => write!(f, "php"), + Self::Powershell => write!(f, "powershell"), + Self::Pug => write!(f, "jade"), Self::Python => write!(f, "python"), Self::R => write!(f, "r"), + Self::Razor => write!(f, "razor"), Self::Ruby => write!(f, "ruby"), Self::Rust => write!(f, "rust"), + Self::ShaderLab => write!(f, "shaderlab"), + Self::Scss => write!(f, "scss"), Self::Scala => write!(f, "scala"), + Self::Sql => write!(f, "sql"), Self::Swift => write!(f, "swift"), + Self::Toml => write!(f, "toml"), Self::TypeScript => write!(f, "typescript"), Self::TypeScriptReact => write!(f, "typescriptreact"), + Self::TeX => write!(f, "tex"), + Self::VisualBasic => write!(f, "vb"), + Self::Xml => write!(f, "xml"), + Self::Xsl => write!(f, "xsl"), + Self::Yaml => write!(f, "Yaml"), Self::Unknown => write!(f, "unknown"), } } @@ -75,30 +143,63 @@ impl fmt::Display for LanguageIdError { impl From<&str> for LanguageId { fn from(value: &str) -> Self { match value { + "abap" => Self::Abap, + "bat" => Self::Bat, + "bibtex" => Self::BibTeX, + "clojure" => Self::Clojure, + "coffeescript" => Self::CoffeeScript, "c" => Self::C, 
"cpp" => Self::Cpp, "csharp" => Self::CSharp, + "css" => Self::Css, + "diff" => Self::Diff, + "dart" => Self::Dart, + "dockerfile" => Self::Dockerfile, "elixir" => Self::Elixir, "erlang" => Self::Erlang, + "fsharp" => Self::FSharp, + "git-commit" => Self::GitCommit, + "git-rebase" => Self::GitRebase, "go" => Self::Go, + "groovy" => Self::Groovy, + "handlebars" => Self::Handlebars, "html" => Self::Html, + "ini" => Self::Ini, + "jade" => Self::Pug, "java" => Self::Java, "javascript" => Self::JavaScript, "javascriptreact" => Self::JavaScriptReact, "json" => Self::Json, "kotlin" => Self::Kotlin, + "latex" => Self::LaTeX, + "less" => Self::Less, "lua" => Self::Lua, + "makefile" => Self::Makefile, "markdown" => Self::Markdown, "objective-c" => Self::ObjectiveC, + "objective-cpp" => Self::ObjectiveCpp, + "perl" => Self::Perl, + "perl6" => Self::Perl6, + "php" => Self::Php, + "powershell" => Self::Powershell, "python" => Self::Python, "r" => Self::R, + "razor" => Self::Razor, "ruby" => Self::Ruby, "rust" => Self::Rust, + "sass" | "scss" => Self::Scss, "scala" => Self::Scala, + "shaderlab" => Self::ShaderLab, "shellscript" => Self::Bash, + "sql" => Self::Sql, "swift" => Self::Swift, "typescript" => Self::TypeScript, "typescriptreact" => Self::TypeScriptReact, + "tex" => Self::TeX, + "vb" => Self::VisualBasic, + "xml" => Self::Xml, + "xsl" => Self::Xsl, + "yaml" => Self::Yaml, _ => Self::Unknown, } } @@ -109,3 +210,263 @@ impl From for LanguageId { Self::from(value.as_str()) } } + +#[derive(Clone, Debug)] +pub(crate) struct LanguageComment { + open: String, + close: Option, +} + +impl LanguageComment { + pub(crate) fn comment_string(&self, s: String) -> String { + let close = if let Some(close) = self.close.as_ref() { + close.clone() + } else { + String::new() + }; + format!("{} {s} {close}", self.open) + } +} + +impl LanguageId { + pub(crate) fn get_language_comment(&self) -> LanguageComment { + match self { + Self::Abap => LanguageComment { + open: "*".to_owned(), + close: 
None, + }, + Self::Bash => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Bat => LanguageComment { + open: "REM".to_owned(), + close: None, + }, + Self::BibTeX => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::Clojure => LanguageComment { + open: ";;".to_owned(), + close: None, + }, + Self::CoffeeScript => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::C => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Cpp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::CSharp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Css => LanguageComment { + open: "/*".to_owned(), + close: Some("*/".to_owned()), + }, + Self::Diff => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Dart => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Dockerfile => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Elixir => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Erlang => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::FSharp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::GitCommit => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::GitRebase => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Go => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Groovy => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Handlebars => LanguageComment { + open: "{{!--".to_owned(), + close: Some("--}}".to_owned()), + }, + Self::Html => LanguageComment { + open: "".to_owned()), + }, + Self::Ini => LanguageComment { + open: ";".to_owned(), + close: None, + }, + Self::Java => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::JavaScript => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::JavaScriptReact => 
LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Json => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Kotlin => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::LaTeX => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::Less => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Lua => LanguageComment { + open: "--".to_owned(), + close: None, + }, + Self::Makefile => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Markdown => LanguageComment { + open: "".to_owned()), + }, + Self::ObjectiveC => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::ObjectiveCpp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Perl => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Perl6 => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Php => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Powershell => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Pug => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Python => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::R => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Razor => LanguageComment { + open: "@*".to_owned(), + close: Some("*@".to_owned()), + }, + Self::Ruby => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Rust => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Scss => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Scala => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::ShaderLab => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Sql => LanguageComment { + open: "--".to_owned(), + close: None, + }, + Self::Swift => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Toml 
=> LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::TypeScript => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::TypeScriptReact => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::TeX => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::VisualBasic => LanguageComment { + open: "'".to_owned(), + close: None, + }, + Self::Xml => LanguageComment { + open: "".to_owned()), + }, + Self::Xsl => LanguageComment { + open: "".to_owned()), + }, + Self::Yaml => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Unknown => LanguageComment { + open: "#".to_owned(), + close: None, + }, + } + } +} diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index aecb6fe..44631ff 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -3,18 +3,21 @@ use custom_types::llm_ls::{ AcceptCompletionParams, Backend, Completion, FimParams, GetCompletionsParams, GetCompletionsResult, Ide, RejectCompletionParams, TokenizerConfig, }; -use retrieval::SnippetRetriever; +use language_id::LanguageId; +use retrieval::{Snippet, SnippetRetriever}; use ropey::Rope; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt::Display; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Duration; +use tinyvec_embed::db::{Compare, FilterBuilder}; use tokenizers::Tokenizer; use tokio::io::AsyncWriteExt; use tokio::net::TcpListener; use tokio::sync::{mpsc, RwLock}; +use tokio::time::Instant; use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::notification::Progress; use tower_lsp::lsp_types::request::WorkDoneProgressCreate; @@ -145,12 +148,40 @@ struct LlmService { cancel_snippet_build_rx: Arc>>, } -fn build_prompt( +fn build_context_header(language_id: LanguageId, snippets: Vec) -> String { + let comment = language_id.get_language_comment(); + let mut header = 
vec![comment.comment_string( + "Below are some relevant code snippets contained in this project's files:".to_owned(), + )]; + for snippet in snippets { + header.push(comment.comment_string("--------------".to_owned())); + header.push(comment.comment_string(format!("snippet from: {}", snippet.file_url))); + header.push(comment.comment_string("--------------".to_owned())); + header.push( + snippet + .code + .lines() + .map(|l| comment.comment_string(l.to_owned())) + .collect::>() + .join("\n"), + ); + header.push(comment.comment_string("--------------".to_owned())); + } + let mut header = header.join("\n"); + header.push('\n'); + header +} + +#[allow(clippy::too_many_arguments)] +async fn build_prompt( pos: Position, text: &Rope, fim: &FimParams, tokenizer: Option>, context_window: usize, + file_url: &str, + language_id: LanguageId, + snippet_retriever: Arc>, ) -> Result { let t = Instant::now(); if fim.enabled { @@ -199,13 +230,23 @@ fn build_prompt( before_line = before_iter.next(); after_line = after_iter.next(); } + let before = before.into_iter().rev().collect::>().join(""); + let snippets = snippet_retriever + .read() + .await + .search( + format!("{before}{after}"), + Some(FilterBuilder::new().comparison( + "file_url".to_owned(), + Compare::Neq, + file_url.into(), + )), + ) + .await?; + let context_header = build_context_header(language_id, snippets); let prompt = format!( - "{}{}{}{}{}", - fim.prefix, - before.into_iter().rev().collect::>().join(""), - fim.suffix, - after, - fim.middle + "{}{context_header}{before}{}{after}{}", + fim.prefix, fim.suffix, fim.middle ); let time = t.elapsed().as_millis(); info!(prompt, build_prompt_ms = time, "built prompt in {time} ms"); @@ -233,6 +274,20 @@ fn build_prompt( before.push(line); } let prompt = before.into_iter().rev().collect::>().join(""); + let snippets = snippet_retriever + .read() + .await + .search( + prompt.clone(), + Some(FilterBuilder::new().comparison( + "file_url".to_owned(), + Compare::Neq, + 
file_url.into(), + )), + ) + .await?; + let context_header = build_context_header(language_id, snippets); + let prompt = format!("{context_header}{prompt}"); let time = t.elapsed().as_millis(); info!(prompt, build_prompt_ms = time, "built prompt in {time} ms"); Ok(prompt) @@ -441,8 +496,9 @@ impl LlmService { async move { let document_map = self.document_map.read().await; + let file_url = params.text_document_position.text_document.uri.as_str(); let document = - match document_map.get(params.text_document_position.text_document.uri.as_str()) { + match document_map.get(file_url) { Some(doc) => doc, None => { debug!("failed to find document"); @@ -496,7 +552,10 @@ impl LlmService { ¶ms.fim, tokenizer, params.context_window, - )?; + &file_url.replace("file://", ""), + document.language_id, + self.snippet_retriever.clone(), + ).await?; let http_client = if params.tls_skip_verify_insecure { info!("tls verification is disabled"); diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 28850c3..3391b7b 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -10,7 +10,7 @@ use std::path::Path; use std::{path::PathBuf, sync::Arc}; use tinyvec_embed::db::{Collection, Compare, Db, Embedding, FilterBuilder, SimilarityResult}; use tinyvec_embed::similarity::Distance; -use tokenizers::Tokenizer; +use tokenizers::{Encoding, Tokenizer, TruncationDirection}; use tokio::io::AsyncReadExt; use tokio::task::spawn_blocking; use tokio::time::Instant; @@ -203,8 +203,8 @@ async fn initialse_database(cache_path: PathBuf) -> Db { } pub(crate) struct Snippet { - file_url: String, - code: String, + pub(crate) file_url: String, + pub(crate) code: String, } impl TryFrom<&SimilarityResult> for Snippet { @@ -348,8 +348,10 @@ impl SnippetRetriever { filter: Option, ) -> Result> { let col = self.db.get_collection("code-slices").await?; + let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; + encoding.truncate(512, 1, 
TruncationDirection::Right); let query = self - .generate_embedding(self.model.clone(), snippet, self.tokenizer.clone()) + .generate_embedding(encoding, self.model.clone()) .await?; let result = col .read() @@ -390,15 +392,15 @@ impl SnippetRetriever { } impl SnippetRetriever { + // TODO: handle overflowing in Encoding async fn generate_embedding( &self, + encoding: Encoding, model: Arc, - snippet: String, - tokenizer: Tokenizer, ) -> Result> { let start = Instant::now(); let embedding = spawn_blocking(move || -> Result> { - let tokens = tokenizer.encode(snippet, true)?.get_ids().to_vec(); + let tokens = encoding.get_ids().to_vec(); let token_ids = Tensor::new(&tokens[..], &model.device)?.unsqueeze(0)?; let token_type_ids = token_ids.zeros_like()?; let embedding = model.forward(&token_ids, &token_type_ids)?; @@ -450,16 +452,10 @@ impl SnippetRetriever { if snippet.is_empty() { continue; } - if snippet.len() > 1024 { - debug!("snippet {file_url}[{start_line}, {end_line}] is too big to be indexed"); - continue; - } - let model = self.model.clone(); - let snippet_clone = snippet.clone(); - let tokenizer = self.tokenizer.clone(); - let result = self - .generate_embedding(model, snippet_clone, tokenizer) - .await; + + let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; + encoding.truncate(512, 1, TruncationDirection::Right); + let result = self.generate_embedding(encoding, self.model.clone()).await; let embedding = match result { Ok(e) => e, Err(err) => { From 4b14a20e369a13fdbef99e9f24339380dde37d64 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Thu, 22 Feb 2024 15:35:05 +0100 Subject: [PATCH 14/22] fix: dangling llm-ls process --- crates/llm-ls/src/main.rs | 155 +++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 69 deletions(-) diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 44631ff..ce0e98c 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -16,7 +16,7 @@ use 
tinyvec_embed::db::{Compare, FilterBuilder}; use tokenizers::Tokenizer; use tokio::io::AsyncWriteExt; use tokio::net::TcpListener; -use tokio::sync::{mpsc, RwLock}; +use tokio::sync::{oneshot, RwLock}; use tokio::time::Instant; use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::notification::Progress; @@ -144,8 +144,8 @@ struct LlmService { unauthenticated_warn_at: Arc>, snippet_retriever: Arc>, supports_progress_bar: Arc>, - cancel_snippet_build_tx: mpsc::Sender<()>, - cancel_snippet_build_rx: Arc>>, + cancel_snippet_build_tx: Arc>>>, + cancel_snippet_build_rx: Arc>>>, } fn build_context_header(language_id: LanguageId, snippets: Vec) -> String { @@ -603,6 +603,8 @@ impl LlmService { } } +struct Cancelled(bool); + #[tower_lsp::async_trait] impl LanguageServer for LlmService { async fn initialize(&self, params: InitializeParams) -> LspResult { @@ -645,68 +647,82 @@ impl LanguageServer for LlmService { let token = NumberOrString::Number(42); let token_copy = NumberOrString::Number(42); - let handle = tokio::spawn(async move { - let guard = workspace_folders.read().await; - if let Some(workspace_folders) = guard.as_ref() { - if *supports_progress_bar.read().await { - match client - .send_request::(WorkDoneProgressCreateParams { - token: token.clone(), - }) - .await - { - Ok(_) => (), - Err(err) => { - error!("err: {err}"); - return; - } - }; - client - .send_notification::(ProgressParams { - token: token.clone(), - value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( - WorkDoneProgressBegin { - title: "creating workspace embeddings".to_owned(), - ..Default::default() - }, - )), - }) - .await; + if let Some(rx) = self.cancel_snippet_build_rx.write().await.take() { + let handle = tokio::spawn(async move { + let guard = workspace_folders.read().await; + if let Some(workspace_folders) = guard.as_ref() { + if *supports_progress_bar.read().await { + match client + .send_request::(WorkDoneProgressCreateParams { + token: token.clone(), + }) + 
.await + { + Ok(_) => (), + Err(err) => { + error!("err: {err}"); + return Cancelled(false); + } + }; + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( + WorkDoneProgressBegin { + title: "creating workspace embeddings".to_owned(), + ..Default::default() + }, + )), + }) + .await; + } + let mut guard = snippet_retriever.write().await; + tokio::select! { + res = guard.build_workspace_snippets( + client.clone(), + token, + workspace_folders[0].uri.path(), + ) => { + if let Err(err) = res { + error!("failed building workspace snippets: {err}"); + } + Cancelled(false) + }, + _ = rx => { + debug!("received cancellation, stopping indexation"); + Cancelled(true) + }, + } + } else { + Cancelled(false) } - snippet_retriever - .write() - .await - .build_workspace_snippets( - client.clone(), - token, - workspace_folders[0].uri.path(), - ) - .await - .expect("failed to build workspace snippets"); - } - }); - let mut guard = self.cancel_snippet_build_rx.write().await; - tokio::select! 
{ - _ = handle => { - if *self.supports_progress_bar.read().await { - self.client - .send_notification::(ProgressParams { - token: token_copy, - value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( - WorkDoneProgressEnd { - ..Default::default() - }, - )), - }) - .await; + }); + let cancelled = match handle.await { + Ok(c) => c, + Err(err) => { + error!("error building workspace snippets: {err}"); + return; } - }, - _ = guard.recv() => return, + }; + if let Cancelled(false) = cancelled { + self.client + .log_message(MessageType::INFO, "llm-ls initialized") + .await; + info!("initialized language server"); + } + } + if *self.supports_progress_bar.read().await { + self.client + .send_notification::(ProgressParams { + token: token_copy, + value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( + WorkDoneProgressEnd { + ..Default::default() + }, + )), + }) + .await; } - self.client - .log_message(MessageType::INFO, "llm-ls initialized") - .await; - info!("initialized language server"); } async fn did_open(&self, params: DidOpenTextDocumentParams) { @@ -810,10 +826,11 @@ impl LanguageServer for LlmService { async fn shutdown(&self) -> LspResult<()> { debug!("shutdown"); - self.cancel_snippet_build_tx - .send(()) - .await - .map_err(internal_error)?; + if let Some(tx) = self.cancel_snippet_build_tx.write().await.take() { + if tx.send(()).is_err() { + return Err(internal_error("failed to cancel indexing")); + } + } self.snippet_retriever .read() .await @@ -874,7 +891,7 @@ async fn main() { .await .expect("failed to initialise snippet retriever"), )); - let (cancel_snippet_build_tx, rx) = mpsc::channel(1); + let (tx, rx) = oneshot::channel(); let (service, socket) = LspService::build(|client| LlmService { cache_dir, client, @@ -890,8 +907,8 @@ async fn main() { )), snippet_retriever, supports_progress_bar: Arc::new(RwLock::new(false)), - cancel_snippet_build_tx, - cancel_snippet_build_rx: Arc::new(RwLock::new(rx)), + cancel_snippet_build_tx: 
Arc::new(RwLock::new(Some(tx))), + cancel_snippet_build_rx: Arc::new(RwLock::new(Some(rx))), }) .custom_method("llm-ls/getCompletions", LlmService::get_completions) .custom_method("llm-ls/acceptCompletion", LlmService::accept_completion) From b3245f76803d9c8686feced3d301a4e0a0683844 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Thu, 22 Feb 2024 16:25:57 +0100 Subject: [PATCH 15/22] refactor: cleaner shutdown --- crates/llm-ls/src/language_id.rs | 4 +- crates/llm-ls/src/main.rs | 150 +++++++++++++++---------------- crates/llm-ls/src/retrieval.rs | 4 +- 3 files changed, 75 insertions(+), 83 deletions(-) diff --git a/crates/llm-ls/src/language_id.rs b/crates/llm-ls/src/language_id.rs index 9ed62c3..b680b18 100644 --- a/crates/llm-ls/src/language_id.rs +++ b/crates/llm-ls/src/language_id.rs @@ -220,11 +220,11 @@ pub(crate) struct LanguageComment { impl LanguageComment { pub(crate) fn comment_string(&self, s: String) -> String { let close = if let Some(close) = self.close.as_ref() { - close.clone() + format!(" {}", close.clone()) } else { String::new() }; - format!("{} {s} {close}", self.open) + format!("{} {s}{close}", self.open) } } diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index ce0e98c..2762882 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -17,6 +17,7 @@ use tokenizers::Tokenizer; use tokio::io::AsyncWriteExt; use tokio::net::TcpListener; use tokio::sync::{oneshot, RwLock}; +use tokio::task::JoinHandle; use tokio::time::Instant; use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::notification::Progress; @@ -145,10 +146,13 @@ struct LlmService { snippet_retriever: Arc>, supports_progress_bar: Arc>, cancel_snippet_build_tx: Arc>>>, - cancel_snippet_build_rx: Arc>>>, + indexation_handle: Arc>>>, } fn build_context_header(language_id: LanguageId, snippets: Vec) -> String { + if snippets.is_empty() { + return String::new(); + } let comment = language_id.get_language_comment(); let mut header = 
vec![comment.comment_string( "Below are some relevant code snippets contained in this project's files:".to_owned(), @@ -603,8 +607,6 @@ impl LlmService { } } -struct Cancelled(bool); - #[tower_lsp::async_trait] impl LanguageServer for LlmService { async fn initialize(&self, params: InitializeParams) -> LspResult { @@ -647,82 +649,68 @@ impl LanguageServer for LlmService { let token = NumberOrString::Number(42); let token_copy = NumberOrString::Number(42); - if let Some(rx) = self.cancel_snippet_build_rx.write().await.take() { - let handle = tokio::spawn(async move { - let guard = workspace_folders.read().await; - if let Some(workspace_folders) = guard.as_ref() { - if *supports_progress_bar.read().await { - match client - .send_request::(WorkDoneProgressCreateParams { - token: token.clone(), - }) - .await - { - Ok(_) => (), - Err(err) => { - error!("err: {err}"); - return Cancelled(false); - } - }; - client - .send_notification::(ProgressParams { - token: token.clone(), - value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( - WorkDoneProgressBegin { - title: "creating workspace embeddings".to_owned(), - ..Default::default() - }, - )), - }) - .await; - } - let mut guard = snippet_retriever.write().await; - tokio::select! 
{ - res = guard.build_workspace_snippets( - client.clone(), - token, - workspace_folders[0].uri.path(), - ) => { - if let Err(err) = res { - error!("failed building workspace snippets: {err}"); - } - Cancelled(false) - }, - _ = rx => { - debug!("received cancellation, stopping indexation"); - Cancelled(true) - }, - } - } else { - Cancelled(false) + let (tx, rx) = oneshot::channel(); + *self.cancel_snippet_build_tx.write().await = Some(tx); + let handle = tokio::spawn(async move { + let guard = workspace_folders.read().await; + if let Some(workspace_folders) = guard.as_ref() { + if *supports_progress_bar.read().await { + match client + .send_request::(WorkDoneProgressCreateParams { + token: token.clone(), + }) + .await + { + Ok(_) => (), + Err(err) => { + error!("err: {err}"); + return; + } + }; + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( + WorkDoneProgressBegin { + title: "creating workspace embeddings".to_owned(), + ..Default::default() + }, + )), + }) + .await; } - }); - let cancelled = match handle.await { - Ok(c) => c, - Err(err) => { - error!("error building workspace snippets: {err}"); - return; + let mut guard = snippet_retriever.write().await; + tokio::select! 
{ + res = guard.build_workspace_snippets( + client.clone(), + token, + workspace_folders[0].uri.path(), + ) => { + if let Err(err) = res { + error!("failed building workspace snippets: {err}"); + } + }, + _ = rx => debug!("received cancellation, stopping indexation"), + } + if *supports_progress_bar.read().await { + client + .send_notification::(ProgressParams { + token: token_copy, + value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( + WorkDoneProgressEnd { + ..Default::default() + }, + )), + }) + .await; } - }; - if let Cancelled(false) = cancelled { - self.client - .log_message(MessageType::INFO, "llm-ls initialized") - .await; - info!("initialized language server"); } - } - if *self.supports_progress_bar.read().await { - self.client - .send_notification::(ProgressParams { - token: token_copy, - value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( - WorkDoneProgressEnd { - ..Default::default() - }, - )), - }) - .await; - } + }); + *self.indexation_handle.write().await = Some(handle); + self.client + .log_message(MessageType::INFO, "llm-ls initialized") + .await; + info!("initialized language server"); } async fn did_open(&self, params: DidOpenTextDocumentParams) { @@ -837,6 +825,11 @@ impl LanguageServer for LlmService { .stop() .await .map_err(internal_error)?; + if let Some(handle) = self.indexation_handle.write().await.take() { + if let Err(err) = handle.await { + error!("error indexing snippets: {err}"); + } + } Ok(()) } } @@ -891,7 +884,6 @@ async fn main() { .await .expect("failed to initialise snippet retriever"), )); - let (tx, rx) = oneshot::channel(); let (service, socket) = LspService::build(|client| LlmService { cache_dir, client, @@ -907,8 +899,8 @@ async fn main() { )), snippet_retriever, supports_progress_bar: Arc::new(RwLock::new(false)), - cancel_snippet_build_tx: Arc::new(RwLock::new(Some(tx))), - cancel_snippet_build_rx: Arc::new(RwLock::new(Some(rx))), + cancel_snippet_build_tx: Arc::new(RwLock::new(None)), + 
indexation_handle: Arc::new(RwLock::new(None)), }) .custom_method("llm-ls/getCompletions", LlmService::get_completions) .custom_method("llm-ls/acceptCompletion", LlmService::accept_completion) diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 3391b7b..9db6d6c 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -20,7 +20,7 @@ use tower_lsp::lsp_types::{ WorkDoneProgressReport, }; use tower_lsp::Client; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, warn}; // TODO: // - create sliding window and splitting of files logic @@ -261,7 +261,7 @@ impl SnippetRetriever { token: NumberOrString, workspace_root: &str, ) -> Result<()> { - info!("building workspace snippets"); + debug!("building workspace snippets"); let workspace_root = PathBuf::from(workspace_root); let mut files = Vec::new(); let gitignore = Gitignore::parse(&workspace_root).ok(); From 3ccf695baa4679ff814ef38391b62c9d93040ee0 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Mon, 26 Feb 2024 19:47:23 +0100 Subject: [PATCH 16/22] feat: add benchmark --- Cargo.lock | 142 ++++++++++++++++++ crates/tinyvec-embed/Cargo.toml | 5 + crates/tinyvec-embed/benches/bench_main.rs | 7 + .../tinyvec-embed/benches/benchmarks/mod.rs | 1 + .../benches/benchmarks/retrieval_speed.rs | 43 ++++++ 5 files changed, 198 insertions(+) create mode 100644 crates/tinyvec-embed/benches/bench_main.rs create mode 100644 crates/tinyvec-embed/benches/benchmarks/mod.rs create mode 100644 crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs diff --git a/Cargo.lock b/Cargo.lock index 1123807..e296054 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.12" @@ -345,6 +351,12 @@ 
dependencies = [ "wav", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.83" @@ -361,6 +373,33 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -470,6 +509,44 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + 
"itertools 0.10.5", +] + [[package]] name = "crossbeam-channel" version = "0.5.11" @@ -1254,6 +1331,26 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.11.0" @@ -1651,6 +1748,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "openssl" version = "0.10.63" @@ -1803,6 +1906,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.6.0" @@ -2674,6 +2805,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -2688,6 +2829,7 @@ name = "tinyvec-embed" version = "0.1.0" dependencies = [ "bincode", + "criterion", "serde", "tempfile", "thiserror", diff --git a/crates/tinyvec-embed/Cargo.toml b/crates/tinyvec-embed/Cargo.toml index f9e0349..b05bc00 100644 --- a/crates/tinyvec-embed/Cargo.toml +++ b/crates/tinyvec-embed/Cargo.toml @@ -24,4 +24,9 @@ version = "1.7.0" features = ["v4", "fast-rng", "macro-diagnostics", "serde"] [dev-dependencies] +criterion = { version = "0.5", features = ["async_tokio"] } tempfile = "3" + +[[bench]] +name = "bench_main" +harness = false diff --git a/crates/tinyvec-embed/benches/bench_main.rs b/crates/tinyvec-embed/benches/bench_main.rs new file mode 100644 index 0000000..bafb20c --- /dev/null +++ b/crates/tinyvec-embed/benches/bench_main.rs @@ -0,0 +1,7 @@ +use criterion::criterion_main; + +mod benchmarks; + +criterion_main! 
{ + benchmarks::retrieval_speed::retrieval_speed, +} diff --git a/crates/tinyvec-embed/benches/benchmarks/mod.rs b/crates/tinyvec-embed/benches/benchmarks/mod.rs new file mode 100644 index 0000000..5d78574 --- /dev/null +++ b/crates/tinyvec-embed/benches/benchmarks/mod.rs @@ -0,0 +1 @@ +pub mod retrieval_speed; diff --git a/crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs b/crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs new file mode 100644 index 0000000..b72e02c --- /dev/null +++ b/crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs @@ -0,0 +1,43 @@ +use std::collections::HashMap; + +use criterion::{criterion_group, Criterion}; +use tinyvec_embed::{ + db::{Collection, Embedding}, + similarity::Distance, +}; +use tokio::runtime::Runtime; +use uuid::Uuid; + +pub fn get_collection(dimension: usize, embeddings_count: usize) -> Collection { + let embeddings = (0..embeddings_count) + .map(|i| Embedding { + id: Uuid::new_v4(), + metadata: Some(HashMap::from([(i.to_string(), i.into())])), + vector: vec![i as f32; dimension], + }) + .collect::>(); + + Collection { + dimension, + distance: Distance::Cosine, + embeddings, + } +} + +pub fn bench_retrieval(c: &mut Criterion) { + let dimension = 768; + let embeddings_count = 50_000; + let rt = Runtime::new().unwrap(); + c.bench_function("get top 5 k", |b| { + let collection = get_collection(dimension, embeddings_count); + let query = vec![42.; dimension]; + b.to_async(&rt) + .iter(|| async { collection.get(&query, 5, None).await.unwrap() }); + }); +} + +criterion_group! 
{ + name = retrieval_speed; + config = Criterion::default(); + targets = bench_retrieval +} From 9e2f7c0c3717f696decf212c0230e7e8c3a060b5 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 27 Feb 2024 19:31:15 +0100 Subject: [PATCH 17/22] fix: rework gitignore to be closer to spec --- crates/gitignore/src/lib.rs | 192 ++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 64 deletions(-) diff --git a/crates/gitignore/src/lib.rs b/crates/gitignore/src/lib.rs index 805ddbe..3e319e2 100644 --- a/crates/gitignore/src/lib.rs +++ b/crates/gitignore/src/lib.rs @@ -13,6 +13,8 @@ pub enum Error { Io(#[from] std::io::Error), #[error("non utf8 path")] NonUtf8Path, + #[error("path has no parent folder")] + NoParent, #[error("glob pattern error: {0}")] Pattern(#[from] glob::PatternError), } @@ -22,7 +24,7 @@ pub type Result = std::result::Result; #[derive(Debug)] pub struct Rule { negate: bool, - pattern: Pattern, + patterns: Vec, _source_line: usize, } @@ -32,9 +34,11 @@ impl Rule { base_path: impl AsRef, _source_line: usize, ) -> Result> { + let mut patterns = vec![]; if pattern.trim().is_empty() || pattern.starts_with('#') { return Ok(None); } + pattern = pattern.trim_start().to_owned(); let negate = if pattern.starts_with('!') { pattern.remove(0); true @@ -48,26 +52,22 @@ impl Rule { false }; let anchored = pattern.contains('/'); - let pattern = if anchored { - let base = format!("{}/{pattern}", base_path.as_ref().to_str().unwrap()); - if directory { - format!("{base}/**") - } else { - base - } - } else if !pattern.starts_with("**") { - let base = format!("**/{pattern}"); - if directory { - format!("{base}/**") - } else { - base - } + if pattern.starts_with('/') { + pattern.remove(0); + } + let base_path_str = base_path.as_ref().to_str().ok_or(Error::NonUtf8Path)?; + let base_pattern = if anchored || pattern.starts_with("**") { + format!("{base_path_str}/{pattern}") } else { - pattern + format!("{base_path_str}/**/{pattern}") }; + 
patterns.push(Pattern::new(&format!("{base_pattern}/**"))?); + if !directory { + patterns.push(Pattern::new(&base_pattern)?); + } Ok(Some(Self { negate, - pattern: Pattern::new(&pattern)?, + patterns, _source_line, })) } @@ -75,6 +75,7 @@ impl Rule { #[derive(Debug)] pub struct Gitignore { + base_path: PathBuf, rules: Vec, _source_file: PathBuf, } @@ -92,66 +93,122 @@ impl Gitignore { let mut rules = Vec::new(); for (line_nb, line) in reader.lines().enumerate() { let line = line?; - if let Some(rule) = Rule::parse(line, path.parent().unwrap(), line_nb + 1)? { + if let Some(rule) = + Rule::parse(line, path.parent().ok_or(Error::NoParent)?, line_nb + 1)? + { rules.push(rule); } } Ok(Self { + base_path: path.parent().ok_or(Error::NoParent)?.to_path_buf(), rules, _source_file: path, }) } pub fn ignored(&self, path: impl AsRef) -> Result { - let path = canonicalize(path)?; + let path = if path.as_ref().starts_with(&self.base_path) { + path.as_ref().to_path_buf() + } else { + canonicalize(self.base_path.join(path))? + }; let match_opts = MatchOptions { case_sensitive: true, require_literal_separator: true, require_literal_leading_dot: false, }; + let path_str = path.to_str().ok_or(Error::NonUtf8Path)?; + let to_match = if path.is_dir() { + format!("{path_str}/") + } else { + path_str.to_owned() + }; for rule in &self.rules { - let path_str = path.to_str().ok_or(Error::NonUtf8Path)?; - let to_match = if path.is_dir() { - format!("{path_str}/") - } else { - path_str.to_owned() - }; - if rule.pattern.matches_with(&to_match, match_opts) { - return Ok(!rule.negate); + for pattern in rule.patterns.iter() { + // TODO: handle negation properly + // negation should include + if rule.negate { + continue; + } + if pattern.matches_with(&to_match, match_opts) { + return Ok(true); + } } } Ok(false) } + + /// Add ad hoc rule from a pattern + pub fn add_rule(&mut self, pattern: String) -> Result<()> { + if let Some(rule) = Rule::parse(pattern, &self.base_path, usize::MAX)? 
{ + self.rules.push(rule); + } + Ok(()) + } } #[cfg(test)] mod tests { - use std::sync::Once; + use std::io::Write; - use super::*; + use tempdir::TempDir; - static INIT: Once = Once::new(); + use super::*; - fn create_gitignore(rules: &str, name: &str) -> Gitignore { - INIT.call_once(|| { - std::env::set_current_dir(canonicalize("../..").unwrap()).unwrap(); - }); - std::fs::write(name, rules).unwrap(); - let gitignore = Gitignore::parse(name).unwrap(); - std::fs::remove_file(name).unwrap(); - gitignore + fn create_gitignore(rules: &str, name: &str) -> (TempDir, Gitignore) { + let temp_dir = TempDir::new(name).unwrap(); + std::fs::File::create(temp_dir.path().join("LICENSE")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("config")).unwrap(); + std::fs::File::create(temp_dir.path().join("config.yaml")).unwrap(); + std::fs::File::create(temp_dir.path().join("Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("README.md")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("xtask")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/gitignore")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/gitignore/Cargo.toml")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/llm-ls/src")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/llm-ls/config")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/llm-ls/config.yaml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/llm-ls/Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/llm-ls/src/main.rs")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/lsp-client/src")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/lsp-client/Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/lsp-client/src/lib.rs")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/mock_server")).unwrap(); + 
std::fs::File::create(temp_dir.path().join("crates/mock_server/Cargo.toml")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/testbed/src")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/testbed/Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/testbed/src/main.rs")).unwrap(); + std::fs::create_dir_all( + temp_dir + .path() + .join("crates/testbed/repositories/simple/src"), + ) + .unwrap(); + std::fs::File::create( + temp_dir + .path() + .join("crates/testbed/repositories/simple/src/main.rs"), + ) + .unwrap(); + let gitignore_path = temp_dir.path().join(name); + std::fs::File::create(&gitignore_path) + .unwrap() + .write_all(rules.as_bytes()) + .unwrap(); + let gitignore = Gitignore::parse(gitignore_path).unwrap(); + (temp_dir, gitignore) } #[test] - fn test_regular_pattern() { - let gitignore = create_gitignore("Cargo.toml", "regular_pattern"); + fn test_regular_relative_pattern() { + let (_temp_dir, gitignore) = create_gitignore("Cargo.toml", "regular_relative_pattern"); assert!(gitignore.ignored("Cargo.toml").unwrap()); assert!(!gitignore.ignored("LICENSE").unwrap()); } #[test] fn test_glob_pattern() { - let gitignore = create_gitignore("crates/**/Cargo.toml", "glob_pattern"); + let (_temp_dir, gitignore) = create_gitignore("crates/**/Cargo.toml", "glob_pattern"); assert!(gitignore.ignored("crates/gitignore/Cargo.toml").unwrap()); assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); assert!(gitignore.ignored("crates/lsp-client/Cargo.toml").unwrap()); @@ -163,21 +220,8 @@ mod tests { } #[test] - fn test_negate_glob_pattern() { - let gitignore = create_gitignore("!crates/**/Cargo.toml", "negate_glob_pattern"); - assert!(!gitignore.ignored("crates/gitignore/Cargo.toml").unwrap()); - assert!(!gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); - assert!(!gitignore.ignored("crates/lsp-client/Cargo.toml").unwrap()); - assert!(!gitignore.ignored("crates/mock_server/Cargo.toml").unwrap()); - 
assert!(!gitignore.ignored("crates/testbed/Cargo.toml").unwrap()); - assert!(!gitignore.ignored("crates/llm-ls/src/main.rs").unwrap()); - assert!(!gitignore.ignored("crates/lsp-client/src/lib.rs").unwrap()); - assert!(!gitignore.ignored("crates/testbed/src/main.rs").unwrap()); - } - - #[test] - fn test_start_glob_pattern() { - let gitignore = create_gitignore("**/crates/", "start_glob_pattern"); + fn test_dir_start_glob_pattern() { + let (_temp_dir, gitignore) = create_gitignore("**/crates/", "start_glob_pattern"); assert!(gitignore.ignored("crates/").unwrap()); assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); assert!(gitignore @@ -188,8 +232,8 @@ mod tests { } #[test] - fn test_relative_path() { - let gitignore = create_gitignore("crates/", "relative_path"); + fn test_dir_relative_path() { + let (_temp_dir, gitignore) = create_gitignore("crates/", "relative_path"); assert!(gitignore.ignored("crates/").unwrap()); assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); assert!(gitignore @@ -199,13 +243,33 @@ mod tests { assert!(!gitignore.ignored("README.md").unwrap()); } + // TODO: + // #[test] + // fn test_negate_pattern() { + // let (_temp_dir, gitignore) = create_gitignore( + // "aaa/*\n\ + // !aaa/Cargo.toml", + // "negate_pattern", + // ); + // assert!(!gitignore.ignored("aaa/Cargo.toml").unwrap()); + // assert!(gitignore.ignored("aaa/config.yaml").unwrap()); + // } + #[test] - fn test_negate_pattern() { - let gitignore = create_gitignore( - "!Cargo.toml\n\ - Cargo.toml", - "negate_pattern", - ); + fn test_ad_hoc_rule_add() { + let (_temp_dir, mut gitignore) = create_gitignore("!Cargo.toml", "adhoc_add"); + assert!(!gitignore.ignored("config.yaml").unwrap()); assert!(!gitignore.ignored("Cargo.toml").unwrap()); + gitignore.add_rule("config.yaml".to_owned()).unwrap(); + assert!(gitignore.ignored("config.yaml").unwrap()); + } + + #[test] + fn test_anchored_file_or_dir() { + let (_temp_dir, gitignore) = create_gitignore("/config*", 
"anchored_file_or_dir"); + assert!(gitignore.ignored("config.yaml").unwrap()); + assert!(gitignore.ignored("config").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/config.yaml").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/config").unwrap()); } } From 702ba67124f9211f6bbe073138924a901220ef6f Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Tue, 27 Feb 2024 19:31:29 +0100 Subject: [PATCH 18/22] feat: add llm-ls config file --- Cargo.lock | 64 ++++++++++++++++++++++------------ crates/llm-ls/Cargo.toml | 8 +++-- crates/llm-ls/src/config.rs | 36 +++++++++++++++++++ crates/llm-ls/src/error.rs | 4 +++ crates/llm-ls/src/main.rs | 15 ++++++++ crates/llm-ls/src/retrieval.rs | 11 +++++- 6 files changed, 112 insertions(+), 26 deletions(-) create mode 100644 crates/llm-ls/src/config.rs diff --git a/Cargo.lock b/Cargo.lock index e296054..2e07812 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,9 +298,9 @@ dependencies = [ [[package]] name = "candle-core" -version = "0.3.3" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db8659ea87ee8197d2fc627348916cce0561330ee7ae3874e771691d3cecb2f" +checksum = "8d3c6a912d574b4ca6e14ee816a1139ee0d15b75a48291f153dd16f6c643130e" dependencies = [ "byteorder", "gemm", @@ -319,9 +319,9 @@ dependencies = [ [[package]] name = "candle-nn" -version = "0.3.3" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ddce8312032760a6791d6adc9c56dc54fd7c1be38d85dcc4862f1c75228bbc7" +checksum = "9fd8f228c993467be5295a9759fc64d79e9ff9ea9080d7fc1cdf8d44b899082a" dependencies = [ "candle-core", "half", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "candle-transformers" -version = "0.3.3" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68834a0cacb7e002d1f4abfe26a7cd1237e2ba342fddcf2e30913c4edb96409d" +checksum = "f76c29230b2dffa216e174512bf00f621eb1f4ba77718593f183828aad427d44" dependencies = [ 
"byteorder", "candle-core", @@ -348,7 +348,6 @@ dependencies = [ "serde_json", "serde_plain", "tracing", - "wav", ] [[package]] @@ -456,6 +455,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "config" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7328b20597b53c2454f0b1919720c25c7339051c02b72b7e05409e00b14132be" +dependencies = [ + "lazy_static", + "nom", + "pathdiff", + "serde", + "yaml-rust", +] + [[package]] name = "console" version = "0.15.8" @@ -1422,6 +1434,12 @@ dependencies = [ "redox_syscall", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -1436,6 +1454,7 @@ dependencies = [ "candle-nn", "candle-transformers", "clap", + "config", "custom-types", "futures-util", "gitignore", @@ -1446,6 +1465,7 @@ dependencies = [ "ropey", "serde", "serde_json", + "serde_yaml", "thiserror", "tinyvec-embed", "tokenizers", @@ -1850,6 +1870,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "pathdiff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" + [[package]] name = "pbkdf2" version = "0.11.0" @@ -2224,12 +2250,6 @@ dependencies = [ "winreg", ] -[[package]] -name = "riff" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" - [[package]] name = "ring" version = "0.17.8" @@ -3594,15 +3614,6 @@ dependencies = [ 
"web-sys", ] -[[package]] -name = "wav" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609" -dependencies = [ - "riff", -] - [[package]] name = "web-sys" version = "0.3.68" @@ -3850,6 +3861,15 @@ dependencies = [ "zip", ] +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yoke" version = "0.7.3" diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index d0e9707..a5b386b 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -7,10 +7,11 @@ edition = "2021" name = "llm-ls" [dependencies] -candle = { version = "0.3", package = "candle-core", default-features = false } -candle-nn = "0.3" -candle-transformers = "0.3" +candle = { version = "0.4", package = "candle-core", default-features = false } +candle-nn = "0.4" +candle-transformers = "0.4" clap = { version = "4", features = ["derive"] } +config = { version = "0.14", features = ["yaml"], default_features = false } custom-types = { path = "../custom-types" } futures-util = "0.3" gitignore = { path = "../gitignore" } @@ -25,6 +26,7 @@ reqwest = { version = "0.11", default-features = false, features = [ "rustls-tls", ] } serde = { version = "1", features = ["derive"] } +serde_yaml = "0.9" serde_json = "1" thiserror = "1" tinyvec-embed = { path = "../tinyvec-embed" } diff --git a/crates/llm-ls/src/config.rs b/crates/llm-ls/src/config.rs new file mode 100644 index 0000000..6552108 --- /dev/null +++ b/crates/llm-ls/src/config.rs @@ -0,0 +1,36 @@ +use std::path::Path; + +use config::Config; +use serde::{Deserialize, Serialize}; +use tokio::fs::write; + +use crate::error::Result; + +#[derive(Deserialize, Serialize)] +pub(crate) struct LlmLsConfig { + /// .gitignore-like glob 
patterns to exclude from indexing + pub(crate) ignored_paths: Vec, +} + +impl Default for LlmLsConfig { + fn default() -> Self { + Self { + ignored_paths: vec![".git/".into(), ".idea/".into(), ".DS_Store/".into()], + } + } +} + +pub async fn load_config(cache_path: &str) -> Result { + let config_file_path = Path::new(cache_path).join("config.yaml"); + if config_file_path.exists() { + Ok(Config::builder() + .add_source(config::File::with_name(&format!("{cache_path}/config"))) + .add_source(config::Environment::with_prefix("LLM_LS")) + .build()? + .try_deserialize()?) + } else { + let config = LlmLsConfig::default(); + write(config_file_path, serde_yaml::to_string(&config)?.as_bytes()).await?; + Ok(config) + } +} diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index 5d27085..b0d8210 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -17,6 +17,8 @@ pub(crate) fn internal_error(err: E) -> LspError { pub enum Error { #[error("candle error: {0}")] Candle(#[from] candle::Error), + #[error("config error: {0}")] + Config(#[from] config::ConfigError), #[error("gitignore error: {0}")] Gitignore(#[from] gitignore::Error), #[error("huggingface api error: {0}")] @@ -69,6 +71,8 @@ pub enum Error { TokioJoin(#[from] tokio::task::JoinError), #[error("unknown backend: {0}")] UnknownBackend(String), + #[error("yaml serialization error: {0}")] + Yaml(#[from] serde_yaml::Error), } pub(crate) type Result = std::result::Result; diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 2762882..eb2aef6 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -1,4 +1,5 @@ use clap::Parser; +use config::{load_config, LlmLsConfig}; use custom_types::llm_ls::{ AcceptCompletionParams, Backend, Completion, FimParams, GetCompletionsParams, GetCompletionsResult, Ide, RejectCompletionParams, TokenizerConfig, @@ -34,6 +35,7 @@ use crate::document::Document; use crate::error::{internal_error, Error, Result}; mod backend; 
+mod config; mod document; mod error; mod language_id; @@ -137,6 +139,7 @@ pub struct Generation { struct LlmService { cache_dir: PathBuf, client: Client, + config: Arc, document_map: Arc>>, http_client: reqwest::Client, unsafe_http_client: reqwest::Client, @@ -643,6 +646,7 @@ impl LanguageServer for LlmService { async fn initialized(&self, _: InitializedParams) { let client = self.client.clone(); + let config = self.config.clone(); let snippet_retriever = self.snippet_retriever.clone(); let supports_progress_bar = self.supports_progress_bar.clone(); let workspace_folders = self.workspace_folders.clone(); @@ -683,6 +687,7 @@ impl LanguageServer for LlmService { tokio::select! { res = guard.build_workspace_snippets( client.clone(), + config, token, workspace_folders[0].uri.path(), ) => { @@ -884,9 +889,19 @@ async fn main() { .await .expect("failed to initialise snippet retriever"), )); + let config = Arc::new( + load_config( + cache_dir + .to_str() + .expect("cache dir path is not valid utf8"), + ) + .await + .expect("failed to load config file"), + ); let (service, socket) = LspService::build(|client| LlmService { cache_dir, client, + config, document_map: Arc::new(RwLock::new(HashMap::new())), http_client, unsafe_http_client, diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 9db6d6c..3786aef 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -1,3 +1,4 @@ +use crate::config::LlmLsConfig; use crate::error::{Error, Result}; use candle::utils::{cuda_is_available, metal_is_available}; use candle::{Device, Tensor}; @@ -258,13 +259,21 @@ impl SnippetRetriever { pub(crate) async fn build_workspace_snippets( &mut self, client: Client, + config: Arc, token: NumberOrString, workspace_root: &str, ) -> Result<()> { debug!("building workspace snippets"); let workspace_root = PathBuf::from(workspace_root); let mut files = Vec::new(); - let gitignore = Gitignore::parse(&workspace_root).ok(); + let mut gitignore = 
Gitignore::parse(&workspace_root).ok(); + for pattern in config.ignored_paths.iter() { + if let Some(gitignore) = gitignore.as_mut() { + if let Err(err) = gitignore.add_rule(pattern.clone()) { + error!("failed to parse pattern: {err}"); + } + }; + } client .send_notification::(ProgressParams { From 8b87df65e3a35605edf89991e1aeb46c4e306f7e Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Wed, 28 Feb 2024 16:19:17 +0100 Subject: [PATCH 19/22] feat: separate embeddings collections for each workspace --- crates/gitignore/src/lib.rs | 9 ++++ crates/llm-ls/src/config.rs | 2 +- crates/llm-ls/src/error.rs | 8 +++- crates/llm-ls/src/main.rs | 6 +-- crates/llm-ls/src/retrieval.rs | 76 ++++++++++++++++++++++++---------- 5 files changed, 73 insertions(+), 28 deletions(-) diff --git a/crates/gitignore/src/lib.rs b/crates/gitignore/src/lib.rs index 3e319e2..cc6d172 100644 --- a/crates/gitignore/src/lib.rs +++ b/crates/gitignore/src/lib.rs @@ -106,9 +106,18 @@ impl Gitignore { }) } + /// Checks if a path is ignored. + /// + /// Path can be relative within the directory which contains the `.gitignore` file. + /// + /// # Errors + /// + /// This function will return an error if the file does not exist. pub fn ignored(&self, path: impl AsRef) -> Result { let path = if path.as_ref().starts_with(&self.base_path) { path.as_ref().to_path_buf() + } else if path.as_ref().has_root() { + return Ok(false); } else { canonicalize(self.base_path.join(path))? 
}; diff --git a/crates/llm-ls/src/config.rs b/crates/llm-ls/src/config.rs index 6552108..db87f34 100644 --- a/crates/llm-ls/src/config.rs +++ b/crates/llm-ls/src/config.rs @@ -15,7 +15,7 @@ pub(crate) struct LlmLsConfig { impl Default for LlmLsConfig { fn default() -> Self { Self { - ignored_paths: vec![".git/".into(), ".idea/".into(), ".DS_Store/".into()], + ignored_paths: vec![".git".into(), ".idea".into(), ".DS_Store".into()], } } } diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index b0d8210..f6ac083 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -1,4 +1,4 @@ -use std::fmt::Display; +use std::{fmt::Display, path::PathBuf}; use tower_lsp::jsonrpc::Error as LspError; use tracing::error; @@ -41,6 +41,10 @@ pub enum Error { MalformattedEmbeddingMetadata(String), #[error("embedding has no metadata")] MissingMetadata, + #[error("no final path: {0}")] + NoFinalPath(PathBuf), + #[error("error converting to string")] + NonUnicode, #[error("ollama error: {0}")] Ollama(crate::backend::APIError), #[error("openai error: {0}")] @@ -69,6 +73,8 @@ pub enum Error { Tokenizer(#[from] tokenizers::Error), #[error("tokio join error: {0}")] TokioJoin(#[from] tokio::task::JoinError), + #[error("embeddings database is uninitialised")] + UninitialisedDatabase, #[error("unknown backend: {0}")] UnknownBackend(String), #[error("yaml serialization error: {0}")] diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index eb2aef6..c26d6f6 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -820,9 +820,7 @@ impl LanguageServer for LlmService { async fn shutdown(&self) -> LspResult<()> { debug!("shutdown"); if let Some(tx) = self.cancel_snippet_build_tx.write().await.take() { - if tx.send(()).is_err() { - return Err(internal_error("failed to cancel indexing")); - } + let _ = tx.send(()); } self.snippet_retriever .read() @@ -885,7 +883,7 @@ async fn main() { .expect("failed to build reqwest unsafe client"); 
let snippet_retriever = Arc::new(RwLock::new( - SnippetRetriever::new(cache_dir.clone(), 20, 10) + SnippetRetriever::new(cache_dir.join("database"), 20, 10) .await .expect("failed to initialise snippet retriever"), )); diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 3786aef..65bf1aa 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -187,22 +187,6 @@ fn device(cpu: bool) -> Result { } } -async fn initialse_database(cache_path: PathBuf) -> Db { - let uri = cache_path.join("database"); - let mut db = Db::open(uri).await.expect("failed to open database"); - match db - .create_collection("code-slices".to_owned(), 384, Distance::Cosine) - .await - { - Ok(_) - | Err(tinyvec_embed::error::Error::Collection( - tinyvec_embed::error::Collection::UniqueViolation, - )) => (), - Err(err) => panic!("failed to create collection: {err}"), - } - db -} - pub(crate) struct Snippet { pub(crate) file_url: String, pub(crate) code: String, @@ -230,7 +214,9 @@ impl TryFrom<&SimilarityResult> for Snippet { } pub(crate) struct SnippetRetriever { - db: Db, + cache_path: PathBuf, + collection_name: String, + db: Option, model: Arc, tokenizer: Tokenizer, window_size: usize, @@ -246,9 +232,12 @@ impl SnippetRetriever { window_size: usize, window_step: usize, ) -> Result { + let collection_name = "code-slices".to_owned(); let (model, tokenizer) = build_model_and_tokenizer().await?; Ok(Self { - db: initialse_database(cache_path).await, + cache_path, + collection_name, + db: None, model: Arc::new(model), tokenizer, window_size, @@ -256,6 +245,23 @@ impl SnippetRetriever { }) } + pub(crate) async fn initialse_database(&mut self, db_name: &str) -> Result { + let uri = self.cache_path.join(db_name); + let mut db = Db::open(uri).await.expect("failed to open database"); + match db + .create_collection(self.collection_name.clone(), 384, Distance::Cosine) + .await + { + Ok(_) + | Err(tinyvec_embed::error::Error::Collection( + 
tinyvec_embed::error::Collection::UniqueViolation, + )) => (), + Err(err) => panic!("failed to create collection: {err}"), + } + self.db = Some(db.clone()); + Ok(db) + } + pub(crate) async fn build_workspace_snippets( &mut self, client: Client, @@ -265,6 +271,16 @@ impl SnippetRetriever { ) -> Result<()> { debug!("building workspace snippets"); let workspace_root = PathBuf::from(workspace_root); + if self.db.is_none() { + self.initialse_database( + workspace_root + .file_name() + .ok_or_else(|| Error::NoFinalPath(workspace_root.clone()))? + .to_str() + .ok_or(Error::NonUnicode)?, + ) + .await?; + } let mut files = Vec::new(); let mut gitignore = Gitignore::parse(&workspace_root).ok(); for pattern in config.ignored_paths.iter() { @@ -356,7 +372,11 @@ impl SnippetRetriever { snippet: String, filter: Option, ) -> Result> { - let col = self.db.get_collection("code-slices").await?; + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + let col = db.get_collection(&self.collection_name).await?; let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; encoding.truncate(512, 1, TruncationDirection::Right); let query = self @@ -374,12 +394,20 @@ impl SnippetRetriever { } pub(crate) async fn stop(&self) -> Result<()> { - self.db.save().await?; + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + db.save().await?; Ok(()) } pub(crate) async fn remove(&self, file_url: String, range: Range) -> Result<()> { - let col = self.db.get_collection("code-slices").await?; + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + let col = db.get_collection(&self.collection_name).await?; col.write().await.remove(Some( Collection::filter() .comparison( @@ -429,7 +457,11 @@ impl SnippetRetriever { start: usize, end: Option, ) -> Result<()> { - let col = 
self.db.get_collection("code-slices").await?; + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + let col = db.get_collection("code-slices").await?; let file = tokio::fs::read_to_string(&file_url).await?; let lines = file.split('\n').collect::>(); let end = end.unwrap_or(lines.len()).min(lines.len()); From baedf8585679b4c33b075268aa5fe12a8ad1fffb Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Wed, 28 Feb 2024 17:03:06 +0100 Subject: [PATCH 20/22] feat: add `ModelConfig` to `LlmLsConfig` --- crates/llm-ls/src/config.rs | 41 +++++++++++++++++++++++++++++----- crates/llm-ls/src/main.rs | 10 ++++----- crates/llm-ls/src/retrieval.rs | 34 +++++++++++++++++++++------- 3 files changed, 66 insertions(+), 19 deletions(-) diff --git a/crates/llm-ls/src/config.rs b/crates/llm-ls/src/config.rs index db87f34..66abd0b 100644 --- a/crates/llm-ls/src/config.rs +++ b/crates/llm-ls/src/config.rs @@ -6,8 +6,28 @@ use tokio::fs::write; use crate::error::Result; +#[derive(Clone, Deserialize, Serialize)] +pub(crate) struct ModelConfig { + pub(crate) id: String, + pub(crate) revision: String, + pub(crate) embeddings_size: usize, + pub(crate) max_input_size: usize, +} + +impl Default for ModelConfig { + fn default() -> Self { + Self { + id: "intfloat/multilingual-e5-small".to_string(), + revision: "main".to_string(), + embeddings_size: 384, + max_input_size: 512, + } + } +} + #[derive(Deserialize, Serialize)] pub(crate) struct LlmLsConfig { + pub(crate) model: ModelConfig, /// .gitignore-like glob patterns to exclude from indexing pub(crate) ignored_paths: Vec, } @@ -15,22 +35,31 @@ pub(crate) struct LlmLsConfig { impl Default for LlmLsConfig { fn default() -> Self { Self { + model: ModelConfig::default(), ignored_paths: vec![".git".into(), ".idea".into(), ".DS_Store".into()], } } } -pub async fn load_config(cache_path: &str) -> Result { +/// Loads configuration from a file and environment variables. 
+/// +/// If the file does not exist, it will be created with the default configuration. +/// +/// # Arguments +/// +/// * `cache_path` - Path to the directory where the configuration file will be stored. +pub(crate) async fn load_config(cache_path: &str) -> Result { let config_file_path = Path::new(cache_path).join("config.yaml"); - if config_file_path.exists() { - Ok(Config::builder() + let config = if config_file_path.exists() { + Config::builder() .add_source(config::File::with_name(&format!("{cache_path}/config"))) .add_source(config::Environment::with_prefix("LLM_LS")) .build()? - .try_deserialize()?) + .try_deserialize()? } else { let config = LlmLsConfig::default(); write(config_file_path, serde_yaml::to_string(&config)?.as_bytes()).await?; - Ok(config) - } + config + }; + Ok(config) } diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index c26d6f6..0ad9ecf 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -882,11 +882,6 @@ async fn main() { .build() .expect("failed to build reqwest unsafe client"); - let snippet_retriever = Arc::new(RwLock::new( - SnippetRetriever::new(cache_dir.join("database"), 20, 10) - .await - .expect("failed to initialise snippet retriever"), - )); let config = Arc::new( load_config( cache_dir @@ -896,6 +891,11 @@ async fn main() { .await .expect("failed to load config file"), ); + let snippet_retriever = Arc::new(RwLock::new( + SnippetRetriever::new(cache_dir.join("embeddings"), config.model.clone(), 20, 10) + .await + .expect("failed to initialise snippet retriever"), + )); let (service, socket) = LspService::build(|client| LlmService { cache_dir, client, diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 65bf1aa..8860b2d 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -1,4 +1,4 @@ -use crate::config::LlmLsConfig; +use crate::config::{LlmLsConfig, ModelConfig}; use crate::error::{Error, Result}; use 
candle::utils::{cuda_is_available, metal_is_available}; use candle::{Device, Tensor}; @@ -138,11 +138,12 @@ fn is_code_file(file_name: &Path) -> bool { } } -async fn build_model_and_tokenizer() -> Result<(BertModel, Tokenizer)> { +async fn build_model_and_tokenizer( + model_id: String, + revision: String, +) -> Result<(BertModel, Tokenizer)> { let start = Instant::now(); let device = device(false)?; - let model_id = "intfloat/multilingual-e5-small".to_string(); - let revision = "main".to_string(); let repo = Repo::with_revision(model_id, RepoType::Model, revision); let (config_filename, tokenizer_filename, weights_filename) = { let api = Api::new()?; @@ -218,6 +219,7 @@ pub(crate) struct SnippetRetriever { collection_name: String, db: Option, model: Arc, + model_config: ModelConfig, tokenizer: Tokenizer, window_size: usize, window_step: usize, @@ -229,16 +231,20 @@ impl SnippetRetriever { /// Panics if the database cannot be initialised. pub(crate) async fn new( cache_path: PathBuf, + model_config: ModelConfig, window_size: usize, window_step: usize, ) -> Result { let collection_name = "code-slices".to_owned(); - let (model, tokenizer) = build_model_and_tokenizer().await?; + let (model, tokenizer) = + build_model_and_tokenizer(model_config.id.clone(), model_config.revision.clone()) + .await?; Ok(Self { cache_path, collection_name, db: None, model: Arc::new(model), + model_config, tokenizer, window_size, window_step, @@ -249,7 +255,11 @@ impl SnippetRetriever { let uri = self.cache_path.join(db_name); let mut db = Db::open(uri).await.expect("failed to open database"); match db - .create_collection(self.collection_name.clone(), 384, Distance::Cosine) + .create_collection( + self.collection_name.clone(), + self.model_config.embeddings_size, + Distance::Cosine, + ) .await { Ok(_) @@ -378,7 +388,11 @@ impl SnippetRetriever { }; let col = db.get_collection(&self.collection_name).await?; let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; - 
encoding.truncate(512, 1, TruncationDirection::Right); + encoding.truncate( + self.model_config.max_input_size, + 1, + TruncationDirection::Right, + ); let query = self .generate_embedding(encoding, self.model.clone()) .await?; @@ -495,7 +509,11 @@ impl SnippetRetriever { } let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; - encoding.truncate(512, 1, TruncationDirection::Right); + encoding.truncate( + self.model_config.max_input_size, + 1, + TruncationDirection::Right, + ); let result = self.generate_embedding(encoding, self.model.clone()).await; let embedding = match result { Ok(e) => e, From 6e3d6c0190b2192aa7ca98213d1998a254c91362 Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Wed, 28 Feb 2024 19:02:43 +0100 Subject: [PATCH 21/22] feat: add strategies for building query embedding vector --- crates/llm-ls/src/main.rs | 20 +++++++++- crates/llm-ls/src/retrieval.rs | 70 +++++++++++++++++++++++++++------- 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 0ad9ecf..38996dd 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -33,6 +33,7 @@ use uuid::Uuid; use crate::backend::{build_body, build_headers, parse_generations}; use crate::document::Document; use crate::error::{internal_error, Error, Result}; +use crate::retrieval::BuildFrom; mod backend; mod config; @@ -238,11 +239,21 @@ async fn build_prompt( after_line = after_iter.next(); } let before = before.into_iter().rev().collect::>().join(""); + let query = snippet_retriever + .read() + .await + .build_query( + format!("{before}{after}"), + BuildFrom::Cursor { + cursor_position: before.len(), + }, + ) + .await?; let snippets = snippet_retriever .read() .await .search( - format!("{before}{after}"), + &query, Some(FilterBuilder::new().comparison( "file_url".to_owned(), Compare::Neq, @@ -281,11 +292,16 @@ async fn build_prompt( before.push(line); } let prompt = 
before.into_iter().rev().collect::>().join(""); + let query = snippet_retriever + .read() + .await + .build_query(prompt.clone(), BuildFrom::End) + .await?; let snippets = snippet_retriever .read() .await .search( - prompt.clone(), + &query, Some(FilterBuilder::new().comparison( "file_url".to_owned(), Compare::Neq, diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 8860b2d..09615d5 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -251,7 +251,7 @@ impl SnippetRetriever { }) } - pub(crate) async fn initialse_database(&mut self, db_name: &str) -> Result { + pub(crate) async fn initialise_database(&mut self, db_name: &str) -> Result { let uri = self.cache_path.join(db_name); let mut db = Db::open(uri).await.expect("failed to open database"); match db @@ -282,13 +282,15 @@ impl SnippetRetriever { debug!("building workspace snippets"); let workspace_root = PathBuf::from(workspace_root); if self.db.is_none() { - self.initialse_database( + self.initialise_database(&format!( + "{}--{}", workspace_root .file_name() .ok_or_else(|| Error::NoFinalPath(workspace_root.clone()))? 
.to_str() .ok_or(Error::NonUnicode)?, - ) + self.model_config.id.replace('/', "--"), + )) .await?; } let mut files = Vec::new(); @@ -377,9 +379,49 @@ impl SnippetRetriever { Ok(()) } - pub(crate) async fn search( + pub(crate) async fn build_query( &self, snippet: String, + strategy: BuildFrom, + ) -> Result> { + match strategy { + BuildFrom::Start => { + let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; + encoding.truncate( + self.model_config.max_input_size, + 1, + TruncationDirection::Right, + ); + self.generate_embedding(encoding, self.model.clone()).await + } + BuildFrom::Cursor { cursor_position } => { + let (before, after) = snippet.split_at(cursor_position); + let mut before_encoding = self.tokenizer.encode(before, true)?; + let mut after_encoding = self.tokenizer.encode(after, true)?; + let share = self.model_config.max_input_size / 2; + before_encoding.truncate(share, 1, TruncationDirection::Left); + after_encoding.truncate(share, 1, TruncationDirection::Right); + before_encoding.take_overflowing(); + after_encoding.take_overflowing(); + before_encoding.merge_with(after_encoding, false); + self.generate_embedding(before_encoding, self.model.clone()) + .await + } + BuildFrom::End => { + let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; + encoding.truncate( + self.model_config.max_input_size, + 1, + TruncationDirection::Left, + ); + self.generate_embedding(encoding, self.model.clone()).await + } + } + } + + pub(crate) async fn search( + &self, + query: &[f32], filter: Option, ) -> Result> { let db = match self.db.as_ref() { @@ -387,19 +429,10 @@ impl SnippetRetriever { None => return Err(Error::UninitialisedDatabase), }; let col = db.get_collection(&self.collection_name).await?; - let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; - encoding.truncate( - self.model_config.max_input_size, - 1, - TruncationDirection::Right, - ); - let query = self - .generate_embedding(encoding, self.model.clone()) - .await?; let 
result = col .read() .await - .get(&query, 5, filter) + .get(query, 5, filter) .await? .iter() .map(TryInto::try_into) @@ -537,3 +570,12 @@ impl SnippetRetriever { Ok(()) } } + +pub(crate) enum BuildFrom { + Cursor { + cursor_position: usize, + }, + End, + #[allow(dead_code)] + Start, +} From 64a4c38e552dc6fba7c42da2f4f752ef53d2cd12 Mon Sep 17 00:00:00 2001 From: Wats0ns Date: Wed, 6 Mar 2024 12:25:29 +0100 Subject: [PATCH 22/22] Added batch embedding computing (#86) --------- Co-authored-by: Quentin Maire Co-authored-by: Luc Georges --- crates/llm-ls/src/error.rs | 2 + crates/llm-ls/src/retrieval.rs | 160 ++++++++++++++++++++++-------- crates/tinyvec-embed/src/db.rs | 23 +++++ crates/tinyvec-embed/src/error.rs | 2 + 4 files changed, 144 insertions(+), 43 deletions(-) diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index f6ac083..46f17f1 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -79,6 +79,8 @@ pub enum Error { UnknownBackend(String), #[error("yaml serialization error: {0}")] Yaml(#[from] serde_yaml::Error), + #[error("No embedding built")] + MissingEmbedding, } pub(crate) type Result = std::result::Result; diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs index 09615d5..6bea3f8 100644 --- a/crates/llm-ls/src/retrieval.rs +++ b/crates/llm-ls/src/retrieval.rs @@ -7,11 +7,14 @@ use candle_transformers::models::bert::{BertModel, Config, DTYPE}; use gitignore::Gitignore; use hf_hub::{api::tokio::Api, Repo, RepoType}; use std::collections::{HashMap, VecDeque}; +use std::iter::zip; use std::path::Path; use std::{path::PathBuf, sync::Arc}; use tinyvec_embed::db::{Collection, Compare, Db, Embedding, FilterBuilder, SimilarityResult}; use tinyvec_embed::similarity::Distance; -use tokenizers::{Encoding, Tokenizer, TruncationDirection}; +use tokenizers::{ + Encoding, PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer, TruncationDirection, +}; use tokio::io::AsyncReadExt; use 
tokio::task::spawn_blocking; use tokio::time::Instant; @@ -156,9 +159,16 @@ async fn build_model_and_tokenizer( let config = tokio::fs::read_to_string(config_filename).await?; let config: Config = serde_json::from_str(&config)?; let mut tokenizer: Tokenizer = Tokenizer::from_file(tokenizer_filename)?; - tokenizer.with_padding(None); + tokenizer.with_padding(Some(PaddingParams { + strategy: PaddingStrategy::BatchLongest, + direction: PaddingDirection::Right, + pad_to_multiple_of: Some(8), + // TODO: use values provided in model config + pad_id: 0, + pad_type_id: 0, + pad_token: "".to_string(), + })); tokenizer.with_truncation(None)?; - let vb = VarBuilder::from_pth(&weights_filename, DTYPE, &device)?; let model = BertModel::load(vb, &config)?; debug!( @@ -191,6 +201,8 @@ fn device(cpu: bool) -> Result { pub(crate) struct Snippet { pub(crate) file_url: String, pub(crate) code: String, + pub(crate) start_line: usize, + pub(crate) end_line: usize, } impl TryFrom<&SimilarityResult> for Snippet { @@ -210,7 +222,20 @@ impl TryFrom<&SimilarityResult> for Snippet { .get("snippet") .ok_or_else(|| Error::MalformattedEmbeddingMetadata("snippet".to_owned()))? .inner_string()?; - Ok(Snippet { file_url, code }) + let start_line = meta + .get("start_line_no") + .ok_or_else(|| Error::MalformattedEmbeddingMetadata("snippet".to_owned()))? + .try_into()?; + let end_line = meta + .get("start_line_no") + .ok_or_else(|| Error::MalformattedEmbeddingMetadata("snippet".to_owned()))? 
+ .try_into()?; + Ok(Snippet { + file_url, + code, + start_line, + end_line, + }) } } @@ -280,6 +305,7 @@ impl SnippetRetriever { workspace_root: &str, ) -> Result<()> { debug!("building workspace snippets"); + let start = Instant::now(); let workspace_root = PathBuf::from(workspace_root); if self.db.is_none() { self.initialise_database(&format!( @@ -360,7 +386,10 @@ impl SnippetRetriever { }) .await; } - + debug!( + "Built workspace snippets in {} ms", + start.elapsed().as_millis() + ); Ok(()) } @@ -384,7 +413,7 @@ impl SnippetRetriever { snippet: String, strategy: BuildFrom, ) -> Result> { - match strategy { + let result = match strategy { BuildFrom::Start => { let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; encoding.truncate( @@ -392,7 +421,8 @@ impl SnippetRetriever { 1, TruncationDirection::Right, ); - self.generate_embedding(encoding, self.model.clone()).await + self.generate_embeddings(vec![encoding], self.model.clone()) + .await? } BuildFrom::Cursor { cursor_position } => { let (before, after) = snippet.split_at(cursor_position); @@ -404,8 +434,8 @@ impl SnippetRetriever { before_encoding.take_overflowing(); after_encoding.take_overflowing(); before_encoding.merge_with(after_encoding, false); - self.generate_embedding(before_encoding, self.model.clone()) - .await + self.generate_embeddings(vec![before_encoding], self.model.clone()) + .await? } BuildFrom::End => { let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; @@ -414,9 +444,15 @@ impl SnippetRetriever { 1, TruncationDirection::Left, ); - self.generate_embedding(encoding, self.model.clone()).await + self.generate_embeddings(vec![encoding], self.model.clone()) + .await? 
} + }; + if result.is_empty() { + return Err(Error::MissingEmbedding); } + let mut result = result; + Ok(result.remove(0)) } pub(crate) async fn search( @@ -477,21 +513,24 @@ impl SnippetRetriever { impl SnippetRetriever { // TODO: handle overflowing in Encoding - async fn generate_embedding( + /// Embedding order is preserved and stays the same as encoding input + async fn generate_embeddings( &self, - encoding: Encoding, + encodings: Vec, model: Arc, - ) -> Result> { + ) -> Result>> { let start = Instant::now(); - let embedding = spawn_blocking(move || -> Result> { - let tokens = encoding.get_ids().to_vec(); - let token_ids = Tensor::new(&tokens[..], &model.device)?.unsqueeze(0)?; + let embedding = spawn_blocking(move || -> Result>> { + let tokens = encodings + .iter() + .map(|elem| Ok(Tensor::new(elem.get_ids().to_vec(), &model.device)?)) + .collect::>>()?; + let token_ids = Tensor::stack(&tokens, 0)?; let token_type_ids = token_ids.zeros_like()?; let embedding = model.forward(&token_ids, &token_type_ids)?; let (_n_sentence, n_tokens, _hidden_size) = embedding.dims3()?; let embedding = (embedding.sum(1)? / (n_tokens as f64))?; - let embedding = embedding.get(0)?.to_vec1::()?; - Ok(embedding) + Ok(embedding.to_vec2::()?) 
}) .await?; debug!("embedding generated in {} ms", start.elapsed().as_millis()); @@ -512,6 +551,8 @@ impl SnippetRetriever { let file = tokio::fs::read_to_string(&file_url).await?; let lines = file.split('\n').collect::>(); let end = end.unwrap_or(lines.len()).min(lines.len()); + let mut snippets: Vec = Vec::new(); + debug!("Building embeddings for {file_url}"); for start_line in (start..end).step_by(self.window_step) { let end_line = (start_line + self.window_size - 1).min(lines.len()); if !col @@ -538,35 +579,68 @@ impl SnippetRetriever { let window = lines[start_line..end_line].to_vec(); let snippet = window.join("\n"); if snippet.is_empty() { + debug!("snippet {file_url}[{start_line}, {end_line}] empty"); continue; } + snippets.push(Snippet { + file_url: file_url.clone().into(), + code: snippet, + start_line, + end_line, + }); + } + { + let nb_snippets = snippets.len(); + let steps = self.window_step; + debug!("Build {nb_snippets} snippets for {file_url}: {start}, {end}, {steps}"); + } - let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; - encoding.truncate( - self.model_config.max_input_size, - 1, - TruncationDirection::Right, - ); - let result = self.generate_embedding(encoding, self.model.clone()).await; - let embedding = match result { - Ok(e) => e, - Err(err) => { - error!( - "error generating embedding for {file_url}[{start_line}, {end_line}]: {err}", - ); - continue; - } - }; - col.write().await.insert(Embedding::new( - embedding, - Some(HashMap::from([ - ("file_url".to_owned(), file_url.clone().into()), - ("start_line_no".to_owned(), start_line.into()), - ("end_line_no".to_owned(), end_line.into()), - ("snippet".to_owned(), snippet.clone().into()), - ])), - ))?; + // Group by length to reduce padding effect + let snippets = spawn_blocking(|| -> Result> { + snippets.sort_unstable_by(|first, second| first.code.len().cmp(&second.code.len())); + Ok(snippets) + }) + .await?; + + // TODO: improvements to compute an efficient batch size: + // - 
batch size should be relative to the cumulative size of all elements in the batch, + // Set embedding_batch_size to 8 if device is GPU, use match + let embedding_batch_size = match self.model.device { + Device::Cpu => 2, + _ => 8, + }; + for batch in snippets?.chunks(embedding_batch_size) { + let batch_code = batch.iter().map(|snippet| snippet.code.clone()).collect(); + let encodings = self + .tokenizer + .encode_batch(batch_code, true)? + .iter_mut() + .map(|encoding| { + encoding.truncate(512, 1, TruncationDirection::Right); + encoding.clone() + }) + .collect(); + let results = self + .generate_embeddings(encodings, self.model.clone()) + .await?; + col.write().await.batch_insert( + zip(results, batch) + .map(|item| { + let (embedding, snippet) = item; + Embedding::new( + embedding, + Some(HashMap::from([ + ("file_url".to_owned(), snippet.file_url.clone().into()), + ("start_line_no".to_owned(), snippet.start_line.into()), + ("end_line_no".to_owned(), snippet.end_line.into()), + ("snippet".to_owned(), snippet.code.clone().into()), + ])), + ) + }) + .collect::>(), + )?; } + db.save().await?; Ok(()) } } diff --git a/crates/tinyvec-embed/src/db.rs b/crates/tinyvec-embed/src/db.rs index 8267d2b..dd20995 100644 --- a/crates/tinyvec-embed/src/db.rs +++ b/crates/tinyvec-embed/src/db.rs @@ -173,6 +173,17 @@ impl Collection { Ok(()) } + pub fn batch_insert(&mut self, embeddings: Vec) -> Result<()> { + if embeddings + .iter() + .any(|embedding| embedding.vector.len() != self.dimension) + { + return Err(CollectionError::DimensionMismatch.into()); + } + self.embeddings.extend(embeddings); + Ok(()) + } + /// Remove values matching filter. /// /// Empties the collection when `filter` is `None`. 
@@ -244,6 +255,18 @@ impl Value { } } +impl TryInto for &Value { + type Error = Error; + + fn try_into(self) -> Result { + if let Value::Number(n) = self { + Ok(n.clone() as usize) + } else { + Err(Error::ValueNotNumber(self.to_owned())) + } + } +} + impl From for Value { fn from(value: usize) -> Self { Self::Number(value as f32) diff --git a/crates/tinyvec-embed/src/error.rs b/crates/tinyvec-embed/src/error.rs index 6a31c86..8dc0f54 100644 --- a/crates/tinyvec-embed/src/error.rs +++ b/crates/tinyvec-embed/src/error.rs @@ -32,6 +32,8 @@ pub enum Error { InvalidFileName, #[error("io error: {0}")] Io(#[from] std::io::Error), + #[error("expected value to be a valid number, got: {0}")] + ValueNotNumber(Value), #[error("expected value to be string, got: {0}")] ValueNotString(Value), }