diff --git a/Cargo.lock b/Cargo.lock index febca6c..2e07812 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aes" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ "cfg-if", "cipher", @@ -37,11 +37,17 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" -version = "0.6.4" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540" dependencies = [ "anstyle", "anstyle-parse", @@ -53,65 +59,64 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = 
"e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.1" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] name = "async-trait" -version = "0.1.73" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] name = "auto_impl" -version = "1.1.0" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fee3da8ef1276b0bee5dd1c7258010d8fffd31801447323115a25560e1327b89" +checksum = "823b8bb275161044e2ac7a25879cb3e2480cb403e3943022c7c769c599b756aa" dependencies = [ - "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.49", ] [[package]] @@ -192,9 +197,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.3" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64ct" @@ -202,6 +207,15 @@ 
version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -210,9 +224,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "block-buffer" @@ -225,21 +239,41 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d32a994c2b3ca201d9b263612a374263f05e7adde37c4707f693dcd375076d1f" + +[[package]] +name = "bytemuck" +version = "1.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.49", +] [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 
[[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "bzip2" @@ -262,6 +296,66 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "candle-core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d3c6a912d574b4ca6e14ee816a1139ee0d15b75a48291f153dd16f6c643130e" +dependencies = [ + "byteorder", + "gemm", + "half", + "memmap2", + "num-traits", + "num_cpus", + "rand 0.8.5", + "rand_distr", + "rayon", + "safetensors", + "thiserror", + "yoke", + "zip", +] + +[[package]] +name = "candle-nn" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd8f228c993467be5295a9759fc64d79e9ff9ea9080d7fc1cdf8d44b899082a" +dependencies = [ + "candle-core", + "half", + "num-traits", + "rayon", + "safetensors", + "serde", + "thiserror", +] + +[[package]] +name = "candle-transformers" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f76c29230b2dffa216e174512bf00f621eb1f4ba77718593f183828aad427d44" +dependencies = [ + "byteorder", + "candle-core", + "candle-nn", + "num-traits", + "rand 0.8.5", + "rayon", + "serde", + "serde_json", + "serde_plain", + "tracing", +] + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.83" @@ -278,6 +372,33 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -290,9 +411,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.7" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" dependencies = [ "clap_builder", "clap_derive", @@ -300,33 +421,33 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.7" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim", + "strsim 0.11.0", ] [[package]] name = "clap_derive" -version = "4.4.7" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" @@ -334,6 +455,32 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "config" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7328b20597b53c2454f0b1919720c25c7339051c02b72b7e05409e00b14132be" +dependencies = [ + "lazy_static", + "nom", + "pathdiff", + "serde", + "yaml-rust", +] + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.52.0", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -342,9 +489,9 @@ checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ "core-foundation-sys", "libc", @@ -352,70 +499,105 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b" dependencies = [ - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = 
"crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "crypto-common" @@ -457,7 +639,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", + "strsim 0.10.0", "syn 1.0.109", ] @@ -479,7 +661,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.0", + "hashbrown", "lock_api", "once_cell", "parking_lot_core", @@ -487,9 +669,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] [[package]] name = "derive_builder" @@ -533,11 +718,48 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + 
"dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "dyn-stack" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" +dependencies = [ + "bytemuck", + "reborrow", +] + [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" + +[[package]] +name = "encode_unicode" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" @@ -548,6 +770,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.49", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -556,12 +790,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.5" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -578,9 +812,9 @@ checksum = 
"25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -609,18 +843,24 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "futures" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -633,9 +873,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -643,15 +883,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -660,38 +900,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -705,6 +945,124 @@ dependencies = [ "slab", ] +[[package]] +name = "gemm" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" +dependencies = [ + "dyn-stack", + "gemm-c32", + "gemm-c64", + "gemm-common", + "gemm-f16", + "gemm-f32", + "gemm-f64", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" +dependencies = [ + "bytemuck", + "dyn-stack", + "half", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp", + "raw-cpuid", + "rayon", + "seq-macro", + "sysctl", +] + +[[package]] +name = "gemm-f16" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" +dependencies = [ + "dyn-stack", + "gemm-common", + "gemm-f32", + "half", + "num-complex", + "num-traits", + "paste", + 
"raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -717,9 +1075,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -728,15 +1086,30 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.0" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "gitignore" +version = "0.1.0" +dependencies = [ + "glob", + "tempdir", + "thiserror", +] + +[[package]] +name = "glob" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = 
"bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -744,7 +1117,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap", "slab", "tokio", "tokio-util", @@ -752,16 +1125,24 @@ dependencies = [ ] [[package]] -name = "hashbrown" -version = "0.12.3" +name = "half" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "rand 0.8.5", + "rand_distr", +] [[package]] name = "hashbrown" -version = "0.14.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" [[package]] name = "heck" @@ -771,9 +1152,30 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" + +[[package]] +name = "hf-hub" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732" +dependencies = [ + "dirs", + "futures", + "indicatif", + "log", + "native-tls", + "num_cpus", + "rand 0.8.5", + "reqwest", + "serde", + "serde_json", + "thiserror", + "tokio", + "ureq", +] [[package]] name = "hmac" @@ -786,18 +1188,18 @@ dependencies = [ [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "http" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" dependencies = [ "bytes", "fnv", @@ -806,9 +1208,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http", @@ -829,9 +1231,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ "bytes", "futures-channel", @@ -844,7 +1246,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2", "tokio", "tower-service", "tracing", @@ -853,14 +1255,14 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http", "hyper", - "rustls", + "rustls 0.21.10", "tokio", "tokio-rustls", ] @@ -886,9 +1288,9 @@ checksum = 
"b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -896,22 +1298,25 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.3" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies = [ - "autocfg", - "hashbrown 0.12.3", + "equivalent", + "hashbrown", ] [[package]] -name = "indexmap" -version = "2.0.1" +name = "indicatif" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" dependencies = [ - "equivalent", - "hashbrown 0.14.0", + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", ] [[package]] @@ -923,11 +1328,40 @@ dependencies = [ "generic-array", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + [[package]] name = "ipnet" -version = "2.8.0" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" 
+dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] [[package]] name = "itertools" @@ -938,26 +1372,35 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] @@ -970,28 +1413,61 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.147" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libm" +version = "0.2.8" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libredox" +version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.2", + "libc", + "redox_syscall", +] + +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "llm-ls" version = "0.5.2" dependencies = [ + "candle-core", + "candle-nn", + "candle-transformers", "clap", + "config", "custom-types", + "futures-util", + "gitignore", + "hf-hub", "home", + "lsp-client", "reqwest", "ropey", "serde", "serde_json", + "serde_yaml", "thiserror", + "tinyvec-embed", "tokenizers", "tokio", "tower-lsp", @@ -1003,6 +1479,7 @@ dependencies = [ "tree-sitter-c", "tree-sitter-c-sharp", "tree-sitter-cpp", + "tree-sitter-css", "tree-sitter-elixir", "tree-sitter-erlang", "tree-sitter-go", @@ -1014,6 +1491,7 @@ dependencies = [ "tree-sitter-lua", "tree-sitter-md", "tree-sitter-objc", + "tree-sitter-php", "tree-sitter-python", "tree-sitter-r", "tree-sitter-ruby", @@ -1026,9 +1504,9 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = 
"3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -1091,23 +1569,24 @@ dependencies = [ [[package]] name = "matchit" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed1202b2a6f884ae56f04cff409ab315c5ce26b5e58d7412e484f01fd52f52ef" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" [[package]] name = "memchr" -version = "2.6.3" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] -name = "memoffset" -version = "0.9.0" +name = "memmap2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" dependencies = [ - "autocfg", + "libc", + "stable_deref_trait", ] [[package]] @@ -1124,22 +1603,22 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "wasi", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1153,9 +1632,9 @@ dependencies = [ [[package]] name = "monostate" -version = 
"0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f370ae88093ec6b11a710dec51321a61d420fafd1bad6e30d01bd9c920e8ee" +checksum = "878c2a1f1c70e5724fa28f101ca787b6a7e8ad5c5e4ae4ca3b0fa4a419fa9075" dependencies = [ "monostate-impl", "serde", @@ -1163,13 +1642,13 @@ dependencies = [ [[package]] name = "monostate-impl" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce" +checksum = "f686d68a09079e63b1d2c64aa305095887ce50565f00a922ebfaeeee0d9ba6ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -1210,6 +1689,32 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "bytemuck", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -1220,20 +1725,26 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = 
"a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "onig" @@ -1257,13 +1768,19 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "cfg-if", "foreign-types", "libc", @@ -1280,7 +1797,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -1291,9 +1808,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.93" +version = "0.9.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae" dependencies = [ "cc", "libc", @@ -1301,6 +1818,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "overload" version = "0.1.1" @@ -1319,15 +1842,15 @@ 
dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -1337,7 +1860,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", - "rand_core", + "rand_core 0.6.4", "subtle", ] @@ -1347,6 +1870,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "pathdiff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" + [[package]] name = "pbkdf2" version = "0.11.0" @@ -1361,28 +1890,28 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pin-project" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -1399,9 +1928,49 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "portable-atomic" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" @@ -1410,45 +1979,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] 
-name = "proc-macro-error" -version = "1.0.4" +name = "proc-macro2" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", + "unicode-ident", ] [[package]] -name = "proc-macro-error-attr" -version = "1.0.4" +name = "pulp" +version = "0.18.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "091bad01115892393939669b38f88ff2b70838e969a7ac172a9d06d05345a732" dependencies = [ - "proc-macro2", - "quote", - "version_check", + "bytemuck", + "libm", + "num-complex", + "reborrow", ] [[package]] -name = "proc-macro2" -version = "1.0.66" +name = "quote" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ - "unicode-ident", + "proc-macro2", ] [[package]] -name = "quote" -version = "1.0.33" +name = "rand" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" dependencies = [ - "proc-macro2", + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", ] [[package]] @@ -1459,7 +2029,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -1469,9 +2039,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", ] +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.6.4" @@ -1481,11 +2066,30 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rayon" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -1498,39 +2102,65 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" dependencies = [ "either", - "itertools", + "itertools 0.11.0", "rayon", ] [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = 
"1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + [[package]] name = "redox_syscall" -version = "0.3.5" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_users" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" -version = "1.9.5" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.3.8", - "regex-syntax 0.7.5", + "regex-automata 0.4.5", + "regex-syntax 0.8.2", ] [[package]] @@ -1544,13 +2174,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.8" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", - 
"regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] @@ -1561,17 +2191,26 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.5" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] [[package]] name = "reqwest" -version = "0.11.20" +version = "0.11.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" +checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" dependencies = [ - "base64 0.21.3", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -1590,11 +2229,13 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.21.10", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", + "sync_wrapper", + "system-configuration", "tokio", "tokio-native-tls", "tokio-rustls", @@ -1605,30 +2246,30 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots", + "webpki-roots 0.25.4", "winreg", ] [[package]] name = "ring" -version = "0.16.20" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", + "cfg-if", + "getrandom", "libc", - "once_cell", "spin", "untrusted", - "web-sys", - "winapi", + "windows-sys 0.52.0", ] [[package]] name = "ropey" -version = "1.6.0" +version = 
"1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ce7a2c43a32e50d666e33c5a80251b31147bb4b49024bcab11fb6f20c671ed" +checksum = "93411e420bcd1a75ddd1dc3caf18c23155eda2c090631a85af21ba19e97093b5" dependencies = [ "smallvec", "str_indices", @@ -1642,45 +2283,76 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.38.13" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "rustls" -version = "0.21.7" +version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", "ring", - "rustls-webpki", + "rustls-webpki 0.101.7", "sct", ] +[[package]] +name = "rustls" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" +dependencies = [ + "log", + "ring", + "rustls-pki-types", + "rustls-webpki 0.102.2", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pemfile" -version = "1.0.3" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustls-pki-types" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "048a63e5b3ac996d78d402940b5fa47973d2d080c6c6fffa1d0f19c4445310b7" + +[[package]] +name = "rustls-webpki" 
+version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "base64 0.21.3", + "ring", + "untrusted", ] [[package]] name = "rustls-webpki" -version = "0.101.4" +version = "0.102.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" +checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" dependencies = [ "ring", + "rustls-pki-types", "untrusted", ] @@ -1692,17 +2364,36 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "safetensors" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d980e6bfb34436fb0a81e42bc41af43f11805bbbca443e7f68e9faaabe669ed" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "same-file" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] [[package]] name = "schannel" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1713,9 +2404,9 @@ checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ "ring", "untrusted", @@ -1744,56 +2435,71 @@ dependencies = [ "libc", ] +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + [[package]] name = "serde" -version = "1.0.188" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] name = "serde_json" -version = "1.0.105" +version = "1.0.113" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" +checksum = "ebd154a240de39fdebcf5775d2675c204d7c13cf39a4c697be6493c8e734337c" dependencies = [ "itoa", - "ryu", "serde", ] [[package]] -name = "serde_path_to_error" -version = 
"0.1.14" +name = "serde_plain" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335" +checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50" dependencies = [ - "itoa", "serde", ] [[package]] name = "serde_repr" -version = "0.1.16" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" +checksum = "0b2e6b945e9d3df726b65d6ee24060aff8e3533d431f677a9695db04eff9dfdb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -1810,11 +2516,11 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.25" +version = "0.9.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" +checksum = "8fd075d994154d4a774f95b51fb96bdc2832b0ea48425c92546073816cda1f2f" dependencies = [ - "indexmap 2.0.1", + "indexmap", "itoa", "ryu", "serde", @@ -1845,9 +2551,9 @@ dependencies = [ [[package]] name = "sharded-slab" -version = "0.1.4" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" dependencies = [ "lazy_static", ] @@ -1872,35 +2578,25 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" - -[[package]] -name = "socket2" -version = "0.4.9" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" -dependencies = [ - "libc", - "winapi", -] +checksum = 
"e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "socket2" -version = "0.5.3" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "spin" -version = "0.5.2" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "spm_precompiled" @@ -1914,11 +2610,17 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "str_indices" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f026164926842ec52deb1938fae44f83dfdb82d0a5b0270c5bd5935ab74d6dd" +checksum = "e9557cb6521e8d009c51a8666f09356f4b817ba9ba0981a305bd86aee47bd35c" [[package]] name = "strsim" @@ -1926,6 +2628,12 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + [[package]] name = "subtle" version = "2.5.0" @@ -1945,9 +2653,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.31" +version = "2.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" +checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" dependencies = [ "proc-macro2", "quote", @@ -1960,17 +2668,72 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.49", +] + +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.4.2", + "byteorder", + "enum-as-inner", + "libc", + "thiserror", + "walkdir", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" -version = "3.8.0" +version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = 
"a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1985,7 +2748,7 @@ dependencies = [ "home", "lsp-client", "lsp-types", - "rand", + "rand 0.8.5", "reqwest", "ropey", "serde", @@ -2003,22 +2766,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -2033,12 +2796,14 @@ dependencies = [ [[package]] name = "time" -version = "0.3.28" +version = "0.3.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" dependencies = [ "deranged", "itoa", + "num-conv", + "powerfmt", "serde", "time-core", "time-macros", @@ -2046,19 +2811,30 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.14" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" dependencies = [ + "num-conv", "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -2068,6 +2844,20 @@ dependencies = [ "tinyvec_macros", ] +[[package]] +name = "tinyvec-embed" +version = "0.1.0" +dependencies = [ + "bincode", + "criterion", + "serde", + "tempfile", + "thiserror", + "tokio", + "tracing", + "uuid", +] + [[package]] name = "tinyvec_macros" version = "0.1.1" @@ -2076,26 +2866,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokenizers" -version = "0.15.1" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db445cceba5dfeb0f9702be7d6bfd91801ddcbe8fe8722defe7f2e96da75812" +checksum = "3dd47962b0ba36e7fd33518fbf1754d136fd1474000162bbf2a8b5fcb2d3654d" dependencies = [ "aho-corasick", "derive_builder", "esaxx-rs", "getrandom", - "itertools", + "itertools 0.12.1", "lazy_static", "log", "macro_rules_attribute", "monostate", "onig", "paste", - "rand", + "rand 0.8.5", "rayon", "rayon-cond", "regex", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", "serde", "serde_json", "spm_precompiled", @@ -2107,9 +2897,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.32.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", @@ -2119,20 +2909,20 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 
0.5.3", + "socket2", "tokio-macros", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -2151,15 +2941,15 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.10", "tokio", ] [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -2223,7 +3013,7 @@ checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] @@ -2234,11 +3024,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -2247,31 +3036,32 @@ dependencies = [ [[package]] name = "tracing-appender" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d48f71a791638519505cefafe162606f706c25592e4bde4d97600c0195312e" +checksum = 
"3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" dependencies = [ "crossbeam-channel", + "thiserror", "time", "tracing-subscriber", ] [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", ] [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", "valuable", @@ -2279,12 +3069,12 @@ dependencies = [ [[package]] name = "tracing-log" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ - "lazy_static", "log", + "once_cell", "tracing-core", ] @@ -2300,9 +3090,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ "matchers", "nu-ansi-term", @@ -2331,9 +3121,9 @@ dependencies = [ [[package]] name = "tree-sitter-bash" -version = "0.20.3" +version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "096f57b3b44c04bfc7b21a4da44bfa16adf1f88aba18993b8478a091076d0968" +checksum = 
"57da2032c37eb2ce29fd18df7d3b94355fec8d6d854d8f80934955df542b5906" dependencies = [ "cc", "tree-sitter", @@ -2341,9 +3131,9 @@ dependencies = [ [[package]] name = "tree-sitter-c" -version = "0.20.6" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b03bdf218020057abee831581a74bff8c298323d6c6cd1a70556430ded9f4b" +checksum = "4bbd5f3d8658c08581f8f2adac6c391c2e9fa00fe9246bf6c5f52213b9cc6b72" dependencies = [ "cc", "tree-sitter", @@ -2361,9 +3151,19 @@ dependencies = [ [[package]] name = "tree-sitter-cpp" -version = "0.20.3" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46b04a5ada71059afb9895966a6cc1094acc8d2ea1971006db26573e7dfebb74" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-css" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b4b625f46a7370544b9cf0545532c26712ae49bfc02eb09825db358b9f79e1" +checksum = "c3306ddefa1d2681adda2613d11974ffabfbeb215e23235da6c862f3493a04fd" dependencies = [ "cc", "tree-sitter", @@ -2371,9 +3171,9 @@ dependencies = [ [[package]] name = "tree-sitter-elixir" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e" +checksum = "1bc0b1f3e6d9f12ca22ae5171f32fd154e3aea29dff565d05ef785c28931415b" dependencies = [ "cc", "tree-sitter", @@ -2421,9 +3221,9 @@ dependencies = [ [[package]] name = "tree-sitter-javascript" -version = "0.20.1" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7" +checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0" dependencies = [ "cc", "tree-sitter", @@ -2431,9 +3231,9 @@ dependencies = [ [[package]] name = "tree-sitter-json" -version = "0.20.1" +version = "0.20.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d82d2e33ee675dc71289e2ace4f8f9cf96d36d81400e9dae5ea61edaf5dea6" +checksum = "5a9a38a9c679b55cc8d17350381ec08d69fa1a17a53fcf197f344516e485ed4d" dependencies = [ "cc", "tree-sitter", @@ -2461,9 +3261,9 @@ dependencies = [ [[package]] name = "tree-sitter-md" -version = "0.1.5" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a237fa10f6b466b76c783c79b08cc172581e547ef1dbb6ddf1f8b4e230157e1" +checksum = "3c20d3ef8d202430b644a307e6299d84bf8ed87fa1b796e4638f8805a595060c" dependencies = [ "cc", "tree-sitter", @@ -2479,6 +3279,16 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-php" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a7d882bc077192da2995bbe25c293cc596f036129b32c6c94752109dc482b8b" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-python" version = "0.20.4" @@ -2501,9 +3311,9 @@ dependencies = [ [[package]] name = "tree-sitter-ruby" -version = "0.20.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ac30cbb1560363ae76e1ccde543d6d99087421e228cc47afcec004b86bb711a" +checksum = "44d50ef383469df8485f024c5fb01faced8cb90368192a7ba02605b43b2427fe" dependencies = [ "cc", "tree-sitter", @@ -2541,9 +3351,9 @@ dependencies = [ [[package]] name = "tree-sitter-typescript" -version = "0.20.3" +version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75049f0aafabb2aac205d7bb24da162b53dcd0cfb326785f25a2f32efa8071a" +checksum = "c8bc1d2c24276a48ef097a71b56888ac9db63717e8f8d0b324668a27fd619670" dependencies = [ "cc", "tree-sitter", @@ -2551,9 +3361,9 @@ dependencies = [ [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" @@ -2563,15 +3373,15 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" @@ -2593,9 +3403,15 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode-width" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unicode_categories" @@ -2605,21 +3421,41 @@ checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" [[package]] name = "unsafe-libyaml" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" +checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" [[package]] name = "untrusted" -version = "0.7.1" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35" +dependencies = [ + "base64 0.21.7", + "flate2", + "log", + "native-tls", + "once_cell", + "rustls 0.22.2", + "rustls-pki-types", + "rustls-webpki 0.102.2", + "serde", + "serde_json", + "url", + "webpki-roots 0.26.1", +] [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -2635,13 +3471,25 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.5.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", - "rand", + "rand 0.8.5", "serde", + "uuid-macro-internal", +] + +[[package]] +name = "uuid-macro-internal" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7abb14ae1a50dad63eaa768a458ef43d298cd1bd44951677bd10b732a9ba2a2d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.49", ] [[package]] @@ -2662,6 +3510,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2679,9 +3537,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2689,24 +3547,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "877b9c3f61ceea0e56331985743b13f3d25c406a7098d45180fb5f09bc19ed97" dependencies = [ "cfg-if", "js-sys", @@ -2716,9 +3574,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2726,28 +3584,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = 
"0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.49", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "wasm-streams" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" dependencies = [ "futures-util", "js-sys", @@ -2758,9 +3616,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", @@ -2768,9 +3626,18 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.2" +version = "0.25.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" + +[[package]] +name = "webpki-roots" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" +checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +dependencies = [ + "rustls-pki-types", +] [[package]] name = 
"winapi" @@ -2788,6 +3655,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2800,7 +3676,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] @@ -2809,13 +3694,28 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 
0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] @@ -2824,42 +3724,84 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "winreg" version = "0.50.0" @@ -2867,29 +3809,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ "cfg-if", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "write-json" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06069a848f95fceae3e5e03c0ddc8cb78452b56654ee0c8e68f938cf790fb9e3" +checksum = "23f6174b2566cc4a74f95e1367ec343e7fa80c93cc8087f5c4a3d6a1088b2118" [[package]] name = "xflags" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4554b580522d0ca238369c16b8f6ce34524d61dafe7244993754bbd05f2c2ea" +checksum = "7d9e15fbb3de55454b0106e314b28e671279009b363e6f1d8e39fdc3bf048944" dependencies = [ "xflags-macros", ] [[package]] name = "xflags-macros" -version = "0.3.1" +version = "0.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58e7b3ca8977093aae6b87b6a7730216fc4c53a6530bab5c43a783cd810c1a8" +checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155" [[package]] name = "xshell" @@ -2919,6 +3861,66 @@ dependencies = [ "zip", ] +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "yoke" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65e71b2e4f287f467794c671e2b8f8a5f3716b3c829079a1c44740148eff07e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.49", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.49", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" + [[package]] name = "zip" version = "0.6.6" diff --git a/crates/gitignore/Cargo.toml b/crates/gitignore/Cargo.toml new file mode 100644 index 0000000..cc6e292 --- /dev/null +++ 
b/crates/gitignore/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "gitignore" +version = "0.1.0" +edition.workspace = true +license.workspace = true +authors.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +glob = "0.3" +thiserror = "1" + +[dev-dependencies] +tempdir = "0.3" diff --git a/crates/gitignore/src/lib.rs b/crates/gitignore/src/lib.rs new file mode 100644 index 0000000..cc6d172 --- /dev/null +++ b/crates/gitignore/src/lib.rs @@ -0,0 +1,284 @@ +use std::{ + fmt::Debug, + fs::{canonicalize, File}, + io::{BufRead, BufReader}, + path::{Path, PathBuf}, +}; + +use glob::{MatchOptions, Pattern}; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("non utf8 path")] + NonUtf8Path, + #[error("path has no parent folder")] + NoParent, + #[error("glob pattern error: {0}")] + Pattern(#[from] glob::PatternError), +} + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub struct Rule { + negate: bool, + patterns: Vec, + _source_line: usize, +} + +impl Rule { + pub fn parse( + mut pattern: String, + base_path: impl AsRef, + _source_line: usize, + ) -> Result> { + let mut patterns = vec![]; + if pattern.trim().is_empty() || pattern.starts_with('#') { + return Ok(None); + } + pattern = pattern.trim_start().to_owned(); + let negate = if pattern.starts_with('!') { + pattern.remove(0); + true + } else { + false + }; + let directory = if pattern.ends_with('/') { + pattern.pop(); + true + } else { + false + }; + let anchored = pattern.contains('/'); + if pattern.starts_with('/') { + pattern.remove(0); + } + let base_path_str = base_path.as_ref().to_str().ok_or(Error::NonUtf8Path)?; + let base_pattern = if anchored || pattern.starts_with("**") { + format!("{base_path_str}/{pattern}") + } else { + format!("{base_path_str}/**/{pattern}") + }; + patterns.push(Pattern::new(&format!("{base_pattern}/**"))?); + if 
!directory { + patterns.push(Pattern::new(&base_pattern)?); + } + Ok(Some(Self { + negate, + patterns, + _source_line, + })) + } +} + +#[derive(Debug)] +pub struct Gitignore { + base_path: PathBuf, + rules: Vec, + _source_file: PathBuf, +} + +impl Gitignore { + /// Parses a `.gitignore` file at `path`. + /// + /// If `path` is a directory, attempts to read `{dir}/.gitignore`. + pub fn parse(path: impl AsRef) -> Result { + let mut path = canonicalize(path)?; + if path.is_dir() { + path = path.join(".gitignore"); + } + let reader = BufReader::new(File::open(&path)?); + let mut rules = Vec::new(); + for (line_nb, line) in reader.lines().enumerate() { + let line = line?; + if let Some(rule) = + Rule::parse(line, path.parent().ok_or(Error::NoParent)?, line_nb + 1)? + { + rules.push(rule); + } + } + Ok(Self { + base_path: path.parent().ok_or(Error::NoParent)?.to_path_buf(), + rules, + _source_file: path, + }) + } + + /// Checks if a path is ignored. + /// + /// Path can be relative within the directory which contains the `.gitignore` file. + /// + /// # Errors + /// + /// This function will return an error if the file does not exist. + pub fn ignored(&self, path: impl AsRef) -> Result { + let path = if path.as_ref().starts_with(&self.base_path) { + path.as_ref().to_path_buf() + } else if path.as_ref().has_root() { + return Ok(false); + } else { + canonicalize(self.base_path.join(path))? 
+ }; + let match_opts = MatchOptions { + case_sensitive: true, + require_literal_separator: true, + require_literal_leading_dot: false, + }; + let path_str = path.to_str().ok_or(Error::NonUtf8Path)?; + let to_match = if path.is_dir() { + format!("{path_str}/") + } else { + path_str.to_owned() + }; + for rule in &self.rules { + for pattern in rule.patterns.iter() { + // TODO: handle negation properly + // negation should include + if rule.negate { + continue; + } + if pattern.matches_with(&to_match, match_opts) { + return Ok(true); + } + } + } + Ok(false) + } + + /// Add ad hoc rule from a pattern + pub fn add_rule(&mut self, pattern: String) -> Result<()> { + if let Some(rule) = Rule::parse(pattern, &self.base_path, usize::MAX)? { + self.rules.push(rule); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::io::Write; + + use tempdir::TempDir; + + use super::*; + + fn create_gitignore(rules: &str, name: &str) -> (TempDir, Gitignore) { + let temp_dir = TempDir::new(name).unwrap(); + std::fs::File::create(temp_dir.path().join("LICENSE")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("config")).unwrap(); + std::fs::File::create(temp_dir.path().join("config.yaml")).unwrap(); + std::fs::File::create(temp_dir.path().join("Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("README.md")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("xtask")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/gitignore")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/gitignore/Cargo.toml")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/llm-ls/src")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/llm-ls/config")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/llm-ls/config.yaml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/llm-ls/Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/llm-ls/src/main.rs")).unwrap(); + 
std::fs::create_dir_all(temp_dir.path().join("crates/lsp-client/src")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/lsp-client/Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/lsp-client/src/lib.rs")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/mock_server")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/mock_server/Cargo.toml")).unwrap(); + std::fs::create_dir_all(temp_dir.path().join("crates/testbed/src")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/testbed/Cargo.toml")).unwrap(); + std::fs::File::create(temp_dir.path().join("crates/testbed/src/main.rs")).unwrap(); + std::fs::create_dir_all( + temp_dir + .path() + .join("crates/testbed/repositories/simple/src"), + ) + .unwrap(); + std::fs::File::create( + temp_dir + .path() + .join("crates/testbed/repositories/simple/src/main.rs"), + ) + .unwrap(); + let gitignore_path = temp_dir.path().join(name); + std::fs::File::create(&gitignore_path) + .unwrap() + .write_all(rules.as_bytes()) + .unwrap(); + let gitignore = Gitignore::parse(gitignore_path).unwrap(); + (temp_dir, gitignore) + } + + #[test] + fn test_regular_relative_pattern() { + let (_temp_dir, gitignore) = create_gitignore("Cargo.toml", "regular_relative_pattern"); + assert!(gitignore.ignored("Cargo.toml").unwrap()); + assert!(!gitignore.ignored("LICENSE").unwrap()); + } + + #[test] + fn test_glob_pattern() { + let (_temp_dir, gitignore) = create_gitignore("crates/**/Cargo.toml", "glob_pattern"); + assert!(gitignore.ignored("crates/gitignore/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/lsp-client/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/mock_server/Cargo.toml").unwrap()); + assert!(gitignore.ignored("crates/testbed/Cargo.toml").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/src/main.rs").unwrap()); + 
assert!(!gitignore.ignored("crates/lsp-client/src/lib.rs").unwrap()); + assert!(!gitignore.ignored("crates/testbed/src/main.rs").unwrap()); + } + + #[test] + fn test_dir_start_glob_pattern() { + let (_temp_dir, gitignore) = create_gitignore("**/crates/", "start_glob_pattern"); + assert!(gitignore.ignored("crates/").unwrap()); + assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(gitignore + .ignored("crates/testbed/repositories/simple/src/main.rs") + .unwrap()); + assert!(!gitignore.ignored("xtask/").unwrap()); + assert!(!gitignore.ignored("README.md").unwrap()); + } + + #[test] + fn test_dir_relative_path() { + let (_temp_dir, gitignore) = create_gitignore("crates/", "relative_path"); + assert!(gitignore.ignored("crates/").unwrap()); + assert!(gitignore.ignored("crates/llm-ls/Cargo.toml").unwrap()); + assert!(gitignore + .ignored("crates/testbed/repositories/simple/src/main.rs") + .unwrap()); + assert!(!gitignore.ignored("xtask/").unwrap()); + assert!(!gitignore.ignored("README.md").unwrap()); + } + + // TODO: + // #[test] + // fn test_negate_pattern() { + // let (_temp_dir, gitignore) = create_gitignore( + // "aaa/*\n\ + // !aaa/Cargo.toml", + // "negate_pattern", + // ); + // assert!(!gitignore.ignored("aaa/Cargo.toml").unwrap()); + // assert!(gitignore.ignored("aaa/config.yaml").unwrap()); + // } + + #[test] + fn test_ad_hoc_rule_add() { + let (_temp_dir, mut gitignore) = create_gitignore("!Cargo.toml", "adhoc_add"); + assert!(!gitignore.ignored("config.yaml").unwrap()); + assert!(!gitignore.ignored("Cargo.toml").unwrap()); + gitignore.add_rule("config.yaml".to_owned()).unwrap(); + assert!(gitignore.ignored("config.yaml").unwrap()); + } + + #[test] + fn test_anchored_file_or_dir() { + let (_temp_dir, gitignore) = create_gitignore("/config*", "anchored_file_or_dir"); + assert!(gitignore.ignored("config.yaml").unwrap()); + assert!(gitignore.ignored("config").unwrap()); + assert!(!gitignore.ignored("crates/llm-ls/config.yaml").unwrap()); + 
assert!(!gitignore.ignored("crates/llm-ls/config").unwrap()); + } +} diff --git a/crates/llm-ls/Cargo.toml b/crates/llm-ls/Cargo.toml index 72f1f5b..a5b386b 100644 --- a/crates/llm-ls/Cargo.toml +++ b/crates/llm-ls/Cargo.toml @@ -7,8 +7,15 @@ edition = "2021" name = "llm-ls" [dependencies] +candle = { version = "0.4", package = "candle-core", default-features = false } +candle-nn = "0.4" +candle-transformers = "0.4" clap = { version = "4", features = ["derive"] } +config = { version = "0.14", features = ["yaml"], default_features = false } custom-types = { path = "../custom-types" } +futures-util = "0.3" +gitignore = { path = "../gitignore" } +hf-hub = { version = "0.3", features = ["tokio"] } home = "0.5" ropey = { version = "1.6", default-features = false, features = [ "simd", @@ -19,8 +26,10 @@ reqwest = { version = "0.11", default-features = false, features = [ "rustls-tls", ] } serde = { version = "1", features = ["derive"] } +serde_yaml = "0.9" serde_json = "1" thiserror = "1" +tinyvec-embed = { path = "../tinyvec-embed" } tokenizers = { version = "0.15", default-features = false, features = ["onig"] } tokio = { version = "1", features = [ "fs", @@ -38,6 +47,7 @@ tree-sitter-bash = "0.20" tree-sitter-c = "0.20" tree-sitter-cpp = "0.20" tree-sitter-c-sharp = "0.20" +tree-sitter-css = "0.20" tree-sitter-elixir = "0.1" tree-sitter-erlang = "0.4" tree-sitter-go = "0.20" @@ -49,6 +59,7 @@ tree-sitter-kotlin = "0.3.1" tree-sitter-lua = "0.0.19" tree-sitter-md = "0.1" tree-sitter-objc = "3" +tree-sitter-php = "0.22" tree-sitter-python = "0.20" tree-sitter-r = "0.19" tree-sitter-ruby = "0.20" @@ -60,3 +71,6 @@ tree-sitter-typescript = "0.20" [dependencies.uuid] version = "1.4" features = ["v4", "fast-rng", "serde"] + +[dev-dependencies] +lsp-client = { path = "../lsp-client" } diff --git a/crates/llm-ls/src/config.rs b/crates/llm-ls/src/config.rs new file mode 100644 index 0000000..66abd0b --- /dev/null +++ b/crates/llm-ls/src/config.rs @@ -0,0 +1,65 @@ +use 
std::path::Path; + +use config::Config; +use serde::{Deserialize, Serialize}; +use tokio::fs::write; + +use crate::error::Result; + +#[derive(Clone, Deserialize, Serialize)] +pub(crate) struct ModelConfig { + pub(crate) id: String, + pub(crate) revision: String, + pub(crate) embeddings_size: usize, + pub(crate) max_input_size: usize, +} + +impl Default for ModelConfig { + fn default() -> Self { + Self { + id: "intfloat/multilingual-e5-small".to_string(), + revision: "main".to_string(), + embeddings_size: 384, + max_input_size: 512, + } + } +} + +#[derive(Deserialize, Serialize)] +pub(crate) struct LlmLsConfig { + pub(crate) model: ModelConfig, + /// .gitignore-like glob patterns to exclude from indexing + pub(crate) ignored_paths: Vec, +} + +impl Default for LlmLsConfig { + fn default() -> Self { + Self { + model: ModelConfig::default(), + ignored_paths: vec![".git".into(), ".idea".into(), ".DS_Store".into()], + } + } +} + +/// Loads configuration from a file and environment variables. +/// +/// If the file does not exist, it will be created with the default configuration. +/// +/// # Arguments +/// +/// * `cache_path` - Path to the directory where the configuration file will be stored. +pub(crate) async fn load_config(cache_path: &str) -> Result { + let config_file_path = Path::new(cache_path).join("config.yaml"); + let config = if config_file_path.exists() { + Config::builder() + .add_source(config::File::with_name(&format!("{cache_path}/config"))) + .add_source(config::Environment::with_prefix("LLM_LS")) + .build()? + .try_deserialize()? 
+ } else { + let config = LlmLsConfig::default(); + write(config_file_path, serde_yaml::to_string(&config)?.as_bytes()).await?; + config + }; + Ok(config) +} diff --git a/crates/llm-ls/src/document.rs b/crates/llm-ls/src/document.rs index 7ac6af4..16d3c7d 100644 --- a/crates/llm-ls/src/document.rs +++ b/crates/llm-ls/src/document.rs @@ -123,7 +123,7 @@ fn get_parser(language_id: LanguageId) -> Result { parser.set_language(tree_sitter_typescript::language_tsx())?; Ok(parser) } - LanguageId::Unknown => Ok(Parser::new()), + _ => Ok(Parser::new()), } } @@ -148,7 +148,11 @@ impl Document { }) } - pub(crate) async fn change(&mut self, range: Range, text: &str) -> Result<()> { + pub(crate) async fn change( + &mut self, + range: Range, + text: &str, + ) -> Result<(usize, usize, usize)> { let start_idx = get_position_idx( &self.text, range.start.line as usize, @@ -215,6 +219,10 @@ impl Document { tree.edit(&edit); } self.tree = self.parser.parse(self.text.to_string(), self.tree.as_ref()); - Ok(()) + Ok(( + start_position.row, + old_end_position.row, + new_end_position.row, + )) } } diff --git a/crates/llm-ls/src/error.rs b/crates/llm-ls/src/error.rs index 6812cf8..46f17f1 100644 --- a/crates/llm-ls/src/error.rs +++ b/crates/llm-ls/src/error.rs @@ -1,4 +1,4 @@ -use std::fmt::Display; +use std::{fmt::Display, path::PathBuf}; use tower_lsp::jsonrpc::Error as LspError; use tracing::error; @@ -15,6 +15,14 @@ pub(crate) fn internal_error(err: E) -> LspError { #[derive(thiserror::Error, Debug)] pub enum Error { + #[error("candle error: {0}")] + Candle(#[from] candle::Error), + #[error("config error: {0}")] + Config(#[from] config::ConfigError), + #[error("gitignore error: {0}")] + Gitignore(#[from] gitignore::Error), + #[error("huggingface api error: {0}")] + HfApi(#[from] hf_hub::api::tokio::ApiError), #[error("http error: {0}")] Http(#[from] reqwest::Error), #[error("io error: {0}")] @@ -29,6 +37,14 @@ pub enum Error { InvalidRepositoryId, #[error("invalid tokenizer path")] 
InvalidTokenizerPath, + #[error("malformatted embedding metadata, missing {0} field")] + MalformattedEmbeddingMetadata(String), + #[error("embedding has no metadata")] + MissingMetadata, + #[error("no final path: {0}")] + NoFinalPath(PathBuf), + #[error("error converting to string")] + NonUnicode, #[error("ollama error: {0}")] Ollama(crate::backend::APIError), #[error("openai error: {0}")] @@ -43,16 +59,28 @@ pub enum Error { Rope(#[from] ropey::Error), #[error("serde json error: {0}")] SerdeJson(#[from] serde_json::Error), + #[error("snippet is too larger to be converted to an embedding: {0} > {1}")] + SnippetTooLarge(usize, usize), + #[error("strip prefix error: {0}")] + StripPrefix(#[from] std::path::StripPrefixError), #[error("tgi error: {0}")] Tgi(crate::backend::APIError), + #[error("tinyvec-embed error: {0}")] + TinyVecEmbed(#[from] tinyvec_embed::error::Error), #[error("tree-sitter language error: {0}")] TreeSitterLanguage(#[from] tree_sitter::LanguageError), #[error("tokenizer error: {0}")] Tokenizer(#[from] tokenizers::Error), #[error("tokio join error: {0}")] TokioJoin(#[from] tokio::task::JoinError), + #[error("embeddings database is uninitialised")] + UninitialisedDatabase, #[error("unknown backend: {0}")] UnknownBackend(String), + #[error("yaml serialization error: {0}")] + Yaml(#[from] serde_yaml::Error), + #[error("No embedding built")] + MissingEmbedding, } pub(crate) type Result = std::result::Result; diff --git a/crates/llm-ls/src/language_id.rs b/crates/llm-ls/src/language_id.rs index 31ce3d9..b680b18 100644 --- a/crates/llm-ls/src/language_id.rs +++ b/crates/llm-ls/src/language_id.rs @@ -3,60 +3,128 @@ use std::fmt; #[derive(Clone, Copy, Serialize, Deserialize)] pub(crate) enum LanguageId { + Abap, + Bat, + BibTeX, Bash, + Clojure, + CoffeeScript, C, Cpp, CSharp, + Css, + Diff, + Dart, + Dockerfile, Elixir, Erlang, + FSharp, + GitCommit, + GitRebase, Go, + Groovy, + Handlebars, Html, + Ini, Java, JavaScript, JavaScriptReact, Json, Kotlin, + 
LaTeX, + Less, Lua, + Makefile, Markdown, ObjectiveC, + ObjectiveCpp, + Perl, + Perl6, + Php, + Powershell, + Pug, Python, R, + Razor, Ruby, Rust, + Scss, Scala, + ShaderLab, + Sql, Swift, + Toml, TypeScript, TypeScriptReact, + TeX, + VisualBasic, + Xml, + Xsl, + Yaml, Unknown, } impl fmt::Display for LanguageId { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Self::Abap => write!(f, "abap"), Self::Bash => write!(f, "shellscript"), + Self::Bat => write!(f, "bat"), + Self::BibTeX => write!(f, "bibtex"), + Self::Clojure => write!(f, "clojure"), + Self::CoffeeScript => write!(f, "coffeescript"), Self::C => write!(f, "c"), Self::Cpp => write!(f, "cpp"), Self::CSharp => write!(f, "csharp"), + Self::Css => write!(f, "css"), + Self::Diff => write!(f, "diff"), + Self::Dart => write!(f, "dart"), + Self::Dockerfile => write!(f, "dockerfile"), Self::Elixir => write!(f, "elixir"), Self::Erlang => write!(f, "erlang"), + Self::FSharp => write!(f, "fsharp"), + Self::GitCommit => write!(f, "git-commit"), + Self::GitRebase => write!(f, "git-rebase"), Self::Go => write!(f, "go"), + Self::Groovy => write!(f, "groovy"), + Self::Handlebars => write!(f, "handlebars"), Self::Html => write!(f, "html"), + Self::Ini => write!(f, "ini"), Self::Java => write!(f, "java"), Self::JavaScript => write!(f, "javascript"), Self::JavaScriptReact => write!(f, "javascriptreact"), Self::Json => write!(f, "json"), Self::Kotlin => write!(f, "kotlin"), + Self::LaTeX => write!(f, "latex"), + Self::Less => write!(f, "less"), Self::Lua => write!(f, "lua"), + Self::Makefile => write!(f, "makefile"), Self::Markdown => write!(f, "markdown"), Self::ObjectiveC => write!(f, "objective-c"), + Self::ObjectiveCpp => write!(f, "objective-cpp"), + Self::Perl => write!(f, "perl"), + Self::Perl6 => write!(f, "perl6"), + Self::Php => write!(f, "php"), + Self::Powershell => write!(f, "powershell"), + Self::Pug => write!(f, "jade"), Self::Python => write!(f, "python"), Self::R => write!(f, "r"), + 
Self::Razor => write!(f, "razor"), Self::Ruby => write!(f, "ruby"), Self::Rust => write!(f, "rust"), + Self::ShaderLab => write!(f, "shaderlab"), + Self::Scss => write!(f, "scss"), Self::Scala => write!(f, "scala"), + Self::Sql => write!(f, "sql"), Self::Swift => write!(f, "swift"), + Self::Toml => write!(f, "toml"), Self::TypeScript => write!(f, "typescript"), Self::TypeScriptReact => write!(f, "typescriptreact"), + Self::TeX => write!(f, "tex"), + Self::VisualBasic => write!(f, "vb"), + Self::Xml => write!(f, "xml"), + Self::Xsl => write!(f, "xsl"), + Self::Yaml => write!(f, "Yaml"), Self::Unknown => write!(f, "unknown"), } } @@ -75,30 +143,63 @@ impl fmt::Display for LanguageIdError { impl From<&str> for LanguageId { fn from(value: &str) -> Self { match value { + "abap" => Self::Abap, + "bat" => Self::Bat, + "bibtex" => Self::BibTeX, + "clojure" => Self::Clojure, + "coffeescript" => Self::CoffeeScript, "c" => Self::C, "cpp" => Self::Cpp, "csharp" => Self::CSharp, + "css" => Self::Css, + "diff" => Self::Diff, + "dart" => Self::Dart, + "dockerfile" => Self::Dockerfile, "elixir" => Self::Elixir, "erlang" => Self::Erlang, + "fsharp" => Self::FSharp, + "git-commit" => Self::GitCommit, + "git-rebase" => Self::GitRebase, "go" => Self::Go, + "groovy" => Self::Groovy, + "handlebars" => Self::Handlebars, "html" => Self::Html, + "ini" => Self::Ini, + "jade" => Self::Pug, "java" => Self::Java, "javascript" => Self::JavaScript, "javascriptreact" => Self::JavaScriptReact, "json" => Self::Json, "kotlin" => Self::Kotlin, + "latex" => Self::LaTeX, + "less" => Self::Less, "lua" => Self::Lua, + "makefile" => Self::Makefile, "markdown" => Self::Markdown, "objective-c" => Self::ObjectiveC, + "objective-cpp" => Self::ObjectiveCpp, + "perl" => Self::Perl, + "perl6" => Self::Perl6, + "php" => Self::Php, + "powershell" => Self::Powershell, "python" => Self::Python, "r" => Self::R, + "razor" => Self::Razor, "ruby" => Self::Ruby, "rust" => Self::Rust, + "sass" | "scss" => Self::Scss, 
"scala" => Self::Scala, + "shaderlab" => Self::ShaderLab, "shellscript" => Self::Bash, + "sql" => Self::Sql, "swift" => Self::Swift, "typescript" => Self::TypeScript, "typescriptreact" => Self::TypeScriptReact, + "tex" => Self::TeX, + "vb" => Self::VisualBasic, + "xml" => Self::Xml, + "xsl" => Self::Xsl, + "yaml" => Self::Yaml, _ => Self::Unknown, } } @@ -109,3 +210,263 @@ impl From for LanguageId { Self::from(value.as_str()) } } + +#[derive(Clone, Debug)] +pub(crate) struct LanguageComment { + open: String, + close: Option, +} + +impl LanguageComment { + pub(crate) fn comment_string(&self, s: String) -> String { + let close = if let Some(close) = self.close.as_ref() { + format!(" {}", close.clone()) + } else { + String::new() + }; + format!("{} {s}{close}", self.open) + } +} + +impl LanguageId { + pub(crate) fn get_language_comment(&self) -> LanguageComment { + match self { + Self::Abap => LanguageComment { + open: "*".to_owned(), + close: None, + }, + Self::Bash => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Bat => LanguageComment { + open: "REM".to_owned(), + close: None, + }, + Self::BibTeX => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::Clojure => LanguageComment { + open: ";;".to_owned(), + close: None, + }, + Self::CoffeeScript => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::C => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Cpp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::CSharp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Css => LanguageComment { + open: "/*".to_owned(), + close: Some("*/".to_owned()), + }, + Self::Diff => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Dart => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Dockerfile => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Elixir => LanguageComment { + open: 
"#".to_owned(), + close: None, + }, + Self::Erlang => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::FSharp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::GitCommit => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::GitRebase => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Go => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Groovy => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Handlebars => LanguageComment { + open: "{{!--".to_owned(), + close: Some("--}}".to_owned()), + }, + Self::Html => LanguageComment { + open: "".to_owned()), + }, + Self::Ini => LanguageComment { + open: ";".to_owned(), + close: None, + }, + Self::Java => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::JavaScript => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::JavaScriptReact => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Json => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Kotlin => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::LaTeX => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::Less => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Lua => LanguageComment { + open: "--".to_owned(), + close: None, + }, + Self::Makefile => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Markdown => LanguageComment { + open: "".to_owned()), + }, + Self::ObjectiveC => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::ObjectiveCpp => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Perl => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Perl6 => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Php => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Powershell => 
LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Pug => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Python => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::R => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Razor => LanguageComment { + open: "@*".to_owned(), + close: Some("*@".to_owned()), + }, + Self::Ruby => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Rust => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Scss => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Scala => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::ShaderLab => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Sql => LanguageComment { + open: "--".to_owned(), + close: None, + }, + Self::Swift => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::Toml => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::TypeScript => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::TypeScriptReact => LanguageComment { + open: "//".to_owned(), + close: None, + }, + Self::TeX => LanguageComment { + open: "%".to_owned(), + close: None, + }, + Self::VisualBasic => LanguageComment { + open: "'".to_owned(), + close: None, + }, + Self::Xml => LanguageComment { + open: "".to_owned()), + }, + Self::Xsl => LanguageComment { + open: "".to_owned()), + }, + Self::Yaml => LanguageComment { + open: "#".to_owned(), + close: None, + }, + Self::Unknown => LanguageComment { + open: "#".to_owned(), + close: None, + }, + } + } +} diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index d0cd9e8..38996dd 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -1,20 +1,28 @@ use clap::Parser; +use config::{load_config, LlmLsConfig}; use custom_types::llm_ls::{ AcceptCompletionParams, Backend, Completion, FimParams, 
GetCompletionsParams, GetCompletionsResult, Ide, RejectCompletionParams, TokenizerConfig, }; +use language_id::LanguageId; +use retrieval::{Snippet, SnippetRetriever}; use ropey::Rope; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt::Display; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Duration; +use tinyvec_embed::db::{Compare, FilterBuilder}; use tokenizers::Tokenizer; use tokio::io::AsyncWriteExt; use tokio::net::TcpListener; -use tokio::sync::RwLock; +use tokio::sync::{oneshot, RwLock}; +use tokio::task::JoinHandle; +use tokio::time::Instant; use tower_lsp::jsonrpc::Result as LspResult; +use tower_lsp::lsp_types::notification::Progress; +use tower_lsp::lsp_types::request::WorkDoneProgressCreate; use tower_lsp::lsp_types::*; use tower_lsp::{Client, LanguageServer, LspService, Server}; use tracing::{debug, error, info, info_span, warn, Instrument}; @@ -25,11 +33,14 @@ use uuid::Uuid; use crate::backend::{build_body, build_headers, parse_generations}; use crate::document::Document; use crate::error::{internal_error, Error, Result}; +use crate::retrieval::BuildFrom; mod backend; +mod config; mod document; mod error; mod language_id; +mod retrieval; const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600); pub const NAME: &str = "llm-ls"; @@ -129,20 +140,56 @@ pub struct Generation { struct LlmService { cache_dir: PathBuf, client: Client, + config: Arc, document_map: Arc>>, http_client: reqwest::Client, unsafe_http_client: reqwest::Client, workspace_folders: Arc>>>, tokenizer_map: Arc>>>, unauthenticated_warn_at: Arc>, + snippet_retriever: Arc>, + supports_progress_bar: Arc>, + cancel_snippet_build_tx: Arc>>>, + indexation_handle: Arc>>>, } -fn build_prompt( +fn build_context_header(language_id: LanguageId, snippets: Vec) -> String { + if snippets.is_empty() { + return String::new(); + } + let comment = language_id.get_language_comment(); + let mut header = 
vec![comment.comment_string( + "Below are some relevant code snippets contained in this project's files:".to_owned(), + )]; + for snippet in snippets { + header.push(comment.comment_string("--------------".to_owned())); + header.push(comment.comment_string(format!("snippet from: {}", snippet.file_url))); + header.push(comment.comment_string("--------------".to_owned())); + header.push( + snippet + .code + .lines() + .map(|l| comment.comment_string(l.to_owned())) + .collect::>() + .join("\n"), + ); + header.push(comment.comment_string("--------------".to_owned())); + } + let mut header = header.join("\n"); + header.push('\n'); + header +} + +#[allow(clippy::too_many_arguments)] +async fn build_prompt( pos: Position, text: &Rope, fim: &FimParams, tokenizer: Option>, context_window: usize, + file_url: &str, + language_id: LanguageId, + snippet_retriever: Arc>, ) -> Result { let t = Instant::now(); if fim.enabled { @@ -191,13 +238,33 @@ fn build_prompt( before_line = before_iter.next(); after_line = after_iter.next(); } + let before = before.into_iter().rev().collect::>().join(""); + let query = snippet_retriever + .read() + .await + .build_query( + format!("{before}{after}"), + BuildFrom::Cursor { + cursor_position: before.len(), + }, + ) + .await?; + let snippets = snippet_retriever + .read() + .await + .search( + &query, + Some(FilterBuilder::new().comparison( + "file_url".to_owned(), + Compare::Neq, + file_url.into(), + )), + ) + .await?; + let context_header = build_context_header(language_id, snippets); let prompt = format!( - "{}{}{}{}{}", - fim.prefix, - before.into_iter().rev().collect::>().join(""), - fim.suffix, - after, - fim.middle + "{}{context_header}{before}{}{after}{}", + fim.prefix, fim.suffix, fim.middle ); let time = t.elapsed().as_millis(); info!(prompt, build_prompt_ms = time, "built prompt in {time} ms"); @@ -225,6 +292,25 @@ fn build_prompt( before.push(line); } let prompt = before.into_iter().rev().collect::>().join(""); + let query = 
snippet_retriever + .read() + .await + .build_query(prompt.clone(), BuildFrom::End) + .await?; + let snippets = snippet_retriever + .read() + .await + .search( + &query, + Some(FilterBuilder::new().comparison( + "file_url".to_owned(), + Compare::Neq, + file_url.into(), + )), + ) + .await?; + let context_header = build_context_header(language_id, snippets); + let prompt = format!("{context_header}{prompt}"); let time = t.elapsed().as_millis(); info!(prompt, build_prompt_ms = time, "built prompt in {time} ms"); Ok(prompt) @@ -433,8 +519,9 @@ impl LlmService { async move { let document_map = self.document_map.read().await; + let file_url = params.text_document_position.text_document.uri.as_str(); let document = - match document_map.get(params.text_document_position.text_document.uri.as_str()) { + match document_map.get(file_url) { Some(doc) => doc, None => { debug!("failed to find document"); @@ -488,7 +575,10 @@ impl LlmService { ¶ms.fim, tokenizer, params.context_window, - )?; + &file_url.replace("file://", ""), + document.language_id, + self.snippet_retriever.clone(), + ).await?; let http_client = if params.tls_skip_verify_insecure { info!("tls verification is disabled"); @@ -526,12 +616,35 @@ impl LlmService { ); Ok(()) } + + fn ignore_file(&self, uri: Url) -> bool { + let uri_str = uri.to_string(); + let path = uri.path(); + uri.scheme() == "output" + || uri.scheme() == "term" + || uri.scheme() == "file" && (uri_str == "file:///" || !Path::new(&path).exists()) + } } #[tower_lsp::async_trait] impl LanguageServer for LlmService { async fn initialize(&self, params: InitializeParams) -> LspResult { - *self.workspace_folders.write().await = params.workspace_folders; + *self.workspace_folders.write().await = params.workspace_folders.clone(); + *self.supports_progress_bar.write().await = params + .capabilities + .window + .map(|window| window.work_done_progress.unwrap_or(false)) + .unwrap_or(false); + let position_encoding = 
params.capabilities.general.and_then(|general_cap| { + general_cap.position_encodings.and_then(|encodings| { + if encodings.contains(&PositionEncodingKind::UTF8) { + Some(PositionEncodingKind::UTF8) + } else { + // self.client.show_message(MessageType::WARNING, "llm-ls only supports UTF-8 position encoding, defaulting to UTF-16 which might cause offsetting errors").await; + None + } + }) + }); Ok(InitializeResult { server_info: Some(ServerInfo { name: "llm-ls".to_owned(), @@ -541,12 +654,80 @@ impl LanguageServer for LlmService { text_document_sync: Some(TextDocumentSyncCapability::Kind( TextDocumentSyncKind::INCREMENTAL, )), + position_encoding, ..Default::default() }, }) } async fn initialized(&self, _: InitializedParams) { + let client = self.client.clone(); + let config = self.config.clone(); + let snippet_retriever = self.snippet_retriever.clone(); + let supports_progress_bar = self.supports_progress_bar.clone(); + let workspace_folders = self.workspace_folders.clone(); + let token = NumberOrString::Number(42); + + let token_copy = NumberOrString::Number(42); + let (tx, rx) = oneshot::channel(); + *self.cancel_snippet_build_tx.write().await = Some(tx); + let handle = tokio::spawn(async move { + let guard = workspace_folders.read().await; + if let Some(workspace_folders) = guard.as_ref() { + if *supports_progress_bar.read().await { + match client + .send_request::(WorkDoneProgressCreateParams { + token: token.clone(), + }) + .await + { + Ok(_) => (), + Err(err) => { + error!("err: {err}"); + return; + } + }; + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( + WorkDoneProgressBegin { + title: "creating workspace embeddings".to_owned(), + ..Default::default() + }, + )), + }) + .await; + } + let mut guard = snippet_retriever.write().await; + tokio::select! 
{ + res = guard.build_workspace_snippets( + client.clone(), + config, + token, + workspace_folders[0].uri.path(), + ) => { + if let Err(err) = res { + error!("failed building workspace snippets: {err}"); + } + }, + _ = rx => debug!("received cancellation, stopping indexation"), + } + if *supports_progress_bar.read().await { + client + .send_notification::(ProgressParams { + token: token_copy, + value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( + WorkDoneProgressEnd { + ..Default::default() + }, + )), + }) + .await; + } + } + }); + *self.indexation_handle.write().await = Some(handle); self.client .log_message(MessageType::INFO, "llm-ls initialized") .await; @@ -555,9 +736,10 @@ impl LanguageServer for LlmService { async fn did_open(&self, params: DidOpenTextDocumentParams) { let uri = params.text_document.uri.to_string(); - if uri == "file:///" { + if self.ignore_file(params.text_document.uri) { return; } + match Document::open( ¶ms.text_document.language_id, ¶ms.text_document.text, @@ -580,15 +762,8 @@ impl LanguageServer for LlmService { async fn did_change(&self, params: DidChangeTextDocumentParams) { let uri = params.text_document.uri.to_string(); - if uri == "file:///" { - return; - } - if params.content_changes.is_empty() { - return; - } - - // ignore the output scheme - if params.text_document.uri.scheme() == "output" { + let path = params.text_document.uri.path(); + if self.ignore_file(params.text_document.uri.clone()) { return; } @@ -601,7 +776,34 @@ impl LanguageServer for LlmService { for change in ¶ms.content_changes { if let Some(range) = change.range { match doc.change(range, &change.text).await { - Ok(()) => info!("{uri} changed"), + Ok((start, old_end, new_end)) => { + let start = Position::new(start as u32, 0); + if let Err(err) = self + .snippet_retriever + .write() + .await + .remove( + path.to_owned(), + Range::new(start, Position::new(old_end as u32, 0)), + ) + .await + { + error!("error while removing embeddings: {err}"); + } + if let 
Err(err) = self + .snippet_retriever + .write() + .await + .update_document( + path.to_owned(), + Range::new(start, Position::new(new_end as u32, 0)), + ) + .await + { + error!("error while updating embeddings: {err}"); + } + info!("{uri} changed"); + } Err(err) => error!("error when changing {uri}: {err}"), } } else { @@ -633,6 +835,20 @@ impl LanguageServer for LlmService { async fn shutdown(&self) -> LspResult<()> { debug!("shutdown"); + if let Some(tx) = self.cancel_snippet_build_tx.write().await.take() { + let _ = tx.send(()); + } + self.snippet_retriever + .read() + .await + .stop() + .await + .map_err(internal_error)?; + if let Some(handle) = self.indexation_handle.write().await.take() { + if let Err(err) = handle.await { + error!("error indexing snippets: {err}"); + } + } Ok(()) } } @@ -682,9 +898,24 @@ async fn main() { .build() .expect("failed to build reqwest unsafe client"); + let config = Arc::new( + load_config( + cache_dir + .to_str() + .expect("cache dir path is not valid utf8"), + ) + .await + .expect("failed to load config file"), + ); + let snippet_retriever = Arc::new(RwLock::new( + SnippetRetriever::new(cache_dir.join("embeddings"), config.model.clone(), 20, 10) + .await + .expect("failed to initialise snippet retriever"), + )); let (service, socket) = LspService::build(|client| LlmService { cache_dir, client, + config, document_map: Arc::new(RwLock::new(HashMap::new())), http_client, unsafe_http_client, @@ -695,6 +926,10 @@ async fn main() { .checked_sub(MAX_WARNING_REPEAT) .expect("instant to be in bounds"), )), + snippet_retriever, + supports_progress_bar: Arc::new(RwLock::new(false)), + cancel_snippet_build_tx: Arc::new(RwLock::new(None)), + indexation_handle: Arc::new(RwLock::new(None)), }) .custom_method("llm-ls/getCompletions", LlmService::get_completions) .custom_method("llm-ls/acceptCompletion", LlmService::accept_completion) diff --git a/crates/llm-ls/src/retrieval.rs b/crates/llm-ls/src/retrieval.rs new file mode 100644 index 
0000000..6bea3f8 --- /dev/null +++ b/crates/llm-ls/src/retrieval.rs @@ -0,0 +1,655 @@ +use crate::config::{LlmLsConfig, ModelConfig}; +use crate::error::{Error, Result}; +use candle::utils::{cuda_is_available, metal_is_available}; +use candle::{Device, Tensor}; +use candle_nn::VarBuilder; +use candle_transformers::models::bert::{BertModel, Config, DTYPE}; +use gitignore::Gitignore; +use hf_hub::{api::tokio::Api, Repo, RepoType}; +use std::collections::{HashMap, VecDeque}; +use std::iter::zip; +use std::path::Path; +use std::{path::PathBuf, sync::Arc}; +use tinyvec_embed::db::{Collection, Compare, Db, Embedding, FilterBuilder, SimilarityResult}; +use tinyvec_embed::similarity::Distance; +use tokenizers::{ + Encoding, PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer, TruncationDirection, +}; +use tokio::io::AsyncReadExt; +use tokio::task::spawn_blocking; +use tokio::time::Instant; +use tower_lsp::lsp_types::notification::Progress; +use tower_lsp::lsp_types::{ + NumberOrString, ProgressParams, ProgressParamsValue, Range, WorkDoneProgress, + WorkDoneProgressReport, +}; +use tower_lsp::Client; +use tracing::{debug, error, warn}; + +// TODO: +// - create sliding window and splitting of files logic +// - handle ipynb +// - handle updates + +async fn file_is_empty(file_path: impl AsRef) -> Result { + let mut content = String::new(); + tokio::fs::File::open(&file_path) + .await? 
+        .read_to_string(&mut content)
+        .await?;
+    Ok(content.trim().is_empty())
+}
+
+/// Returns `true` when `file_name` looks like a source or text file worth indexing.
+///
+/// The decision is based on the file extension, compared case-insensitively
+/// against a fixed allow-list. A file whose name is `Dockerfile` (any casing) is
+/// accepted as well, since such files carry no extension.
+fn is_code_file(file_name: &Path) -> bool {
+    // Entries must be lowercase: the extension is lowercased before the lookup,
+    // so a mixed-case entry could never match.
+    const CODE_EXTENSIONS: &[&str] = &[
+        "ada", "adb", "ads", "c", "h", "cpp", "hpp", "cc", "cxx", "hxx", "cs", "css", "scss",
+        "sass", "less", "java", "js", "jsx", "ts", "tsx", "php", "phtml", "html", "xml", "json",
+        "yaml", "yml", "toml", "cfg", "conf", "sh", "bash", "zsh", "ps1", "psm1", "bat", "cmd",
+        "py", "rb", "swift", "pl", "pm", "t", "r", "rs", "go", "kt", "kts", "sql", "md",
+        "markdown", "txt", "lua", "ex", "exs", "erl", "scala", "sc", "ml", "mli", "zig", "clj",
+        "cljs", "cljc", "cljx", "cr", "dockerfile", "fs", "fsi", "fsx", "hs", "lhs", "groovy",
+        "jsonnet", "jl", "nim", "rkt", "scm", "tf", "nix", "vue", "svelte", "lisp", "lsp", "el",
+        "elc", "eln",
+    ];
+
+    // A plain `Dockerfile` has no extension, so it has to be matched by name.
+    let is_dockerfile = file_name
+        .file_name()
+        .and_then(|name| name.to_str())
+        .map_or(false, |name| name.eq_ignore_ascii_case("dockerfile"));
+    if is_dockerfile {
+        return true;
+    }
+
+    file_name
+        .extension()
+        .and_then(|ext| ext.to_str())
+        .map_or(false, |ext| {
+            CODE_EXTENSIONS.contains(&ext.to_lowercase().as_str())
+        })
+}
+
+async fn build_model_and_tokenizer(
+    model_id: String,
+    revision: String,
+) -> Result<(BertModel, Tokenizer)> {
+    let start = Instant::now();
+    let device = device(false)?;
+    let repo = Repo::with_revision(model_id, RepoType::Model, revision);
+    let (config_filename, tokenizer_filename, weights_filename) = {
+        let api = Api::new()?;
+        let api = api.repo(repo);
+        let config = api.get("config.json").await?;
+        let tokenizer = api.get("tokenizer.json").await?;
+        let weights = api.get("pytorch_model.bin").await?;
+        (config, tokenizer, weights)
+    };
+    let config = tokio::fs::read_to_string(config_filename).await?;
+    let config: Config = serde_json::from_str(&config)?;
+    let mut tokenizer: Tokenizer = Tokenizer::from_file(tokenizer_filename)?;
+    tokenizer.with_padding(Some(PaddingParams {
+        strategy: PaddingStrategy::BatchLongest,
+
direction: PaddingDirection::Right,
+        pad_to_multiple_of: Some(8),
+        // TODO: use values provided in model config
+        pad_id: 0,
+        pad_type_id: 0,
+        pad_token: "".to_string(),
+    }));
+    tokenizer.with_truncation(None)?;
+    let vb = VarBuilder::from_pth(&weights_filename, DTYPE, &device)?;
+    let model = BertModel::load(vb, &config)?;
+    debug!(
+        "loaded model and tokenizer in {} ms",
+        start.elapsed().as_millis()
+    );
+    Ok((model, tokenizer))
+}
+
+/// Selects the device to run the model on.
+///
+/// Returns the CPU when `cpu` is `true`; otherwise CUDA device 0 if available,
+/// then Metal device 0 if available, and finally the CPU (with a warning).
+fn device(cpu: bool) -> Result<Device> {
+    if cpu {
+        Ok(Device::Cpu)
+    } else if cuda_is_available() {
+        Ok(Device::new_cuda(0)?)
+    } else if metal_is_available() {
+        Ok(Device::new_metal(0)?)
+    } else {
+        #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
+        {
+            warn!("Running on CPU, to run on GPU(metal), use the `-metal` binary");
+        }
+        #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
+        {
+            warn!("Running on CPU, to run on GPU, use the `-cuda` binary");
+        }
+        Ok(Device::Cpu)
+    }
+}
+
+/// A slice of a file, as stored in the metadata of an embedding.
+pub(crate) struct Snippet {
+    pub(crate) file_url: String,
+    pub(crate) code: String,
+    pub(crate) start_line: usize,
+    pub(crate) end_line: usize,
+}
+
+impl TryFrom<&SimilarityResult> for Snippet {
+    type Error = Error;
+
+    /// Rebuilds a [`Snippet`] from the metadata attached to a search result.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::MissingMetadata`] when the embedding carries no metadata,
+    /// [`Error::MalformattedEmbeddingMetadata`] (naming the key) when one of
+    /// `file_url`, `snippet`, `start_line_no` or `end_line_no` is absent, and the
+    /// conversion error when a value has the wrong type.
+    fn try_from(value: &SimilarityResult) -> Result<Self> {
+        let meta = value
+            .embedding
+            .metadata
+            .as_ref()
+            .ok_or(Error::MissingMetadata)?;
+        // Look a key up, reporting that same key when it is missing.
+        let field = |key: &str| {
+            meta.get(key)
+                .ok_or_else(|| Error::MalformattedEmbeddingMetadata(key.to_owned()))
+        };
+        Ok(Snippet {
+            file_url: field("file_url")?.inner_string()?,
+            code: field("snippet")?.inner_string()?,
+            start_line: field("start_line_no")?.try_into()?,
+            // Must read the end key: reading `start_line_no` here made every
+            // snippet report `end_line == start_line`.
+            end_line: field("end_line_no")?.try_into()?,
+        })
+    }
+}
+
+pub(crate) struct SnippetRetriever {
+    cache_path: PathBuf,
+    collection_name: String,
+    db: Option<Db>,
+    model: Arc<BertModel>,
+    model_config: ModelConfig,
+    tokenizer: Tokenizer,
+    window_size: usize,
+    window_step: usize,
+}
+
+impl SnippetRetriever {
+    /// Loads the embedding model and tokenizer described by `model_config`.
+    ///
+    /// The database is left unset here; it is opened by
+    /// [`SnippetRetriever::initialise_database`].
+    pub(crate) async fn new(
+        cache_path: PathBuf,
+        model_config: ModelConfig,
+        window_size: usize,
+        window_step: usize,
+    ) -> Result<Self> {
+        let collection_name = "code-slices".to_owned();
+        let (model, tokenizer) =
+            build_model_and_tokenizer(model_config.id.clone(), model_config.revision.clone())
+                .await?;
+        Ok(Self {
+            cache_path,
+            collection_name,
+            db: None,
+            model: Arc::new(model),
+            model_config,
+            tokenizer,
+            window_size,
+            window_step,
+        })
+    }
+
+    /// Opens (or creates) the database `db_name` under the cache path and makes
+    /// sure the snippet collection exists. An already existing collection is
+    /// not an error.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the database cannot be opened, or if the collection cannot be
+    /// created for a reason other than already existing.
+    pub(crate) async fn initialise_database(&mut self, db_name: &str) -> Result<Db> {
+        let uri = self.cache_path.join(db_name);
+        let mut db = Db::open(uri).await.expect("failed to open database");
+        match db
+            .create_collection(
+                self.collection_name.clone(),
+                self.model_config.embeddings_size,
+                Distance::Cosine,
+            )
+            .await
+        {
+            Ok(_)
+            | Err(tinyvec_embed::error::Error::Collection(
+                tinyvec_embed::error::Collection::UniqueViolation,
+            )) => (),
+            Err(err) => panic!("failed to create collection: {err}"),
+        }
+        self.db = Some(db.clone());
+        Ok(db)
+    }
+
+    pub(crate) async fn build_workspace_snippets(
+        &mut self,
+        client: Client,
+        config: Arc<LlmLsConfig>,
+        token: NumberOrString,
+        workspace_root: &str,
+    ) -> Result<()> {
+        debug!("building workspace snippets");
+        let start = Instant::now();
+        let workspace_root = PathBuf::from(workspace_root);
+        if self.db.is_none() {
+            self.initialise_database(&format!(
+                "{}--{}",
+                workspace_root
+                    .file_name()
+                    .ok_or_else(|| Error::NoFinalPath(workspace_root.clone()))?
+ .to_str() + .ok_or(Error::NonUnicode)?, + self.model_config.id.replace('/', "--"), + )) + .await?; + } + let mut files = Vec::new(); + let mut gitignore = Gitignore::parse(&workspace_root).ok(); + for pattern in config.ignored_paths.iter() { + if let Some(gitignore) = gitignore.as_mut() { + if let Err(err) = gitignore.add_rule(pattern.clone()) { + error!("failed to parse pattern: {err}"); + } + }; + } + + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( + WorkDoneProgressReport { + message: Some("listing workspace files".to_owned()), + ..Default::default() + }, + )), + }) + .await; + let mut stack = VecDeque::new(); + stack.push_back(workspace_root.clone()); + while let Some(src) = stack.pop_back() { + let mut entries = tokio::fs::read_dir(&src).await?; + while let Some(entry) = entries.next_entry().await? { + let entry_type = entry.file_type().await?; + + let src_path = entry.path(); + + if let Some(gitignore) = &gitignore { + if gitignore.ignored(&src_path)? { + continue; + } + } + + if entry_type.is_dir() { + stack.push_back(src_path); + } else if entry_type.is_file() + && is_code_file(&src_path) + && !file_is_empty(&src_path).await? + { + files.push(src_path); + } + } + } + for (i, file) in files.iter().enumerate() { + let file_url = file.to_str().expect("file path should be utf8").to_owned(); + self.add_document(file_url).await?; + client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( + WorkDoneProgressReport { + message: Some(format!( + "{i}/{} ({})", + files.len(), + file.strip_prefix(workspace_root.as_path())? 
+ .to_str() + .expect("expect file name to be valid unicode") + )), + ..Default::default() + }, + )), + }) + .await; + } + debug!( + "Built workspace snippets in {} ms", + start.elapsed().as_millis() + ); + Ok(()) + } + + pub(crate) async fn add_document(&self, file_url: String) -> Result<()> { + self.build_and_add_snippets(file_url, 0, None).await?; + Ok(()) + } + + pub(crate) async fn update_document(&mut self, file_url: String, range: Range) -> Result<()> { + self.build_and_add_snippets( + file_url, + range.start.line as usize, + Some(range.end.line as usize), + ) + .await?; + Ok(()) + } + + pub(crate) async fn build_query( + &self, + snippet: String, + strategy: BuildFrom, + ) -> Result> { + let result = match strategy { + BuildFrom::Start => { + let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; + encoding.truncate( + self.model_config.max_input_size, + 1, + TruncationDirection::Right, + ); + self.generate_embeddings(vec![encoding], self.model.clone()) + .await? + } + BuildFrom::Cursor { cursor_position } => { + let (before, after) = snippet.split_at(cursor_position); + let mut before_encoding = self.tokenizer.encode(before, true)?; + let mut after_encoding = self.tokenizer.encode(after, true)?; + let share = self.model_config.max_input_size / 2; + before_encoding.truncate(share, 1, TruncationDirection::Left); + after_encoding.truncate(share, 1, TruncationDirection::Right); + before_encoding.take_overflowing(); + after_encoding.take_overflowing(); + before_encoding.merge_with(after_encoding, false); + self.generate_embeddings(vec![before_encoding], self.model.clone()) + .await? + } + BuildFrom::End => { + let mut encoding = self.tokenizer.encode(snippet.clone(), true)?; + encoding.truncate( + self.model_config.max_input_size, + 1, + TruncationDirection::Left, + ); + self.generate_embeddings(vec![encoding], self.model.clone()) + .await? 
+ } + }; + if result.is_empty() { + return Err(Error::MissingEmbedding); + } + let mut result = result; + Ok(result.remove(0)) + } + + pub(crate) async fn search( + &self, + query: &[f32], + filter: Option, + ) -> Result> { + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + let col = db.get_collection(&self.collection_name).await?; + let result = col + .read() + .await + .get(query, 5, filter) + .await? + .iter() + .map(TryInto::try_into) + .collect::>>()?; + Ok(result) + } + + pub(crate) async fn stop(&self) -> Result<()> { + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + db.save().await?; + Ok(()) + } + + pub(crate) async fn remove(&self, file_url: String, range: Range) -> Result<()> { + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + let col = db.get_collection(&self.collection_name).await?; + col.write().await.remove(Some( + Collection::filter() + .comparison( + "start_line_no".to_owned(), + Compare::GtEq, + range.start.line.into(), + ) + .and() + .comparison( + "end_line_no".to_owned(), + Compare::LtEq, + range.end.line.into(), + ) + .and() + .comparison("file_url".to_owned(), Compare::Eq, file_url.into()), + ))?; + Ok(()) + } +} + +impl SnippetRetriever { + // TODO: handle overflowing in Encoding + /// Embedding order is preserved and stays the same as encoding input + async fn generate_embeddings( + &self, + encodings: Vec, + model: Arc, + ) -> Result>> { + let start = Instant::now(); + let embedding = spawn_blocking(move || -> Result>> { + let tokens = encodings + .iter() + .map(|elem| Ok(Tensor::new(elem.get_ids().to_vec(), &model.device)?)) + .collect::>>()?; + let token_ids = Tensor::stack(&tokens, 0)?; + let token_type_ids = token_ids.zeros_like()?; + let embedding = model.forward(&token_ids, &token_type_ids)?; + let (_n_sentence, n_tokens, 
_hidden_size) = embedding.dims3()?; + let embedding = (embedding.sum(1)? / (n_tokens as f64))?; + Ok(embedding.to_vec2::()?) + }) + .await?; + debug!("embedding generated in {} ms", start.elapsed().as_millis()); + embedding + } + + async fn build_and_add_snippets( + &self, + file_url: String, + start: usize, + end: Option, + ) -> Result<()> { + let db = match self.db.as_ref() { + Some(db) => db.clone(), + None => return Err(Error::UninitialisedDatabase), + }; + let col = db.get_collection("code-slices").await?; + let file = tokio::fs::read_to_string(&file_url).await?; + let lines = file.split('\n').collect::>(); + let end = end.unwrap_or(lines.len()).min(lines.len()); + let mut snippets: Vec = Vec::new(); + debug!("Building embeddings for {file_url}"); + for start_line in (start..end).step_by(self.window_step) { + let end_line = (start_line + self.window_size - 1).min(lines.len()); + if !col + .read() + .await + .get( + &[], + 1, + Some( + Collection::filter() + .comparison("file_url".to_owned(), Compare::Eq, file_url.clone().into()) + .and() + .comparison("start_line_no".to_owned(), Compare::Eq, start_line.into()) + .and() + .comparison("end_line_no".to_owned(), Compare::Eq, end_line.into()), + ), + ) + .await? 
+ .is_empty() + { + debug!("snippet {file_url}[{start_line}, {end_line}] already indexed"); + continue; + } + let window = lines[start_line..end_line].to_vec(); + let snippet = window.join("\n"); + if snippet.is_empty() { + debug!("snippet {file_url}[{start_line}, {end_line}] empty"); + continue; + } + snippets.push(Snippet { + file_url: file_url.clone().into(), + code: snippet, + start_line, + end_line, + }); + } + { + let nb_snippets = snippets.len(); + let steps = self.window_step; + debug!("Build {nb_snippets} snippets for {file_url}: {start}, {end}, {steps}"); + } + + // Group by length to reduce padding effect + let snippets = spawn_blocking(|| -> Result> { + snippets.sort_unstable_by(|first, second| first.code.len().cmp(&second.code.len())); + Ok(snippets) + }) + .await?; + + // TODO: improvements to compute an efficient batch size: + // - batch size should be relative to the cumulative size of all elements in the batch, + // Set embedding_batch_size to 8 if device is GPU, use match + let embedding_batch_size = match self.model.device { + Device::Cpu => 2, + _ => 8, + }; + for batch in snippets?.chunks(embedding_batch_size) { + let batch_code = batch.iter().map(|snippet| snippet.code.clone()).collect(); + let encodings = self + .tokenizer + .encode_batch(batch_code, true)? 
+ .iter_mut() + .map(|encoding| { + encoding.truncate(512, 1, TruncationDirection::Right); + encoding.clone() + }) + .collect(); + let results = self + .generate_embeddings(encodings, self.model.clone()) + .await?; + col.write().await.batch_insert( + zip(results, batch) + .map(|item| { + let (embedding, snippet) = item; + Embedding::new( + embedding, + Some(HashMap::from([ + ("file_url".to_owned(), snippet.file_url.clone().into()), + ("start_line_no".to_owned(), snippet.start_line.into()), + ("end_line_no".to_owned(), snippet.end_line.into()), + ("snippet".to_owned(), snippet.code.clone().into()), + ])), + ) + }) + .collect::>(), + )?; + } + db.save().await?; + Ok(()) + } +} + +pub(crate) enum BuildFrom { + Cursor { + cursor_position: usize, + }, + End, + #[allow(dead_code)] + Start, +} diff --git a/crates/tinyvec-embed/Cargo.toml b/crates/tinyvec-embed/Cargo.toml new file mode 100644 index 0000000..b05bc00 --- /dev/null +++ b/crates/tinyvec-embed/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "tinyvec-embed" +version = "0.1.0" +edition.workspace = true +license.workspace = true +authors.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bincode = "1" +serde = { version = "1", features = ["derive"] } +thiserror = "1" +tokio = { version = "1", features = [ + "fs", + "macros", + "rt-multi-thread", + "sync", +] } +tracing = "0.1" + +[dependencies.uuid] +version = "1.7.0" +features = ["v4", "fast-rng", "macro-diagnostics", "serde"] + +[dev-dependencies] +criterion = { version = "0.5", features = ["async_tokio"] } +tempfile = "3" + +[[bench]] +name = "bench_main" +harness = false diff --git a/crates/tinyvec-embed/README.md b/crates/tinyvec-embed/README.md new file mode 100644 index 0000000..9846ef5 --- /dev/null +++ b/crates/tinyvec-embed/README.md @@ -0,0 +1,5 @@ +# tinyvec-embed + +Tiny embedded vector database. + +Inspired by [tinyvector](https://github.com/m1guelpf/tinyvector). 
diff --git a/crates/tinyvec-embed/benches/bench_main.rs b/crates/tinyvec-embed/benches/bench_main.rs new file mode 100644 index 0000000..bafb20c --- /dev/null +++ b/crates/tinyvec-embed/benches/bench_main.rs @@ -0,0 +1,7 @@ +use criterion::criterion_main; + +mod benchmarks; + +criterion_main! { + benchmarks::retrieval_speed::retrieval_speed, +} diff --git a/crates/tinyvec-embed/benches/benchmarks/mod.rs b/crates/tinyvec-embed/benches/benchmarks/mod.rs new file mode 100644 index 0000000..5d78574 --- /dev/null +++ b/crates/tinyvec-embed/benches/benchmarks/mod.rs @@ -0,0 +1 @@ +pub mod retrieval_speed; diff --git a/crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs b/crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs new file mode 100644 index 0000000..b72e02c --- /dev/null +++ b/crates/tinyvec-embed/benches/benchmarks/retrieval_speed.rs @@ -0,0 +1,43 @@ +use std::collections::HashMap; + +use criterion::{criterion_group, Criterion}; +use tinyvec_embed::{ + db::{Collection, Embedding}, + similarity::Distance, +}; +use tokio::runtime::Runtime; +use uuid::Uuid; + +pub fn get_collection(dimension: usize, embeddings_count: usize) -> Collection { + let embeddings = (0..embeddings_count) + .map(|i| Embedding { + id: Uuid::new_v4(), + metadata: Some(HashMap::from([(i.to_string(), i.into())])), + vector: vec![i as f32; dimension], + }) + .collect::>(); + + Collection { + dimension, + distance: Distance::Cosine, + embeddings, + } +} + +pub fn bench_retrieval(c: &mut Criterion) { + let dimension = 768; + let embeddings_count = 50_000; + let rt = Runtime::new().unwrap(); + c.bench_function("get top 5 k", |b| { + let collection = get_collection(dimension, embeddings_count); + let query = vec![42.; dimension]; + b.to_async(&rt) + .iter(|| async { collection.get(&query, 5, None).await.unwrap() }); + }); +} + +criterion_group! 
{ + name = retrieval_speed; + config = Criterion::default(); + targets = bench_retrieval +} diff --git a/crates/tinyvec-embed/src/db.rs b/crates/tinyvec-embed/src/db.rs new file mode 100644 index 0000000..dd20995 --- /dev/null +++ b/crates/tinyvec-embed/src/db.rs @@ -0,0 +1,677 @@ +use serde::{Deserialize, Serialize}; +use std::{ + collections::{BinaryHeap, HashMap}, + fmt::Display, + path::{Path, PathBuf}, + sync::Arc, +}; +use tokio::{ + fs, + sync::{RwLock, Semaphore}, + task::JoinSet, +}; +use tracing::debug; +use uuid::Uuid; + +use crate::{ + error::{Collection as CollectionError, Error, Result}, + similarity::{Distance, ScoreIndex}, +}; + +#[derive(Clone, Debug)] +pub struct Db { + inner: Arc>, +} + +#[derive(Clone, Debug)] +pub struct DbInner { + collections: HashMap>>, + location: PathBuf, +} + +impl Db { + /// Opens a database from disk or creates a new one if it doesn't exist + pub async fn open(path: impl AsRef) -> Result { + let path = path.as_ref(); + let mut inner = DbInner { + collections: HashMap::new(), + location: path.to_path_buf(), + }; + if !path.exists() { + debug!("Creating database store"); + fs::create_dir_all(path).await?; + + return Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }); + } + debug!("Loading database from store"); + + let mut entries = fs::read_dir(path).await?; + while let Some(entry) = entries.next_entry().await? { + let entry_type = entry.file_type().await?; + if entry_type.is_file() { + let col = fs::read(entry.path()).await?; + let col = bincode::deserialize(&col[..])?; + let name = entry + .file_name() + .to_str() + .ok_or(Error::InvalidFileName)? + .to_owned(); + inner.collections.insert(name, Arc::new(RwLock::new(col))); + } else { + // warning? 
+            }
+        }
+        Ok(Self {
+            inner: Arc::new(RwLock::new(inner)),
+        })
+    }
+
+    /// Creates an empty collection called `name` and registers it in the database.
+    ///
+    /// The existence check and the insertion happen under a single write lock,
+    /// so two handles sharing the same database cannot both create `name` and
+    /// silently replace each other's collection.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`CollectionError::UniqueViolation`] when a collection with this
+    /// name already exists.
+    pub async fn create_collection(
+        &mut self,
+        name: String,
+        dimension: usize,
+        distance: Distance,
+    ) -> Result<Arc<RwLock<Collection>>> {
+        let mut inner = self.inner.write().await;
+        match inner.collections.entry(name) {
+            std::collections::hash_map::Entry::Occupied(_) => {
+                Err(CollectionError::UniqueViolation.into())
+            }
+            std::collections::hash_map::Entry::Vacant(slot) => {
+                let collection = Arc::new(RwLock::new(Collection {
+                    dimension,
+                    distance,
+                    embeddings: Vec::new(),
+                }));
+                slot.insert(collection.clone());
+                Ok(collection)
+            }
+        }
+    }
+
+    /// Removes a collection from [`Db`].
+    ///
+    /// The [`Collection`] will still exist in memory for as long as you hold a copy, given it is
+    /// wrapped in an `Arc`.
+    pub async fn delete_collection(&mut self, name: &str) {
+        self.inner.write().await.collections.remove(name);
+    }
+
+    pub async fn get_collection(&self, name: &str) -> Result<Arc<RwLock<Collection>>> {
+        self.inner
+            .read()
+            .await
+            .collections
+            .get(name)
+            .ok_or(CollectionError::NotFound.into())
+            .cloned()
+    }
+
+    /// Save database to disk
+    pub async fn save(&self) -> Result<()> {
+        let inner = self.inner.read().await;
+        for (name, collection) in inner.collections.iter() {
+            let db = bincode::serialize(&*collection.read().await)?;
+
+            fs::write(inner.location.as_path().join(name), db).await?;
+        }
+
+        Ok(())
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct SimilarityResult {
+    pub score: f32,
+    pub embedding: Embedding,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Collection {
+    /// Dimension of the vectors in the collection
+    pub dimension: usize,
+    /// Distance metric used for querying
+    pub distance: Distance,
+    /// Embeddings in the collection
+    #[serde(default)]
+    pub embeddings: Vec<Embedding>,
+}
+
+impl Collection {
+    pub fn filter() -> FilterBuilder {
+        FilterBuilder::new()
+    }
+
+    pub async fn get(
+        &self,
+        query: &[f32],
+        k: usize,
+        filter: Option<FilterBuilder>,
+    ) -> Result<Vec<SimilarityResult>> {
+        let embeddings = if let Some(filter) = filter {
+            self.embeddings
+                .iter()
+
.filter(filter.fn_ref_closure()) + .collect::>() + } else { + self.embeddings.iter().collect::>() + }; + get_similarity(self.distance, &embeddings, query, k).await + } + + pub fn insert(&mut self, embedding: Embedding) -> Result<()> { + if embedding.vector.len() != self.dimension { + return Err(CollectionError::DimensionMismatch.into()); + } + + self.embeddings.push(embedding); + + Ok(()) + } + + pub fn batch_insert(&mut self, embeddings: Vec) -> Result<()> { + if embeddings + .iter() + .any(|embedding| embedding.vector.len() != self.dimension) + { + return Err(CollectionError::DimensionMismatch.into()); + } + self.embeddings.extend(embeddings); + Ok(()) + } + + /// Remove values matching filter. + /// + /// Empties the collection when `filter` is `None`. + pub fn remove(&mut self, filter: Option) -> Result<()> { + if let Some(filter) = filter { + let mut closure = filter.fn_mut_closure(); + self.embeddings.retain(|e| !closure(e)); + } else { + self.embeddings.clear(); + } + Ok(()) + } + + pub fn len(&self) -> usize { + self.embeddings.len() + } + + pub fn is_empty(&self) -> bool { + self.embeddings.is_empty() + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Embedding { + pub id: Uuid, + pub metadata: Option>, + pub vector: Vec, +} + +impl Embedding { + pub fn new(vector: Vec, metadata: Option>) -> Self { + Self { + id: Uuid::new_v4(), + metadata, + vector, + } + } +} + +impl PartialEq for Embedding { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Eq for Embedding {} + +#[derive(Clone, Debug, PartialEq, PartialOrd, Serialize, Deserialize)] +pub enum Value { + String(String), + Number(f32), +} + +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(s) => write!(f, "{s}"), + Self::Number(n) => write!(f, "{n}"), + } + } +} + +impl Value { + pub fn inner_string(&self) -> Result { + match self { + Self::String(s) => Ok(s.to_owned()), + _ => 
Err(Error::ValueNotString(self.to_owned())),
+        }
+    }
+}
+
+/// Converts a numeric [`Value`] into a `usize`.
+///
+/// Implemented as `TryFrom` so that `TryInto<usize>` for `&Value` comes from the
+/// standard blanket impl; existing `.try_into()` call sites keep working.
+impl TryFrom<&Value> for usize {
+    type Error = Error;
+
+    /// # Errors
+    ///
+    /// Returns [`Error::ValueNotNumber`] when the value is a string, or a number
+    /// that is negative, fractional or not finite. A plain `as` cast would
+    /// silently saturate or truncate such numbers.
+    fn try_from(value: &Value) -> Result<Self> {
+        match value {
+            Value::Number(n) if n.is_finite() && *n >= 0.0 && n.fract() == 0.0 => Ok(*n as usize),
+            _ => Err(Error::ValueNotNumber(value.to_owned())),
+        }
+    }
+}
+
+impl From<usize> for Value {
+    fn from(value: usize) -> Self {
+        Self::Number(value as f32)
+    }
+}
+
+impl From<u32> for Value {
+    fn from(value: u32) -> Self {
+        Self::Number(value as f32)
+    }
+}
+
+impl From<f32> for Value {
+    fn from(value: f32) -> Self {
+        Self::Number(value)
+    }
+}
+
+impl From<&str> for Value {
+    fn from(value: &str) -> Self {
+        Self::String(value.to_owned())
+    }
+}
+
+impl From<String> for Value {
+    fn from(value: String) -> Self {
+        Self::String(value)
+    }
+}
+
+#[derive(Debug)]
+pub enum Compare {
+    Eq,
+    Neq,
+    Gt,
+    GtEq,
+    Lt,
+    LtEq,
+}
+
+#[derive(Clone, Debug)]
+enum Chain {
+    And,
+    Or,
+}
+
+/// Builds a metadata filter as a flat list of comparisons joined by `and`/`or`.
+///
+/// Conditions are evaluated left to right without operator precedence.
+pub struct FilterBuilder {
+    filter: Vec<(String, Compare, Value, Option<Chain>)>,
+}
+
+impl FilterBuilder {
+    pub fn new() -> Self {
+        Self { filter: Vec::new() }
+    }
+
+    /// Joins the last comparison to the next one with a logical AND.
+    pub fn and(mut self) -> Self {
+        if let Some(c) = self.filter.last_mut() {
+            c.3 = Some(Chain::And);
+        };
+        self
+    }
+
+    /// Joins the last comparison to the next one with a logical OR.
+    pub fn or(mut self) -> Self {
+        if let Some(c) = self.filter.last_mut() {
+            c.3 = Some(Chain::Or);
+        }
+        self
+    }
+
+    /// Appends the comparison `metadata[key] <op> value`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the previous comparison was not followed by `and()` or `or()`.
+    pub fn comparison(mut self, key: String, op: Compare, value: Value) -> Self {
+        assert!(
+            self.filter.last().map(|c| c.3.is_some()).unwrap_or(true),
+            "Missing chain operator in filter"
+        );
+        self.filter.push((key, op, value, None));
+        self
+    }
+
+    /// Evaluates the whole filter against one embedding.
+    ///
+    /// An embedding without metadata satisfies no comparison. When the metadata
+    /// lacks the key, only `Neq` holds: the ordering comparisons require the key
+    /// to be present (comparing `Option`s directly would make a missing key
+    /// count as smaller than any value, so `Lt`/`LtEq` would match it).
+    fn matches(&self, e: &Embedding) -> bool {
+        let mut ret = true;
+        let mut prev: Option<&Chain> = None;
+        for (key, op, expected, chain) in &self.filter {
+            let cond_res = e.metadata.as_ref().map_or(false, |meta| {
+                let stored = meta.get(key);
+                match op {
+                    Compare::Eq => stored == Some(expected),
+                    Compare::Neq => stored != Some(expected),
+                    Compare::Gt => stored.map_or(false, |v| v > expected),
+                    Compare::GtEq => stored.map_or(false, |v| v >= expected),
+                    Compare::Lt => stored.map_or(false, |v| v < expected),
+                    Compare::LtEq => stored.map_or(false, |v| v <= expected),
+                }
+            });
+            // The first condition seeds the result; later ones are folded in with
+            // the operator that followed the previous condition.
+            ret = match prev {
+                Some(Chain::And) => ret && cond_res,
+                Some(Chain::Or) => ret || cond_res,
+                None => cond_res,
+            };
+            prev = chain.as_ref();
+        }
+        ret
+    }
+
+    /// Predicate form used with `Vec::retain`.
+    fn fn_mut_closure(self) -> impl FnMut(&Embedding) -> bool {
+        move |e| self.matches(e)
+    }
+
+    // XXX: we assume the user will chain filters correctly
+    /// Predicate form used with `Iterator::filter` over `&Embedding` items.
+    fn fn_ref_closure(self) -> impl Fn(&&Embedding) -> bool {
+        move |e| self.matches(*e)
+    }
+}
+
+impl Default for FilterBuilder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+async fn get_similarity(
+    distance: Distance,
+    embeddings: &[&Embedding],
+    query:
&[f32], + k: usize, +) -> Result> { + let semaphore = Arc::new(Semaphore::new(8)); + let mut set = JoinSet::new(); + for (index, embedding) in embeddings.iter().enumerate() { + let embedding = (*embedding).clone(); + let query = query.to_owned(); + let permit = semaphore.clone().acquire_owned().await?; + set.spawn_blocking(move || { + let score = distance.compute(&embedding.vector, &query); + drop(permit); + ScoreIndex { score, index } + }); + } + + let mut heap = BinaryHeap::new(); + while let Some(res) = set.join_next().await { + let score_index = res.map_err(Into::::into)?; + if heap.len() < k || score_index < *heap.peek().ok_or(CollectionError::EmptyBinaryHeap)? { + heap.push(score_index); + + if heap.len() > k { + heap.pop(); + } + } + } + Ok(heap + .into_sorted_vec() + .into_iter() + .map(|ScoreIndex { score, index }| SimilarityResult { + score, + embedding: embeddings[index].clone(), + }) + .collect()) +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::*; + + #[tokio::test] + async fn simple_similarity() { + let temp_dir = TempDir::new().expect("failed to create tempt dir"); + let db_path = temp_dir.path().join("embeddings"); + let mut db = match Db::open(db_path).await { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + let col = match db + .create_collection("test".to_owned(), 5, Distance::Cosine) + .await + { + Ok(col) => col, + Err(err) => panic!("{}", err.to_string()), + }; + let embedding = Embedding::new( + vec![0.9999695, 0.76456239, 0.86767905, 0.17577756, 0.9949882], + None, + ); + col.write() + .await + .insert(embedding.clone()) + .expect("faield to insert embedding"); + + let expected = SimilarityResult { + score: 0.7449362, + embedding, + }; + let results = col + .read() + .await + .get( + &[0.5902804, 0.516834, 0.12403694, 0.8444756, 0.4672038], + 1, + None, + ) + .await + .expect("failed to get most similar embeddings"); + let actual = results + .first() + .expect("missing embedding in similarity 
result"); + assert!((expected.score - actual.score).abs() <= f32::EPSILON); + assert_eq!(expected.embedding.id, actual.embedding.id); + } + + #[tokio::test] + async fn filter() { + let temp_dir = TempDir::new().expect("failed to create tempt dir"); + let db_path = temp_dir.path().join("embeddings"); + let mut db = match Db::open(db_path).await { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + let col = match db + .create_collection("test".to_owned(), 5, Distance::Cosine) + .await + { + Ok(col) => col, + Err(err) => panic!("{}", err.to_string()), + }; + let embedding1 = Embedding::new( + vec![0.5880849, 0.25781349, 0.32253786, 0.80958734, 0.8591076], + Some(HashMap::from([ + ("i".to_owned(), 32.0.into()), + ("j".to_owned(), 10.0.into()), + ])), + ); + col.write() + .await + .insert(embedding1.clone()) + .expect("faield to insert embedding"); + let embedding2 = Embedding::new( + vec![0.43717385, 0.21100248, 0.5068433, 0.9626808, 0.6763327], + Some(HashMap::from([ + ("i".to_owned(), 7.0.into()), + ("j".to_owned(), 100.0.into()), + ])), + ); + col.write() + .await + .insert(embedding2.clone()) + .expect("faield to insert embedding"); + let embedding3 = Embedding::new( + vec![0.2630481, 0.24888718, 0.3375401, 0.92770165, 0.44944693], + Some(HashMap::from([ + ("i".to_owned(), 29.0.into()), + ("j".to_owned(), 16.0.into()), + ])), + ); + col.write() + .await + .insert(embedding3.clone()) + .expect("faield to insert embedding"); + let embedding4 = Embedding::new( + vec![0.7642892, 0.47043378, 0.9035855, 0.31120034, 0.5757918], + Some(HashMap::from([ + ("i".to_owned(), 3.0.into()), + ("j".to_owned(), 110.0.into()), + ])), + ); + col.write() + .await + .insert(embedding4.clone()) + .expect("faield to insert embedding"); + + let results = col + .read() + .await + .get( + &[0.09537213, 0.5104327, 0.69980987, 0.13146928, 0.30541683], + 4, + Some( + Collection::filter() + .comparison("i".to_owned(), Compare::Lt, 25.0.into()) + .and() + 
.comparison("j".to_owned(), Compare::Gt, 50.0.into()), + ), + ) + .await + .expect("failed to get most similar embeddings"); + let actual_scores: Vec = results.iter().map(|r| r.score).collect(); + let expected_scores: Vec = vec![0.8701641, 0.6552329]; + assert!(expected_scores + .iter() + .zip(actual_scores.iter()) + .all(|(e, a)| { (e - a).abs() <= f32::EPSILON })); + let expected_embeddings = vec![embedding4, embedding2]; + let actual_embeddings: Vec = results.into_iter().map(|r| r.embedding).collect(); + assert_eq!(expected_embeddings, actual_embeddings); + } + + #[tokio::test] + async fn storage() { + let temp_dir = TempDir::new().expect("failed to create tempt dir"); + let db_path = temp_dir.path().join("embeddings"); + + let mut db = match Db::open(db_path.as_path()).await { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + assert!(db.inner.read().await.collections.is_empty()); + assert_eq!(db.inner.read().await.location, db_path); + + let col = match db + .create_collection("test".to_owned(), 5, Distance::Cosine) + .await + { + Ok(col) => col, + Err(err) => panic!("{}", err.to_string()), + }; + let embedding = Embedding::new( + vec![0.9999695, 0.76456239, 0.86767905, 0.17577756, 0.9949882], + None, + ); + col.write() + .await + .insert(embedding.clone()) + .expect("faield to insert embedding"); + + db.save().await.expect("failed to save to disk"); + let db = match Db::open(db_path).await { + Ok(db) => db, + Err(err) => panic!("{}", err.to_string()), + }; + assert_eq!(db.inner.read().await.collections.len(), 1); + let col = db + .get_collection("test") + .await + .expect("failed to get collection"); + assert_eq!(col.read().await.len(), 1); + assert_eq!(col.read().await.distance, Distance::Cosine); + assert_eq!(col.read().await.dimension, 5); + } +} diff --git a/crates/tinyvec-embed/src/error.rs b/crates/tinyvec-embed/src/error.rs new file mode 100644 index 0000000..8dc0f54 --- /dev/null +++ b/crates/tinyvec-embed/src/error.rs @@ -0,0 +1,41 
@@
+use std::path::PathBuf;
+
+use crate::db::Value;
+
+#[derive(Debug, thiserror::Error)]
+pub enum Collection {
+    #[error("The dimension of the vector doesn't match the dimension of the collection")]
+    DimensionMismatch,
+    #[error("attempt to peek an empty binary heap")]
+    EmptyBinaryHeap,
+    #[error("invalid path: {0}")]
+    InvalidPath(PathBuf),
+    #[error("join error: {0}")]
+    Join(#[from] tokio::task::JoinError),
+    #[error("Collection doesn't exist")]
+    NotFound,
+    #[error("error sending message in channel")]
+    Send,
+    #[error("Collection already exists")]
+    UniqueViolation,
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("acquire error: {0}")]
+    Acquire(#[from] tokio::sync::AcquireError),
+    #[error("bincode error: {0}")]
+    Bincode(#[from] bincode::Error),
+    #[error("collection error: {0}")]
+    Collection(#[from] Collection),
+    #[error("a file with an invalid name was found in the database directory")]
+    InvalidFileName,
+    #[error("io error: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("expected value to be a valid number, got: {0}")]
+    ValueNotNumber(Value),
+    #[error("expected value to be string, got: {0}")]
+    ValueNotString(Value),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
diff --git a/crates/tinyvec-embed/src/lib.rs b/crates/tinyvec-embed/src/lib.rs
new file mode 100644
index 0000000..32e288f
--- /dev/null
+++ b/crates/tinyvec-embed/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod db;
+pub mod error;
+pub mod similarity;
diff --git a/crates/tinyvec-embed/src/similarity.rs b/crates/tinyvec-embed/src/similarity.rs
new file mode 100644
index 0000000..4966644
--- /dev/null
+++ b/crates/tinyvec-embed/src/similarity.rs
@@ -0,0 +1,71 @@
+use serde::{Deserialize, Serialize};
+use std::cmp::Ordering;
+
+/// Similarity metric used to score one vector against another.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum Distance {
+    /// Cosine similarity: dot product divided by the product of the norms.
+    Cosine,
+}
+
+impl Distance {
+    /// Scores `a` against `b` with this metric.
+    ///
+    /// The dot product only covers the leading `min(a.len(), b.len())` pairs,
+    /// while each norm covers its whole slice.
+    ///
+    /// Returns `0.0` when the norm product is zero (e.g. an empty or all-zero
+    /// vector) rather than the `NaN` that `0.0 / 0.0` would yield.
+    pub fn compute(&self, a: &[f32], b: &[f32]) -> f32 {
+        match self {
+            Distance::Cosine => {
+                // Squared norms; the square root is taken once, on their product.
+                let magnitude_a = a.iter().fold(0.0, |acc, &val| val.mul_add(val, acc));
+                let magnitude_b = b.iter().fold(0.0, |acc, &val| val.mul_add(val, acc));
+                let norm = (magnitude_a * magnitude_b).sqrt();
+                if norm == 0.0 {
+                    return 0.0;
+                }
+                dot_product(a, b) / norm
+            }
+        }
+    }
+}
+
+/// Sums the pairwise products of `a` and `b`, stopping at the shorter slice.
+fn dot_product(a: &[f32], b: &[f32]) -> f32 {
+    a.iter().zip(b).fold(0.0, |acc, (x, y)| acc + x * y)
+}
+
+/// A similarity score paired with an index.
+///
+/// Comparisons look at `score` only and are reversed, so the greatest
+/// `ScoreIndex` is the one with the lowest score.
+pub struct ScoreIndex {
+    pub score: f32,
+    pub index: usize,
+}
+
+impl PartialEq for ScoreIndex {
+    fn eq(&self, other: &Self) -> bool {
+        // Defined through `cmp` so `Eq` stays reflexive even for a NaN score.
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl Eq for ScoreIndex {}
+
+impl PartialOrd for ScoreIndex {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for ScoreIndex {
+    fn cmp(&self, other: &Self) -> Ordering {
+        // The comparison is intentionally reversed here to make the heap a min-heap.
+        // `total_cmp` gives a total order, so a NaN score cannot break transitivity.
+        other.score.total_cmp(&self.score)
+    }
+}