Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
32b9975
wip
McPatate Oct 18, 2023
407be56
wip: add starencoder model to generate embeddings
McPatate Dec 11, 2023
c8b77a9
feat: add suppot for tcp connections to enable debugging
McPatate Jan 28, 2024
15859bc
refactor: error handling
McPatate Jan 28, 2024
d0a02eb
refactor: remove unnecessary comment
McPatate Jan 28, 2024
11d2906
feat: add tinyvec-embed
McPatate Feb 6, 2024
307ee39
refactor: replace adaptor strings with enum
McPatate Feb 6, 2024
e04353a
Merge branch 'main' into feat/multi_file_context
McPatate Feb 19, 2024
2586fad
Merge branch 'main' into feat/multi_file_context
McPatate Feb 19, 2024
0c44720
Merge branch 'main' into feat/multi_file_context
McPatate Feb 19, 2024
453b477
test: add similarity & filter test to tinyvec-embed
McPatate Feb 19, 2024
10601b1
feat: make tinyvec-embed thread safe
McPatate Feb 20, 2024
1cd0a05
feat: replace lancedb with tinyvec-embed
McPatate Feb 20, 2024
9d643f5
feat: ignore specfic files
McPatate Feb 20, 2024
660eeba
feat: replace bigcode/starencoder with intfloat/multilingual-e5-small
McPatate Feb 20, 2024
3756fee
feat: add similar snippets to prompt
McPatate Feb 21, 2024
4b14a20
fix: dangling llm-ls process
McPatate Feb 22, 2024
b3245f7
refactor: cleaner shutdown
McPatate Feb 22, 2024
3ccf695
feat: add benchmark
McPatate Feb 26, 2024
9e2f7c0
fix: rework gitignore to be closer to spec
McPatate Feb 27, 2024
702ba67
feat: add llm-ls config file
McPatate Feb 27, 2024
8b87df6
feat: separate embeddings collections for each workspace
McPatate Feb 28, 2024
baedf85
feat: add `ModelConfig` to `LlmLsConfig`
McPatate Feb 28, 2024
6e3d6c0
feat: add strategies for building query embedding vector
McPatate Feb 28, 2024
64a4c38
Added batch embedding computing (#86)
Wats0ns Mar 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,844 changes: 1,423 additions & 421 deletions Cargo.lock

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions crates/gitignore/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Manifest for the `gitignore` crate: a small `.gitignore` parser and path matcher.
[package]
name = "gitignore"
version = "0.1.0"
# Edition, license and authors are inherited from the workspace manifest.
edition.workspace = true
license.workspace = true
authors.workspace = true

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# Glob pattern compilation and matching (`Pattern`, `MatchOptions`).
glob = "0.3"
# Derive macro used for the crate's `Error` enum.
thiserror = "1"

[dev-dependencies]
# Temporary directory fixtures for the unit tests.
tempdir = "0.3"
284 changes: 284 additions & 0 deletions crates/gitignore/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
use std::{
fmt::Debug,
fs::{canonicalize, File},
io::{BufRead, BufReader},
path::{Path, PathBuf},
};

use glob::{MatchOptions, Pattern};

/// Errors that can occur while parsing ignore rules or matching paths against them.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An I/O operation failed (path canonicalization, opening or reading the ignore file).
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// A path could not be converted to a UTF-8 string, which is required to build
/// and match glob patterns.
#[error("non utf8 path")]
NonUtf8Path,
/// The ignore file's path has no parent directory to use as the base for its rules.
#[error("path has no parent folder")]
NoParent,
/// A rule could not be compiled into a valid glob pattern.
#[error("glob pattern error: {0}")]
Pattern(#[from] glob::PatternError),
}

/// Convenience alias for results whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// A single ignore rule, compiled from one line of an ignore file or from an ad hoc pattern.
#[derive(Debug)]
pub struct Rule {
/// `true` when the source pattern started with `!`.
/// Negated rules are currently skipped by `Gitignore::ignored`.
negate: bool,
/// Compiled glob patterns, rooted at the base directory the rule was parsed against.
patterns: Vec<Pattern>,
/// 1-based line number of the rule in its source file; `usize::MAX` for rules
/// added through `Gitignore::add_rule`. Kept for debugging only.
_source_line: usize,
}

impl Rule {
    /// Parses one line of a `.gitignore` file into a [`Rule`].
    ///
    /// `base_path` is the directory the rule is relative to (the folder that
    /// contains the ignore file) and `_source_line` is only recorded for
    /// debugging purposes.
    ///
    /// Returns `Ok(None)` for blank lines and for comment lines (lines
    /// starting with `#`).
    ///
    /// The line is interpreted as follows:
    /// - trailing spaces are dropped unless the last one is preceded by a
    ///   backslash, leading whitespace is dropped;
    /// - a leading `!` marks the rule as negated;
    /// - a trailing `/` restricts the rule to directories (only the
    ///   `<pattern>/**` form is generated);
    /// - a `/` at the beginning or in the middle anchors the pattern to
    ///   `base_path`, otherwise it may match at any depth below it.
    ///
    /// # Errors
    ///
    /// Returns [`Error::NonUtf8Path`] if `base_path` is not valid UTF-8 and
    /// [`Error::Pattern`] if the resulting glob pattern is invalid.
    pub fn parse(
        mut pattern: String,
        base_path: impl AsRef<Path>,
        _source_line: usize,
    ) -> Result<Option<Self>> {
        if pattern.trim().is_empty() || pattern.starts_with('#') {
            return Ok(None);
        }

        // Per the gitignore format, trailing spaces are not part of the
        // pattern unless they are quoted with a backslash. A line ending with
        // an escaped space is left untouched.
        let unpadded_len = pattern.trim_end_matches(' ').len();
        if !pattern[..unpadded_len].ends_with('\\') {
            pattern.truncate(unpadded_len);
        }

        let mut glob = pattern.trim_start();
        let negate = match glob.strip_prefix('!') {
            Some(rest) => {
                glob = rest;
                true
            }
            None => false,
        };
        let directory = match glob.strip_suffix('/') {
            Some(rest) => {
                glob = rest;
                true
            }
            None => false,
        };
        // Anchoring is decided after the trailing separator has been removed
        // but before the leading one is, so `/foo` and `foo/bar` are anchored
        // while `foo/` is not.
        let anchored = glob.contains('/');
        let glob = glob.strip_prefix('/').unwrap_or(glob);

        let base_path_str = base_path.as_ref().to_str().ok_or(Error::NonUtf8Path)?;
        // The base directory is a literal path: escape glob metacharacters
        // (`*`, `?`, `[`, `]`) so that only the user supplied part of the
        // pattern is interpreted as a glob.
        let escaped_base = Pattern::escape(base_path_str);
        let base_pattern = if anchored || glob.starts_with("**") {
            format!("{escaped_base}/{glob}")
        } else {
            format!("{escaped_base}/**/{glob}")
        };

        let mut patterns = Vec::with_capacity(2);
        // Matches a directory named by the pattern as well as everything under it.
        patterns.push(Pattern::new(&format!("{base_pattern}/**"))?);
        if !directory {
            // Matches a plain file named by the pattern.
            patterns.push(Pattern::new(&base_pattern)?);
        }
        Ok(Some(Self {
            negate,
            patterns,
            _source_line,
        }))
    }
}

/// A parsed ignore file: an ordered list of [`Rule`]s together with the directory they apply to.
#[derive(Debug)]
pub struct Gitignore {
/// Directory containing the ignore file; rules are rooted at this directory.
base_path: PathBuf,
/// Rules in the order they were read, followed by any added with `add_rule`.
rules: Vec<Rule>,
/// Path of the ignore file that was parsed. Kept for debugging only.
_source_file: PathBuf,
}

impl Gitignore {
/// Parses a `.gitignore` file at `path`.
///
/// If `path` is a directory, attempts to read `{dir}/.gitignore`.
pub fn parse(path: impl AsRef<Path>) -> Result<Self> {
let mut path = canonicalize(path)?;
if path.is_dir() {
path = path.join(".gitignore");
}
let reader = BufReader::new(File::open(&path)?);
let mut rules = Vec::new();
for (line_nb, line) in reader.lines().enumerate() {
let line = line?;
if let Some(rule) =
Rule::parse(line, path.parent().ok_or(Error::NoParent)?, line_nb + 1)?
{
rules.push(rule);
}
}
Ok(Self {
base_path: path.parent().ok_or(Error::NoParent)?.to_path_buf(),
rules,
_source_file: path,
})
}

/// Checks if a path is ignored.
///
/// Path can be relative within the directory which contains the `.gitignore` file.
///
/// # Errors
///
/// This function will return an error if the file does not exist.
pub fn ignored(&self, path: impl AsRef<Path>) -> Result<bool> {
let path = if path.as_ref().starts_with(&self.base_path) {
path.as_ref().to_path_buf()
} else if path.as_ref().has_root() {
return Ok(false);
} else {
canonicalize(self.base_path.join(path))?
};
let match_opts = MatchOptions {
case_sensitive: true,
require_literal_separator: true,
require_literal_leading_dot: false,
};
let path_str = path.to_str().ok_or(Error::NonUtf8Path)?;
let to_match = if path.is_dir() {
format!("{path_str}/")
} else {
path_str.to_owned()
};
for rule in &self.rules {
for pattern in rule.patterns.iter() {
// TODO: handle negation properly
// negation should include
if rule.negate {
continue;
}
if pattern.matches_with(&to_match, match_opts) {
return Ok(true);
}
}
}
Ok(false)
}

/// Add ad hoc rule from a pattern
pub fn add_rule(&mut self, pattern: String) -> Result<()> {
if let Some(rule) = Rule::parse(pattern, &self.base_path, usize::MAX)? {
self.rules.push(rule);
}
Ok(())
}
}

#[cfg(test)]
mod tests {
    use std::io::Write;

    use tempdir::TempDir;

    use super::*;

    /// Directories of the fake workspace created for every test.
    const FIXTURE_DIRS: &[&str] = &[
        "config",
        "xtask",
        "crates/gitignore",
        "crates/llm-ls/src",
        "crates/llm-ls/config",
        "crates/lsp-client/src",
        "crates/mock_server",
        "crates/testbed/src",
        "crates/testbed/repositories/simple/src",
    ];

    /// Empty files of the fake workspace created for every test.
    const FIXTURE_FILES: &[&str] = &[
        "LICENSE",
        "config.yaml",
        "Cargo.toml",
        "README.md",
        "crates/gitignore/Cargo.toml",
        "crates/llm-ls/config.yaml",
        "crates/llm-ls/Cargo.toml",
        "crates/llm-ls/src/main.rs",
        "crates/lsp-client/Cargo.toml",
        "crates/lsp-client/src/lib.rs",
        "crates/mock_server/Cargo.toml",
        "crates/testbed/Cargo.toml",
        "crates/testbed/src/main.rs",
        "crates/testbed/repositories/simple/src/main.rs",
    ];

    /// Builds the fake workspace in a temporary directory, writes `rules` to
    /// an ignore file called `name` at its root and parses that file.
    ///
    /// The `TempDir` is returned so the workspace outlives the test body.
    fn create_gitignore(rules: &str, name: &str) -> (TempDir, Gitignore) {
        let temp_dir = TempDir::new(name).unwrap();
        let root = temp_dir.path();
        for dir in FIXTURE_DIRS {
            std::fs::create_dir_all(root.join(dir)).unwrap();
        }
        for file in FIXTURE_FILES {
            std::fs::File::create(root.join(file)).unwrap();
        }
        let gitignore_path = root.join(name);
        std::fs::File::create(&gitignore_path)
            .unwrap()
            .write_all(rules.as_bytes())
            .unwrap();
        let gitignore = Gitignore::parse(gitignore_path).unwrap();
        (temp_dir, gitignore)
    }

    /// Asserts that every path in `paths` is ignored.
    fn assert_ignored(gitignore: &Gitignore, paths: &[&str]) {
        for path in paths {
            assert!(
                gitignore.ignored(path).unwrap(),
                "`{path}` should be ignored"
            );
        }
    }

    /// Asserts that none of the paths in `paths` is ignored.
    fn assert_not_ignored(gitignore: &Gitignore, paths: &[&str]) {
        for path in paths {
            assert!(
                !gitignore.ignored(path).unwrap(),
                "`{path}` should not be ignored"
            );
        }
    }

    #[test]
    fn test_regular_relative_pattern() {
        let (_temp_dir, gitignore) = create_gitignore("Cargo.toml", "regular_relative_pattern");
        assert_ignored(&gitignore, &["Cargo.toml"]);
        assert_not_ignored(&gitignore, &["LICENSE"]);
    }

    #[test]
    fn test_glob_pattern() {
        let (_temp_dir, gitignore) = create_gitignore("crates/**/Cargo.toml", "glob_pattern");
        assert_ignored(
            &gitignore,
            &[
                "crates/gitignore/Cargo.toml",
                "crates/llm-ls/Cargo.toml",
                "crates/lsp-client/Cargo.toml",
                "crates/mock_server/Cargo.toml",
                "crates/testbed/Cargo.toml",
            ],
        );
        assert_not_ignored(
            &gitignore,
            &[
                "crates/llm-ls/src/main.rs",
                "crates/lsp-client/src/lib.rs",
                "crates/testbed/src/main.rs",
            ],
        );
    }

    #[test]
    fn test_dir_start_glob_pattern() {
        let (_temp_dir, gitignore) = create_gitignore("**/crates/", "start_glob_pattern");
        assert_ignored(
            &gitignore,
            &[
                "crates/",
                "crates/llm-ls/Cargo.toml",
                "crates/testbed/repositories/simple/src/main.rs",
            ],
        );
        assert_not_ignored(&gitignore, &["xtask/", "README.md"]);
    }

    #[test]
    fn test_dir_relative_path() {
        let (_temp_dir, gitignore) = create_gitignore("crates/", "relative_path");
        assert_ignored(
            &gitignore,
            &[
                "crates/",
                "crates/llm-ls/Cargo.toml",
                "crates/testbed/repositories/simple/src/main.rs",
            ],
        );
        assert_not_ignored(&gitignore, &["xtask/", "README.md"]);
    }

    // TODO: enable once negated rules are supported by `Gitignore::ignored`.
    // #[test]
    // fn test_negate_pattern() {
    //     let (_temp_dir, gitignore) = create_gitignore(
    //         "aaa/*\n\
    //         !aaa/Cargo.toml",
    //         "negate_pattern",
    //     );
    //     assert!(!gitignore.ignored("aaa/Cargo.toml").unwrap());
    //     assert!(gitignore.ignored("aaa/config.yaml").unwrap());
    // }

    #[test]
    fn test_ad_hoc_rule_add() {
        let (_temp_dir, mut gitignore) = create_gitignore("!Cargo.toml", "adhoc_add");
        assert_not_ignored(&gitignore, &["config.yaml", "Cargo.toml"]);
        gitignore.add_rule("config.yaml".to_owned()).unwrap();
        assert_ignored(&gitignore, &["config.yaml"]);
    }

    #[test]
    fn test_anchored_file_or_dir() {
        let (_temp_dir, gitignore) = create_gitignore("/config*", "anchored_file_or_dir");
        assert_ignored(&gitignore, &["config.yaml", "config"]);
        assert_not_ignored(
            &gitignore,
            &["crates/llm-ls/config.yaml", "crates/llm-ls/config"],
        );
    }
}
14 changes: 14 additions & 0 deletions crates/llm-ls/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,15 @@ edition = "2021"
name = "llm-ls"

[dependencies]
candle = { version = "0.4", package = "candle-core", default-features = false }
candle-nn = "0.4"
candle-transformers = "0.4"
clap = { version = "4", features = ["derive"] }
config = { version = "0.14", features = ["yaml"], default-features = false }
custom-types = { path = "../custom-types" }
futures-util = "0.3"
gitignore = { path = "../gitignore" }
hf-hub = { version = "0.3", features = ["tokio"] }
home = "0.5"
ropey = { version = "1.6", default-features = false, features = [
"simd",
Expand All @@ -19,8 +26,10 @@ reqwest = { version = "0.11", default-features = false, features = [
"rustls-tls",
] }
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"
serde_json = "1"
thiserror = "1"
tinyvec-embed = { path = "../tinyvec-embed" }
tokenizers = { version = "0.15", default-features = false, features = ["onig"] }
tokio = { version = "1", features = [
"fs",
Expand All @@ -38,6 +47,7 @@ tree-sitter-bash = "0.20"
tree-sitter-c = "0.20"
tree-sitter-cpp = "0.20"
tree-sitter-c-sharp = "0.20"
tree-sitter-css = "0.20"
tree-sitter-elixir = "0.1"
tree-sitter-erlang = "0.4"
tree-sitter-go = "0.20"
Expand All @@ -49,6 +59,7 @@ tree-sitter-kotlin = "0.3.1"
tree-sitter-lua = "0.0.19"
tree-sitter-md = "0.1"
tree-sitter-objc = "3"
tree-sitter-php = "0.22"
tree-sitter-python = "0.20"
tree-sitter-r = "0.19"
tree-sitter-ruby = "0.20"
Expand All @@ -60,3 +71,6 @@ tree-sitter-typescript = "0.20"
[dependencies.uuid]
version = "1.4"
features = ["v4", "fast-rng", "serde"]

[dev-dependencies]
lsp-client = { path = "../lsp-client" }
Loading