Skip to content

Commit ca456be

Browse files
authored
Skip directories to improve performance (#68)
* filter out ignore globs
* ignore dirs
* bumping version
1 parent 35ae73a commit ca456be

File tree

7 files changed

+91
-33
lines changed

7 files changed

+91
-33
lines changed

Cargo.lock

Lines changed: 13 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "codeowners"
3-
version = "0.2.6"
3+
version = "0.2.7"
44
edition = "2024"
55

66
[profile.release]
@@ -12,6 +12,7 @@ path = "src/lib.rs"
1212
[dependencies]
1313
clap = { version = "4.5.20", features = ["derive"] }
1414
clap_derive = "4.5.18"
15+
crossbeam-channel = "0.5.15"
1516
error-stack = "0.5.0"
1617
enum_dispatch = "0.3.13"
1718
fast-glob = "1.0.0"

dev/run_benchmarks_for_gv.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ echo "To run these benchmarks on your application, you can place this repo next
99

1010
hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_benchmarks_gv.md \
1111
'../rubyatscale/codeowners-rs/target/release/codeowners gv' \
12-
'bin/codeownership validate'
12+
'bin/codeownership validate' \
13+
'bin/codeowners-rs gv'

src/config.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ pub struct Config {
2121

2222
#[serde(default = "default_cache_directory")]
2323
pub cache_directory: String,
24+
25+
#[serde(default = "default_ignore_dirs")]
26+
pub ignore_dirs: Vec<String>,
2427
}
2528

2629
#[allow(dead_code)]
@@ -57,6 +60,23 @@ fn vendored_gems_path() -> String {
5760
"vendored/".to_string()
5861
}
5962

63+
fn default_ignore_dirs() -> Vec<String> {
64+
vec![
65+
".cursor".to_owned(),
66+
".git".to_owned(),
67+
".idea".to_owned(),
68+
".vscode".to_owned(),
69+
".yarn".to_owned(),
70+
"ar_doc".to_owned(),
71+
"db".to_owned(),
72+
"helm".to_owned(),
73+
"log".to_owned(),
74+
"node_modules".to_owned(),
75+
"sorbet".to_owned(),
76+
"tmp".to_owned(),
77+
]
78+
}
79+
6080
#[cfg(test)]
6181
mod tests {
6282
use std::{

src/ownership/for_file_fast.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ mod tests {
303303
unowned_globs: vec![],
304304
vendored_gems_path: vendored_path.to_string(),
305305
cache_directory: "tmp/cache/codeowners".to_string(),
306+
ignore_dirs: vec![],
306307
}
307308
}
308309

src/project_builder.rs

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ use std::{
44
sync::{Arc, Mutex},
55
};
66

7-
use error_stack::{Result, ResultExt};
7+
use error_stack::{Report, Result, ResultExt};
88
use fast_glob::glob_match;
9-
use ignore::{WalkBuilder, WalkParallel, WalkState};
9+
use ignore::{DirEntry, WalkBuilder, WalkParallel, WalkState};
1010
use rayon::iter::{IntoParallelIterator, ParallelIterator};
1111
use tracing::{instrument, warn};
1212

@@ -51,53 +51,81 @@ impl<'a> ProjectBuilder<'a> {
5151

5252
#[instrument(level = "debug", skip_all)]
5353
pub fn build(&mut self) -> Result<Project, Error> {
54-
let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY);
5554
let mut builder = WalkBuilder::new(&self.base_path);
5655
builder.hidden(false);
56+
builder.follow_links(false);
57+
// Prune traversal early: skip heavy and irrelevant directories
58+
let ignore_dirs = self.config.ignore_dirs.clone();
59+
let base_path = self.base_path.clone();
60+
61+
builder.filter_entry(move |entry: &DirEntry| {
62+
let path = entry.path();
63+
let file_name = entry.file_name().to_str().unwrap_or("");
64+
if let Some(ft) = entry.file_type() {
65+
if ft.is_dir() {
66+
if let Ok(rel) = path.strip_prefix(&base_path) {
67+
if rel.components().count() == 1 && ignore_dirs.iter().any(|d| *d == file_name) {
68+
return false;
69+
}
70+
}
71+
}
72+
}
73+
74+
true
75+
});
76+
5777
let walk_parallel: WalkParallel = builder.build_parallel();
5878

59-
let collected = Arc::new(Mutex::new(Vec::with_capacity(INITIAL_VECTOR_CAPACITY)));
60-
let collected_for_threads = Arc::clone(&collected);
79+
let (tx, rx) = crossbeam_channel::unbounded::<EntryType>();
80+
let error_holder: Arc<Mutex<Option<Report<Error>>>> = Arc::new(Mutex::new(None));
81+
let error_holder_for_threads = Arc::clone(&error_holder);
82+
83+
let this: &ProjectBuilder<'a> = self;
6184

6285
walk_parallel.run(move || {
63-
let collected = Arc::clone(&collected_for_threads);
86+
let error_holder = Arc::clone(&error_holder_for_threads);
87+
let tx = tx.clone();
6488
Box::new(move |res| {
6589
if let Ok(entry) = res {
66-
if let Ok(mut v) = collected.lock() {
67-
v.push(entry);
90+
match this.build_entry_type(entry) {
91+
Ok(entry_type) => {
92+
let _ = tx.send(entry_type);
93+
}
94+
Err(report) => {
95+
if let Ok(mut slot) = error_holder.lock() {
96+
if slot.is_none() {
97+
*slot = Some(report);
98+
}
99+
}
100+
}
68101
}
69102
}
70103
WalkState::Continue
71104
})
72105
});
73106

74-
// Process sequentially with &mut self without panicking on Arc/Mutex unwraps
75-
let collected_entries = match Arc::try_unwrap(collected) {
76-
// We are the sole owner of the Arc
107+
// Take ownership of the collected entry types
108+
let entry_types: Vec<EntryType> = rx.iter().collect();
109+
110+
// If any error occurred while building entry types, return it
111+
let maybe_error = match Arc::try_unwrap(error_holder) {
77112
Ok(mutex) => match mutex.into_inner() {
78-
// Mutex not poisoned
79-
Ok(entries) => entries,
80-
// Recover entries even if the mutex was poisoned
113+
Ok(err_opt) => err_opt,
81114
Err(poisoned) => poisoned.into_inner(),
82115
},
83-
// There are still other Arc references; lock and take the contents
84116
Err(arc) => match arc.lock() {
85-
Ok(mut guard) => std::mem::take(&mut *guard),
86-
// Recover guard even if poisoned, then take contents
87-
Err(poisoned) => {
88-
let mut guard = poisoned.into_inner();
89-
std::mem::take(&mut *guard)
90-
}
117+
Ok(mut guard) => guard.take(),
118+
Err(poisoned) => poisoned.into_inner().take(),
91119
},
92120
};
93-
for entry in collected_entries {
94-
entry_types.push(self.build_entry_type(entry)?);
121+
if let Some(report) = maybe_error {
122+
return Err(report);
95123
}
96124

97125
self.build_project_from_entry_types(entry_types)
98126
}
99127

100-
fn build_entry_type(&mut self, entry: ignore::DirEntry) -> Result<EntryType, Error> {
128+
fn build_entry_type(&self, entry: ignore::DirEntry) -> Result<EntryType, Error> {
101129
let absolute_path = entry.path();
102130

103131
let is_dir = entry.file_type().ok_or(Error::Io).change_context(Error::Io)?.is_dir();

src/project_file_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ impl<'a> ProjectFileBuilder<'a> {
2121
Self { global_cache }
2222
}
2323

24-
pub(crate) fn build(&mut self, path: PathBuf) -> ProjectFile {
24+
pub(crate) fn build(&self, path: PathBuf) -> ProjectFile {
2525
if let Ok(Some(cached_project_file)) = self.get_project_file_from_cache(&path) {
2626
return cached_project_file;
2727
}

0 commit comments

Comments
 (0)