Skip to content

Commit 9828708

Browse files
authored
perf: parallelize Remapping::get_many (#314)
No logic changes except: 2nd commit removes walkdir and doesn't sort by file name anymore because order doesn't matter when we run it in parallel
1 parent 0a1c495 commit 9828708

File tree

2 files changed

+101
-91
lines changed

2 files changed

+101
-91
lines changed

crates/artifacts/solc/Cargo.toml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ regex.workspace = true
3131
tokio = { workspace = true, optional = true, features = ["fs"] }
3232
futures-util = { workspace = true, optional = true }
3333

34-
# walkdir
35-
walkdir = { workspace = true, optional = true }
36-
3734
# rayon
3835
rayon = { workspace = true, optional = true }
3936

@@ -48,5 +45,5 @@ foundry-compilers-core = { workspace = true, features = ["test-utils"] }
4845
[features]
4946
async = ["dep:tokio", "dep:futures-util"]
5047
checksum = ["foundry-compilers-core/hasher"]
51-
walkdir = ["dep:walkdir", "foundry-compilers-core/walkdir"]
48+
walkdir = ["rayon", "foundry-compilers-core/walkdir"]
5249
rayon = ["dep:rayon"]

crates/artifacts/solc/src/remappings/find.rs

Lines changed: 100 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
use super::Remapping;
22
use foundry_compilers_core::utils;
3+
use rayon::prelude::*;
34
use std::{
45
collections::{btree_map::Entry, BTreeMap, HashSet},
6+
fs::FileType,
57
path::{Path, PathBuf},
8+
sync::Mutex,
69
};
710

811
const DAPPTOOLS_CONTRACTS_DIR: &str = "src";
@@ -52,6 +55,7 @@ impl Remapping {
5255
/// which would be multiple rededications according to our rules ("governance", "protocol-v2"),
5356
/// are unified into `@aave` by looking at their common ancestor, the root of this subdirectory
5457
/// (`@aave`)
58+
#[instrument(level = "trace", name = "Remapping::find_many")]
5559
pub fn find_many(dir: &Path) -> Vec<Self> {
5660
/// prioritize
5761
/// - ("a", "1/2") over ("a", "1/2/3")
@@ -76,41 +80,30 @@ impl Remapping {
7680
}
7781
}
7882

79-
// all combined remappings from all subdirs
80-
let mut all_remappings = BTreeMap::new();
81-
8283
let is_inside_node_modules = dir.ends_with("node_modules");
84+
let visited_symlink_dirs = Mutex::new(HashSet::new());
8385

84-
let mut visited_symlink_dirs = HashSet::new();
8586
// iterate over all dirs that are children of the root
86-
for dir in walkdir::WalkDir::new(dir)
87-
.sort_by_file_name()
88-
.follow_links(true)
89-
.min_depth(1)
90-
.max_depth(1)
91-
.into_iter()
92-
.filter_entry(|e| !is_hidden(e))
93-
.filter_map(Result::ok)
94-
.filter(|e| e.file_type().is_dir())
95-
{
96-
let depth1_dir = dir.path();
97-
// check all remappings in this depth 1 folder
98-
let candidates = find_remapping_candidates(
99-
depth1_dir,
100-
depth1_dir,
101-
0,
102-
is_inside_node_modules,
103-
&mut visited_symlink_dirs,
104-
);
105-
106-
for candidate in candidates {
107-
if let Some(name) = candidate.window_start.file_name().and_then(|s| s.to_str()) {
108-
insert_prioritized(
109-
&mut all_remappings,
110-
format!("{name}/"),
111-
candidate.source_dir,
112-
);
113-
}
87+
let candidates = read_dir(dir)
88+
.filter(|(_, file_type, _)| file_type.is_dir())
89+
.collect::<Vec<_>>()
90+
.par_iter()
91+
.flat_map_iter(|(dir, _, _)| {
92+
find_remapping_candidates(
93+
dir,
94+
dir,
95+
0,
96+
is_inside_node_modules,
97+
&visited_symlink_dirs,
98+
)
99+
})
100+
.collect::<Vec<_>>();
101+
102+
// all combined remappings from all subdirs
103+
let mut all_remappings = BTreeMap::new();
104+
for candidate in candidates {
105+
if let Some(name) = candidate.window_start.file_name().and_then(|s| s.to_str()) {
106+
insert_prioritized(&mut all_remappings, format!("{name}/"), candidate.source_dir);
114107
}
115108
}
116109

@@ -292,45 +285,33 @@ fn is_lib_dir(dir: &Path) -> bool {
292285
}
293286

294287
/// Returns true if the file is _hidden_
295-
fn is_hidden(entry: &walkdir::DirEntry) -> bool {
296-
entry.file_name().to_str().map(|s| s.starts_with('.')).unwrap_or(false)
288+
fn is_hidden(path: &Path) -> bool {
289+
path.file_name().and_then(|p| p.to_str()).map(|s| s.starts_with('.')).unwrap_or(false)
297290
}
298291

299292
/// Finds all remappings in the directory recursively
300293
///
301-
/// Note: this supports symlinks and will short-circuit if a symlink dir has already been visited, this can occur in pnpm setups: <https://github.com/foundry-rs/foundry/issues/7820>
294+
/// Note: this supports symlinks and will short-circuit if a symlink dir has already been visited,
295+
/// this can occur in pnpm setups: <https://github.com/foundry-rs/foundry/issues/7820>
302296
fn find_remapping_candidates(
303297
current_dir: &Path,
304298
open: &Path,
305299
current_level: usize,
306300
is_inside_node_modules: bool,
307-
visited_symlink_dirs: &mut HashSet<PathBuf>,
301+
visited_symlink_dirs: &Mutex<HashSet<PathBuf>>,
308302
) -> Vec<Candidate> {
303+
trace!("find_remapping_candidates({})", current_dir.display());
304+
309305
// this is a marker if the current root is a candidate for a remapping
310306
let mut is_candidate = false;
311307

312-
// all found candidates
313-
let mut candidates = Vec::new();
314-
315308
// scan all entries in the current dir
316-
for entry in walkdir::WalkDir::new(current_dir)
317-
.sort_by_file_name()
318-
.follow_links(true)
319-
.min_depth(1)
320-
.max_depth(1)
321-
.into_iter()
322-
.filter_entry(|e| !is_hidden(e))
323-
.filter_map(Result::ok)
324-
{
325-
let entry: walkdir::DirEntry = entry;
326-
309+
let mut search = Vec::new();
310+
for (subdir, file_type, path_is_symlink) in read_dir(current_dir) {
327311
// found a solidity file directly the current dir
328-
if !is_candidate
329-
&& entry.file_type().is_file()
330-
&& entry.path().extension() == Some("sol".as_ref())
331-
{
312+
if !is_candidate && file_type.is_file() && subdir.extension() == Some("sol".as_ref()) {
332313
is_candidate = true;
333-
} else if entry.file_type().is_dir() {
314+
} else if file_type.is_dir() {
334315
// if the dir is a symlink to a parent dir we short circuit here
335316
// `walkdir` will catch symlink loops, but this check prevents that we end up scanning a
336317
// workspace like
@@ -339,9 +320,9 @@ fn find_remapping_candidates(
339320
// ├── dep/node_modules
340321
// ├── symlink to `my-package`
341322
// ```
342-
if entry.path_is_symlink() {
343-
if let Ok(target) = utils::canonicalize(entry.path()) {
344-
if !visited_symlink_dirs.insert(target.clone()) {
323+
if path_is_symlink {
324+
if let Ok(target) = utils::canonicalize(&subdir) {
325+
if !visited_symlink_dirs.lock().unwrap().insert(target.clone()) {
345326
// short-circuiting if we've already visited the symlink
346327
return Vec::new();
347328
}
@@ -355,40 +336,46 @@ fn find_remapping_candidates(
355336
}
356337
}
357338

358-
let subdir = entry.path();
359339
// we skip commonly used subdirs that should not be searched for recursively
360-
if !(subdir.ends_with("tests") || subdir.ends_with("test") || subdir.ends_with("demo"))
361-
{
362-
// scan the subdirectory for remappings, but we need a way to identify nested
363-
// dependencies like `ds-token/lib/ds-stop/lib/ds-note/src/contract.sol`, or
364-
// `oz/{tokens,auth}/{contracts, interfaces}/contract.sol` to assign
365-
// the remappings to their root, we use a window that lies between two barriers. If
366-
// we find a solidity file within a window, it belongs to the dir that opened the
367-
// window.
368-
369-
// check if the subdir is a lib barrier, in which case we open a new window
370-
if is_lib_dir(subdir) {
371-
candidates.extend(find_remapping_candidates(
372-
subdir,
373-
subdir,
374-
current_level + 1,
375-
is_inside_node_modules,
376-
visited_symlink_dirs,
377-
));
378-
} else {
379-
// continue scanning with the current window
380-
candidates.extend(find_remapping_candidates(
381-
subdir,
382-
open,
383-
current_level,
384-
is_inside_node_modules,
385-
visited_symlink_dirs,
386-
));
387-
}
340+
if !no_recurse(&subdir) {
341+
search.push(subdir);
388342
}
389343
}
390344
}
391345

346+
// all found candidates
347+
let mut candidates = search
348+
.par_iter()
349+
.flat_map_iter(|subdir| {
350+
// scan the subdirectory for remappings, but we need a way to identify nested
351+
// dependencies like `ds-token/lib/ds-stop/lib/ds-note/src/contract.sol`, or
352+
// `oz/{tokens,auth}/{contracts, interfaces}/contract.sol` to assign
353+
// the remappings to their root, we use a window that lies between two barriers. If
354+
// we find a solidity file within a window, it belongs to the dir that opened the
355+
// window.
356+
357+
// check if the subdir is a lib barrier, in which case we open a new window
358+
if is_lib_dir(subdir) {
359+
find_remapping_candidates(
360+
subdir,
361+
subdir,
362+
current_level + 1,
363+
is_inside_node_modules,
364+
visited_symlink_dirs,
365+
)
366+
} else {
367+
// continue scanning with the current window
368+
find_remapping_candidates(
369+
subdir,
370+
open,
371+
current_level,
372+
is_inside_node_modules,
373+
visited_symlink_dirs,
374+
)
375+
}
376+
})
377+
.collect::<Vec<_>>();
378+
392379
// need to find the actual next window in the event `open` is a lib dir
393380
let window_start = next_nested_window(open, current_dir);
394381
// finally, we need to merge, adjust candidates from the same level and open window
@@ -425,6 +412,32 @@ fn find_remapping_candidates(
425412
candidates
426413
}
427414

415+
/// Returns an iterator over the entries in the directory:
416+
/// `(path, real_file_type, path_is_symlink)`
417+
///
418+
/// File type is the file type of the link if it is a symlink. This mimics the behavior of
419+
/// `walkdir` with `follow_links` set to `true`.
420+
fn read_dir(dir: &Path) -> impl Iterator<Item = (PathBuf, FileType, bool)> {
421+
std::fs::read_dir(dir)
422+
.into_iter()
423+
.flatten()
424+
.filter_map(Result::ok)
425+
.filter_map(|e| {
426+
let path = e.path();
427+
let mut ft = e.file_type().ok()?;
428+
let path_is_symlink = ft.is_symlink();
429+
if path_is_symlink {
430+
ft = std::fs::metadata(&path).ok()?.file_type();
431+
}
432+
Some((path, ft, path_is_symlink))
433+
})
434+
.filter(|(p, _, _)| !is_hidden(p))
435+
}
436+
437+
fn no_recurse(dir: &Path) -> bool {
438+
dir.ends_with("tests") || dir.ends_with("test") || dir.ends_with("demo")
439+
}
440+
428441
/// Counts the number of components between `root` and `current`
429442
/// `dir_distance("root/a", "root/a/b/c") == 2`
430443
fn dir_distance(root: &Path, current: &Path) -> usize {

0 commit comments

Comments
 (0)