diff --git a/.importlinter b/.importlinter index c58f0d92..097c591d 100644 --- a/.importlinter +++ b/.importlinter @@ -21,3 +21,4 @@ ignore_imports = grimp.application.graph -> grimp grimp.adaptors.filesystem -> grimp grimp.application.scanning -> grimp + grimp.application.usecases -> grimp diff --git a/benchmark_build_graph.py b/benchmark_build_graph.py new file mode 100644 index 00000000..aa52e036 --- /dev/null +++ b/benchmark_build_graph.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +"""Benchmark build_graph vs build_graph_rust.""" + +import argparse +import os +import shutil +import sys +import time +from dataclasses import dataclass +from typing import Callable + +import grimp + + +@dataclass +class BenchmarkResult: + """Result of a single benchmark run.""" + + name: str + elapsed: float + modules: int + imports: int + + +def run_benchmark( + name: str, + build_func: Callable, + package_name: str, + cache_dir: str | None, +) -> BenchmarkResult: + """Run a single benchmark and return the result.""" + print(f"\n{name}:") + start = time.perf_counter() + graph = build_func(package_name, cache_dir=cache_dir) + elapsed = time.perf_counter() - start + + modules = len(graph.modules) + imports = len(graph.find_matching_direct_imports(import_expression="** -> **")) + + print(f" Time: {elapsed:.3f}s") + print(f" Modules: {modules}") + print(f" Imports: {imports}") + + return BenchmarkResult(name, elapsed, modules, imports) + + +def cleanup_cache_dir(cache_dir: str) -> None: + """Remove cache directory.""" + if os.path.exists(cache_dir): + shutil.rmtree(cache_dir) + + +def print_comparison( + py_results: list[BenchmarkResult], rust_results: list[BenchmarkResult] +) -> None: + """Print comparison of benchmark results.""" + py_no_cache, py_cold, py_warm = py_results + rust_no_cache, rust_cold, rust_warm = rust_results + + print("\n" + "=" * 60) + print("Comparison:") + print(f" Python (no cache): {py_no_cache.elapsed:.3f}s") + print( + f" Python (cold cache): {py_cold.elapsed:.3f}s " + f"({py_no_cache.elapsed / py_cold.elapsed:.2f}x speedup)" + ) + print( + f" Python (warm cache): {py_warm.elapsed:.3f}s " + f"({py_no_cache.elapsed / py_warm.elapsed:.2f}x speedup)" + ) + print( + f" Rust (no cache): {rust_no_cache.elapsed:.3f}s " + f"({py_no_cache.elapsed / rust_no_cache.elapsed:.2f}x vs Python no cache)" + ) + print( + f" Rust (cold cache): {rust_cold.elapsed:.3f}s " + f"({py_no_cache.elapsed / rust_cold.elapsed:.2f}x vs Python no cache)" + ) + print( + f" Rust (warm cache): {rust_warm.elapsed:.3f}s " + f"({py_no_cache.elapsed / rust_warm.elapsed:.2f}x vs Python no cache)" + ) + print(f"\n Python cache speedup: {py_no_cache.elapsed / py_warm.elapsed:.2f}x") + print(f" Rust cache speedup: {rust_no_cache.elapsed / rust_warm.elapsed:.2f}x") + + # Verify correctness + if py_no_cache.modules != rust_no_cache.modules: + print( + f"\nāš ļø Warning: Module count mismatch " + f"({py_no_cache.modules} vs {rust_no_cache.modules})" + ) + if py_no_cache.imports != rust_no_cache.imports: + print( + f"āš ļø Warning: Import count mismatch ({py_no_cache.imports} vs {rust_no_cache.imports})" + ) + + +def benchmark_build_graph(package_name: str, working_dir: str | None = None) -> None: + """Benchmark both graph building implementations.""" + if working_dir: + os.chdir(working_dir) + print(f"Changed directory to: {working_dir}") + # Add working directory to Python path + if working_dir not in sys.path: + sys.path.insert(0, working_dir) + print(f"Added to PYTHONPATH: {working_dir}\n") + + print(f"Benchmarking graph 
building for package: {package_name}") + print("=" * 60) + + cache_dir = ".grimp_cache_benchmark" + + # Benchmark Python version + py_no_cache = run_benchmark( + "Python version without cache (build_graph)", + grimp.build_graph, + package_name, + None, + ) + + cleanup_cache_dir(cache_dir) + py_cold = run_benchmark( + "Python version with cache - first run (cold cache)", + grimp.build_graph, + package_name, + cache_dir, + ) + + py_warm = run_benchmark( + "Python version with cache - second run (warm cache)", + grimp.build_graph, + package_name, + cache_dir, + ) + + # Benchmark Rust version + rust_no_cache = run_benchmark( + "Rust version without cache (build_graph_rust)", + grimp.build_graph_rust, + package_name, + None, + ) + + cleanup_cache_dir(cache_dir) + rust_cold = run_benchmark( + "Rust version with cache - first run (cold cache)", + grimp.build_graph_rust, + package_name, + cache_dir, + ) + + rust_warm = run_benchmark( + "Rust version with cache - second run (warm cache)", + grimp.build_graph_rust, + package_name, + cache_dir, + ) + + cleanup_cache_dir(cache_dir) + + # Print comparison + print_comparison( + [py_no_cache, py_cold, py_warm], + [rust_no_cache, rust_cold, rust_warm], + ) + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark build_graph vs build_graph_rust") + parser.add_argument("package", help="Package name to analyze") + parser.add_argument("-d", "--directory", help="Working directory to change to before running") + + args = parser.parse_args() + + try: + benchmark_build_graph(args.package, args.directory) + except Exception as e: + print(f"\nError: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/justfile b/justfile index b0db10a3..b2622a90 100644 --- a/justfile +++ b/justfile @@ -12,6 +12,11 @@ install-precommit: compile: @uv run maturin develop +# Compiles the Rust code in release mode. +[group('testing')] +compile-release: + @uv run maturin develop --release + # Compiles Rust, then runs Rust and Python tests. 
[group('testing')] compile-and-test: @@ -169,4 +174,7 @@ full-check: @just lint @just build-docs @just test-all - @echo 'šŸ‘ {{GREEN}} Linting, docs and tests all good.{{NORMAL}}' \ No newline at end of file + @echo 'šŸ‘ {{GREEN}} Linting, docs and tests all good.{{NORMAL}}' + +benchmark-build-graph-rust-vs-python package_name package_dir: compile-release + uv run benchmark_build_graph.py {{package_name}} -d {{package_dir}} diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a00f0b1b..51f21a15 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -7,12 +7,17 @@ name = "_rustgrimp" version = "0.1.0" dependencies = [ "bimap", + "bincode", "const_format", + "crossbeam", "derive-new", "encoding_rs", + "filetime", "getset", + "ignore", "indexmap 2.11.0", "itertools 0.14.0", + "map-macro", "parameterized", "pyo3", "rayon", @@ -27,6 +32,7 @@ dependencies = [ "slotmap", "string-interner", "tap", + "tempfile", "thiserror", "unindent", ] @@ -52,6 +58,26 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + [[package]] name = "bitflags" version = "2.9.4" @@ -95,6 +121,28 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -114,6 +162,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -152,6 +209,34 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" 
+dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + [[package]] name = "foldhash" version = "0.1.5" @@ -178,6 +263,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "getset" version = "0.1.6" @@ -190,6 +287,19 @@ dependencies = [ "syn", ] +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -211,6 +321,22 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -279,12 +405,35 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +[[package]] +name = "libredox" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "log" version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "map-macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb950a42259642e5a3483115aca87eebed2a64886993463af9c9739c205b8d3a" + [[package]] name = "memchr" version = "2.7.5" @@ -481,6 +630,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "rand" version = "0.8.5" @@ -508,7 +663,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.16", ] [[package]] @@ -531,6 +686,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.11.2" @@ -632,12 +796,34 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.219" @@ -737,6 +923,19 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + [[package]] name = "thiserror" version = "2.0.16" @@ -833,18 +1032,153 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + 
+[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + [[package]] name = "zerocopy" version = "0.8.26" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 4eb4e139..fd1097cc 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -29,6 +29,9 @@ serde_json = "1.0.137" serde_yaml = "0.9" unindent = "0.2.4" encoding_rs = "0.8.35" +ignore = "0.4" +crossbeam = "0.8" +bincode = "2.0.0-rc.3" [dependencies.pyo3] version = "0.26.0" @@ -40,3 +43,6 @@ default = ["extension-module"] [dev-dependencies] parameterized = "2.0.0" serde_json = "1.0.143" +tempfile = "3.14.0" +filetime = "0.2.26" +map-macro = "0.3.0" diff --git a/rust/src/errors.rs b/rust/src/errors.rs index 2a894edb..cd06f076 100644 --- a/rust/src/errors.rs +++ b/rust/src/errors.rs @@ -1,6 +1,6 @@ use crate::exceptions; use pyo3::PyErr; -use pyo3::exceptions::PyValueError; +use pyo3::exceptions::{PyFileNotFoundError, PyIOError, PyValueError}; use ruff_python_parser::ParseError as RuffParseError; use thiserror::Error; @@ -36,6 +36,21 @@ pub enum GrimpError { #[error("Could 
not use corrupt cache file {0}.")] CorruptCache(String), + + #[error("Error walking directory: {error}")] + WalkDirError { error: String }, + + #[error("Failed to read file {path}: {error}")] + FileReadError { path: String, error: String }, + + #[error("Failed to get file metadata for {path}: {error}")] + FileMetadataError { path: String, error: String }, + + #[error("Failed to write cache file {path}: {error}")] + CacheWriteError { path: String, error: String }, + + #[error("Package directory does not exist: {0}")] + PackageDirectoryNotFound(String), } pub type GrimpResult<T> = Result<T, GrimpError>; @@ -55,6 +70,13 @@ impl From<GrimpError> for PyErr { line_number, text, .. } => PyErr::new::<exceptions::ParseError, _>((line_number, text)), GrimpError::CorruptCache(_) => exceptions::CorruptCache::new_err(value.to_string()), + GrimpError::WalkDirError { .. } => PyIOError::new_err(value.to_string()), + GrimpError::FileReadError { .. } => PyIOError::new_err(value.to_string()), + GrimpError::FileMetadataError { .. } => PyIOError::new_err(value.to_string()), + GrimpError::CacheWriteError { .. } => PyIOError::new_err(value.to_string()), + GrimpError::PackageDirectoryNotFound(_) => { + PyFileNotFoundError::new_err(value.to_string()) + } } } } diff --git a/rust/src/graph/builder/imports_cache.rs b/rust/src/graph/builder/imports_cache.rs new file mode 100644 index 00000000..fea03fd0 --- /dev/null +++ b/rust/src/graph/builder/imports_cache.rs @@ -0,0 +1,169 @@ +use std::collections::HashMap; +use std::fs; +use std::io::{Read as _, Write as _}; +use std::path::{Path, PathBuf}; + +use bincode::{Decode, Encode}; + +use crate::errors::{GrimpError, GrimpResult}; +use crate::import_parsing::ImportedObject; + +/// Cache for storing parsed import information. +#[derive(Debug, Clone)] +pub struct ImportsCache { + cache_dir: PathBuf, + // Map of package name to package imports. + cache: HashMap<String, HashMap<String, CachedImports>>, +} + +impl ImportsCache { + /// Get cached imports for a module if they exist and the mtime matches. + pub fn get_imports( + &self, + package_name: &str, + module_name: &str, + mtime_secs: i64, + ) -> Option<Vec<ImportedObject>> { + let package_cache = self.cache.get(package_name)?; + let cached = package_cache.get(module_name)?; + if cached.mtime_secs() == mtime_secs { + Some(cached.imported_objects().to_vec()) + } else { + None + } + } + + /// Store parsed imports for a module. + pub fn set_imports( + &mut self, + package_name: String, + module_name: String, + mtime_secs: i64, + imports: Vec<ImportedObject>, + ) { + self.cache + .entry(package_name) + .or_default() + .insert(module_name, CachedImports::new(mtime_secs, imports)); + } + + /// Save cache to disk.
+ pub fn save(&self) -> GrimpResult<()> { + fs::create_dir_all(&self.cache_dir).map_err(|e| GrimpError::CacheWriteError { + path: self.cache_dir.display().to_string(), + error: e.to_string(), + })?; + + // Write marker files if they don't exist + self.write_marker_files_if_missing()?; + + for (package_name, package_cache) in &self.cache { + let cache_file = self + .cache_dir + .join(format!("{}.imports.bincode", package_name)); + + let encoded = bincode::encode_to_vec(package_cache, bincode::config::standard()) + .map_err(|e| GrimpError::CacheWriteError { + path: cache_file.display().to_string(), + error: e.to_string(), + })?; + + let mut file = + fs::File::create(&cache_file).map_err(|e| GrimpError::CacheWriteError { + path: cache_file.display().to_string(), + error: e.to_string(), + })?; + + file.write_all(&encoded) + .map_err(|e| GrimpError::CacheWriteError { + path: cache_file.display().to_string(), + error: e.to_string(), + })?; + } + + Ok(()) + } + + /// Write marker files (.gitignore and CACHEDIR.TAG) if they don't already exist. + fn write_marker_files_if_missing(&self) -> GrimpResult<()> { + let marker_files = [ + (".gitignore", "# Automatically created by Grimp.\n*"), + ( + "CACHEDIR.TAG", + "Signature: 8a477f597d28d172789f06886806bc55\n\ + # This file is a cache directory tag automatically created by Grimp.\n\ + # For information about cache directory tags see https://bford.info/cachedir/", + ), + ]; + + for (filename, contents) in &marker_files { + let full_path = self.cache_dir.join(filename); + if !full_path.exists() { + fs::write(&full_path, contents).map_err(|e| GrimpError::CacheWriteError { + path: full_path.display().to_string(), + error: e.to_string(), + })?; + } + } + + Ok(()) + } + + /// Load cache from disk. + pub fn load(cache_dir: &Path, package_names: &[String]) -> Self { + let mut cache = HashMap::new(); + + for package_name in package_names { + let cache_file = cache_dir.join(format!("{}.imports.bincode", package_name)); + + let package_cache = if let Ok(mut file) = fs::File::open(&cache_file) { + let mut buffer = Vec::new(); + if file.read_to_end(&mut buffer).is_ok() { + if let Ok(decoded) = bincode::decode_from_slice::< + HashMap, + _, + >(&buffer, bincode::config::standard()) + { + decoded.0 + } else { + HashMap::new() + } + } else { + HashMap::new() + } + } else { + HashMap::new() + }; + + cache.insert(package_name.clone(), package_cache); + } + + ImportsCache { + cache, + cache_dir: cache_dir.to_path_buf(), + } + } +} + +#[derive(Debug, Clone, Encode, Decode)] +struct CachedImports { + mtime_secs: i64, + imported_objects: Vec, +} + +impl CachedImports { + fn new(mtime_secs: i64, imported_objects: Vec) -> Self { + Self { + mtime_secs, + imported_objects, + } + } + + fn mtime_secs(&self) -> i64 { + self.mtime_secs + } + + fn imported_objects(&self) -> &[ImportedObject] { + &self.imported_objects + } +} diff --git a/rust/src/graph/builder/mod.rs b/rust/src/graph/builder/mod.rs new file mode 100644 index 00000000..bda363ca --- /dev/null +++ b/rust/src/graph/builder/mod.rs @@ -0,0 +1,374 @@ +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::path::PathBuf; + +use derive_new::new; +use ignore::WalkBuilder; +use rayon::prelude::*; + +use crate::errors::{GrimpError, GrimpResult}; +use crate::graph::Graph; +use crate::import_parsing::{ImportedObject, parse_imports_from_code}; + +mod imports_cache; +use imports_cache::ImportsCache; + +mod utils; +use utils::{ + ResolvedImport, distill_external_module, is_internal, is_package, path_to_module_name, + 
resolve_internal_module, resolve_relative_import, +}; + +mod read_python_file; +use read_python_file::read_python_file; + +#[derive(Debug, Clone, new)] +pub struct PackageSpec { + #[new(into)] + name: String, + #[new(into)] + directory: PathBuf, +} + +pub fn build_graph( + packages: &[PackageSpec], + include_external_packages: bool, + exclude_type_checking_imports: bool, + cache_dir: Option<&PathBuf>, +) -> GrimpResult<Graph> { + // Check all package directories exist + for package in packages { + if !package.directory.exists() { + return Err(GrimpError::PackageDirectoryNotFound( + package.directory.display().to_string(), + )); + } + } + let package_names: HashSet<String> = packages.iter().map(|p| p.name.clone()).collect(); + + // Discover all modules + let discovered_modules = discover_modules(packages)?; + + // Parse all modules in parallel + let parsed_modules = parse_modules(&discovered_modules, cache_dir)?; + + // Resolve imports and assemble graph + let imports_by_module = resolve_imports( + &parsed_modules, + include_external_packages, + exclude_type_checking_imports, + &package_names, + ); + + Ok(assemble_graph(&imports_by_module, &package_names)) +} + +#[derive(Debug, Clone)] +struct FoundModule { + package_name: String, + name: String, + path: PathBuf, + is_package: bool, + mtime_secs: i64, +} + +#[derive(Debug)] +struct ParsedModule { + module: FoundModule, + imported_objects: Vec<ImportedObject>, +} + +/// Discover all Python modules in one or more packages. +/// +/// Walks all package directories to find Python files and returns +/// a vector of discovered modules with their metadata. +fn discover_modules(packages: &[PackageSpec]) -> GrimpResult<Vec<FoundModule>> { + let packages: Vec<PackageSpec> = packages.to_vec(); + + let mut builder = WalkBuilder::new(&packages[0].directory); + for package in &packages[1..]
{ + builder.add(&package.directory); + } + builder + .standard_filters(false) // Don't use gitignore or other filters + .hidden(true) // Ignore hidden files/directories + .filter_entry(|entry| { + // Allow Python files (but skip files with multiple dots like dotted.module.py) + if entry.file_type().is_some_and(|ft| ft.is_file()) { + if entry.path().extension().and_then(|s| s.to_str()) == Some("py") { + // Check if filename has multiple dots (invalid Python module names) + if let Some(file_name) = entry.file_name().to_str() { + return file_name.matches('.').count() == 1; // Only the .py extension + } + } + return false; + } + + // For directories, only descend if they contain __init__.py + if entry.file_type().is_some_and(|ft| ft.is_dir()) { + let init_path = entry.path().join("__init__.py"); + return init_path.exists(); + } + + false + }); + + let mut discovered_modules = Vec::new(); + for result in builder.build() { + let entry = result.map_err(|e| GrimpError::WalkDirError { + error: format!("Error walking directory: {}", e), + })?; + + let path = entry.path(); + + // Only process files, not directories + if !path.is_file() { + continue; + } + + // Find which package this file belongs to by checking if path starts with package directory + let package = packages + .iter() + .find(|pkg| path.starts_with(&pkg.directory)) + .unwrap(); + + if let Some(module_name) = path_to_module_name(path, package) { + let is_package = is_package(path); + + // Get mtime + let mtime_secs = fs::metadata(path) + .and_then(|m| m.modified()) + .ok() + .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|d| d.as_secs() as i64) + .unwrap_or(0); + + discovered_modules.push(FoundModule { + package_name: package.name.clone(), + name: module_name, + path: path.to_owned(), + is_package, + mtime_secs, + }); + } + } + + Ok(discovered_modules) +} + +/// Parse all discovered modules in parallel using rayon. +/// +/// # Error Handling +/// +/// If any module fails to parse, we return the first error encountered and stop processing. +/// +/// # Returns +/// +/// Returns a vector of parsed modules, or the first error encountered. 
+fn parse_modules( + modules: &[FoundModule], + cache_dir: Option<&PathBuf>, +) -> GrimpResult<Vec<ParsedModule>> { + // Load cache for all packages if available + let mut cache: Option<ImportsCache> = cache_dir.map(|dir| { + let package_names: Vec<String> = modules + .iter() + .map(|m| m.package_name.clone()) + .collect::<HashSet<_>>() + .into_iter() + .collect(); + ImportsCache::load(dir, &package_names) + }); + + // Parse all modules in parallel using rayon + let parsed_modules: Result<Vec<ParsedModule>, GrimpError> = modules + .par_iter() + .map(|module| parse_module_imports(module, cache.as_ref())) + .collect(); + + let parsed_modules = parsed_modules?; + + // Update and save cache + if let Some(cache) = &mut cache { + for parsed in &parsed_modules { + cache.set_imports( + parsed.module.package_name.clone(), + parsed.module.name.clone(), + parsed.module.mtime_secs, + parsed.imported_objects.clone(), + ); + } + cache.save()?; + } + + Ok(parsed_modules) +} + +fn parse_module_imports( + module: &FoundModule, + cache: Option<&ImportsCache>, +) -> GrimpResult<ParsedModule> { + // Check if we have a cached version with matching mtime + if let Some(cache) = cache + && let Some(imported_objects) = + cache.get_imports(&module.package_name, &module.name, module.mtime_secs) + { + // Cache hit - use cached imports + return Ok(ParsedModule { + module: module.clone(), + imported_objects, + }); + } + + // Cache miss or file modified - parse the file + let code = read_python_file(&module.path)?; + + let imported_objects = parse_imports_from_code(&code, module.path.to_str().unwrap_or(""))?; + + Ok(ParsedModule { + module: module.clone(), + imported_objects, + }) +} + +fn resolve_imports( + parsed_modules: &[ParsedModule], + include_external_packages: bool, + exclude_type_checking_imports: bool, + packages: &HashSet<String>, +) -> HashMap<String, HashSet<ResolvedImport>> { + let all_modules: HashSet<String> = parsed_modules + .iter() + .map(|module| module.module.name.clone()) + .collect(); + + let mut imports_by_module = HashMap::new(); + for parsed_module in parsed_modules { + let mut resolved_imports = HashSet::new(); + + // Resolve each imported object + for imported_object in &parsed_module.imported_objects { + // Skip type checking imports if requested + if exclude_type_checking_imports && imported_object.typechecking_only { + continue; + } + + // Resolve relative imports to absolute + let absolute_import_name = resolve_relative_import( + &parsed_module.module.name, + parsed_module.module.is_package, + &imported_object.name, + ); + + // Try to resolve as internal module first + if let Some(internal_module) = + resolve_internal_module(&absolute_import_name, &all_modules) + { + resolved_imports.insert(ResolvedImport { + importer: parsed_module.module.name.to_string(), + imported: internal_module, + line_number: imported_object.line_number, + line_contents: imported_object.line_contents.clone(), + }); + } else if include_external_packages { + // Try to resolve as an external module + if let Some(external_module) = + distill_external_module(&absolute_import_name, packages) + { + resolved_imports.insert(ResolvedImport { + importer: parsed_module.module.name.to_string(), + imported: external_module, + line_number: imported_object.line_number, + line_contents: imported_object.line_contents.clone(), + }); + } + } + } + + imports_by_module.insert(parsed_module.module.name.clone(), resolved_imports); + } + + imports_by_module +} + +fn assemble_graph( + imports_by_module: &HashMap<String, HashSet<ResolvedImport>>, + package_names: &HashSet<String>, +) -> Graph { + let mut graph = Graph::default(); + + // Add all modules and their imports + for (module_name, imports) in
imports_by_module { + // Add the module itself and get its token + let importer_token = graph.get_or_add_module(module_name).token(); + + for import in imports { + // Add the imported module + let imported_token = if is_internal(&import.imported, package_names) { + graph.get_or_add_module(&import.imported).token() + } else { + graph.get_or_add_squashed_module(&import.imported).token() + }; + + // Add the import with details + graph.add_detailed_import( + importer_token, + imported_token, + import.line_number as u32, + &import.line_contents, + ); + } + } + + graph +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::{DEFAULT_MTIME, TempFileSystemBuilder}; + use map_macro::hash_set; + + #[test] + fn test_discover_modules_happy_path() { + const SOME_MTIME: i64 = 12340000; + + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + not-a-python-file.txt + .hidden + foo/ + __init__.py + one.py + two/ + __init__.py + green.py + blue.py + "#, + ) + .with_file_mtime_map([("mypackage/foo/one.py", SOME_MTIME)]) + .build() + .unwrap(); + + let result = + discover_modules(&[PackageSpec::new("mypackage", temp_fs.join("mypackage"))]).unwrap(); + + assert_eq!(result.len(), 6); + assert_eq!( + result + .iter() + .map(|m| (m.name.as_str(), m.is_package, m.mtime_secs)) + .collect::>(), + hash_set! { + ("mypackage", true, DEFAULT_MTIME), + ("mypackage.foo", true, DEFAULT_MTIME), + ("mypackage.foo.one", false, SOME_MTIME), + ("mypackage.foo.two", true, DEFAULT_MTIME), + ("mypackage.foo.two.green", false, DEFAULT_MTIME), + ("mypackage.foo.two.blue", false, DEFAULT_MTIME), + } + ); + } +} diff --git a/rust/src/graph/builder/read_python_file.rs b/rust/src/graph/builder/read_python_file.rs new file mode 100644 index 00000000..0d99d76d --- /dev/null +++ b/rust/src/graph/builder/read_python_file.rs @@ -0,0 +1,103 @@ +use std::{fs, io::Read, path::Path}; + +use encoding_rs::Encoding; + +use crate::errors::{GrimpError, GrimpResult}; + +/// Read a Python source file with proper encoding detection. +/// +/// Python PEP 263 specifies that encoding can be declared in the first or second line +/// in the format: `# coding: ` or `# -*- coding: -*-` +/// +/// This function: +/// 1. Reads the file as bytes +/// 2. Checks the first two lines for an encoding declaration +/// 3. Decodes the file using the detected encoding (or UTF-8 as default) +pub fn read_python_file(path: &Path) -> GrimpResult { + // Read file as bytes + let mut file = fs::File::open(path).map_err(|e| GrimpError::FileReadError { + path: path.display().to_string(), + error: e.to_string(), + })?; + + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes) + .map_err(|e| GrimpError::FileReadError { + path: path.display().to_string(), + error: e.to_string(), + })?; + + // Detect encoding from first two lines + let encoding = detect_python_encoding(&bytes); + + // Decode using detected encoding + let (decoded, _encoding_used, had_errors) = encoding.decode(&bytes); + + if had_errors { + return Err(GrimpError::FileReadError { + path: path.display().to_string(), + error: format!("Failed to decode file with encoding {}", encoding.name()), + }); + } + + Ok(decoded.into_owned()) +} + +/// Detect Python source file encoding from the first two lines. 
+/// +/// Looks for patterns like: +/// - `# coding: ` +/// - `# -*- coding: -*-` +/// - `# coding=` +fn detect_python_encoding(bytes: &[u8]) -> &'static Encoding { + // Read first two lines as ASCII (encoding declarations must be ASCII-compatible) + let mut line_count = 0; + let mut line_start = 0; + + for (i, &byte) in bytes.iter().enumerate() { + if byte == b'\n' { + line_count += 1; + if line_count <= 2 { + // Check this line for encoding declaration + let line = &bytes[line_start..i]; + if let Some(encoding) = extract_encoding_from_line(line) { + return encoding; + } + line_start = i + 1; + } else { + break; + } + } + } + + // Default to UTF-8 + encoding_rs::UTF_8 +} + +/// Extract encoding from a single line if it contains an encoding declaration. +fn extract_encoding_from_line(line: &[u8]) -> Option<&'static Encoding> { + // Convert line to string (should be ASCII for encoding declarations) + let line_str = std::str::from_utf8(line).ok()?; + + // Look for "coding:" or "coding=" + if let Some(pos) = line_str + .find("coding:") + .or_else(|| line_str.find("coding=")) + { + let after_coding = &line_str[pos + 7..]; // Skip "coding:" or "coding=" + + // Extract encoding name (alphanumeric, dash, underscore until whitespace or special char) + let encoding_name: String = after_coding + .trim_start() + .chars() + .take_while(|c| c.is_alphanumeric() || *c == '-' || *c == '_') + .collect(); + + if !encoding_name.is_empty() { + // Try to get the encoding + return Encoding::for_label(encoding_name.as_bytes()); + } + } + + None +} diff --git a/rust/src/graph/builder/utils.rs b/rust/src/graph/builder/utils.rs new file mode 100644 index 00000000..b25003fd --- /dev/null +++ b/rust/src/graph/builder/utils.rs @@ -0,0 +1,170 @@ +use std::collections::HashSet; +use std::path::Path; + +use crate::graph::builder::PackageSpec; + +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct ResolvedImport { + pub importer: String, + pub imported: String, + pub line_number: usize, + pub line_contents: String, +} + +/// Check if a module filename represents a package (i.e., __init__.py) +pub fn is_package(module_path: &Path) -> bool { + module_path + .file_name() + .and_then(|name| name.to_str()) + .map(|name| name == "__init__.py") + .unwrap_or(false) +} + +/// Check if a module is a descendant of another module. 
+pub fn is_descendant(module_name: &str, potential_ancestor: &str) -> bool { + module_name.starts_with(&format!("{}.", potential_ancestor)) +} + +/// Check if module is internal to any of the given packages +pub fn is_internal<'a>(module_name: &str, packages: impl IntoIterator<Item = &'a String>) -> bool { + packages + .into_iter() + .any(|pkg| module_name == pkg || is_descendant(module_name, pkg)) +} + +/// Convert module path to module name +pub fn path_to_module_name(module_path: &Path, package: &PackageSpec) -> Option<String> { + let relative_path = module_path.strip_prefix(&package.directory).ok()?; + + let mut components: Vec<String> = vec![package.name.clone()]; + for component in relative_path.iter() { + let component_str = component.to_str()?; + if component_str == "__init__.py" { + // This is a package, don't add __init__ + break; + } else if component_str.ends_with(".py") { + // Strip .py extension + components.push(component_str.strip_suffix(".py")?.to_string()); + } else { + // Directory component + components.push(component_str.to_string()); + } + } + + Some(components.join(".")) +} + +/// Convert a relative import to an absolute import name +pub fn resolve_relative_import( + module_name: &str, + is_package: bool, + imported_object_name: &str, +) -> String { + let leading_dots_count = imported_object_name + .chars() + .take_while(|&c| c == '.') + .count(); + + if leading_dots_count == 0 { + return imported_object_name.to_string(); + } + + let imported_object_name_base = if is_package { + if leading_dots_count == 1 { + module_name.to_string() + } else { + let parts: Vec<&str> = module_name.split('.').collect(); + parts[0..parts.len() - leading_dots_count + 1].join(".") + } + } else { + let parts: Vec<&str> = module_name.split('.').collect(); + parts[0..parts.len() - leading_dots_count].join(".") + }; + + format!( + "{}.{}", + imported_object_name_base, + &imported_object_name[leading_dots_count..] + ) +} + +/// Resolve an imported object name to an internal module +pub fn resolve_internal_module( + imported_object_name: &str, + all_modules: &HashSet<String>, +) -> Option<String> { + let candidate_module = imported_object_name.to_string(); + + if all_modules.contains(&candidate_module) { + return Some(candidate_module); + } + + // Check if parent module exists + if let Some((parent, _)) = imported_object_name.rsplit_once('.') + && all_modules.contains(parent) + { + return Some(parent.to_string()); + } + + None +} + +/// Given a module that we already know is external, turn it into a module to add to the graph. +/// +/// The 'distillation' process involves removing any unwanted subpackages. For example, +/// django.models.db should be turned into simply django. +/// +/// The process is more complex for potential namespace packages, as it's not possible to +/// determine the portion package simply from the name. Rather than adding the overhead of a +/// filesystem read, we just get the shallowest component that does not clash with an internal +/// module namespace. Take, for example, foo.blue.alpha.one. If one of the found +/// packages is foo.blue.beta, the module will be distilled to foo.blue.alpha. +/// Alternatively, if the found package is foo.green, the distilled module will +/// be foo.blue. +/// +/// Returns None if the module is a parent of one of the internal packages (doesn't make sense, + /// probably an import of a namespace package).
+pub fn distill_external_module( + module_name: &str, + found_package_names: &HashSet<String>, +) -> Option<String> { + for found_package in found_package_names { + // If it's a module that is a parent of the package, return None + // as it doesn't make sense and is probably an import of a namespace package. + if is_descendant(found_package, module_name) { + return None; + } + } + + let module_root = module_name.split('.').next().unwrap(); + + // If it shares a namespace with an internal module, get the shallowest component that does + // not clash with an internal module namespace. + let mut candidate_portions: Vec<String> = Vec::new(); + let mut sorted_found_packages: Vec<&String> = found_package_names.iter().collect(); + sorted_found_packages.sort(); + sorted_found_packages.reverse(); + + for found_package in sorted_found_packages { + if is_descendant(found_package, module_root) { + let mut internal_components: Vec<&str> = found_package.split('.').collect(); + let mut external_components: Vec<&str> = module_name.split('.').collect(); + let mut external_namespace_components: Vec<&str> = vec![]; + while external_components[0] == internal_components[0] { + external_namespace_components.push(external_components.remove(0)); + internal_components.remove(0); + } + external_namespace_components.push(external_components[0]); + candidate_portions.push(external_namespace_components.join(".")); + } + } + + if !candidate_portions.is_empty() { + // If multiple internal modules share a namespace with this module, use the deepest one + // as we know that it will be a namespace too. + candidate_portions.sort_by_key(|portion| portion.split('.').count()); + Some(candidate_portions.last().unwrap().clone()) + } else { + Some(module_root.to_string()) + } +} diff --git a/rust/src/graph/mod.rs b/rust/src/graph/mod.rs index b01e82b7..4dda54e0 100644 --- a/rust/src/graph/mod.rs +++ b/rust/src/graph/mod.rs @@ -19,6 +19,7 @@ use crate::graph::higher_order_queries::Level; use crate::graph::higher_order_queries::PackageDependency as PyPackageDependency; use crate::module_expressions::ModuleExpression; +pub mod builder; pub mod direct_import_queries; pub mod graph_manipulation; pub mod hierarchy_queries; @@ -81,6 +82,10 @@ pub struct GraphWrapper { } impl GraphWrapper { + pub fn from_graph(graph: Graph) -> Self { + GraphWrapper { _graph: graph } + } + fn get_visible_module_by_name(&self, name: &str) -> Result<&Module, ModuleNotPresent> { self._graph .get_module_by_name(name) diff --git a/rust/src/graph_building.rs b/rust/src/graph_building.rs new file mode 100644 index 00000000..3551540b --- /dev/null +++ b/rust/src/graph_building.rs @@ -0,0 +1,62 @@ +use pyo3::prelude::*; +use pyo3::types::PyModule; +use std::path::PathBuf; + +use crate::errors::GrimpError; +use crate::graph::GraphWrapper; +use crate::graph::builder::{PackageSpec, build_graph}; + +#[pyclass(name = "PackageSpec")] +#[derive(Clone)] +pub struct PyPackageSpec { + inner: PackageSpec, +} + +#[pymethods] +impl PyPackageSpec { + #[new] + fn new(name: String, directory: String) -> Self { + PyPackageSpec { + inner: PackageSpec::new(name, PathBuf::from(directory)), + } + } +} + +#[pyfunction] +#[pyo3(signature = (packages, include_external_packages=false, exclude_type_checking_imports=false, cache_dir=None))] +pub fn build_graph_rust( + py: Python, + packages: Vec<PyPackageSpec>, + include_external_packages: bool, + exclude_type_checking_imports: bool, + cache_dir: Option<String>, +) -> PyResult<GraphWrapper> { + let cache_path = cache_dir.map(PathBuf::from); + + // Extract the inner PackageSpec from each
PyPackageSpec + let package_specs: Vec = packages.iter().map(|p| p.inner.clone()).collect(); + + let graph_result = build_graph( + &package_specs, + include_external_packages, + exclude_type_checking_imports, + cache_path.as_ref(), + ); + + match graph_result { + Ok(graph) => Ok(GraphWrapper::from_graph(graph)), + Err(GrimpError::ParseError { + module_filename, + line_number, + text, + .. + }) => { + // Import the Python SourceSyntaxError from grimp.exceptions + let exceptions_module = PyModule::import(py, "grimp.exceptions")?; + let source_syntax_error = exceptions_module.getattr("SourceSyntaxError")?; + let exception = source_syntax_error.call1((module_filename, line_number, text))?; + Err(PyErr::from_value(exception)) + } + Err(e) => Err(e.into()), + } +} diff --git a/rust/src/import_parsing.rs b/rust/src/import_parsing.rs index a43f7422..0fa64b08 100644 --- a/rust/src/import_parsing.rs +++ b/rust/src/import_parsing.rs @@ -1,10 +1,12 @@ +use bincode::{Decode, Encode}; + use crate::errors::{GrimpError, GrimpResult}; use ruff_python_ast::statement_visitor::{StatementVisitor, walk_body, walk_stmt}; use ruff_python_ast::{Expr, Stmt}; use ruff_python_parser::parse_module; use ruff_source_file::{LineIndex, SourceCode}; -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Encode, Decode)] pub struct ImportedObject { pub name: String, pub line_number: usize, diff --git a/rust/src/lib.rs b/rust/src/lib.rs index e63b501b..a1a2f422 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,11 +3,15 @@ pub mod errors; pub mod exceptions; mod filesystem; pub mod graph; +mod graph_building; pub mod import_parsing; mod import_scanning; pub mod module_expressions; mod module_finding; +#[cfg(test)] +pub mod test_utils; + use pyo3::prelude::*; #[pymodule] @@ -31,4 +35,7 @@ mod _rustgrimp { use crate::exceptions::{ CorruptCache, InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError, }; + + #[pymodule_export] + use crate::graph_building::{PyPackageSpec, build_graph_rust}; } diff --git a/rust/src/test_utils.rs b/rust/src/test_utils.rs new file mode 100644 index 00000000..93ac23d7 --- /dev/null +++ b/rust/src/test_utils.rs @@ -0,0 +1,431 @@ +use filetime::{FileTime, set_file_mtime}; +use std::collections::HashMap; +use std::fs; +use std::io::Write; +use std::path::{Path, PathBuf}; +use tempfile::TempDir; + +/// Default mtime for files (in seconds since Unix epoch) +pub const DEFAULT_MTIME: i64 = 10000; + +/// A builder for creating temporary directory structures for testing. +/// +/// # Example +/// +/// ``` +/// use _rustgrimp::test_utils::TempFileSystemBuilder; +/// +/// let temp_fs = TempFileSystemBuilder::new(r#" +/// mypackage/ +/// __init__.py +/// foo/ +/// __init__.py +/// one.py +/// "#) +/// .with_file_content_map([ +/// ("mypackage/foo/one.py", "from . import two"), +/// ]) +/// .with_file_mtime_map([ +/// ("mypackage/foo/one.py", 12340000), +/// ]) +/// .build() +/// .unwrap(); +/// +/// let package_dir = temp_fs.join("mypackage"); +/// ``` +pub struct TempFileSystemBuilder { + contents: String, + content_map: HashMap, + mtime_overrides: HashMap, +} + +impl TempFileSystemBuilder { + /// Create a new builder with the directory structure. + /// + /// The string should be formatted with directories ending in `/` and files without. + /// Indentation determines the hierarchy. 
+ /// + /// # Example + /// + /// ``` + /// let builder = TempFileSystemBuilder::new(r#" + /// mypackage/ + /// __init__.py + /// foo/ + /// __init__.py + /// one.py + /// "#); + /// ``` + pub fn new(contents: &str) -> Self { + Self { + contents: contents.to_string(), + content_map: HashMap::new(), + mtime_overrides: HashMap::new(), + } + } + + /// Set the content for a specific file (relative path). + /// + /// # Arguments + /// + /// * `path` - Relative path to the file from the temp directory root + /// * `content` - The text content of the file + /// + /// # Example + /// + /// ``` + /// let builder = TempFileSystemBuilder::new("...") + /// .with_file_content("mypackage/foo/one.py", "from . import two"); + /// ``` + pub fn with_file_content(mut self, path: &str, content: &str) -> Self { + self.content_map + .insert(path.to_string(), content.to_string()); + self + } + + /// Set the content for multiple files at once. + /// + /// # Arguments + /// + /// * `content_map` - An iterator of (path, content) pairs + /// + /// # Example + /// + /// ``` + /// let builder = TempFileSystemBuilder::new("...") + /// .with_file_content_map([ + /// ("mypackage/foo/one.py", "from . import two"), + /// ("mypackage/foo/two.py", "x = 1"), + /// ]); + /// ``` + pub fn with_file_content_map( + mut self, + content_map: impl IntoIterator, impl Into)>, + ) -> Self { + self.content_map + .extend(content_map.into_iter().map(|(k, v)| (k.into(), v.into()))); + self + } + + /// Set a custom modification time for a specific file (relative path). + /// + /// # Arguments + /// + /// * `path` - Relative path to the file from the temp directory root + /// * `mtime` - Modification time in seconds since Unix epoch + /// + /// # Example + /// + /// ``` + /// let builder = TempFileSystemBuilder::new("...") + /// .with_mtime("mypackage/foo/one.py", 12340000); + /// ``` + pub fn with_file_mtime(mut self, path: &str, mtime: i64) -> Self { + self.mtime_overrides.insert(path.to_string(), mtime); + self + } + + /// Set custom modification times for multiple files at once. 
+ /// + /// # Arguments + /// + /// * `mtime_map` - An iterator of (path, mtime) pairs + /// + /// # Example + /// + /// ``` + /// let builder = TempFileSystemBuilder::new("...") + /// .with_file_mtime_map([ + /// ("mypackage/foo/one.py", 12340000), + /// ("mypackage/foo/two.py", 12350000), + /// ]); + /// ``` + pub fn with_file_mtime_map( + mut self, + mtime_map: impl IntoIterator, i64)>, + ) -> Self { + self.mtime_overrides + .extend(mtime_map.into_iter().map(|(k, v)| (k.into(), v))); + self + } + + /// Build the temporary file system + pub fn build(self) -> std::io::Result { + let temp_dir = TempDir::new()?; + + // Create the directory structure + Self::create_structure(temp_dir.path(), &self.contents)?; + + // Write file contents from content_map + for (relative_path, content) in &self.content_map { + let full_path = temp_dir.path().join(relative_path); + + // Ensure parent directory exists + if let Some(parent) = full_path.parent() { + fs::create_dir_all(parent)?; + } + + let mut file = fs::File::create(&full_path)?; + file.write_all(content.as_bytes())?; + + // Set default mtime for files created via content_map + let default_filetime = FileTime::from_unix_time(DEFAULT_MTIME, 0); + set_file_mtime(&full_path, default_filetime)?; + } + + // Apply mtime overrides (must come after content_map writes) + for (relative_path, mtime) in &self.mtime_overrides { + let full_path = temp_dir.path().join(relative_path); + if full_path.exists() { + let filetime = FileTime::from_unix_time(*mtime, 0); + set_file_mtime(&full_path, filetime)?; + } + } + + Ok(TempFileSystem { temp_dir }) + } + + fn create_structure(base_path: &Path, contents: &str) -> std::io::Result<()> { + let lines: Vec<&str> = contents + .lines() + .map(|l| l.trim_end()) + .filter(|l| !l.is_empty()) + .collect(); + + if lines.is_empty() { + return Ok(()); + } + + // Calculate minimum indentation to dedent + let min_indent = lines + .iter() + .filter(|l| !l.trim().is_empty()) + .map(|l| l.len() - l.trim_start().len()) + .min() + .unwrap_or(0); + + // Parse the structure + let mut stack: Vec<(usize, PathBuf)> = vec![(0, base_path.to_path_buf())]; + + for line in lines { + let trimmed = line.trim_start(); + if trimmed.is_empty() { + continue; + } + + let indent = line.len() - trimmed.len() - min_indent; + let indent_level = indent / 4; // Assume 4 spaces per level + + // Pop stack until we find the parent + while stack.len() > indent_level + 1 { + stack.pop(); + } + + let parent_path = &stack.last().unwrap().1; + let name = trimmed.trim(); + + if name.ends_with('/') { + // Directory + let dir_name = name.trim_end_matches('/'); + let dir_path = parent_path.join(dir_name); + fs::create_dir_all(&dir_path)?; + stack.push((indent_level, dir_path)); + } else { + // File + let file_path = parent_path.join(name); + let mut file = fs::File::create(&file_path)?; + // Create empty file + file.write_all(b"")?; + + // Set default mtime + let default_filetime = FileTime::from_unix_time(DEFAULT_MTIME, 0); + set_file_mtime(&file_path, default_filetime)?; + } + } + + Ok(()) + } +} + +/// A temporary directory structure for testing. 
+pub struct TempFileSystem { + temp_dir: TempDir, +} + +impl TempFileSystem { + /// Get the root path of the temporary directory + pub fn path(&self) -> &Path { + self.temp_dir.path() + } + + /// Join a path component to the root path + /// + /// # Example + /// + /// ``` + /// let temp_fs = TempFileSystemBuilder::new("...").build().unwrap(); + /// let package_dir = temp_fs.join("mypackage"); + /// ``` + pub fn join(&self, path: impl AsRef) -> PathBuf { + self.temp_dir.path().join(path) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_simple_structure() { + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + foo/ + __init__.py + one.py + "#, + ) + .build() + .unwrap(); + + let base = temp_fs.path(); + assert!(base.join("mypackage").is_dir()); + assert!(base.join("mypackage/__init__.py").is_file()); + assert!(base.join("mypackage/foo").is_dir()); + assert!(base.join("mypackage/foo/__init__.py").is_file()); + assert!(base.join("mypackage/foo/one.py").is_file()); + } + + #[test] + fn test_create_with_file_content() { + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + foo/ + __init__.py + one.py + "#, + ) + .with_file_content("mypackage/foo/one.py", "from . import two") + .build() + .unwrap(); + + let one_py = temp_fs.path().join("mypackage/foo/one.py"); + let content = fs::read_to_string(&one_py).unwrap(); + assert_eq!(content, "from . import two"); + } + + #[test] + fn test_create_with_content_map() { + let mut content_map = HashMap::new(); + content_map.insert("mypackage/foo/one.py".to_string(), "import sys".to_string()); + content_map.insert("mypackage/foo/two.py".to_string(), "x = 1".to_string()); + + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + foo/ + __init__.py + one.py + two.py + "#, + ) + .with_file_content_map(content_map) + .build() + .unwrap(); + + let one_py = temp_fs.path().join("mypackage/foo/one.py"); + let content = fs::read_to_string(&one_py).unwrap(); + assert_eq!(content, "import sys"); + + let two_py = temp_fs.path().join("mypackage/foo/two.py"); + let content = fs::read_to_string(&two_py).unwrap(); + assert_eq!(content, "x = 1"); + } + + #[test] + fn test_create_with_custom_mtimes() { + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + foo/ + __init__.py + one.py + "#, + ) + .with_file_mtime("mypackage/foo/one.py", 12340000) + .build() + .unwrap(); + + let one_py = temp_fs.path().join("mypackage/foo/one.py"); + let metadata = fs::metadata(&one_py).unwrap(); + let mtime = FileTime::from_last_modification_time(&metadata); + assert_eq!(mtime.unix_seconds(), 12340000); + + let init_py = temp_fs.path().join("mypackage/__init__.py"); + let metadata = fs::metadata(&init_py).unwrap(); + let mtime = FileTime::from_last_modification_time(&metadata); + assert_eq!(mtime.unix_seconds(), DEFAULT_MTIME); + } + + #[test] + fn test_nested_directories() { + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + foo/ + __init__.py + two/ + __init__.py + green.py + blue.py + "#, + ) + .build() + .unwrap(); + + let base = temp_fs.path(); + assert!(base.join("mypackage/foo/two").is_dir()); + assert!(base.join("mypackage/foo/two/green.py").is_file()); + assert!(base.join("mypackage/foo/two/blue.py").is_file()); + } + + #[test] + fn test_builder_chaining() { + let temp_fs = TempFileSystemBuilder::new( + r#" + mypackage/ + __init__.py + foo/ + __init__.py + one.py + two.py + "#, + ) + .with_file_mtime("mypackage/foo/one.py", 
11111111) + .with_file_mtime("mypackage/foo/two.py", 22222222) + .with_file_content("mypackage/foo/one.py", "# one") + .with_file_content("mypackage/foo/two.py", "# two") + .build() + .unwrap(); + + let one_py = temp_fs.path().join("mypackage/foo/one.py"); + let metadata = fs::metadata(&one_py).unwrap(); + let mtime = FileTime::from_last_modification_time(&metadata); + assert_eq!(mtime.unix_seconds(), 11111111); + let content = fs::read_to_string(&one_py).unwrap(); + assert_eq!(content, "# one"); + + let two_py = temp_fs.path().join("mypackage/foo/two.py"); + let metadata = fs::metadata(&two_py).unwrap(); + let mtime = FileTime::from_last_modification_time(&metadata); + assert_eq!(mtime.unix_seconds(), 22222222); + let content = fs::read_to_string(&two_py).unwrap(); + assert_eq!(content, "# two"); + } +} diff --git a/src/grimp/__init__.py b/src/grimp/__init__.py index 26fa6298..90c6ac03 100644 --- a/src/grimp/__init__.py +++ b/src/grimp/__init__.py @@ -1,9 +1,9 @@ __version__ = "3.13" -from .application.graph import DetailedImport, ImportGraph, Import +from .application.graph import DetailedImport, Import, ImportGraph from .domain.analysis import PackageDependency, Route -from .domain.valueobjects import DirectImport, Module, Layer -from .main import build_graph +from .domain.valueobjects import DirectImport, Layer, Module +from .main import build_graph, build_graph_rust __all__ = [ "Module", @@ -14,5 +14,6 @@ "PackageDependency", "Route", "build_graph", + "build_graph_rust", "Layer", ] diff --git a/src/grimp/application/graph.py b/src/grimp/application/graph.py index 0847511c..7bffddf4 100644 --- a/src/grimp/application/graph.py +++ b/src/grimp/application/graph.py @@ -1,14 +1,16 @@ from __future__ import annotations -from typing import TypedDict + from collections.abc import Sequence +from typing import TypedDict + +from grimp import _rustgrimp as rust # type: ignore[attr-defined] from grimp.domain.analysis import PackageDependency, Route from grimp.domain.valueobjects import Layer -from grimp import _rustgrimp as rust # type: ignore[attr-defined] from grimp.exceptions import ( + InvalidImportExpression, + InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, - InvalidModuleExpression, - InvalidImportExpression, ) @@ -37,6 +39,12 @@ def __init__(self) -> None: self._cached_modules: set[str] | None = None self._rustgraph = rust.Graph() + @classmethod + def from_rustgraph(cls, rustgraph: rust.Graph) -> ImportGraph: + graph = ImportGraph() + graph._rustgraph = rustgraph + return graph + # Mechanics # --------- diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index e7503d29..70396b00 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -2,17 +2,17 @@ Use cases handle application logic. 
""" +from collections.abc import Iterable, Sequence from typing import cast -from collections.abc import Sequence, Iterable -from .scanning import scan_imports +from ..application.graph import ImportGraph from ..application.ports import caching from ..application.ports.filesystem import AbstractFileSystem, BasicFileSystem -from ..application.graph import ImportGraph from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile from ..application.ports.packagefinder import AbstractPackageFinder from ..domain.valueobjects import DirectImport, Module from .config import settings +from .scanning import scan_imports class NotSupplied: @@ -68,6 +68,46 @@ def build_graph( return graph +def build_graph_rust( + package_name, + *additional_package_names, + include_external_packages: bool = False, + exclude_type_checking_imports: bool = False, + cache_dir: str | type[NotSupplied] | None = NotSupplied, +) -> ImportGraph: + """ + Build and return an import graph for the supplied package(s) using the Rust implementation. + """ + from grimp import _rustgrimp as rust # type: ignore[attr-defined] + + # Create package specs for all packages + all_package_names = [package_name] + list(additional_package_names) + package_specs = [] + for package_name in all_package_names: + package_directory = settings.PACKAGE_FINDER.determine_package_directory( + package_name=package_name, file_system=settings.FILE_SYSTEM + ) + package_specs.append(rust.PackageSpec(package_name, package_directory)) + + # Handle cache_dir + cache_dir_arg: str | None = None + if cache_dir is NotSupplied: + cache_dir_arg = ".grimp_cache" + elif isinstance(cache_dir, str): + cache_dir_arg = cache_dir + + # Build the graph + rust_graph = rust.build_graph_rust( + package_specs, + include_external_packages=include_external_packages, + exclude_type_checking_imports=exclude_type_checking_imports, + cache_dir=cache_dir_arg, + ) + + # Wrap the rust graph in our ImportGraph wrapper + return ImportGraph.from_rustgraph(rust_graph) + + def _find_packages( file_system: AbstractFileSystem, package_names: Sequence[object] ) -> set[FoundPackage]: diff --git a/src/grimp/main.py b/src/grimp/main.py index 45a297a0..88c5fc7f 100644 --- a/src/grimp/main.py +++ b/src/grimp/main.py @@ -1,13 +1,13 @@ -__all__ = ["build_graph"] +__all__ = ["build_graph", "build_graph_rust"] from .adaptors.caching import Cache from .adaptors.filesystem import FileSystem -from .application.graph import ImportGraph from .adaptors.modulefinder import ModuleFinder from .adaptors.packagefinder import ImportLibPackageFinder from .adaptors.timing import SystemClockTimer from .application.config import settings -from .application.usecases import build_graph +from .application.graph import ImportGraph +from .application.usecases import build_graph, build_graph_rust settings.configure( MODULE_FINDER=ModuleFinder(), diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py index aece8dad..25c1f4b4 100644 --- a/tests/benchmarking/test_benchmarking.py +++ b/tests/benchmarking/test_benchmarking.py @@ -1,14 +1,15 @@ -import uuid -import random -import pytest -import json import importlib +import json +import random +import uuid +from copy import deepcopy from pathlib import Path -from grimp.application.graph import ImportGraph -from grimp import PackageDependency, Route +import pytest + import grimp -from copy import deepcopy +from grimp import PackageDependency, Route +from grimp.application.graph import ImportGraph 
@pytest.fixture(scope="module") @@ -303,7 +304,7 @@ def test_build_django_uncached(benchmark): In this benchmark, the cache is turned off. """ - benchmark(grimp.build_graph, "django", cache_dir=None) + benchmark(grimp.build_graph_rust, "django", cache_dir=None) def test_build_django_from_cache_no_misses(benchmark): @@ -313,9 +314,9 @@ def test_build_django_from_cache_no_misses(benchmark): This benchmark fully utilizes the cache. """ # Populate the cache first, before beginning the benchmark. - grimp.build_graph("django") + grimp.build_graph_rust("django") - benchmark(grimp.build_graph, "django") + benchmark(grimp.build_graph_rust, "django") @pytest.mark.parametrize( @@ -364,7 +365,7 @@ def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses: int): # turn off multiple runs, which could potentially be misleading when running locally. # Populate the cache first, before beginning the benchmark. - grimp.build_graph("django") + grimp.build_graph_rust("django") # Add some modules which won't be in the cache. # (Use some real python, which will take time to parse.) django_path = Path(importlib.util.find_spec("django").origin).parent # type: ignore @@ -380,7 +381,7 @@ def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses: int): hash_buster = f"\n# Hash busting comment: {uuid.uuid4()}" new_module.write_text(module_contents + hash_buster) - benchmark.pedantic(grimp.build_graph, ["django"], rounds=1, iterations=1) + benchmark.pedantic(grimp.build_graph_rust, ["django"], rounds=1, iterations=1) # Delete the modules we just created. for module in extra_modules: diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py new file mode 100644 index 00000000..5092c386 --- /dev/null +++ b/tests/functional/conftest.py @@ -0,0 +1,12 @@ +import pytest + +import grimp + + +@pytest.fixture(params=["python", "rust"], ids=["python", "rust"]) +def build_graph(request): + """Fixture that provides both Python and Rust graph building implementations.""" + if request.param == "python": + return grimp.build_graph + else: + return grimp.build_graph_rust diff --git a/tests/functional/test_build_and_use_graph.py b/tests/functional/test_build_and_use_graph.py index ed151598..7d42cbd4 100644 --- a/tests/functional/test_build_and_use_graph.py +++ b/tests/functional/test_build_and_use_graph.py @@ -1,7 +1,5 @@ -from grimp import build_graph import pytest - """ For ease of reference, these are the imports of all the files: @@ -30,7 +28,9 @@ # --------- -def test_modules(): +def test_modules( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) assert graph.modules == { @@ -53,7 +53,9 @@ def test_modules(): } -def test_add_module(): +def test_add_module( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) number_of_modules = len(graph.modules) @@ -62,7 +64,9 @@ def test_add_module(): assert number_of_modules + 1 == len(graph.modules) -def test_remove_module(): +def test_remove_module( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) number_of_modules = len(graph.modules) @@ -71,7 +75,9 @@ def test_remove_module(): assert number_of_modules - 1 == len(graph.modules) -def test_add_and_remove_import(): +def test_add_and_remove_import( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) a = "testpackage.one.delta.blue" b = "testpackage.two.alpha" @@ -91,7 +97,9 @@ def test_add_and_remove_import(): # ----------- -def test_find_children(): +def test_find_children( + build_graph, +): graph = 
build_graph("testpackage", cache_dir=None) assert graph.find_children("testpackage.one") == { @@ -102,7 +110,9 @@ def test_find_children(): } -def test_find_descendants(): +def test_find_descendants( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) assert graph.find_descendants("testpackage.one") == { @@ -118,7 +128,9 @@ def test_find_descendants(): # -------------- -def test_find_modules_directly_imported_by(): +def test_find_modules_directly_imported_by( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) result = graph.find_modules_directly_imported_by("testpackage.utils") @@ -126,7 +138,9 @@ def test_find_modules_directly_imported_by(): assert {"testpackage.one", "testpackage.two.alpha"} == result -def test_find_modules_that_directly_import(): +def test_find_modules_that_directly_import( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) result = graph.find_modules_that_directly_import("testpackage.one.alpha") @@ -141,7 +155,9 @@ def test_find_modules_that_directly_import(): } == result -def test_direct_import_exists(): +def test_direct_import_exists( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) assert False is graph.direct_import_exists( @@ -155,7 +171,9 @@ def test_direct_import_exists(): ) -def test_get_import_details(): +def test_get_import_details( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) assert [ @@ -173,7 +191,7 @@ def test_get_import_details(): class TestPathExists: - def test_as_packages_false(self): + def test_as_packages_false(self, build_graph): graph = build_graph("testpackage", cache_dir=None) assert not graph.chain_exists( @@ -182,7 +200,7 @@ def test_as_packages_false(self): assert graph.chain_exists(imported="testpackage.one.alpha", importer="testpackage.utils") - def test_as_packages_true(self): + def test_as_packages_true(self, build_graph): graph = build_graph("testpackage", cache_dir=None) assert graph.chain_exists( @@ -194,7 +212,9 @@ def test_as_packages_true(self): ) -def test_find_shortest_chain(): +def test_find_shortest_chain( + build_graph, +): graph = build_graph("testpackage", cache_dir=None) assert ( @@ -246,7 +266,7 @@ def test_find_shortest_chain(): ), ], ) -def test_find_shortest_chains(as_packages: bool | None, expected_result: set[tuple]): +def test_find_shortest_chains(build_graph, as_packages: bool | None, expected_result: set[tuple]): importer = "testpackage.three" imported = "testpackage.one.alpha" @@ -263,7 +283,7 @@ def test_find_shortest_chains(as_packages: bool | None, expected_result: set[tup class TestFindDownstreamModules: - def test_as_package_false(self): + def test_as_package_false(self, build_graph): graph = build_graph("testpackage", cache_dir=None) result = graph.find_downstream_modules("testpackage.one.alpha") @@ -281,7 +301,7 @@ def test_as_package_false(self): "testpackage.three.gamma", } == result - def test_as_package_true(self): + def test_as_package_true(self, build_graph): graph = build_graph("testpackage", cache_dir=None) result = graph.find_downstream_modules("testpackage.one", as_package=True) @@ -299,7 +319,7 @@ def test_as_package_true(self): class TestFindUpstreamModules: - def test_as_package_false(self): + def test_as_package_false(self, build_graph): graph = build_graph("testpackage", cache_dir=None) assert graph.find_upstream_modules("testpackage.one.alpha") == set() @@ -310,7 +330,7 @@ def test_as_package_false(self): "testpackage.one.alpha", } - def test_as_package_true(self): + def 
test_as_package_true(self, build_graph):
         graph = build_graph("testpackage", cache_dir=None)
 
         assert graph.find_upstream_modules("testpackage.two", as_package=True) == {
@@ -321,13 +341,13 @@
 
 class TestExcludeTypeCheckingImports:
-    def test_exclude_false(self):
+    def test_exclude_false(self, build_graph):
         graph = build_graph("testpackage", cache_dir=None, exclude_type_checking_imports=False)
         assert True is graph.direct_import_exists(
             importer="testpackage.one.beta", imported="testpackage.two.alpha"
         )
 
-    def test_exclude_true(self):
+    def test_exclude_true(self, build_graph):
         graph = build_graph("testpackage", cache_dir=None, exclude_type_checking_imports=True)
         assert False is graph.direct_import_exists(
             importer="testpackage.one.beta", imported="testpackage.two.alpha"
diff --git a/tests/functional/test_build_and_use_graph_with_multiple_roots.py b/tests/functional/test_build_and_use_graph_with_multiple_roots.py
index 4c0efdcf..48fe6839 100644
--- a/tests/functional/test_build_and_use_graph_with_multiple_roots.py
+++ b/tests/functional/test_build_and_use_graph_with_multiple_roots.py
@@ -1,5 +1,4 @@
 import pytest  # type: ignore
-from grimp import build_graph
 
 """
 For ease of reference, these are the imports of all the files:
@@ -24,7 +23,9 @@
 
 @pytest.mark.parametrize("root_packages", PACKAGES_IN_DIFFERENT_ORDERS)
 class TestBuildGraph:
-    def test_graph_has_correct_modules_regardless_of_package_order(self, root_packages):
+    def test_graph_has_correct_modules_regardless_of_package_order(
+        self, build_graph, root_packages
+    ):
         graph = build_graph(*root_packages, cache_dir=None)
 
         assert graph.modules == {
@@ -38,7 +39,7 @@ def test_graph_has_correct_modules_regardless_of_package_order(self, root_packag
             "rootpackagegreen.two",
         }
 
-    def test_stores_import_within_package(self, root_packages):
+    def test_stores_import_within_package(self, build_graph, root_packages):
         graph = build_graph(*root_packages, cache_dir=None)
 
         assert [
@@ -52,7 +53,7 @@ def test_stores_import_within_package(self, root_packages):
             importer="rootpackageblue.two", imported="rootpackageblue.one.alpha"
         )
 
-    def test_stores_import_between_root_packages(self, root_packages):
+    def test_stores_import_between_root_packages(self, build_graph, root_packages):
         graph = build_graph(*root_packages, cache_dir=None)
 
         assert [
diff --git a/tests/functional/test_build_graph_on_real_packages.py b/tests/functional/test_build_graph_on_real_packages.py
index b2da12af..431ee6f6 100644
--- a/tests/functional/test_build_graph_on_real_packages.py
+++ b/tests/functional/test_build_graph_on_real_packages.py
@@ -12,6 +12,11 @@ def test_build_graph_on_real_package(package_name, snapshot):
     imports = graph.find_matching_direct_imports(import_expression="** -> **")
     assert imports == snapshot
+    graph_from_rust = grimp.build_graph_rust(package_name, cache_dir=None)
+    imports_from_rust = graph_from_rust.find_matching_direct_imports(import_expression="** -> **")
+    sort_key = lambda i: (i["importer"], i["imported"])  # noqa:E731
+    assert sorted(imports_from_rust, key=sort_key) == sorted(imports, key=sort_key)
+
 
 @pytest.mark.parametrize(
     "package_name",
@@ -19,7 +24,11 @@
 )
 def test_nominate_cycle_breakers_django(package_name, snapshot):
     graph = grimp.build_graph("django")
     cycle_breakers = graph.nominate_cycle_breakers(package_name)
     assert cycle_breakers == snapshot
+
+    graph_from_rust = grimp.build_graph_rust("django")
+    cycle_breakers_from_rust = 
graph_from_rust.nominate_cycle_breakers(package_name) + assert cycle_breakers_from_rust == cycle_breakers diff --git a/tests/functional/test_encoding_handling.py b/tests/functional/test_encoding_handling.py index 21a8f5a2..72761a9f 100644 --- a/tests/functional/test_encoding_handling.py +++ b/tests/functional/test_encoding_handling.py @@ -1,11 +1,8 @@ -import grimp - - -def test_build_graph_of_non_ascii_source(): +def test_build_graph_of_non_ascii_source(build_graph): """ Tests we can cope with non ascii Python source files. """ - graph = grimp.build_graph("encodingpackage", cache_dir=None) + graph = build_graph("encodingpackage", cache_dir=None) result = graph.get_import_details( importer="encodingpackage.importer", imported="encodingpackage.imported" @@ -21,11 +18,11 @@ def test_build_graph_of_non_ascii_source(): ] == result -def test_build_graph_of_non_utf8_source(): +def test_build_graph_of_non_utf8_source(build_graph): """ Tests we can cope with non UTF-8 Python source files. """ - graph = grimp.build_graph("encodingpackage", cache_dir=None) + graph = build_graph("encodingpackage", cache_dir=None) result = graph.get_import_details( importer="encodingpackage.shift_jis_importer", imported="encodingpackage.imported" diff --git a/tests/functional/test_error_handling.py b/tests/functional/test_error_handling.py index 45635986..014fb7a4 100644 --- a/tests/functional/test_error_handling.py +++ b/tests/functional/test_error_handling.py @@ -3,10 +3,10 @@ import pytest # type: ignore -from grimp import build_graph, exceptions +from grimp import exceptions -def test_syntax_error_includes_module(): +def test_syntax_error_includes_module(build_graph): dirname = os.path.dirname(__file__) filename = os.path.abspath( os.path.join(dirname, "..", "assets", "syntaxerrorpackage", "foo", "one.py") @@ -21,7 +21,7 @@ def test_syntax_error_includes_module(): assert expected_exception == excinfo.value -def test_missing_root_init_file(): +def test_missing_root_init_file(build_graph): with pytest.raises( exceptions.NamespacePackageEncountered, match=re.escape( diff --git a/tests/functional/test_namespace_packages.py b/tests/functional/test_namespace_packages.py index 0d0d8b83..2f97e0ea 100644 --- a/tests/functional/test_namespace_packages.py +++ b/tests/functional/test_namespace_packages.py @@ -1,6 +1,6 @@ import pytest # type: ignore -from grimp import build_graph, exceptions +from grimp import exceptions """ For ease of reference, these are the imports of all the files: @@ -18,7 +18,9 @@ """ -def test_build_graph_for_namespace(): +def test_build_graph_for_namespace( + build_graph, +): with pytest.raises(exceptions.NamespacePackageEncountered): build_graph("mynamespace", cache_dir=None) @@ -40,13 +42,15 @@ def test_build_graph_for_namespace(): ), ), ) -def test_modules_for_namespace_child(package, expected_modules): +def test_modules_for_namespace_child(build_graph, package, expected_modules): graph = build_graph(package, cache_dir=None) assert graph.modules == expected_modules -def test_modules_for_multiple_namespace_children(): +def test_modules_for_multiple_namespace_children( + build_graph, +): graph = build_graph("mynamespace.green", "mynamespace.blue", cache_dir=None) assert graph.modules == GREEN_MODULES | BLUE_MODULES @@ -73,7 +77,7 @@ def test_modules_for_multiple_namespace_children(): ), ) def test_external_packages_handling( - packages, expected_internal_modules, expected_external_modules + build_graph, packages, expected_internal_modules, expected_external_modules ): graph = build_graph(*packages, 
include_external_packages=True, cache_dir=None) @@ -81,7 +85,9 @@ def test_external_packages_handling( assert all(graph.is_module_squashed(m) for m in expected_external_modules) -def test_import_within_namespace_child(): +def test_import_within_namespace_child( + build_graph, +): graph = build_graph("mynamespace.blue", cache_dir=None) assert graph.direct_import_exists( @@ -89,7 +95,9 @@ def test_import_within_namespace_child(): ) -def test_import_between_namespace_children(): +def test_import_between_namespace_children( + build_graph, +): graph = build_graph("mynamespace.blue", "mynamespace.green", cache_dir=None) assert graph.direct_import_exists( @@ -104,7 +112,7 @@ def test_import_between_namespace_children(): "package_name", ("nestednamespace", "nestednamespace.foo", "nestednamespace.foo.alpha"), ) -def test_build_graph_for_nested_namespace(package_name): +def test_build_graph_for_nested_namespace(build_graph, package_name): with pytest.raises(exceptions.NamespacePackageEncountered): build_graph(package_name, cache_dir=None) @@ -134,13 +142,15 @@ def test_build_graph_for_nested_namespace(package_name): ), ), ) -def test_modules_for_nested_namespace_child(package, expected_modules): +def test_modules_for_nested_namespace_child(build_graph, package, expected_modules): graph = build_graph(package, cache_dir=None) assert graph.modules == expected_modules -def test_import_within_nested_namespace_child(): +def test_import_within_nested_namespace_child( + build_graph, +): graph = build_graph( "nestednamespace.foo.alpha.blue", cache_dir=None, @@ -152,7 +162,9 @@ def test_import_within_nested_namespace_child(): ) -def test_import_between_nested_namespace_children(): +def test_import_between_nested_namespace_children( + build_graph, +): graph = build_graph( "nestednamespace.foo.alpha.blue", "nestednamespace.foo.alpha.green", @@ -204,7 +216,7 @@ def test_import_between_nested_namespace_children(): ), ) def test_external_packages_handling_for_nested_namespaces( - packages, expected_internal_modules, expected_external_modules + build_graph, packages, expected_internal_modules, expected_external_modules ): graph = build_graph(*packages, include_external_packages=True, cache_dir=None)
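
For reference, a minimal usage sketch of the new Rust-backed entry point exercised by the tests above (the package name "mypackage" is illustrative and not part of this diff):

    import grimp

    # Build the graph via the Rust implementation; cache_dir=None disables the
    # on-disk cache, matching how the functional tests call the fixture.
    graph = grimp.build_graph_rust("mypackage", cache_dir=None)

    # The returned object is the same ImportGraph wrapper that grimp.build_graph
    # returns, so the existing query API applies unchanged.
    imports = graph.find_matching_direct_imports(import_expression="** -> **")
    print(f"{len(graph.modules)} modules, {len(imports)} direct imports")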