From ea5da5f4ab5eca087a6d89edf3725c71041dbe88 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Mon, 20 Oct 2025 18:13:38 +0100 Subject: [PATCH 1/2] Add nominate_cycle_breakers method --- CHANGELOG.rst | 5 + docs/usage.rst | 36 +++- pyproject.toml | 1 + src/grimp/application/graph.py | 59 +++++- .../application/graph/test_cycle_breakers.py | 179 ++++++++++++++++++ 5 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 tests/unit/application/graph/test_cycle_breakers.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 602aeabb..60c6699f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ Changelog ========= +latest +------ + +* Add `nominate_cycle_breakers` method. + 3.12 (2025-10-09) ----------------- diff --git a/docs/usage.rst b/docs/usage.rst index dc7db5b2..8d2b1e85 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -198,7 +198,7 @@ Methods for analysing direct imports Find all direct imports matching the passed import expression. - The imports are returned are in the following form:: + The imports returned are in the following form:: [ { @@ -505,6 +505,40 @@ Higher level analysis ``frozenset[str]``: Imported modules at the end of the chain. +.. py:function:: ImportGraph.nominate_cycle_breakers(package) + + Choose an approximately minimal set of dependencies that, if removed, would make the package locally acyclic. + + - 'Acyclic' means that there are no direct dependency cycles between the package's children. Indirect + dependencies (i.e. ones involving modules outside the supplied package) are disregarded, + as are imports between the package and its children. + - 'Dependency cycles' mean cycles between the *squashed* children (see `Terminology`_ above). + + Multiple sets of cycle breakers can exist for a given package. To arrive at this particular set, the following + approach is used: + + 1. Create a graph whose nodes are each child of the package. + 2. For each pair of children, add directed edges corresponding to whether there are imports between those two + children (as packages, rather than individual modules). The edges are weighted according to the number of + _dependencies_ they represent: this is usually the same as the number of imports, but if a module imports + another module in multiple places, it will be treated as a single dependency. + 3. Calculate the approximately + `minimum weighted feedback arc set `_. + This attempts to find a set of edges with the smallest total weight that can be removed from the graph in order + to make it acyclic. It uses the greedy cycle-breaking heuristic of Eades, Lin and Smyth: not guaranteed + to find the optimal solution, but it is faster than alternatives. + 4. These edges are then used to look up all the concrete imports in the fully unsquashed graph, which are returned. + For example, an edge discovered in step 3. of ``mypackage.foo -> mypackage.bar``, with a weight 3, might correspond + to these three imports: ``mypackage.foo.blue -> mypackage.bar.green``, + ``mypackage.foo.blue.one -> mypackage.bar.green.two``, and ``mypackage.foo.blue -> mypackage.bar.green.three``. + + :param str package: The package in the graph to check for cycles. If a module with no children is passed, + an empty set is be returned. + :return: A set of imports that, if removed, would make the imports between the the children of the supplied + package acyclic. + :rtype: ``set[tuple[str, str]]``. In each import tuple, the first element is the importer module and the second + is the imported. + Methods for manipulating the graph ---------------------------------- diff --git a/pyproject.toml b/pyproject.toml index ace813b2..0d85e275 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ authors = [ ] requires-python = ">=3.9" dependencies = [ + "igraph>=0.11.9", "typing-extensions>=3.10.0.0", ] classifiers = [ diff --git a/src/grimp/application/graph.py b/src/grimp/application/graph.py index f9487e6f..6808cedc 100644 --- a/src/grimp/application/graph.py +++ b/src/grimp/application/graph.py @@ -1,8 +1,9 @@ from __future__ import annotations +import itertools from typing import List, Optional, Sequence, Set, Tuple, TypedDict from grimp.domain.analysis import PackageDependency, Route from grimp.domain.valueobjects import Layer - +import igraph as ig # type: ignore from grimp import _rustgrimp as rust # type: ignore[attr-defined] from grimp.exceptions import ( ModuleNotPresent, @@ -17,6 +18,11 @@ class Import(TypedDict): imported: str +# Corresponds to importer, imported. +# Prefer this form to Import, as it's both more lightweight, and hashable. +ImportTuple = Tuple[str, str] + + class DetailedImport(Import): line_number: int line_contents: str @@ -440,6 +446,57 @@ def find_illegal_dependencies_for_layers( return _dependencies_from_tuple(result) + def nominate_cycle_breakers(self, package: str) -> set[ImportTuple]: + """ + Identify a set of imports that, if removed, would make the package locally acyclic. + """ + children = self.find_children(package) + if len(children) < 2: + return set() + igraph = ig.Graph(directed=True) + igraph.add_vertices([package, *children]) + edges: list[tuple[str, str]] = [] + weights: list[int] = [] + for downstream, upstream in itertools.permutations(children, r=2): + total_imports = 0 + for expression in ( + f"{downstream} -> {upstream}", + f"{downstream}.** -> {upstream}", + f"{downstream} -> {upstream}.**", + f"{downstream}.** -> {upstream}.**", + ): + total_imports += len(self.find_matching_direct_imports(expression)) + if total_imports: + edges.append((downstream, upstream)) + weights.append(total_imports) + + igraph.add_edges(edges) + igraph.es["weight"] = weights + + arc_set = igraph.feedback_arc_set(weights="weight") + + squashed_imports: list[Import] = [] + for edge_id in arc_set: + edge = igraph.es[edge_id] + squashed_imports.append( + { + "importer": edge.source_vertex["name"], + "imported": edge.target_vertex["name"], + } + ) + + unsquashed_imports: list[Import] = [] + for squashed_import in squashed_imports: + for pattern in ( + f"{squashed_import['importer']} -> {squashed_import['imported']}", + f"{squashed_import['importer']}.** -> {squashed_import['imported']}", + f"{squashed_import['importer']} -> {squashed_import['imported']}.**", + f"{squashed_import['importer']}.** -> {squashed_import['imported']}.**", + ): + unsquashed_imports.extend(self.find_matching_direct_imports(pattern)) + + return {(i["importer"], i["imported"]) for i in unsquashed_imports} + # Dunder methods # -------------- diff --git a/tests/unit/application/graph/test_cycle_breakers.py b/tests/unit/application/graph/test_cycle_breakers.py new file mode 100644 index 00000000..d701f03c --- /dev/null +++ b/tests/unit/application/graph/test_cycle_breakers.py @@ -0,0 +1,179 @@ +from grimp.application.graph import ImportGraph +import pytest + + +class TestNominateCycleBreakers: + def test_empty_graph(self): + graph = ImportGraph() + graph.add_module("pkg") + + result = graph.nominate_cycle_breakers("pkg") + + assert result == set() + + @pytest.mark.parametrize( + "module", + ( + "pkg", + "pkg.foo", + "pkg.foo.blue", + ), + ) + def test_graph_with_no_imports(self, module: str): + graph = self._build_graph_with_no_imports() + + result = graph.nominate_cycle_breakers(module) + + assert result == set() + + @pytest.mark.parametrize( + "module", + ( + "pkg", + "pkg.bar", + "pkg.foo.blue", + "pkg.foo.green", # Leaf package. + ), + ) + def test_acyclic_graph(self, module: str): + graph = self._build_acyclic_graph() + + result = graph.nominate_cycle_breakers(module) + + assert result == set() + + def test_one_breaker(self): + graph = self._build_acyclic_graph() + importer, imported = "pkg.bar.red.four", "pkg.foo.blue.two" + graph.add_import(importer=importer, imported=imported) + result = graph.nominate_cycle_breakers("pkg") + + assert result == {(importer, imported)} + + def test_three_breakers(self): + graph = self._build_acyclic_graph() + imports = { + ("pkg.bar.red.four", "pkg.foo.blue.two"), + ("pkg.bar.yellow", "pkg.foo.blue.three"), + ("pkg.bar", "pkg.foo.blue.three"), + } + for importer, imported in imports: + graph.add_import(importer=importer, imported=imported) + + result = graph.nominate_cycle_breakers("pkg") + + assert result == imports + + def test_nominated_based_on_dependencies_rather_than_imports(self): + graph = self._build_acyclic_graph() + # Add lots of imports from a single module - this will be treated as + # a single dependency. + importer, imported = "pkg.bar.red.four", "pkg.foo.blue.two" + for i in range(1, 30): + graph.add_import( + importer=importer, imported=imported, line_number=i, line_contents="-" + ) + + graph.add_import(importer=importer, imported=imported) + + result = graph.nominate_cycle_breakers("pkg") + + assert result == {(importer, imported)} + + def test_imports_between_passed_package_and_children_are_disregarded(self): + graph = self._build_acyclic_graph() + parent, child = "pkg.foo.blue", "pkg.foo" + graph.add_import(importer=parent, imported=child) + graph.add_import(importer=child, imported=parent) + + result = graph.nominate_cycle_breakers(parent) + + assert result == set() + + def test_on_child_of_root(self): + graph = self._build_acyclic_graph() + imports = { + ("pkg.bar.red.five", "pkg.bar.yellow.eight"), + ("pkg.bar.red", "pkg.bar.yellow"), + } + for importer, imported in imports: + graph.add_import(importer=importer, imported=imported) + + result = graph.nominate_cycle_breakers("pkg.bar") + + assert result == imports + + def test_on_grandchild_of_root(self): + graph = self._build_acyclic_graph() + imports = { + ("pkg.bar.orange.ten.gamma", "pkg.bar.orange.nine.alpha"), + ("pkg.bar.orange.ten", "pkg.bar.orange.nine.alpha"), + } + for importer, imported in imports: + graph.add_import(importer=importer, imported=imported) + + result = graph.nominate_cycle_breakers("pkg.bar.orange") + + assert result == imports + + def test_on_package_with_one_child(self): + graph = self._build_acyclic_graph() + graph.add_module("pkg.bar.orange.ten.gamma.onechild") + + result = graph.nominate_cycle_breakers("pkg.bar.orange.ten.gamma") + + assert result == set() + + def _build_graph_with_no_imports(self) -> ImportGraph: + graph = ImportGraph() + for module in ( + "pkg", + "pkg.foo", + "pkg.foo.blue", + "pkg.foo.blue.one", + "pkg.foo.blue.two", + "pkg.foo.green", + "pkg.bar", + "pkg.bar.red", + "pkg.bar.red.three", + "pkg.bar.red.four", + "pkg.bar.red.five", + "pkg.bar.red.six", + "pkg.bar.red.seven", + "pkg.bar.yellow", + "pkg.bar.yellow.eight", + "pkg.bar.orange", + "pkg.bar.orange.nine", + "pkg.bar.orange.nine.alpha", + "pkg.bar.orange.nine.beta", + "pkg.bar.orange.ten", + "pkg.bar.orange.ten.gamma", + "pkg.bar.orange.ten.delta", + ): + graph.add_module(module) + return graph + + def _build_acyclic_graph(self) -> ImportGraph: + graph = self._build_graph_with_no_imports() + # Add imports that make: + # pkg.foo -> pkg.bar + # pkg.bar.yellow -> pkg.foo.red + # pkg.bar.orange.nine -> pkg.bar.orange.ten + for importer, imported in ( + ("pkg.foo", "pkg.bar.red"), + ("pkg.foo.green", "pkg.bar.yellow"), + ("pkg.foo.blue.two", "pkg.bar.red.three"), + ("pkg.foo.blue.two", "pkg.bar.red.four"), + ("pkg.foo.blue.two", "pkg.bar.red.five"), + ("pkg.foo.blue.two", "pkg.bar.red.six"), + ("pkg.foo.blue.two", "pkg.bar.red.seven"), + ("pkg.bar.yellow", "pkg.bar.red"), + ("pkg.bar.yellow.eight", "pkg.bar.red.three"), + ("pkg.bar.yellow.eight", "pkg.bar.red.four"), + ("pkg.bar.yellow.eight", "pkg.bar.red.five"), + ("pkg.bar.orange.nine", "pkg.bar.orange.ten.gamma"), + ("pkg.bar.orange.nine.alpha", "pkg.bar.orange.ten.gamma"), + ("pkg.bar.orange.nine.beta", "pkg.bar.orange.ten.delta"), + ): + graph.add_import(importer=importer, imported=imported) + return graph From 5513daa5cd6033ca31e820bc1956d519c4015a6d Mon Sep 17 00:00:00 2001 From: David Seddon Date: Sat, 25 Oct 2025 18:07:37 +0100 Subject: [PATCH 2/2] WIP - get plumbing for nominate_cycle_breakers in rust --- rust/src/graph/cycle_breakers.rs | 12 ++++++++++++ rust/src/graph/mod.rs | 28 ++++++++++++++++++++++++++++ src/grimp/application/graph.py | 4 ++++ 3 files changed, 44 insertions(+) create mode 100644 rust/src/graph/cycle_breakers.rs diff --git a/rust/src/graph/cycle_breakers.rs b/rust/src/graph/cycle_breakers.rs new file mode 100644 index 00000000..8ff79761 --- /dev/null +++ b/rust/src/graph/cycle_breakers.rs @@ -0,0 +1,12 @@ +use crate::errors::GrimpResult; +use crate::graph::{Graph, ModuleToken}; +use rustc_hash::FxHashSet; + +impl Graph { + pub fn nominate_cycle_breakers( + &self, + _package: ModuleToken, + ) -> GrimpResult> { + Ok(FxHashSet::default()) + } +} diff --git a/rust/src/graph/mod.rs b/rust/src/graph/mod.rs index 934528ec..256f4fcd 100644 --- a/rust/src/graph/mod.rs +++ b/rust/src/graph/mod.rs @@ -25,6 +25,7 @@ pub mod hierarchy_queries; pub mod higher_order_queries; pub mod import_chain_queries; +pub mod cycle_breakers; pub(crate) mod pathfinding; static MODULE_NAMES: LazyLock>> = @@ -619,6 +620,33 @@ impl GraphWrapper { self.convert_package_dependencies_to_python(py, illegal_dependencies) } + pub fn nominate_cycle_breakers<'py>( + &self, + py: Python<'py>, + package: &str, + ) -> PyResult> { + let package = self.get_visible_module_by_name(package)?.token(); + let cycle_breakers = self._graph.nominate_cycle_breakers(package)?; + PySet::new( + py, + cycle_breakers + .into_iter() + .map(|(importer, imported)| { + let importer = self._graph.get_module(importer).unwrap(); + let imported = self._graph.get_module(imported).unwrap(); + Import::new(importer.name(), imported.name()) + }) + .map(|import| { + [ + ("importer", import.importer.into_py_any(py).unwrap()), + ("imported", import.imported.into_py_any(py).unwrap()), + ] + .into_py_dict(py) + .unwrap() + }), + ) + } + #[pyo3(name = "clone")] pub fn clone_py(&self) -> GraphWrapper { self.clone() diff --git a/src/grimp/application/graph.py b/src/grimp/application/graph.py index 6808cedc..b41f5477 100644 --- a/src/grimp/application/graph.py +++ b/src/grimp/application/graph.py @@ -450,6 +450,10 @@ def nominate_cycle_breakers(self, package: str) -> set[ImportTuple]: """ Identify a set of imports that, if removed, would make the package locally acyclic. """ + # Check plumbing - not implemented yet. + rust_result = self._rustgraph.nominate_cycle_breakers(package) + assert rust_result == set() + children = self.find_children(package) if len(children) < 2: return set()