From 5c58290ed7010792e5c7cd356b10fef91a93ebe0 Mon Sep 17 00:00:00 2001
From: David Seddon
Date: Wed, 22 Oct 2025 18:02:45 +0100
Subject: [PATCH 1/3] Don't attempt to use nonexistent dependency group

---
 justfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/justfile b/justfile
index 200f3114..0de109f8 100644
--- a/justfile
+++ b/justfile
@@ -138,7 +138,7 @@ benchmark-local:
 # Show recent local benchmark results.
 [group('benchmarking')]
 show-benchmark-results:
-    @uv run --group=benchmark-local pytest-benchmark compare --group-by=fullname --sort=name --columns=mean
+    @uv run pytest-benchmark compare --group-by=fullname --sort=name --columns=mean
 
 # Run benchmarks using Codspeed. This only works in CI.
 [group('benchmarking')]

From ab08f07a4fca280893f453d73dd329ad7e9e8e16 Mon Sep 17 00:00:00 2001
From: David Seddon
Date: Wed, 22 Oct 2025 18:08:55 +0100
Subject: [PATCH 2/3] Remove _run_benchmark function

This indirection wasn't useful.
---
 tests/benchmarking/test_benchmarking.py | 65 ++++++++-----------------
 1 file changed, 21 insertions(+), 44 deletions(-)

diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py
index f1a58bc6..c1d38187 100644
--- a/tests/benchmarking/test_benchmarking.py
+++ b/tests/benchmarking/test_benchmarking.py
@@ -11,10 +11,6 @@
 from .adaptors import PrefixMissingCache
 
 
-def _run_benchmark(benchmark, fn, *args, **kwargs):
-    return benchmark(fn, *args, **kwargs)
-
-
 @pytest.fixture(scope="module")
 def large_graph():
     raw_json = (Path(__file__).parent / "large_graph.json").read_text()
@@ -300,7 +296,7 @@ def test_build_django_uncached(benchmark):
 
     In this benchmark, the cache is turned off.
     """
-    _run_benchmark(benchmark, grimp.build_graph, "django", cache_dir=None)
+    benchmark(grimp.build_graph, "django", cache_dir=None)
 
 
 def test_build_django_from_cache_no_misses(benchmark):
@@ -312,7 +308,7 @@ def test_build_django_from_cache_no_misses(benchmark):
     # Populate the cache first, before beginning the benchmark.
     grimp.build_graph("django")
 
-    _run_benchmark(benchmark, grimp.build_graph, "django")
+    benchmark(grimp.build_graph, "django")
 
 
 @pytest.mark.parametrize(
@@ -349,7 +345,7 @@ def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses):
     # Populate the cache.
     grimp.build_graph("django")
 
-    _run_benchmark(benchmark, grimp.build_graph, "django")
+    benchmark(grimp.build_graph, "django")
 
     # Clean up.
     [module.unlink() for module in extra_modules]
@@ -371,8 +367,7 @@ def _remove_package_dependencies(graph, package_dependencies):
         return graph
 
     def test_top_level_large_graph_violated(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_illegal_dependencies_for_layers,
             layers=TOP_LEVEL_LAYERS,
             containers=("mypackage",),
@@ -383,8 +378,7 @@ def test_top_level_large_graph_kept(self, large_graph, benchmark):
         large_graph = self._remove_package_dependencies(
             large_graph, TOP_LEVEL_PACKAGE_DEPENDENCIES
         )
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
            large_graph.find_illegal_dependencies_for_layers,
            layers=TOP_LEVEL_LAYERS,
            containers=("mypackage",),
@@ -392,50 +386,39 @@ def test_top_level_large_graph_kept(self, large_graph, benchmark):
         assert result == set()
 
     def test_deep_layers_large_graph_violated(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark, large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS
-        )
+        result = benchmark(large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS)
         assert result == DEEP_LAYER_PACKAGE_DEPENDENCIES
 
     def test_deep_layers_large_graph_kept(self, large_graph, benchmark):
         large_graph = self._remove_package_dependencies(
             large_graph, DEEP_LAYER_PACKAGE_DEPENDENCIES
         )
-        result = _run_benchmark(
-            benchmark, large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS
-        )
+        result = benchmark(large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS)
         assert result == set()
 
 
 def test_find_descendants(large_graph, benchmark):
-    result = _run_benchmark(benchmark, large_graph.find_descendants, "mypackage")
+    result = benchmark(large_graph.find_descendants, "mypackage")
     assert len(result) == 28222
 
 
 def test_find_downstream_modules(large_graph, benchmark):
-    result = _run_benchmark(
-        benchmark, large_graph.find_downstream_modules, DEEP_LAYERS[0], as_package=True
-    )
+    result = benchmark(large_graph.find_downstream_modules, DEEP_LAYERS[0], as_package=True)
     assert len(result) == 80
 
 
 def test_find_upstream_modules(large_graph, benchmark):
-    result = _run_benchmark(
-        benchmark, large_graph.find_upstream_modules, DEEP_LAYERS[0], as_package=True
-    )
+    result = benchmark(large_graph.find_upstream_modules, DEEP_LAYERS[0], as_package=True)
     assert len(result) == 2159
 
 
 class TestFindShortestChain:
     def test_chain_found(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark, large_graph.find_shortest_chain, DEEP_LAYERS[0], DEEP_LAYERS[1]
-        )
+        result = benchmark(large_graph.find_shortest_chain, DEEP_LAYERS[0], DEEP_LAYERS[1])
         assert result is not None
 
     def test_no_chain(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chain,
             DEEP_LAYERS[0],
             "mypackage.data.vendors.4053192739.6373932949",
@@ -445,8 +428,7 @@ class TestFindShortestChains:
     def test_chains_found(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chains,
             DEEP_LAYERS[0],
             DEEP_LAYERS[1],
@@ -455,8 +437,7 @@ def test_chains_found(self, large_graph, benchmark):
         assert len(result) > 0
 
     def test_no_chains(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chains,
             DEEP_LAYERS[0],
             "mypackage.data.vendors.4053192739.6373932949",
@@ -489,8 +470,7 @@ def test_chains_found_sparse_imports(self, benchmark):
             graph.add_import(importer=f"a.m{i}", imported=f"b.m{i}")
             graph.add_import(importer=f"b.m{i}", imported=f"c.m{i}")
 
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             graph.find_shortest_chains,
             "a",
             "c",
@@ -500,7 +480,7 @@
 
 
 def test_copy_graph(large_graph, benchmark):
-    _run_benchmark(benchmark, lambda: deepcopy(large_graph))
+    benchmark(lambda: deepcopy(large_graph))
 
 
 def test_modules_property_first_access(large_graph, benchmark):
@@ -512,7 +492,7 @@ def f():
         # Accessing the modules property is what we're benchmarking.
         _ = large_graph.modules
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_modules_property_many_accesses(large_graph, benchmark):
@@ -525,7 +505,7 @@ def f():
         for i in range(1000):
             _ = large_graph.modules
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_get_import_details(benchmark):
@@ -540,19 +520,16 @@ def f():
         for i in range(iterations):
             graph.get_import_details(importer=f"blue_{i}", imported=f"green_{i}")
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_find_matching_modules(benchmark, large_graph):
-    matching_modules = _run_benchmark(
-        benchmark, lambda: large_graph.find_matching_modules("mypackage.domain.**")
-    )
+    matching_modules = benchmark(lambda: large_graph.find_matching_modules("mypackage.domain.**"))
     assert len(matching_modules) == 2519
 
 
 def test_find_matching_direct_imports(benchmark, large_graph):
-    matching_imports = _run_benchmark(
-        benchmark,
+    matching_imports = benchmark(
         lambda: large_graph.find_matching_direct_imports(
             "mypackage.domain.** -> mypackage.data.**"
         ),
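
Some context on PATCH 2/3: the `benchmark` fixture (pytest-benchmark locally,
pytest-codspeed in CI) already takes the target callable followed by its
positional and keyword arguments, and returns the callable's return value, so
the removed wrapper added nothing. A minimal sketch of that calling
convention, assuming pytest-benchmark is installed; `slow_function` is a
hypothetical stand-in, not part of this test suite:

    def test_slow_function(benchmark):
        # benchmark(fn, *args, **kwargs) runs fn under measurement
        # and hands back fn's return value, so assertions can follow.
        result = benchmark(slow_function, "some-arg", verbose=False)
        assert result is not None
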
imported=f"b.m{i}") graph.add_import(importer=f"b.m{i}", imported=f"c.m{i}") - result = _run_benchmark( - benchmark, + result = benchmark( graph.find_shortest_chains, "a", "c", @@ -500,7 +480,7 @@ def test_chains_found_sparse_imports(self, benchmark): def test_copy_graph(large_graph, benchmark): - _run_benchmark(benchmark, lambda: deepcopy(large_graph)) + benchmark(lambda: deepcopy(large_graph)) def test_modules_property_first_access(large_graph, benchmark): @@ -512,7 +492,7 @@ def f(): # Accessing the modules property is what we're benchmarking. _ = large_graph.modules - _run_benchmark(benchmark, f) + benchmark(f) def test_modules_property_many_accesses(large_graph, benchmark): @@ -525,7 +505,7 @@ def f(): for i in range(1000): _ = large_graph.modules - _run_benchmark(benchmark, f) + benchmark(f) def test_get_import_details(benchmark): @@ -540,19 +520,16 @@ def f(): for i in range(iterations): graph.get_import_details(importer=f"blue_{i}", imported=f"green_{i}") - _run_benchmark(benchmark, f) + benchmark(f) def test_find_matching_modules(benchmark, large_graph): - matching_modules = _run_benchmark( - benchmark, lambda: large_graph.find_matching_modules("mypackage.domain.**") - ) + matching_modules = benchmark(lambda: large_graph.find_matching_modules("mypackage.domain.**")) assert len(matching_modules) == 2519 def test_find_matching_direct_imports(benchmark, large_graph): - matching_imports = _run_benchmark( - benchmark, + matching_imports = benchmark( lambda: large_graph.find_matching_direct_imports( "mypackage.domain.** -> mypackage.data.**" ), From 78ac056174f6afdbd7918058493746e3765c9f1e Mon Sep 17 00:00:00 2001 From: David Seddon Date: Thu, 23 Oct 2025 12:25:42 +0100 Subject: [PATCH 3/3] Update partial cache benchmark --- tests/benchmarking/adaptors.py | 20 ------ tests/benchmarking/test_benchmarking.py | 94 +++++++++++++++++-------- 2 files changed, 65 insertions(+), 49 deletions(-) delete mode 100644 tests/benchmarking/adaptors.py diff --git a/tests/benchmarking/adaptors.py b/tests/benchmarking/adaptors.py deleted file mode 100644 index 93bb9134..00000000 --- a/tests/benchmarking/adaptors.py +++ /dev/null @@ -1,20 +0,0 @@ -from grimp.adaptors.caching import Cache, CacheMiss -from typing import Set - -from grimp.application.ports.modulefinder import ModuleFile -from grimp.domain.valueobjects import DirectImport - - -class PrefixMissingCache(Cache): - """ - Test double of the real cache that will miss caching any module that begins with - a special prefix. - """ - - MISSING_PREFIX = "miss_marker_8772f06d64b6_" # Arbitrary prefix. 
-
-    def read_imports(self, module_file: ModuleFile) -> Set[DirectImport]:
-        leaf_name = module_file.module.name.split(".")[-1]
-        if leaf_name.startswith(self.MISSING_PREFIX):
-            raise CacheMiss
-        return super().read_imports(module_file)

diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py
index c1d38187..5e5b4517 100644
--- a/tests/benchmarking/test_benchmarking.py
+++ b/tests/benchmarking/test_benchmarking.py
@@ -1,14 +1,14 @@
+import uuid
+import random
 import pytest
 import json
 import importlib
 from pathlib import Path
-from tests.config import override_settings
 from grimp.application.graph import ImportGraph
 from grimp import PackageDependency, Route
 import grimp
 from copy import deepcopy
-from .adaptors import PrefixMissingCache
 
 
 @pytest.fixture(scope="module")
@@ -55,14 +55,16 @@ def large_graph():
                 middle=(),
                 tails=frozenset(
                     {
-                        "mypackage.application.7537183614.6928774480.5676105139.3275676604"  # noqa:E501
+                        "mypackage.application.7537183614.6928774480.5676105139.3275676604"
+                        # noqa:E501
                     }
                 ),
             ),
             Route(
                 heads=frozenset(
                     {
-                        "mypackage.domain.6928774480.5676105139.1330171288.7588443317.4661445087"  # noqa:E501
+                        "mypackage.domain.6928774480.5676105139.1330171288.7588443317.4661445087"
+                        # noqa:E501
                     }
                 ),
                 middle=(),
@@ -83,7 +85,8 @@ def large_graph():
             Route(
                 heads=frozenset(
                     {
-                        "mypackage.domain.6928774480.1028759677.7960519247.2888779155.7486857426"  # noqa:E501
+                        "mypackage.domain.6928774480.1028759677.7960519247.2888779155.7486857426"
+                        # noqa:E501
                     }
                 ),
                 middle=(),
@@ -135,15 +138,19 @@ def large_graph():
             Route(
                 heads=frozenset(
                     {
-                        "mypackage.application.7537183614.2538372545.1153384736.6297289996",  # noqa:E501
-                        "mypackage.application.7537183614.2538372545.1153384736.6404547812.6297289996",  # noqa:E501
+                        "mypackage.application.7537183614.2538372545.1153384736.6297289996",
+                        # noqa:E501
+                        "mypackage.application.7537183614.2538372545.1153384736.6404547812.6297289996",
+                        # noqa:E501
                     }
                 ),
                 middle=("mypackage.6398020133.9075581450.6529869526.6297289996",),
                 tails=frozenset(
                     {
-                        "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.2943193489",  # noqa:E501
-                        "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.6322703811",  # noqa:E501
+                        "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.2943193489",
+                        # noqa:E501
+                        "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.6322703811",
+                        # noqa:E501
                     }
                 ),
             )
@@ -319,36 +326,65 @@ def test_build_django_from_cache_no_misses(benchmark):
         350,  # Around half the Django codebase.
     ),
 )
-def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses):
+def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses: int):
     """
     Benchmarks building a graph of a real package - in this case Django.
 
     This benchmark utilizes the cache except for a few modules, which we add.
     """
-    # We must use a special cache class, otherwise the cache will be populated
-    # by the first iteration. It would be better to do this using a setup function,
-    # which is supported by pytest-benchmark's pedantic mode, but not codspeed.
-    # This won't give us a truly accurate picture, but it's better than nothing.
+    # We need to take care when benchmarking partially populated caches, because
+    # the benchmark may run multiple times, depending on the context in which it's run.
+    # If we're not careful, the cache will be populated the first time and not reset
+    # in subsequent runs.
+    #
+    # The benchmark fixture available here is either from pytest-benchmark (used locally)
+    # or pytest-codspeed (used in CI). Here's how they both behave:
+    #
+    # - pytest-benchmark: By default, dynamically decides how many times to run the benchmark.
+    #   It does this to improve benchmarking of very fast single runs: it will run
+    #   the code many times and average the total time. That's not so important
+    #   for code that takes orders of magnitude longer than the timer resolution.
+    #
+    #   We can override this by using benchmark.pedantic, where we specify the
+    #   number of rounds and iterations. Each round contains multiple iterations.
+    #
+    #   It's also possible to provide a setup function that runs
+    #   before each round. A teardown function will be in the next release
+    #   but isn't in pytest-benchmark<=5.1.0.
+    #
+    # - pytest-codspeed: This can run in two modes, CPU instrumentation and wall time. Currently
+    #   we use CPU instrumentation, as wall time is only available to
+    #   GitHub organizations. CPU mode will always run the benchmark once,
+    #   regardless of what rounds and iterations are specified in pedantic mode.
+    #   This mode measures the speed of the benchmark by simulating the CPU,
+    #   rather than timing how long it actually takes, so there is no point in
+    #   running it multiple times and taking an average.
+    #
+    # So - in this case, because we are benchmarking a relatively slow piece of code, we explicitly
+    # turn off multiple runs, which could otherwise be misleading when running locally.
 
-    # Add some specially-named modules which will be treated as not in the cache.
-    django_path = Path(importlib.util.find_spec("django").origin).parent
-    extra_modules = [
-        django_path / f"{PrefixMissingCache.MISSING_PREFIX}{i}.py" for i in range(number_of_misses)
-    ]
-    # Use some real python, which will take time to parse.
+    # Populate the cache first, before beginning the benchmark.
+    grimp.build_graph("django")
+
+    # Add some modules which won't be in the cache.
+    # (Use some real python, which will take time to parse.)
+    django_path = Path(importlib.util.find_spec("django").origin).parent  # type: ignore
     module_to_copy = django_path / "forms" / "forms.py"
     module_contents = module_to_copy.read_text()
-    for extra_module in extra_modules:
-        extra_module.write_text(module_contents)
-
-    with override_settings(CACHE_CLASS=PrefixMissingCache):
-        # Populate the cache.
-        grimp.build_graph("django")
+    extra_modules = [
+        django_path / f"module_{i}_{random.randint(100000, 999999)}.py"
+        for i in range(number_of_misses)
+    ]
+    for new_module in extra_modules:
+        # Make sure the module contents aren't identical. Depending on how the cache is
+        # implemented, perhaps this could make a difference.
+        hash_buster = f"\n# Hash busting comment: {uuid.uuid4()}"
+        new_module.write_text(module_contents + hash_buster)
 
-        benchmark(grimp.build_graph, "django")
+    benchmark.pedantic(grimp.build_graph, ["django"], rounds=1, iterations=1)
 
-    # Clean up.
-    [module.unlink() for module in extra_modules]
+    # Delete the modules we just created.
+    for module in extra_modules:
+        module.unlink()
 
 
 class TestFindIllegalDependenciesForLayers:
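
For reference, a minimal sketch of the benchmark.pedantic API that PATCH 3/3
relies on, under pytest-benchmark; `populate_cache` is a hypothetical helper,
not part of this suite. Under pytest-codspeed's CPU-instrumentation mode the
same call is accepted, but the body runs exactly once regardless of the
arguments, as the comment in the patch notes:

    def test_build_django_once(benchmark):
        def setup():
            populate_cache()  # hypothetical per-round preparation
            # setup may return (args, kwargs) for the benchmarked callable.
            return (["django"], {})

        # rounds/iterations pin the number of runs instead of letting
        # pytest-benchmark calibrate; with a setup function, iterations must be 1.
        benchmark.pedantic(grimp.build_graph, setup=setup, rounds=1, iterations=1)

The patch itself skips the setup function (it prepares the cache before the
benchmark starts) and passes the arguments directly:
benchmark.pedantic(grimp.build_graph, ["django"], rounds=1, iterations=1).
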