diff --git a/justfile b/justfile
index 200f3114..0de109f8 100644
--- a/justfile
+++ b/justfile
@@ -138,7 +138,7 @@ benchmark-local:
 # Show recent local benchmark results.
 [group('benchmarking')]
 show-benchmark-results:
-    @uv run --group=benchmark-local pytest-benchmark compare --group-by=fullname --sort=name --columns=mean
+    @uv run pytest-benchmark compare --group-by=fullname --sort=name --columns=mean
 
 # Run benchmarks using Codspeed. This only works in CI.
 [group('benchmarking')]
diff --git a/tests/benchmarking/adaptors.py b/tests/benchmarking/adaptors.py
deleted file mode 100644
index 93bb9134..00000000
--- a/tests/benchmarking/adaptors.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from grimp.adaptors.caching import Cache, CacheMiss
-from typing import Set
-
-from grimp.application.ports.modulefinder import ModuleFile
-from grimp.domain.valueobjects import DirectImport
-
-
-class PrefixMissingCache(Cache):
-    """
-    Test double of the real cache that will miss caching any module that begins with
-    a special prefix.
-    """
-
-    MISSING_PREFIX = "miss_marker_8772f06d64b6_"  # Arbitrary prefix.
-
-    def read_imports(self, module_file: ModuleFile) -> Set[DirectImport]:
-        leaf_name = module_file.module.name.split(".")[-1]
-        if leaf_name.startswith(self.MISSING_PREFIX):
-            raise CacheMiss
-        return super().read_imports(module_file)
diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py
index f1a58bc6..5e5b4517 100644
--- a/tests/benchmarking/test_benchmarking.py
+++ b/tests/benchmarking/test_benchmarking.py
@@ -1,18 +1,14 @@
+import uuid
+import random
 import pytest
 import json
 import importlib
 from pathlib import Path
-from tests.config import override_settings
 from grimp.application.graph import ImportGraph
 from grimp import PackageDependency, Route
 import grimp
 from copy import deepcopy
 
-from .adaptors import PrefixMissingCache
-
-
-def _run_benchmark(benchmark, fn, *args, **kwargs):
-    return benchmark(fn, *args, **kwargs)
 
 
 @pytest.fixture(scope="module")
@@ -59,14 +55,16 @@ def large_graph():
                         middle=(),
                         tails=frozenset(
                             {
-                                "mypackage.application.7537183614.6928774480.5676105139.3275676604"  # noqa:E501
+                                "mypackage.application.7537183614.6928774480.5676105139.3275676604"
+                                # noqa:E501
                             }
                         ),
                     ),
                     Route(
                         heads=frozenset(
                             {
-                                "mypackage.domain.6928774480.5676105139.1330171288.7588443317.4661445087"  # noqa:E501
+                                "mypackage.domain.6928774480.5676105139.1330171288.7588443317.4661445087"
+                                # noqa:E501
                             }
                         ),
                         middle=(),
@@ -87,7 +85,8 @@
                     Route(
                         heads=frozenset(
                             {
-                                "mypackage.domain.6928774480.1028759677.7960519247.2888779155.7486857426"  # noqa:E501
+                                "mypackage.domain.6928774480.1028759677.7960519247.2888779155.7486857426"
+                                # noqa:E501
                             }
                         ),
                         middle=(),
@@ -139,15 +138,19 @@
                     Route(
                         heads=frozenset(
                             {
-                                "mypackage.application.7537183614.2538372545.1153384736.6297289996",  # noqa:E501
-                                "mypackage.application.7537183614.2538372545.1153384736.6404547812.6297289996",  # noqa:E501
+                                "mypackage.application.7537183614.2538372545.1153384736.6297289996",
+                                # noqa:E501
+                                "mypackage.application.7537183614.2538372545.1153384736.6404547812.6297289996",
+                                # noqa:E501
                             }
                         ),
                         middle=("mypackage.6398020133.9075581450.6529869526.6297289996",),
                         tails=frozenset(
                             {
-                                "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.2943193489",  # noqa:E501
-                                "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.6322703811",  # noqa:E501
+                                "mypackage.plugins.5634303718.6180716911.7582995238.1039461003.2943193489",
+                                # noqa:E501
"mypackage.plugins.5634303718.6180716911.7582995238.1039461003.6322703811", + # noqa:E501 } ), ) @@ -300,7 +303,7 @@ def test_build_django_uncached(benchmark): In this benchmark, the cache is turned off. """ - _run_benchmark(benchmark, grimp.build_graph, "django", cache_dir=None) + benchmark(grimp.build_graph, "django", cache_dir=None) def test_build_django_from_cache_no_misses(benchmark): @@ -312,7 +315,7 @@ def test_build_django_from_cache_no_misses(benchmark): # Populate the cache first, before beginning the benchmark. grimp.build_graph("django") - _run_benchmark(benchmark, grimp.build_graph, "django") + benchmark(grimp.build_graph, "django") @pytest.mark.parametrize( @@ -323,36 +326,65 @@ def test_build_django_from_cache_no_misses(benchmark): 350, # Around half the Django codebase. ), ) -def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses): +def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses: int): """ Benchmarks building a graph of real package - in this case Django. This benchmark utilizes the cache except for a few modules, which we add. """ - # We must use a special cache class, otherwise the cache will be populated - # by the first iteration. It would be better to do this using a setup function, - # which is supported by pytest-benchmark's pedantic mode, but not codspeed. - # This won't give us a truly accurate picture, but it's better than nothing. + # We need to take care in benchmarking partially populated caches, because + # the benchmark may run multiple times, depending on the context in which it's run. + # If we're not careful, the cache will be populated the first time and not reset + # in subsequent runs. + # + # The benchmark fixture available here is either from pytest-benchmark (used locally) + # or pytest-codspeed (used in CI). Here's how they both behave: + # + # - pytest-benchmark: By default, dynamically decides how many times to run the benchmark. + # It does this to improve benchmarking of very fast single runs: it will run + # the code many times and average the total time. That's not so important + # for code that takes orders of magnitude longer than the timer resolution. + # + # We can override this by using benchmark.pedantic, where we specify the + # number of rounds and iterations. Each iteration contains multiple rounds. + # + # It's also possible to provide a setup function that runs + # between each iteration. A teardown function will be in the next release + # but isn't in pytest-benchmark<=5.1.0. + # + # - pytest-codspeed: This can run in two modes, CPU instrumentation and wall time. Currently + # we use CPU instrumentation as wall time is only available to + # Github organizations. CPU mode will always run the benchmark once, + # regardless of what rounds and iterations are specified in pedantic mode. + # This mode measures the speed of the benchmark by simulating the CPU, + # rather than timing how long it actually takes, so there is no point in + # running it multiple times and taking an average. + # + # So - in this case, because we are benchmarking a relatively slow piece of code, we explicitly + # turn off multiple runs, which could potentially be misleading when running locally. - # Add some specially-named modules which will be treated as not in the cache. - django_path = Path(importlib.util.find_spec("django").origin).parent - extra_modules = [ - django_path / f"{PrefixMissingCache.MISSING_PREFIX}{i}.py" for i in range(number_of_misses) - ] - # Use some real python, which will take time to parse. 
+    # Populate the cache first, before beginning the benchmark.
+    grimp.build_graph("django")
+    # Add some modules which won't be in the cache.
+    # (Use some real python, which will take time to parse.)
+    django_path = Path(importlib.util.find_spec("django").origin).parent  # type: ignore
     module_to_copy = django_path / "forms" / "forms.py"
     module_contents = module_to_copy.read_text()
-    for extra_module in extra_modules:
-        extra_module.write_text(module_contents)
-
-    with override_settings(CACHE_CLASS=PrefixMissingCache):
-        # Populate the cache.
-        grimp.build_graph("django")
+    extra_modules = [
+        django_path / f"module_{i}_{random.randint(100000, 999999)}.py"
+        for i in range(number_of_misses)
+    ]
+    for new_module in extra_modules:
+        # Make sure the module contents aren't identical. Depending on how the cache is implemented,
+        # perhaps this could make a difference.
+        hash_buster = f"\n# Hash busting comment: {uuid.uuid4()}"
+        new_module.write_text(module_contents + hash_buster)
 
-        _run_benchmark(benchmark, grimp.build_graph, "django")
+    benchmark.pedantic(grimp.build_graph, ["django"], rounds=1, iterations=1)
 
-    # Clean up.
-    [module.unlink() for module in extra_modules]
+    # Delete the modules we just created.
+    for module in extra_modules:
+        module.unlink()
 
 
 class TestFindIllegalDependenciesForLayers:
@@ -371,8 +403,7 @@ def _remove_package_dependencies(graph, package_dependencies):
         return graph
 
     def test_top_level_large_graph_violated(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_illegal_dependencies_for_layers,
             layers=TOP_LEVEL_LAYERS,
             containers=("mypackage",),
@@ -383,8 +414,7 @@ def test_top_level_large_graph_kept(self, large_graph, benchmark):
         large_graph = self._remove_package_dependencies(
             large_graph, TOP_LEVEL_PACKAGE_DEPENDENCIES
         )
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_illegal_dependencies_for_layers,
             layers=TOP_LEVEL_LAYERS,
             containers=("mypackage",),
@@ -392,50 +422,39 @@
         assert result == set()
 
     def test_deep_layers_large_graph_violated(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark, large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS
-        )
+        result = benchmark(large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS)
         assert result == DEEP_LAYER_PACKAGE_DEPENDENCIES
 
     def test_deep_layers_large_graph_kept(self, large_graph, benchmark):
         large_graph = self._remove_package_dependencies(
             large_graph, DEEP_LAYER_PACKAGE_DEPENDENCIES
         )
-        result = _run_benchmark(
-            benchmark, large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS
-        )
+        result = benchmark(large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS)
         assert result == set()
 
 
 def test_find_descendants(large_graph, benchmark):
-    result = _run_benchmark(benchmark, large_graph.find_descendants, "mypackage")
+    result = benchmark(large_graph.find_descendants, "mypackage")
     assert len(result) == 28222
 
 
 def test_find_downstream_modules(large_graph, benchmark):
-    result = _run_benchmark(
-        benchmark, large_graph.find_downstream_modules, DEEP_LAYERS[0], as_package=True
-    )
+    result = benchmark(large_graph.find_downstream_modules, DEEP_LAYERS[0], as_package=True)
     assert len(result) == 80
 
 
 def test_find_upstream_modules(large_graph, benchmark):
-    result = _run_benchmark(
-        benchmark, large_graph.find_upstream_modules, DEEP_LAYERS[0], as_package=True
-    )
+    result = benchmark(large_graph.find_upstream_modules, DEEP_LAYERS[0], as_package=True)
     assert len(result) == 2159
 
 
 class TestFindShortestChain:
     def test_chain_found(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark, large_graph.find_shortest_chain, DEEP_LAYERS[0], DEEP_LAYERS[1]
-        )
+        result = benchmark(large_graph.find_shortest_chain, DEEP_LAYERS[0], DEEP_LAYERS[1])
         assert result is not None
 
     def test_no_chain(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chain,
             DEEP_LAYERS[0],
             "mypackage.data.vendors.4053192739.6373932949",
@@ -445,8 +464,7 @@
 
 class TestFindShortestChains:
     def test_chains_found(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chains,
             DEEP_LAYERS[0],
             DEEP_LAYERS[1],
@@ -455,8 +473,7 @@
         )
         assert len(result) > 0
     def test_no_chains(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chains,
             DEEP_LAYERS[0],
             "mypackage.data.vendors.4053192739.6373932949",
@@ -489,8 +506,7 @@ def test_chains_found_sparse_imports(self, benchmark):
             graph.add_import(importer=f"a.m{i}", imported=f"b.m{i}")
             graph.add_import(importer=f"b.m{i}", imported=f"c.m{i}")
 
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             graph.find_shortest_chains,
             "a",
             "c",
@@ -500,7 +516,7 @@
 
 
 def test_copy_graph(large_graph, benchmark):
-    _run_benchmark(benchmark, lambda: deepcopy(large_graph))
+    benchmark(lambda: deepcopy(large_graph))
 
 
 def test_modules_property_first_access(large_graph, benchmark):
@@ -512,7 +528,7 @@ def f():
         # Accessing the modules property is what we're benchmarking.
         _ = large_graph.modules
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_modules_property_many_accesses(large_graph, benchmark):
@@ -525,7 +541,7 @@ def f():
         for i in range(1000):
             _ = large_graph.modules
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_get_import_details(benchmark):
@@ -540,19 +556,16 @@ def f():
         for i in range(iterations):
             graph.get_import_details(importer=f"blue_{i}", imported=f"green_{i}")
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_find_matching_modules(benchmark, large_graph):
-    matching_modules = _run_benchmark(
-        benchmark, lambda: large_graph.find_matching_modules("mypackage.domain.**")
-    )
+    matching_modules = benchmark(lambda: large_graph.find_matching_modules("mypackage.domain.**"))
     assert len(matching_modules) == 2519
 
 
 def test_find_matching_direct_imports(benchmark, large_graph):
-    matching_imports = _run_benchmark(
-        benchmark,
+    matching_imports = benchmark(
         lambda: large_graph.find_matching_direct_imports(
             "mypackage.domain.** -> mypackage.data.**"
         ),
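
Note (not part of the patch): the new comment block above mentions that pytest-benchmark's pedantic mode also accepts a setup callable that runs before each round. As a rough, local-only sketch of that alternative, the missing modules could be recreated before every round so each round sees the same cache misses; it would not help under codspeed's CPU-instrumentation mode, which ignores the round count. The module name `cache_miss_example.py`, the round count and the test name below are made up for illustration, assuming pytest-benchmark's documented pedantic API.

    import importlib.util
    import uuid
    from pathlib import Path

    import grimp


    def test_build_django_with_a_few_cache_misses_setup_variant(benchmark):
        grimp.build_graph("django")  # Warm the cache once before measuring.

        django_path = Path(importlib.util.find_spec("django").origin).parent  # type: ignore
        extra_module = django_path / "cache_miss_example.py"  # Hypothetical module name.

        def setup():
            # Rewrite the module with fresh contents so every round misses the cache.
            extra_module.write_text(f"# Hash busting comment: {uuid.uuid4()}\n")

        try:
            # setup runs before each round; iterations must stay at 1 when setup is used.
            benchmark.pedantic(
                grimp.build_graph, args=("django",), setup=setup, rounds=3, iterations=1
            )
        finally:
            extra_module.unlink(missing_ok=True)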