diff --git a/rust/src/filesystem.rs b/rust/src/filesystem.rs index c234aa64..8f3a74f0 100644 --- a/rust/src/filesystem.rs +++ b/rust/src/filesystem.rs @@ -323,7 +323,7 @@ fn parse_indented_file_system_string(file_system_string: &str) -> HashMap = HashMap::new(); let mut path_stack: Vec = Vec::new(); // Stores current directory path components let mut first_line = true; // Flag to handle the very first path component - + let mut first_line_indent: usize = 0; // Normalize newlines and split into lines let buffer = file_system_string.replace("\r\n", "\n"); let lines: Vec<&str> = buffer.split('\n').collect(); @@ -334,27 +334,25 @@ fn parse_indented_file_system_string(file_system_string: &str) -> HashMap current_depth { path_stack.pop(); } - // If the current line is a file, append it to the path for inserting into map, // then pop it off so that subsequent siblings are correctly handled. // If it's a directory, append it and it stays on the stack for its children. diff --git a/src/grimp/adaptors/modulefinder.py b/src/grimp/adaptors/modulefinder.py index 934190f9..310a6ea1 100644 --- a/src/grimp/adaptors/modulefinder.py +++ b/src/grimp/adaptors/modulefinder.py @@ -4,6 +4,7 @@ from grimp.application.ports import modulefinder from grimp.application.ports.filesystem import AbstractFileSystem from grimp.domain.valueobjects import Module +from collections.abc import Set logger = logging.getLogger(__name__) @@ -37,13 +38,23 @@ def _get_python_files_inside_package(self, directory: str) -> Iterable[str]: Return: Generator of Python file names. """ + # Containers are directories within `directory` that contain an __init__.py + # and who do not have any ancestors within `directory` that do contain an __init__.py. + # We keep track of these so we can tell the difference between nested namespace packages + # (which we do want to include) and orphaned packages (which we don't). + containers: set[str] = set() for dirpath, dirs, files in self.file_system.walk(directory): - # Don't include directories that aren't Python packages, - # nor their subdirectories. - if "__init__.py" not in files: - for d in list(dirs): - dirs.remove(d) - continue + if self._is_in_container(dirpath, containers): + # Are we somewhere inside a non-namespace package (i.e. portion)? If so, don't include directories + # that aren't Python packages, nor their subdirectories. + if "__init__.py" not in files: + # Don't drill down further in this directory. + for d in list(dirs): + dirs.remove(d) + continue + elif "__init__.py" in files: + # It's a container. + containers.add(dirpath) # Don't include hidden directories. dirs_to_remove = [d for d in dirs if self._should_ignore_dir(d)] @@ -54,6 +65,12 @@ def _get_python_files_inside_package(self, directory: str) -> Iterable[str]: if self._is_python_file(filename, dirpath): yield self.file_system.join(dirpath, filename) + def _is_in_container(self, directory: str, containers: Set[str]) -> bool: + for container in containers: + if directory.startswith(container): + return True + return False + def _should_ignore_dir(self, directory: str) -> bool: # TODO: make this configurable. # Skip adding directories that are hidden. diff --git a/src/grimp/adaptors/packagefinder.py b/src/grimp/adaptors/packagefinder.py index e87f644e..55c5d28f 100644 --- a/src/grimp/adaptors/packagefinder.py +++ b/src/grimp/adaptors/packagefinder.py @@ -12,10 +12,9 @@ class ImportLibPackageFinder(AbstractPackageFinder): - def determine_package_directory( + def determine_package_directories( self, package_name: str, file_system: AbstractFileSystem - ) -> str: - # TODO - do we need to add the current working directory here? + ) -> set[str]: # Attempt to locate the package file. spec = importlib.util.find_spec(package_name) if not spec: @@ -26,13 +25,8 @@ def determine_package_directory( if not self._is_a_package(spec, file_system) or self._has_a_non_namespace_parent(spec): raise exceptions.NotATopLevelModule - return file_system.dirname(spec.origin) - - raise exceptions.NamespacePackageEncountered( - f"Package '{package_name}' is a namespace package (see PEP 420). Try specifying the " - "portion name instead. If you are not intentionally using namespace packages, " - "adding an __init__.py file should fix the problem." - ) + assert spec.submodule_search_locations # This should be the case if spec.has_location. + return set(spec.submodule_search_locations) def _is_a_package(self, spec: ModuleSpec, file_system: AbstractFileSystem) -> bool: assert spec.origin diff --git a/src/grimp/application/ports/packagefinder.py b/src/grimp/application/ports/packagefinder.py index 09c4da45..30b6d0ed 100644 --- a/src/grimp/application/ports/packagefinder.py +++ b/src/grimp/application/ports/packagefinder.py @@ -5,7 +5,7 @@ class AbstractPackageFinder(abc.ABC): @abc.abstractmethod - def determine_package_directory( + def determine_package_directories( self, package_name: str, file_system: AbstractFileSystem - ) -> str: + ) -> set[str]: raise NotImplementedError diff --git a/src/grimp/application/scanning.py b/src/grimp/application/scanning.py index 09eab3c8..da111796 100644 --- a/src/grimp/application/scanning.py +++ b/src/grimp/application/scanning.py @@ -1,4 +1,4 @@ -from collections.abc import Collection +from collections.abc import Collection, Set from grimp import _rustgrimp as rust # type: ignore[attr-defined] from grimp.domain.valueobjects import DirectImport, Module @@ -10,7 +10,7 @@ def scan_imports( module_files: Collection[ModuleFile], *, - found_packages: set[FoundPackage], + found_packages: Set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool, ) -> dict[ModuleFile, set[DirectImport]]: diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index e7503d29..ef3225d8 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -3,7 +3,8 @@ """ from typing import cast -from collections.abc import Sequence, Iterable + +from collections.abc import Sequence, Iterable, Set, Mapping from .scanning import scan_imports from ..application.ports import caching @@ -50,9 +51,10 @@ def build_graph( """ file_system: AbstractFileSystem = settings.FILE_SYSTEM + root_package_names = [package_name] + list(additional_package_names) found_packages = _find_packages( file_system=file_system, - package_names=[package_name] + list(additional_package_names), + root_package_names=root_package_names, ) imports_by_module = _scan_packages( @@ -63,31 +65,32 @@ def build_graph( cache_dir=cache_dir, ) - graph = _assemble_graph(found_packages, imports_by_module) + graph = _assemble_graph(root_package_names, found_packages, imports_by_module) return graph def _find_packages( - file_system: AbstractFileSystem, package_names: Sequence[object] + file_system: AbstractFileSystem, root_package_names: Sequence[object] ) -> set[FoundPackage]: - package_names = _validate_package_names_are_strings(package_names) + root_package_names = _validate_package_names_are_strings(root_package_names) module_finder: AbstractModuleFinder = settings.MODULE_FINDER package_finder: AbstractPackageFinder = settings.PACKAGE_FINDER found_packages: set[FoundPackage] = set() - for package_name in package_names: - package_directory = package_finder.determine_package_directory( + for package_name in root_package_names: + package_directories = package_finder.determine_package_directories( package_name=package_name, file_system=file_system ) - found_package = module_finder.find_package( - package_name=package_name, - package_directory=package_directory, - file_system=file_system, - ) - found_packages.add(found_package) + for package_directory in package_directories: + found_package = module_finder.find_package( + package_name=package_name, + package_directory=package_directory, + file_system=file_system, + ) + found_packages.add(found_package) return found_packages @@ -101,7 +104,7 @@ def _validate_package_names_are_strings( def _scan_packages( - found_packages: set[FoundPackage], + found_packages: Set[FoundPackage], file_system: BasicFileSystem, include_external_packages: bool, exclude_type_checking_imports: bool, @@ -150,13 +153,19 @@ def _scan_packages( def _assemble_graph( - found_packages: set[FoundPackage], - imports_by_module: dict[Module, set[DirectImport]], + root_package_names: Sequence[str], + found_packages: Set[FoundPackage], + imports_by_module: Mapping[Module, Set[DirectImport]], ) -> ImportGraph: graph: ImportGraph = settings.IMPORT_GRAPH_CLASS() package_modules = {Module(found_package.name) for found_package in found_packages} + # Add root packages first. These will usually be the names of the found packages, but + # not in the case of namespace packages. + for root_package_name in root_package_names: + graph.add_module(root_package_name) + for module, direct_imports in imports_by_module.items(): graph.add_module(module.name) for direct_import in direct_imports: @@ -176,7 +185,7 @@ def _assemble_graph( return graph -def _is_external(module: Module, package_modules: set[Module]) -> bool: +def _is_external(module: Module, package_modules: Set[Module]) -> bool: return not any( module.is_descendant_of(package_module) or module == package_module for package_module in package_modules diff --git a/tests/adaptors/filesystem.py b/tests/adaptors/filesystem.py index d8ed455d..ed619764 100644 --- a/tests/adaptors/filesystem.py +++ b/tests/adaptors/filesystem.py @@ -68,12 +68,23 @@ def walk(self, directory_name): For each directory in the tree rooted at directory top (including top itself), it yields a 3-tuple (dirpath, dirnames, filenames). """ - try: - directory_contents = self.contents[directory_name] - except KeyError: - return [] - - yield from self._walk_contents(directory_contents, containing_directory=directory_name) + if contents_key := self._drill_down_for_contents_key(directory_name): + directory_contents = self.contents[contents_key] + assert isinstance(directory_contents, dict) + yield from self._walk_contents(directory_contents, containing_directory=contents_key) + + return [] + + def _drill_down_for_contents_key(self, directory_name: str) -> str | None: + if directory_name in self.contents: + return directory_name + + if self.sep in directory_name: + # Try one level up. + parent = self.sep.join(directory_name.split(self.sep)[:-1]) + return self._drill_down_for_contents_key(parent) + else: + return None def _walk_contents( self, directory_contents: dict[str, Any], containing_directory: str diff --git a/tests/adaptors/packagefinder.py b/tests/adaptors/packagefinder.py index b242f26d..5740677d 100644 --- a/tests/adaptors/packagefinder.py +++ b/tests/adaptors/packagefinder.py @@ -1,11 +1,13 @@ +from collections.abc import Mapping from grimp.application.ports.packagefinder import AbstractPackageFinder + from grimp.application.ports.filesystem import AbstractFileSystem class BaseFakePackageFinder(AbstractPackageFinder): - directory_map: dict[str, str] = {} + directory_map: Mapping[str, set[str]] = {} - def determine_package_directory( + def determine_package_directories( self, package_name: str, file_system: AbstractFileSystem - ) -> str: + ) -> set[str]: return self.directory_map[package_name] diff --git a/tests/functional/test_error_handling.py b/tests/functional/test_error_handling.py index 45635986..e79914ba 100644 --- a/tests/functional/test_error_handling.py +++ b/tests/functional/test_error_handling.py @@ -1,5 +1,4 @@ import os -import re import pytest # type: ignore @@ -19,15 +18,3 @@ def test_syntax_error_includes_module(): filename=filename, lineno=5, text="fromb . import two" ) assert expected_exception == excinfo.value - - -def test_missing_root_init_file(): - with pytest.raises( - exceptions.NamespacePackageEncountered, - match=re.escape( - "Package 'missingrootinitpackage' is a namespace package (see PEP 420). Try specifying " - "the portion name instead. If you are not intentionally " - "using namespace packages, adding an __init__.py file should fix the problem." - ), - ): - build_graph("missingrootinitpackage", cache_dir=None) diff --git a/tests/functional/test_namespace_packages.py b/tests/functional/test_namespace_packages.py index 0d0d8b83..8066b44c 100644 --- a/tests/functional/test_namespace_packages.py +++ b/tests/functional/test_namespace_packages.py @@ -1,6 +1,6 @@ import pytest # type: ignore -from grimp import build_graph, exceptions +from grimp import build_graph """ For ease of reference, these are the imports of all the files: @@ -17,14 +17,17 @@ nestednamespace.foo.alpha.blue.one, nestednamespace.bar.beta.orange """ +YELLOW_MODULES = {"mynamespace.yellow"} +GREEN_MODULES = {"mynamespace.green", "mynamespace.green.alpha"} +BLUE_MODULES = {"mynamespace.blue", "mynamespace.blue.alpha", "mynamespace.blue.beta"} + def test_build_graph_for_namespace(): - with pytest.raises(exceptions.NamespacePackageEncountered): - build_graph("mynamespace", cache_dir=None) + graph = build_graph("mynamespace", cache_dir=None) + assert graph.modules == {"mynamespace"} | YELLOW_MODULES | GREEN_MODULES | BLUE_MODULES -GREEN_MODULES = {"mynamespace.green", "mynamespace.green.alpha"} -BLUE_MODULES = {"mynamespace.blue", "mynamespace.blue.alpha", "mynamespace.blue.beta"} + assert graph.count_imports() @pytest.mark.parametrize( @@ -100,13 +103,29 @@ def test_import_between_namespace_children(): # Nested namespaces +@pytest.mark.xfail(strict=True) @pytest.mark.parametrize( "package_name", ("nestednamespace", "nestednamespace.foo", "nestednamespace.foo.alpha"), ) def test_build_graph_for_nested_namespace(package_name): - with pytest.raises(exceptions.NamespacePackageEncountered): - build_graph(package_name, cache_dir=None) + graph = build_graph(package_name, cache_dir=None) + + assert graph.modules == { + "nestednamespace", + "nestednamespace.foo", + "nestednamespace.foo.alpha", + "nestednamespace.foo.alpha.blue", + "nestednamespace.foo.alpha.blue.one", + "nestednamespace.foo.alpha.blue.two", + "nestednamespace.foo.alpha.green", + "nestednamespace.foo.alpha.green.one", + "nestednamespace.foo.alpha.green.two", + "nestednamespace.bar", + "nestednamespace.bar.beta", + "nestednamespace.bar.beta.orange", + } + assert graph.count_imports() @pytest.mark.parametrize( diff --git a/tests/unit/adaptors/test_filesystem.py b/tests/unit/adaptors/test_filesystem.py index e769902b..1c526bc2 100644 --- a/tests/unit/adaptors/test_filesystem.py +++ b/tests/unit/adaptors/test_filesystem.py @@ -40,6 +40,7 @@ def test_split(self, path, expected): ("/path/to/mypackage/readme.txt", True), ("/path/to/mypackage/foo/one.txt", True), ("/path/to/mypackage/foo/two/green.txt", True), + ("/path/to/mypackage/bar/blue.txt", True), ("/path/to/nonexistent.txt", False), ("/path/to/mypackage/purple.txt", False), ], @@ -53,6 +54,8 @@ def test_exists_content_only(self, file_name, expected): one.txt two/ green.txt + bar/ + blue.txt """ ) diff --git a/tests/unit/adaptors/test_modulefinder.py b/tests/unit/adaptors/test_modulefinder.py index 9d7a04c1..72199b35 100644 --- a/tests/unit/adaptors/test_modulefinder.py +++ b/tests/unit/adaptors/test_modulefinder.py @@ -49,12 +49,52 @@ def test_happy_path(): ) -def test_namespaced_packages(): +def test_when_package_is_namespace(): module_finder = ModuleFinder() file_system = FakeFileSystem( contents=""" - /path/to/somenamespace/foo/ + /path/to/somenamespace/ + foo/ + __init__.py + blue.py + green/ + __init__.py + one.py + two/ + __init__.py + """ + ) + + result = module_finder.find_package( + package_name="somenamespace", + package_directory="/path/to/somenamespace", + file_system=file_system, + ) + + assert result == FoundPackage( + name="somenamespace", + directory="/path/to/somenamespace", + module_files=frozenset( + { + # N.B. no ModuleFile for the namespace package. + ModuleFile(module=Module("somenamespace.foo"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("somenamespace.foo.blue"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("somenamespace.foo.green"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("somenamespace.foo.green.one"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("somenamespace.foo.green.two"), mtime=DEFAULT_MTIME), + } + ), + ) + + +def test_when_package_is_namespace_portion(): + module_finder = ModuleFinder() + + file_system = FakeFileSystem( + contents=""" + /path/to/somenamespace/ + foo/ __init__.py blue.py green/ @@ -124,6 +164,53 @@ def test_ignores_orphaned_python_files(): ) +def test_includes_nested_namespace_packages_but_not_orphans(): + module_finder = ModuleFinder() + + file_system = FakeFileSystem( + contents=""" + /path/to/mypackage/ + foo/ + blue/ + __init__.py + one.py + two/ + __init__.py + alpha.py + noinitpackage/ + three.py + orphan/ + __init__.py + four.py + green/ + five/ + __init__.py + beta.py + """ + ) + + result = module_finder.find_package( + package_name="mypackage", + package_directory="/path/to/mypackage", + file_system=file_system, + ) + + assert result == FoundPackage( + name="mypackage", + directory="/path/to/mypackage", + module_files=frozenset( + { + ModuleFile(module=Module("mypackage.foo.blue"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("mypackage.foo.blue.one"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("mypackage.foo.blue.two"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("mypackage.foo.blue.two.alpha"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("mypackage.foo.green.five"), mtime=DEFAULT_MTIME), + ModuleFile(module=Module("mypackage.foo.green.five.beta"), mtime=DEFAULT_MTIME), + } + ), + ) + + @pytest.mark.parametrize( "extension, should_warn", ( diff --git a/tests/unit/adaptors/test_packagefinder.py b/tests/unit/adaptors/test_packagefinder.py index 51bb2eed..f7f2de62 100644 --- a/tests/unit/adaptors/test_packagefinder.py +++ b/tests/unit/adaptors/test_packagefinder.py @@ -1,4 +1,3 @@ -import re from pathlib import Path import pytest # type: ignore @@ -14,35 +13,42 @@ @pytest.mark.parametrize( "package, expected", ( - ("testpackage", assets / "testpackage"), + ("testpackage", {str(assets / "testpackage")}), + ( + "missingrootinitpackage", + { + str(assets / "missingrootinitpackage"), + }, + ), + ( + "missingrootinitpackage.one", + { + str(assets / "missingrootinitpackage" / "one"), + }, + ), ( "mynamespace.green", - assets / "namespacepackages" / "locationone" / "mynamespace" / "green", + {str(assets / "namespacepackages" / "locationone" / "mynamespace" / "green")}, ), ( "mynamespace.blue", - assets / "namespacepackages" / "locationtwo" / "mynamespace" / "blue", + {str(assets / "namespacepackages" / "locationtwo" / "mynamespace" / "blue")}, + ), + ( + "mynamespace", + { + str(assets / "namespacepackages" / "locationone" / "mynamespace"), + str(assets / "namespacepackages" / "locationtwo" / "mynamespace"), + }, ), ), ) -def test_determine_package_directory(package, expected): - assert ImportLibPackageFinder().determine_package_directory(package, FileSystem()) == str( - expected +def test_determine_package_directories(package, expected): + assert ( + ImportLibPackageFinder().determine_package_directories(package, FileSystem()) == expected ) -def test_determine_package_directory_doesnt_support_namespace_packages(): - with pytest.raises( - exceptions.NamespacePackageEncountered, - match=re.escape( - "Package 'mynamespace' is a namespace package (see PEP 420). Try specifying the" - " portion name instead. If you are not intentionally using " - "namespace packages, adding an __init__.py file should fix the problem." - ), - ): - ImportLibPackageFinder().determine_package_directory("mynamespace", FakeFileSystem()) - - @pytest.mark.parametrize( "package", ( @@ -53,8 +59,8 @@ def test_determine_package_directory_doesnt_support_namespace_packages(): "mynamespace.yellow", ), ) -def test_determine_package_directory_doesnt_support_non_top_level_modules(package): +def test_determine_package_directories_doesnt_support_non_top_level_modules(package): with pytest.raises( exceptions.NotATopLevelModule, ): - ImportLibPackageFinder().determine_package_directory(package, FakeFileSystem()) + ImportLibPackageFinder().determine_package_directories(package, FakeFileSystem()) diff --git a/tests/unit/application/test_usecases.py b/tests/unit/application/test_usecases.py index 5f7adbe8..88b61588 100644 --- a/tests/unit/application/test_usecases.py +++ b/tests/unit/application/test_usecases.py @@ -36,7 +36,7 @@ def test_happy_path(self, include_external_packages): ) class FakePackageFinder(BaseFakePackageFinder): - directory_map = {"mypackage": "/path/to/mypackage"} + directory_map = {"mypackage": {"/path/to/mypackage"}} with override_settings(FILE_SYSTEM=file_system, PACKAGE_FINDER=FakePackageFinder()): graph = usecases.build_graph( @@ -66,6 +66,51 @@ class FakePackageFinder(BaseFakePackageFinder): with pytest.raises(ValueError, match="Cannot find children of a squashed module."): graph.find_children(module) + def test_namespace_package_passed_as_root(self): + file_system = FakeFileSystem( + contents=""" + /path/to/mypackage/ + foo/ + __init__.py + one.py + two/ + __init__.py + green.py + blue.py + bar/ + __init__.py + three.py + /different-path/to/mypackage/ + foobar/ + __init__.py + four.py + """, + ) + + class FakePackageFinder(BaseFakePackageFinder): + directory_map = { + "mypackage": { + "/path/to/mypackage", + "/different-path/to/mypackage", + } + } + + with override_settings(FILE_SYSTEM=file_system, PACKAGE_FINDER=FakePackageFinder()): + graph = usecases.build_graph("mypackage") + + assert graph.modules == { + "mypackage", + "mypackage.foo", + "mypackage.foo.one", + "mypackage.foo.two", + "mypackage.foo.two.green", + "mypackage.foo.two.blue", + "mypackage.bar", + "mypackage.bar.three", + "mypackage.foobar", + "mypackage.foobar.four", + } + def test_boolean_additional_package_raises_type_error(self): """ Tests that a useful error message if build_graph is called @@ -85,7 +130,7 @@ def test_build_graph_respects_cache_dir(self, supplied_cache_dir): file_system = FakeFileSystem() class FakePackageFinder(BaseFakePackageFinder): - directory_map = {"mypackage": "/path/to/mypackage"} + directory_map = {"mypackage": {"/path/to/mypackage"}} SOME_DEFAULT_CACHE_DIR = ".some_default" @@ -147,14 +192,14 @@ def test_forgives_wrong_type_being_passed_to_include_external_packages(self): ) class FakePackageFinder(BaseFakePackageFinder): - directory_map = {"mypackage": "/path/to/mypackage"} + directory_map = {"mypackage": {"/path/to/mypackage"}} with override_settings(FILE_SYSTEM=file_system, PACKAGE_FINDER=FakePackageFinder()): graph = usecases.build_graph( "mypackage", # Note: this should be a bool, but we want to tolerate it, # as Import Linter currently has a bug where it will pass it as None. - include_external_packages=None, + include_external_packages=False, ) expected_modules = {