diff --git a/CHANGES/1371.feature b/CHANGES/1371.feature new file mode 100644 index 000000000..87129b070 --- /dev/null +++ b/CHANGES/1371.feature @@ -0,0 +1 @@ +Implement `layout` option on AptPublication that allows user to choose different cache-friendly style of deb URL. diff --git a/pulp_deb/app/constants.py b/pulp_deb/app/constants.py index 7d9608156..511d0f23a 100644 --- a/pulp_deb/app/constants.py +++ b/pulp_deb/app/constants.py @@ -1,3 +1,5 @@ +from types import SimpleNamespace + NO_MD5_WARNING_MESSAGE = ( "Your pulp instance is configured to prohibit use of the MD5 checksum algorithm!\n" 'Processing MD5 IN ADDITION to a secure hash like SHA-256 is "highly recommended".\n' @@ -17,3 +19,15 @@ # Represents null values since nulls can't be used in unique indexes in postgres < 15 NULL_VALUE = "__!!!NULL VALUE!!!__" + +LAYOUT_TYPES = SimpleNamespace( + NESTED_ALPHABETICALLY="nested_alphabetically", # default + NESTED_BY_DIGEST="nested_by_digest", + NESTED_BY_BOTH="nested_by_both", +) + +LAYOUT_CHOICES = ( + (LAYOUT_TYPES.NESTED_ALPHABETICALLY, LAYOUT_TYPES.NESTED_ALPHABETICALLY), + (LAYOUT_TYPES.NESTED_BY_DIGEST, LAYOUT_TYPES.NESTED_BY_DIGEST), + (LAYOUT_TYPES.NESTED_BY_BOTH, LAYOUT_TYPES.NESTED_BY_BOTH), +) diff --git a/pulp_deb/app/migrations/0034_aptpublication_layout.py b/pulp_deb/app/migrations/0034_aptpublication_layout.py new file mode 100644 index 000000000..1501d3560 --- /dev/null +++ b/pulp_deb/app/migrations/0034_aptpublication_layout.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.27 on 2025-12-22 19:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('deb', '0033_aptalternatecontentsource'), + ] + + operations = [ + migrations.AddField( + model_name='aptpublication', + name='layout', + field=models.TextField(choices=[('nested_alphabetically', 'nested_alphabetically'), ('nested_by_digest', 'nested_by_digest'), ('nested_by_both', 'nested_by_both')], default='nested_alphabetically'), + ), + ] diff --git a/pulp_deb/app/models/content/content.py b/pulp_deb/app/models/content/content.py index 284088802..02b01a808 100644 --- a/pulp_deb/app/models/content/content.py +++ b/pulp_deb/app/models/content/content.py @@ -16,6 +16,8 @@ from pulpcore.plugin.models import Content from pulpcore.plugin.util import get_domain_pk +from pulp_deb.app.constants import LAYOUT_TYPES + BOOL_CHOICES = [(True, "yes"), (False, "no")] @@ -76,21 +78,30 @@ def name(self): """Print a nice name for Packages.""" return "{}_{}_{}".format(self.package, self.version, self.architecture) - def filename(self, component=""): + def filename(self, component="", layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY): """Assemble filename in pool directory.""" - sourcename = self.source or self.package - sourcename = sourcename.split("(", 1)[0].rstrip() - if sourcename.startswith("lib"): - prefix = sourcename[0:4] - else: - prefix = sourcename[0] - return os.path.join( - "pool", - component, - prefix, - sourcename, - "{}.{}".format(self.name, self.SUFFIX), - ) + if layout == LAYOUT_TYPES.NESTED_ALPHABETICALLY: + sourcename = self.source or self.package + sourcename = sourcename.split("(", 1)[0].rstrip() + if sourcename.startswith("lib"): + prefix = sourcename[0:4] + else: + prefix = sourcename[0] + return os.path.join( + "pool", + component, + prefix, + sourcename, + "{}.{}".format(self.name, self.SUFFIX), + ) + else: # NESTED_BY_DIGEST or NESTED_BY_BOTH. The primary URL in BOTH is by digest. + return os.path.join( + "pool", + component, + self.sha256[0:2], + self.sha256[2:6], + "{}.{}".format(self.name, self.SUFFIX), + ) class Meta: default_related_name = "%(app_label)s_%(model_name)s" @@ -209,20 +220,18 @@ def derived_dsc_filename(self): """Print a nice name for the Dsc file.""" return "{}_{}.{}".format(self.source, self.version, self.SUFFIX) - def derived_dir(self, component=""): + def derived_dir(self, component="", layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY): """Assemble full dir in pool directory.""" sourcename = self.source - prefix = sourcename[0] - return os.path.join( - "pool", - component, - prefix, - sourcename, - ) - - def derived_path(self, name, component=""): + if layout == LAYOUT_TYPES.NESTED_ALPHABETICALLY: + return os.path.join("pool", component, sourcename[0], sourcename) + else: # NESTED_BY_DIGEST or NESTED_BY_BOTH + sha256 = self.sha256 + return os.path.join("pool", component, sha256[0:2], sha256[2:6], sourcename) + + def derived_path(self, name, component="", layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY): """Assemble filename in pool directory.""" - return os.path.join(self.derived_dir(component), name) + return os.path.join(self.derived_dir(component, layout=layout), name) @property def checksums_sha1(self): diff --git a/pulp_deb/app/models/publication.py b/pulp_deb/app/models/publication.py index 6329163e3..4caad5989 100644 --- a/pulp_deb/app/models/publication.py +++ b/pulp_deb/app/models/publication.py @@ -14,6 +14,7 @@ RepositoryVersion, ) +from pulp_deb.app.constants import LAYOUT_CHOICES, LAYOUT_TYPES from pulp_deb.app.models.signing_service import AptReleaseSigningService @@ -71,6 +72,7 @@ class AptPublication(Publication, AutoAddObjPermsMixin): simple = models.BooleanField(default=False) structured = models.BooleanField(default=True) + layout = models.TextField(choices=LAYOUT_CHOICES, default=LAYOUT_TYPES.NESTED_ALPHABETICALLY) signing_service = models.ForeignKey( AptReleaseSigningService, on_delete=models.PROTECT, null=True ) diff --git a/pulp_deb/app/serializers/content_serializers.py b/pulp_deb/app/serializers/content_serializers.py index 08cfb68a3..e71fe3bb3 100644 --- a/pulp_deb/app/serializers/content_serializers.py +++ b/pulp_deb/app/serializers/content_serializers.py @@ -27,6 +27,7 @@ from pulp_deb.app.constants import ( PACKAGE_UPLOAD_DEFAULT_COMPONENT, PACKAGE_UPLOAD_DEFAULT_DISTRIBUTION, + LAYOUT_TYPES, ) from pulp_deb.app.constants import NULL_VALUE @@ -478,7 +479,13 @@ def from822(cls, data, **kwargs): package_fields["custom_fields"] = custom_fields return cls(data=package_fields, **kwargs) - def to822(self, component="", artifact_dict=None, remote_artifact_dict=None): + def to822( + self, + component="", + artifact_dict=None, + remote_artifact_dict=None, + layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY, + ): """Create deb822.Package object from model.""" ret = deb822.Packages() @@ -502,7 +509,7 @@ def to822(self, component="", artifact_dict=None, remote_artifact_dict=None): ret.update({"SHA1": artifact.sha1} if artifact.sha1 else {}) ret.update({"SHA256": artifact.sha256}) ret.update({"Size": str(artifact.size)}) - ret.update({"Filename": self.instance.filename(component)}) + ret.update({"Filename": self.instance.filename(component, layout=layout)}) return ret @@ -1066,7 +1073,7 @@ def from822(cls, data, **kwargs): data={k: data[v] for k, v in cls.TRANSLATION_DICT.items() if v in data}, **kwargs ) - def to822(self, component="", paragraph=False): + def to822(self, component="", paragraph=False, layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY): """ Create deb822.Dsc object from model. If the 'paragraph' argument is True then the returned object will be adjusted to be a valid paragraph in a source index file. @@ -1103,8 +1110,7 @@ def to822(self, component="", paragraph=False): and include 'Directory'. Currently we skip the optional 'Priority' and 'Section'. """ ret["Package"] = ret.pop("Source") - ret["Directory"] = self.instance.derived_dir(component) - + ret["Directory"] = self.instance.derived_dir(component, layout=layout) return ret class Meta: diff --git a/pulp_deb/app/serializers/publication_serializers.py b/pulp_deb/app/serializers/publication_serializers.py index a7d8c2cf5..d5d7f7ee1 100644 --- a/pulp_deb/app/serializers/publication_serializers.py +++ b/pulp_deb/app/serializers/publication_serializers.py @@ -8,6 +8,7 @@ DetailRelatedField, ) +from pulp_deb.app.constants import LAYOUT_CHOICES, LAYOUT_TYPES from pulp_deb.app.models import ( AptDistribution, AptPublication, @@ -38,6 +39,13 @@ class AptPublicationSerializer(PublicationSerializer): structured = BooleanField(help_text="Activate structured publishing mode.", default=True) publish_upstream_release_fields = BooleanField(help_text="", required=False) checkpoint = serializers.BooleanField(required=False) + layout = serializers.ChoiceField( + help_text="How to layout the packages within the published repository.", + choices=LAYOUT_CHOICES, + required=False, + allow_null=False, + default=LAYOUT_TYPES.NESTED_ALPHABETICALLY, + ) signing_service = RelatedField( help_text="Sign Release files with this signing key", many=False, @@ -62,6 +70,7 @@ class Meta: "checkpoint", "signing_service", "publish_upstream_release_fields", + "layout", ) model = AptPublication diff --git a/pulp_deb/app/tasks/publishing.py b/pulp_deb/app/tasks/publishing.py index 222765e02..83a481208 100644 --- a/pulp_deb/app/tasks/publishing.py +++ b/pulp_deb/app/tasks/publishing.py @@ -17,6 +17,7 @@ from django.db.utils import IntegrityError from django.forms.models import model_to_dict +from pulp_deb.app.constants import NULL_VALUE from pulpcore.plugin.models import ( Artifact, PublishedArtifact, @@ -26,7 +27,6 @@ ) from pulpcore.plugin.util import get_domain -from pulp_deb.app.constants import NULL_VALUE from pulp_deb.app.models import ( AptPublication, AptRepository, @@ -49,6 +49,7 @@ from pulp_deb.app.constants import ( NO_MD5_WARNING_MESSAGE, CHECKSUM_TYPE_MAP, + LAYOUT_TYPES, ) import logging @@ -86,6 +87,7 @@ def publish( checkpoint=False, signing_service_pk=None, publish_upstream_release_fields=None, + layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY, ): """ Use provided publisher to create a Publication based on a RepositoryVersion. @@ -96,6 +98,7 @@ def publish( structured (bool): Create a structured publication with releases and components. checkpoint (bool): Whether to create a checkpoint publication. signing_service_pk (str): Use this SigningService to sign the Release files. + layout (str): The layout determines the form the package urls take. """ @@ -124,6 +127,7 @@ def publish( publication.simple = simple publication.structured = structured publication.signing_service = signing_service + publication.layout = layout repository = AptRepository.objects.get(pk=repo_version.repository.pk) if simple: @@ -341,19 +345,29 @@ def add_packages(self, packages, artifact_dict, remote_artifact_dict): content_artifacts = { package.pk: list(package.contentartifact_set.all()) for package in packages } + layout = self.parent.publication.layout for package in packages: with suppress(IntegrityError): content_artifact = content_artifacts.get(package.pk, [None])[0] - relative_path = package.filename(self.component) - published_artifact = PublishedArtifact( - relative_path=relative_path, + relative_path=package.filename(self.component, layout=layout), publication=self.parent.publication, content_artifact=content_artifact, ) published_artifacts.append(published_artifact) package_data.append((package, package.architecture)) + # In the NESTED_BY_BOTH layout, we want to _also_ publish the package under the + # alphabetical path but _not_ reference it in the repo metadata. + if layout == LAYOUT_TYPES.NESTED_BY_BOTH: + alt_published_artifact = PublishedArtifact( + relative_path=package.filename( + self.component, layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY + ), + publication=self.parent.publication, + content_artifact=content_artifact, + ) + published_artifacts.append(alt_published_artifact) with transaction.atomic(): if published_artifacts: @@ -362,9 +376,9 @@ def add_packages(self, packages, artifact_dict, remote_artifact_dict): for package, architecture in package_data: package_serializer = Package822Serializer(package, context={"request": None}) try: - package_serializer.to822(self.component, artifact_dict, remote_artifact_dict).dump( - self.package_index_files[architecture][0] - ) + package_serializer.to822( + self.component, artifact_dict, remote_artifact_dict, layout=layout + ).dump(self.package_index_files[architecture][0]) except KeyError: log.warn( f"Published package '{package.relative_path}' with architecture " @@ -385,12 +399,27 @@ def add_source_packages(self, source_packages): for content_artifact in artifact_set: published_artifact = PublishedArtifact( relative_path=source_package.derived_path( - os.path.basename(content_artifact.relative_path), self.component + os.path.basename(content_artifact.relative_path), + self.component, + layout=self.parent.publication.layout, ), publication=self.parent.publication, content_artifact=content_artifact, ) published_artifacts.append(published_artifact) + # In the NESTED_BY_BOTH layout, we want to _also_ publish the source package + # under the alphabetical path but _not_ reference it in the repo metadata. + if self.parent.publication.layout == LAYOUT_TYPES.NESTED_BY_BOTH: + alt_published_artifact = PublishedArtifact( + relative_path=source_package.derived_path( + os.path.basename(content_artifact.relative_path), + self.component, + layout=LAYOUT_TYPES.NESTED_ALPHABETICALLY, + ), + publication=self.parent.publication, + content_artifact=content_artifact, + ) + published_artifacts.append(alt_published_artifact) source_package_data.append(source_package) with transaction.atomic(): @@ -401,9 +430,9 @@ def add_source_packages(self, source_packages): dsc_file_822_serializer = DscFile822Serializer( source_package, context={"request": None} ) - dsc_file_822_serializer.to822(self.component, paragraph=True).dump( - self.source_index_file_info[0] - ) + dsc_file_822_serializer.to822( + self.component, paragraph=True, layout=self.parent.publication.layout + ).dump(self.source_index_file_info[0]) self.source_index_file_info[0].write(b"\n") def finish(self): diff --git a/pulp_deb/app/viewsets/publication.py b/pulp_deb/app/viewsets/publication.py index 3c0624132..ca4b5394c 100644 --- a/pulp_deb/app/viewsets/publication.py +++ b/pulp_deb/app/viewsets/publication.py @@ -216,6 +216,7 @@ def create(self, request): publish_upstream_release_fields = serializer.validated_data.get( "publish_upstream_release_fields" ) + layout = serializer.validated_data.get("layout") kwargs = { "repository_version_pk": repository_version.pk, @@ -223,6 +224,7 @@ def create(self, request): "structured": structured, "signing_service_pk": getattr(signing_service, "pk", None), "publish_upstream_release_fields": publish_upstream_release_fields, + "layout": layout, } if checkpoint: kwargs["checkpoint"] = True diff --git a/pulp_deb/tests/functional/api/test_publish.py b/pulp_deb/tests/functional/api/test_publish.py index 8d7e0b87f..09a940181 100644 --- a/pulp_deb/tests/functional/api/test_publish.py +++ b/pulp_deb/tests/functional/api/test_publish.py @@ -2,6 +2,7 @@ from debian import deb822 import os import pytest +import re from pulp_deb.tests.functional.constants import ( DEB_FIXTURE_ALT_SINGLE_DIST, @@ -18,6 +19,9 @@ DEB_PUBLICATION_ARGS_ONLY_SIMPLE, DEB_PUBLICATION_ARGS_ONLY_STRUCTURED, DEB_PUBLICATION_ARGS_SIMPLE_AND_STRUCTURED, + DEB_PUBLICATION_ARGS_NESTED_ALPHABETICALLY, + DEB_PUBLICATION_ARGS_NESTED_BY_DIGEST, + DEB_PUBLICATION_ARGS_NESTED_BY_BOTH, DEB_PUBLISH_COMPLEX_DEBIAN_SECURITY, DEB_PUBLISH_COMPLEX_UBUNTU_BACKPORTS, DEB_PUBLISH_EMPTY_REPOSITORY, @@ -196,6 +200,66 @@ def test_publish_any_repo_version( deb_delete_publication(second_publication) +@pytest.mark.parallel +@pytest.mark.parametrize( + "publication_args", + [ + DEB_PUBLICATION_ARGS_ONLY_STRUCTURED, + DEB_PUBLICATION_ARGS_NESTED_ALPHABETICALLY, + DEB_PUBLICATION_ARGS_NESTED_BY_DIGEST, + DEB_PUBLICATION_ARGS_NESTED_BY_BOTH, + ], +) +def test_publish_layout( + apt_distribution_api, + create_publication_and_verify_repo_version, + deb_distribution_factory, + download_content_unit, + publication_args, +): + """Test whether a the layout parameter is generating expected package URLs + and the packages are available. + + The following cases are tested: + + * `Publish a repo where layout is not specified (alphabetical by default).`_ + * `Publish a repo explicit alphabetical layout.`_ + * `Publish a repo explicit digest layout.`_ + * `Publish a repo explicit "both" layout.`_ + """ + remote_args = {"distributions": DEB_FIXTURE_SINGLE_DIST} + + # Create a repository and publication. + publication, _, _, _ = create_publication_and_verify_repo_version( + remote_args, publication_args, is_modified=True + ) + + # Create a distribution + distribution = deb_distribution_factory(publication) + distribution = apt_distribution_api.read(distribution.pulp_href) + + # Expected filename format + if "layout" not in publication_args or publication_args["layout"] == "nested_alphabetically": + expected = r"pool/asgard/[a-z]/" + else: # nested_by_digest or nested_by_both + expected = r"pool/asgard/[0-9a-f]{2}/[0-9a-f]{4}/" + + # Verify than an expected Package index exists, and that the expected URL is + # generated and that the package is actually available. + package_index = download_content_unit( + distribution.to_dict()["base_path"], "dists/ragnarok/asguard/binary-ppc64/Packages" + ) + for line in str(package_index): + if not line.startswith("Filename:"): + continue + + _, _, filename = line.partition(": ") + assert re.match(expected, filename) + + package = download_content_unit(distribution.to_dict()["base_path"], filename) + assert "404" not in str(package) + + @pytest.mark.parallel @pytest.mark.parametrize( "set_on, expect_signed", diff --git a/pulp_deb/tests/functional/constants.py b/pulp_deb/tests/functional/constants.py index f88683c09..15bc2b092 100644 --- a/pulp_deb/tests/functional/constants.py +++ b/pulp_deb/tests/functional/constants.py @@ -47,6 +47,21 @@ def _clean_dict(d): DEB_PUBLICATION_ARGS_ONLY_STRUCTURED = {"simple": False, "structured": True} DEB_PUBLICATION_ARGS_SIMPLE_AND_STRUCTURED = {"simple": True, "structured": True} DEB_PUBLICATION_ARGS_ALL = {"simple": True, "structured": True, "signing_service": ""} +DEB_PUBLICATION_ARGS_NESTED_ALPHABETICALLY = { + "simple": False, + "structured": True, + "layout": "nested_alphabetically", +} +DEB_PUBLICATION_ARGS_NESTED_BY_DIGEST = { + "simple": False, + "structured": True, + "layout": "nested_by_digest", +} +DEB_PUBLICATION_ARGS_NESTED_BY_BOTH = { + "simple": False, + "structured": True, + "layout": "nested_by_both", +} DEB_P2P_REMOTE_ARGS_SIMPLE = {"distributions": "default", "policy": "immediate"} DEB_P2P_REMOTE_ARGS_STRUCTURED = {"distributions": "ragnarok nosuite", "policy": "immediate"} diff --git a/pulp_deb/tests/unit/test_models.py b/pulp_deb/tests/unit/test_models.py index 3e2cc6096..6389eda82 100644 --- a/pulp_deb/tests/unit/test_models.py +++ b/pulp_deb/tests/unit/test_models.py @@ -1,8 +1,9 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.test import TestCase - from pulpcore.plugin.models import Artifact, ContentArtifact -from pulp_deb.app.models import Package, AptRepository, Release + +from pulp_deb.app.constants import LAYOUT_TYPES +from pulp_deb.app.models import AptRepository, Package, Release from pulp_deb.app.models.repository import handle_duplicate_releases from pulp_deb.app.serializers import Package822Serializer @@ -19,7 +20,7 @@ class TestPackage(TestCase): "Description: A sea jötunn associated with the ocean.\n" "MD5sum: aabb\n" "SHA1: ccdd\n" - "SHA256: eeff\n" + "SHA256: eeff11\n" "Size: 42\n" "Filename: pool/a/aegir/aegir_0.1-edda0_sea.deb\n" ) @@ -33,13 +34,14 @@ def setUp(self): essential=True, maintainer="Utgardloki", description="A sea jötunn associated with the ocean.", + sha256="eeff11", ) self.package1.save() self.artifact1 = Artifact( size=42, md5="aabb", sha1="ccdd", - sha256="eeff", + sha256="eeff11", sha512="kkll", file=SimpleUploadedFile("test_filename", b"test content"), ) @@ -55,12 +57,27 @@ def test_filename(self): """Test that the pool filename of a package is correct.""" self.assertEqual(self.package1.filename(), "pool/a/aegir/aegir_0.1-edda0_sea.deb") + def test_filename_by_digest(self): + """Test that the pool filename of a package is correct.""" + for layout in [LAYOUT_TYPES.NESTED_BY_DIGEST, LAYOUT_TYPES.NESTED_BY_BOTH]: + self.assertEqual( + self.package1.filename(layout=layout), "pool/ee/ff11/aegir_0.1-edda0_sea.deb" + ) + def test_filename_with_component(self): """Test that the pool filename of a package with component is correct.""" self.assertEqual( self.package1.filename("joetunn"), "pool/joetunn/a/aegir/aegir_0.1-edda0_sea.deb" ) + def test_filename_with_component_and_by_digest(self): + """Test that the pool filename of a package with component is correct.""" + for layout in [LAYOUT_TYPES.NESTED_BY_DIGEST, LAYOUT_TYPES.NESTED_BY_BOTH]: + self.assertEqual( + self.package1.filename("joetunn", layout=layout), + "pool/joetunn/ee/ff11/aegir_0.1-edda0_sea.deb", + ) + def test_to822(self): """Test if package transforms correctly into 822dict.""" artifact_dict = {self.package1.sha256: self.artifact1}