diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..e3c19687 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,14 @@ +FROM --platform=linux/amd64 mcr.microsoft.com/devcontainers/python:1-3.12-bullseye + +RUN --mount=type=cache,target=/home/vscode/.cache/pip \ + set -eux; \ + apt-get update; \ + apt-get install -y moreutils; \ + pip wheel --no-deps torch; \ + pip install patchelf pre-commit nox ipdb torch-*.whl; \ + mkdir -p /usr/local/lib/nv; \ + ln -s /usr/local/lib/python3.12/site-packages/nvidia/*/lib/*.so* /usr/local/lib/nv/; \ + echo "/usr/local/lib/nv" > /etc/ld.so.conf.d/nv.conf; \ + ldconfig + +COPY demo.sh / diff --git a/.devcontainer/demo.sh b/.devcontainer/demo.sh new file mode 100755 index 00000000..e488b9d2 --- /dev/null +++ b/.devcontainer/demo.sh @@ -0,0 +1,12 @@ +#! /bin/bash + +rm -rf /tmp/wheelhouse + +auditwheel -v repair \ + --exclude libcuda.so.1 \ + --exclude libcusolver.so.11 \ + --exclude libcusparseLt.so.0 \ + --plat=manylinux_2_35_x86_64 \ + -w /tmp/wheelhouse \ + /torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl \ + 2>&1 | ts '[%Y-%m-%d %H:%M:%S]' diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..9fbd4dae --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,9 @@ +{ + "build": { "dockerfile": "Dockerfile" }, + "mounts": [ + "type=bind,source=${localEnv:HOME}${localEnv:USERPROFILE}/.ssh/id_rsa,target=/home/vscode/.ssh/id_rsa,readonly", + "type=bind,source=${localEnv:HOME}${localEnv:USERPROFILE}/.ssh/known_hosts,target=/home/vscode/.ssh/known_hosts", + "type=tmpfs,target=/tmp" + ], + "postStartCommand": "pip install -e /workspaces/auditwheel" +} diff --git a/src/auditwheel/main.py b/src/auditwheel/main.py index 758b8970..bf180c48 100644 --- a/src/auditwheel/main.py +++ b/src/auditwheel/main.py @@ -9,7 +9,7 @@ import auditwheel -from . import main_lddtree, main_repair, main_show +from . 
import main_lddtree, main_repair, main_show, tools def main() -> int | None: @@ -46,6 +46,7 @@ def main() -> int | None: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.INFO) + tools._COMPRESS_LEVEL = getattr(args, "zip", tools._COMPRESS_LEVEL) if not hasattr(args, "func"): p.print_help() diff --git a/src/auditwheel/main_repair.py b/src/auditwheel/main_repair.py index 65e356ee..b77b992f 100644 --- a/src/auditwheel/main_repair.py +++ b/src/auditwheel/main_repair.py @@ -2,6 +2,7 @@ import argparse import logging +import zlib from pathlib import Path from auditwheel.patcher import Patchelf @@ -40,6 +41,18 @@ def configure_parser(sub_parsers) -> None: # type: ignore[no-untyped-def] formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument("WHEEL_FILE", type=Path, help="Path to wheel file.", nargs="+") + parser.add_argument( + "-z", + "--zip-level", + action=EnvironmentDefault, + metavar="zip", + env="AUDITWHEEL_ZIP_LEVEL", + dest="zip", + type=int, + help="Compress level to be used to create zip file.", + choices=[zlib.Z_DEFAULT_COMPRESSION, *range(zlib.Z_NO_COMPRESSION, zlib.Z_BEST_COMPRESSION + 1)], + default=zlib.Z_DEFAULT_COMPRESSION, + ) parser.add_argument( "--plat", action=EnvironmentDefault, diff --git a/src/auditwheel/policy/__init__.py b/src/auditwheel/policy/__init__.py index 4fc1033b..0cb7a4d2 100644 --- a/src/auditwheel/policy/__init__.py +++ b/src/auditwheel/policy/__init__.py @@ -145,19 +145,19 @@ def versioned_symbols_policy( def policy_is_satisfied( policy_name: str, policy_sym_vers: dict[str, set[str]] ) -> bool: - policy_satisfied = True for name in set(required_vers) & set(policy_sym_vers): if not required_vers[name].issubset(policy_sym_vers[name]): - for symbol in 
required_vers[name] - policy_sym_vers[name]: - logger.debug( - "Package requires %s, incompatible with " - "policy %s which requires %s", - symbol, - policy_name, - policy_sym_vers[name], - ) - policy_satisfied = False - return policy_satisfied + symbols = required_vers[name] - policy_sym_vers[name] + logger.debug( + "%s requires any of %s, incompatible with " + "policy %s which requires %s", + name, + symbols, + policy_name, + policy_sym_vers[name], + ) + return False + return True required_vers: dict[str, set[str]] = {} for symbols in versioned_symbols.values(): diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py new file mode 100644 index 00000000..e69de29b diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index 964676cc..90150c6b 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -7,7 +7,8 @@ import re import shutil import stat -from collections.abc import Iterable +import typing as t +from concurrent.futures import Future, ThreadPoolExecutor, as_completed from fnmatch import fnmatch from os.path import isabs from pathlib import Path @@ -65,6 +66,10 @@ def repair_wheel( dest_dir = Path(match.group("name") + lib_sdir) + pool = ThreadPoolExecutor() + copy_works: dict[Path, Future[t.Any]] = {} + replace_works: dict[Path, Future[t.Any]] = {} + # here, fn is a path to an ELF file (lib or executable) in # the wheel, and v['libs'] contains its required libs for fn, v in external_refs_by_fn.items(): @@ -82,25 +87,48 @@ def repair_wheel( if not dest_dir.exists(): dest_dir.mkdir() - new_soname, new_path = copylib(src_path, dest_dir, patcher) + new_soname, new_path = copylib(src_path, dest_dir, patcher, dry=True) + if new_path not in copy_works: + copy_works[new_path] = pool.submit( + copylib, src_path, dest_dir, patcher + ) soname_map[soname] = (new_soname, new_path) replacements.append((soname, new_soname)) - if replacements: - patcher.replace_needed(fn, *replacements) - if len(ext_libs) > 0: - new_fn = fn - if 
_path_is_script(fn): - new_fn = _replace_elf_script_with_shim(match.group("name"), fn) + # Replace rpath do not need copy to be done + def _inner_replace( + fn: Path, replacements: list[tuple[str, str]], append_rpath: bool + ) -> None: + logger.info("Start replace for %s", fn) + if replacements: + patcher.replace_needed(fn, *replacements) + + if append_rpath: + new_fn = fn + if _path_is_script(fn): + new_fn = _replace_elf_script_with_shim(match.group("name"), fn) + + new_rpath = os.path.relpath(dest_dir, new_fn.parent) + new_rpath = os.path.join("$ORIGIN", new_rpath) + append_rpath_within_wheel(new_fn, new_rpath, ctx.name, patcher) + + logger.info("Done replace for %s", fn) - new_rpath = os.path.relpath(dest_dir, new_fn.parent) - new_rpath = os.path.join("$ORIGIN", new_rpath) - append_rpath_within_wheel(new_fn, new_rpath, ctx.name, patcher) + replace_works[fn] = pool.submit( + _inner_replace, fn, replacements, len(ext_libs) > 0 + ) # we grafted in a bunch of libraries and modified their sonames, but # they may have internal dependencies (DT_NEEDED) on one another, so # we need to update those records so each now knows about the new # name of the other. 
+ assert all( + f.exception() is None + for f in as_completed( + itertools.chain(copy_works.values(), replace_works.values()) + ) + ) + replace_works.clear() for _, path in soname_map.values(): needed = elf_read_dt_needed(path) replacements = [] @@ -108,26 +136,29 @@ def repair_wheel( if n in soname_map: replacements.append((n, soname_map[n][0])) if replacements: - patcher.replace_needed(path, *replacements) + replace_works[path] = pool.submit( + patcher.replace_needed, path, *replacements + ) + assert all(f.exception() is None for f in as_completed(replace_works.values())) if update_tags: ctx.out_wheel = add_platforms(ctx, abis, get_replace_platforms(abis[0])) if strip: - libs_to_strip = [path for (_, path) in soname_map.values()] - extensions = external_refs_by_fn.keys() - strip_symbols(itertools.chain(libs_to_strip, extensions)) - - return ctx.out_wheel + for lib in itertools.chain( + [path for (_, path) in soname_map.values()], external_refs_by_fn.keys() + ): + logger.info("Stripping symbols from %s", lib) + pool.submit(check_call, ["strip", "-s", lib]) + pool.shutdown() -def strip_symbols(libraries: Iterable[Path]) -> None: - for lib in libraries: - logger.info("Stripping symbols from %s", lib) - check_call(["strip", "-s", lib]) + return ctx.out_wheel -def copylib(src_path: Path, dest_dir: Path, patcher: ElfPatcher) -> tuple[str, Path]: +def copylib( + src_path: Path, dest_dir: Path, patcher: ElfPatcher, dry: bool = False +) -> tuple[str, Path]: """Graft a shared library from the system into the wheel and update the relevant links. 
@@ -151,10 +182,10 @@ def copylib(src_path: Path, dest_dir: Path, patcher: ElfPatcher) -> tuple[str, P new_soname = src_name dest_path = dest_dir / new_soname - if dest_path.exists(): + if dry or dest_path.exists(): return new_soname, dest_path - logger.debug("Grafting: %s -> %s", src_path, dest_path) + logger.debug("Start grafting: %s -> %s", src_path, dest_path) rpaths = elf_read_rpaths(src_path) shutil.copy2(src_path, dest_path) statinfo = dest_path.stat() @@ -166,6 +197,8 @@ def copylib(src_path: Path, dest_dir: Path, patcher: ElfPatcher) -> tuple[str, P if any(itertools.chain(rpaths["rpaths"], rpaths["runpaths"])): patcher.set_rpath(dest_path, "$ORIGIN") + logger.debug("Done grafting to: %s", dest_path) + return new_soname, dest_path diff --git a/src/auditwheel/tools.py b/src/auditwheel/tools.py index 0592ad2d..16837d4a 100644 --- a/src/auditwheel/tools.py +++ b/src/auditwheel/tools.py @@ -1,9 +1,11 @@ from __future__ import annotations import argparse +import logging import os import subprocess import zipfile +import zlib from collections.abc import Generator, Iterable from datetime import datetime, timezone from pathlib import Path @@ -11,6 +13,15 @@ _T = TypeVar("_T") +logger = logging.getLogger(__name__) + +# Default: zlib.Z_DEFAULT_COMPRESSION (-1 aka. level 6) balances speed and size. +# Maintained for typical builds where iteration speed outweighs distribution savings. 
+# Override via AUDITWHEEL_ZIP_LEVEL/--zip-level for: +# - some test builds that needs no compression at all (0) +# - bandwidth-constrained or large amount of downloads (9) +_COMPRESS_LEVEL = zlib.Z_DEFAULT_COMPRESSION + def unique_by_index(sequence: Iterable[_T]) -> list[_T]: """unique elements in `sequence` in the order in which they occur @@ -90,6 +101,7 @@ def zip2dir(zip_fname: Path, out_dir: Path) -> None: out_dir : str Directory path containing files to go in the zip archive """ + start = datetime.now() with zipfile.ZipFile(zip_fname, "r") as z: for name in z.namelist(): member = z.getinfo(name) @@ -102,6 +114,9 @@ def zip2dir(zip_fname: Path, out_dir: Path) -> None: attr &= 511 # only keep permission bits attr |= 6 << 6 # at least read/write for current user os.chmod(extracted_path, attr) + logger.info( + "zip2dir from %s to %s takes %s", zip_fname, out_dir, datetime.now() - start + ) def dir2zip(in_dir: Path, zip_fname: Path, date_time: datetime | None = None) -> None: @@ -120,6 +135,7 @@ def dir2zip(in_dir: Path, zip_fname: Path, date_time: datetime | None = None) -> date_time : Optional[datetime] Time stamp to set on each file in the archive """ + start = datetime.now() in_dir = in_dir.resolve(strict=True) if date_time is None: st = in_dir.stat() @@ -140,7 +156,10 @@ def dir2zip(in_dir: Path, zip_fname: Path, date_time: datetime | None = None) -> zinfo.date_time = date_time_args zinfo.compress_type = compression with open(fname, "rb") as fp: - z.writestr(zinfo, fp.read()) + z.writestr(zinfo, fp.read(), compresslevel=_COMPRESS_LEVEL) + logger.info( + "dir2zip from %s to %s takes %s", in_dir, zip_fname, datetime.now() - start + ) def tarbz2todir(tarbz2_fname: Path, out_dir: Path) -> None: @@ -157,15 +176,16 @@ def __init__( required: bool = True, default: str | None = None, choices: Iterable[str] | None = None, + type: type | None = None, **kwargs: Any, ) -> None: self.env_default = os.environ.get(env) self.env = env if self.env_default: - default = 
self.env_default + default = self.env_default if type is None else type(self.env_default) if default: required = False - if self.env_default and choices is not None and self.env_default not in choices: + if default and choices is not None and default not in choices: self.option_strings = kwargs["option_strings"] args = { "value": self.env_default, diff --git a/src/auditwheel/wheeltools.py b/src/auditwheel/wheeltools.py index 828fbeee..86d35ad4 100644 --- a/src/auditwheel/wheeltools.py +++ b/src/auditwheel/wheeltools.py @@ -15,7 +15,9 @@ from itertools import product from os.path import splitext from pathlib import Path +from tempfile import TemporaryDirectory from types import TracebackType +from typing import Any, ClassVar from packaging.utils import parse_wheel_filename @@ -57,6 +59,10 @@ def rewrite_record(bdist_dir: Path) -> None: ---------- bdist_dir : Path Path of unpacked wheel file + + Returns + ------- + if wheel is unchanged """ info_dir = _dist_info_dir(bdist_dir) record_path = info_dir / "RECORD" @@ -77,12 +83,14 @@ def skip(path: Path) -> bool: with open(record_path, "w+", newline="", encoding="utf-8") as record_file: writer = csv.writer(record_file) + skip_all = True for path in files(): relative_path = path.relative_to(bdist_dir) if skip(relative_path): hash_ = "" size = "" else: + skip_all = False data = path.read_bytes() digest = hashlib.sha256(data).digest() sha256 = urlsafe_b64encode(digest).rstrip(b"=").decode("ascii") @@ -98,8 +106,13 @@ class InWheel(InTemporaryDirectory): On entering, you'll find yourself in the root tree of the wheel. If you've asked for an output wheel, then on exit we'll rewrite the wheel record and pack stuff up for you. + + If `out_wheel` is None, we assume the wheel won't be modified and we can + cache the unpacked wheel for future use. 
""" + _whl_cache: ClassVar[dict[Path, TemporaryDirectory[Any]]] = {} + def __init__(self, in_wheel: Path, out_wheel: Path | None = None) -> None: """Initialize in-wheel context manager @@ -113,9 +126,35 @@ def __init__(self, in_wheel: Path, out_wheel: Path | None = None) -> None: """ self.in_wheel = in_wheel.absolute() self.out_wheel = None if out_wheel is None else out_wheel.absolute() - super().__init__() + self.read_only = out_wheel is None + self.use_cache = self.in_wheel in self._whl_cache + if self.use_cache and not Path(self._whl_cache[self.in_wheel].name).exists(): + self.use_cache = False + logger.debug( + "Wheel ctx %s for %s is no longer valid", + self._whl_cache.pop(self.in_wheel), + self.in_wheel, + ) + + if self.use_cache: + logger.debug( + "Reuse %s for %s", self._whl_cache[self.in_wheel], self.in_wheel + ) + self._tmpdir = self._whl_cache[self.in_wheel] + if not self.read_only: + self._whl_cache.pop(self.in_wheel) + else: + super().__init__() + if self.read_only: + self._whl_cache[self.in_wheel] = self._tmpdir def __enter__(self) -> Path: + if self.use_cache or self.read_only: + if not self.use_cache: + zip2dir(self.in_wheel, self.name) + self._pwd = Path.cwd() + os.chdir(self.name) + return Path(self.name) zip2dir(self.in_wheel, self.name) return super().__enter__() @@ -132,6 +171,16 @@ def __exit__( if timestamp: date_time = datetime.fromtimestamp(int(timestamp), tz=timezone.utc) dir2zip(self.name, self.out_wheel, date_time) + if self.use_cache or self.read_only: + logger.debug( + "Exiting reused %s for %s", + self._whl_cache[self.in_wheel], + self.in_wheel, + ) + os.chdir(self._pwd) + if not self.read_only: + super().__exit__(exc, value, tb) + return None return super().__exit__(exc, value, tb) diff --git a/tests/integration/test_manylinux.py b/tests/integration/test_manylinux.py index da08e26f..f04d87b9 100644 --- a/tests/integration/test_manylinux.py +++ b/tests/integration/test_manylinux.py @@ -389,7 +389,7 @@ def assert_show_output( assert 
expected_match, f"No match for tag {expected_tag}" expected_glibc = (int(expected_match["major"]), int(expected_match["minor"])) actual_match = TAG_RE.match(match["tag"]) - assert actual_match, f"No match for tag {match['tag']}" + assert actual_match, f"No match for tag {match['tag']}, output={output}" actual_glibc = (int(actual_match["major"]), int(actual_match["minor"])) assert expected_match["arch"] == actual_match["arch"] assert actual_glibc <= expected_glibc