From 76e227242e311d068eb24e65c86253d08c7ffcf0 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 14:25:39 +0200 Subject: [PATCH 1/7] Use translate to improve performance of canonicalize_name Co-Authored-By: Henry Schreiner --- Lib/importlib/metadata/__init__.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index b010bb8525e5cc..0735373443201e 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -890,6 +890,13 @@ def search(self, prepared: Prepared): return itertools.chain(infos, eggs) +# Translation table for Prepared.normalize: lowercase and replace - . with _ +_normalize_table = str.maketrans( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", + "abcdefghijklmnopqrstuvwxyz__", +) + + class Prepared: """ A prepared search query for metadata on a possibly-named package. @@ -925,7 +932,13 @@ def normalize(name): """ PEP 503 normalization plus dashes as underscores. 
""" - return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + # Emulates ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503 + # About 3x faster; safe because valid package names are ASCII-only + value = name.translate(_normalize_table) + # Condense repeats (faster than regex) + while "__" in value: + value = value.replace("__", "_") + return value @staticmethod def legacy_normalize(name): From 00c76fc7451ccb294bd890244aa17116239175e3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 15:42:11 +0200 Subject: [PATCH 2/7] Add blurb --- .../next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst new file mode 100644 index 00000000000000..efca629a4fd67c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst @@ -0,0 +1,2 @@ +:mod:`importlib.metadata`: Use ``translate`` to improve performance of +``canonicalize_name``. Patch by Hugo van Kemenade and Henry Schreiner. 
From c1bb3ccfc0dfe93b644356e1e54ab2e8bc90e661 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 16:23:56 +0200 Subject: [PATCH 3/7] Improve blurb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- .../Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst index efca629a4fd67c..1d22709572641b 100644 --- a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst +++ b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst @@ -1,2 +1,3 @@ -:mod:`importlib.metadata`: Use ``translate`` to improve performance of -``canonicalize_name``. Patch by Hugo van Kemenade and Henry Schreiner. +:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of +:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and +Henry Schreiner. 
From 0eae55202f7a2413637e39df83c0953a06ddfae3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 17:16:02 +0200 Subject: [PATCH 4/7] Improve comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/importlib/metadata/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 0735373443201e..d273a620ff869a 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -890,7 +890,8 @@ def search(self, prepared: Prepared): return itertools.chain(infos, eggs) -# Translation table for Prepared.normalize: lowercase and replace - . with _ +# Translation table for Prepared.normalize: lowercase and +# replace "-" (hyphen) and "." (dot) with "_" (underscore). _normalize_table = str.maketrans( "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", "abcdefghijklmnopqrstuvwxyz__", From 1657880f0fad56be0483d6d1d6c9d9e66b21e1ee Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 17:15:12 +0200 Subject: [PATCH 5/7] Add tests for Prepared.normalize --- Lib/test/test_importlib/metadata/test_api.py | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Lib/test/test_importlib/metadata/test_api.py b/Lib/test/test_importlib/metadata/test_api.py index 9f6e12c87e859c..a7249c0d0f01da 100644 --- a/Lib/test/test_importlib/metadata/test_api.py +++ b/Lib/test/test_importlib/metadata/test_api.py @@ -6,6 +6,7 @@ from importlib.metadata import ( Distribution, PackageNotFoundError, + Prepared, distribution, entry_points, files, @@ -313,3 +314,35 @@ class InvalidateCache(unittest.TestCase): def test_invalidate_cache(self): # No externally observable behavior, but ensures test coverage... 
importlib.invalidate_caches() + + +class PreparedTests(unittest.TestCase): + def test_normalize(self): + tests = [ + # Simple + ("sample", "sample"), + # Mixed case + ("Sample", "sample"), + ("SAMPLE", "sample"), + ("SaMpLe", "sample"), + # Separator conversions + ("sample-pkg", "sample_pkg"), + ("sample.pkg", "sample_pkg"), + ("sample_pkg", "sample_pkg"), + # Multiple separators + ("sample---pkg", "sample_pkg"), + ("sample___pkg", "sample_pkg"), + ("sample...pkg", "sample_pkg"), + # Mixed separators + ("sample-._pkg", "sample_pkg"), + ("sample_.-pkg", "sample_pkg"), + # Complex + ("Sample__Pkg-name.foo", "sample_pkg_name_foo"), + # Uppercase with separators + ("SAMPLE-PKG", "sample_pkg"), + ("Sample.Pkg", "sample_pkg"), + ("SAMPLE_PKG", "sample_pkg"), + ] + for name, expected in tests: + with self.subTest(name=name): + self.assertEqual(Prepared.normalize(name), expected) From ee0e6aa3b276de216a760502fd7060ec878385d9 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 17:38:16 +0200 Subject: [PATCH 6/7] Fix lint --- Lib/importlib/metadata/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index d273a620ff869a..9b723b4ec15e12 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -890,7 +890,7 @@ def search(self, prepared: Prepared): return itertools.chain(infos, eggs) -# Translation table for Prepared.normalize: lowercase and +# Translation table for Prepared.normalize: lowercase and # replace "-" (hyphen) and "." (dot) with "_" (underscore). 
_normalize_table = str.maketrans( "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", From 7b7f9a8773cc12f54ff987e6c6fa8ffcbf3a6000 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 10 Jan 2026 20:57:39 +0200 Subject: [PATCH 7/7] Add extra test case with repeated separator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bartosz Sławecki --- Lib/test/test_importlib/metadata/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_importlib/metadata/test_api.py b/Lib/test/test_importlib/metadata/test_api.py index a7249c0d0f01da..3c856a88b77bf6 100644 --- a/Lib/test/test_importlib/metadata/test_api.py +++ b/Lib/test/test_importlib/metadata/test_api.py @@ -338,6 +338,7 @@ def test_normalize(self): ("sample_.-pkg", "sample_pkg"), # Complex ("Sample__Pkg-name.foo", "sample_pkg_name_foo"), + ("Sample__Pkg.name__foo", "sample_pkg_name_foo"), # Uppercase with separators ("SAMPLE-PKG", "sample_pkg"), ("Sample.Pkg", "sample_pkg"),