From 613aa3d16298dd0a8589df0101c16fca7960ae18 Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Sat, 22 Mar 2025 14:27:21 -0400 Subject: [PATCH 1/6] BREAKING CHANGE: refactor PackageURL by moving String functions to StringUtil --- .../MalformedPackageURLException.java | 4 +- .../com/github/packageurl/PackageURL.java | 268 +++--------------- .../github/packageurl/PackageURLBuilder.java | 10 +- .../packageurl/ValidationException.java | 4 +- .../github/packageurl/utils/StringUtil.java | 266 +++++++++++++++++ .../com/github/packageurl/PackageURLTest.java | 18 +- .../StringUtilBenchmark.java} | 12 +- .../packageurl/utils/StringUtilTest.java | 43 +++ 8 files changed, 370 insertions(+), 255 deletions(-) create mode 100644 src/main/java/com/github/packageurl/utils/StringUtil.java rename src/test/java/com/github/packageurl/{PercentEncodingBenchmark.java => utils/StringUtilBenchmark.java} (92%) create mode 100644 src/test/java/com/github/packageurl/utils/StringUtilTest.java diff --git a/src/main/java/com/github/packageurl/MalformedPackageURLException.java b/src/main/java/com/github/packageurl/MalformedPackageURLException.java index a6d21d67..a882f359 100644 --- a/src/main/java/com/github/packageurl/MalformedPackageURLException.java +++ b/src/main/java/com/github/packageurl/MalformedPackageURLException.java @@ -53,7 +53,7 @@ public MalformedPackageURLException(@Nullable String msg) { * * @param message the detail message * @param cause the cause - * @since 1.6.0 + * @since 2.0.0 */ public MalformedPackageURLException(String message, Throwable cause) { super(message, cause); @@ -64,7 +64,7 @@ public MalformedPackageURLException(String message, Throwable cause) { * message of {@code (cause==null ? null : cause.toString())}. 
* * @param cause the cause - * @since 1.6.0 + * @since 2.0.0 */ public MalformedPackageURLException(Throwable cause) { super(cause); diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index decfad01..0603a859 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -23,11 +23,10 @@ import static java.util.Objects.requireNonNull; +import com.github.packageurl.utils.StringUtil; import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.Map; @@ -36,7 +35,6 @@ import java.util.TreeMap; import java.util.function.IntPredicate; import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.jspecify.annotations.Nullable; /** @@ -58,8 +56,6 @@ public final class PackageURL implements Serializable { private static final long serialVersionUID = 3243226021636427586L; - private static final char PERCENT_CHAR = '%'; - /** * The PackageURL scheme constant */ @@ -171,14 +167,14 @@ public PackageURL(final String purl) throws MalformedPackageURLException { if (index <= start) { throw new MalformedPackageURLException("Invalid purl: does not contain both a type and name"); } - this.type = toLowerCase(validateType(remainder.substring(start, index))); + this.type = StringUtil.toLowerCase(validateType(remainder.substring(start, index))); start = index + 1; // version is optional - check for existence index = remainder.lastIndexOf('@'); if (index >= start) { - this.version = validateVersion(this.type, percentDecode(remainder.substring(index + 1))); + this.version = validateVersion(this.type, StringUtil.percentDecode(remainder.substring(index + 1))); remainder = remainder.substring(0, index); } else { this.version = null; @@ -187,10 +183,10 @@ public PackageURL(final 
String purl) throws MalformedPackageURLException { // The 'remainder' should now consist of an optional namespace and the name index = remainder.lastIndexOf('/'); if (index <= start) { - this.name = validateName(this.type, percentDecode(remainder.substring(start))); + this.name = validateName(this.type, StringUtil.percentDecode(remainder.substring(start))); this.namespace = null; } else { - this.name = validateName(this.type, percentDecode(remainder.substring(index + 1))); + this.name = validateName(this.type, StringUtil.percentDecode(remainder.substring(index + 1))); remainder = remainder.substring(0, index); this.namespace = validateNamespace(this.type, parsePath(remainder.substring(start), false)); } @@ -248,7 +244,7 @@ public PackageURL( * @param subpath the subpath string * @throws MalformedPackageURLException if parsing fails * @throws NullPointerException if {@code type} or {@code name} are {@code null} - * @since 1.6.0 + * @since 2.0.0 */ public PackageURL( final String type, @@ -258,7 +254,7 @@ public PackageURL( final @Nullable Map qualifiers, final @Nullable String subpath) throws MalformedPackageURLException { - this.type = toLowerCase(validateType(requireNonNull(type, "type"))); + this.type = StringUtil.toLowerCase(validateType(requireNonNull(type, "type"))); this.namespace = validateNamespace(this.type, namespace); this.name = validateName(this.type, requireNonNull(name, "name")); this.version = validateVersion(this.type, version); @@ -355,24 +351,16 @@ private static String validateType(final String value) throws MalformedPackageUR throw new MalformedPackageURLException("The PackageURL type cannot be empty"); } - validateChars(value, PackageURL::isValidCharForType, "type"); + validateChars(value, StringUtil::isValidCharForType, "type"); return value; } - private static boolean isValidCharForType(int c) { - return (isAlphaNumeric(c) || c == '.' 
|| c == '+' || c == '-'); - } - - private static boolean isValidCharForKey(int c) { - return (isAlphaNumeric(c) || c == '.' || c == '_' || c == '-'); - } - private static void validateChars(String value, IntPredicate predicate, String component) throws MalformedPackageURLException { char firstChar = value.charAt(0); - if (isDigit(firstChar)) { + if (StringUtil.isDigit(firstChar)) { throw new MalformedPackageURLException( "The PackageURL " + component + " cannot start with a number: " + firstChar); } @@ -414,7 +402,7 @@ private static void validateChars(String value, IntPredicate predicate, String c case StandardTypes.LUAROCKS: case StandardTypes.QPKG: case StandardTypes.RPM: - retVal = tempNamespace != null ? toLowerCase(tempNamespace) : null; + retVal = tempNamespace != null ? StringUtil.toLowerCase(tempNamespace) : null; break; case StandardTypes.MLFLOW: case StandardTypes.OCI: @@ -447,13 +435,13 @@ private static String validateName(final String type, final String value) throws case StandardTypes.HEX: case StandardTypes.LUAROCKS: case StandardTypes.OCI: - temp = toLowerCase(value); + temp = StringUtil.toLowerCase(value); break; case StandardTypes.PUB: - temp = toLowerCase(value).replaceAll("[^a-z0-9_]", "_"); + temp = StringUtil.toLowerCase(value).replaceAll("[^a-z0-9_]", "_"); break; case StandardTypes.PYPI: - temp = toLowerCase(value).replace('_', '-'); + temp = StringUtil.toLowerCase(value).replace('_', '-'); break; default: temp = value; @@ -471,7 +459,7 @@ private static String validateName(final String type, final String value) throws case StandardTypes.HUGGINGFACE: case StandardTypes.LUAROCKS: case StandardTypes.OCI: - return toLowerCase(value); + return StringUtil.toLowerCase(value); default: return value; } @@ -496,7 +484,7 @@ private static void validateKey(final @Nullable String value) throws MalformedPa throw new MalformedPackageURLException("Qualifier key is invalid: " + value); } - validateChars(value, PackageURL::isValidCharForKey, "qualifier 
key"); + validateChars(value, StringUtil::isValidCharForKey, "qualifier key"); } private static void validateValue(final String key, final @Nullable String value) @@ -571,9 +559,9 @@ private String canonicalize(boolean coordinatesOnly) { purl.append(encodePath(namespace)); purl.append('/'); } - purl.append(percentEncode(name)); + purl.append(StringUtil.percentEncode(name)); if (version != null) { - purl.append('@').append(percentEncode(version)); + purl.append('@').append(StringUtil.percentEncode(version)); } if (!coordinatesOnly) { @@ -587,7 +575,7 @@ private String canonicalize(boolean coordinatesOnly) { } purl.append(entry.getKey()); purl.append('='); - purl.append(percentEncode(entry.getValue())); + purl.append(StringUtil.percentEncode(entry.getValue())); separator = true; } } @@ -598,178 +586,6 @@ private String canonicalize(boolean coordinatesOnly) { return purl.toString(); } - private static boolean isUnreserved(int c) { - return (isValidCharForKey(c) || c == '~'); - } - - private static boolean shouldEncode(int c) { - return !isUnreserved(c); - } - - private static boolean isAlpha(int c) { - return (isLowerCase(c) || isUpperCase(c)); - } - - private static boolean isDigit(int c) { - return (c >= '0' && c <= '9'); - } - - private static boolean isAlphaNumeric(int c) { - return (isDigit(c) || isAlpha(c)); - } - - private static boolean isUpperCase(int c) { - return (c >= 'A' && c <= 'Z'); - } - - private static int indexOfFirstUpperCaseChar(String s) { - int length = s.length(); - - for (int i = 0; i < length; i++) { - if (isUpperCase(s.charAt(i))) { - return i; - } - } - - return -1; - } - - private static boolean isLowerCase(int c) { - return (c >= 'a' && c <= 'z'); - } - - private static int toLowerCase(int c) { - return isUpperCase(c) ? 
(c ^ 0x20) : c; - } - - private static String toLowerCase(String s) { - int pos = indexOfFirstUpperCaseChar(s); - - if (pos == -1) { - return s; - } - - char[] chars = s.toCharArray(); - int length = chars.length; - - for (int i = pos; i < length; i++) { - chars[i] = (char) toLowerCase(chars[i]); - } - - return new String(chars); - } - - private static int indexOfFirstPercentChar(final byte[] bytes) { - return IntStream.range(0, bytes.length) - .filter(i -> isPercent(bytes[i])) - .findFirst() - .orElse(-1); - } - - private static byte percentDecode(final byte[] bytes, final int start) { - if (start + 2 >= bytes.length) { - throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '" - + new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'"); - } - - int pos1 = start + 1; - byte b1 = bytes[pos1]; - int c1 = Character.digit(b1, 16); - - if (c1 == -1) { - throw new ValidationException( - "Invalid percent encoding char 1 at offset " + pos1 + " with value '" + ((char) b1) + "'"); - } - - int pos2 = pos1 + 1; - byte b2 = bytes[pos2]; - int c2 = Character.digit(bytes[pos2], 16); - - if (c2 == -1) { - throw new ValidationException( - "Invalid percent encoding char 2 at offset " + pos2 + " with value '" + ((char) b2) + "'"); - } - - return ((byte) ((c1 << 4) + c2)); - } - - // package-private for testing - static String percentDecode(final String source) { - if (source.isEmpty()) { - return source; - } - - byte[] bytes = source.getBytes(StandardCharsets.UTF_8); - int i = indexOfFirstPercentChar(bytes); - - if (i == -1) { - return source; - } - - ByteBuffer buffer = ByteBuffer.wrap(bytes); - buffer.position(i); - int length = buffer.capacity(); - - while (i < length) { - byte b = bytes[i]; - - if (isPercent(b)) { - buffer.put(percentDecode(bytes, i)); - i += 2; - } else { - buffer.put(b); - } - - i++; - } - - return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8); - } - - /** - * URI decodes 
the given string. - * - * @param source the encoded string - * @return the decoded string - * @since 1.4.2 - * @deprecated this method was made public in error in version 1.4.2 and will be removed without a replacement - */ - @Deprecated - public @Nullable String uriDecode(final @Nullable String source) { - return source != null ? percentDecode(source) : null; - } - - private static boolean isPercent(int c) { - return (c == PERCENT_CHAR); - } - - // package-private for testing - static String percentEncode(final String source) { - if (source.isEmpty()) { - return source; - } - - byte[] bytes = source.getBytes(StandardCharsets.UTF_8); - int length = bytes.length; - ByteBuffer buffer = ByteBuffer.allocate(length * 3); - boolean changed = false; - - for (byte b : bytes) { - if (shouldEncode(b)) { - changed = true; - byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); - byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); - buffer.put((byte) PERCENT_CHAR); - buffer.put(b1); - buffer.put(b2); - } else { - buffer.put(b); - } - } - - return changed ? new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8) : source; - } - /** * Some purl types may have specific constraints. This method attempts to verify them. 
* @param type the purl type @@ -797,7 +613,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac .filter(entry -> !isEmpty(entry.getValue())) .collect( TreeMap::new, - (map, value) -> map.put(toLowerCase(value.getKey()), value.getValue()), + (map, value) -> map.put(StringUtil.toLowerCase(value.getKey()), value.getValue()), TreeMap::putAll); return validateQualifiers(results); } catch (ValidationException ex) { @@ -815,8 +631,8 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac (map, value) -> { final String[] entry = value.split("=", 2); if (entry.length == 2 && !entry[1].isEmpty()) { - String key = toLowerCase(entry[0]); - if (map.put(key, percentDecode(entry[1])) != null) { + String key = StringUtil.toLowerCase(entry[0]); + if (map.put(key, StringUtil.percentDecode(entry[1])) != null) { throw new ValidationException( "Duplicate package qualifier encountered. More then one value was specified for " + key); @@ -833,12 +649,12 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac private static String[] parsePath(final String path, final boolean isSubpath) { return Arrays.stream(path.split("/")) .filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment)))) - .map(PackageURL::percentDecode) + .map(StringUtil::percentDecode) .toArray(String[]::new); } private static String encodePath(final String path) { - return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/")); + return Arrays.stream(path.split("/")).map(StringUtil::percentEncode).collect(Collectors.joining("/")); } /** @@ -930,13 +746,13 @@ public static final class StandardTypes { /** * Arch Linux and other users of the libalpm/pacman package manager. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String ALPM = "alpm"; /** * APK-based packages. 
* - * @since 1.6.0 + * @since 2.0.0 */ public static final String APK = "apk"; /** @@ -946,7 +762,7 @@ public static final class StandardTypes { /** * Bitnami-based packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String BITNAMI = "bitnami"; /** @@ -958,7 +774,7 @@ public static final class StandardTypes { /** * CocoaPods. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String COCOAPODS = "cocoapods"; /** @@ -968,31 +784,31 @@ public static final class StandardTypes { /** * Conan C/C++ packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String CONAN = "conan"; /** * Conda packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String CONDA = "conda"; /** * CPAN Perl packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String CPAN = "cpan"; /** * CRAN R packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String CRAN = "cran"; /** * Debian, Debian derivatives, and Ubuntu packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String DEB = "deb"; /** @@ -1022,19 +838,19 @@ public static final class StandardTypes { /** * Hex packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String HEX = "hex"; /** * Hugging Face ML models. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String HUGGINGFACE = "huggingface"; /** * Lua packages installed with LuaRocks. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String LUAROCKS = "luarocks"; /** @@ -1044,13 +860,13 @@ public static final class StandardTypes { /** * MLflow ML models (Azure ML, Databricks, etc.). * - * @since 1.6.0 + * @since 2.0.0 */ public static final String MLFLOW = "mlflow"; /** * Nixos packages * - * @since 1.6.0 + * @since 2.0.0 */ public static final String NIX = "nix"; /** @@ -1066,13 +882,13 @@ public static final class StandardTypes { * OCI Distribution Specification, including * container images built by Docker and others. 
* - * @since 1.6.0 + * @since 2.0.0 */ public static final String OCI = "oci"; /** * Dart and Flutter packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String PUB = "pub"; /** @@ -1082,7 +898,7 @@ public static final class StandardTypes { /** * QNX packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String QPKG = "qpkg"; /** @@ -1092,13 +908,13 @@ public static final class StandardTypes { /** * ISO-IEC 19770-2 Software Identification (SWID) tags. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String SWID = "swid"; /** * Swift packages. * - * @since 1.6.0 + * @since 2.0.0 */ public static final String SWIFT = "swift"; /** diff --git a/src/main/java/com/github/packageurl/PackageURLBuilder.java b/src/main/java/com/github/packageurl/PackageURLBuilder.java index ccdb8deb..e62c42d6 100644 --- a/src/main/java/com/github/packageurl/PackageURLBuilder.java +++ b/src/main/java/com/github/packageurl/PackageURLBuilder.java @@ -70,7 +70,7 @@ private static PackageURLBuilder toBuilder(PackageURL packageURL) { * * @param packageURL the existing Package URL object * @return a new builder object - * @since 1.6.0 + * @since 2.0.0 */ public static PackageURLBuilder aPackageURL(final PackageURL packageURL) { return toBuilder(packageURL); @@ -82,7 +82,7 @@ public static PackageURLBuilder aPackageURL(final PackageURL packageURL) { * @param purl the existing Package URL string * @return a new builder object * @throws MalformedPackageURLException if an error occurs while parsing the input - * @since 1.6.0 + * @since 2.0.0 */ public static PackageURLBuilder aPackageURL(final String purl) throws MalformedPackageURLException { return toBuilder(new PackageURL(purl)); @@ -184,7 +184,7 @@ public PackageURLBuilder withQualifier(final String key, final @Nullable String * @param qualifiers the package qualifiers, or {@code null} * @return a reference to the builder * @see PackageURL#getQualifiers() - * @since 1.6.0 + * @since 2.0.0 */ public 
PackageURLBuilder withQualifiers(final @Nullable Map qualifiers) { if (qualifiers == null) { @@ -221,7 +221,7 @@ public PackageURLBuilder withoutQualifier(final String key) { * Removes a package qualifier. This is a no-op if the qualifier is not present. * @param keys the package qualifier keys to remove * @return a reference to the builder - * @since 1.6.0 + * @since 2.0.0 */ public PackageURLBuilder withoutQualifiers(final Set keys) { if (this.qualifiers != null) { @@ -236,7 +236,7 @@ public PackageURLBuilder withoutQualifiers(final Set keys) { /** * Removes all qualifiers, if any. * @return a reference to this builder. - * @since 1.6.0 + * @since 2.0.0 */ public PackageURLBuilder withoutQualifiers() { qualifiers = null; diff --git a/src/main/java/com/github/packageurl/ValidationException.java b/src/main/java/com/github/packageurl/ValidationException.java index 4cb3ee93..ac4997b4 100644 --- a/src/main/java/com/github/packageurl/ValidationException.java +++ b/src/main/java/com/github/packageurl/ValidationException.java @@ -27,7 +27,7 @@ * @author Jeremy Long * @since 1.1.0 */ -class ValidationException extends RuntimeException { +public class ValidationException extends RuntimeException { private static final long serialVersionUID = 2045474478691037663L; @@ -35,7 +35,7 @@ class ValidationException extends RuntimeException { * Constructs a {@code ValidationException}. 
* @param msg the error message */ - ValidationException(String msg) { + public ValidationException(String msg) { super(msg); } } diff --git a/src/main/java/com/github/packageurl/utils/StringUtil.java b/src/main/java/com/github/packageurl/utils/StringUtil.java new file mode 100644 index 00000000..39f2bdde --- /dev/null +++ b/src/main/java/com/github/packageurl/utils/StringUtil.java @@ -0,0 +1,266 @@ +/* + * MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package com.github.packageurl.utils; + +import com.github.packageurl.ValidationException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.stream.IntStream; + +/** + * String utility for validation and encoding. 
+ * + * @since 2.0.0 + */ +public final class StringUtil { + + private static final byte PERCENT_CHAR = '%'; + + private StringUtil() { + throw new AssertionError("Cannot instantiate StringUtil"); + } + + /** + * Returns the lower case version of the string. + * + * @param s the string to convert to lower case + * @return the lower case version of the string + * + * @since 2.0.0 + */ + public static String toLowerCase(String s) { + if (s == null) { + return null; + } + + int pos = indexOfFirstUpperCaseChar(s); + + if (pos == -1) { + return s; + } + + char[] chars = s.toCharArray(); + int length = chars.length; + + for (int i = pos; i < length; i++) { + chars[i] = (char) toLowerCase(chars[i]); + } + + return new String(chars); + } + + /** + * Percent decodes the given string. + * + * @param source the string to decode + * @return the percent decoded string + * + * @since 2.0.0 + */ + public static String percentDecode(final String source) { + if (source == null || source.isEmpty()) { + return source; + } + + byte[] bytes = source.getBytes(StandardCharsets.UTF_8); + int i = indexOfFirstPercentChar(bytes); + + if (i == -1) { + return source; + } + + int length = bytes.length; + int writePos = i; + while (i < length) { + byte b = bytes[i]; + if (b == PERCENT_CHAR) { + bytes[writePos++] = percentDecode(bytes, i++); + i += 2; + } else { + bytes[writePos++] = bytes[i++]; + } + } + + return new String(bytes, 0, writePos, StandardCharsets.UTF_8); + } + + /** + * Percent encodes the given string. 
+ * + * @param source the string to encode + * @return the percent encoded string + * + * @since 2.0.0 + */ + public static String percentEncode(final String source) { + if (source == null || source.isEmpty()) { + return source; + } + byte[] bytes = source.getBytes(StandardCharsets.UTF_8); + int start = indexOfFirstNonAsciiChar(bytes); + if (start == -1) { + return source; + } + int length = bytes.length; + ByteBuffer buffer = ByteBuffer.allocate(start + ((length - start) * 3)); + if (start != 0) { + buffer.put(bytes, 0, start); + } + + for (int i = start; i < length; i++) { + byte b = bytes[i]; + if (shouldEncode(b)) { + byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); + byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); + buffer.put(PERCENT_CHAR); + buffer.put(b1); + buffer.put(b2); + } else { + buffer.put(b); + } + } + + return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8); + } + + /** + * Determines if the character is a digit. + * + * @param c the character to check + * @return true if the character is a digit; otherwise, false + * + * @since 2.0.0 + */ + public static boolean isDigit(int c) { + return (c >= '0' && c <= '9'); + } + + /** + * Determines if the character is valid for the package-url type. + * + * @param c the character to check + * @return true if the character is valid for the package-url type; otherwise, false + * + * @since 2.0.0 + */ + public static boolean isValidCharForType(int c) { + return (isAlphaNumeric(c) || c == '.' || c == '+' || c == '-'); + } + + /** + * Determines if the character is valid for the package-url qualifier key. + * + * @param c the character to check + * @return true if the character is valid for the package-url qualifier key; otherwise, false + * + * @since 2.0.0 + */ + public static boolean isValidCharForKey(int c) { + return (isAlphaNumeric(c) || c == '.' 
|| c == '_' || c == '-'); + } + + private static boolean isUnreserved(int c) { + return (isValidCharForKey(c) || c == '~'); + } + + private static boolean shouldEncode(int c) { + return !isUnreserved(c); + } + + private static boolean isAlpha(int c) { + return (isLowerCase(c) || isUpperCase(c)); + } + + private static boolean isAlphaNumeric(int c) { + return (isDigit(c) || isAlpha(c)); + } + + private static boolean isUpperCase(int c) { + return (c >= 'A' && c <= 'Z'); + } + + private static boolean isLowerCase(int c) { + return (c >= 'a' && c <= 'z'); + } + + private static int toLowerCase(int c) { + return isUpperCase(c) ? (c ^ 0x20) : c; + } + + private static int indexOfFirstUpperCaseChar(String s) { + int length = s.length(); + + for (int i = 0; i < length; i++) { + if (isUpperCase(s.charAt(i))) { + return i; + } + } + + return -1; + } + + private static int indexOfFirstNonAsciiChar(byte[] bytes) { + int length = bytes.length; + int start = -1; + for (int i = 0; i < length; i++) { + if (shouldEncode(bytes[i])) { + start = i; + break; + } + } + return start; + } + + private static int indexOfFirstPercentChar(final byte[] bytes) { + return IntStream.range(0, bytes.length) + .filter(i -> bytes[i] == PERCENT_CHAR) + .findFirst() + .orElse(-1); + } + + private static byte percentDecode(final byte[] bytes, final int start) { + if (start + 2 >= bytes.length) { + throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '" + + new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'"); + } + + int pos1 = start + 1; + byte b1 = bytes[pos1]; + int c1 = Character.digit(b1, 16); + + if (c1 == -1) { + throw new ValidationException( + "Invalid percent encoding char 1 at offset " + pos1 + " with value '" + ((char) b1) + "'"); + } + + int pos2 = pos1 + 1; + byte b2 = bytes[pos2]; + int c2 = Character.digit(bytes[pos2], 16); + + if (c2 == -1) { + throw new ValidationException( + "Invalid percent encoding char 2 at offset " 
+ pos2 + " with value '" + ((char) b2) + "'"); + } + + return ((byte) ((c1 << 4) + c2)); + } +} diff --git a/src/test/java/com/github/packageurl/PackageURLTest.java b/src/test/java/com/github/packageurl/PackageURLTest.java index 18fb3458..adf8694d 100644 --- a/src/test/java/com/github/packageurl/PackageURLTest.java +++ b/src/test/java/com/github/packageurl/PackageURLTest.java @@ -73,24 +73,14 @@ void validPercentEncoding() throws MalformedPackageURLException { "pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl2.toString()); } - @SuppressWarnings("deprecation") @Test void invalidPercentEncoding() throws MalformedPackageURLException { assertThrowsExactly( - MalformedPackageURLException.class, - () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%")); + MalformedPackageURLException.class, + () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%")); assertThrowsExactly( - MalformedPackageURLException.class, - () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0")); - PackageURL purl = new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0"); - Throwable t1 = assertThrowsExactly(ValidationException.class, () -> purl.uriDecode("%")); - assertEquals("Incomplete percent encoding at offset 0 with value '%'", t1.getMessage()); - Throwable t2 = assertThrowsExactly(ValidationException.class, () -> PackageURL.percentDecode("a%0")); - assertEquals("Incomplete percent encoding at offset 1 with value '%0'", t2.getMessage()); - Throwable t3 = assertThrowsExactly(ValidationException.class, () -> PackageURL.percentDecode("aaaa%%0A")); - assertEquals("Invalid percent encoding char 1 at offset 5 with value '%'", t3.getMessage()); - Throwable t4 = assertThrowsExactly(ValidationException.class, () -> PackageURL.percentDecode("%0G")); - assertEquals("Invalid percent encoding char 2 at offset 2 with value 'G'", t4.getMessage()); + MalformedPackageURLException.class, + () -> new 
PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0")); } static Stream constructorParsing() throws IOException { diff --git a/src/test/java/com/github/packageurl/PercentEncodingBenchmark.java b/src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java similarity index 92% rename from src/test/java/com/github/packageurl/PercentEncodingBenchmark.java rename to src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java index 17b421c5..e811abe9 100644 --- a/src/test/java/com/github/packageurl/PercentEncodingBenchmark.java +++ b/src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java @@ -19,7 +19,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package com.github.packageurl; +package com.github.packageurl.utils; import java.nio.charset.StandardCharsets; import java.util.Random; @@ -35,7 +35,7 @@ import org.openjdk.jmh.infra.Blackhole; /** - * Measures the performance of performance decoding and encoding. + * Measures the performance of StringUtil's percent decoding and encoding. *

* Run the benchmark with: *

@@ -52,7 +52,7 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MICROSECONDS) @State(Scope.Benchmark) -public class PercentEncodingBenchmark { +public class StringUtilBenchmark { private static final int DATA_COUNT = 1000; private static final int DECODED_LENGTH = 256; @@ -91,7 +91,7 @@ private String[] createDecodedData() { private static String[] encodeData(String[] decodedData) { String[] encodedData = new String[decodedData.length]; for (int i = 0; i < decodedData.length; i++) { - encodedData[i] = PackageURL.percentEncode(decodedData[i]); + encodedData[i] = StringUtil.percentEncode(decodedData[i]); } return encodedData; } @@ -114,14 +114,14 @@ public void baseline(Blackhole blackhole) { @Benchmark public void percentDecode(final Blackhole blackhole) { for (int i = 0; i < DATA_COUNT; i++) { - blackhole.consume(PackageURL.percentDecode(encodedData[i])); + blackhole.consume(StringUtil.percentDecode(encodedData[i])); } } @Benchmark public void percentEncode(final Blackhole blackhole) { for (int i = 0; i < DATA_COUNT; i++) { - blackhole.consume(PackageURL.percentEncode(decodedData[i])); + blackhole.consume(StringUtil.percentEncode(decodedData[i])); } } } diff --git a/src/test/java/com/github/packageurl/utils/StringUtilTest.java b/src/test/java/com/github/packageurl/utils/StringUtilTest.java new file mode 100644 index 00000000..09bd6dc0 --- /dev/null +++ b/src/test/java/com/github/packageurl/utils/StringUtilTest.java @@ -0,0 +1,43 @@ +/* + * MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package com.github.packageurl.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrowsExactly; + +import com.github.packageurl.MalformedPackageURLException; +import com.github.packageurl.PackageURL; +import com.github.packageurl.ValidationException; +import org.junit.jupiter.api.Test; + +public class StringUtilTest { + + @Test + void invalidPercentEncoding() throws MalformedPackageURLException { + Throwable t1 = assertThrowsExactly(ValidationException.class, () -> StringUtil.percentDecode("a%0")); + assertEquals("Incomplete percent encoding at offset 1 with value '%0'", t1.getMessage()); + Throwable t2 = assertThrowsExactly(ValidationException.class, () -> StringUtil.percentDecode("aaaa%%0A")); + assertEquals("Invalid percent encoding char 1 at offset 5 with value '%'", t2.getMessage()); + Throwable t3 = assertThrowsExactly(ValidationException.class, () -> StringUtil.percentDecode("%0G")); + assertEquals("Invalid percent encoding char 2 at offset 2 with value 'G'", t3.getMessage()); + } +} From d5f136c235ba9eaa6049f3ea8eb8f2de471879d5 Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Sat, 22 Mar 2025 14:48:39 -0400 Subject: [PATCH 2/6] style: spotless apply --- src/test/java/com/github/packageurl/PackageURLTest.java | 8 ++++---- .../java/com/github/packageurl/utils/StringUtilTest.java | 1 - 2 files changed, 4 
insertions(+), 5 deletions(-) diff --git a/src/test/java/com/github/packageurl/PackageURLTest.java b/src/test/java/com/github/packageurl/PackageURLTest.java index adf8694d..5577773e 100644 --- a/src/test/java/com/github/packageurl/PackageURLTest.java +++ b/src/test/java/com/github/packageurl/PackageURLTest.java @@ -76,11 +76,11 @@ void validPercentEncoding() throws MalformedPackageURLException { @Test void invalidPercentEncoding() throws MalformedPackageURLException { assertThrowsExactly( - MalformedPackageURLException.class, - () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%")); + MalformedPackageURLException.class, + () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%")); assertThrowsExactly( - MalformedPackageURLException.class, - () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0")); + MalformedPackageURLException.class, + () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0")); } static Stream constructorParsing() throws IOException { diff --git a/src/test/java/com/github/packageurl/utils/StringUtilTest.java b/src/test/java/com/github/packageurl/utils/StringUtilTest.java index 09bd6dc0..d84e1712 100644 --- a/src/test/java/com/github/packageurl/utils/StringUtilTest.java +++ b/src/test/java/com/github/packageurl/utils/StringUtilTest.java @@ -25,7 +25,6 @@ import static org.junit.jupiter.api.Assertions.assertThrowsExactly; import com.github.packageurl.MalformedPackageURLException; -import com.github.packageurl.PackageURL; import com.github.packageurl.ValidationException; import org.junit.jupiter.api.Test; From c49dfb5be4de3928f514b4e7bd0553be866e343f Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Sun, 23 Mar 2025 06:39:43 -0400 Subject: [PATCH 3/6] fix: use internal package --- .../com/github/packageurl/PackageURL.java | 2 +- .../{utils => internal}/StringUtil.java | 2 +- .../packageurl/internal/package-info.java | 27 +++++++++++++++++++ .../StringUtilBenchmark.java | 2 +- .../{utils => 
internal}/StringUtilTest.java | 2 +- 5 files changed, 31 insertions(+), 4 deletions(-) rename src/main/java/com/github/packageurl/{utils => internal}/StringUtil.java (99%) create mode 100644 src/main/java/com/github/packageurl/internal/package-info.java rename src/test/java/com/github/packageurl/{utils => internal}/StringUtilBenchmark.java (99%) rename src/test/java/com/github/packageurl/{utils => internal}/StringUtilTest.java (98%) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index 0603a859..f2d69bd4 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -23,7 +23,7 @@ import static java.util.Objects.requireNonNull; -import com.github.packageurl.utils.StringUtil; +import com.github.packageurl.internal.StringUtil; import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; diff --git a/src/main/java/com/github/packageurl/utils/StringUtil.java b/src/main/java/com/github/packageurl/internal/StringUtil.java similarity index 99% rename from src/main/java/com/github/packageurl/utils/StringUtil.java rename to src/main/java/com/github/packageurl/internal/StringUtil.java index 39f2bdde..e8ed0808 100644 --- a/src/main/java/com/github/packageurl/utils/StringUtil.java +++ b/src/main/java/com/github/packageurl/internal/StringUtil.java @@ -19,7 +19,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -package com.github.packageurl.utils; +package com.github.packageurl.internal; import com.github.packageurl.ValidationException; import java.nio.ByteBuffer; diff --git a/src/main/java/com/github/packageurl/internal/package-info.java b/src/main/java/com/github/packageurl/internal/package-info.java new file mode 100644 index 00000000..97461a23 --- /dev/null +++ b/src/main/java/com/github/packageurl/internal/package-info.java @@ -0,0 +1,27 @@ +/* + * MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * This package contains utility classes used by the PackageURL library. 
+ */ +package com.github.packageurl.internal; + diff --git a/src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java similarity index 99% rename from src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java rename to src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java index e811abe9..fe72a224 100644 --- a/src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java +++ b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java @@ -19,7 +19,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package com.github.packageurl.utils; +package com.github.packageurl.internal; import java.nio.charset.StandardCharsets; import java.util.Random; diff --git a/src/test/java/com/github/packageurl/utils/StringUtilTest.java b/src/test/java/com/github/packageurl/internal/StringUtilTest.java similarity index 98% rename from src/test/java/com/github/packageurl/utils/StringUtilTest.java rename to src/test/java/com/github/packageurl/internal/StringUtilTest.java index d84e1712..cd634f36 100644 --- a/src/test/java/com/github/packageurl/utils/StringUtilTest.java +++ b/src/test/java/com/github/packageurl/internal/StringUtilTest.java @@ -19,7 +19,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -package com.github.packageurl.utils; +package com.github.packageurl.internal; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrowsExactly; From 576f2c6ea1995e9b479160a8328232e16f4036eb Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Sun, 23 Mar 2025 17:12:28 -0400 Subject: [PATCH 4/6] style: spotless --- src/main/java/com/github/packageurl/internal/package-info.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/github/packageurl/internal/package-info.java b/src/main/java/com/github/packageurl/internal/package-info.java index 97461a23..96e841e4 100644 --- a/src/main/java/com/github/packageurl/internal/package-info.java +++ b/src/main/java/com/github/packageurl/internal/package-info.java @@ -24,4 +24,3 @@ * This package contains utility classes used by the PackageURL library. */ package com.github.packageurl.internal; - From ae666d9e30ea0debd9f35c9c6659edb38549efb4 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sun, 23 Mar 2025 22:23:41 +0100 Subject: [PATCH 5/6] feat: Improve encoding/decoding performance for ASCII strings (#224) * feat: Improve benchmark (#222) Fixes a bug in the benchmark initialization and adds a `toLowerCase` benchmark. * fix: Benchmark initialization The benchmark **must** be initialized in a `@Setup` method, otherwise `nonAsciiProb` will always be `0.0`. * fix: Improve encoding/decoding performance for ASCII strings Since strings that don't require **any** percent encoding are in practice the rule, the encoding/decoding code should be optimized for this case. 
--- .../packageurl/internal/StringUtil.java | 137 ++++++++++-------- .../internal/StringUtilBenchmark.java | 16 +- 2 files changed, 87 insertions(+), 66 deletions(-) diff --git a/src/main/java/com/github/packageurl/internal/StringUtil.java b/src/main/java/com/github/packageurl/internal/StringUtil.java index e8ed0808..ee1a45a3 100644 --- a/src/main/java/com/github/packageurl/internal/StringUtil.java +++ b/src/main/java/com/github/packageurl/internal/StringUtil.java @@ -21,10 +21,10 @@ */ package com.github.packageurl.internal; +import static java.lang.Byte.toUnsignedInt; + import com.github.packageurl.ValidationException; -import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.stream.IntStream; /** * String utility for validation and encoding. @@ -35,6 +35,24 @@ public final class StringUtil { private static final byte PERCENT_CHAR = '%'; + private static final boolean[] UNRESERVED_CHARS = new boolean[128]; + + static { + for (char c = '0'; c <= '9'; c++) { + UNRESERVED_CHARS[c] = true; + } + for (char c = 'A'; c <= 'Z'; c++) { + UNRESERVED_CHARS[c] = true; + } + for (char c = 'a'; c <= 'z'; c++) { + UNRESERVED_CHARS[c] = true; + } + UNRESERVED_CHARS['-'] = true; + UNRESERVED_CHARS['.'] = true; + UNRESERVED_CHARS['_'] = true; + UNRESERVED_CHARS['~'] = true; + } + private StringUtil() { throw new AssertionError("Cannot instantiate StringUtil"); } @@ -48,10 +66,6 @@ private StringUtil() { * @since 2.0.0 */ public static String toLowerCase(String s) { - if (s == null) { - return null; - } - int pos = indexOfFirstUpperCaseChar(s); if (pos == -1) { @@ -59,10 +73,9 @@ public static String toLowerCase(String s) { } char[] chars = s.toCharArray(); - int length = chars.length; - for (int i = pos; i < length; i++) { - chars[i] = (char) toLowerCase(chars[i]); + for (int length = chars.length; pos < length; pos++) { + chars[pos] = (char) toLowerCase(chars[pos]); } return new String(chars); @@ -77,26 +90,22 @@ public static String 
toLowerCase(String s) { * @since 2.0.0 */ public static String percentDecode(final String source) { - if (source == null || source.isEmpty()) { + if (source.indexOf(PERCENT_CHAR) == -1) { return source; } byte[] bytes = source.getBytes(StandardCharsets.UTF_8); - int i = indexOfFirstPercentChar(bytes); - - if (i == -1) { - return source; - } + int readPos = indexOfFirstPercentChar(bytes); + int writePos = readPos; int length = bytes.length; - int writePos = i; - while (i < length) { - byte b = bytes[i]; + while (readPos < length) { + byte b = bytes[readPos]; if (b == PERCENT_CHAR) { - bytes[writePos++] = percentDecode(bytes, i++); - i += 2; + bytes[writePos++] = percentDecode(bytes, readPos++); + readPos += 2; } else { - bytes[writePos++] = bytes[i++]; + bytes[writePos++] = bytes[readPos++]; } } @@ -112,34 +121,29 @@ public static String percentDecode(final String source) { * @since 2.0.0 */ public static String percentEncode(final String source) { - if (source == null || source.isEmpty()) { - return source; - } - byte[] bytes = source.getBytes(StandardCharsets.UTF_8); - int start = indexOfFirstNonAsciiChar(bytes); - if (start == -1) { + if (!shouldEncode(source)) { return source; } - int length = bytes.length; - ByteBuffer buffer = ByteBuffer.allocate(start + ((length - start) * 3)); - if (start != 0) { - buffer.put(bytes, 0, start); - } - for (int i = start; i < length; i++) { - byte b = bytes[i]; - if (shouldEncode(b)) { - byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); - byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); - buffer.put(PERCENT_CHAR); - buffer.put(b1); - buffer.put(b2); + byte[] src = source.getBytes(StandardCharsets.UTF_8); + byte[] dest = new byte[3 * src.length]; + + int writePos = 0; + for (byte b : src) { + if (shouldEncode(toUnsignedInt(b))) { + dest[writePos++] = PERCENT_CHAR; + dest[writePos++] = toHexDigit(b >> 4); + dest[writePos++] = toHexDigit(b); } else { - buffer.put(b); + 
dest[writePos++] = b; } } - return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8); + return new String(dest, 0, writePos, StandardCharsets.UTF_8); + } + + private static byte toHexDigit(int b) { + return (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); } /** @@ -178,14 +182,34 @@ public static boolean isValidCharForKey(int c) { return (isAlphaNumeric(c) || c == '.' || c == '_' || c == '-'); } + /** + * Returns {@code true} if the character is in the unreserved RFC 3986 set. + *

+ * Warning: Profiling shows that the performance of {@link #percentEncode} relies heavily on this method. + * Modify with care. + *

+ * @param c non-negative integer. + */ private static boolean isUnreserved(int c) { - return (isValidCharForKey(c) || c == '~'); + return c < 128 && UNRESERVED_CHARS[c]; } + /** + * @param c non-negative integer + */ private static boolean shouldEncode(int c) { return !isUnreserved(c); } + private static boolean shouldEncode(String s) { + for (int i = 0, length = s.length(); i < length; i++) { + if (shouldEncode(s.charAt(i))) { + return true; + } + } + return false; + } + private static boolean isAlpha(int c) { return (isLowerCase(c) || isUpperCase(c)); } @@ -195,7 +219,7 @@ private static boolean isAlphaNumeric(int c) { } private static boolean isUpperCase(int c) { - return (c >= 'A' && c <= 'Z'); + return 'A' <= c && c <= 'Z'; } private static boolean isLowerCase(int c) { @@ -207,34 +231,21 @@ private static int toLowerCase(int c) { } private static int indexOfFirstUpperCaseChar(String s) { - int length = s.length(); - - for (int i = 0; i < length; i++) { + for (int i = 0, length = s.length(); i < length; i++) { if (isUpperCase(s.charAt(i))) { return i; } } - return -1; } - private static int indexOfFirstNonAsciiChar(byte[] bytes) { - int length = bytes.length; - int start = -1; - for (int i = 0; i < length; i++) { - if (shouldEncode(bytes[i])) { - start = i; - break; + private static int indexOfFirstPercentChar(final byte[] bytes) { + for (int i = 0, length = bytes.length; i < length; i++) { + if (bytes[i] == PERCENT_CHAR) { + return i; } } - return start; - } - - private static int indexOfFirstPercentChar(final byte[] bytes) { - return IntStream.range(0, bytes.length) - .filter(i -> bytes[i] == PERCENT_CHAR) - .findFirst() - .orElse(-1); + return -1; } private static byte percentDecode(final byte[] bytes, final int start) { diff --git a/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java index d75537fd..e05b5349 100644 --- 
a/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java +++ b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java @@ -31,6 +31,7 @@ import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.infra.Blackhole; @@ -62,8 +63,14 @@ public class StringUtilBenchmark { @Param({"0", "0.1", "0.5"}) private double nonAsciiProb; - private final String[] decodedData = createDecodedData(); - private final String[] encodedData = encodeData(decodedData); + private String[] decodedData; + private String[] encodedData; + + @Setup + public void setup() { + decodedData = createDecodedData(); + encodedData = encodeData(decodedData); + } private String[] createDecodedData() { Random random = new Random(); @@ -87,7 +94,10 @@ private static String[] encodeData(String[] decodedData) { for (int i = 0; i < encodedData.length; i++) { encodedData[i] = StringUtil.percentEncode(decodedData[i]); if (!StringUtil.percentDecode(encodedData[i]).equals(decodedData[i])) { - throw new RuntimeException("Invalid implementation of `percentEncode` and `percentDecode`."); + throw new RuntimeException( + "Invalid implementation of `percentEncode` and `percentDecode`.\nOriginal data: " + + encodedData[i] + "\nEncoded and decoded data: " + + StringUtil.percentDecode(encodedData[i])); } } return encodedData; From ce08baa60900783e110f583792ea07faf4f747ed Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Mon, 24 Mar 2025 08:24:53 -0400 Subject: [PATCH 6/6] fix: add jspecify annotations --- .../github/packageurl/internal/StringUtil.java | 15 ++++++++------- .../github/packageurl/internal/package-info.java | 3 +++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/github/packageurl/internal/StringUtil.java b/src/main/java/com/github/packageurl/internal/StringUtil.java index 
ee1a45a3..5225ce1d 100644 --- a/src/main/java/com/github/packageurl/internal/StringUtil.java +++ b/src/main/java/com/github/packageurl/internal/StringUtil.java @@ -25,6 +25,7 @@ import com.github.packageurl.ValidationException; import java.nio.charset.StandardCharsets; +import org.jspecify.annotations.NonNull; /** * String utility for validation and encoding. @@ -65,7 +66,7 @@ private StringUtil() { * * @since 2.0.0 */ - public static String toLowerCase(String s) { + public static @NonNull String toLowerCase(@NonNull String s) { int pos = indexOfFirstUpperCaseChar(s); if (pos == -1) { @@ -89,7 +90,7 @@ public static String toLowerCase(String s) { * * @since 2.0.0 */ - public static String percentDecode(final String source) { + public static @NonNull String percentDecode(@NonNull final String source) { if (source.indexOf(PERCENT_CHAR) == -1) { return source; } @@ -120,7 +121,7 @@ public static String percentDecode(final String source) { * * @since 2.0.0 */ - public static String percentEncode(final String source) { + public static @NonNull String percentEncode(@NonNull final String source) { if (!shouldEncode(source)) { return source; } @@ -142,10 +143,6 @@ public static String percentEncode(final String source) { return new String(dest, 0, writePos, StandardCharsets.UTF_8); } - private static byte toHexDigit(int b) { - return (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); - } - /** * Determines if the character is a digit. * @@ -182,6 +179,10 @@ public static boolean isValidCharForKey(int c) { return (isAlphaNumeric(c) || c == '.' || c == '_' || c == '-'); } + private static byte toHexDigit(int b) { + return (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); + } + /** * Returns {@code true} if the character is in the unreserved RFC 3986 set. *

diff --git a/src/main/java/com/github/packageurl/internal/package-info.java b/src/main/java/com/github/packageurl/internal/package-info.java index 96e841e4..3bdb5740 100644 --- a/src/main/java/com/github/packageurl/internal/package-info.java +++ b/src/main/java/com/github/packageurl/internal/package-info.java @@ -23,4 +23,7 @@ /** * This package contains utility classes used by the PackageURL library. */ +@NullMarked package com.github.packageurl.internal; + +import org.jspecify.annotations.NullMarked;