Perl · khwilliamson · Nov 3, 2025 · Nov 4, 2025 · Nov 4, 2025 · Nov 4, 2025
diff --git a/numeric.c b/numeric.c
@@ -376,23 +376,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
                      )
 
 {
-    const char *s0 = start;
-    const char *s;
-    STRLEN len = *len_p;
-    STRLEN bytes_so_far;    /* How many real digits have been processed */
-    UV value = 0;
-    NV value_nv = 0;
-    const PERL_UINT_FAST8_T base = 1 << shift;  /* 2, 8, or 16 */
-    const UV max_div= UV_MAX / base;    /* Value above which, the next digit
-                                           processed would overflow */
-    const I32 input_flags = *flags;
-    const bool allow_underscores =
-                                cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
-    bool overflowed = FALSE;
-
-    /* In overflows, this keeps track of how much to multiply the overflowed NV
-     * by as we continue to parse the remaining digits */
-    NV factor = 0;
+    PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
+    ASSUME(inRANGE(shift, 1, 4) && shift != 2);
 
     /* This function unifies the core of grok_bin, grok_oct, and grok_hex.  It
      * is optimized for hex conversion.  For example, it uses XDIGIT_VALUE to
@@ -410,37 +395,39 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
      *      ...
      */
 
-    PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
-
-    ASSUME(inRANGE(shift, 1, 4) && shift != 2);
-
+    const I32 input_flags = *flags;
     /* Clear output flags; unlikely to find a problem that sets them */
     *flags = 0;
 
+    const bool allow_underscores =
+             cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
+    const char * s = start;
+    const char * e = start + *len_p;
+
     if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
 
         /* strip off leading b or 0b; x or 0x.
            for compatibility silently suffer "b" and "0b" as valid binary; "x"
            and "0x" as valid hex numbers. */
-        if (len >= 1) {
-            if (isALPHA_FOLD_EQ(s0[0], prefix)) {
-                s0++;
-                len--;
+        if (e - s > 1) {
+            if (isALPHA_FOLD_EQ(s[0], prefix)) {
+                s++;
             }
-            else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
-                s0+=2;
-                len-=2;
+            else if (   e - s > 2
+                     && s[0] == '0'
+                     && (isALPHA_FOLD_EQ(s[1], prefix)))
+            {
+                s += 2;
             }
         }
     }
 
-    s = s0; /* s0 potentially advanced from 'start' */
+    const char * const s0 = s; /* Where the significant digits start */
+    UV value = 0;
 
     /* Unroll the loop so that the first 8 digits are branchless except for the
      * switch.  A ninth hex one overflows a 32 bit word. */
-    switch (len) {
-      case 0:
-          return 0;
+    switch (e - s) {
       default:
           if (UNLIKELY(! generic_isCC_(*s, class_bit)))  break;
           value = (value << shift) | XDIGIT_VALUE(*s);
@@ -479,20 +466,30 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
       case 1:
           if (UNLIKELY(! generic_isCC_(*s, class_bit)))  break;
           value = (value << shift) | XDIGIT_VALUE(*s);
-
-          if (LIKELY(len <= 8)) {
+          s++;
+          /* FALLTHROUGH */
+      case 0:
+          if (LIKELY(s >= e)) {
               return value;
           }
 
-          s++;
           break;
     }
 
-    bytes_so_far = s - s0;
-    factor = shift << bytes_so_far;
-    len -= bytes_so_far;
+    /* How many real digits have been processed */
+    STRLEN bytes_so_far = s - s0;
+
+    /* In overflows, this keeps track of how much to multiply the overflowed NV
+     * by as we continue to parse the remaining digits */
+    NV factor = shift << bytes_so_far;
+
+    bool overflowed = FALSE;
+    NV value_nv = 0;
+    const PERL_UINT_FAST8_T base = 1 << shift;  /* 2, 8, or 16 */
+    const UV max_div= UV_MAX / base;    /* Value above which, the next digit
+                                           processed would overflow */
 
-    for (; len--; s++) {
+    for (; s < e; s++) {
         if (generic_isCC_(*s, class_bit)) {
             /* Write it in this wonky order with a goto to attempt to get the
                compiler to make the common case integer-only loop pretty tight.
@@ -541,7 +538,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
         }
 
         if (   *s == '_'
-            && len
+            && s < e - 1
             && allow_underscores
             && generic_isCC_(s[1], class_bit)
 
@@ -551,7 +548,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
                 || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
                                         != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
         {
-            --len;
             ++s;
             goto redo;
         }