From 094570ad68557117f0a25c030f8d7bdfdf962897 Mon Sep 17 00:00:00 2001 From: nikneym Date: Wed, 26 Feb 2025 12:31:59 +0300 Subject: [PATCH 1/2] chore(docs): avx2 doc fixes --- src/simd/avx2.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/simd/avx2.rs b/src/simd/avx2.rs index 367d312..2adf0a2 100644 --- a/src/simd/avx2.rs +++ b/src/simd/avx2.rs @@ -51,7 +51,7 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize { // unsigned comparison dat >= LOW // - // We create a new via `_mm256_max_epu8` which compares vectors `dat` and `LOW` + // `_mm256_max_epu8` creates a new vector by comparing vectors `dat` and `LOW` // and picks the max. values from each for all indices. // So if a byte in `dat` is <= 32, it'll be represented as 33 // which is the smallest valid character. @@ -67,8 +67,7 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize { // We glue the both comparisons via `_mm256_andnot_si256`. // - // Since the representation of truthy/falsy differ in these comparisons, - // we cannot use + // Since the representation of truthiness differs in these comparisons, // we are in need of bitwise NOT to convert valid characters of `del`. let bit = _mm256_andnot_si256(del, low); // This creates a bitmask from the most significant bit of each byte. 
From c61be277d0971c052b06a6a946c1bb05580cbc85 Mon Sep 17 00:00:00 2001 From: nikneym Date: Wed, 26 Feb 2025 15:06:51 +0300 Subject: [PATCH 2/2] fix(swar): allow utf8 characters --- src/simd/swar.rs | 100 ++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 58 deletions(-) diff --git a/src/simd/swar.rs b/src/simd/swar.rs index 5925d62..e1028d0 100644 --- a/src/simd/swar.rs +++ b/src/simd/swar.rs @@ -109,71 +109,52 @@ const fn uniform_block(b: u8) -> usize { (b as u64 * 0x01_01_01_01_01_01_01_01 /* [1_u8; 8] */) as usize } -// A byte-wise range-check on an enire word/block, -// ensuring all bytes in the word satisfy -// `33 <= x <= 126 && x != '>' && x != '<'` -// IMPORTANT: it false negatives if the block contains '?' +// A byte-wise range-check on an entire word/block, +// ensuring all bytes in the word satisfy `33 <= (x != 127) <= 255` #[inline] fn match_uri_char_8_swar(block: ByteBlock) -> usize { - // 33 <= x <= 126 + // 33 <= (x != 127) <= 255 const M: u8 = 0x21; - const N: u8 = 0x7E; + // uniform block full of exclamation mark (!) (33). const BM: usize = uniform_block(M); - const BN: usize = uniform_block(127 - N); + // uniform block full of 1. + const ONE: usize = uniform_block(0x01); + // uniform block full of DEL (127). + const DEL: usize = uniform_block(0x7f); + // uniform block full of 128. 
const M128: usize = uniform_block(128); let x = usize::from_ne_bytes(block); // Really just a transmute let lt = x.wrapping_sub(BM) & !x; // <= m - let gt = x.wrapping_add(BN) | x; // >= n - - // XOR checks to catch '<' & '>' for correctness - // - // XOR can be thought of as a "distance function" - // (somewhat extrapolating from the `xor(x, x) = 0` identity and ∀ x != y: xor(x, y) != 0` - // (each u8 "xor key" providing a unique total ordering of u8) - // '<' and '>' have a "xor distance" of 2 (`xor('<', '>') = 2`) - // xor(x, '>') <= 2 => {'>', '?', '<'} - // xor(x, '<') <= 2 => {'<', '=', '>'} - // - // We assume P('=') > P('?'), - // given well/commonly-formatted URLs with querystrings contain - // a single '?' but possibly many '=' - // - // Thus it's preferable/near-optimal to "xor distance" on '>', - // since we'll slowpath at most one block per URL - // - // Some rust code to sanity check this yourself: - // ```rs - // fn xordist(x: u8, n: u8) -> Vec<(char, u8)> { - // (0..=255).into_iter().map(|c| (c as char, c ^ x)).filter(|(_c, y)| *y <= n).collect() - // } - // (xordist(b'<', 2), xordist(b'>', 2)) - // ``` - const B3: usize = uniform_block(3); // (dist <= 2) + 1 to wrap - const BGT: usize = uniform_block(b'>'); - - let xgt = x ^ BGT; - let ltgtq = xgt.wrapping_sub(B3) & !xgt; - - offsetnz((ltgtq | lt | gt) & M128) + + let xor_del = x ^ DEL; + let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL + + offsetnz((lt | eq_del) & M128) } // A byte-wise range-check on an entire word/block, -// ensuring all bytes in the word satisfy `32 <= x <= 126` -// IMPORTANT: false negatives if obs-text is present (0x80..=0xFF) +// ensuring all bytes in the word satisfy `32 <= (x != 127) <= 255` #[inline] fn match_header_value_char_8_swar(block: ByteBlock) -> usize { - // 32 <= x <= 126 + // 32 <= (x != 127) <= 255 const M: u8 = 0x20; - const N: u8 = 0x7E; + // uniform block full of space (32). 
const BM: usize = uniform_block(M); - const BN: usize = uniform_block(127 - N); + // uniform block full of 1. + const ONE: usize = uniform_block(0x01); + // uniform block full of DEL (127). + const DEL: usize = uniform_block(0x7f); + // uniform block full of 128. const M128: usize = uniform_block(128); let x = usize::from_ne_bytes(block); // Really just a transmute let lt = x.wrapping_sub(BM) & !x; // <= m - let gt = x.wrapping_add(BN) | x; // >= n - offsetnz((lt | gt) & M128) + + let xor_del = x ^ DEL; + let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL + + offsetnz((lt | eq_del) & M128) } /// Check block to find offset of first non-zero byte @@ -202,13 +183,15 @@ fn test_is_header_value_block() { for b in 0..32_u8 { assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b); } - // 32..127 => true - for b in 32..127_u8 { + // 32..=126 => true + for b in 32..=126_u8 { assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b); } - // 127..=255 => false - for b in 127..=255_u8 { - assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b); + // 127 => false + assert!(!is_header_value_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F'); + // 128..=255 => true + for b in 128..=255_u8 { + assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b); } @@ -228,14 +211,15 @@ fn test_is_uri_block() { for b in 0..33_u8 { assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b); } - // 33..127 => true if b not in { '<', '?', '>' } - let falsy = |b| b"".contains(&b); - for b in 33..127_u8 { - assert_eq!(is_uri_block([b; BLOCK_SIZE]), !falsy(b), "b={}", b); + // 33..=126 => true + for b in 33..=126_u8 { + assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b); } - // 127..=255 => false - for b in 127..=255_u8 { - assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b); + // 127 => false + assert!(!is_uri_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F'); + // 128..=255 => true + for b in 128..=255_u8 { + assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b); } }