@@ -2,11 +2,17 @@ import type { RegExpVisitor } from "regexpp/visitor"
22import type {
33 Assertion ,
44 EdgeAssertion ,
5+ Element ,
56 LookaroundAssertion ,
67 WordBoundaryAssertion ,
78} from "regexpp/ast"
89import type { RegExpContext } from "../utils"
910import { createRule , defineRegexpVisitor } from "../utils"
11+ import type {
12+ MatchingDirection ,
13+ ReadonlyFlags ,
14+ FirstLookChar ,
15+ } from "regexp-ast-analysis"
1016import {
1117 Chars ,
1218 getFirstCharAfter ,
@@ -15,17 +21,101 @@ import {
1521 getMatchingDirectionFromAssertionKind ,
1622 hasSomeDescendant ,
1723 isPotentiallyEmpty ,
24+ isZeroLength ,
1825 FirstConsumedChars ,
1926} from "regexp-ast-analysis"
2027import { mention } from "../utils/mention"
2128
29+ /**
30+ * Combines 2 look chars such that the result is equivalent to 2 adjacent
31+ * assertions `(?=a)(?=b)`.
32+ */
33+ function firstLookCharsIntersection (
34+ a : FirstLookChar ,
35+ b : FirstLookChar ,
36+ ) : FirstLookChar {
37+ const char = a . char . intersect ( b . char )
38+ return {
39+ char : a . char . intersect ( b . char ) ,
40+ exact : ( a . exact && b . exact ) || char . isEmpty ,
41+ edge : a . edge && b . edge ,
42+ }
43+ }
44+
45+ type GetFirstCharAfter = (
46+ afterThis : Assertion ,
47+ direction : MatchingDirection ,
48+ flags : ReadonlyFlags ,
49+ ) => FirstLookChar
50+
51+ /**
52+ * Creates a {@link GetFirstCharAfter} function that will reorder assertions to
53+ * get the maximum information after the characters after the given assertions.
54+ *
55+ * Conceptually, this will reorder adjacent assertions such that given
56+ * assertion is moved as far as possible in the opposite direction of natural
57+ * matching direction. E.g. when given `$` in `a(?!a)(?<=\w)$`, the characters
58+ * after `$` will be returned as if the pattern was `a$(?!a)(?<=\w)`.
59+ *
60+ * @param forbidden A list of assertions that may not be reordered.
61+ */
62+ function createReorderingGetFirstCharAfter (
63+ forbidden : ReadonlySet < Assertion > ,
64+ ) : GetFirstCharAfter {
65+ /** Whether the given element or one of its descendants is forbidden. */
66+ function hasForbidden ( element : Element ) : boolean {
67+ if ( element . type === "Assertion" && forbidden . has ( element ) ) {
68+ return true
69+ }
70+ for ( const f of forbidden ) {
71+ if ( hasSomeDescendant ( element , f ) ) {
72+ return true
73+ }
74+ }
75+ return false
76+ }
77+
78+ return ( afterThis , direction , flags ) => {
79+ let result = getFirstCharAfter ( afterThis , direction , flags )
80+
81+ if ( afterThis . parent . type === "Alternative" ) {
82+ const { elements } = afterThis . parent
83+
84+ const inc = direction === "ltr" ? - 1 : + 1
85+ const start = elements . indexOf ( afterThis )
86+ for ( let i = start + inc ; i >= 0 && i < elements . length ; i += inc ) {
87+ const other = elements [ i ]
88+ if ( ! isZeroLength ( other ) ) {
89+ break
90+ }
91+ if ( hasForbidden ( other ) ) {
92+ // we hit an element that cannot be reordered
93+ break
94+ }
95+
96+ const otherResult = FirstConsumedChars . toLook (
97+ getFirstConsumedChar ( other , direction , flags ) ,
98+ )
99+
100+ result = firstLookCharsIntersection ( result , otherResult )
101+ }
102+ }
103+
104+ return result
105+ }
106+ }
107+
22108const messages = {
23109 alwaysRejectByChar :
24110 "{{assertion}} will always reject because it is {{followedOrPreceded}} by a character." ,
111+ alwaysAcceptByChar :
112+ "{{assertion}} will always accept because it is never {{followedOrPreceded}} by a character." ,
25113 alwaysRejectByNonLineTerminator :
26114 "{{assertion}} will always reject because it is {{followedOrPreceded}} by a non-line-terminator character." ,
27115 alwaysAcceptByLineTerminator :
28116 "{{assertion}} will always accept because it is {{followedOrPreceded}} by a line-terminator character." ,
117+ alwaysAcceptByLineTerminatorOrEdge :
118+ "{{assertion}} will always accept because it is {{followedOrPreceded}} by a line-terminator character or the {{startOrEnd}} of the input string." ,
29119 alwaysAcceptOrRejectFollowedByWord :
30120 "{{assertion}} will always {{acceptOrReject}} because it is preceded by a non-word character and followed by a word character." ,
31121 alwaysAcceptOrRejectFollowedByNonWord :
@@ -61,12 +151,16 @@ export default createRule("no-useless-assertions", {
61151 flags,
62152 getRegexpLocation,
63153 } : RegExpContext ) : RegExpVisitor . Handlers {
154+ const reported = new Set < Assertion > ( )
155+
64156 /** Report */
65157 function report (
66158 assertion : Assertion ,
67159 messageId : keyof typeof messages ,
68160 data : Record < string , string > ,
69161 ) {
162+ reported . add ( assertion )
163+
70164 context . report ( {
71165 node,
72166 loc : getRegexpLocation ( assertion ) ,
@@ -81,20 +175,48 @@ export default createRule("no-useless-assertions", {
81175 /**
82176 * Verify for `^` or `$`
83177 */
84- function verifyStartOrEnd ( assertion : EdgeAssertion ) : void {
178+ function verifyStartOrEnd (
179+ assertion : EdgeAssertion ,
180+ getFirstCharAfterFn : GetFirstCharAfter ,
181+ ) : void {
85182 // Note: /^/ is the same as /(?<!.)/s and /^/m is the same as /(?<!.)/
86183 // Note: /$/ is the same as /(?!.)/s and /$/m is the same as /(?!.)/
87184
88185 // get the "next" character
89186 const direction = getMatchingDirectionFromAssertionKind (
90187 assertion . kind ,
91188 )
92- const next = getFirstCharAfter ( assertion , direction , flags )
189+ const next = getFirstCharAfterFn ( assertion , direction , flags )
93190
94191 const followedOrPreceded =
95192 assertion . kind === "end" ? "followed" : "preceded"
96193
97- if ( ! next . edge ) {
194+ const lineTerminator = Chars . lineTerminator ( flags )
195+
196+ if ( next . edge ) {
197+ // the string might start/end after the assertion
198+
199+ if ( ! flags . multiline ) {
200+ // ^/$ will always accept at an edge with no char before/after it
201+ if ( next . char . isEmpty ) {
202+ report ( assertion , "alwaysAcceptByChar" , {
203+ followedOrPreceded,
204+ } )
205+ }
206+ } else {
207+ // ^/$ will always accept at an edge or line terminator before/after it
208+ if ( next . char . isSubsetOf ( lineTerminator ) ) {
209+ report (
210+ assertion ,
211+ "alwaysAcceptByLineTerminatorOrEdge" ,
212+ {
213+ followedOrPreceded,
214+ startOrEnd : assertion . kind ,
215+ } ,
216+ )
217+ }
218+ }
219+ } else {
98220 // there is always some character of `node`
99221
100222 if ( ! flags . multiline ) {
@@ -105,8 +227,6 @@ export default createRule("no-useless-assertions", {
105227 } else {
106228 // only if the character is a sub set of /./, will the assertion trivially reject
107229
108- const lineTerminator = Chars . lineTerminator ( flags )
109-
110230 if ( next . char . isDisjointWith ( lineTerminator ) ) {
111231 report (
112232 assertion ,
@@ -127,19 +247,15 @@ export default createRule("no-useless-assertions", {
127247 */
128248 function verifyWordBoundary (
129249 assertion : WordBoundaryAssertion ,
250+ getFirstCharAfterFn : GetFirstCharAfter ,
130251 ) : void {
131252 const word = Chars . word ( flags )
132253
133- const next = getFirstCharAfter ( assertion , "ltr" , flags )
134- const prev = getFirstCharAfter ( assertion , "rtl" , flags )
254+ const next = getFirstCharAfterFn ( assertion , "ltr" , flags )
255+ const prev = getFirstCharAfterFn ( assertion , "rtl" , flags )
135256
136- if ( prev . edge || next . edge ) {
137- // we can only do this analysis if we know the previous and next character
138- return
139- }
140-
141- const nextIsWord = next . char . isSubsetOf ( word )
142- const prevIsWord = prev . char . isSubsetOf ( word )
257+ const nextIsWord = next . char . isSubsetOf ( word ) && ! next . edge
258+ const prevIsWord = prev . char . isSubsetOf ( word ) && ! prev . edge
143259 const nextIsNonWord = next . char . isDisjointWith ( word )
144260 const prevIsNonWord = prev . char . isDisjointWith ( word )
145261
@@ -198,7 +314,10 @@ export default createRule("no-useless-assertions", {
198314 /**
199315 * Verify for LookaroundAssertion
200316 */
201- function verifyLookaround ( assertion : LookaroundAssertion ) : void {
317+ function verifyLookaround (
318+ assertion : LookaroundAssertion ,
319+ getFirstCharAfterFn : GetFirstCharAfter ,
320+ ) : void {
202321 if ( isPotentiallyEmpty ( assertion . alternatives ) ) {
203322 // we don't handle trivial accept/reject based on emptiness
204323 return
@@ -207,10 +326,7 @@ export default createRule("no-useless-assertions", {
207326 const direction = getMatchingDirectionFromAssertionKind (
208327 assertion . kind ,
209328 )
210- const after = getFirstCharAfter ( assertion , direction , flags )
211- if ( after . edge ) {
212- return
213- }
329+ const after = getFirstCharAfterFn ( assertion , direction , flags )
214330
215331 const firstOf = FirstConsumedChars . toLook (
216332 getFirstConsumedChar (
@@ -227,7 +343,10 @@ export default createRule("no-useless-assertions", {
227343 // Careful now! If exact is false, we are only guaranteed to have a superset of the actual character.
228344 // False negatives are fine but we can't have false positives.
229345
230- if ( after . char . isDisjointWith ( firstOf . char ) ) {
346+ if (
347+ after . char . isDisjointWith ( firstOf . char ) &&
348+ ! ( after . edge && firstOf . edge )
349+ ) {
231350 report (
232351 assertion ,
233352 assertion . negate
@@ -240,6 +359,10 @@ export default createRule("no-useless-assertions", {
240359 )
241360 }
242361
362+ if ( after . edge ) {
363+ return
364+ }
365+
243366 // accept is harder because that can't generally be decided by the first character
244367
245368 // if this contains another assertion then that might reject. It's out of our control
@@ -273,23 +396,58 @@ export default createRule("no-useless-assertions", {
273396 }
274397 }
275398
399+ /**
400+ * Verify for Assertion
401+ */
402+ function verifyAssertion (
403+ assertion : Assertion ,
404+ getFirstCharAfterFn : GetFirstCharAfter ,
405+ ) : void {
406+ switch ( assertion . kind ) {
407+ case "start" :
408+ case "end" :
409+ verifyStartOrEnd ( assertion , getFirstCharAfterFn )
410+ break
411+
412+ case "word" :
413+ verifyWordBoundary ( assertion , getFirstCharAfterFn )
414+ break
415+
416+ case "lookahead" :
417+ case "lookbehind" :
418+ verifyLookaround ( assertion , getFirstCharAfterFn )
419+ break
420+ default :
421+ }
422+ }
423+
424+ const allAssertions : Assertion [ ] = [ ]
425+
276426 return {
277427 onAssertionEnter ( assertion ) {
278- switch ( assertion . kind ) {
279- case "start" :
280- case "end" :
281- verifyStartOrEnd ( assertion )
282- break
283-
284- case "word" :
285- verifyWordBoundary ( assertion )
286- break
287-
288- case "lookahead" :
289- case "lookbehind" :
290- verifyLookaround ( assertion )
291- break
292- default :
428+ // Phase 1:
429+ // The context of assertions is determined by only looking
430+ // at elements after the current assertion. This means that
431+ // the order of assertions is kept as is.
432+ verifyAssertion ( assertion , getFirstCharAfter )
433+
434+ // store all assertions for the second phase
435+ allAssertions . push ( assertion )
436+ } ,
437+ onPatternLeave ( ) {
438+ // Phase 2:
439+ // The context of assertions is determined by reordering
440+ // assertions such that as much information as possible can
441+ // be extracted from its surrounding assertions.
442+ const reorderingGetFirstCharAfter =
443+ createReorderingGetFirstCharAfter ( reported )
444+ for ( const assertion of allAssertions ) {
445+ if ( ! reported . has ( assertion ) ) {
446+ verifyAssertion (
447+ assertion ,
448+ reorderingGetFirstCharAfter ,
449+ )
450+ }
293451 }
294452 } ,
295453 }
0 commit comments