@@ -2,24 +2,34 @@ import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
22import type {
33 CharacterClass ,
44 CharacterClassElement ,
5+ ClassRangesCharacterClassElement ,
56 UnicodePropertyCharacterSet ,
7+ UnicodeSetsCharacterClassElement ,
68} from "@eslint-community/regexpp/ast"
79import type { RegExpContext } from "../utils"
8- import {
9- CP_DIGIT_ZERO ,
10- CP_SPACE ,
11- createRule ,
12- defineRegexpVisitor ,
13- } from "../utils"
10+ import { createRule , defineRegexpVisitor } from "../utils"
1411import { mention } from "../utils/mention"
12+ import type { ReadonlyFlags } from "regexp-ast-analysis"
13+ import { toUnicodeSet } from "regexp-ast-analysis"
14+ import type { ReadonlyWord } from "refa"
15+ import { getLexicographicallySmallest } from "../utils/lexicographically-smallest"
1516
/**
 * Kind labels used to group character class elements for ordering.
 */
type CharacterClassElementKind =
    | "\\s"
    | "\\w"
    | "\\d"
    | "\\p"
    // Kind reported for every element that matches no other kind.
    | "*"
    // Kind reported for `ClassStringDisjunction` elements.
    | "\\q"
    // Kind reported for `CharacterClass` / `ExpressionCharacterClass` elements.
    | "[]"

/**
 * Built-in ranking of element kinds, earliest first.
 * `isValidOrder` looks kinds up in this list with `indexOf`, so the position
 * of each entry is significant.
 */
const DEFAULT_ORDER: CharacterClassElementKind[] = [
    "\\s",
    "\\w",
    "\\d",
    "\\p",
    "*",
    "\\q",
    "[]",
]
2434
2535/**
@@ -37,9 +47,46 @@ function getCharacterClassElementKind(
3747 ? "\\s"
3848 : "\\p"
3949 }
50+ if ( node . type === "ClassStringDisjunction" ) {
51+ return "\\q"
52+ }
53+ if (
54+ node . type === "CharacterClass" ||
55+ node . type === "ExpressionCharacterClass"
56+ ) {
57+ return "[]"
58+ }
4059 return "*"
4160}
4261
/**
 * Finds the lexicographically smallest word accepted by a character class
 * element.
 *
 * A negated `CharacterSet` is evaluated with `negate` forced to `false`, so
 * the word is derived from the set's non-negated form. When
 * `getLexicographicallySmallest` yields a falsy result, an empty word is
 * returned instead.
 *
 * @param node the character class element to inspect
 * @param flags the regexp flags passed through to `toUnicodeSet`
 * @returns the smallest word, or `[]` if none was produced
 */
function getLexicographicallySmallestFromElement(
    node: CharacterClassElement,
    flags: ReadonlyFlags,
): ReadonlyWord {
    if (node.type === "CharacterSet" && node.negate) {
        // Use the non-negated form of the set for the calculation.
        return (
            getLexicographicallySmallest(
                toUnicodeSet({ ...node, negate: false }, flags),
            ) || []
        )
    }
    return getLexicographicallySmallest(toUnicodeSet(node, flags)) || []
}
76+
/**
 * Lexicographically compares two words element by element.
 *
 * The first position at which the words differ decides the result (the
 * numeric difference of the two elements). If one word is a prefix of the
 * other, the shorter word sorts first.
 *
 * @returns a negative number if `a` sorts before `b`, a positive number if
 * `a` sorts after `b`, and `0` if both words are equal
 */
function compareWords(a: ReadonlyWord, b: ReadonlyWord): number {
    const shared = Math.min(a.length, b.length)
    let pos = 0
    // Skip the common prefix.
    while (pos < shared && a[pos] === b[pos]) {
        pos++
    }
    return pos < shared ? a[pos] - b[pos] : a.length - b.length
}
89+
4390export default createRule ( "sort-character-class-elements" , {
4491 meta : {
4592 docs : {
@@ -54,7 +101,17 @@ export default createRule("sort-character-class-elements", {
54101 properties : {
55102 order : {
56103 type : "array" ,
57- items : { enum : [ "\\w" , "\\d" , "\\s" , "\\p" , "*" ] } ,
104+ items : {
105+ enum : [
106+ "\\s" ,
107+ "\\w" ,
108+ "\\d" ,
109+ "\\p" ,
110+ "*" ,
111+ "\\q" ,
112+ "[]" ,
113+ ] ,
114+ } ,
58115 } ,
59116 } ,
60117 additionalProperties : false ,
@@ -73,6 +130,8 @@ export default createRule("sort-character-class-elements", {
73130 "\\d" ?: number
74131 "\\s" ?: number
75132 "\\p" ?: number
133+ "\\q" ?: number
134+ "[]" ?: number
76135 } = { "*" : Infinity }
77136
78137 ; (
@@ -84,6 +143,7 @@ export default createRule("sort-character-class-elements", {
84143
85144 function createVisitor ( {
86145 node,
146+ flags,
87147 getRegexpLocation,
88148 patternSource,
89149 } : RegExpContext ) : RegExpVisitor . Handlers {
@@ -93,10 +153,10 @@ export default createRule("sort-character-class-elements", {
93153 for ( const next of ccNode . elements ) {
94154 if ( prevList . length ) {
95155 const prev = prevList [ 0 ]
96- if ( ! isValidOrder ( prev , next ) ) {
156+ if ( ! isValidOrder ( prev , next , flags ) ) {
97157 let moveTarget = prev
98158 for ( const p of prevList ) {
99- if ( isValidOrder ( p , next ) ) {
159+ if ( isValidOrder ( p , next , flags ) ) {
100160 break
101161 } else {
102162 moveTarget = p
@@ -144,6 +204,7 @@ export default createRule("sort-character-class-elements", {
144204 function isValidOrder (
145205 prev : CharacterClassElement ,
146206 next : CharacterClassElement ,
207+ flags : ReadonlyFlags ,
147208 ) {
148209 const prevKind = getCharacterClassElementKind ( prev )
149210 const nextKind = getCharacterClassElementKind ( next )
@@ -154,41 +215,33 @@ export default createRule("sort-character-class-elements", {
154215 } else if ( prevOrder > nextOrder ) {
155216 return false
156217 }
157- if ( prev . type === "CharacterSet" && prev . kind === "property" ) {
158- if ( next . type === "CharacterSet" ) {
159- if ( next . kind === "property" ) {
160- return isValidOrderForUnicodePropertyCharacterSet (
161- prev ,
162- next ,
163- )
164- }
165- // e.g. /[\p{ASCII}\d]/
166- return false
167- }
168- // e.g. /[\p{ASCII}a]/
218+
219+ const prevOrderShortCircuit = DEFAULT_ORDER . indexOf ( prevKind )
220+ const nextOrderShortCircuit = DEFAULT_ORDER . indexOf ( nextKind )
221+ if ( prevOrderShortCircuit < nextOrderShortCircuit ) {
169222 return true
170- } else if (
223+ } else if ( prevOrderShortCircuit > nextOrderShortCircuit ) {
224+ return false
225+ }
226+
227+ if (
228+ prev . type === "CharacterSet" &&
229+ prev . kind === "property" &&
171230 next . type === "CharacterSet" &&
172231 next . kind === "property"
173232 ) {
174- if ( prev . type === "CharacterSet" ) {
175- // e.g. /[\d\p{ASCII}]/
176- return true
177- }
178- // e.g. /[a\p{ASCII}]/
179- return false
233+ return isValidOrderForUnicodePropertyCharacterSet ( prev , next )
180234 }
181- if ( prev . type === "CharacterSet" && next . type === "CharacterSet" ) {
182- if ( prev . kind === "word" && next . kind === "digit" ) {
183- return true
184- }
185- if ( prev . kind === "digit" && next . kind === "word" ) {
186- return false
187- }
188- }
189- const prevCP = getTargetCodePoint ( prev )
190- const nextCP = getTargetCodePoint ( next )
191- if ( prevCP <= nextCP ) {
235+
236+ const prevWord = getLexicographicallySmallestFromElement (
237+ prev ,
238+ flags ,
239+ )
240+ const nextWord = getLexicographicallySmallestFromElement (
241+ next ,
242+ flags ,
243+ )
244+ if ( compareWords ( prevWord , nextWord ) <= 0 ) {
192245 return true
193246 }
194247 return false
@@ -218,29 +271,6 @@ export default createRule("sort-character-class-elements", {
218271 return true
219272 }
220273
221- /**
222- * Gets the target code point for a given element.
223- */
224- function getTargetCodePoint (
225- node : Exclude < CharacterClassElement , UnicodePropertyCharacterSet > ,
226- ) {
227- if ( node . type === "CharacterSet" ) {
228- if ( node . kind === "digit" || node . kind === "word" ) {
229- return CP_DIGIT_ZERO
230- }
231- if ( node . kind === "space" ) {
232- return CP_SPACE
233- }
234- return Infinity
235- }
236- if ( node . type === "CharacterClassRange" ) {
237- return node . min . value
238- }
239- // FIXME: TS Error
240- // @ts -expect-error -- FIXME
241- return node . value
242- }
243-
244274 return defineRegexpVisitor ( context , {
245275 createVisitor,
246276 } )
@@ -254,9 +284,11 @@ function escapeRaw(node: CharacterClassElement, target: CharacterClassElement) {
254284 let raw = node . raw
255285 if ( raw . startsWith ( "-" ) ) {
256286 const parent = target . parent as CharacterClass
257- // FIXME: TS Error
258- // @ts -expect-error -- FIXME
259- const prev = parent . elements [ parent . elements . indexOf ( target ) - 1 ]
287+ const elements : (
288+ | UnicodeSetsCharacterClassElement
289+ | ClassRangesCharacterClassElement
290+ ) [ ] = parent . elements
291+ const prev = elements [ elements . indexOf ( target ) - 1 ]
260292 if (
261293 prev &&
262294 ( prev . type === "Character" || prev . type === "CharacterSet" )
0 commit comments