22Contains the main functionality of the JSONSchemaLexer.
33"""
44
5- from typing import Any , ClassVar
5+ from typing import ClassVar
66
7- from pygments .lexer import RegexLexer , include
7+ from pygments .lexers .data import (
8+ JsonLexer , # type: ignore[reportMissingTypeStubs]
9+ )
810from pygments .token import Token
911
1012
11- def _get_regex_from_options ( options : list [ str ]) -> str :
13+ class JSONSchemaLexer ( JsonLexer ) :
1214 """
13- Constructs regex allowing any string from the options list.
14-
15- Args:
16- options (list[str]): List of options to be included
17- in the regex pattern.
18-
19- Returns:
20- str: Regular expression pattern constructed from the options.
21-
22- """
23- options = ['"' + option + '"' for option in options ]
24- return "(" + "|" .join (options ) + ")"
25-
26-
27- class JSONSchemaLexer (RegexLexer ):
28- """
29- Lexer for JSON Schema syntax highlighting.
15+ For JSONSchema.
3016 """
3117
3218 name = "JSON Schema Lexer"
@@ -41,15 +27,15 @@ class JSONSchemaLexer(RegexLexer):
4127 "null" ,
4228 ]
4329 core_keywords : ClassVar [list [str ]] = [
44- r"\ $schema" ,
45- r"\ $id" ,
46- r"\ $ref" ,
47- r"\ $defs" ,
48- r"\ $comment" ,
49- r"\ $dynamicAnchor" ,
50- r"\ $dynamicRef" ,
51- r"\ $anchor" ,
52- r"\ $vocabulary" ,
30+ " $schema" ,
31+ " $id" ,
32+ " $ref" ,
33+ " $defs" ,
34+ " $comment" ,
35+ " $dynamicAnchor" ,
36+ " $dynamicRef" ,
37+ " $anchor" ,
38+ " $vocabulary" ,
5339 ]
5440 applicator_keywords : ClassVar [list [str ]] = [
5541 "oneOf" ,
@@ -109,100 +95,32 @@ class JSONSchemaLexer(RegexLexer):
10995 "format_assertion" ,
11096 ]
11197
112- tokens : ClassVar [dict [str , list [Any ]]] = {
113- "whitespace" : [
114- (r"\s+" , Token .Whitespace ),
115- ],
116- "data_types" : [
117- # Used Literal type here to differentiate the highlighted
118- # color of data types from other keywords
119- (_get_regex_from_options (data_types ), Token .Literal ),
120- ],
121- "core_keywords" : [
122- (
123- _get_regex_from_options (core_keywords ),
124- Token .Keyword .Reserved ,
125- "objectattribute" ,
126- ),
127- ],
128- "applicator_keywords" : [
129- (
130- _get_regex_from_options (applicator_keywords ),
131- Token .Keyword .Reserved ,
132- "objectattribute" ,
133- ),
134- ],
135- "validation_keywords" : [
136- (
137- _get_regex_from_options (validation_keywords ),
138- Token .Keyword .Reserved ,
139- "objectattribute" ,
140- ),
141- ],
142- "meta_data_keywords" : [
143- (
144- _get_regex_from_options (meta_data_keywords ),
145- Token .Keyword .Reserved ,
146- "objectattribute" ,
147- ),
148- ],
149- "other_keywords" : [
150- (
151- _get_regex_from_options (other_keywords ),
152- Token .Keyword .Reserved ,
153- "objectattribute" ,
154- ),
155- ],
156- "keywords" : [
157- include ("core_keywords" ),
158- include ("applicator_keywords" ),
159- include ("validation_keywords" ),
160- include ("meta_data_keywords" ),
161- include ("other_keywords" ),
162- ],
163- # represents a simple terminal value
164- "simplevalue" : [
165- include ("data_types" ),
166- (r"(true|false)" , Token .Number ),
167- (
168- r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?" ,
169- Token .Number .Integer ,
170- ),
171- ('"(\\ |"|[^"])*"' , Token .String .Double ),
172- ],
173- # the right hand side of an object, after the attribute name
174- "objectattribute" : [
175- include ("value" ),
176- (r":" , Token .Punctuation ),
177- # comma terminates the attribute but expects more
178- (r"," , Token .Punctuation , "#pop" ),
179- # a closing bracket terminates the entire object, so pop twice
180- (r"}" , Token .Punctuation , ("#pop" , "#pop" )),
181- ],
182- # a json object - { attr, attr, ... }
183- "objectvalue" : [
184- include ("whitespace" ),
185- include ("keywords" ),
186- (r'"(\\\\|\\"|[^"])*"' , Token .Name .Tag , "objectattribute" ),
187- (r"}" , Token .Punctuation , "#pop" ),
188- ],
189- # json array - [ value, value, ... ]
190- "arrayvalue" : [
191- include ("whitespace" ),
192- include ("value" ),
193- (r"," , Token .Punctuation ),
194- (r"]" , Token .Punctuation , "#pop" ),
195- ],
196- # a json value - either a simple value or a
197- # complex value (object or array)
198- "value" : [
199- include ("whitespace" ),
200- include ("simplevalue" ),
201- (r"{" , Token .Punctuation , "objectvalue" ),
202- (r"\[" , Token .Punctuation , "arrayvalue" ),
203- ],
204- # the root of a json document should be a value
205- "root" : [
206- include ("value" ),
207- ],
208- }
98+ parsed_keywords : ClassVar [list [str ]] = [
99+ '"%s"' % keyword
100+ for keyword in (
101+ core_keywords
102+ + applicator_keywords
103+ + meta_data_keywords
104+ + validation_keywords
105+ + other_keywords
106+ )
107+ ]
108+
109+ parsed_data_types : ClassVar [list [str ]] = [
110+ '"%s"' % data_type for data_type in data_types
111+ ]
112+
113+ def get_tokens_unprocessed (self , text : str ): # type: ignore[reportUnknownParameterType]
114+ """
115+ Yield the parent lexer's tokens, re-tagging JSON Schema keywords and data types.
116+ """
117+ for start , token , value in super ().get_tokens_unprocessed (text ): # type: ignore[reportUnknownVariableType]
118+ if token is Token .Name .Tag and value in self .parsed_keywords :
119+ yield start , Token .Keyword , value
120+ elif (
121+ token is Token .String .Double
122+ and value in self .parsed_data_types
123+ ):
124+ yield start , Token .Name .Decorator , value
125+ else :
126+ yield start , token , value
0 commit comments