@@ -13,17 +13,17 @@ use crate::syntax_error::SyntaxError;
1313/// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
1414pub struct Parser {
1515 /// The ranges of the statements
16- ranges : Vec < TextRange > ,
16+ ranges : Vec < ( usize , usize ) > ,
1717 /// The syntax errors accumulated during parsing
1818 errors : Vec < SyntaxError > ,
1919 /// The start of the current statement, if any
20- current_stmt_start : Option < TextSize > ,
20+ current_stmt_start : Option < usize > ,
2121 /// The tokens to parse
2222 pub tokens : Vec < Token > ,
2323
2424 eof_token : Token ,
2525
26- last_token_end : Option < TextSize > ,
26+ next_pos : usize ,
2727}
2828
2929/// Result of Building
@@ -46,66 +46,96 @@ impl Parser {
4646 return !WHITESPACE_TOKENS . contains ( & t. kind )
4747 || ( t. kind == SyntaxKind :: Newline && t. text . chars ( ) . count ( ) > 1 ) ;
4848 } )
49- . rev ( )
5049 . cloned ( )
5150 . collect :: < Vec < _ > > ( ) ;
5251
52+ let eof_token = Token :: eof ( usize:: from (
53+ tokens
54+ . last ( )
55+ . map ( |t| t. span . start ( ) )
56+ . unwrap_or ( TextSize :: from ( 0 ) ) ,
57+ ) ) ;
58+
59+ // next_pos should already be initialised to the first valid token
60+ let mut next_pos = 0 ;
61+ loop {
62+ let token = tokens. get ( next_pos) . unwrap_or ( & eof_token) ;
63+
64+ if is_irrelevant_token ( token) {
65+ next_pos += 1 ;
66+ } else {
67+ break ;
68+ }
69+ }
70+
5371 Self {
5472 ranges : Vec :: new ( ) ,
55- eof_token : Token :: eof ( usize:: from (
56- tokens
57- . first ( )
58- . map ( |t| t. span . start ( ) )
59- . unwrap_or ( TextSize :: from ( 0 ) ) ,
60- ) ) ,
73+ eof_token,
6174 errors : Vec :: new ( ) ,
6275 current_stmt_start : None ,
6376 tokens,
64- last_token_end : None ,
77+ next_pos ,
6578 }
6679 }
6780
6881 pub fn finish ( self ) -> Parse {
6982 Parse {
70- ranges : self . ranges ,
83+ ranges : self
84+ . ranges
85+ . iter ( )
86+ . map ( |( start, end) | {
87+ println ! ( "{} {}" , start, end) ;
88+ let from = self . tokens . get ( * start) ;
89+ let to = self . tokens . get ( * end) . unwrap_or ( & self . eof_token ) ;
90+
91+ TextRange :: new ( from. unwrap ( ) . span . start ( ) , to. span . end ( ) )
92+ } )
93+ . collect ( ) ,
7194 errors : self . errors ,
7295 }
7396 }
7497
7598 /// Start statement
76- pub fn start_stmt ( & mut self ) -> Token {
99+ pub fn start_stmt ( & mut self ) {
77100 assert ! ( self . current_stmt_start. is_none( ) ) ;
78-
79- let token = self . peek ( ) ;
80-
81- self . current_stmt_start = Some ( token. span . start ( ) ) ;
82-
83- token
101+ self . current_stmt_start = Some ( self . next_pos ) ;
84102 }
85103
86104 /// Close statement
87105 pub fn close_stmt ( & mut self ) {
88- self . ranges . push ( TextRange :: new (
106+ assert ! ( self . next_pos > 0 ) ;
107+
108+ self . ranges . push ( (
89109 self . current_stmt_start . expect ( "Expected active statement" ) ,
90- self . last_token_end . expect ( "Expected last token end" ) ,
110+ self . next_pos - 1 ,
91111 ) ) ;
92112
93113 self . current_stmt_start = None ;
94114 }
95115
96- fn advance ( & mut self ) -> Token {
97- let token = self . tokens . pop ( ) . unwrap_or ( self . eof_token . clone ( ) ) ;
98-
99- self . last_token_end = Some ( token. span . end ( ) ) ;
100-
101- token
116+ fn advance ( & mut self ) -> & Token {
117+ let mut first_relevant_token = None ;
118+ loop {
119+ let token = self . tokens . get ( self . next_pos ) . unwrap_or ( & self . eof_token ) ;
120+
121+ // we need to continue with next_pos until the next relevant token after we already
122+ // found the first one
123+ if !is_irrelevant_token ( token) {
124+ if let Some ( t) = first_relevant_token {
125+ return t;
126+ }
127+ first_relevant_token = Some ( token) ;
128+ }
129+
130+ self . next_pos += 1 ;
131+ }
102132 }
103133
104- fn peek ( & mut self ) -> Token {
105- self . tokens
106- . last ( )
107- . cloned ( )
108- . unwrap_or ( self . eof_token . clone ( ) )
134+ fn peek ( & self ) -> & Token {
135+ match self . tokens . get ( self . next_pos ) {
136+ Some ( token ) => token ,
137+ None => & self . eof_token ,
138+ }
109139 }
110140
111141 /// checks if the current token is of `kind` and advances if true
@@ -132,3 +162,8 @@ impl Parser {
132162 todo ! ( ) ;
133163 }
134164}
165+
166+ fn is_irrelevant_token ( t : & Token ) -> bool {
167+ return WHITESPACE_TOKENS . contains ( & t. kind )
168+ && ( t. kind != SyntaxKind :: Newline || t. text . chars ( ) . count ( ) == 1 ) ;
169+ }
0 commit comments