Skip to content

Commit 525ef26

Browse files
authored
feat: upgrade nom to version 8.0.0 and accelerate expr_element using the first token. (#18935)
* refactor: upgrade nom to version 8.0.0 and replace the pratt parser with nom_language. Use the first token check to reduce branch traversal in expr_element. * chore: codefmt * refactor: replace nom-language to pratt-parser * chore: fix unit test * perf: optimize parse embedding * chore: fix unit test * perf: use try_dispatch to categorize the statement_body, reducing the number of branches and stack usage (otherwise, stack overflow is extremely likely). * chore: codefmt * fix: remove parse cut on statement_body * fix: remove parse cut on statement_body * chore: integrate bracket_map_access into the array * chore: optimize array parsing: keep column_id/literal, add fast path in try_dispatch, create array_number function for numeric arrays, handle negative numbers directly * chore: fix scalars test
1 parent 34565b0 commit 525ef26

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1543
-1136
lines changed

Cargo.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -548,8 +548,8 @@ fast-float2 = "0.2.3"
548548
gix = "0.71.0"
549549
indent = "0.1.1"
550550
logos = "0.12.1"
551-
nom = "7.1.1"
552-
nom-rule = "0.4"
551+
nom = "8.0.0"
552+
nom-rule = "0.5.1"
553553
pratt = "0.4.0"
554554
rspack-codespan-reporting = "0.11"
555555
rustc-demangle = "0.1"

src/query/ast/benches/bench.rs

Lines changed: 14 additions & 5 deletions
Large diffs are not rendered by default.

src/query/ast/src/parser/comment.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use nom::Parser;
1516
use nom_rule::rule;
1617

1718
use super::expr::literal_string;
@@ -33,7 +34,7 @@ pub fn comment(i: Input) -> IResult<Statement> {
3334
| #comment_column: "`COMMENT [IF EXISTS] ON COLUMN <table_name>.<column_name> IS '<string_literal>'`"
3435
| #comment_network_policy: "`COMMENT [IF EXISTS] ON NETWORK POLICY <policy_name> IS '<string_literal>'`"
3536
| #comment_password_policy: "`COMMENT [IF EXISTS] ON PASSWORD POLICY <policy_name> IS '<string_literal>'`"
36-
)(i)
37+
).parse(i)
3738
}
3839

3940
fn comment_table(i: Input) -> IResult<Statement> {

src/query/ast/src/parser/common.rs

Lines changed: 95 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,26 @@
1515
use std::cell::RefCell;
1616
use std::rc::Rc;
1717

18-
use nom::branch::alt;
19-
use nom::combinator::consumed;
20-
use nom::combinator::map;
21-
use nom::multi::many1;
18+
pub use nom::branch::alt;
19+
pub use nom::branch::permutation;
20+
pub use nom::combinator::consumed;
21+
pub use nom::combinator::map;
22+
pub use nom::combinator::not;
23+
pub use nom::combinator::value;
24+
pub use nom::multi::many1;
2225
use nom::sequence::terminated;
2326
use nom::Offset;
24-
use nom::Slice;
27+
use nom::Parser;
2528
use nom_rule::rule;
2629
use pratt::PrattError;
2730
use pratt::PrattParser;
2831
use pratt::Precedence;
2932

33+
pub fn parser_fn<'a, O, P>(mut parser: P) -> impl FnMut(Input<'a>) -> IResult<'a, O>
34+
where P: nom::Parser<Input<'a>, Output = O, Error = Error<'a>> {
35+
move |input| parser.parse(input)
36+
}
37+
3038
use crate::ast::quote::QuotedIdent;
3139
use crate::ast::ColumnID;
3240
use crate::ast::DatabaseRef;
@@ -85,7 +93,8 @@ pub fn lambda_params(i: Input) -> IResult<Vec<Identifier>> {
8593
rule!(
8694
#single_param
8795
| #multi_params
88-
)(i)
96+
)
97+
.parse(i)
8998
}
9099

91100
pub fn ident(i: Input) -> IResult<Identifier> {
@@ -116,7 +125,8 @@ pub fn stage_name(i: Input) -> IResult<Identifier> {
116125
rule!(
117126
#plain_ident
118127
| #anonymous_stage
119-
)(i)
128+
)
129+
.parse(i)
120130
}
121131

122132
fn plain_identifier(
@@ -134,7 +144,8 @@ fn plain_identifier(
134144
quote: None,
135145
ident_type: IdentifierType::None,
136146
},
137-
)(i)
147+
)
148+
.parse(i)
138149
}
139150
}
140151

@@ -193,7 +204,8 @@ fn identifier_variable(i: Input) -> IResult<Identifier> {
193204
quote: None,
194205
ident_type: IdentifierType::Variable,
195206
},
196-
)(i)
207+
)
208+
.parse(i)
197209
}
198210

199211
fn non_reserved_identifier(
@@ -205,7 +217,8 @@ fn non_reserved_identifier(
205217
| #quoted_identifier
206218
| #identifier_hole
207219
| #identifier_variable
208-
)(i)
220+
)
221+
.parse(i)
209222
}
210223
}
211224

@@ -228,7 +241,8 @@ fn non_reserved_keyword(
228241
pub fn database_ref(i: Input) -> IResult<DatabaseRef> {
229242
map(dot_separated_idents_1_to_2, |(catalog, database)| {
230243
DatabaseRef { catalog, database }
231-
})(i)
244+
})
245+
.parse(i)
232246
}
233247

234248
pub fn table_ref(i: Input) -> IResult<TableRef> {
@@ -242,7 +256,8 @@ pub fn table_ref(i: Input) -> IResult<TableRef> {
242256
table,
243257
with_options,
244258
},
245-
)(i)
259+
)
260+
.parse(i)
246261
}
247262

248263
pub fn set_type(i: Input) -> IResult<SetType> {
@@ -259,7 +274,8 @@ pub fn set_type(i: Input) -> IResult<SetType> {
259274
},
260275
None => SetType::SettingsSession,
261276
},
262-
)(i)
277+
)
278+
.parse(i)
263279
}
264280

265281
pub fn table_reference_only(i: Input) -> IResult<TableReference> {
@@ -279,7 +295,8 @@ pub fn table_reference_only(i: Input) -> IResult<TableReference> {
279295
unpivot: None,
280296
sample: None,
281297
},
282-
)(i)
298+
)
299+
.parse(i)
283300
}
284301

285302
pub fn column_reference_only(i: Input) -> IResult<(TableReference, Identifier)> {
@@ -304,40 +321,51 @@ pub fn column_reference_only(i: Input) -> IResult<(TableReference, Identifier)>
304321
column,
305322
)
306323
},
307-
)(i)
324+
)
325+
.parse(i)
308326
}
309327

310328
pub fn column_id(i: Input) -> IResult<ColumnID> {
311-
alt((
312-
map_res(rule! { ColumnPosition }, |token| {
313-
let name = token.text().to_string();
314-
let pos = name[1..]
315-
.parse::<usize>()
316-
.map_err(|e| nom::Err::Failure(e.into()))?;
317-
if pos == 0 {
318-
return Err(nom::Err::Failure(ErrorKind::Other(
319-
"column position must be greater than 0",
320-
)));
321-
}
322-
Ok(ColumnID::Position(crate::ast::ColumnPosition {
323-
pos,
324-
name,
325-
span: Some(token.span),
326-
}))
327-
}),
328-
// ROW could be a column name for compatibility
329-
map_res(rule! {ROW}, |token| {
330-
Ok(ColumnID::Name(Identifier::from_name(
331-
transform_span(&[token.clone()]),
332-
"row",
333-
)))
334-
}),
335-
map_res(rule! { #ident }, |ident| Ok(ColumnID::Name(ident))),
336-
))(i)
329+
alt((column_position, column_row, column_ident)).parse(i)
330+
}
331+
332+
pub fn column_position(i: Input) -> IResult<ColumnID> {
333+
map_res(rule! { ColumnPosition }, |token| {
334+
let name = token.text().to_string();
335+
let pos = name[1..]
336+
.parse::<usize>()
337+
.map_err(|e| nom::Err::Failure(e.into()))?;
338+
if pos == 0 {
339+
return Err(nom::Err::Failure(ErrorKind::Other(
340+
"column position must be greater than 0",
341+
)));
342+
}
343+
Ok(ColumnID::Position(crate::ast::ColumnPosition {
344+
pos,
345+
name,
346+
span: Some(token.span),
347+
}))
348+
})
349+
.parse(i)
350+
}
351+
352+
pub fn column_row(i: Input) -> IResult<ColumnID> {
353+
// ROW could be a column name for compatibility
354+
map_res(rule! {ROW}, |token| {
355+
Ok(ColumnID::Name(Identifier::from_name(
356+
transform_span(&[token.clone()]),
357+
"row",
358+
)))
359+
})
360+
.parse(i)
361+
}
362+
363+
pub fn column_ident(i: Input) -> IResult<ColumnID> {
364+
map_res(rule! { #ident }, |ident| Ok(ColumnID::Name(ident))).parse(i)
337365
}
338366

339367
pub fn variable_ident(i: Input) -> IResult<String> {
340-
map(rule! { IdentVariable }, |t| t.text()[1..].to_string())(i)
368+
map(rule! { IdentVariable }, |t| t.text()[1..].to_string()).parse(i)
341369
}
342370

343371
/// Parse one to two idents separated by a dot, fulfilling from the right.
@@ -352,7 +380,8 @@ pub fn dot_separated_idents_1_to_2(i: Input) -> IResult<(Option<Identifier>, Ide
352380
(ident1, None) => (None, ident1),
353381
(ident0, Some((_, ident1))) => (Some(ident0), ident1),
354382
},
355-
)(i)
383+
)
384+
.parse(i)
356385
}
357386

358387
/// Parse one to three idents separated by a dot, fulfilling from the right.
@@ -371,7 +400,8 @@ pub fn dot_separated_idents_1_to_3(
371400
(ident1, Some((_, ident2, None))) => (None, Some(ident1), ident2),
372401
(ident0, Some((_, ident1, Some((_, ident2))))) => (Some(ident0), Some(ident1), ident2),
373402
},
374-
)(i)
403+
)
404+
.parse(i)
375405
}
376406

377407
/// Parse two to four idents separated by a dot, fulfilling from the right.
@@ -396,36 +426,37 @@ pub fn dot_separated_idents_2_to_4(
396426
(Some(ident0), Some(ident1), ident2, ident3)
397427
}
398428
},
399-
)(i)
429+
)
430+
.parse(i)
400431
}
401432

402433
pub fn comma_separated_list0<'a, T>(
403-
item: impl FnMut(Input<'a>) -> IResult<'a, T>,
434+
item: impl nom::Parser<Input<'a>, Output = T, Error = Error<'a>>,
404435
) -> impl FnMut(Input<'a>) -> IResult<'a, Vec<T>> {
405436
separated_list0(match_text(","), item)
406437
}
407438

408439
pub fn comma_separated_list0_ignore_trailing<'a, T>(
409-
item: impl FnMut(Input<'a>) -> IResult<'a, T>,
410-
) -> impl FnMut(Input<'a>) -> IResult<'a, Vec<T>> {
440+
item: impl nom::Parser<Input<'a>, Output = T, Error = Error<'a>>,
441+
) -> impl nom::Parser<Input<'a>, Output = Vec<T>, Error = Error<'a>> {
411442
nom::multi::separated_list0(match_text(","), item)
412443
}
413444

414445
pub fn comma_separated_list1_ignore_trailing<'a, T>(
415-
item: impl FnMut(Input<'a>) -> IResult<'a, T>,
416-
) -> impl FnMut(Input<'a>) -> IResult<'a, Vec<T>> {
446+
item: impl nom::Parser<Input<'a>, Output = T, Error = Error<'a>>,
447+
) -> impl nom::Parser<Input<'a>, Output = Vec<T>, Error = Error<'a>> {
417448
nom::multi::separated_list1(match_text(","), item)
418449
}
419450

420451
pub fn semicolon_terminated_list1<'a, T>(
421-
item: impl FnMut(Input<'a>) -> IResult<'a, T>,
422-
) -> impl FnMut(Input<'a>) -> IResult<'a, Vec<T>> {
452+
item: impl nom::Parser<Input<'a>, Output = T, Error = Error<'a>>,
453+
) -> impl nom::Parser<Input<'a>, Output = Vec<T>, Error = Error<'a>> {
423454
many1(terminated(item, match_text(";")))
424455
}
425456

426457
pub fn comma_separated_list1<'a, T>(
427-
item: impl FnMut(Input<'a>) -> IResult<'a, T>,
428-
) -> impl FnMut(Input<'a>) -> IResult<'a, Vec<T>> {
458+
item: impl nom::Parser<Input<'a>, Output = T, Error = Error<'a>>,
459+
) -> impl nom::Parser<Input<'a>, Output = Vec<T>, Error = Error<'a>> {
429460
separated_list1(match_text(","), item)
430461
}
431462

@@ -437,9 +468,9 @@ pub fn separated_list0<I, O, O2, E, F, G>(
437468
mut f: F,
438469
) -> impl FnMut(I) -> nom::IResult<I, Vec<O>, E>
439470
where
440-
I: Clone + nom::InputLength,
441-
F: nom::Parser<I, O, E>,
442-
G: nom::Parser<I, O2, E>,
471+
I: Clone + nom::Input,
472+
F: nom::Parser<I, Output = O, Error = E>,
473+
G: nom::Parser<I, Output = O2, Error = E>,
443474
E: nom::error::ParseError<I>,
444475
{
445476
move |mut i: I| {
@@ -487,9 +518,9 @@ pub fn separated_list1<I, O, O2, E, F, G>(
487518
mut f: F,
488519
) -> impl FnMut(I) -> nom::IResult<I, Vec<O>, E>
489520
where
490-
I: Clone + nom::InputLength,
491-
F: nom::Parser<I, O, E>,
492-
G: nom::Parser<I, O2, E>,
521+
I: Clone + nom::Input,
522+
F: nom::Parser<I, Output = O, Error = E>,
523+
G: nom::Parser<I, Output = O2, Error = E>,
493524
E: nom::error::ParseError<I>,
494525
{
495526
move |mut i: I| {
@@ -537,7 +568,7 @@ pub fn map_res<'a, O1, O2, F, G>(
537568
mut f: G,
538569
) -> impl FnMut(Input<'a>) -> IResult<'a, O2>
539570
where
540-
F: nom::Parser<Input<'a>, O1, Error<'a>>,
571+
F: nom::Parser<Input<'a>, Output = O1, Error = Error<'a>>,
541572
G: FnMut(O1) -> Result<O2, nom::Err<ErrorKind>>,
542573
{
543574
move |input: Input| {
@@ -565,7 +596,7 @@ pub fn error_hint<'a, O, F>(
565596
message: &'static str,
566597
) -> impl FnMut(Input<'a>) -> IResult<'a, ()>
567598
where
568-
F: nom::Parser<Input<'a>, O, Error<'a>>,
599+
F: nom::Parser<Input<'a>, Output = O, Error = Error<'a>>,
569600
{
570601
move |input: Input| match match_error.parse(input) {
571602
Ok(_) => Err(nom::Err::Error(Error::from_error_kind(
@@ -682,7 +713,7 @@ where
682713
}
683714

684715
pub fn check_template_mode<'a, O, F>(mut parser: F) -> impl FnMut(Input<'a>) -> IResult<'a, O>
685-
where F: nom::Parser<Input<'a>, O, Error<'a>> {
716+
where F: nom::Parser<Input<'a>, Output = O, Error = Error<'a>> {
686717
move |input: Input| {
687718
parser.parse(input).and_then(|(i, res)| {
688719
if input.mode.is_template() {
@@ -715,7 +746,7 @@ macro_rules! declare_experimental_feature {
715746
mut parser: F,
716747
) -> impl FnMut(Input<'a>) -> IResult<'a, O>
717748
where
718-
F: nom::Parser<Input<'a>, O, Error<'a>>,
749+
F: nom::Parser<Input<'a>, Output = O, Error = Error<'a>>,
719750
{
720751
move |input: Input| {
721752
parser.parse(input).and_then(|(i, res)| {

0 commit comments

Comments
(0)