better parsing

This commit is contained in:
Jana Dönszelmann 2026-04-03 16:47:18 +02:00
parent bed1d5b38b
commit 53fc09c02f
No known key found for this signature in database
10 changed files with 416 additions and 45 deletions

View file

@ -189,7 +189,7 @@ impl<'a> Path<'a> {
});
let drive = opt((
any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')),
any::<&'a str, E>.verify(|x: &char| x.is_ascii_alphabetic()),
':',
))
.map(|i| i.map(|(letter, _): (char, char)| letter));
@ -237,20 +237,25 @@ impl<'a> Path<'a> {
}
impl<'a> Atom<'a> {
fn parse<E: ParserError<&'a str>, T: 'a>(
terminated_by: impl Parser<&'a str, T, E>,
fn parse<E: ParserError<&'a str>, T: 'a, P: Parser<&'a str, T, E>>(
terminated_by: impl (Fn() -> P) + 'a,
) -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
let text = repeat::<_, _, Cow<'a, str>, _, _>(
1..,
(
peek(not(terminated_by)),
any::<&str, _>.verify(move |i: &char| !(*i).is_whitespace()),
let text = (
not(peek(terminated_by())),
repeat_till::<_, _, Cow<'a, str>, _, _, _, _>(
1..,
any::<&str, _>,
peek(alt((
terminated_by().value(()),
any::<&str, _>.verify(|i| i.is_whitespace()).value(()),
eof::<&str, _>.value(()),
))),
)
.map(|(_, i)| i),
.map(|(i, _)| Self::Text(i)),
)
.map(Self::Text);
.map(|(_, x)| x);
trace("atom", alt((text,)))
}
@ -273,8 +278,15 @@ impl<'a> Token<'a> {
Path::parse().map(Self::Path),
AnyString::parse().map(Self::String),
delimited,
Atom::parse(alt((Separator::parse().value(""), ")", "]", "}", ">")))
.map(Self::Atom),
Atom::parse(|| {
alt((
Separator::parse().value(""),
alt((")", "]", "}", ">")),
alt(("(", "[", "{", "<")),
alt(("`", "'", "\"")),
))
})
.map(Self::Atom),
)),
)
}
@ -309,7 +321,7 @@ impl<'a> Token<'a> {
before
}
}),
Atom::parse(fail::<_, (), _>).map(Self::Atom),
Atom::parse(|| fail::<_, (), _>).map(Self::Atom),
)),
))
}
@ -323,7 +335,14 @@ impl<'a> Delimited<'a> {
"delimited",
(
opt((
Atom::parse(alt(("(", "[", "{", "<", Separator::parse().value("")))),
Atom::parse(|| {
alt((
Separator::parse().value(""),
alt((")", "]", "}", ">")),
alt(("(", "[", "{", "<")),
alt(("`", "'", "\"")),
))
}),
Space::parse(),
)),
alt((
@ -374,9 +393,21 @@ impl<'a> Number<'a> {
"number",
(
alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())),
opt((
"_",
repeat::<_, _, Cow<'a, str>, _, _>(
1..,
any::<&'a str, E>.verify(|x| x.is_alphanumeric()),
)
.take(),
)
.map(|(_, suffix): (_, &str)| suffix)),
peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))),
)
.map(|(i, _): (&str, _)| Self(i.into())),
.map(|(number, suffix, _): (&str, _, _)| Self {
number: number.into(),
suffix_without_underscore: suffix.map(Into::into),
}),
)
}
}
@ -654,6 +685,89 @@ mod tests {
"#)
}
// Snapshot test for a separator whose right-hand side is a prefixed, brace-delimited group.
// Parses `a = Struct { b = 3 }` and compares the Debug rendering of the result with the
// inline snapshot below. The snapshot records:
//   - an `Eq`-separated token with the text atom `a` before the separator;
//   - after it, a `Brace`-delimited group whose prefix is the text atom `Struct`
//     followed by one space;
//   - the group's contents as a nested `Eq`-separated token (`b = 3`), where `3` is a
//     number with no suffix, and a single trailing space before the closing brace.
// NOTE(review): `assert_debug_snapshot!` is presumably insta's inline-snapshot macro —
// confirm against the file's imports before regenerating the literal.
#[test]
fn parse_delimited_separated() {
assert_debug_snapshot!(parse(r#"a = Struct { b = 3 }"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Delimited(
Delimited {
prefix: Some(
(
Text(
"Struct",
),
Space(
" ",
),
),
),
delimiter: Brace,
contents: Segments {
segments: [
Segment {
leading_space: Space(
" ",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: None,
},
),
},
},
},
],
trailing_space: Space(
" ",
),
},
},
),
},
},
},
],
trailing_space: Space(
"",
),
}
"#)
}
#[test]
fn parse_text() {
assert_debug_snapshot!(parse(r#"abc"#), @r#"
@ -921,4 +1035,220 @@ mod tests {
}
"#);
}
// Snapshot test for a larger, Rust-like expression:
// `a::<b::c::Generalizer<'_, '_>>(a = 3_usize, b = 3_usize)`.
// The inline snapshot below pins, among other things:
//   - each `::` as a `DoubleColon` separator, with the `<...>` groups recorded as
//     `Angle` delimiters (`Generalizer` becomes the prefix of the inner group);
//   - the parenthesised argument list as a second top-level segment with a `Paren`
//     delimiter and no prefix;
//   - `3_usize` split into number `"3"` and suffix `"usize"` (underscore not stored);
//   - the `,` between the two arguments as a plain text atom.
// NOTE(review): the snapshot records `'_, '` as one single-quoted string with contents
// `_, ` followed by a text atom `_`, i.e. the two lifetimes are not kept as separate
// items — presumably a current limitation rather than the intended parse; confirm
// before relying on this output.
#[test]
fn parse_ex1() {
assert_debug_snapshot!(parse(r#"a::<b::c::Generalizer<'_, '_>>(a = 3_usize, b = 3_usize)"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: None,
delimiter: Angle,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"c",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: Some(
(
Text(
"Generalizer",
),
Space(
"",
),
),
),
delimiter: Angle,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Single,
contents: "_, ",
num_hashtags: 0,
suffix: "",
},
),
},
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
"_",
),
),
},
],
trailing_space: Space(
"",
),
},
},
),
},
},
},
},
},
],
trailing_space: Space(
"",
),
},
},
),
},
},
},
Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: None,
delimiter: Paren,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: Some(
"usize",
),
},
),
},
},
},
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
",",
),
),
},
Segment {
leading_space: Space(
" ",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: Some(
"usize",
),
},
),
},
},
},
],
trailing_space: Space(
"",
),
},
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
}
}