This commit is contained in:
Jana Dönszelmann 2026-04-02 08:13:29 +02:00
parent 9f401bda53
commit bb8fa818d2
No known key found for this signature in database
9 changed files with 1322 additions and 78 deletions

View file

@ -7,15 +7,24 @@ impl<'a> AnyString<'a> {
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
// let (prefix, num_hashtags, quote) =
let quote = alt((
'`'.value(QuoteType::Backtick),
'\''.value(QuoteType::Single),
'\"'.value(QuoteType::Double),
));
macro_rules! surrounding {
() => {
take_while(0.., |b: char| {
!b.is_whitespace() && b.is_alphabetic() && !['\'', '"', '`', '#'].contains(&b)
})
};
}
let preamble = (
take_while(0.., |b: char| !b.is_whitespace()),
surrounding!(),
take_while(0.., |c| c == '#').map(|i: &'a str| i.len()),
alt((
'`'.value(QuoteType::Backtick),
'\''.value(QuoteType::Single),
'\"'.value(QuoteType::Double),
)),
quote,
);
trace(
@ -35,17 +44,16 @@ impl<'a> AnyString<'a> {
);
let contents = repeat_till(0.., any, end).map(|(contents, _)| contents);
let suffix = take_while(0.., |b: char| !b.is_whitespace());
(contents, suffix).map(move |(contents, suffix): (Cow<'a, str>, &'a str)| {
Self {
(contents, surrounding!()).map(
move |(contents, suffix): (Cow<'a, str>, &'a str)| Self {
prefix: prefix.into(),
ty: quote,
contents,
num_hashtags,
suffix: suffix.into(),
}
})
},
)
},
),
)
@ -75,14 +83,15 @@ impl PathSep {
}
impl Separator {
fn parser<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
trace(
"separator",
alt((
literal('=').value(Self::Eq),
literal(':').value(Self::Colon),
"::".value(Self::DoubleColon),
(literal('=')).value(Self::Eq),
(literal(':')).value(Self::Colon),
)),
)
}
@ -147,95 +156,159 @@ impl<'a> Path<'a> {
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
let till_next_sep = repeat_till(
0..,
any::<&'a str, E>.verify(|i: &char| !(*i).is_whitespace()),
peek(alt((
PathSep::parse().value(()),
let terminator = || {
alt((
eof.value(()),
any::<&'a str, E>
.verify(|i: &char| (*i).is_whitespace())
.verify(|i: &char| {
(*i).is_whitespace()
|| !(i.is_alphanumeric()
|| ['_', '-', '\"', '\'', '.', '/', '\\'].contains(i))
})
.value(()),
))),
)
.map(|(segment, _)| segment);
))
};
let terminator_or_sep = || alt((PathSep::parse().value(()), terminator()));
let till_next_sep = || {
trace(
"till next sep",
repeat_till(0.., any::<&'a str, E>, peek(terminator_or_sep()))
.map(|(segment, _)| segment),
)
};
let sep_and_next =
(PathSep::parse(), till_next_sep).map(|(leading_separator, segment)| PathSegment {
(PathSep::parse(), till_next_sep()).map(|(leading_separator, segment)| PathSegment {
leading_separator,
segment,
});
let opt_sep_and_next =
(opt(PathSep::parse()), till_next_sep()).map(|(leading_separator, segment)| {
PathSegment {
leading_separator: leading_separator.unwrap_or(PathSep::None),
segment,
}
});
let drive = opt((
any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z')),
any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')),
':',
))
.map(|i| i.map(|(letter, _): (char, char)| letter));
let drive_and_segments = (
drive,
repeat_till(
1..,
sep_and_next,
peek(any.verify(|i: &char| (*i).is_whitespace())),
)
.map(|(segments, _): (Vec<PathSegment>, _)| {
let (rest, last) = {
let mut segments = segments;
let last = segments.pop().unwrap();
(segments, last)
};
(rest, FileName::parse(last))
}),
opt_sep_and_next,
repeat_till(0.., sep_and_next, peek(terminator()))
.map(|(segments, _): (Vec<PathSegment>, _)| segments),
);
trace(
"path",
drive_and_segments.map(|(drive, (segments, filename))| Self {
drive_excluding_colon: drive,
segments,
filename,
}),
drive_and_segments
.map(|(drive, segment, segments)| {
let (segments, last) = {
let mut segments = segments;
segments.insert(0, segment);
let last = segments.pop().unwrap();
(segments, last)
};
let filename = FileName::parse(last);
Self {
drive_excluding_colon: drive,
segments,
filename,
}
})
.verify(|i| {
!i.segments.is_empty()
|| i.drive_excluding_colon.is_some()
|| i.filename.ext_excluding_dot.is_some()
|| !matches!(i.filename.leading_separator, PathSep::None)
}),
)
}
}
impl<'a> Atom<'a> {
fn parse<E: ParserError<&'a str>>(
except_chars: &'static [char],
fn parse<E: ParserError<&'a str>, T: 'a>(
terminated_by: impl Parser<&'a str, T, E>,
) -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
trace(
"atom",
alt((repeat(
1..,
any.verify(move |i: &char| !(*i).is_whitespace() && !except_chars.contains(i)),
let text = repeat::<_, _, Cow<'a, str>, _, _>(
1..,
(
peek(not(terminated_by)),
any::<&str, _>.verify(move |i: &char| !(*i).is_whitespace()),
)
.map(Self::Text),)),
.map(|(_, i)| i),
)
.map(Self::Text);
trace("atom", alt((text,)))
}
}
impl<'a> Token<'a> {
fn parse<E: ParserError<&'a str> + 'a>() -> impl Parser<&'a str, Self, E> {
fn parse_without_separator<E: ParserError<&'a str> + 'a>() -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*};
let delimited: Box<dyn Parser<&'a str, Self, E>> =
Box::new(Delimited::parse().map(Self::Delimited));
trace(
"token",
"token-without-sep",
alt((
"true".value(Self::True),
"false".value(Self::False),
"None".value(Self::None),
Path::parse().map(Self::Path),
AnyString::parse().map(Self::String),
Number::parse().map(Self::Number),
AnyString::parse().map(Self::String),
delimited,
Atom::parse(&[]).map(Self::Atom),
Atom::parse(alt((Separator::parse().value(""), ")", "]", "}"))).map(Self::Atom),
)),
)
}
fn parse<E: ParserError<&'a str> + 'a>() -> Box<dyn Parser<&'a str, Self, E> + 'a> {
use winnow::{combinator::*, prelude::*};
let before = Self::parse_without_separator();
Box::new(trace(
"token",
alt((
(
before,
opt(
(Space::parse(), Separator::parse()).flat_map(|(space, sep)| {
let box_dyn_segment: Box<dyn Parser<_, _, _>> =
Box::new(Segment::parse());
box_dyn_segment.map(move |segment| (space.clone(), sep, segment))
}),
),
)
.map(|(before, trailer)| {
if let Some((space_before, separator, after)) = trailer {
Token::Separated {
before: Box::new(before),
space_before,
separator,
after: Box::new(after),
}
} else {
before
}
}),
Atom::parse(fail::<_, (), _>).map(Self::Atom),
)),
))
}
}
impl<'a> Delimited<'a> {
@ -245,7 +318,12 @@ impl<'a> Delimited<'a> {
trace(
"delimited",
(
Atom::parse(&['(', '[', '{']),
opt(Atom::parse(alt((
"(",
"[",
"{",
Separator::parse().value(""),
)))),
alt((
literal('(').map(|_| literal(')').value(Delimiter::Paren)),
literal('[').map(|_| literal(']').value(Delimiter::Bracket)),
@ -264,11 +342,11 @@ impl<'a> Delimited<'a> {
impl<'a> Segments<'a> {
fn parse<E: ParserError<&'a str> + 'a, End: 'a>(
end: impl Parser<&'a str, End, E>,
) -> impl Parser<&'a str, (Self, End), E> {
end: impl Parser<&'a str, End, E> + 'a,
) -> Box<dyn Parser<&'a str, (Self, End), E> + 'a> {
use winnow::{combinator::*, prelude::*};
trace(
Box::new(trace(
"segments",
repeat_till(0.., Segment::parse(), (Space::parse(), end)).map(
|(segments, (trailing_space, end)): (Vec<_>, _)| {
@ -281,18 +359,21 @@ impl<'a> Segments<'a> {
)
},
),
)
))
}
}
impl<'a> Number<'a> {
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{ascii::*, combinator::*, prelude::*};
use winnow::{ascii::*, combinator::*, prelude::*, token::*};
trace(
"number",
alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take()))
.map(|i: &str| Self(i.into())),
(
alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())),
peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))),
)
.map(|(i, _): (&str, _)| Self(i.into())),
)
}
}
@ -318,3 +399,371 @@ pub fn parse_input<'a>(i: &'a str) -> Result<Segments<'a>, String> {
.parse(i)
.map_err(|e| e.to_string())
}
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;
use winnow::Parser;
use crate::format_debug_output::{
ast::{Path, Segments},
parse_input,
};
fn parse_path_only<'a>(i: &'a str) -> Path<'a> {
Path::parse::<winnow::error::EmptyError>().parse(i).unwrap()
}
/// Test helper: feeds `input` through `parse_input` and panics when it
/// reports an error, so tests can work with the resulting `Segments` directly.
fn parse<'a>(input: &'a str) -> Segments<'a> {
    let outcome = parse_input(input);
    outcome.unwrap()
}
/// Snapshot test for `Path` parsing of a relative, slash-separated path.
///
/// The expected value shows no drive, a first segment without a leading
/// separator, the following directories with `Slash` separators, and the
/// final component split into a file stem plus an `rs` extension with no
/// location attached.
#[test]
fn parse_path() {
assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs"#), @r#"
Path {
drive_excluding_colon: None,
segments: [
PathSegment {
leading_separator: None,
segment: "tests",
},
PathSegment {
leading_separator: Slash,
segment: "ui",
},
PathSegment {
leading_separator: Slash,
segment: "impl-trait",
},
],
filename: FileName {
leading_separator: Slash,
segment: "unsized_coercion",
ext_excluding_dot: Some(
"rs",
),
location: None,
},
}
"#);
}
/// Snapshot test for a path carrying a trailing `:3:4` suffix.
///
/// The directory segments match the plain-path case; the difference is that
/// the file name's `location` is expected to be populated with line `"3"`
/// and offset `"4"`.
#[test]
fn parse_path_with_file_line() {
assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs:3:4"#), @r#"
Path {
drive_excluding_colon: None,
segments: [
PathSegment {
leading_separator: None,
segment: "tests",
},
PathSegment {
leading_separator: Slash,
segment: "ui",
},
PathSegment {
leading_separator: Slash,
segment: "impl-trait",
},
],
filename: FileName {
leading_separator: Slash,
segment: "unsized_coercion",
ext_excluding_dot: Some(
"rs",
),
location: Some(
FileLocation {
line: "3",
offset: Some(
"4",
),
},
),
},
}
"#);
}
/// Snapshot test for the empty input: parsing is expected to succeed with
/// no segments and an empty trailing space.
#[test]
fn parse_empty() {
assert_debug_snapshot!(parse(r#""#), @r#"
Segments {
segments: [],
trailing_space: Space(
"",
),
}
"#)
}
/// Snapshot test for a bare word: `abc` is expected to become a single
/// segment whose token is `Atom(Text("abc"))`, with empty leading and
/// trailing space.
#[test]
fn parse_text() {
assert_debug_snapshot!(parse(r#"abc"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
"abc",
),
),
},
],
trailing_space: Space(
"",
),
}
"#)
}
/// Snapshot test for the boolean keywords: `true` and `false` are each
/// expected to produce a single segment holding the dedicated `True` /
/// `False` token rather than a text atom.
#[test]
fn parse_boolean() {
// `true` on its own.
assert_debug_snapshot!(parse(r#"true"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: True,
},
],
trailing_space: Space(
"",
),
}
"#);
// `false` on its own.
assert_debug_snapshot!(parse(r#"false"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: False,
},
],
trailing_space: Space(
"",
),
}
"#);
}
/// Snapshot test for string-like tokens.
///
/// Each assertion parses one quoted literal and checks the resulting
/// `AnyString` fields: the prefix before the quote, the quote type
/// (`Double`, `Single` or `Backtick`), the contents, the number of `#`
/// characters, and the suffix (empty in every case here).
#[test]
fn parse_string() {
// Plain double-quoted string: no prefix, no hashes.
assert_debug_snapshot!(parse(r##""foo""##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Double,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// One `#` around the quotes without any prefix: `num_hashtags` is 1.
assert_debug_snapshot!(parse(r##"#"foo"#"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Double,
contents: "foo",
num_hashtags: 1,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// `r` prefix followed by one `#`: prefix and hash count are both recorded.
assert_debug_snapshot!(parse(r##"r#"foo"#"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "r",
ty: Double,
contents: "foo",
num_hashtags: 1,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// `c` prefix on a double-quoted string.
assert_debug_snapshot!(parse(r##"c"foo""##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "c",
ty: Double,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// `b` prefix on a double-quoted string.
assert_debug_snapshot!(parse(r##"b"foo""##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "b",
ty: Double,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// Single-quoted literal without a prefix.
assert_debug_snapshot!(parse(r##"'a'"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Single,
contents: "a",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// Backtick-quoted literal without a prefix.
assert_debug_snapshot!(parse(r##"`b`"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Backtick,
contents: "b",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// `b` prefix on a single-quoted literal.
assert_debug_snapshot!(parse(r##"b'foo'"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "b",
ty: Single,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
// `b` prefix on a backtick-quoted literal.
assert_debug_snapshot!(parse(r##"b`foo`"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "b",
ty: Backtick,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
}
}