factor out crate

This commit is contained in:
Jana Dönszelmann 2026-04-02 08:31:58 +02:00
parent bb8fa818d2
commit af09bcd403
No known key found for this signature in database
11 changed files with 43 additions and 22 deletions

769
logparse/src/parse.rs Normal file
View file

@ -0,0 +1,769 @@
use std::borrow::Cow;
use super::ast::*;
use winnow::{Parser, combinator::trace, error::ParserError};
impl<'a> AnyString<'a> {
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
let quote = alt((
'`'.value(QuoteType::Backtick),
'\''.value(QuoteType::Single),
'\"'.value(QuoteType::Double),
));
macro_rules! surrounding {
() => {
take_while(0.., |b: char| {
!b.is_whitespace() && b.is_alphabetic() && !['\'', '"', '`', '#'].contains(&b)
})
};
}
let preamble = (
surrounding!(),
take_while(0.., |c| c == '#').map(|i: &'a str| i.len()),
quote,
);
trace(
"string",
preamble.flat_map(
|(prefix, num_hashtags, quote): (&'a str, usize, QuoteType)| {
let end = (
match quote {
QuoteType::Single => '\'',
QuoteType::Double => '\"',
QuoteType::Backtick => '`',
},
repeat::<_, _, Cow<'a, str>, _, _>(
num_hashtags..=num_hashtags,
literal("#"),
),
);
let contents = repeat_till(0.., any, end).map(|(contents, _)| contents);
(contents, surrounding!()).map(
move |(contents, suffix): (Cow<'a, str>, &'a str)| Self {
prefix: prefix.into(),
ty: quote,
contents,
num_hashtags,
suffix: suffix.into(),
},
)
},
),
)
}
}
impl<'a> Space<'a> {
    /// Returns a parser that consumes a (possibly empty) run of whitespace.
    ///
    /// Because zero characters are accepted, this parser succeeds on any input.
    fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
        use winnow::{prelude::*, token::*};

        let whitespace_run = take_while(0.., |ch: char| ch.is_whitespace());
        trace("space", whitespace_run.map(|run: &'a str| Self(run.into())))
    }
}
impl PathSep {
    /// Returns a parser for a single path separator: `/` or `\`.
    fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
        use winnow::{combinator::*, prelude::*};

        let slash = '/'.value(Self::Slash);
        let backslash = '\\'.value(Self::Backslash);
        trace("pathsep", alt((slash, backslash)))
    }
}
impl Separator {
    /// Returns a parser for one of the separators `::`, `=` or `:`.
    fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
        use winnow::{combinator::*, prelude::*, token::*};

        let double_colon = "::".value(Self::DoubleColon);
        let equals = literal('=').value(Self::Eq);
        let colon = literal(':').value(Self::Colon);
        // `::` is tried before `:` so that a double colon is matched as one separator.
        trace("separator", alt((double_colon, equals, colon)))
    }
}
impl<'a> FileName<'a> {
fn parse(segment: PathSegment<'a>) -> Self {
fn rsplit<'a>(
input: Cow<'a, str>,
delimiter: char,
) -> Option<(Cow<'a, str>, Cow<'a, str>)> {
match input {
Cow::Borrowed(s) => s
.rsplit_once(delimiter)
.map(|(a, b)| (Cow::Borrowed(a), Cow::Borrowed(b))),
Cow::Owned(s) => s
.rsplit_once(delimiter)
.map(|(a, b)| (Cow::Owned(a.to_string()), Cow::Owned(b.to_string()))),
}
}
let (rest, location) = if let Some((before, offset)) = rsplit(segment.segment.clone(), ':')
{
if let Some((before, line)) = rsplit(before.clone(), ':') {
(
before,
Some(FileLocation {
line,
offset: Some(offset),
}),
)
} else {
(
before,
Some(FileLocation {
line: offset,
offset: None,
}),
)
}
} else {
(segment.segment, None)
};
let (new_segment, ext_excluding_dot) =
if let Some((segment, ext_excluding_dot)) = rsplit(rest.clone(), '.') {
(segment, Some(ext_excluding_dot))
} else {
(rest, None)
};
Self {
leading_separator: segment.leading_separator,
segment: new_segment,
ext_excluding_dot,
location,
}
}
}
impl<'a> Path<'a> {
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
let terminator = || {
alt((
eof.value(()),
any::<&'a str, E>
.verify(|i: &char| {
(*i).is_whitespace()
|| !(i.is_alphanumeric()
|| ['_', '-', '\"', '\'', '.', '/', '\\'].contains(i))
})
.value(()),
))
};
let terminator_or_sep = || alt((PathSep::parse().value(()), terminator()));
let till_next_sep = || {
trace(
"till next sep",
repeat_till(0.., any::<&'a str, E>, peek(terminator_or_sep()))
.map(|(segment, _)| segment),
)
};
let sep_and_next =
(PathSep::parse(), till_next_sep()).map(|(leading_separator, segment)| PathSegment {
leading_separator,
segment,
});
let opt_sep_and_next =
(opt(PathSep::parse()), till_next_sep()).map(|(leading_separator, segment)| {
PathSegment {
leading_separator: leading_separator.unwrap_or(PathSep::None),
segment,
}
});
let drive = opt((
any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')),
':',
))
.map(|i| i.map(|(letter, _): (char, char)| letter));
let drive_and_segments = (
drive,
opt_sep_and_next,
repeat_till(0.., sep_and_next, peek(terminator()))
.map(|(segments, _): (Vec<PathSegment>, _)| segments),
);
trace(
"path",
drive_and_segments
.map(|(drive, segment, segments)| {
let (segments, last) = {
let mut segments = segments;
segments.insert(0, segment);
let last = segments.pop().unwrap();
(segments, last)
};
let filename = FileName::parse(last);
Self {
drive_excluding_colon: drive,
segments,
filename,
}
})
.verify(|i| {
!i.segments.is_empty()
|| i.drive_excluding_colon.is_some()
|| i.filename.ext_excluding_dot.is_some()
|| !matches!(i.filename.leading_separator, PathSep::None)
}),
)
}
}
impl<'a> Atom<'a> {
    /// Returns a parser for a run of one or more non-whitespace characters.
    ///
    /// The run ends before whitespace and before any position at which `terminated_by`
    /// would match. `terminated_by` is only used as a look-ahead; it never consumes input.
    fn parse<E: ParserError<&'a str>, T: 'a>(
        terminated_by: impl Parser<&'a str, T, E>,
    ) -> impl Parser<&'a str, Self, E> {
        use winnow::{combinator::*, prelude::*, token::*};

        let not_at_terminator = peek(not(terminated_by));
        let non_space_char = any::<&str, _>.verify(move |ch: &char| !ch.is_whitespace());
        let one_char = (not_at_terminator, non_space_char).map(|(_, ch)| ch);
        let text = repeat::<_, _, Cow<'a, str>, _, _>(1.., one_char).map(Self::Text);
        trace("atom", alt((text,)))
    }
}
impl<'a> Token<'a> {
    /// Returns a parser for a single token, ignoring any separator that may follow it.
    ///
    /// Alternatives are tried in this order: the keywords `true`, `false` and `None`, a
    /// path, a number, a quoted string, a delimited group, and finally a plain atom. The
    /// atom stops before any separator and before `)`, `]` or `}`.
    fn parse_without_separator<E: ParserError<&'a str> + 'a>() -> impl Parser<&'a str, Self, E> {
        use winnow::{combinator::*, prelude::*};

        // Delimited groups contain segments, which contain tokens again; presumably the
        // box is what keeps the resulting parser type finite.
        let group: Box<dyn Parser<&'a str, Self, E>> =
            Box::new(Delimited::parse().map(Self::Delimited));
        let candidates = alt((
            "true".value(Self::True),
            "false".value(Self::False),
            "None".value(Self::None),
            Path::parse().map(Self::Path),
            Number::parse().map(Self::Number),
            AnyString::parse().map(Self::String),
            group,
            Atom::parse(alt((Separator::parse().value(""), ")", "]", "}"))).map(Self::Atom),
        ));
        trace("token-without-sep", candidates)
    }

    /// Returns a parser for a token, optionally followed by `<space><separator><segment>`.
    ///
    /// When the trailer is present the result is [`Token::Separated`]; otherwise it is the
    /// bare token. If no token can be parsed that way, the fallback is an atom that runs
    /// until the next whitespace.
    fn parse<E: ParserError<&'a str> + 'a>() -> Box<dyn Parser<&'a str, Self, E> + 'a> {
        use winnow::{combinator::*, prelude::*};

        let head = Self::parse_without_separator();
        let tail = opt(
            (Space::parse(), Separator::parse()).flat_map(|(space_before, separator)| {
                // The right-hand side is a full segment, so it may itself be separated.
                let rhs: Box<dyn Parser<_, _, _>> = Box::new(Segment::parse());
                rhs.map(move |after| (space_before.clone(), separator, after))
            }),
        );
        let head_with_tail = (head, tail).map(|(before, tail)| match tail {
            Some((space_before, separator, after)) => Token::Separated {
                before: Box::new(before),
                space_before,
                separator,
                after: Box::new(after),
            },
            None => before,
        });
        // `fail` never matches, so this atom is terminated by whitespace only.
        let fallback = Atom::parse(fail::<_, (), _>).map(Self::Atom);
        Box::new(trace("token", alt((head_with_tail, fallback))))
    }
}
impl<'a> Delimited<'a> {
    /// Returns a parser for a bracketed group with an optional atom prefix.
    ///
    /// The prefix is an atom that stops before `(`, `[`, `{` or a separator. It is followed
    /// by an opening delimiter, any number of segments, and the matching closing
    /// delimiter.
    fn parse<E: ParserError<&'a str> + 'a>() -> impl Parser<&'a str, Self, E> {
        use winnow::{combinator::*, prelude::*, token::*};

        let prefix = opt(Atom::parse(alt((
            "(",
            "[",
            "{",
            Separator::parse().value(""),
        ))));
        // Each opener produces the parser for its own closer, so the group can only be
        // ended by the matching delimiter.
        let paren = literal('(').map(|_| literal(')').value(Delimiter::Paren));
        let bracket = literal('[').map(|_| literal(']').value(Delimiter::Bracket));
        let brace = literal('{').map(|_| literal('}').value(Delimiter::Brace));
        let body = alt((paren, bracket, brace)).flat_map(|closer| Segments::parse(closer));
        trace(
            "delimited",
            (prefix, body).map(|(prefix, (contents, delimiter))| Self {
                prefix,
                delimiter,
                contents,
            }),
        )
    }
}
impl<'a> Segments<'a> {
    /// Returns a parser for a sequence of segments terminated by `end`.
    ///
    /// Segments are collected until optional whitespace followed by `end` matches. The
    /// parser yields the collected [`Segments`], including that trailing whitespace,
    /// together with the value produced by `end`.
    fn parse<E: ParserError<&'a str> + 'a, End: 'a>(
        end: impl Parser<&'a str, End, E> + 'a,
    ) -> Box<dyn Parser<&'a str, (Self, End), E> + 'a> {
        use winnow::{combinator::*, prelude::*};

        let until_end = repeat_till(0.., Segment::parse(), (Space::parse(), end));
        let assembled = until_end.map(|(segments, (trailing_space, end)): (Vec<_>, _)| {
            let parsed = Self {
                segments,
                trailing_space,
            };
            (parsed, end)
        });
        Box::new(trace("segments", assembled))
    }
}
impl<'a> Number<'a> {
    /// Returns a parser for a numeric literal.
    ///
    /// The literal must parse as a float or a decimal integer and must not be directly
    /// followed by an alphabetic character. The matched source text is stored as-is; the
    /// numeric value itself is discarded.
    fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
        use winnow::{ascii::*, combinator::*, prelude::*, token::*};

        let literal_text = alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take()));
        let no_letter_follows =
            peek(not(any::<&'a str, E>.verify(|ch: &char| ch.is_alphabetic())));
        trace(
            "number",
            (literal_text, no_letter_follows).map(|(text, _): (&str, _)| Self(text.into())),
        )
    }
}
impl<'a> Segment<'a> {
    /// Returns a parser for one token together with the whitespace in front of it.
    fn parse<E: ParserError<&'a str> + 'a>() -> impl Parser<&'a str, Self, E> {
        use winnow::prelude::*;

        let space_then_token = (Space::parse(), Token::parse());
        trace(
            "segment",
            space_then_token.map(|(leading_space, token)| Self {
                leading_space,
                token,
            }),
        )
    }
}
pub fn parse_input<'a>(i: &'a str) -> Result<Segments<'a>, String> {
use winnow::combinator::eof;
Segments::parse(eof::<&str, winnow::error::EmptyError>)
.map(|(segments, _)| segments)
.parse(i)
.map_err(|e| e.to_string())
}
// Inline snapshot tests for the parser; each expected value is the `Debug` rendering of
// the parsed AST.
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;
use winnow::Parser;
// NOTE(review): the parent module imports its AST through `super::ast`, while these
// imports go through `crate::format_debug_output` — confirm that path exists in this crate.
use crate::format_debug_output::{
ast::{Path, Segments},
parse_input,
};
// Runs only the path parser over `i`; panics unless the whole input parses as a path.
fn parse_path_only<'a>(i: &'a str) -> Path<'a> {
Path::parse::<winnow::error::EmptyError>().parse(i).unwrap()
}
// Runs the full parser over `input`; panics on a parse error.
fn parse<'a>(input: &'a str) -> Segments<'a> {
parse_input(input).unwrap()
}
// A relative path is split into directory segments plus a file name with extension.
#[test]
fn parse_path() {
assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs"#), @r#"
Path {
drive_excluding_colon: None,
segments: [
PathSegment {
leading_separator: None,
segment: "tests",
},
PathSegment {
leading_separator: Slash,
segment: "ui",
},
PathSegment {
leading_separator: Slash,
segment: "impl-trait",
},
],
filename: FileName {
leading_separator: Slash,
segment: "unsized_coercion",
ext_excluding_dot: Some(
"rs",
),
location: None,
},
}
"#);
}
// A trailing `:3:4` is expected to end up in the file name's location as line and offset.
#[test]
fn parse_path_with_file_line() {
assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs:3:4"#), @r#"
Path {
drive_excluding_colon: None,
segments: [
PathSegment {
leading_separator: None,
segment: "tests",
},
PathSegment {
leading_separator: Slash,
segment: "ui",
},
PathSegment {
leading_separator: Slash,
segment: "impl-trait",
},
],
filename: FileName {
leading_separator: Slash,
segment: "unsized_coercion",
ext_excluding_dot: Some(
"rs",
),
location: Some(
FileLocation {
line: "3",
offset: Some(
"4",
),
},
),
},
}
"#);
}
// Empty input yields no segments and empty trailing whitespace.
#[test]
fn parse_empty() {
assert_debug_snapshot!(parse(r#""#), @r#"
Segments {
segments: [],
trailing_space: Space(
"",
),
}
"#)
}
// A bare word becomes a single text atom.
#[test]
fn parse_text() {
assert_debug_snapshot!(parse(r#"abc"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
"abc",
),
),
},
],
trailing_space: Space(
"",
),
}
"#)
}
// `true` and `false` become their dedicated tokens.
#[test]
fn parse_boolean() {
assert_debug_snapshot!(parse(r#"true"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: True,
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r#"false"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: False,
},
],
trailing_space: Space(
"",
),
}
"#);
}
// Covers all three quote kinds, `#` guards, and single-letter prefixes.
#[test]
fn parse_string() {
assert_debug_snapshot!(parse(r##""foo""##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Double,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"#"foo"#"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Double,
contents: "foo",
num_hashtags: 1,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"r#"foo"#"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "r",
ty: Double,
contents: "foo",
num_hashtags: 1,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"c"foo""##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "c",
ty: Double,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"b"foo""##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "b",
ty: Double,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"'a'"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Single,
contents: "a",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"`b`"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Backtick,
contents: "b",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"b'foo'"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "b",
ty: Single,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
assert_debug_snapshot!(parse(r##"b`foo`"##), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "b",
ty: Backtick,
contents: "foo",
num_hashtags: 0,
suffix: "",
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
}
}