use std::borrow::Cow; use super::ast::*; use winnow::{Parser, combinator::trace, error::ParserError}; impl<'a> AnyString<'a> { fn parse>() -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*, token::*}; let quote = alt(( '`'.value(QuoteType::Backtick), '\''.value(QuoteType::Single), '\"'.value(QuoteType::Double), )); macro_rules! surrounding { () => { take_while(0.., |b: char| { !b.is_whitespace() && b.is_alphabetic() && !['\'', '"', '`', '#'].contains(&b) }) }; } let preamble = ( surrounding!(), take_while(0.., |c| c == '#').map(|i: &'a str| i.len()), quote, ); trace( "string", preamble.flat_map( |(prefix, num_hashtags, quote): (&'a str, usize, QuoteType)| { let end = ( match quote { QuoteType::Single => '\'', QuoteType::Double => '\"', QuoteType::Backtick => '`', }, repeat::<_, _, Cow<'a, str>, _, _>( num_hashtags..=num_hashtags, literal("#"), ), ); let contents = repeat_till(0.., any, end).map(|(contents, _)| contents); (contents, surrounding!()).map( move |(contents, suffix): (Cow<'a, str>, &'a str)| Self { prefix: prefix.into(), ty: quote, contents, num_hashtags, suffix: suffix.into(), }, ) }, ), ) } } impl<'a> Space<'a> { fn parse>() -> impl Parser<&'a str, Self, E> { use winnow::{prelude::*, token::*}; trace( "space", take_while(0.., |b: char| b.is_whitespace()).map(|i: &'a str| Self(i.into())), ) } } impl PathSep { fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*}; trace( "pathsep", alt(('/'.value(Self::Slash), '\\'.value(Self::Backslash))), ) } } impl Separator { fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*, token::*}; trace( "separator", alt(( "::".value(Self::DoubleColon), (literal('=')).value(Self::Eq), (literal(':')).value(Self::Colon), )), ) } } impl<'a> FileLocation<'a> { fn parse>() -> impl Parser<&'a str, Self, E> { use winnow::{ascii::dec_uint, combinator::*, prelude::*}; let colon_number = || { (":".value(()), dec_uint::<_, u64, _>.take()) .map(|(_, number): (_, &str)| Cow::Borrowed(number)) }; let line_offset = (colon_number(), opt(colon_number())); trace( "file location", line_offset.map(|(line, offset)| FileLocation { line, offset }), ) } } impl<'a> FileName<'a> { fn parse(segment: PathSegment<'a>, location: Option>) -> Self { fn rsplit<'a>( input: Cow<'a, str>, delimiter: char, ) -> Option<(Cow<'a, str>, Cow<'a, str>)> { match input { Cow::Borrowed(s) => s .rsplit_once(delimiter) .map(|(a, b)| (Cow::Borrowed(a), Cow::Borrowed(b))), Cow::Owned(s) => s .rsplit_once(delimiter) .map(|(a, b)| (Cow::Owned(a.to_string()), Cow::Owned(b.to_string()))), } } let (new_segment, ext_excluding_dot) = if let Some((segment, ext_excluding_dot)) = rsplit(segment.segment.clone(), '.') && !ext_excluding_dot.is_empty() { (segment, Some(ext_excluding_dot)) } else { (segment.segment, None) }; Self { leading_separator: segment.leading_separator, segment: new_segment, ext_excluding_dot, location, } } } impl<'a> Path<'a> { fn parse>() -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*, token::*}; let terminator = || { alt(( eof.value(()), any::<&'a str, E> .verify(|i: &char| { (*i).is_whitespace() || !(i.is_alphanumeric() || ['_', '-', '\"', '\'', '.', '/', '\\'].contains(i)) }) .value(()), )) }; let terminator_or_sep = || alt((PathSep::parse().value(()), terminator())); let till_next_sep = || { trace( "till next sep", repeat_till(0.., any::<&'a str, E>, peek(terminator_or_sep())) .map(|(segment, _)| segment), ) }; let sep_and_next = (PathSep::parse(), till_next_sep()).map(|(leading_separator, segment)| PathSegment { leading_separator, segment, }); let opt_sep_and_next = (opt(PathSep::parse()), till_next_sep()).map(|(leading_separator, segment)| { PathSegment { leading_separator: leading_separator.unwrap_or(PathSep::None), segment, } }); let drive = opt(( any::<&'a str, E>.verify(|x: &char| x.is_ascii_alphabetic()), ':', )) .map(|i| i.map(|(letter, _): (char, char)| letter)); let drive_and_segments = ( drive, opt_sep_and_next, repeat_till(0.., sep_and_next, peek(terminator())) .map(|(segments, _): (Vec, _)| segments), opt(FileLocation::parse()), ); trace( "path", drive_and_segments .map(|(drive, segment, segments, location)| { let (segments, last) = { let mut segments = segments; segments.insert(0, segment); let last = segments.pop().unwrap(); (segments, last) }; let filename = FileName::parse(last, location); Self { drive_excluding_colon: drive, segments, filename, } }) .verify(|i| { // just ".." isn't valid if i.segments.is_empty() && (i.filename.segment == ".." || i.filename.segment == ".") { return false; } !i.segments.is_empty() || i.filename.ext_excluding_dot.is_some() || !matches!(i.filename.leading_separator, PathSep::None) }), ) } } impl<'a> Atom<'a> { fn parse, T: 'a, P: Parser<&'a str, T, E>>( terminated_by: impl (Fn() -> P) + 'a, ) -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*, token::*}; let text = ( not(peek(terminated_by())), repeat_till::<_, _, Cow<'a, str>, _, _, _, _>( 1.., any::<&str, _>, peek(alt(( terminated_by().value(()), any::<&str, _>.verify(|i| i.is_whitespace()).value(()), eof::<&str, _>.value(()), ))), ) .map(|(i, _)| Self::Text(i)), ) .map(|(_, x)| x); trace("atom", alt((text,))) } } impl<'a> Token<'a> { fn parse_without_separator + 'a>() -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*, token::*}; let delimited: Box> = Box::new(Delimited::parse().map(Self::Delimited)); let lifetime = ( "'", alt(( repeat::<_, _, Cow<'a, str>, _, _>( 1..=3, any.verify(|i: &char| i.is_ascii_lowercase() || *i == '_'), ) .take(), "{erased}", )), peek(alt(( any::<&str, _> .verify(|i: &char| !i.is_alphabetic() && *i != '\'') .value(()), eof::<&str, _>.value(()), ))), ) .map(|(_, lifetime, _)| Token::Lifetime(lifetime.into())); trace( "token-without-sep", alt(( "true".value(Self::True), "false".value(Self::False), "None".value(Self::None), Number::parse().map(Self::Number), Path::parse().map(Self::Path), lifetime, AnyString::parse().map(Self::String), delimited, Atom::parse(|| { alt(( Separator::parse().value(""), alt((")", "]", "}", ">")), alt(("(", "[", "{", "<")), alt(("`", "'", "\"")), )) }) .map(Self::Atom), )), ) } fn parse + 'a>() -> Box + 'a> { use winnow::{combinator::*, prelude::*}; let before = Self::parse_without_separator(); Box::new(trace( "token", alt(( ( before, opt( (Space::parse(), Separator::parse()).flat_map(|(space, sep)| { let box_dyn_segment: Box> = Box::new(Segment::parse()); box_dyn_segment.map(move |segment| (space.clone(), sep, segment)) }), ), ) .map(|(before, trailer)| { if let Some((space_before, separator, after)) = trailer { Token::Separated { before: Box::new(before), space_before, separator, after: Box::new(after), } } else { before } }), Atom::parse(|| fail::<_, (), _>).map(Self::Atom), )), )) } } impl<'a> Delimited<'a> { fn parse + 'a>() -> impl Parser<&'a str, Self, E> { use winnow::{combinator::*, prelude::*, token::*}; trace( "delimited", ( opt(( Atom::parse(|| { alt(( Separator::parse().value(""), alt((")", "]", "}", ">")), alt(("(", "[", "{", "<")), alt(("`", "'", "\"")), )) }), Space::parse(), )), alt(( literal('(').map(|_| literal(')').value(Delimiter::Paren)), literal('[').map(|_| literal(']').value(Delimiter::Bracket)), literal('{').map(|_| literal('}').value(Delimiter::Brace)), literal('<').map(|_| literal('>').value(Delimiter::Angle)), )) .flat_map(|end| Segments::parse(end)), ) .map(|(prefix, (contents, delimiter))| Self { prefix, delimiter, contents, }), ) } } impl<'a> Segments<'a> { fn parse + 'a, End: 'a>( end: impl Parser<&'a str, End, E> + 'a, ) -> Box + 'a> { use winnow::{combinator::*, prelude::*}; Box::new(trace( "segments", repeat_till(0.., Segment::parse(), (Space::parse(), end)).map( |(segments, (trailing_space, end)): (Vec<_>, _)| { ( Self { segments, trailing_space, }, end, ) }, ), )) } } impl<'a> Number<'a> { fn parse>() -> impl Parser<&'a str, Self, E> { use winnow::{ascii::*, combinator::*, prelude::*, token::*}; trace( "number", ( alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())), opt(( "_", repeat::<_, _, Cow<'a, str>, _, _>( 1.., any::<&'a str, E>.verify(|x| x.is_alphanumeric()), ) .take(), ) .map(|(_, suffix): (_, &str)| suffix)), peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))), ) .map(|(number, suffix, _): (&str, _, _)| Self { number: number.into(), suffix_without_underscore: suffix.map(Into::into), }), ) } } impl<'a> Segment<'a> { fn parse + 'a>() -> impl Parser<&'a str, Self, E> { use winnow::prelude::*; trace( "segment", (Space::parse(), Token::parse()).map(|(leading_space, token)| Self { leading_space, token, }), ) } } /// Parses an input string (a log line) into an ast. /// /// This *should* never error. /// Many tests ensure that arbitrary input can be parsed. /// Even if non-structured or completely random. /// The parser will gracefully accept such strings anyway, and just categorize them suboptimally. pub fn parse_input<'a>(i: &'a str) -> Result, String> { use winnow::combinator::eof; Segments::parse(eof::<&str, winnow::error::EmptyError>) .map(|(segments, _)| segments) .parse(i) .map_err(|e| e.to_string()) } #[cfg(test)] mod tests { use insta::assert_debug_snapshot; use winnow::Parser; use crate::{ ast::{Path, Segments}, parse_input, }; fn parse_path_only<'a>(i: &'a str) -> Path<'a> { Path::parse::().parse(i).unwrap() } fn parse<'a>(input: &'a str) -> Segments<'a> { parse_input(input).unwrap() } #[test] fn parse_twodots() { assert_debug_snapshot!(parse(r#".."#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: Atom( Text( "..", ), ), }, ], trailing_space: Space( "", ), } "#); } #[test] fn parse_dot() { assert_debug_snapshot!(parse(r#"."#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: Atom( Text( ".", ), ), }, ], trailing_space: Space( "", ), } "#); } #[test] fn parse_parent() { assert_debug_snapshot!(parse_path_only(r#"../foo.rs"#), @r#" Path { drive_excluding_colon: None, segments: [ PathSegment { leading_separator: None, segment: "..", }, ], filename: FileName { leading_separator: Slash, segment: "foo", ext_excluding_dot: Some( "rs", ), location: None, }, } "#); } #[test] fn parse_cwd() { assert_debug_snapshot!(parse_path_only(r#"./foo.rs"#), @r#" Path { drive_excluding_colon: None, segments: [ PathSegment { leading_separator: None, segment: ".", }, ], filename: FileName { leading_separator: Slash, segment: "foo", ext_excluding_dot: Some( "rs", ), location: None, }, } "#); } #[test] fn parse_cwd_in_path() { assert_debug_snapshot!(parse_path_only(r#"foo/./foo.rs"#), @r#" Path { drive_excluding_colon: None, segments: [ PathSegment { leading_separator: None, segment: "foo", }, PathSegment { leading_separator: Slash, segment: ".", }, ], filename: FileName { leading_separator: Slash, segment: "foo", ext_excluding_dot: Some( "rs", ), location: None, }, } "#); } #[test] fn parse_parent_in_path() { assert_debug_snapshot!(parse_path_only(r#"foo/../foo.rs"#), @r#" Path { drive_excluding_colon: None, segments: [ PathSegment { leading_separator: None, segment: "foo", }, PathSegment { leading_separator: Slash, segment: "..", }, ], filename: FileName { leading_separator: Slash, segment: "foo", ext_excluding_dot: Some( "rs", ), location: None, }, } "#); } #[test] fn parse_path() { assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs"#), @r#" Path { drive_excluding_colon: None, segments: [ PathSegment { leading_separator: None, segment: "tests", }, PathSegment { leading_separator: Slash, segment: "ui", }, PathSegment { leading_separator: Slash, segment: "impl-trait", }, ], filename: FileName { leading_separator: Slash, segment: "unsized_coercion", ext_excluding_dot: Some( "rs", ), location: None, }, } "#); } #[test] fn parse_path_with_file_line() { assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs:3:4"#), @r#" Path { drive_excluding_colon: None, segments: [ PathSegment { leading_separator: None, segment: "tests", }, PathSegment { leading_separator: Slash, segment: "ui", }, PathSegment { leading_separator: Slash, segment: "impl-trait", }, ], filename: FileName { leading_separator: Slash, segment: "unsized_coercion", ext_excluding_dot: Some( "rs", ), location: Some( FileLocation { line: "3", offset: Some( "4", ), }, ), }, } "#); } #[test] fn parse_empty() { assert_debug_snapshot!(parse(r#""#), @r#" Segments { segments: [], trailing_space: Space( "", ), } "#) } #[test] fn parse_delimited_separated() { assert_debug_snapshot!(parse(r#"a = Struct { b = 3 }"#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "a", ), ), space_before: Space( " ", ), separator: Eq, after: Segment { leading_space: Space( " ", ), token: Delimited( Delimited { prefix: Some( ( Text( "Struct", ), Space( " ", ), ), ), delimiter: Brace, contents: Segments { segments: [ Segment { leading_space: Space( " ", ), token: Separated { before: Atom( Text( "b", ), ), space_before: Space( " ", ), separator: Eq, after: Segment { leading_space: Space( " ", ), token: Number( Number { number: "3", suffix_without_underscore: None, }, ), }, }, }, ], trailing_space: Space( " ", ), }, }, ), }, }, }, ], trailing_space: Space( "", ), } "#) } #[test] fn parse_text() { assert_debug_snapshot!(parse(r#"abc"#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: Atom( Text( "abc", ), ), }, ], trailing_space: Space( "", ), } "#) } #[test] fn parse_boolean() { assert_debug_snapshot!(parse(r#"true"#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: True, }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r#"false"#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: False, }, ], trailing_space: Space( "", ), } "#); } #[test] fn parse_string() { assert_debug_snapshot!(parse(r##""foo""##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "", ty: Double, contents: "foo", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"#"foo"#"##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "", ty: Double, contents: "foo", num_hashtags: 1, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"r#"foo"#"##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "r", ty: Double, contents: "foo", num_hashtags: 1, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"c"foo""##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "c", ty: Double, contents: "foo", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"b"foo""##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "b", ty: Double, contents: "foo", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"'a'"##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "", ty: Single, contents: "a", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"`b`"##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "", ty: Backtick, contents: "b", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"b'foo'"##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "b", ty: Single, contents: "foo", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); assert_debug_snapshot!(parse(r##"b`foo`"##), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: String( AnyString { prefix: "b", ty: Backtick, contents: "foo", num_hashtags: 0, suffix: "", }, ), }, ], trailing_space: Space( "", ), } "#); } #[test] fn parse_ex1() { assert_debug_snapshot!(parse(r#"a::>(a = 3_usize, b = 3_usize)"#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "a", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: None, delimiter: Angle, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "b", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "c", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: Some( ( Text( "Generalizer", ), Space( "", ), ), ), delimiter: Angle, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Lifetime( "_", ), }, Segment { leading_space: Space( "", ), token: Atom( Text( ",", ), ), }, Segment { leading_space: Space( " ", ), token: Lifetime( "_", ), }, ], trailing_space: Space( "", ), }, }, ), }, }, }, }, }, ], trailing_space: Space( "", ), }, }, ), }, }, }, Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "a", ), ), space_before: Space( " ", ), separator: Eq, after: Segment { leading_space: Space( " ", ), token: Number( Number { number: "3", suffix_without_underscore: Some( "usize", ), }, ), }, }, }, Segment { leading_space: Space( "", ), token: Atom( Text( ",", ), ), }, Segment { leading_space: Space( " ", ), token: Separated { before: Atom( Text( "b", ), ), space_before: Space( " ", ), separator: Eq, after: Segment { leading_space: Space( " ", ), token: Number( Number { number: "3", suffix_without_underscore: Some( "usize", ), }, ), }, }, }, ], trailing_space: Space( "", ), }, }, ), }, ], trailing_space: Space( "", ), } "#); } #[test] fn parse_ex2() { assert_debug_snapshot!(parse(r#"super_combine_consts::, rustc_middle::ty::context::TyCtxt<'_>>>(?1c, ?2c)"#), @r#" Segments { segments: [ Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "super_combine_consts", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: None, delimiter: Angle, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "rustc_type_ir", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "relate", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "solver_relating", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: Some( ( Text( "SolverRelating", ), Space( "", ), ), ), delimiter: Angle, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Lifetime( "_", ), }, Segment { leading_space: Space( "", ), token: Atom( Text( ",", ), ), }, Segment { leading_space: Space( " ", ), token: Separated { before: Atom( Text( "rustc_infer", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "infer", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: Some( ( Text( "InferCtxt", ), Space( "", ), ), ), delimiter: Angle, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Lifetime( "_", ), }, ], trailing_space: Space( "", ), }, }, ), }, }, }, }, }, Segment { leading_space: Space( "", ), token: Atom( Text( ",", ), ), }, Segment { leading_space: Space( " ", ), token: Separated { before: Atom( Text( "rustc_middle", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "ty", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Separated { before: Atom( Text( "context", ), ), space_before: Space( "", ), separator: DoubleColon, after: Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: Some( ( Text( "TyCtxt", ), Space( "", ), ), ), delimiter: Angle, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Lifetime( "_", ), }, ], trailing_space: Space( "", ), }, }, ), }, }, }, }, }, }, }, ], trailing_space: Space( "", ), }, }, ), }, }, }, }, }, }, }, ], trailing_space: Space( "", ), }, }, ), }, }, }, Segment { leading_space: Space( "", ), token: Delimited( Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [ Segment { leading_space: Space( "", ), token: Atom( Text( "?1c,", ), ), }, Segment { leading_space: Space( " ", ), token: Atom( Text( "?2c", ), ), }, ], trailing_space: Space( "", ), }, }, ), }, ], trailing_space: Space( "", ), } "#) } }