better parsing

This commit is contained in:
Jana Dönszelmann 2026-04-03 16:47:18 +02:00
parent bed1d5b38b
commit 53fc09c02f
No known key found for this signature in database
10 changed files with 416 additions and 45 deletions

View file

@ -0,0 +1,9 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 8d278209bf1e44a21adb1c2c2930f04078cf7b3b5199f663320adfae874257cc # shrinks to original = "¡"
cc 692ed6d9acb3a9744c4315d6ca58ee887c49c1a06e41772eeda09f94beda02a4 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Path(Path { drive_excluding_colon: None, segments: [PathSegment { leading_separator: None, segment: "\u{16af0}" }], filename: FileName { leading_separator: None, segment: "", ext_excluding_dot: None, location: None } }) }], trailing_space: Space("") }
cc 29a5047217c3612e9cf198c1f353d9ba94fdf8dafb7f8bf7b3561b0e82731d01 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space("") } }) }, Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space("") } }) }, Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space(" ") } }) }], trailing_space: Space("") }

View file

@ -49,6 +49,7 @@ pub struct AnyString<'a> {
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Space<'a>(pub Cow<'a, str>); pub struct Space<'a>(pub Cow<'a, str>);
/// See [`Token::Path`].
#[derive(Copy, Clone, Debug, PartialEq, Arbitrary)] #[derive(Copy, Clone, Debug, PartialEq, Arbitrary)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub enum PathSep { pub enum PathSep {
@ -58,7 +59,7 @@ pub enum PathSep {
Backslash, Backslash,
} }
/// A segment of a path, with a leading separator. /// A segment of a path, with a leading separator. See [`Token::Path`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub struct PathSegment<'a> { pub struct PathSegment<'a> {
@ -98,7 +99,11 @@ pub struct Path<'a> {
/// See [`Token::Number`]. /// See [`Token::Number`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Number<'a>(pub Cow<'a, str>); #[allow(missing_docs)]
pub struct Number<'a> {
pub number: Cow<'a, str>,
pub suffix_without_underscore: Option<Cow<'a, str>>,
}
/// Anything that doesn't contain spaces, and that can be a prefix of `Delimited`. /// Anything that doesn't contain spaces, and that can be a prefix of `Delimited`.
/// i.e. an English word, or Rust `::`-separated Path /// i.e. an English word, or Rust `::`-separated Path
@ -208,6 +213,8 @@ pub enum Token<'a> {
/// There must not be any alphabetic character after the number, without a space in between. /// There must not be any alphabetic character after the number, without a space in between.
/// That's to guard against finding numbers inside hashes for example. /// That's to guard against finding numbers inside hashes for example.
/// ///
/// A suffix is allowed, however, as long as it's separated by an underscore.
///
/// For example: /// For example:
/// ///
/// ```rust /// ```rust
@ -219,6 +226,9 @@ pub enum Token<'a> {
/// assert!(is_number("-1")); /// assert!(is_number("-1"));
/// assert!(is_number("-1.5")); /// assert!(is_number("-1.5"));
/// ///
/// // with suffix
/// assert!(is_number("10_usize"));
///
/// // some counterexamples /// // some counterexamples
/// assert!(!is_number("`mrow!`")); /// assert!(!is_number("`mrow!`"));
/// assert!(!is_number("true")); /// assert!(!is_number("true"));

View file

@ -126,7 +126,17 @@ impl<'a> Display for Path<'a> {
impl<'a> Display for Number<'a> { impl<'a> Display for Number<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0) let Self {
number,
suffix_without_underscore,
} = self;
write!(f, "{number}")?;
if let Some(suffix) = suffix_without_underscore {
write!(f, "_")?;
write!(f, "{suffix}")?;
}
Ok(())
} }
} }

View file

@ -189,7 +189,7 @@ impl<'a> Path<'a> {
}); });
let drive = opt(( let drive = opt((
any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')), any::<&'a str, E>.verify(|x: &char| x.is_ascii_alphabetic()),
':', ':',
)) ))
.map(|i| i.map(|(letter, _): (char, char)| letter)); .map(|i| i.map(|(letter, _): (char, char)| letter));
@ -237,20 +237,25 @@ impl<'a> Path<'a> {
} }
impl<'a> Atom<'a> { impl<'a> Atom<'a> {
fn parse<E: ParserError<&'a str>, T: 'a>( fn parse<E: ParserError<&'a str>, T: 'a, P: Parser<&'a str, T, E>>(
terminated_by: impl Parser<&'a str, T, E>, terminated_by: impl (Fn() -> P) + 'a,
) -> impl Parser<&'a str, Self, E> { ) -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*}; use winnow::{combinator::*, prelude::*, token::*};
let text = repeat::<_, _, Cow<'a, str>, _, _>( let text = (
not(peek(terminated_by())),
repeat_till::<_, _, Cow<'a, str>, _, _, _, _>(
1.., 1..,
( any::<&str, _>,
peek(not(terminated_by)), peek(alt((
any::<&str, _>.verify(move |i: &char| !(*i).is_whitespace()), terminated_by().value(()),
any::<&str, _>.verify(|i| i.is_whitespace()).value(()),
eof::<&str, _>.value(()),
))),
) )
.map(|(_, i)| i), .map(|(i, _)| Self::Text(i)),
) )
.map(Self::Text); .map(|(_, x)| x);
trace("atom", alt((text,))) trace("atom", alt((text,)))
} }
@ -273,7 +278,14 @@ impl<'a> Token<'a> {
Path::parse().map(Self::Path), Path::parse().map(Self::Path),
AnyString::parse().map(Self::String), AnyString::parse().map(Self::String),
delimited, delimited,
Atom::parse(alt((Separator::parse().value(""), ")", "]", "}", ">"))) Atom::parse(|| {
alt((
Separator::parse().value(""),
alt((")", "]", "}", ">")),
alt(("(", "[", "{", "<")),
alt(("`", "'", "\"")),
))
})
.map(Self::Atom), .map(Self::Atom),
)), )),
) )
@ -309,7 +321,7 @@ impl<'a> Token<'a> {
before before
} }
}), }),
Atom::parse(fail::<_, (), _>).map(Self::Atom), Atom::parse(|| fail::<_, (), _>).map(Self::Atom),
)), )),
)) ))
} }
@ -323,7 +335,14 @@ impl<'a> Delimited<'a> {
"delimited", "delimited",
( (
opt(( opt((
Atom::parse(alt(("(", "[", "{", "<", Separator::parse().value("")))), Atom::parse(|| {
alt((
Separator::parse().value(""),
alt((")", "]", "}", ">")),
alt(("(", "[", "{", "<")),
alt(("`", "'", "\"")),
))
}),
Space::parse(), Space::parse(),
)), )),
alt(( alt((
@ -374,9 +393,21 @@ impl<'a> Number<'a> {
"number", "number",
( (
alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())), alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())),
opt((
"_",
repeat::<_, _, Cow<'a, str>, _, _>(
1..,
any::<&'a str, E>.verify(|x| x.is_alphanumeric()),
)
.take(),
)
.map(|(_, suffix): (_, &str)| suffix)),
peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))), peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))),
) )
.map(|(i, _): (&str, _)| Self(i.into())), .map(|(number, suffix, _): (&str, _, _)| Self {
number: number.into(),
suffix_without_underscore: suffix.map(Into::into),
}),
) )
} }
} }
@ -654,6 +685,89 @@ mod tests {
"#) "#)
} }
#[test]
fn parse_delimited_separated() {
assert_debug_snapshot!(parse(r#"a = Struct { b = 3 }"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Delimited(
Delimited {
prefix: Some(
(
Text(
"Struct",
),
Space(
" ",
),
),
),
delimiter: Brace,
contents: Segments {
segments: [
Segment {
leading_space: Space(
" ",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: None,
},
),
},
},
},
],
trailing_space: Space(
" ",
),
},
},
),
},
},
},
],
trailing_space: Space(
"",
),
}
"#)
}
#[test] #[test]
fn parse_text() { fn parse_text() {
assert_debug_snapshot!(parse(r#"abc"#), @r#" assert_debug_snapshot!(parse(r#"abc"#), @r#"
@ -921,4 +1035,220 @@ mod tests {
} }
"#); "#);
} }
#[test]
fn parse_ex1() {
assert_debug_snapshot!(parse(r#"a::<b::c::Generalizer<'_, '_>>(a = 3_usize, b = 3_usize)"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: None,
delimiter: Angle,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"c",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: Some(
(
Text(
"Generalizer",
),
Space(
"",
),
),
),
delimiter: Angle,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Single,
contents: "_, ",
num_hashtags: 0,
suffix: "",
},
),
},
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
"_",
),
),
},
],
trailing_space: Space(
"",
),
},
},
),
},
},
},
},
},
],
trailing_space: Space(
"",
),
},
},
),
},
},
},
Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: None,
delimiter: Paren,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: Some(
"usize",
),
},
),
},
},
},
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
",",
),
),
},
Segment {
leading_space: Space(
" ",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: Some(
"usize",
),
},
),
},
},
},
],
trailing_space: Space(
"",
),
},
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
}
} }

View file

@ -119,10 +119,18 @@ impl Path<'static> {
impl Number<'static> { impl Number<'static> {
#[cfg(test)] #[cfg(test)]
fn arb() -> impl Strategy<Value = Self> { fn arb() -> impl Strategy<Value = Self> {
use proptest::option::*;
(
prop_oneof![ prop_oneof![
any::<i64>().prop_map(|number| Self(number.to_string().into())), any::<i64>().prop_map(|number| number.to_string()),
any::<f64>().prop_map(|number| Self(number.to_string().into())) any::<f64>().prop_map(|number| number.to_string())
] ],
of("[a-zA-Z0-9]"),
)
.prop_map(|(a, b)| Self {
number: a.into(),
suffix_without_underscore: b.map(Into::into),
})
} }
} }

View file

@ -25,8 +25,9 @@ pub enum SpanKind {
/// i.e. `Some` in `Some(3)` /// i.e. `Some` in `Some(3)`
Constructor, Constructor,
/// String prefix, suffix, hashtags, etc /// String prefix, suffix, hashtags, etc.
StringSurroundings, /// Also number suffix
Surroundings,
/// Any other text (the default) /// Any other text (the default)
Text, Text,
@ -114,9 +115,9 @@ mod private {
num_hashtags, num_hashtags,
suffix, suffix,
} = self; } = self;
cx.push(prefix, SpanKind::StringSurroundings); cx.push(prefix, SpanKind::Surroundings);
for _ in 0..num_hashtags { for _ in 0..num_hashtags {
cx.push("#", SpanKind::StringSurroundings) cx.push("#", SpanKind::Surroundings)
} }
ty.into_spans(cx); ty.into_spans(cx);
@ -124,9 +125,9 @@ mod private {
ty.into_spans(cx); ty.into_spans(cx);
for _ in 0..num_hashtags { for _ in 0..num_hashtags {
cx.push("#", SpanKind::StringSurroundings) cx.push("#", SpanKind::Surroundings)
} }
cx.push(suffix, SpanKind::StringSurroundings); cx.push(suffix, SpanKind::Surroundings);
} }
} }
@ -138,7 +139,12 @@ mod private {
impl<'a> IntoSpansImpl<'a> for Number<'a> { impl<'a> IntoSpansImpl<'a> for Number<'a> {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
cx.push(self.0, SpanKind::Number) cx.push(self.number, SpanKind::Number);
if let Some(suffix) = self.suffix_without_underscore {
cx.push("_", SpanKind::Surroundings);
cx.push(suffix, SpanKind::Surroundings);
}
} }
} }
@ -228,10 +234,10 @@ mod private {
cx.push( cx.push(
text, text,
match delimiter { match delimiter {
Delimiter::Bracket => SpanKind::Constructor, Delimiter::Brace => SpanKind::Constructor,
Delimiter::Paren if space.0.is_empty() => SpanKind::Constructor, Delimiter::Paren if space.0.is_empty() => SpanKind::Constructor,
Delimiter::Paren => SpanKind::Text, Delimiter::Paren => SpanKind::Text,
Delimiter::Brace => SpanKind::Text, Delimiter::Bracket => SpanKind::Text,
Delimiter::Angle if space.0.is_empty() => SpanKind::Constructor, Delimiter::Angle if space.0.is_empty() => SpanKind::Constructor,
Delimiter::Angle => SpanKind::Text, Delimiter::Angle => SpanKind::Text,
}, },
@ -336,7 +342,7 @@ mod tests {
), ),
( (
"unsized_coercion", "unsized_coercion",
Constructor, Text,
), ),
( (
"[", "[",
@ -398,7 +404,7 @@ mod tests {
), ),
( (
"", "",
StringSurroundings, Surroundings,
), ),
( (
"\"", "\"",
@ -414,7 +420,7 @@ mod tests {
), ),
( (
"", "",
StringSurroundings, Surroundings,
), ),
( (
")", ")",
@ -476,7 +482,7 @@ mod tests {
), ),
( (
"core", "core",
Constructor, Text,
), ),
( (
"[", "[",
@ -572,7 +578,7 @@ mod tests {
), ),
( (
"core", "core",
Constructor, Text,
), ),
( (
"[", "[",
@ -670,7 +676,7 @@ mod tests {
), ),
( (
"unsized_coercion", "unsized_coercion",
Constructor, Text,
), ),
( (
"[", "[",
@ -884,7 +890,7 @@ mod tests {
), ),
( (
"alloc", "alloc",
Constructor, Text,
), ),
( (
"[", "[",

View file

@ -76,9 +76,7 @@ impl Matcher {
.spans() .spans()
.find(span, name) .find(span, name)
.is_some_and(|v| value.matches(v)), .is_some_and(|v| value.matches(v)),
Matcher::Message { value } => { Matcher::Message { value } => entry.message_or_name().is_some_and(|v| value.matches(v)),
entry.message_or_name().is_some_and(|v| value.matches(&v))
}
} }
} }

View file

@ -30,7 +30,7 @@ impl Filters {
if let Some(path) = &path if let Some(path) = &path
&& path.exists() && path.exists()
{ {
match File::open(&path) { match File::open(path) {
Ok(f) => match serde_json::from_reader(f) { Ok(f) => match serde_json::from_reader(f) {
Ok(i) => { Ok(i) => {
return Self { return Self {
@ -73,7 +73,7 @@ impl Filters {
.create(true) .create(true)
.write(true) .write(true)
.truncate(true) .truncate(true)
.open(&path) .open(path)
{ {
Ok(f) => { Ok(f) => {
if let Err(e) = serde_json::to_writer(f, self) { if let Err(e) = serde_json::to_writer(f, self) {

View file

@ -97,7 +97,7 @@ impl Widget for Styled<'_, &Items<'_>> {
.selected() .selected()
.as_ref() .as_ref()
.and_then(|i| i.message_or_name()) .and_then(|i| i.message_or_name())
.is_some_and(|m| &m == msg) .is_some_and(|m| m == msg)
{ {
line.highlight(Highlighted::All); line.highlight(Highlighted::All);
} }

View file

@ -170,7 +170,7 @@ pub fn style_span(kind: SpanKind, style: Style, styles: &Styles) -> Style {
SpanKind::Path => style.fg(styles.literal).underlined(), SpanKind::Path => style.fg(styles.literal).underlined(),
SpanKind::Space(_) => style, SpanKind::Space(_) => style,
SpanKind::Constructor => style.fg(styles.literal), SpanKind::Constructor => style.fg(styles.literal),
SpanKind::StringSurroundings => style.fg(styles.faded), SpanKind::Surroundings => style.fg(styles.faded),
SpanKind::Text => style, SpanKind::Text => style,
} }
} }