better parsing

This commit is contained in:
Jana Dönszelmann 2026-04-03 16:47:18 +02:00
parent bed1d5b38b
commit 53fc09c02f
No known key found for this signature in database
10 changed files with 416 additions and 45 deletions

View file

@ -0,0 +1,9 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 8d278209bf1e44a21adb1c2c2930f04078cf7b3b5199f663320adfae874257cc # shrinks to original = "¡"
cc 692ed6d9acb3a9744c4315d6ca58ee887c49c1a06e41772eeda09f94beda02a4 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Path(Path { drive_excluding_colon: None, segments: [PathSegment { leading_separator: None, segment: "\u{16af0}" }], filename: FileName { leading_separator: None, segment: "", ext_excluding_dot: None, location: None } }) }], trailing_space: Space("") }
cc 29a5047217c3612e9cf198c1f353d9ba94fdf8dafb7f8bf7b3561b0e82731d01 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space("") } }) }, Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space("") } }) }, Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space(" ") } }) }], trailing_space: Space("") }

View file

@ -49,6 +49,7 @@ pub struct AnyString<'a> {
#[derive(Clone, Debug, PartialEq)]
pub struct Space<'a>(pub Cow<'a, str>);
/// See [`Token::Path`].
#[derive(Copy, Clone, Debug, PartialEq, Arbitrary)]
#[allow(missing_docs)]
pub enum PathSep {
@ -58,7 +59,7 @@ pub enum PathSep {
Backslash,
}
/// A segment of a path, with a leading separator.
/// A segment of a path, with a leading separator. See [`Token::Path`].
#[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub struct PathSegment<'a> {
@ -98,7 +99,11 @@ pub struct Path<'a> {
/// See [`Token::Number`].
#[derive(Clone, Debug, PartialEq)]
pub struct Number<'a>(pub Cow<'a, str>);
#[allow(missing_docs)]
pub struct Number<'a> {
pub number: Cow<'a, str>,
pub suffix_without_underscore: Option<Cow<'a, str>>,
}
/// Anything that doesn't contain spaces, and that can be a prefix of `Delimited`.
/// i.e. an English word, or a Rust `::`-separated Path
@ -208,6 +213,8 @@ pub enum Token<'a> {
/// There must not be any alphabetic character after the number, without a space in between.
/// That's to guard against finding numbers inside hashes for example.
///
/// A suffix is allowed, however, as long as it's separated by an underscore.
///
/// For example:
///
/// ```rust
@ -219,6 +226,9 @@ pub enum Token<'a> {
/// assert!(is_number("-1"));
/// assert!(is_number("-1.5"));
///
/// // with suffix
/// assert!(is_number("10_usize"));
///
/// // some counterexamples
/// assert!(!is_number("`mrow!`"));
/// assert!(!is_number("true"));

View file

@ -126,7 +126,17 @@ impl<'a> Display for Path<'a> {
impl<'a> Display for Number<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
let Self {
number,
suffix_without_underscore,
} = self;
write!(f, "{number}")?;
if let Some(suffix) = suffix_without_underscore {
write!(f, "_")?;
write!(f, "{suffix}")?;
}
Ok(())
}
}

View file

@ -189,7 +189,7 @@ impl<'a> Path<'a> {
});
let drive = opt((
any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')),
any::<&'a str, E>.verify(|x: &char| x.is_ascii_alphabetic()),
':',
))
.map(|i| i.map(|(letter, _): (char, char)| letter));
@ -237,20 +237,25 @@ impl<'a> Path<'a> {
}
impl<'a> Atom<'a> {
fn parse<E: ParserError<&'a str>, T: 'a>(
terminated_by: impl Parser<&'a str, T, E>,
fn parse<E: ParserError<&'a str>, T: 'a, P: Parser<&'a str, T, E>>(
terminated_by: impl (Fn() -> P) + 'a,
) -> impl Parser<&'a str, Self, E> {
use winnow::{combinator::*, prelude::*, token::*};
let text = repeat::<_, _, Cow<'a, str>, _, _>(
1..,
(
peek(not(terminated_by)),
any::<&str, _>.verify(move |i: &char| !(*i).is_whitespace()),
let text = (
not(peek(terminated_by())),
repeat_till::<_, _, Cow<'a, str>, _, _, _, _>(
1..,
any::<&str, _>,
peek(alt((
terminated_by().value(()),
any::<&str, _>.verify(|i| i.is_whitespace()).value(()),
eof::<&str, _>.value(()),
))),
)
.map(|(_, i)| i),
.map(|(i, _)| Self::Text(i)),
)
.map(Self::Text);
.map(|(_, x)| x);
trace("atom", alt((text,)))
}
@ -273,8 +278,15 @@ impl<'a> Token<'a> {
Path::parse().map(Self::Path),
AnyString::parse().map(Self::String),
delimited,
Atom::parse(alt((Separator::parse().value(""), ")", "]", "}", ">")))
.map(Self::Atom),
Atom::parse(|| {
alt((
Separator::parse().value(""),
alt((")", "]", "}", ">")),
alt(("(", "[", "{", "<")),
alt(("`", "'", "\"")),
))
})
.map(Self::Atom),
)),
)
}
@ -309,7 +321,7 @@ impl<'a> Token<'a> {
before
}
}),
Atom::parse(fail::<_, (), _>).map(Self::Atom),
Atom::parse(|| fail::<_, (), _>).map(Self::Atom),
)),
))
}
@ -323,7 +335,14 @@ impl<'a> Delimited<'a> {
"delimited",
(
opt((
Atom::parse(alt(("(", "[", "{", "<", Separator::parse().value("")))),
Atom::parse(|| {
alt((
Separator::parse().value(""),
alt((")", "]", "}", ">")),
alt(("(", "[", "{", "<")),
alt(("`", "'", "\"")),
))
}),
Space::parse(),
)),
alt((
@ -374,9 +393,21 @@ impl<'a> Number<'a> {
"number",
(
alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())),
opt((
"_",
repeat::<_, _, Cow<'a, str>, _, _>(
1..,
any::<&'a str, E>.verify(|x| x.is_alphanumeric()),
)
.take(),
)
.map(|(_, suffix): (_, &str)| suffix)),
peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))),
)
.map(|(i, _): (&str, _)| Self(i.into())),
.map(|(number, suffix, _): (&str, _, _)| Self {
number: number.into(),
suffix_without_underscore: suffix.map(Into::into),
}),
)
}
}
@ -654,6 +685,89 @@ mod tests {
"#)
}
#[test]
fn parse_delimited_separated() {
assert_debug_snapshot!(parse(r#"a = Struct { b = 3 }"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Delimited(
Delimited {
prefix: Some(
(
Text(
"Struct",
),
Space(
" ",
),
),
),
delimiter: Brace,
contents: Segments {
segments: [
Segment {
leading_space: Space(
" ",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: None,
},
),
},
},
},
],
trailing_space: Space(
" ",
),
},
},
),
},
},
},
],
trailing_space: Space(
"",
),
}
"#)
}
#[test]
fn parse_text() {
assert_debug_snapshot!(parse(r#"abc"#), @r#"
@ -921,4 +1035,220 @@ mod tests {
}
"#);
}
#[test]
fn parse_ex1() {
assert_debug_snapshot!(parse(r#"a::<b::c::Generalizer<'_, '_>>(a = 3_usize, b = 3_usize)"#), @r#"
Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: None,
delimiter: Angle,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"c",
),
),
space_before: Space(
"",
),
separator: DoubleColon,
after: Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: Some(
(
Text(
"Generalizer",
),
Space(
"",
),
),
),
delimiter: Angle,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: String(
AnyString {
prefix: "",
ty: Single,
contents: "_, ",
num_hashtags: 0,
suffix: "",
},
),
},
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
"_",
),
),
},
],
trailing_space: Space(
"",
),
},
},
),
},
},
},
},
},
],
trailing_space: Space(
"",
),
},
},
),
},
},
},
Segment {
leading_space: Space(
"",
),
token: Delimited(
Delimited {
prefix: None,
delimiter: Paren,
contents: Segments {
segments: [
Segment {
leading_space: Space(
"",
),
token: Separated {
before: Atom(
Text(
"a",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: Some(
"usize",
),
},
),
},
},
},
Segment {
leading_space: Space(
"",
),
token: Atom(
Text(
",",
),
),
},
Segment {
leading_space: Space(
" ",
),
token: Separated {
before: Atom(
Text(
"b",
),
),
space_before: Space(
" ",
),
separator: Eq,
after: Segment {
leading_space: Space(
" ",
),
token: Number(
Number {
number: "3",
suffix_without_underscore: Some(
"usize",
),
},
),
},
},
},
],
trailing_space: Space(
"",
),
},
},
),
},
],
trailing_space: Space(
"",
),
}
"#);
}
}

View file

@ -119,10 +119,18 @@ impl Path<'static> {
impl Number<'static> {
#[cfg(test)]
fn arb() -> impl Strategy<Value = Self> {
prop_oneof![
any::<i64>().prop_map(|number| Self(number.to_string().into())),
any::<f64>().prop_map(|number| Self(number.to_string().into()))
]
use proptest::option::*;
(
prop_oneof![
any::<i64>().prop_map(|number| number.to_string()),
any::<f64>().prop_map(|number| number.to_string())
],
of("[a-zA-Z0-9]"),
)
.prop_map(|(a, b)| Self {
number: a.into(),
suffix_without_underscore: b.map(Into::into),
})
}
}

View file

@ -25,8 +25,9 @@ pub enum SpanKind {
/// i.e. `Some` in `Some(3)`
Constructor,
/// String prefix, suffix, hashtags, etc
StringSurroundings,
/// String prefix, suffix, hashtags, etc.
/// Also used for the suffix of a number, including its `_` separator.
Surroundings,
/// Any other text (the default)
Text,
@ -114,9 +115,9 @@ mod private {
num_hashtags,
suffix,
} = self;
cx.push(prefix, SpanKind::StringSurroundings);
cx.push(prefix, SpanKind::Surroundings);
for _ in 0..num_hashtags {
cx.push("#", SpanKind::StringSurroundings)
cx.push("#", SpanKind::Surroundings)
}
ty.into_spans(cx);
@ -124,9 +125,9 @@ mod private {
ty.into_spans(cx);
for _ in 0..num_hashtags {
cx.push("#", SpanKind::StringSurroundings)
cx.push("#", SpanKind::Surroundings)
}
cx.push(suffix, SpanKind::StringSurroundings);
cx.push(suffix, SpanKind::Surroundings);
}
}
@ -138,7 +139,12 @@ mod private {
impl<'a> IntoSpansImpl<'a> for Number<'a> {
fn into_spans(self, cx: &mut Context<'a>) {
cx.push(self.0, SpanKind::Number)
cx.push(self.number, SpanKind::Number);
if let Some(suffix) = self.suffix_without_underscore {
cx.push("_", SpanKind::Surroundings);
cx.push(suffix, SpanKind::Surroundings);
}
}
}
@ -228,10 +234,10 @@ mod private {
cx.push(
text,
match delimiter {
Delimiter::Bracket => SpanKind::Constructor,
Delimiter::Brace => SpanKind::Constructor,
Delimiter::Paren if space.0.is_empty() => SpanKind::Constructor,
Delimiter::Paren => SpanKind::Text,
Delimiter::Brace => SpanKind::Text,
Delimiter::Bracket => SpanKind::Text,
Delimiter::Angle if space.0.is_empty() => SpanKind::Constructor,
Delimiter::Angle => SpanKind::Text,
},
@ -336,7 +342,7 @@ mod tests {
),
(
"unsized_coercion",
Constructor,
Text,
),
(
"[",
@ -398,7 +404,7 @@ mod tests {
),
(
"",
StringSurroundings,
Surroundings,
),
(
"\"",
@ -414,7 +420,7 @@ mod tests {
),
(
"",
StringSurroundings,
Surroundings,
),
(
")",
@ -476,7 +482,7 @@ mod tests {
),
(
"core",
Constructor,
Text,
),
(
"[",
@ -572,7 +578,7 @@ mod tests {
),
(
"core",
Constructor,
Text,
),
(
"[",
@ -670,7 +676,7 @@ mod tests {
),
(
"unsized_coercion",
Constructor,
Text,
),
(
"[",
@ -884,7 +890,7 @@ mod tests {
),
(
"alloc",
Constructor,
Text,
),
(
"[",

View file

@ -76,9 +76,7 @@ impl Matcher {
.spans()
.find(span, name)
.is_some_and(|v| value.matches(v)),
Matcher::Message { value } => {
entry.message_or_name().is_some_and(|v| value.matches(&v))
}
Matcher::Message { value } => entry.message_or_name().is_some_and(|v| value.matches(v)),
}
}

View file

@ -30,7 +30,7 @@ impl Filters {
if let Some(path) = &path
&& path.exists()
{
match File::open(&path) {
match File::open(path) {
Ok(f) => match serde_json::from_reader(f) {
Ok(i) => {
return Self {
@ -73,7 +73,7 @@ impl Filters {
.create(true)
.write(true)
.truncate(true)
.open(&path)
.open(path)
{
Ok(f) => {
if let Err(e) = serde_json::to_writer(f, self) {

View file

@ -97,7 +97,7 @@ impl Widget for Styled<'_, &Items<'_>> {
.selected()
.as_ref()
.and_then(|i| i.message_or_name())
.is_some_and(|m| &m == msg)
.is_some_and(|m| m == msg)
{
line.highlight(Highlighted::All);
}

View file

@ -170,7 +170,7 @@ pub fn style_span(kind: SpanKind, style: Style, styles: &Styles) -> Style {
SpanKind::Path => style.fg(styles.literal).underlined(),
SpanKind::Space(_) => style,
SpanKind::Constructor => style.fg(styles.literal),
SpanKind::StringSurroundings => style.fg(styles.faded),
SpanKind::Surroundings => style.fg(styles.faded),
SpanKind::Text => style,
}
}