better parsing

2026-04-03 16:47:18 +02:00 · 2026-04-03 16:47:18 +02:00 · 53fc09c02f
commit 53fc09c02f
parent bed1d5b38b
10 changed files with 416 additions and 45 deletions
--- a/logparse/proptest-regressions/proptesting.txt
+++ b/logparse/proptest-regressions/proptesting.txt
@ -0,0 +1,9 @@
 # Seeds for failure cases proptest has generated in the past. It is
 # automatically read and these particular cases re-run before any
 # novel cases are generated.
 #
 # It is recommended to check this file in to source control so that
 # everyone who runs the test benefits from these saved cases.
 cc 8d278209bf1e44a21adb1c2c2930f04078cf7b3b5199f663320adfae874257cc # shrinks to original = "¡"
 cc 692ed6d9acb3a9744c4315d6ca58ee887c49c1a06e41772eeda09f94beda02a4 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Path(Path { drive_excluding_colon: None, segments: [PathSegment { leading_separator: None, segment: "\u{16af0}" }], filename: FileName { leading_separator: None, segment: "", ext_excluding_dot: None, location: None } }) }], trailing_space: Space("") }
 cc 29a5047217c3612e9cf198c1f353d9ba94fdf8dafb7f8bf7b3561b0e82731d01 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space("") } }) }, Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space("") } }) }, Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: None, delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: True }], trailing_space: Space(" ") } }) }], trailing_space: Space("") }
--- a/logparse/src/ast.rs
+++ b/logparse/src/ast.rs
@ -49,6 +49,7 @@ pub struct AnyString<'a> {
 #[derive(Clone, Debug, PartialEq)]
 pub struct Space<'a>(pub Cow<'a, str>);
 /// See [`Token::Path`].
 #[derive(Copy, Clone, Debug, PartialEq, Arbitrary)]
 #[allow(missing_docs)]
 pub enum PathSep {
@ -58,7 +59,7 @@ pub enum PathSep {
    Backslash,
 }
-/// A segment of a path, with a leading separator.
+/// A segment of a path, with a leading separator. See [`Token::Path`].
 #[derive(Clone, Debug, PartialEq)]
 #[allow(missing_docs)]
 pub struct PathSegment<'a> {
@ -98,7 +99,11 @@ pub struct Path<'a> {
 /// See [`Token::Number`].
 #[derive(Clone, Debug, PartialEq)]
-pub struct Number<'a>(pub Cow<'a, str>);
+#[allow(missing_docs)]
 pub struct Number<'a> {
    pub number: Cow<'a, str>,
    pub suffix_without_underscore: Option<Cow<'a, str>>,
 }
 /// Anything that doesn't contain spaces, and that can be a prefix of `Delimited`.
 /// i.e. an English word, or a Rust `::`-separated path
@ -208,6 +213,8 @@ pub enum Token<'a> {
    /// There must not be any alphabetic character directly after the number (i.e. without a space in between).
    /// That's to guard against finding numbers inside hashes for example.
    ///
    /// A suffix is allowed, however, as long as it's separated from the number by an underscore.
    ///
    /// For example:
    ///
    /// ```rust
@ -219,6 +226,9 @@ pub enum Token<'a> {
    /// assert!(is_number("-1"));
    /// assert!(is_number("-1.5"));
    ///
    /// // with suffix
    /// assert!(is_number("10_usize"));
    ///
    /// // some counterexamples
    /// assert!(!is_number("`mrow!`"));
    /// assert!(!is_number("true"));
--- a/logparse/src/display.rs
+++ b/logparse/src/display.rs
@ -126,7 +126,17 @@ impl<'a> Display for Path<'a> {
 impl<'a> Display for Number<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.0)
+        let Self {
            number,
            suffix_without_underscore,
        } = self;
        write!(f, "{number}")?;
        if let Some(suffix) = suffix_without_underscore {
            write!(f, "_")?;
            write!(f, "{suffix}")?;
        }
        Ok(())
    }
 }
--- a/logparse/src/parse.rs
+++ b/logparse/src/parse.rs
@ -189,7 +189,7 @@ impl<'a> Path<'a> {
            });
        let drive = opt((
-            any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')),
+            any::<&'a str, E>.verify(|x: &char| x.is_ascii_alphabetic()),
            ':',
        ))
        .map(|i| i.map(|(letter, _): (char, char)| letter));
@ -237,20 +237,25 @@ impl<'a> Path<'a> {
 }
 impl<'a> Atom<'a> {
-    fn parse<E: ParserError<&'a str>, T: 'a>(
+    fn parse<E: ParserError<&'a str>, T: 'a, P: Parser<&'a str, T, E>>(
-        terminated_by: impl Parser<&'a str, T, E>,
+        terminated_by: impl (Fn() -> P) + 'a,
    ) -> impl Parser<&'a str, Self, E> {
        use winnow::{combinator::*, prelude::*, token::*};
-        let text = repeat::<_, _, Cow<'a, str>, _, _>(
+        let text = (
            not(peek(terminated_by())),
            repeat_till::<_, _, Cow<'a, str>, _, _, _, _>(
                1..,
-            (
+                any::<&str, _>,
-                peek(not(terminated_by)),
+                peek(alt((
-                any::<&str, _>.verify(move |i: &char| !(*i).is_whitespace()),
+                    terminated_by().value(()),
                    any::<&str, _>.verify(|i| i.is_whitespace()).value(()),
                    eof::<&str, _>.value(()),
                ))),
            )
-                .map(|(_, i)| i),
+            .map(|(i, _)| Self::Text(i)),
        )
-        .map(Self::Text);
+            .map(|(_, x)| x);
        trace("atom", alt((text,)))
    }
@ -273,7 +278,14 @@ impl<'a> Token<'a> {
                Path::parse().map(Self::Path),
                AnyString::parse().map(Self::String),
                delimited,
-                Atom::parse(alt((Separator::parse().value(""), ")", "]", "}", ">")))
+                Atom::parse(|| {
                    alt((
                        Separator::parse().value(""),
                        alt((")", "]", "}", ">")),
                        alt(("(", "[", "{", "<")),
                        alt(("`", "'", "\"")),
                    ))
                })
                .map(Self::Atom),
            )),
        )
@ -309,7 +321,7 @@ impl<'a> Token<'a> {
                            before
                        }
                    }),
-                Atom::parse(fail::<_, (), _>).map(Self::Atom),
+                Atom::parse(|| fail::<_, (), _>).map(Self::Atom),
            )),
        ))
    }
@ -323,7 +335,14 @@ impl<'a> Delimited<'a> {
            "delimited",
            (
                opt((
-                    Atom::parse(alt(("(", "[", "{", "<", Separator::parse().value("")))),
+                    Atom::parse(|| {
                        alt((
                            Separator::parse().value(""),
                            alt((")", "]", "}", ">")),
                            alt(("(", "[", "{", "<")),
                            alt(("`", "'", "\"")),
                        ))
                    }),
                    Space::parse(),
                )),
                alt((
@ -374,9 +393,21 @@ impl<'a> Number<'a> {
            "number",
            (
                alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())),
                opt((
                    "_",
                    repeat::<_, _, Cow<'a, str>, _, _>(
                        1..,
                        any::<&'a str, E>.verify(|x| x.is_alphanumeric()),
                    )
                    .take(),
                )
                    .map(|(_, suffix): (_, &str)| suffix)),
                peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))),
            )
-                .map(|(i, _): (&str, _)| Self(i.into())),
+                .map(|(number, suffix, _): (&str, _, _)| Self {
                    number: number.into(),
                    suffix_without_underscore: suffix.map(Into::into),
                }),
        )
    }
 }
@ -654,6 +685,89 @@ mod tests {
        "#)
    }
    #[test]
    fn parse_delimited_separated() {
        assert_debug_snapshot!(parse(r#"a = Struct { b = 3 }"#), @r#"
        Segments {
            segments: [
                Segment {
                    leading_space: Space(
                        "",
                    ),
                    token: Separated {
                        before: Atom(
                            Text(
                                "a",
                            ),
                        ),
                        space_before: Space(
                            " ",
                        ),
                        separator: Eq,
                        after: Segment {
                            leading_space: Space(
                                " ",
                            ),
                            token: Delimited(
                                Delimited {
                                    prefix: Some(
                                        (
                                            Text(
                                                "Struct",
                                            ),
                                            Space(
                                                " ",
                                            ),
                                        ),
                                    ),
                                    delimiter: Brace,
                                    contents: Segments {
                                        segments: [
                                            Segment {
                                                leading_space: Space(
                                                    " ",
                                                ),
                                                token: Separated {
                                                    before: Atom(
                                                        Text(
                                                            "b",
                                                        ),
                                                    ),
                                                    space_before: Space(
                                                        " ",
                                                    ),
                                                    separator: Eq,
                                                    after: Segment {
                                                        leading_space: Space(
                                                            " ",
                                                        ),
                                                        token: Number(
                                                            Number {
                                                                number: "3",
                                                                suffix_without_underscore: None,
                                                            },
                                                        ),
                                                    },
                                                },
                                            },
                                        ],
                                        trailing_space: Space(
                                            " ",
                                        ),
                                    },
                                },
                            ),
                        },
                    },
                },
            ],
            trailing_space: Space(
                "",
            ),
        }
        "#)
    }
    #[test]
    fn parse_text() {
        assert_debug_snapshot!(parse(r#"abc"#), @r#"
@ -921,4 +1035,220 @@ mod tests {
        }
        "#);
    }
    #[test]
    fn parse_ex1() {
        assert_debug_snapshot!(parse(r#"a::<b::c::Generalizer<'_, '_>>(a = 3_usize, b = 3_usize)"#), @r#"
        Segments {
            segments: [
                Segment {
                    leading_space: Space(
                        "",
                    ),
                    token: Separated {
                        before: Atom(
                            Text(
                                "a",
                            ),
                        ),
                        space_before: Space(
                            "",
                        ),
                        separator: DoubleColon,
                        after: Segment {
                            leading_space: Space(
                                "",
                            ),
                            token: Delimited(
                                Delimited {
                                    prefix: None,
                                    delimiter: Angle,
                                    contents: Segments {
                                        segments: [
                                            Segment {
                                                leading_space: Space(
                                                    "",
                                                ),
                                                token: Separated {
                                                    before: Atom(
                                                        Text(
                                                            "b",
                                                        ),
                                                    ),
                                                    space_before: Space(
                                                        "",
                                                    ),
                                                    separator: DoubleColon,
                                                    after: Segment {
                                                        leading_space: Space(
                                                            "",
                                                        ),
                                                        token: Separated {
                                                            before: Atom(
                                                                Text(
                                                                    "c",
                                                                ),
                                                            ),
                                                            space_before: Space(
                                                                "",
                                                            ),
                                                            separator: DoubleColon,
                                                            after: Segment {
                                                                leading_space: Space(
                                                                    "",
                                                                ),
                                                                token: Delimited(
                                                                    Delimited {
                                                                        prefix: Some(
                                                                            (
                                                                                Text(
                                                                                    "Generalizer",
                                                                                ),
                                                                                Space(
                                                                                    "",
                                                                                ),
                                                                            ),
                                                                        ),
                                                                        delimiter: Angle,
                                                                        contents: Segments {
                                                                            segments: [
                                                                                Segment {
                                                                                    leading_space: Space(
                                                                                        "",
                                                                                    ),
                                                                                    token: String(
                                                                                        AnyString {
                                                                                            prefix: "",
                                                                                            ty: Single,
                                                                                            contents: "_, ",
                                                                                            num_hashtags: 0,
                                                                                            suffix: "",
                                                                                        },
                                                                                    ),
                                                                                },
                                                                                Segment {
                                                                                    leading_space: Space(
                                                                                        "",
                                                                                    ),
                                                                                    token: Atom(
                                                                                        Text(
                                                                                            "_",
                                                                                        ),
                                                                                    ),
                                                                                },
                                                                            ],
                                                                            trailing_space: Space(
                                                                                "",
                                                                            ),
                                                                        },
                                                                    },
                                                                ),
                                                            },
                                                        },
                                                    },
                                                },
                                            },
                                        ],
                                        trailing_space: Space(
                                            "",
                                        ),
                                    },
                                },
                            ),
                        },
                    },
                },
                Segment {
                    leading_space: Space(
                        "",
                    ),
                    token: Delimited(
                        Delimited {
                            prefix: None,
                            delimiter: Paren,
                            contents: Segments {
                                segments: [
                                    Segment {
                                        leading_space: Space(
                                            "",
                                        ),
                                        token: Separated {
                                            before: Atom(
                                                Text(
                                                    "a",
                                                ),
                                            ),
                                            space_before: Space(
                                                " ",
                                            ),
                                            separator: Eq,
                                            after: Segment {
                                                leading_space: Space(
                                                    " ",
                                                ),
                                                token: Number(
                                                    Number {
                                                        number: "3",
                                                        suffix_without_underscore: Some(
                                                            "usize",
                                                        ),
                                                    },
                                                ),
                                            },
                                        },
                                    },
                                    Segment {
                                        leading_space: Space(
                                            "",
                                        ),
                                        token: Atom(
                                            Text(
                                                ",",
                                            ),
                                        ),
                                    },
                                    Segment {
                                        leading_space: Space(
                                            " ",
                                        ),
                                        token: Separated {
                                            before: Atom(
                                                Text(
                                                    "b",
                                                ),
                                            ),
                                            space_before: Space(
                                                " ",
                                            ),
                                            separator: Eq,
                                            after: Segment {
                                                leading_space: Space(
                                                    " ",
                                                ),
                                                token: Number(
                                                    Number {
                                                        number: "3",
                                                        suffix_without_underscore: Some(
                                                            "usize",
                                                        ),
                                                    },
                                                ),
                                            },
                                        },
                                    },
                                ],
                                trailing_space: Space(
                                    "",
                                ),
                            },
                        },
                    ),
                },
            ],
            trailing_space: Space(
                "",
            ),
        }
        "#);
    }
 }
--- a/logparse/src/proptesting.rs
+++ b/logparse/src/proptesting.rs
@ -119,10 +119,18 @@ impl Path<'static> {
 impl Number<'static> {
    #[cfg(test)]
    fn arb() -> impl Strategy<Value = Self> {
        use proptest::option::*;
        (
            prop_oneof![
-            any::<i64>().prop_map(|number| Self(number.to_string().into())),
+                any::<i64>().prop_map(|number| number.to_string()),
-            any::<f64>().prop_map(|number| Self(number.to_string().into()))
+                any::<f64>().prop_map(|number| number.to_string())
-        ]
+            ],
            of("[a-zA-Z0-9]"),
        )
            .prop_map(|(a, b)| Self {
                number: a.into(),
                suffix_without_underscore: b.map(Into::into),
            })
    }
 }
--- a/logparse/src/spans.rs
+++ b/logparse/src/spans.rs
@ -25,8 +25,9 @@ pub enum SpanKind {
    /// i.e. `Some` in `Some(3)`
    Constructor,
-    /// String prefix, suffix, hashtags, etc
+    /// String prefix, suffix, hashtags, etc.
-    StringSurroundings,
+    /// Also used for a number's suffix, including its underscore (e.g. `_usize` in `10_usize`).
    Surroundings,
    /// Any other text (the default)
    Text,
@ -114,9 +115,9 @@ mod private {
                num_hashtags,
                suffix,
            } = self;
-            cx.push(prefix, SpanKind::StringSurroundings);
+            cx.push(prefix, SpanKind::Surroundings);
            for _ in 0..num_hashtags {
-                cx.push("#", SpanKind::StringSurroundings)
+                cx.push("#", SpanKind::Surroundings)
            }
            ty.into_spans(cx);
@ -124,9 +125,9 @@ mod private {
            ty.into_spans(cx);
            for _ in 0..num_hashtags {
-                cx.push("#", SpanKind::StringSurroundings)
+                cx.push("#", SpanKind::Surroundings)
            }
-            cx.push(suffix, SpanKind::StringSurroundings);
+            cx.push(suffix, SpanKind::Surroundings);
        }
    }
@ -138,7 +139,12 @@ mod private {
    impl<'a> IntoSpansImpl<'a> for Number<'a> {
        fn into_spans(self, cx: &mut Context<'a>) {
-            cx.push(self.0, SpanKind::Number)
+            cx.push(self.number, SpanKind::Number);
            if let Some(suffix) = self.suffix_without_underscore {
                cx.push("_", SpanKind::Surroundings);
                cx.push(suffix, SpanKind::Surroundings);
            }
        }
    }
@ -228,10 +234,10 @@ mod private {
                    cx.push(
                        text,
                        match delimiter {
-                            Delimiter::Bracket => SpanKind::Constructor,
+                            Delimiter::Brace => SpanKind::Constructor,
                            Delimiter::Paren if space.0.is_empty() => SpanKind::Constructor,
                            Delimiter::Paren => SpanKind::Text,
-                            Delimiter::Brace => SpanKind::Text,
+                            Delimiter::Bracket => SpanKind::Text,
                            Delimiter::Angle if space.0.is_empty() => SpanKind::Constructor,
                            Delimiter::Angle => SpanKind::Text,
                        },
@ -336,7 +342,7 @@ mod tests {
            ),
            (
                "unsized_coercion",
-                Constructor,
+                Text,
            ),
            (
                "[",
@ -398,7 +404,7 @@ mod tests {
            ),
            (
                "",
-                StringSurroundings,
+                Surroundings,
            ),
            (
                "\"",
@ -414,7 +420,7 @@ mod tests {
            ),
            (
                "",
-                StringSurroundings,
+                Surroundings,
            ),
            (
                ")",
@ -476,7 +482,7 @@ mod tests {
            ),
            (
                "core",
-                Constructor,
+                Text,
            ),
            (
                "[",
@ -572,7 +578,7 @@ mod tests {
            ),
            (
                "core",
-                Constructor,
+                Text,
            ),
            (
                "[",
@ -670,7 +676,7 @@ mod tests {
            ),
            (
                "unsized_coercion",
-                Constructor,
+                Text,
            ),
            (
                "[",
@ -884,7 +890,7 @@ mod tests {
            ),
            (
                "alloc",
-                Constructor,
+                Text,
            ),
            (
                "[",
--- a/src/tui/filter.rs
+++ b/src/tui/filter.rs
@ -76,9 +76,7 @@ impl Matcher {
                .spans()
                .find(span, name)
                .is_some_and(|v| value.matches(v)),
-            Matcher::Message { value } => {
+            Matcher::Message { value } => entry.message_or_name().is_some_and(|v| value.matches(v)),
                entry.message_or_name().is_some_and(|v| value.matches(&v))
            }
        }
    }
--- a/src/tui/log_viewer/filters.rs
+++ b/src/tui/log_viewer/filters.rs
@ -30,7 +30,7 @@ impl Filters {
        if let Some(path) = &path
            && path.exists()
        {
-            match File::open(&path) {
+            match File::open(path) {
                Ok(f) => match serde_json::from_reader(f) {
                    Ok(i) => {
                        return Self {
@ -73,7 +73,7 @@ impl Filters {
                .create(true)
                .write(true)
                .truncate(true)
-                .open(&path)
+                .open(path)
            {
                Ok(f) => {
                    if let Err(e) = serde_json::to_writer(f, self) {
--- a/src/tui/widgets/items.rs
+++ b/src/tui/widgets/items.rs
@ -97,7 +97,7 @@ impl Widget for Styled<'_, &Items<'_>> {
                                    .selected()
                                    .as_ref()
                                    .and_then(|i| i.message_or_name())
-                                    .is_some_and(|m| &m == msg)
+                                    .is_some_and(|m| m == msg)
                                {
                                    line.highlight(Highlighted::All);
                                }
--- a/src/tui/widgets/line_text.rs
+++ b/src/tui/widgets/line_text.rs
@ -170,7 +170,7 @@ pub fn style_span(kind: SpanKind, style: Style, styles: &Styles) -> Style {
        SpanKind::Path => style.fg(styles.literal).underlined(),
        SpanKind::Space(_) => style,
        SpanKind::Constructor => style.fg(styles.literal),
-        SpanKind::StringSurroundings => style.fg(styles.faded),
+        SpanKind::Surroundings => style.fg(styles.faded),
        SpanKind::Text => style,
    }
 }