diff --git a/Cargo.lock b/Cargo.lock index 1cc7a5b..32c3a66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,7 +100,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -109,6 +118,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -233,6 +248,17 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "windows-sys", +] + [[package]] name = "convert_case" version = "0.10.0" @@ -416,6 +442,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "equivalent" version = "1.0.2" @@ -447,10 +479,16 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" dependencies = [ - "bit-set", + "bit-set 0.5.3", "regex", ] +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "filedescriptor" version = "0.8.3" @@ -613,6 +651,18 @@ dependencies = [ "rustversion", ] +[[package]] +name = "insta" +version = "1.47.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4a6248eb93a4401ed2f37dfe8ea592d3cf05b7cf4f8efa867b6895af7e094e" +dependencies = [ + "console", + "once_cell", + "similar", + "tempfile", +] + [[package]] name = "instability" version = "0.3.11" @@ -769,6 +819,16 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "logparse" +version = "0.1.1" +dependencies = [ + "insta", + "proptest", + "proptest-derive", + "winnow", +] + [[package]] name = "loom" version = "0.7.2" @@ -1042,7 +1102,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -1094,6 +1154,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -1113,6 +1182,42 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.11.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" +dependencies = [ + "bit-set 0.8.0", + "bit-vec 0.8.0", + "bitflags 2.11.0", + "num-traits", + "rand 0.9.2", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "proptest-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c57924a81864dddafba92e1bf92f9bf82f97096c44489548a60e888e1547549b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.44" @@ -1134,7 +1239,27 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -1143,6 +1268,24 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core 0.9.5", +] + [[package]] name = "ratatui" version = "0.30.0" @@ -1285,6 +1428,7 @@ dependencies = [ "dumpster", "itertools", "jiff", + "logparse", "nix 0.31.1", "ratatui", "ratatui-themes", @@ -1323,6 +1467,18 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" version = "1.0.23" @@ -1447,6 +1603,12 @@ dependencies = [ "libc", ] +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "siphasher" version = "1.0.2" @@ -1514,6 +1676,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +dependencies = [ + "fastrand", + "getrandom 0.4.1", + "once_cell", + "rustix", + "windows-sys", +] + [[package]] name = "terminfo" version = "0.9.0" @@ -1718,6 +1893,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "unarray" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1792,6 +1973,15 @@ dependencies = [ "utf8parse", ] +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2013,6 +2203,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "winnow" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -2101,6 +2300,26 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 8c3d386..de9d500 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,10 @@ edition = "2024" name = "lv" path = "./src/main.rs" +[workspace] +members = [".", "./logparse/"] +default-members = [".", "./logparse/"] + [dependencies] clap = {version="4.5", features=["derive", "string"]} jiff = {version = "0.2", features = ["serde"]} @@ -21,3 +25,4 @@ nix = {version = "0.31", features = ["process", "signal"]} regex = "1" crossterm = "*" dumpster = "2.1" 
+logparse = {path = "./logparse/", version="0.1.0"} diff --git a/logparse/Cargo.toml b/logparse/Cargo.toml new file mode 100644 index 0000000..7f86bde --- /dev/null +++ b/logparse/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "logparse" +version = "0.1.2" +edition = "2024" +description = "parse arbitrary messages containing rust-like debug output to syntax highlight them" +authors = ["Jana Dönszelmann "] +license = "MIT OR Apache-2.0" +documentation = "https://docs.rs/logparse" +homepage = "https://git.donsz.nl/jana/logviewer" +repository = "https://git.donsz.nl/jana/logviewer" +readme = "README.md" + +[dependencies] +winnow = {version="1", features=["parser"]} +proptest = "1" +proptest-derive = "0.8" +insta = "1" diff --git a/logparse/README.md b/logparse/README.md new file mode 100644 index 0000000..5d90e09 --- /dev/null +++ b/logparse/README.md @@ -0,0 +1,16 @@ + +# Logparse + +This crate performs best-effort parsing of arbitrary text that may contain Rust-like debug output. +It is intended for logs that contain Rust's debug output, in which you might want to highlight matching parentheses, brackets, etc. + +Log messages, when parsed, can *always* be pretty-printed back into their exact textual form. +The syntax tree unambiguously records any spaces in the source text. +There are tests that ensure this property for a very large number of inputs. + +In addition, *any* log message can be parsed. `parse_input` returns a `Result`, +but as far as I'm aware, +the parser is flexible enough to *never* reject an input, +and always falls back to categorizing text as *just* text. +There are also property tests to ensure this. + diff --git a/logparse/src/ast.rs b/logparse/src/ast.rs new file mode 100644 index 0000000..71315ff --- /dev/null +++ b/logparse/src/ast.rs @@ -0,0 +1,325 @@ +use proptest_derive::Arbitrary; +use std::borrow::Cow; + +/// See [`Token::Separated`].
+#[derive(Copy, Clone, Debug, PartialEq)] +#[allow(missing_docs)] +pub enum Separator { + Eq, + Colon, + DoubleColon, +} + +/// See [`Token::String`]. +#[derive(Copy, Clone, Debug, Arbitrary, PartialEq)] +#[allow(missing_docs)] +pub enum QuoteType { + Single, + Double, + Backtick, +} + +/// See [`Token::Delimited`]. +#[derive(Clone, Debug, Arbitrary, PartialEq)] +#[allow(missing_docs)] +pub enum Delimiter { + Paren, + Bracket, + Brace, + Angle, +} + +/// See [`Token::String`]. +#[derive(Clone, Debug, PartialEq)] +pub struct AnyString<'a> { + /// Any text immediately preceding the string, like the `f` prefix in `f"mrrp"`. + pub prefix: Cow<'a, str>, + /// The kind of string quote used. + pub ty: QuoteType, + /// The string's text. + pub contents: Cow<'a, str>, + /// How many hashtags were used around the string. + /// Strings have to open and close with an equal number of hashtags. + pub num_hashtags: usize, + /// Any text immediately following the string. + pub suffix: Cow<'a, str>, +} + +/// Any sequence of 0 or more whitespace characters. +#[derive(Clone, Debug, PartialEq)] +pub struct Space<'a>(pub Cow<'a, str>); + +#[derive(Copy, Clone, Debug, PartialEq, Arbitrary)] +#[allow(missing_docs)] +pub enum PathSep { + /// Used at the start of a path that has no leading separator. + None, + Slash, + Backslash, +} + +/// A segment of a path, with a leading separator. +#[derive(Clone, Debug, PartialEq)] +#[allow(missing_docs)] +pub struct PathSegment<'a> { + pub leading_separator: PathSep, + pub segment: Cow<'a, str>, +} + +/// See [`Path`]. This is the `:3:4` part in `mrrp.rs:3:4`. +#[derive(Clone, Debug, PartialEq)] +pub struct FileLocation<'a> { + /// Line number. + pub line: Cow<'a, str>, + /// Optionally, a column offset. + pub offset: Option>, +} + +/// See [`Path`].
+#[derive(Clone, Debug, PartialEq)] +#[allow(missing_docs)] +pub struct FileName<'a> { + pub leading_separator: PathSep, + pub segment: Cow<'a, str>, + pub ext_excluding_dot: Option>, + pub location: Option>, +} + +/// See [`Token::Path`]. +#[derive(Clone, Debug, PartialEq)] +#[allow(missing_docs)] +pub struct Path<'a> { + pub drive_excluding_colon: Option, + pub segments: Vec>, + + /// A filename: the last path segment, optionally with an extension and a line/column location. + pub filename: FileName<'a>, +} + +/// See [`Token::Number`]. +#[derive(Clone, Debug, PartialEq)] +pub struct Number<'a>(pub Cow<'a, str>); + +/// Anything that doesn't contain spaces, and that can be a prefix of `Delimited`. +/// e.g. an English word, or a Rust `::`-separated path +#[derive(Clone, Debug, PartialEq)] +pub enum Atom<'a> { + /// Raw text + Text(Cow<'a, str>), +} + +/// The main AST node: each of these is treated as one unit; units may be separated by spaces. +#[derive(Clone, Debug, PartialEq)] +pub enum Token<'a> { + /// A literal `true`. + /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_true, Token::True); + /// assert!(is_true("true")); + /// + /// // some counterexamples + /// assert!(!is_true("14")); + /// assert!(!is_true("`mrow!`")); + /// assert!(!is_true("false")); + /// ``` + True, + /// A literal `false`. + /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_false, Token::False); + /// assert!(is_false("false")); + /// + /// // some counterexamples + /// assert!(!is_false("14")); + /// assert!(!is_false("`mrow!`")); + /// assert!(!is_false("true")); + /// ``` + False, + /// A literal `None`.
+ /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_none, Token::None); + /// assert!(is_none("None")); + /// + /// // some counterexamples + /// assert!(!is_none("14")); + /// assert!(!is_none("`mrow!`")); + /// assert!(!is_none("true")); + /// ``` + None, + + /// A path, anything that looks vaguely path-like. + /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_path, Token::Path(_)); + /// assert!(is_path("/")); + /// assert!(is_path("/home")); + /// assert!(is_path("C:/Users")); + /// assert!(is_path("C:\\Users")); + /// assert!(is_path("/home/some_path/file-some_where.rs")); + /// assert!(is_path("/home/some_path/file-and-line.rs:3")); + /// assert!(is_path("/home/some_path/file-and-line.rs:3:4")); + /// + /// // some counterexamples + /// assert!(!is_path(":14")); + /// assert!(!is_path(":14:15")); + /// assert!(!is_path("14")); + /// assert!(!is_path("`mrow!`")); + /// assert!(!is_path("true")); + /// ``` + Path(Path<'a>), + /// A string literal, with quite flexible rules on what's considered a string. + /// Any rust string literal parses, and a little more. 
+ /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_string, Token::String(_)); + /// assert!(is_string(r##""mrrp""##)); + /// assert!(is_string(r##"'mrrp'"##)); + /// assert!(is_string(r##"`mrrp`"##)); + /// assert!(is_string(r##"b"mrrp""##)); + /// assert!(is_string(r##"f"mrrp""##)); + /// assert!(is_string(r##"c"mrrp""##)); + /// assert!(is_string(r##"r"mrrp""##)); + /// assert!(is_string(r##"r#"mrrp"#"##)); + /// assert!(is_string(r##"b#"mrrp"#"##)); + /// assert!(is_string(r####"anyprefix###"mrrp"###anysuffix"####)); + /// + /// // some counterexamples + /// assert!(!is_string("14")); + /// assert!(!is_string("true")); + /// assert!(!is_string("/home")); + /// + /// // non-matching hashtags + /// assert!(!is_string(r#"r#"mrrp""#)); + /// assert!(!is_string(r###"r#"mrrp"##"###)); + /// assert!(!is_string(r##"r"mrrp"#"##)); + /// + /// // non-matching quotes + /// assert!(!is_string(r#"`mrrp""#)); + /// assert!(!is_string(r#"`mrrp'"#)); + /// ``` + String(AnyString<'a>), + /// A number, float or int. + /// There must not be any alphabetic character after the number, without a space in between. + /// That's to guard against finding numbers inside hashes, for example. + /// + /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_number, Token::Number(_)); + /// assert!(is_number("1")); + /// assert!(is_number("99999999")); + /// assert!(is_number("1.5")); + /// assert!(is_number("1e10")); + /// assert!(is_number("-1")); + /// assert!(is_number("-1.5")); + /// + /// // some counterexamples + /// assert!(!is_number("`mrow!`")); + /// assert!(!is_number("true")); + /// + /// // suffix + /// assert!(!is_number("14a")); + /// ``` + Number(Number<'a>), + + // TODO: RustPath + /// Any token, separated by a [`Separator`], followed by another segment.
+ /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_sep, Token::Separated {..}); + /// assert!(is_sep("a = 3")); + /// assert!(is_sep("a::b::c")); + /// assert!(is_sep("a: 5")); + /// + /// // some counterexamples + /// assert!(!is_sep("`mrow!`")); + /// assert!(!is_sep("true")); + /// assert!(!is_sep("14")); + /// ``` + Separated { + /// The part before the separator. + before: Box>, + /// The space between the `before` part, and the separator. + space_before: Space<'a>, + /// The separator itself. + separator: Separator, + /// The segment after the separator. + after: Box>, + }, + /// A segment, delimited by parentheses, braces or brackets, with an optional prefix. + /// The prefix will be classified as a constructor if the delimiter is braces or parentheses. + /// + /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_delim, Token::Delimited(_)); + /// assert!(is_delim("()")); + /// assert!(is_delim("[]")); + /// assert!(is_delim("{}")); + /// assert!(is_delim("(1)")); + /// assert!(is_delim("[1]")); + /// assert!(is_delim("{1}")); + /// assert!(is_delim("{/home/mrrp.rs}")); + /// assert!(is_delim("constructor{/home/mrrp.rs}")); + /// assert!(is_delim("constructor {/home/mrrp.rs}")); + /// + /// // some counterexamples + /// assert!(!is_delim("`mrow!`")); + /// assert!(!is_delim("true")); + /// assert!(!is_delim("14")); + /// ``` + Delimited(Delimited<'a>), + + /// Any other text, that couldn't otherwise be categorized. + /// For example: + /// + /// ```rust + /// # logparse::generate_ast_recognizer!(is_atom, Token::Atom(_)); + /// assert!(is_atom("mrrp")); + /// + /// // some counterexamples + /// assert!(!is_atom("`mrow!`")); + /// assert!(!is_atom("true")); + /// assert!(!is_atom("14")); + /// ``` + Atom(Atom<'a>), +} + +/// See [`Token::Delimited`]. +#[derive(Clone, Debug, PartialEq)] +pub struct Delimited<'a> { + /// Delimiter prefix, i.e. the constructor part in `Some(mrrp)`. 
+ pub prefix: Option<(Atom<'a>, Space<'a>)>, + /// The kind of delimiter itself. + pub delimiter: Delimiter, + /// The contents of the delimited segment. + pub contents: Segments<'a>, +} + +/// A Segment of text, with optional leading space. +#[derive(Clone, Debug, PartialEq)] +pub struct Segment<'a> { + /// Any spaces (or empty) before this segment. + pub leading_space: Space<'a>, + /// The segment's contents itself. + pub token: Token<'a>, +} + +/// Segments, with possible trailing space. +/// +/// Any input is split up into many segments, with their surrounding spaces. +/// Each segment might be a meaningless atom, or may be recognized to have more meaning. +#[derive(Clone, Debug, PartialEq)] +pub struct Segments<'a> { + /// The segments themselves. + pub segments: Vec>, + /// Any spaces (or empty) after this sequence of segments. + pub trailing_space: Space<'a>, +} diff --git a/logparse/src/display.rs b/logparse/src/display.rs new file mode 100644 index 0000000..728f489 --- /dev/null +++ b/logparse/src/display.rs @@ -0,0 +1,188 @@ +use super::ast::*; +use std::fmt::{self, Display}; + +impl Display for Separator { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Separator::Eq => write!(f, "="), + Separator::Colon => write!(f, ":"), + Separator::DoubleColon => write!(f, "::"), + } + } +} + +impl Display for QuoteType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + QuoteType::Single => write!(f, "\'"), + QuoteType::Double => write!(f, "\""), + QuoteType::Backtick => write!(f, "`"), + } + } +} + +impl Delimiter { + fn fmt_start(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Delimiter::Paren => write!(f, "("), + Delimiter::Bracket => write!(f, "["), + Delimiter::Brace => write!(f, "{{"), + Delimiter::Angle => write!(f, "<"), + } + } + + fn fmt_end(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Delimiter::Paren => write!(f, ")"), + Delimiter::Bracket => write!(f, 
"]"), + Delimiter::Brace => write!(f, "}}"), + Delimiter::Angle => write!(f, ">"), + } + } +} + +impl<'a> Display for AnyString<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.prefix)?; + for _ in 0..self.num_hashtags { + write!(f, "#")?; + } + write!(f, "{}", self.ty)?; + write!(f, "{}", self.contents)?; + write!(f, "{}", self.ty)?; + + for _ in 0..self.num_hashtags { + write!(f, "#")?; + } + write!(f, "{}", self.suffix) + } +} + +impl<'a> Display for Space<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Display for PathSep { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PathSep::Slash => write!(f, "/"), + PathSep::Backslash => write!(f, "\\"), + PathSep::None => Ok(()), + } + } +} + +impl<'a> Display for PathSegment<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.leading_separator)?; + write!(f, "{}", self.segment) + } +} + +impl<'a> Display for FileLocation<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, ":{}", self.line)?; + if let Some(offset) = &self.offset { + write!(f, ":{offset}")?; + } + Ok(()) + } +} + +impl<'a> Display for FileName<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.leading_separator)?; + write!(f, "{}", self.segment)?; + if let Some(ext) = &self.ext_excluding_dot { + write!(f, ".{ext}")?; + } + if let Some(loc) = &self.location { + write!(f, "{loc}")?; + } + Ok(()) + } +} + +impl<'a> Display for Path<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self { + drive_excluding_colon, + segments, + filename, + } = self; + + if let Some(drive) = &drive_excluding_colon { + write!(f, "{drive}:")?; + } + + for segment in segments { + write!(f, "{segment}")?; + } + write!(f, "{filename}") + } +} + +impl<'a> Display for Number<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + write!(f, "{}", self.0) + } +} + +impl<'a> Display for Atom<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Atom::Text(text) => write!(f, "{text}"), + } + } +} + +impl<'a> Display for Token<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Token::Number(number) => write!(f, "{number}"), + Token::True => write!(f, "true"), + Token::False => write!(f, "false"), + Token::None => write!(f, "None"), + Token::Atom(atom) => write!(f, "{atom}"), + Token::Path(path) => write!(f, "{path}"), + Token::Separated { + before, + space_before, + separator, + after, + } => write!(f, "{before}{space_before}{separator}{after}"), + Token::Delimited(delimited) => write!(f, "{delimited}"), + Token::String(s) => write!(f, "{s}"), + } + } +} + +impl<'a> Display for Delimited<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some((prefix, space)) = &self.prefix { + write!(f, "{prefix}")?; + write!(f, "{space}")?; + } + self.delimiter.fmt_start(f)?; + write!(f, "{}", self.contents)?; + self.delimiter.fmt_end(f) + } +} + +impl<'a> Display for Segment<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.leading_space)?; + write!(f, "{}", self.token) + } +} + +impl<'a> Display for Segments<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for segment in &self.segments { + write!(f, "{segment}")?; + } + write!(f, "{}", self.trailing_space) + } +} diff --git a/logparse/src/lib.rs b/logparse/src/lib.rs new file mode 100644 index 0000000..957b657 --- /dev/null +++ b/logparse/src/lib.rs @@ -0,0 +1,44 @@ +#![deny(missing_docs)] +#![deny(warnings)] +#![doc=include_str!("../README.md")] + +/// The structure of parsed log lines +pub mod ast; +mod display; +mod parse; +mod spans; + +#[doc(hidden)] +#[macro_export] +macro_rules! 
generate_ast_recognizer { + ($name: ident, $pattern: pat) => { + fn $name(s: &str) -> bool { + use logparse::{ast::*, *}; + let l = parse_input(s).unwrap(); + if !l.trailing_space.0.is_empty() { + eprintln!("trailing space {l:?}"); + return false; + } + if l.segments.len() != 1 { + eprintln!("more segments {l:?}"); + return false; + } + if !l.segments[0].leading_space.0.is_empty() { + eprintln!("leading space {l:?}"); + return false; + } + if let $pattern = &l.segments[0].token { + true + } else { + eprintln!("pattern {l:?}"); + false + } + } + }; +} + +#[cfg(test)] +mod proptesting; + +pub use parse::parse_input; +pub use spans::{Config, Span, SpanKind, into_spans}; diff --git a/logparse/src/parse.rs b/logparse/src/parse.rs new file mode 100644 index 0000000..fd24dd9 --- /dev/null +++ b/logparse/src/parse.rs @@ -0,0 +1,769 @@ +use std::borrow::Cow; + +use super::ast::*; +use winnow::{Parser, combinator::trace, error::ParserError}; + +impl<'a> AnyString<'a> { + fn parse>() -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*, token::*}; + + let quote = alt(( + '`'.value(QuoteType::Backtick), + '\''.value(QuoteType::Single), + '\"'.value(QuoteType::Double), + )); + + macro_rules! 
surrounding { + () => { + take_while(0.., |b: char| { + !b.is_whitespace() && b.is_alphabetic() && !['\'', '"', '`', '#'].contains(&b) + }) + }; + } + + let preamble = ( + surrounding!(), + take_while(0.., |c| c == '#').map(|i: &'a str| i.len()), + quote, + ); + + trace( + "string", + preamble.flat_map( + |(prefix, num_hashtags, quote): (&'a str, usize, QuoteType)| { + let end = ( + match quote { + QuoteType::Single => '\'', + QuoteType::Double => '\"', + QuoteType::Backtick => '`', + }, + repeat::<_, _, Cow<'a, str>, _, _>( + num_hashtags..=num_hashtags, + literal("#"), + ), + ); + + let contents = repeat_till(0.., any, end).map(|(contents, _)| contents); + + (contents, surrounding!()).map( + move |(contents, suffix): (Cow<'a, str>, &'a str)| Self { + prefix: prefix.into(), + ty: quote, + contents, + num_hashtags, + suffix: suffix.into(), + }, + ) + }, + ), + ) + } +} + +impl<'a> Space<'a> { + fn parse>() -> impl Parser<&'a str, Self, E> { + use winnow::{prelude::*, token::*}; + + trace( + "space", + take_while(0.., |b: char| b.is_whitespace()).map(|i: &'a str| Self(i.into())), + ) + } +} + +impl PathSep { + fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*}; + + trace( + "pathsep", + alt(('/'.value(Self::Slash), '\\'.value(Self::Backslash))), + ) + } +} + +impl Separator { + fn parse<'a, E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*, token::*}; + + trace( + "separator", + alt(( + "::".value(Self::DoubleColon), + (literal('=')).value(Self::Eq), + (literal(':')).value(Self::Colon), + )), + ) + } +} + +impl<'a> FileLocation<'a> { + fn parse>() -> impl Parser<&'a str, Self, E> { + use winnow::{ascii::dec_uint, combinator::*, prelude::*}; + + let colon_number = || { + (":".value(()), dec_uint::<_, u64, _>.take()) + .map(|(_, number): (_, &str)| Cow::Borrowed(number)) + }; + let line_offset = (colon_number(), opt(colon_number())); + + trace( + 
"file location", + line_offset.map(|(line, offset)| FileLocation { line, offset }), + ) + } +} + +impl<'a> FileName<'a> { + fn parse(segment: PathSegment<'a>, location: Option>) -> Self { + fn rsplit<'a>( + input: Cow<'a, str>, + delimiter: char, + ) -> Option<(Cow<'a, str>, Cow<'a, str>)> { + match input { + Cow::Borrowed(s) => s + .rsplit_once(delimiter) + .map(|(a, b)| (Cow::Borrowed(a), Cow::Borrowed(b))), + Cow::Owned(s) => s + .rsplit_once(delimiter) + .map(|(a, b)| (Cow::Owned(a.to_string()), Cow::Owned(b.to_string()))), + } + } + + let (new_segment, ext_excluding_dot) = + if let Some((segment, ext_excluding_dot)) = rsplit(segment.segment.clone(), '.') { + (segment, Some(ext_excluding_dot)) + } else { + (segment.segment, None) + }; + + Self { + leading_separator: segment.leading_separator, + segment: new_segment, + ext_excluding_dot, + location, + } + } +} + +impl<'a> Path<'a> { + fn parse>() -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*, token::*}; + + let terminator = || { + alt(( + eof.value(()), + any::<&'a str, E> + .verify(|i: &char| { + (*i).is_whitespace() + || !(i.is_alphanumeric() + || ['_', '-', '\"', '\'', '.', '/', '\\'].contains(i)) + }) + .value(()), + )) + }; + + let terminator_or_sep = || alt((PathSep::parse().value(()), terminator())); + + let till_next_sep = || { + trace( + "till next sep", + repeat_till(0.., any::<&'a str, E>, peek(terminator_or_sep())) + .map(|(segment, _)| segment), + ) + }; + + let sep_and_next = + (PathSep::parse(), till_next_sep()).map(|(leading_separator, segment)| PathSegment { + leading_separator, + segment, + }); + let opt_sep_and_next = + (opt(PathSep::parse()), till_next_sep()).map(|(leading_separator, segment)| { + PathSegment { + leading_separator: leading_separator.unwrap_or(PathSep::None), + segment, + } + }); + + let drive = opt(( + any::<&'a str, E>.verify(|x: &char| matches!(*x, 'A'..='Z' | 'a' ..= 'z')), + ':', + )) + .map(|i| i.map(|(letter, _): (char, char)| letter)); + 
let drive_and_segments = ( + drive, + opt_sep_and_next, + repeat_till(0.., sep_and_next, peek(terminator())) + .map(|(segments, _): (Vec, _)| segments), + opt(FileLocation::parse()), + ); + + trace( + "path", + drive_and_segments + .map(|(drive, segment, segments, location)| { + let (segments, last) = { + let mut segments = segments; + segments.insert(0, segment); + let last = segments.pop().unwrap(); + (segments, last) + }; + + let filename = FileName::parse(last, location); + + Self { + drive_excluding_colon: drive, + segments, + filename, + } + }) + .verify(|i| { + !i.segments.is_empty() + || i.filename.ext_excluding_dot.is_some() + || !matches!(i.filename.leading_separator, PathSep::None) + }), + ) + } +} + +impl<'a> Atom<'a> { + fn parse, T: 'a>( + terminated_by: impl Parser<&'a str, T, E>, + ) -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*, token::*}; + + let text = repeat::<_, _, Cow<'a, str>, _, _>( + 1.., + ( + peek(not(terminated_by)), + any::<&str, _>.verify(move |i: &char| !(*i).is_whitespace()), + ) + .map(|(_, i)| i), + ) + .map(Self::Text); + + trace("atom", alt((text,))) + } +} + +impl<'a> Token<'a> { + fn parse_without_separator + 'a>() -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*}; + + let delimited: Box> = + Box::new(Delimited::parse().map(Self::Delimited)); + + trace( + "token-without-sep", + alt(( + "true".value(Self::True), + "false".value(Self::False), + "None".value(Self::None), + Number::parse().map(Self::Number), + Path::parse().map(Self::Path), + AnyString::parse().map(Self::String), + delimited, + Atom::parse(alt((Separator::parse().value(""), ")", "]", "}", ">"))) + .map(Self::Atom), + )), + ) + } + + fn parse + 'a>() -> Box + 'a> { + use winnow::{combinator::*, prelude::*}; + + let before = Self::parse_without_separator(); + + Box::new(trace( + "token", + alt(( + ( + before, + opt( + (Space::parse(), Separator::parse()).flat_map(|(space, sep)| { + let box_dyn_segment: Box> = + 
Box::new(Segment::parse()); + box_dyn_segment.map(move |segment| (space.clone(), sep, segment)) + }), + ), + ) + .map(|(before, trailer)| { + if let Some((space_before, separator, after)) = trailer { + Token::Separated { + before: Box::new(before), + space_before, + separator, + after: Box::new(after), + } + } else { + before + } + }), + Atom::parse(fail::<_, (), _>).map(Self::Atom), + )), + )) + } +} + +impl<'a> Delimited<'a> { + fn parse + 'a>() -> impl Parser<&'a str, Self, E> { + use winnow::{combinator::*, prelude::*, token::*}; + + trace( + "delimited", + ( + opt(( + Atom::parse(alt(("(", "[", "{", "<", Separator::parse().value("")))), + Space::parse(), + )), + alt(( + literal('(').map(|_| literal(')').value(Delimiter::Paren)), + literal('[').map(|_| literal(']').value(Delimiter::Bracket)), + literal('{').map(|_| literal('}').value(Delimiter::Brace)), + literal('<').map(|_| literal('>').value(Delimiter::Angle)), + )) + .flat_map(|end| Segments::parse(end)), + ) + .map(|(prefix, (contents, delimiter))| Self { + prefix, + delimiter, + contents, + }), + ) + } +} + +impl<'a> Segments<'a> { + fn parse + 'a, End: 'a>( + end: impl Parser<&'a str, End, E> + 'a, + ) -> Box + 'a> { + use winnow::{combinator::*, prelude::*}; + + Box::new(trace( + "segments", + repeat_till(0.., Segment::parse(), (Space::parse(), end)).map( + |(segments, (trailing_space, end)): (Vec<_>, _)| { + ( + Self { + segments, + trailing_space, + }, + end, + ) + }, + ), + )) + } +} + +impl<'a> Number<'a> { + fn parse>() -> impl Parser<&'a str, Self, E> { + use winnow::{ascii::*, combinator::*, prelude::*, token::*}; + + trace( + "number", + ( + alt((float::<_, f64, _>.take(), dec_int::<_, i64, _>.take())), + peek(not(any::<&'a str, E>.verify(|x: &char| x.is_alphabetic()))), + ) + .map(|(i, _): (&str, _)| Self(i.into())), + ) + } +} + +impl<'a> Segment<'a> { + fn parse + 'a>() -> impl Parser<&'a str, Self, E> { + use winnow::prelude::*; + + trace( + "segment", + (Space::parse(), 
Token::parse()).map(|(leading_space, token)| Self { + leading_space, + token, + }), + ) + } +} + +/// Parses an input string (a log line) into an ast. +/// +/// This *should* never error. +/// Many tests ensure that arbitrary input can be parsed. +/// Even if non-structured or completely random. +/// The parser will gracefully accept such strings anyway, and just categorize them suboptimally. +pub fn parse_input<'a>(i: &'a str) -> Result, String> { + use winnow::combinator::eof; + Segments::parse(eof::<&str, winnow::error::EmptyError>) + .map(|(segments, _)| segments) + .parse(i) + .map_err(|e| e.to_string()) +} + +#[cfg(test)] +mod tests { + use insta::assert_debug_snapshot; + use winnow::Parser; + + use crate::{ + ast::{Path, Segments}, + parse_input, + }; + + fn parse_path_only<'a>(i: &'a str) -> Path<'a> { + Path::parse::().parse(i).unwrap() + } + + fn parse<'a>(input: &'a str) -> Segments<'a> { + parse_input(input).unwrap() + } + + #[test] + fn parse_path() { + assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs"#), @r#" + Path { + drive_excluding_colon: None, + segments: [ + PathSegment { + leading_separator: None, + segment: "tests", + }, + PathSegment { + leading_separator: Slash, + segment: "ui", + }, + PathSegment { + leading_separator: Slash, + segment: "impl-trait", + }, + ], + filename: FileName { + leading_separator: Slash, + segment: "unsized_coercion", + ext_excluding_dot: Some( + "rs", + ), + location: None, + }, + } + "#); + } + + #[test] + fn parse_path_with_file_line() { + assert_debug_snapshot!(parse_path_only(r#"tests/ui/impl-trait/unsized_coercion.rs:3:4"#), @r#" + Path { + drive_excluding_colon: None, + segments: [ + PathSegment { + leading_separator: None, + segment: "tests", + }, + PathSegment { + leading_separator: Slash, + segment: "ui", + }, + PathSegment { + leading_separator: Slash, + segment: "impl-trait", + }, + ], + filename: FileName { + leading_separator: Slash, + segment: "unsized_coercion", + 
ext_excluding_dot: Some( + "rs", + ), + location: Some( + FileLocation { + line: "3", + offset: Some( + "4", + ), + }, + ), + }, + } + "#); + } + + #[test] + fn parse_empty() { + assert_debug_snapshot!(parse(r#""#), @r#" + Segments { + segments: [], + trailing_space: Space( + "", + ), + } + "#) + } + + #[test] + fn parse_text() { + assert_debug_snapshot!(parse(r#"abc"#), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: Atom( + Text( + "abc", + ), + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#) + } + + #[test] + fn parse_boolean() { + assert_debug_snapshot!(parse(r#"true"#), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: True, + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r#"false"#), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: False, + }, + ], + trailing_space: Space( + "", + ), + } + "#); + } + + #[test] + fn parse_string() { + assert_debug_snapshot!(parse(r##""foo""##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "", + ty: Double, + contents: "foo", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"#"foo"#"##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "", + ty: Double, + contents: "foo", + num_hashtags: 1, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"r#"foo"#"##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "r", + ty: Double, + contents: "foo", + num_hashtags: 1, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"c"foo""##), @r#" + Segments { + 
segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "c", + ty: Double, + contents: "foo", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"b"foo""##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "b", + ty: Double, + contents: "foo", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"'a'"##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "", + ty: Single, + contents: "a", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"`b`"##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "", + ty: Backtick, + contents: "b", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"b'foo'"##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "b", + ty: Single, + contents: "foo", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + assert_debug_snapshot!(parse(r##"b`foo`"##), @r#" + Segments { + segments: [ + Segment { + leading_space: Space( + "", + ), + token: String( + AnyString { + prefix: "b", + ty: Backtick, + contents: "foo", + num_hashtags: 0, + suffix: "", + }, + ), + }, + ], + trailing_space: Space( + "", + ), + } + "#); + } +} diff --git a/logparse/src/proptesting.rs b/logparse/src/proptesting.rs new file mode 100644 index 0000000..816283c --- /dev/null +++ b/logparse/src/proptesting.rs @@ -0,0 +1,194 @@ +use super::ast::*; +use crate::parse_input; +use 
crate::{Config, into_spans}; +use proptest::prelude::*; +use proptest::proptest; + +proptest! { + #[test] + fn proptest_from_segments(original in Segments::arb(Token::arb())) { + let stringified = original.to_string(); + let parsed = parse_input(&stringified).unwrap(); + let stringified_again = parsed.to_string(); + assert_eq!(stringified, stringified_again, "parsed: `{parsed:#?}`"); + } + + #[test] + fn proptest_from_random_text(original in ".*") { + let parsed = parse_input(&original).unwrap(); + let stringified_again = parsed.to_string(); + assert_eq!(original, stringified_again, "parsed: `{parsed:#?}`"); + } + + #[test] + fn proptest_into_spans(original in Segments::arb(Token::arb())) { + let stringified = original.to_string(); + let spans = into_spans(original, Config {collapse_space: false}); + let spans_concatenated = spans.into_iter().map(|i| i.text).collect::(); + assert_eq!(stringified, spans_concatenated); + } +} + +impl AnyString<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + let prefix = "\\w*"; + let ty = any::(); + let contents = "\\w*"; + let num_hashtags = 0usize..3; + let suffix = "\\w*"; + + (prefix, ty, contents, num_hashtags, suffix).prop_map( + |(prefix, ty, contents, num_hashtags, suffix)| Self { + prefix: prefix.into(), + ty, + contents: contents.into(), + num_hashtags, + suffix: suffix.into(), + }, + ) + } +} + +impl PathSegment<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + (any::(), "\\w*").prop_map(|(leading_separator, segment)| Self { + leading_separator, + segment: segment.into(), + }) + } +} + +impl FileLocation<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + use proptest::option::*; + ("[0-9]{0,4}", of("[0-9]{0,4}")).prop_map(|(line, offset)| Self { + line: line.into(), + offset: offset.map(Into::into), + }) + } +} + +impl Space<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + " *".prop_map(|spaces| Self(spaces.into())) + } +} + +impl FileName<'static> { + #[cfg(test)] + fn arb() -> impl 
Strategy { + use proptest::option::*; + ( + any::(), + "\\w*", + of(".{0,3}"), + of(FileLocation::arb()), + ) + .prop_map( + |(leading_separator, segment, ext_excluding_dot, location)| Self { + leading_separator, + segment: segment.into(), + ext_excluding_dot: ext_excluding_dot.map(Into::into), + location, + }, + ) + } +} + +impl Path<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + use proptest::{char::*, collection::*, option::*}; + ( + of(range('A', 'Z')), + vec(PathSegment::arb(), 0..3), + FileName::arb(), + ) + .prop_map(|(drive_excluding_colon, segments, filename)| Self { + drive_excluding_colon, + segments, + filename, + }) + } +} + +impl Number<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + prop_oneof![ + any::().prop_map(|number| Self(number.to_string().into())), + any::().prop_map(|number| Self(number.to_string().into())) + ] + } +} + +impl Atom<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + "[a-zA-Z]+".prop_map(|i| Self::Text(i.into())) + } +} + +impl Token<'static> { + #[cfg(test)] + fn arb() -> impl Strategy { + let leaf = prop_oneof![ + Just(Self::True), + Just(Self::False), + Just(Self::None), + Path::arb().prop_map(Self::Path), + AnyString::arb().prop_map(Self::String), + Number::arb().prop_map(Self::Number), + Atom::arb().prop_map(Self::Atom), + ]; + + leaf.prop_recursive(4, 64, 16, |token| { + Delimited::arb(token).prop_map(Self::Delimited).boxed() + }) + } +} + +impl Delimited<'static> { + #[cfg(test)] + fn arb(token: impl Strategy>) -> impl Strategy { + use proptest::option::*; + ( + of((Atom::arb(), Space::arb())), + any::(), + Segments::arb(token), + ) + .prop_map(|(prefix, delimiter, contents)| Self { + prefix, + delimiter, + contents, + }) + } +} + +impl Segment<'static> { + #[cfg(test)] + fn arb(token: impl Strategy>) -> impl Strategy { + (Space::arb(), token).prop_map(|(leading_space, token)| Self { + leading_space, + token, + }) + } +} + +impl Segments<'static> { + #[cfg(test)] + fn arb(token: impl 
Strategy>) -> impl Strategy { + use proptest::collection::*; + + (vec(Segment::arb(token), 1..10), Space::arb()).prop_map(|(segments, trailing_space)| { + Self { + segments, + trailing_space, + } + }) + } +} diff --git a/logparse/src/spans.rs b/logparse/src/spans.rs new file mode 100644 index 0000000..59844a7 --- /dev/null +++ b/logparse/src/spans.rs @@ -0,0 +1,992 @@ +use super::ast::*; +use std::borrow::Cow; + +/// Text categories, based on the parsing. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum SpanKind { + /// Parentheses e.g. + /// + /// Stores the delimiter depth, for e.g. rainbow delimiters. + Delimiter(usize), + /// Separators like `=`, ':' and ',' + Separator, + /// Numbers + Number, + /// Known literals, like `true`, `false`, `None`, `Ok`, `Err` + Literal, + /// Strings + String, + /// Paths + Path, + /// Spaces (returns original number of spaces) + Space(usize), + + /// Constructor: the prefix of a delimited block. + /// i.e. `Some` in `Some(3)` + Constructor, + + /// String prefix, suffix, hashtags, etc + StringSurroundings, + + /// Any other text (the default) + Text, +} + +/// A `Span` is a piece of categorized text, based on the parsing done by +/// [`parse_input`](crate::parse_input). +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct Span<'a> { + /// The segment of text. + pub text: Cow<'a, str>, + /// Its category. + pub kind: SpanKind, +} + +/// Configuration options for [`into_spans`] +pub struct Config { + /// Turn sequences of more than 1 space into exactly 1 space. + pub collapse_space: bool, +} + +pub trait IntoSpans<'a>: private::IntoSpansImpl<'a> {} + +/// Turn an ast node into [`Span`]s. 
+pub fn into_spans<'a>(ast: impl IntoSpans<'a>, config: Config) -> Vec> { + let mut cx = private::Context { + config, + res: Vec::new(), + depth: 0, + }; + ast.into_spans(&mut cx); + cx.res +} + +mod private { + use super::*; + + pub struct Context<'a> { + pub config: Config, + pub res: Vec>, + pub depth: usize, + } + + impl<'a> Context<'a> { + fn push(&mut self, text: impl Into>, kind: SpanKind) { + self.res.push(Span { + text: text.into(), + kind, + }) + } + } + + pub trait IntoSpansImpl<'a> { + fn into_spans(self, cx: &mut Context<'a>); + } + + impl<'a, T> IntoSpans<'a> for T where T: IntoSpansImpl<'a> {} + + impl<'a> IntoSpansImpl<'a> for Separator { + fn into_spans(self, cx: &mut Context<'a>) { + match self { + Separator::Eq => cx.push("=", SpanKind::Separator), + Separator::Colon => cx.push(":", SpanKind::Separator), + Separator::DoubleColon => cx.push("::", SpanKind::Separator), + } + } + } + + impl<'a> IntoSpansImpl<'a> for QuoteType { + fn into_spans(self, cx: &mut Context<'a>) { + match self { + QuoteType::Single => cx.push("'", SpanKind::Separator), + QuoteType::Double => cx.push("\"", SpanKind::Separator), + QuoteType::Backtick => cx.push("`", SpanKind::Separator), + } + } + } + + impl<'a> IntoSpansImpl<'a> for AnyString<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + let Self { + prefix, + ty, + contents, + num_hashtags, + suffix, + } = self; + cx.push(prefix, SpanKind::StringSurroundings); + for _ in 0..num_hashtags { + cx.push("#", SpanKind::StringSurroundings) + } + + ty.into_spans(cx); + cx.push(contents, SpanKind::String); + ty.into_spans(cx); + + for _ in 0..num_hashtags { + cx.push("#", SpanKind::StringSurroundings) + } + cx.push(suffix, SpanKind::StringSurroundings); + } + } + + impl<'a> IntoSpansImpl<'a> for Path<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + cx.push(self.to_string(), SpanKind::Path) + } + } + + impl<'a> IntoSpansImpl<'a> for Number<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + cx.push(self.0, 
SpanKind::Number) + } + } + + impl<'a> IntoSpansImpl<'a> for Atom<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + match self { + Atom::Text(text) => cx.push(text, SpanKind::Text), + } + } + } + + impl<'a> IntoSpansImpl<'a> for Space<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + match self.0.len() { + 0 => {} + 1 => cx.push(self.0, SpanKind::Space(1)), + n if cx.config.collapse_space => cx.push(" ", SpanKind::Space(n)), + n => cx.push(self.0, SpanKind::Space(n)), + } + } + } + + impl<'a> IntoSpansImpl<'a> for Token<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + match self { + Token::True => cx.push("true", SpanKind::Literal), + Token::False => cx.push("false", SpanKind::Literal), + Token::None => cx.push("None", SpanKind::Literal), + Token::Path(path) => path.into_spans(cx), + Token::String(string) => string.into_spans(cx), + Token::Number(number) => number.into_spans(cx), + Token::Separated { + before, + space_before, + separator, + after, + } => { + before.into_spans(cx); + space_before.into_spans(cx); + separator.into_spans(cx); + after.into_spans(cx); + } + Token::Delimited(delimited) => { + delimited.into_spans(cx); + } + Token::Atom(atom) => { + atom.into_spans(cx); + } + } + } + } + + impl<'a> IntoSpansImpl<'a> for Segment<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + let Self { + leading_space, + token, + } = self; + leading_space.into_spans(cx); + token.into_spans(cx); + } + } + + impl<'a> IntoSpansImpl<'a> for Segments<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + let Self { + segments, + trailing_space, + } = self; + for segment in segments { + segment.into_spans(cx); + } + trailing_space.into_spans(cx); + } + } + + impl<'a> IntoSpansImpl<'a> for Delimited<'a> { + fn into_spans(self, cx: &mut Context<'a>) { + let Self { + prefix, + delimiter, + contents, + } = self; + + match prefix { + Some((Atom::Text(text), space)) => { + cx.push( + text, + match delimiter { + Delimiter::Bracket => SpanKind::Constructor, + 
Delimiter::Paren if space.0.is_empty() => SpanKind::Constructor, + Delimiter::Paren => SpanKind::Text, + Delimiter::Brace => SpanKind::Text, + Delimiter::Angle if space.0.is_empty() => SpanKind::Constructor, + Delimiter::Angle => SpanKind::Text, + }, + ); + space.into_spans(cx); + } + None => {} + } + + match delimiter { + Delimiter::Paren => cx.push("(", SpanKind::Delimiter(cx.depth)), + Delimiter::Bracket => cx.push("[", SpanKind::Delimiter(cx.depth)), + Delimiter::Brace => cx.push("{", SpanKind::Delimiter(cx.depth)), + Delimiter::Angle => cx.push("<", SpanKind::Delimiter(cx.depth)), + } + + cx.depth += 1; + contents.into_spans(cx); + cx.depth -= 1; + + match delimiter { + Delimiter::Paren => cx.push(")", SpanKind::Delimiter(cx.depth)), + Delimiter::Bracket => cx.push("]", SpanKind::Delimiter(cx.depth)), + Delimiter::Brace => cx.push("}", SpanKind::Delimiter(cx.depth)), + Delimiter::Angle => cx.push(">", SpanKind::Delimiter(cx.depth)), + } + } + } +} + +#[cfg(test)] +mod tests { + use insta::assert_debug_snapshot; + + use super::SpanKind; + use crate::{Config, into_spans, parse_input}; + + fn spans(input: &str) -> Vec<(String, SpanKind)> { + let res = parse_input(input).unwrap(); + into_spans( + res, + Config { + collapse_space: true, + }, + ) + .into_iter() + .map(|i| (i.text.into_owned(), i.kind)) + .collect() + } + + #[test] + fn spans_ex1() { + assert_debug_snapshot!(spans( + r#"def_id=DefId(0:3 ~ unsized_coercion[10fa]::Trait)"# + ), @r#" + [ + ( + "def_id", + Text, + ), + ( + "=", + Separator, + ), + ( + "DefId", + Constructor, + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "0", + Number, + ), + ( + ":", + Separator, + ), + ( + "3", + Number, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "~", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "unsized_coercion", + Constructor, + ), + ( + "[", + Delimiter( + 1, + ), + ), + ( + "10fa", + Text, + ), + ( + "]", + Delimiter( + 1, + ), + ), + ( + "::", + Separator, + ), + ( + "Trait", + Text, + ), + ( + 
")", + Delimiter( + 0, + ), + ), + ] + "#) + } + + #[test] + fn spans_ex2() { + assert_debug_snapshot!(spans( + r#"data=TypeNs("MetaSized") visible_parent=DefId(2:3984 ~ core[bcc4]::marker) actual_parent=Some(DefId(2:3984 ~ core[bcc4]::marker))"# + ), @r#" + [ + ( + "data", + Text, + ), + ( + "=", + Separator, + ), + ( + "TypeNs", + Constructor, + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "", + StringSurroundings, + ), + ( + "\"", + Separator, + ), + ( + "MetaSized", + String, + ), + ( + "\"", + Separator, + ), + ( + "", + StringSurroundings, + ), + ( + ")", + Delimiter( + 0, + ), + ), + ( + " ", + Space( + 1, + ), + ), + ( + "visible_parent", + Text, + ), + ( + "=", + Separator, + ), + ( + "DefId", + Constructor, + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "2", + Number, + ), + ( + ":", + Separator, + ), + ( + "3984", + Number, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "~", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "core", + Constructor, + ), + ( + "[", + Delimiter( + 1, + ), + ), + ( + "bcc4", + Text, + ), + ( + "]", + Delimiter( + 1, + ), + ), + ( + "::", + Separator, + ), + ( + "marker", + Text, + ), + ( + ")", + Delimiter( + 0, + ), + ), + ( + " ", + Space( + 1, + ), + ), + ( + "actual_parent", + Text, + ), + ( + "=", + Separator, + ), + ( + "Some", + Constructor, + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "DefId", + Constructor, + ), + ( + "(", + Delimiter( + 1, + ), + ), + ( + "2", + Number, + ), + ( + ":", + Separator, + ), + ( + "3984", + Number, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "~", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "core", + Constructor, + ), + ( + "[", + Delimiter( + 2, + ), + ), + ( + "bcc4", + Text, + ), + ( + "]", + Delimiter( + 2, + ), + ), + ( + "::", + Separator, + ), + ( + "marker", + Text, + ), + ( + ")", + Delimiter( + 1, + ), + ), + ( + ")", + Delimiter( + 0, + ), + ), + ] + "#) + } + + #[test] + fn spans_ex3() { + assert_debug_snapshot!(spans( + r#"insert(DefId(0:4 ~ 
unsized_coercion[10fa]::{impl#0})): inserting TraitRef into specialization graph"# + ), @r#" + [ + ( + "insert", + Constructor, + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "DefId", + Constructor, + ), + ( + "(", + Delimiter( + 1, + ), + ), + ( + "0", + Number, + ), + ( + ":", + Separator, + ), + ( + "4", + Number, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "~", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "unsized_coercion", + Constructor, + ), + ( + "[", + Delimiter( + 2, + ), + ), + ( + "10fa", + Text, + ), + ( + "]", + Delimiter( + 2, + ), + ), + ( + "::", + Separator, + ), + ( + "{", + Delimiter( + 2, + ), + ), + ( + "impl#0", + Text, + ), + ( + "}", + Delimiter( + 2, + ), + ), + ( + ")", + Delimiter( + 1, + ), + ), + ( + ")", + Delimiter( + 0, + ), + ), + ( + ":", + Separator, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "inserting", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "TraitRef", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "<", + Delimiter( + 0, + ), + ), + ( + "u32", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "as", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "Trait", + Text, + ), + ( + ">", + Delimiter( + 0, + ), + ), + ( + " ", + Space( + 1, + ), + ), + ( + "into", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "specialization", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "graph", + Text, + ), + ] + "#) + } + + #[test] + fn spans_ex4() { + assert_debug_snapshot!(spans( + r#"inspecting def_id=DefId(3:662 ~ alloc[ef11]::boxed::Box) span=tests/ui/impl-trait/unsized_coercion.rs:12:15: 12:30 (#0)"# + ), @r##" + [ + ( + "inspecting", + Text, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "def_id", + Text, + ), + ( + "=", + Separator, + ), + ( + "DefId", + Constructor, + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "3", + Number, + ), + ( + ":", + Separator, + ), + ( + "662", + Number, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "~", + Text, + ), + ( + " ", + Space( + 1, + ), 
+ ), + ( + "alloc", + Constructor, + ), + ( + "[", + Delimiter( + 1, + ), + ), + ( + "ef11", + Text, + ), + ( + "]", + Delimiter( + 1, + ), + ), + ( + "::", + Separator, + ), + ( + "boxed", + Text, + ), + ( + "::", + Separator, + ), + ( + "Box", + Text, + ), + ( + ")", + Delimiter( + 0, + ), + ), + ( + " ", + Space( + 1, + ), + ), + ( + "span", + Text, + ), + ( + "=", + Separator, + ), + ( + "tests/ui/impl-trait/unsized_coercion.rs:12:15", + Path, + ), + ( + ":", + Separator, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "12", + Number, + ), + ( + ":", + Separator, + ), + ( + "30", + Number, + ), + ( + " ", + Space( + 1, + ), + ), + ( + "(", + Delimiter( + 0, + ), + ), + ( + "#0", + Text, + ), + ( + ")", + Delimiter( + 0, + ), + ), + ] + "##) + } +} diff --git a/proptest-regressions/format_debug_output.txt b/proptest-regressions/format_debug_output.txt new file mode 100644 index 0000000..a42ff27 --- /dev/null +++ b/proptest-regressions/format_debug_output.txt @@ -0,0 +1,9 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 255ec5cdb18e04e16d465fb94cbf15040fb1d5d704c53226ce3a2e9333c6de7c # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Path(Path { drive_excluding_colon: None, segments: [], filename: FileName { leading_separator: Slash, segment: "", ext_excluding_dot: None, location: None } }) }], trailing_space: Space("") } +cc 809c7a1b555b4e553cc8855c8a27de7a87e94c0d22b55ae9d7f6d4dff5f6cde0 # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: Text("a"), delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(" "), token: Path(Path { drive_excluding_colon: None, segments: [], filename: FileName { leading_separator: Slash, segment: "", ext_excluding_dot: Some("\\"), location: None } }) }, Segment { leading_space: Space(" "), token: True }], trailing_space: Space("") } }) }], trailing_space: Space("") } +cc 0b39a0bcfdada4d65ebbcbc80ed000c6b9cc1b60e7de6ffd004a7ee7db350d6c # shrinks to original = Segments { segments: [Segment { leading_space: Space(""), token: Delimited(Delimited { prefix: Text("A"), delimiter: Paren, contents: Segments { segments: [Segment { leading_space: Space(""), token: Path(Path { drive_excluding_colon: None, segments: [], filename: FileName { leading_separator: Slash, segment: "", ext_excluding_dot: Some(":"), location: None } }) }], trailing_space: Space(" ") } }) }], trailing_space: Space("") } diff --git a/proptest-regressions/format_debug_output/proptesting.txt b/proptest-regressions/format_debug_output/proptesting.txt new file mode 100644 index 0000000..3af969d --- /dev/null +++ b/proptest-regressions/format_debug_output/proptesting.txt @@ -0,0 +1,9 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. 
+# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 1dac24f74cdeb63f61d662876f276058bc71481f1df552aeea1293e22b682d59 # shrinks to original = "¡{}" +cc f7a17a233c11246ea8182505c41e30dc2a2f1c9020d5108f95403eb2de179fac # shrinks to original = ")" +cc 7bfbe4d3505dc0e94e5b87ae86e4b4554d9af477f4d0770161c2403ce39627f3 # shrinks to original = "!/\t" diff --git a/src/tui/mod.rs b/src/tui/mod.rs index 984cdfd..17e48e9 100644 --- a/src/tui/mod.rs +++ b/src/tui/mod.rs @@ -5,7 +5,7 @@ use crossterm::{ }, terminal::{EnterAlternateScreen, LeaveAlternateScreen}, }; -use ratatui_themes::{Theme, ThemeName}; +use ratatui_themes::{Color, Theme, ThemeName}; use std::{ fs::{self, DirEntry}, io::{self, Stdout}, @@ -493,6 +493,7 @@ impl App { fn styles(&self) -> Styles { let palette = self.theme.palette(); + let default = Style::new().fg(palette.fg).bg(palette.bg); let highlighted = Style::new().fg(palette.accent).bg(palette.selection); let border = Style::new().fg(palette.fg).bg(palette.bg); @@ -505,6 +506,11 @@ impl App { border, border_highlighted, error, + + literal: palette.secondary, + faded: palette.muted, + delimiter: palette.accent, + string: palette.accent, } } @@ -529,6 +535,11 @@ pub struct Styles { border: Style, border_highlighted: Style, error: Style, + + string: Color, + literal: Color, + faded: Color, + delimiter: Color, } impl Widget for &mut App { diff --git a/src/tui/widgets/items.rs b/src/tui/widgets/items.rs index f655e7e..1313040 100644 --- a/src/tui/widgets/items.rs +++ b/src/tui/widgets/items.rs @@ -79,7 +79,7 @@ impl Widget for Styled<'_, &Items<'_>> { }; let line_text = entry.line_text(prefix, self.filters); - let mut line = line_text.styled(&self.styles); + let mut line = line_text.styled(self.styles); if idx == self.selected_offset && let InputState::None | InputState::Target(InputTarget::This) diff --git a/src/tui/widgets/line_text.rs b/src/tui/widgets/line_text.rs index 
a16ea30..ee8bd02 100644 --- a/src/tui/widgets/line_text.rs +++ b/src/tui/widgets/line_text.rs @@ -1,7 +1,11 @@ +use std::borrow::Cow; + use ratatui::text::{Line, Span, Text}; use crate::tui::widgets::styled::Styled; +use logparse::{self as lp, Config, SpanKind, into_spans, parse_input}; +#[derive(Debug)] pub enum Highlighted { None, All, @@ -37,6 +41,124 @@ impl LineText { } } +#[derive(Debug)] +pub struct HighlightedSpan<'a> { + span: lp::Span<'a>, + highlighted: bool, +} + +fn cow_split_at<'a>(inp: Cow<'a, str>, offset: usize) -> (Cow<'a, str>, Cow<'a, str>) { + match inp { + Cow::Borrowed(t) => { + let (a, b) = t.split_at(offset); + (Cow::Borrowed(a), Cow::Borrowed(b)) + } + Cow::Owned(t) => { + let (a, b) = t.split_at(offset); + (Cow::Owned(a.to_string()), Cow::Owned(b.to_string())) + } + } +} + +fn span_len(span: &lp::Span<'_>) -> usize { + match span.kind { + SpanKind::Space(n) => n, + _ => span.text.len(), + } +} + +fn highlight_spans<'a>( + i: impl Iterator>, + start: usize, + end: usize, +) -> impl Iterator> { + let mut curr_offset = 0; + + i.flat_map(move |span| { + // entirely before + if curr_offset + span.text.len() <= start { + curr_offset += span_len(&span); + return vec![HighlightedSpan { + span, + highlighted: false, + }]; + } + + let lp::Span { text, kind } = span; + + let mut res = Vec::new(); + + // over the start, push what's before and continue + let text = if curr_offset < start && curr_offset + text.len() > start { + let until_start = start - curr_offset; + let (before, after) = cow_split_at(text, until_start); + + let before_span = lp::Span { kind, text: before }; + + let l = span_len(&before_span); + if l != 0 { + curr_offset += l; + + res.push(HighlightedSpan { + span: before_span, + highlighted: false, + }); + } + + after + } else { + text + }; + + if text.is_empty() { + return res; + } + + // entirely within + if curr_offset + text.len() <= end { + let span = lp::Span { kind, text }; + curr_offset += span_len(&span); + 
res.push(HighlightedSpan { + span, + highlighted: true, + }); + return res; + } + + // over the end boundary? + let text = if curr_offset < end && curr_offset + text.len() > end { + let until_start = end - curr_offset; + let (before, after) = cow_split_at(text, until_start); + + let before_span = lp::Span { kind, text: before }; + + curr_offset += span_len(&before_span); + res.push(HighlightedSpan { + span: before_span, + highlighted: true, + }); + + after + } else { + text + }; + + if text.is_empty() { + return res; + } + + let after = lp::Span { kind, text }; + + curr_offset += span_len(&after); + res.push(HighlightedSpan { + span: after, + highlighted: false, + }); + + res + }) +} + impl Into> for Styled<'_, LineText> { fn into(self) -> Line<'static> { let mut spans = Vec::new(); @@ -45,19 +167,71 @@ impl Into> for Styled<'_, LineText> { spans.push(Span::from("┃ ")); spans.push(Span::from(self.inner.tree)); - match self.inner.highlighted { - Highlighted::None => { - spans.push(Span::from(self.inner.message).style(self.styles.default)) - } - Highlighted::All => { - spans.push(Span::from(self.inner.message).style(self.styles.highlighted)) - } - Highlighted::Range { from, to } => spans.extend_from_slice(&[ - Span::from(self.inner.message[..from].to_string()).style(self.styles.default), - Span::from(self.inner.message[from..to].to_string()).style(self.styles.highlighted), - Span::from(self.inner.message[to..].to_string()).style(self.styles.default), - ]), - }; + spans.extend(if let Ok(parsed) = parse_input(&self.inner.message) { + let spans = into_spans( + parsed, + Config { + collapse_space: true, + }, + ); + + let spans: Vec<_> = match self.inner.highlighted { + Highlighted::None => spans + .into_iter() + .map(|span| HighlightedSpan { + span, + highlighted: false, + }) + .collect(), + Highlighted::All => spans + .into_iter() + .map(|span| HighlightedSpan { + span, + highlighted: true, + }) + .collect(), + Highlighted::Range { from, to } => { + 
highlight_spans(spans.into_iter(), from, to).collect() + } + }; + + let spans: Vec<_> = spans + .into_iter() + .map( + |HighlightedSpan { + span: lp::Span { text, kind }, + highlighted, + }| { + let span = Span::from(text.into_owned()); + + let style = if highlighted { + self.styles.highlighted + } else { + self.styles.default + }; + + let style = match kind { + SpanKind::Delimiter(_) => style.fg(self.styles.delimiter).bold(), + SpanKind::Separator => style.fg(self.styles.faded), + SpanKind::Number => style.fg(self.styles.literal), + SpanKind::Literal => style.fg(self.styles.literal).dim(), + SpanKind::String => style.fg(self.styles.string), + SpanKind::Path => style.fg(self.styles.literal).underlined(), + SpanKind::Space(_) => style, + SpanKind::Constructor => style.fg(self.styles.literal), + SpanKind::StringSurroundings => style.fg(self.styles.faded), + SpanKind::Text => style, + }; + + span.style(style) + }, + ) + .collect(); + + spans + } else { + vec![Span::from(self.inner.message).style(self.styles.default)] + }); Line::from(spans) }