docs and line/offset numbers in paths

This commit is contained in:
Jana Dönszelmann 2026-04-02 09:41:35 +02:00
parent af09bcd403
commit 2d9a029130
No known key found for this signature in database
9 changed files with 366 additions and 116 deletions

2
Cargo.lock generated
View file

@ -821,7 +821,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]] [[package]]
name = "logparse" name = "logparse"
version = "0.1.0" version = "0.1.1"
dependencies = [ dependencies = [
"insta", "insta",
"proptest", "proptest",

View file

@ -1,6 +1,6 @@
[package] [package]
name = "logparse" name = "logparse"
version = "0.1.0" version = "0.1.1"
edition = "2024" edition = "2024"
description = "parse arbitrary messages containing rust-like debug output to syntax highlight them" description = "parse arbitrary messages containing rust-like debug output to syntax highlight them"
authors = ["Jana Dönszelmann <cratesio@donsz.nl>"] authors = ["Jana Dönszelmann <cratesio@donsz.nl>"]
@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0"
documentation = "https://docs.rs/logparse" documentation = "https://docs.rs/logparse"
homepage = "https://git.donsz.nl/jana/logviewer" homepage = "https://git.donsz.nl/jana/logviewer"
repository = "https://git.donsz.nl/jana/logviewer" repository = "https://git.donsz.nl/jana/logviewer"
readme = "README.md"
[dependencies] [dependencies]
winnow = {version="1", features=["parser"]} winnow = {version="1", features=["parser"]}

16
logparse/README.md Normal file
View file

@ -0,0 +1,16 @@
# Logparse
This crate performs best-effort parsing of arbitrary text that may contain rust-like debug output.
This is intended for logs that contain Rust's debug output, in which you might want to highlight matching parentheses, brackets, etc.
Log messages, when parsed, can *always* be pretty printed back into their textual form perfectly.
The syntax tree unambiguously records any spaces in the source text.
There are tests that ensure this property for a very large number of inputs.
In addition, *any* log message can be parsed. `parse_input` returns a result,
but as far as I'm aware,
the parser is flexible enough to *never* reject,
and always falls back to categorizing text as *just* text.
There are also property tests to ensure this.

View file

@ -1,21 +1,27 @@
use proptest_derive::Arbitrary; use proptest_derive::Arbitrary;
use std::borrow::Cow; use std::borrow::Cow;
/// See [`Token::Separated`].
#[derive(Copy, Clone, Debug, PartialEq)] #[derive(Copy, Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum Separator { pub enum Separator {
Eq, Eq,
Colon, Colon,
DoubleColon, DoubleColon,
} }
/// See [`Token::String`].
#[derive(Copy, Clone, Debug, Arbitrary, PartialEq)] #[derive(Copy, Clone, Debug, Arbitrary, PartialEq)]
#[allow(missing_docs)]
pub enum QuoteType { pub enum QuoteType {
Single, Single,
Double, Double,
Backtick, Backtick,
} }
/// See [`Token::Delimited`].
#[derive(Clone, Debug, Arbitrary, PartialEq)] #[derive(Clone, Debug, Arbitrary, PartialEq)]
#[allow(missing_docs)]
pub enum Delimiter { pub enum Delimiter {
Paren, Paren,
Bracket, Bracket,
@ -23,19 +29,28 @@ pub enum Delimiter {
Angle, Angle,
} }
/// See [`Token::String`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct AnyString<'a> { pub struct AnyString<'a> {
/// Any text immediately preceding the string, like the `f` prefix in `f"mrrp"`.
pub prefix: Cow<'a, str>, pub prefix: Cow<'a, str>,
/// The kind of string quote used.
pub ty: QuoteType, pub ty: QuoteType,
/// The string's text.
pub contents: Cow<'a, str>, pub contents: Cow<'a, str>,
/// How many hashtags were used around the string.
/// Strings have to open and close with an equal number of hashtags.
pub num_hashtags: usize, pub num_hashtags: usize,
/// Any text immediately following the string.
pub suffix: Cow<'a, str>, pub suffix: Cow<'a, str>,
} }
/// Any sequence of 0 or more spaces.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Space<'a>(pub Cow<'a, str>); pub struct Space<'a>(pub Cow<'a, str>);
#[derive(Copy, Clone, Debug, PartialEq, Arbitrary)] #[derive(Copy, Clone, Debug, PartialEq, Arbitrary)]
#[allow(missing_docs)]
pub enum PathSep { pub enum PathSep {
/// Happens at the start of paths, for the no leading / case /// Happens at the start of paths, for the no leading / case
None, None,
@ -43,19 +58,26 @@ pub enum PathSep {
Backslash, Backslash,
} }
/// A segment of a path, with a leading separator.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub struct PathSegment<'a> { pub struct PathSegment<'a> {
pub leading_separator: PathSep, pub leading_separator: PathSep,
pub segment: Cow<'a, str>, pub segment: Cow<'a, str>,
} }
/// See [`Path`]. This is the `:3:4` part in `mrrp.rs:3:4`.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct FileLocation<'a> { pub struct FileLocation<'a> {
/// Line number.
pub line: Cow<'a, str>, pub line: Cow<'a, str>,
/// Optionally, a column offset.
pub offset: Option<Cow<'a, str>>, pub offset: Option<Cow<'a, str>>,
} }
/// See [`Path`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub struct FileName<'a> { pub struct FileName<'a> {
pub leading_separator: PathSep, pub leading_separator: PathSep,
pub segment: Cow<'a, str>, pub segment: Cow<'a, str>,
@ -63,14 +85,18 @@ pub struct FileName<'a> {
pub location: Option<FileLocation<'a>>, pub location: Option<FileLocation<'a>>,
} }
/// See [`Token::Path`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub struct Path<'a> { pub struct Path<'a> {
pub drive_excluding_colon: Option<char>, pub drive_excluding_colon: Option<char>,
pub segments: Vec<PathSegment<'a>>, pub segments: Vec<PathSegment<'a>>,
/// A filename: the last path segment, optionally with an extension and line/offset location.
pub filename: FileName<'a>, pub filename: FileName<'a>,
} }
/// See [`Token::Number`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Number<'a>(pub Cow<'a, str>); pub struct Number<'a>(pub Cow<'a, str>);
@ -78,46 +104,222 @@ pub struct Number<'a>(pub Cow<'a, str>);
/// i.e. an english word, or rust `::`-separated Path /// i.e. an english word, or rust `::`-separated Path
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Atom<'a> { pub enum Atom<'a> {
/// Raw text
Text(Cow<'a, str>), Text(Cow<'a, str>),
} }
/// The main AST node: each of these are treated as one unit, that may be separated by spaces.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Token<'a> { pub enum Token<'a> {
/// A literal `true`.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_true, Token::True);
/// assert!(is_true("true"));
///
/// // some counterexamples
/// assert!(!is_true("14"));
/// assert!(!is_true("`mrow!`"));
/// assert!(!is_true("false"));
/// ```
True, True,
/// A literal `false`.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_false, Token::False);
/// assert!(is_false("false"));
///
/// // some counterexamples
/// assert!(!is_false("14"));
/// assert!(!is_false("`mrow!`"));
/// assert!(!is_false("true"));
/// ```
False, False,
/// A literal `None`.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_none, Token::None);
/// assert!(is_none("None"));
///
/// // some counterexamples
/// assert!(!is_none("14"));
/// assert!(!is_none("`mrow!`"));
/// assert!(!is_none("true"));
/// ```
None, None,
/// A path, anything that looks vaguely path-like.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_path, Token::Path(_));
/// assert!(is_path("/"));
/// assert!(is_path("/home"));
/// assert!(is_path("C:/Users"));
/// assert!(is_path("C:\\Users"));
/// assert!(is_path("/home/some_path/file-some_where.rs"));
/// assert!(is_path("/home/some_path/file-and-line.rs:3"));
/// assert!(is_path("/home/some_path/file-and-line.rs:3:4"));
///
/// // some counterexamples
/// assert!(!is_path(":14"));
/// assert!(!is_path(":14:15"));
/// assert!(!is_path("14"));
/// assert!(!is_path("`mrow!`"));
/// assert!(!is_path("true"));
/// ```
Path(Path<'a>), Path(Path<'a>),
/// A string literal, with quite flexible rules on what's considered a string.
/// Any rust string literal parses, and a little more.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_string, Token::String(_));
/// assert!(is_string(r##""mrrp""##));
/// assert!(is_string(r##"'mrrp'"##));
/// assert!(is_string(r##"`mrrp`"##));
/// assert!(is_string(r##"b"mrrp""##));
/// assert!(is_string(r##"f"mrrp""##));
/// assert!(is_string(r##"c"mrrp""##));
/// assert!(is_string(r##"r"mrrp""##));
/// assert!(is_string(r##"r#"mrrp"#"##));
/// assert!(is_string(r##"b#"mrrp"#"##));
/// assert!(is_string(r####"anyprefix###"mrrp"###anysuffix"####));
///
/// // some counterexamples
/// assert!(!is_string("14"));
/// assert!(!is_string("true"));
/// assert!(!is_string("/home"));
///
/// // non-matching hashtags
/// assert!(!is_string(r#"r#"mrrp""#));
/// assert!(!is_string(r###"r#"mrrp"##"###));
/// assert!(!is_string(r##"r"mrrp"#"##));
///
/// // non-matching quotes
/// assert!(!is_string(r#"`mrrp""#));
/// assert!(!is_string(r#"`mrrp'"#));
/// ```
String(AnyString<'a>), String(AnyString<'a>),
/// A number, float or int.
/// There must not be any alphabetic character after the number, without a space in between.
/// That's to guard against finding numbers inside hashes for example.
///
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_number, Token::Number(_));
/// assert!(is_number("1"));
/// assert!(is_number("99999999"));
/// assert!(is_number("1.5"));
/// assert!(is_number("1e10"));
/// assert!(is_number("-1"));
/// assert!(is_number("-1.5"));
///
/// // some counterexamples
/// assert!(!is_number("`mrow!`"));
/// assert!(!is_number("true"));
///
/// // suffix
/// assert!(!is_number("14a"));
/// ```
Number(Number<'a>), Number(Number<'a>),
// TODO: RustPath // TODO: RustPath
/// Any token, separated by a [`Separator`], followed by another segment.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_sep, Token::Separated {..});
/// assert!(is_sep("a = 3"));
/// assert!(is_sep("a::b::c"));
/// assert!(is_sep("a: 5"));
///
/// // some counterexamples
/// assert!(!is_sep("`mrow!`"));
/// assert!(!is_sep("true"));
/// assert!(!is_sep("14"));
/// ```
Separated { Separated {
/// The part before the separator.
before: Box<Token<'a>>, before: Box<Token<'a>>,
/// The space between the `before` part, and the separator.
space_before: Space<'a>, space_before: Space<'a>,
/// The separator itself.
separator: Separator, separator: Separator,
/// The segment after the separator.
after: Box<Segment<'a>>, after: Box<Segment<'a>>,
}, },
/// A segment, delimited by parentheses, braces or brackets, with an optional prefix.
/// The prefix will be classified as a constructor if the delimiter is braces or parentheses.
///
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_delim, Token::Delimited(_));
/// assert!(is_delim("()"));
/// assert!(is_delim("[]"));
/// assert!(is_delim("{}"));
/// assert!(is_delim("(1)"));
/// assert!(is_delim("[1]"));
/// assert!(is_delim("{1}"));
/// assert!(is_delim("{/home/mrrp.rs}"));
/// assert!(is_delim("constructor{/home/mrrp.rs}"));
/// assert!(is_delim("constructor {/home/mrrp.rs}"));
///
/// // some counterexamples
/// assert!(!is_delim("`mrow!`"));
/// assert!(!is_delim("true"));
/// assert!(!is_delim("14"));
/// ```
Delimited(Delimited<'a>), Delimited(Delimited<'a>),
/// Any other text, that couldn't otherwise be categorized.
/// For example:
///
/// ```rust
/// # logparse::generate_ast_recognizer!(is_atom, Token::Atom(_));
/// assert!(is_atom("mrrp"));
///
/// // some counterexamples
/// assert!(!is_atom("`mrow!`"));
/// assert!(!is_atom("true"));
/// assert!(!is_atom("14"));
/// ```
Atom(Atom<'a>), Atom(Atom<'a>),
} }
/// See [`Token::Delimited`].
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Delimited<'a> { pub struct Delimited<'a> {
pub prefix: Option<Atom<'a>>, /// Delimiter prefix, i.e. the constructor part in `Some(mrrp)`.
pub prefix: Option<(Atom<'a>, Space<'a>)>,
/// The kind of delimiter itself.
pub delimiter: Delimiter, pub delimiter: Delimiter,
/// The contents of the delimited segment.
pub contents: Segments<'a>, pub contents: Segments<'a>,
} }
/// A Segment of text, with optional leading space.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Segment<'a> { pub struct Segment<'a> {
/// Any spaces (or empty) before this segment.
pub leading_space: Space<'a>, pub leading_space: Space<'a>,
/// The segment's contents itself.
pub token: Token<'a>, pub token: Token<'a>,
} }
/// Segments, with possible trailing space.
///
/// Any input is split up into many segments, with their surrounding spaces.
/// Each segment might be a meaningless atom, or may be recognized to have more meaning.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Segments<'a> { pub struct Segments<'a> {
/// The segments themselves.
pub segments: Vec<Segment<'a>>, pub segments: Vec<Segment<'a>>,
/// Any spaces (or empty) after this sequence of segments.
pub trailing_space: Space<'a>, pub trailing_space: Space<'a>,
} }

View file

@ -161,8 +161,9 @@ impl<'a> Display for Token<'a> {
impl<'a> Display for Delimited<'a> { impl<'a> Display for Delimited<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(prefix) = &self.prefix { if let Some((prefix, space)) = &self.prefix {
write!(f, "{prefix}")?; write!(f, "{prefix}")?;
write!(f, "{space}")?;
} }
self.delimiter.fmt_start(f)?; self.delimiter.fmt_start(f)?;
write!(f, "{}", self.contents)?; write!(f, "{}", self.contents)?;

View file

@ -1,10 +1,44 @@
#![deny(missing_docs)]
#![deny(warnings)]
#![doc=include_str!("../README.md")]
/// The structure of parsed log lines
pub mod ast; pub mod ast;
mod display; mod display;
mod parse; mod parse;
mod spans; mod spans;
#[doc(hidden)]
#[macro_export]
macro_rules! generate_ast_recognizer {
($name: ident, $pattern: pat) => {
fn $name(s: &str) -> bool {
use logparse::{ast::*, *};
let l = parse_input(s).unwrap();
if !l.trailing_space.0.is_empty() {
eprintln!("trailing space {l:?}");
return false;
}
if l.segments.len() != 1 {
eprintln!("more segments {l:?}");
return false;
}
if !l.segments[0].leading_space.0.is_empty() {
eprintln!("leading space {l:?}");
return false;
}
if let $pattern = &l.segments[0].token {
true
} else {
eprintln!("pattern {l:?}");
false
}
}
};
}
#[cfg(test)] #[cfg(test)]
mod proptesting; mod proptesting;
pub use parse::parse_input; pub use parse::parse_input;
pub use spans::{Config, Kind as SpanKind, Span, into_spans}; pub use spans::{Config, Span, SpanKind, into_spans};

View file

@ -97,8 +97,25 @@ impl Separator {
} }
} }
impl<'a> FileLocation<'a> {
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
use winnow::{ascii::dec_uint, combinator::*, prelude::*};
let colon_number = || {
(":".value(()), dec_uint::<_, u64, _>.take())
.map(|(_, number): (_, &str)| Cow::Borrowed(number))
};
let line_offset = (colon_number(), opt(colon_number()));
trace(
"file location",
line_offset.map(|(line, offset)| FileLocation { line, offset }),
)
}
}
impl<'a> FileName<'a> { impl<'a> FileName<'a> {
fn parse(segment: PathSegment<'a>) -> Self { fn parse(segment: PathSegment<'a>, location: Option<FileLocation<'a>>) -> Self {
fn rsplit<'a>( fn rsplit<'a>(
input: Cow<'a, str>, input: Cow<'a, str>,
delimiter: char, delimiter: char,
@ -113,34 +130,11 @@ impl<'a> FileName<'a> {
} }
} }
let (rest, location) = if let Some((before, offset)) = rsplit(segment.segment.clone(), ':')
{
if let Some((before, line)) = rsplit(before.clone(), ':') {
(
before,
Some(FileLocation {
line,
offset: Some(offset),
}),
)
} else {
(
before,
Some(FileLocation {
line: offset,
offset: None,
}),
)
}
} else {
(segment.segment, None)
};
let (new_segment, ext_excluding_dot) = let (new_segment, ext_excluding_dot) =
if let Some((segment, ext_excluding_dot)) = rsplit(rest.clone(), '.') { if let Some((segment, ext_excluding_dot)) = rsplit(segment.segment.clone(), '.') {
(segment, Some(ext_excluding_dot)) (segment, Some(ext_excluding_dot))
} else { } else {
(rest, None) (segment.segment, None)
}; };
Self { Self {
@ -202,12 +196,13 @@ impl<'a> Path<'a> {
opt_sep_and_next, opt_sep_and_next,
repeat_till(0.., sep_and_next, peek(terminator())) repeat_till(0.., sep_and_next, peek(terminator()))
.map(|(segments, _): (Vec<PathSegment>, _)| segments), .map(|(segments, _): (Vec<PathSegment>, _)| segments),
opt(FileLocation::parse()),
); );
trace( trace(
"path", "path",
drive_and_segments drive_and_segments
.map(|(drive, segment, segments)| { .map(|(drive, segment, segments, location)| {
let (segments, last) = { let (segments, last) = {
let mut segments = segments; let mut segments = segments;
segments.insert(0, segment); segments.insert(0, segment);
@ -215,7 +210,7 @@ impl<'a> Path<'a> {
(segments, last) (segments, last)
}; };
let filename = FileName::parse(last); let filename = FileName::parse(last, location);
Self { Self {
drive_excluding_colon: drive, drive_excluding_colon: drive,
@ -225,7 +220,6 @@ impl<'a> Path<'a> {
}) })
.verify(|i| { .verify(|i| {
!i.segments.is_empty() !i.segments.is_empty()
|| i.drive_excluding_colon.is_some()
|| i.filename.ext_excluding_dot.is_some() || i.filename.ext_excluding_dot.is_some()
|| !matches!(i.filename.leading_separator, PathSep::None) || !matches!(i.filename.leading_separator, PathSep::None)
}), }),
@ -266,8 +260,8 @@ impl<'a> Token<'a> {
"true".value(Self::True), "true".value(Self::True),
"false".value(Self::False), "false".value(Self::False),
"None".value(Self::None), "None".value(Self::None),
Path::parse().map(Self::Path),
Number::parse().map(Self::Number), Number::parse().map(Self::Number),
Path::parse().map(Self::Path),
AnyString::parse().map(Self::String), AnyString::parse().map(Self::String),
delimited, delimited,
Atom::parse(alt((Separator::parse().value(""), ")", "]", "}"))).map(Self::Atom), Atom::parse(alt((Separator::parse().value(""), ")", "]", "}"))).map(Self::Atom),
@ -318,12 +312,10 @@ impl<'a> Delimited<'a> {
trace( trace(
"delimited", "delimited",
( (
opt(Atom::parse(alt(( opt((
"(", Atom::parse(alt(("(", "[", "{", Separator::parse().value("")))),
"[", Space::parse(),
"{", )),
Separator::parse().value(""),
)))),
alt(( alt((
literal('(').map(|_| literal(')').value(Delimiter::Paren)), literal('(').map(|_| literal(')').value(Delimiter::Paren)),
literal('[').map(|_| literal(']').value(Delimiter::Bracket)), literal('[').map(|_| literal(']').value(Delimiter::Bracket)),
@ -392,6 +384,12 @@ impl<'a> Segment<'a> {
} }
} }
/// Parses an input string (a log line) into an ast.
///
/// This *should* never error.
/// Many tests ensure that arbitrary input can be parsed.
/// Even if non-structured or completely random.
/// The parser will gracefully accept such strings anyway, and just categorize them suboptimally.
pub fn parse_input<'a>(i: &'a str) -> Result<Segments<'a>, String> { pub fn parse_input<'a>(i: &'a str) -> Result<Segments<'a>, String> {
use winnow::combinator::eof; use winnow::combinator::eof;
Segments::parse(eof::<&str, winnow::error::EmptyError>) Segments::parse(eof::<&str, winnow::error::EmptyError>)
@ -405,7 +403,7 @@ mod tests {
use insta::assert_debug_snapshot; use insta::assert_debug_snapshot;
use winnow::Parser; use winnow::Parser;
use crate::format_debug_output::{ use crate::{
ast::{Path, Segments}, ast::{Path, Segments},
parse_input, parse_input,
}; };

View file

@ -1,6 +1,6 @@
use super::ast::*; use super::ast::*;
use crate::format_debug_output::parse_input; use crate::parse_input;
use crate::format_debug_output::{Config, into_spans}; use crate::{Config, into_spans};
use proptest::prelude::*; use proptest::prelude::*;
use proptest::proptest; use proptest::proptest;
@ -156,13 +156,16 @@ impl Delimited<'static> {
#[cfg(test)] #[cfg(test)]
fn arb(token: impl Strategy<Value = Token<'static>>) -> impl Strategy<Value = Self> { fn arb(token: impl Strategy<Value = Token<'static>>) -> impl Strategy<Value = Self> {
use proptest::option::*; use proptest::option::*;
(of(Atom::arb()), any::<Delimiter>(), Segments::arb(token)).prop_map( (
|(prefix, delimiter, contents)| Self { of((Atom::arb(), Space::arb())),
any::<Delimiter>(),
Segments::arb(token),
)
.prop_map(|(prefix, delimiter, contents)| Self {
prefix, prefix,
delimiter, delimiter,
contents, contents,
}, })
)
} }
} }

View file

@ -1,8 +1,9 @@
use super::ast::*; use super::ast::*;
use std::borrow::Cow; use std::borrow::Cow;
/// Text categories, based on the parsing.
#[derive(Copy, Clone, PartialEq, Eq, Debug)] #[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Kind { pub enum SpanKind {
/// Parentheses e.g. /// Parentheses e.g.
/// ///
/// Stores the delimiter depth, for e.g. rainbow delimiters. /// Stores the delimiter depth, for e.g. rainbow delimiters.
@ -31,35 +32,27 @@ pub enum Kind {
Text, Text,
} }
/// A `Span` is a piece of categorized text, based on the parsing done by
/// [`parse_input`](crate::parse_input).
#[derive(Clone, PartialEq, Eq, Debug)] #[derive(Clone, PartialEq, Eq, Debug)]
pub struct Span<'a> { pub struct Span<'a> {
/// The segment of text.
pub text: Cow<'a, str>, pub text: Cow<'a, str>,
pub kind: Kind, /// Its category.
pub kind: SpanKind,
} }
/// Configuration options for [`into_spans`]
pub struct Config { pub struct Config {
/// Turn sequences of more than 1 space into exactly 1 space.
pub collapse_space: bool, pub collapse_space: bool,
} }
pub struct Context<'a> {
config: Config,
res: Vec<Span<'a>>,
depth: usize,
}
impl<'a> Context<'a> {
pub fn push(&mut self, text: impl Into<Cow<'a, str>>, kind: Kind) {
self.res.push(Span {
text: text.into(),
kind,
})
}
}
pub trait IntoSpans<'a>: private::IntoSpansImpl<'a> {} pub trait IntoSpans<'a>: private::IntoSpansImpl<'a> {}
/// Turn an ast node into [`Span`]s.
pub fn into_spans<'a>(ast: impl IntoSpans<'a>, config: Config) -> Vec<Span<'a>> { pub fn into_spans<'a>(ast: impl IntoSpans<'a>, config: Config) -> Vec<Span<'a>> {
let mut cx = Context { let mut cx = private::Context {
config, config,
res: Vec::new(), res: Vec::new(),
depth: 0, depth: 0,
@ -71,6 +64,21 @@ pub fn into_spans<'a>(ast: impl IntoSpans<'a>, config: Config) -> Vec<Span<'a>>
mod private { mod private {
use super::*; use super::*;
pub struct Context<'a> {
pub config: Config,
pub res: Vec<Span<'a>>,
pub depth: usize,
}
impl<'a> Context<'a> {
fn push(&mut self, text: impl Into<Cow<'a, str>>, kind: SpanKind) {
self.res.push(Span {
text: text.into(),
kind,
})
}
}
pub trait IntoSpansImpl<'a> { pub trait IntoSpansImpl<'a> {
fn into_spans(self, cx: &mut Context<'a>); fn into_spans(self, cx: &mut Context<'a>);
} }
@ -80,9 +88,9 @@ mod private {
impl<'a> IntoSpansImpl<'a> for Separator { impl<'a> IntoSpansImpl<'a> for Separator {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
match self { match self {
Separator::Eq => cx.push("=", Kind::Separator), Separator::Eq => cx.push("=", SpanKind::Separator),
Separator::Colon => cx.push(":", Kind::Separator), Separator::Colon => cx.push(":", SpanKind::Separator),
Separator::DoubleColon => cx.push("::", Kind::Separator), Separator::DoubleColon => cx.push("::", SpanKind::Separator),
} }
} }
} }
@ -90,9 +98,9 @@ mod private {
impl<'a> IntoSpansImpl<'a> for QuoteType { impl<'a> IntoSpansImpl<'a> for QuoteType {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
match self { match self {
QuoteType::Single => cx.push("'", Kind::Separator), QuoteType::Single => cx.push("'", SpanKind::Separator),
QuoteType::Double => cx.push("\"", Kind::Separator), QuoteType::Double => cx.push("\"", SpanKind::Separator),
QuoteType::Backtick => cx.push("`", Kind::Separator), QuoteType::Backtick => cx.push("`", SpanKind::Separator),
} }
} }
} }
@ -106,38 +114,38 @@ mod private {
num_hashtags, num_hashtags,
suffix, suffix,
} = self; } = self;
cx.push(prefix, Kind::StringSurroundings); cx.push(prefix, SpanKind::StringSurroundings);
for _ in 0..num_hashtags { for _ in 0..num_hashtags {
cx.push("#", Kind::StringSurroundings) cx.push("#", SpanKind::StringSurroundings)
} }
ty.into_spans(cx); ty.into_spans(cx);
cx.push(contents, Kind::String); cx.push(contents, SpanKind::String);
ty.into_spans(cx); ty.into_spans(cx);
for _ in 0..num_hashtags { for _ in 0..num_hashtags {
cx.push("#", Kind::StringSurroundings) cx.push("#", SpanKind::StringSurroundings)
} }
cx.push(suffix, Kind::StringSurroundings); cx.push(suffix, SpanKind::StringSurroundings);
} }
} }
impl<'a> IntoSpansImpl<'a> for Path<'a> { impl<'a> IntoSpansImpl<'a> for Path<'a> {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
cx.push(self.to_string(), Kind::Path) cx.push(self.to_string(), SpanKind::Path)
} }
} }
impl<'a> IntoSpansImpl<'a> for Number<'a> { impl<'a> IntoSpansImpl<'a> for Number<'a> {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
cx.push(self.0, Kind::Number) cx.push(self.0, SpanKind::Number)
} }
} }
impl<'a> IntoSpansImpl<'a> for Atom<'a> { impl<'a> IntoSpansImpl<'a> for Atom<'a> {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
match self { match self {
Atom::Text(text) => cx.push(text, Kind::Text), Atom::Text(text) => cx.push(text, SpanKind::Text),
} }
} }
} }
@ -146,9 +154,9 @@ mod private {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
match self.0.len() { match self.0.len() {
0 => {} 0 => {}
1 => cx.push(self.0, Kind::Space(1)), 1 => cx.push(self.0, SpanKind::Space(1)),
n if cx.config.collapse_space => cx.push(" ", Kind::Space(n)), n if cx.config.collapse_space => cx.push(" ", SpanKind::Space(n)),
n => cx.push(self.0, Kind::Space(n)), n => cx.push(self.0, SpanKind::Space(n)),
} }
} }
} }
@ -156,9 +164,9 @@ mod private {
impl<'a> IntoSpansImpl<'a> for Token<'a> { impl<'a> IntoSpansImpl<'a> for Token<'a> {
fn into_spans(self, cx: &mut Context<'a>) { fn into_spans(self, cx: &mut Context<'a>) {
match self { match self {
Token::True => cx.push("true", Kind::Literal), Token::True => cx.push("true", SpanKind::Literal),
Token::False => cx.push("false", Kind::Literal), Token::False => cx.push("false", SpanKind::Literal),
Token::None => cx.push("None", Kind::Literal), Token::None => cx.push("None", SpanKind::Literal),
Token::Path(path) => path.into_spans(cx), Token::Path(path) => path.into_spans(cx),
Token::String(string) => string.into_spans(cx), Token::String(string) => string.into_spans(cx),
Token::Number(number) => number.into_spans(cx), Token::Number(number) => number.into_spans(cx),
@ -216,15 +224,18 @@ mod private {
} = self; } = self;
match prefix { match prefix {
Some(Atom::Text(text)) => cx.push(text, Kind::Constructor), Some((Atom::Text(text), space)) => {
cx.push(text, SpanKind::Constructor);
space.into_spans(cx);
}
None => {} None => {}
} }
match delimiter { match delimiter {
Delimiter::Paren => cx.push("(", Kind::Delimiter(cx.depth)), Delimiter::Paren => cx.push("(", SpanKind::Delimiter(cx.depth)),
Delimiter::Bracket => cx.push("[", Kind::Delimiter(cx.depth)), Delimiter::Bracket => cx.push("[", SpanKind::Delimiter(cx.depth)),
Delimiter::Brace => cx.push("{", Kind::Delimiter(cx.depth)), Delimiter::Brace => cx.push("{", SpanKind::Delimiter(cx.depth)),
Delimiter::Angle => cx.push("<", Kind::Delimiter(cx.depth)), Delimiter::Angle => cx.push("<", SpanKind::Delimiter(cx.depth)),
} }
cx.depth += 1; cx.depth += 1;
@ -232,10 +243,10 @@ mod private {
cx.depth -= 1; cx.depth -= 1;
match delimiter { match delimiter {
Delimiter::Paren => cx.push(")", Kind::Delimiter(cx.depth)), Delimiter::Paren => cx.push(")", SpanKind::Delimiter(cx.depth)),
Delimiter::Bracket => cx.push("]", Kind::Delimiter(cx.depth)), Delimiter::Bracket => cx.push("]", SpanKind::Delimiter(cx.depth)),
Delimiter::Brace => cx.push("}", Kind::Delimiter(cx.depth)), Delimiter::Brace => cx.push("}", SpanKind::Delimiter(cx.depth)),
Delimiter::Angle => cx.push(">", Kind::Delimiter(cx.depth)), Delimiter::Angle => cx.push(">", SpanKind::Delimiter(cx.depth)),
} }
} }
} }
@ -245,10 +256,10 @@ mod private {
mod tests { mod tests {
use insta::assert_debug_snapshot; use insta::assert_debug_snapshot;
use super::Kind; use super::SpanKind;
use crate::format_debug_output::{Config, into_spans, parse_input}; use crate::{Config, into_spans, parse_input};
fn spans(input: &str) -> Vec<(String, Kind)> { fn spans(input: &str) -> Vec<(String, SpanKind)> {
let res = parse_input(input).unwrap(); let res = parse_input(input).unwrap();
into_spans( into_spans(
res, res,
@ -906,29 +917,13 @@ mod tests {
Separator, Separator,
), ),
( (
"tests/ui/impl-trait/unsized_coercion.rs", "tests/ui/impl-trait/unsized_coercion.rs:12:15",
Path, Path,
), ),
( (
":", ":",
Separator, Separator,
), ),
(
"12",
Number,
),
(
":",
Separator,
),
(
"15",
Number,
),
(
":",
Separator,
),
( (
" ", " ",
Space( Space(