docs and line/offset numbers in paths
This commit is contained in:
parent
af09bcd403
commit
2d9a029130
9 changed files with 366 additions and 116 deletions
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "logparse"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
edition = "2024"
|
||||
description = "parse arbitrary messages containing rust-like debug output to syntax highlight them"
|
||||
authors = ["Jana Dönszelmann <cratesio@donsz.nl>"]
|
||||
|
|
@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0"
|
|||
documentation = "https://docs.rs/logparse"
|
||||
homepage = "https://git.donsz.nl/jana/logviewer"
|
||||
repository = "https://git.donsz.nl/jana/logviewer"
|
||||
readme = "README.md"
|
||||
|
||||
[dependencies]
|
||||
winnow = {version="1", features=["parser"]}
|
||||
|
|
|
|||
16
logparse/README.md
Normal file
16
logparse/README.md
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
|
||||
# Logparse
|
||||
|
||||
This crate performs best-effort parsing of arbitrary text that may contain Rust-like debug output.
|
||||
This is intended for logs that contain Rust's debug output, in which you might want to highlight matching parentheses, brackets, etc.
|
||||
|
||||
Log messages, when parsed, can *always* be pretty printed back into their textual form perfectly.
|
||||
The syntax tree unambiguously records any spaces in the source text.
|
||||
There are tests that ensure this property for a very large number of inputs.
|
||||
|
||||
In addition, *any* log message can be parsed. `parse_input` returns a result,
|
||||
but as far as I'm aware,
|
||||
the parser is flexible enough to *never* reject,
|
||||
and always falls back to categorizing text as *just* text.
|
||||
There are also property tests to ensure this.
|
||||
|
||||
|
|
@ -1,21 +1,27 @@
|
|||
use proptest_derive::Arbitrary;
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// See [`Token::Separated`].
|
||||
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Separator {
|
||||
Eq,
|
||||
Colon,
|
||||
DoubleColon,
|
||||
}
|
||||
|
||||
/// See [`Token::String`].
|
||||
#[derive(Copy, Clone, Debug, Arbitrary, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum QuoteType {
|
||||
Single,
|
||||
Double,
|
||||
Backtick,
|
||||
}
|
||||
|
||||
/// See [`Token::Delimited`].
|
||||
#[derive(Clone, Debug, Arbitrary, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Delimiter {
|
||||
Paren,
|
||||
Bracket,
|
||||
|
|
@ -23,19 +29,28 @@ pub enum Delimiter {
|
|||
Angle,
|
||||
}
|
||||
|
||||
/// See [`Token::String`].
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct AnyString<'a> {
|
||||
/// Any text immediately preceding the string, like the `f` prefix in `f"mrrp"`.
|
||||
pub prefix: Cow<'a, str>,
|
||||
/// The kind of string quote used.
|
||||
pub ty: QuoteType,
|
||||
/// The string's text.
|
||||
pub contents: Cow<'a, str>,
|
||||
/// How many hashtags were used around the string.
|
||||
/// Strings have to open and close with an equal number of hashtags.
|
||||
pub num_hashtags: usize,
|
||||
/// Any text immediately following the string.
|
||||
pub suffix: Cow<'a, str>,
|
||||
}
|
||||
|
||||
/// Any sequence of 0 or more spaces.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Space<'a>(pub Cow<'a, str>);
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Arbitrary)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum PathSep {
|
||||
/// Happens at the start of paths, for the no leading / case
|
||||
None,
|
||||
|
|
@ -43,19 +58,26 @@ pub enum PathSep {
|
|||
Backslash,
|
||||
}
|
||||
|
||||
/// A segment of a path, with a leading separator.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct PathSegment<'a> {
|
||||
pub leading_separator: PathSep,
|
||||
pub segment: Cow<'a, str>,
|
||||
}
|
||||
|
||||
/// See [`Path`]. This is the `:3:4` part in `mrrp.rs:3:4`.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct FileLocation<'a> {
|
||||
/// Line number.
|
||||
pub line: Cow<'a, str>,
|
||||
/// Optionally, a column offset.
|
||||
pub offset: Option<Cow<'a, str>>,
|
||||
}
|
||||
|
||||
/// See [`Path`].
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct FileName<'a> {
|
||||
pub leading_separator: PathSep,
|
||||
pub segment: Cow<'a, str>,
|
||||
|
|
@ -63,14 +85,18 @@ pub struct FileName<'a> {
|
|||
pub location: Option<FileLocation<'a>>,
|
||||
}
|
||||
|
||||
/// See [`Token::Path`].
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Path<'a> {
|
||||
pub drive_excluding_colon: Option<char>,
|
||||
|
||||
pub segments: Vec<PathSegment<'a>>,
|
||||
|
||||
/// A filename: the last path segment, optionally with an extension and line/offset numbers.
|
||||
pub filename: FileName<'a>,
|
||||
}
|
||||
|
||||
/// See [`Token::Number`].
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Number<'a>(pub Cow<'a, str>);
|
||||
|
||||
|
|
@ -78,46 +104,222 @@ pub struct Number<'a>(pub Cow<'a, str>);
|
|||
/// i.e. an english word, or rust `::`-separated Path
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Atom<'a> {
|
||||
/// Raw text
|
||||
Text(Cow<'a, str>),
|
||||
}
|
||||
|
||||
/// The main AST node: each of these is treated as one unit, and units may be separated by spaces.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Token<'a> {
|
||||
/// A literal `true`.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_true, Token::True);
|
||||
/// assert!(is_true("true"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_true("14"));
|
||||
/// assert!(!is_true("`mrow!`"));
|
||||
/// assert!(!is_true("false"));
|
||||
/// ```
|
||||
True,
|
||||
/// A literal `false`.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_false, Token::False);
|
||||
/// assert!(is_false("false"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_false("14"));
|
||||
/// assert!(!is_false("`mrow!`"));
|
||||
/// assert!(!is_false("true"));
|
||||
/// ```
|
||||
False,
|
||||
/// A literal `None`.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_none, Token::None);
|
||||
/// assert!(is_none("None"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_none("14"));
|
||||
/// assert!(!is_none("`mrow!`"));
|
||||
/// assert!(!is_none("true"));
|
||||
/// ```
|
||||
None,
|
||||
|
||||
/// A path, anything that looks vaguely path-like.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_path, Token::Path(_));
|
||||
/// assert!(is_path("/"));
|
||||
/// assert!(is_path("/home"));
|
||||
/// assert!(is_path("C:/Users"));
|
||||
/// assert!(is_path("C:\\Users"));
|
||||
/// assert!(is_path("/home/some_path/file-some_where.rs"));
|
||||
/// assert!(is_path("/home/some_path/file-and-line.rs:3"));
|
||||
/// assert!(is_path("/home/some_path/file-and-line.rs:3:4"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_path(":14"));
|
||||
/// assert!(!is_path(":14:15"));
|
||||
/// assert!(!is_path("14"));
|
||||
/// assert!(!is_path("`mrow!`"));
|
||||
/// assert!(!is_path("true"));
|
||||
/// ```
|
||||
Path(Path<'a>),
|
||||
/// A string literal, with quite flexible rules on what's considered a string.
|
||||
/// Any rust string literal parses, and a little more.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_string, Token::String(_));
|
||||
/// assert!(is_string(r##""mrrp""##));
|
||||
/// assert!(is_string(r##"'mrrp'"##));
|
||||
/// assert!(is_string(r##"`mrrp`"##));
|
||||
/// assert!(is_string(r##"b"mrrp""##));
|
||||
/// assert!(is_string(r##"f"mrrp""##));
|
||||
/// assert!(is_string(r##"c"mrrp""##));
|
||||
/// assert!(is_string(r##"r"mrrp""##));
|
||||
/// assert!(is_string(r##"r#"mrrp"#"##));
|
||||
/// assert!(is_string(r##"b#"mrrp"#"##));
|
||||
/// assert!(is_string(r####"anyprefix###"mrrp"###anysuffix"####));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_string("14"));
|
||||
/// assert!(!is_string("true"));
|
||||
/// assert!(!is_string("/home"));
|
||||
///
|
||||
/// // non-matching hashtags
|
||||
/// assert!(!is_string(r#"r#"mrrp""#));
|
||||
/// assert!(!is_string(r###"r#"mrrp"##"###));
|
||||
/// assert!(!is_string(r##"r"mrrp"#"##));
|
||||
///
|
||||
/// // non-matching quotes
|
||||
/// assert!(!is_string(r#"`mrrp""#));
|
||||
/// assert!(!is_string(r#"`mrrp'"#));
|
||||
/// ```
|
||||
String(AnyString<'a>),
|
||||
/// A number, float or int.
|
||||
/// There must not be any alphabetic character after the number, without a space in between.
|
||||
/// That's to guard against finding numbers inside hashes for example.
|
||||
///
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_number, Token::Number(_));
|
||||
/// assert!(is_number("1"));
|
||||
/// assert!(is_number("99999999"));
|
||||
/// assert!(is_number("1.5"));
|
||||
/// assert!(is_number("1e10"));
|
||||
/// assert!(is_number("-1"));
|
||||
/// assert!(is_number("-1.5"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_number("`mrow!`"));
|
||||
/// assert!(!is_number("true"));
|
||||
///
|
||||
/// // suffix
|
||||
/// assert!(!is_number("14a"));
|
||||
/// ```
|
||||
Number(Number<'a>),
|
||||
|
||||
// TODO: RustPath
|
||||
/// Any token, separated by a [`Separator`], followed by another segment.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_sep, Token::Separated {..});
|
||||
/// assert!(is_sep("a = 3"));
|
||||
/// assert!(is_sep("a::b::c"));
|
||||
/// assert!(is_sep("a: 5"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_sep("`mrow!`"));
|
||||
/// assert!(!is_sep("true"));
|
||||
/// assert!(!is_sep("14"));
|
||||
/// ```
|
||||
Separated {
|
||||
/// The part before the separator.
|
||||
before: Box<Token<'a>>,
|
||||
/// The space between the `before` part, and the separator.
|
||||
space_before: Space<'a>,
|
||||
/// The separator itself.
|
||||
separator: Separator,
|
||||
/// The segment after the separator.
|
||||
after: Box<Segment<'a>>,
|
||||
},
|
||||
/// A segment, delimited by parentheses, braces or brackets, with an optional prefix.
|
||||
/// The prefix will be classified as a constructor if the delimiter is braces or parentheses.
|
||||
///
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_delim, Token::Delimited(_));
|
||||
/// assert!(is_delim("()"));
|
||||
/// assert!(is_delim("[]"));
|
||||
/// assert!(is_delim("{}"));
|
||||
/// assert!(is_delim("(1)"));
|
||||
/// assert!(is_delim("[1]"));
|
||||
/// assert!(is_delim("{1}"));
|
||||
/// assert!(is_delim("{/home/mrrp.rs}"));
|
||||
/// assert!(is_delim("constructor{/home/mrrp.rs}"));
|
||||
/// assert!(is_delim("constructor {/home/mrrp.rs}"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_delim("`mrow!`"));
|
||||
/// assert!(!is_delim("true"));
|
||||
/// assert!(!is_delim("14"));
|
||||
/// ```
|
||||
Delimited(Delimited<'a>),
|
||||
|
||||
/// Any other text, that couldn't otherwise be categorized.
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// # logparse::generate_ast_recognizer!(is_atom, Token::Atom(_));
|
||||
/// assert!(is_atom("mrrp"));
|
||||
///
|
||||
/// // some counterexamples
|
||||
/// assert!(!is_atom("`mrow!`"));
|
||||
/// assert!(!is_atom("true"));
|
||||
/// assert!(!is_atom("14"));
|
||||
/// ```
|
||||
Atom(Atom<'a>),
|
||||
}
|
||||
|
||||
/// See [`Token::Delimited`].
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Delimited<'a> {
|
||||
pub prefix: Option<Atom<'a>>,
|
||||
/// Delimiter prefix, i.e. the constructor part in `Some(mrrp)`.
|
||||
pub prefix: Option<(Atom<'a>, Space<'a>)>,
|
||||
/// The kind of delimiter itself.
|
||||
pub delimiter: Delimiter,
|
||||
/// The contents of the delimited segment.
|
||||
pub contents: Segments<'a>,
|
||||
}
|
||||
|
||||
/// A Segment of text, with optional leading space.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Segment<'a> {
|
||||
/// Any spaces (or empty) before this segment.
|
||||
pub leading_space: Space<'a>,
|
||||
/// The segment's contents itself.
|
||||
pub token: Token<'a>,
|
||||
}
|
||||
|
||||
/// Segments, with possible trailing space.
|
||||
///
|
||||
/// Any input is split up into many segments, with their surrounding spaces.
|
||||
/// Each segment might be a meaningless atom, or may be recognized to have more meaning.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Segments<'a> {
|
||||
/// The segments themselves.
|
||||
pub segments: Vec<Segment<'a>>,
|
||||
/// Any spaces (or empty) after this sequence of segments.
|
||||
pub trailing_space: Space<'a>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -161,8 +161,9 @@ impl<'a> Display for Token<'a> {
|
|||
|
||||
impl<'a> Display for Delimited<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if let Some(prefix) = &self.prefix {
|
||||
if let Some((prefix, space)) = &self.prefix {
|
||||
write!(f, "{prefix}")?;
|
||||
write!(f, "{space}")?;
|
||||
}
|
||||
self.delimiter.fmt_start(f)?;
|
||||
write!(f, "{}", self.contents)?;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,44 @@
|
|||
#![deny(missing_docs)]
|
||||
#![deny(warnings)]
|
||||
#![doc=include_str!("../README.md")]
|
||||
|
||||
/// The structure of parsed log lines
|
||||
pub mod ast;
|
||||
mod display;
|
||||
mod parse;
|
||||
mod spans;
|
||||
|
||||
#[doc(hidden)]
|
||||
#[macro_export]
|
||||
macro_rules! generate_ast_recognizer {
|
||||
($name: ident, $pattern: pat) => {
|
||||
fn $name(s: &str) -> bool {
|
||||
use logparse::{ast::*, *};
|
||||
let l = parse_input(s).unwrap();
|
||||
if !l.trailing_space.0.is_empty() {
|
||||
eprintln!("trailing space {l:?}");
|
||||
return false;
|
||||
}
|
||||
if l.segments.len() != 1 {
|
||||
eprintln!("more segments {l:?}");
|
||||
return false;
|
||||
}
|
||||
if !l.segments[0].leading_space.0.is_empty() {
|
||||
eprintln!("leading space {l:?}");
|
||||
return false;
|
||||
}
|
||||
if let $pattern = &l.segments[0].token {
|
||||
true
|
||||
} else {
|
||||
eprintln!("pattern {l:?}");
|
||||
false
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod proptesting;
|
||||
|
||||
pub use parse::parse_input;
|
||||
pub use spans::{Config, Kind as SpanKind, Span, into_spans};
|
||||
pub use spans::{Config, Span, SpanKind, into_spans};
|
||||
|
|
|
|||
|
|
@ -97,8 +97,25 @@ impl Separator {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> FileLocation<'a> {
|
||||
fn parse<E: ParserError<&'a str>>() -> impl Parser<&'a str, Self, E> {
|
||||
use winnow::{ascii::dec_uint, combinator::*, prelude::*};
|
||||
|
||||
let colon_number = || {
|
||||
(":".value(()), dec_uint::<_, u64, _>.take())
|
||||
.map(|(_, number): (_, &str)| Cow::Borrowed(number))
|
||||
};
|
||||
let line_offset = (colon_number(), opt(colon_number()));
|
||||
|
||||
trace(
|
||||
"file location",
|
||||
line_offset.map(|(line, offset)| FileLocation { line, offset }),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FileName<'a> {
|
||||
fn parse(segment: PathSegment<'a>) -> Self {
|
||||
fn parse(segment: PathSegment<'a>, location: Option<FileLocation<'a>>) -> Self {
|
||||
fn rsplit<'a>(
|
||||
input: Cow<'a, str>,
|
||||
delimiter: char,
|
||||
|
|
@ -113,34 +130,11 @@ impl<'a> FileName<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
let (rest, location) = if let Some((before, offset)) = rsplit(segment.segment.clone(), ':')
|
||||
{
|
||||
if let Some((before, line)) = rsplit(before.clone(), ':') {
|
||||
(
|
||||
before,
|
||||
Some(FileLocation {
|
||||
line,
|
||||
offset: Some(offset),
|
||||
}),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
before,
|
||||
Some(FileLocation {
|
||||
line: offset,
|
||||
offset: None,
|
||||
}),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
(segment.segment, None)
|
||||
};
|
||||
|
||||
let (new_segment, ext_excluding_dot) =
|
||||
if let Some((segment, ext_excluding_dot)) = rsplit(rest.clone(), '.') {
|
||||
if let Some((segment, ext_excluding_dot)) = rsplit(segment.segment.clone(), '.') {
|
||||
(segment, Some(ext_excluding_dot))
|
||||
} else {
|
||||
(rest, None)
|
||||
(segment.segment, None)
|
||||
};
|
||||
|
||||
Self {
|
||||
|
|
@ -202,12 +196,13 @@ impl<'a> Path<'a> {
|
|||
opt_sep_and_next,
|
||||
repeat_till(0.., sep_and_next, peek(terminator()))
|
||||
.map(|(segments, _): (Vec<PathSegment>, _)| segments),
|
||||
opt(FileLocation::parse()),
|
||||
);
|
||||
|
||||
trace(
|
||||
"path",
|
||||
drive_and_segments
|
||||
.map(|(drive, segment, segments)| {
|
||||
.map(|(drive, segment, segments, location)| {
|
||||
let (segments, last) = {
|
||||
let mut segments = segments;
|
||||
segments.insert(0, segment);
|
||||
|
|
@ -215,7 +210,7 @@ impl<'a> Path<'a> {
|
|||
(segments, last)
|
||||
};
|
||||
|
||||
let filename = FileName::parse(last);
|
||||
let filename = FileName::parse(last, location);
|
||||
|
||||
Self {
|
||||
drive_excluding_colon: drive,
|
||||
|
|
@ -225,7 +220,6 @@ impl<'a> Path<'a> {
|
|||
})
|
||||
.verify(|i| {
|
||||
!i.segments.is_empty()
|
||||
|| i.drive_excluding_colon.is_some()
|
||||
|| i.filename.ext_excluding_dot.is_some()
|
||||
|| !matches!(i.filename.leading_separator, PathSep::None)
|
||||
}),
|
||||
|
|
@ -266,8 +260,8 @@ impl<'a> Token<'a> {
|
|||
"true".value(Self::True),
|
||||
"false".value(Self::False),
|
||||
"None".value(Self::None),
|
||||
Path::parse().map(Self::Path),
|
||||
Number::parse().map(Self::Number),
|
||||
Path::parse().map(Self::Path),
|
||||
AnyString::parse().map(Self::String),
|
||||
delimited,
|
||||
Atom::parse(alt((Separator::parse().value(""), ")", "]", "}"))).map(Self::Atom),
|
||||
|
|
@ -318,12 +312,10 @@ impl<'a> Delimited<'a> {
|
|||
trace(
|
||||
"delimited",
|
||||
(
|
||||
opt(Atom::parse(alt((
|
||||
"(",
|
||||
"[",
|
||||
"{",
|
||||
Separator::parse().value(""),
|
||||
)))),
|
||||
opt((
|
||||
Atom::parse(alt(("(", "[", "{", Separator::parse().value("")))),
|
||||
Space::parse(),
|
||||
)),
|
||||
alt((
|
||||
literal('(').map(|_| literal(')').value(Delimiter::Paren)),
|
||||
literal('[').map(|_| literal(']').value(Delimiter::Bracket)),
|
||||
|
|
@ -392,6 +384,12 @@ impl<'a> Segment<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parses an input string (a log line) into an ast.
|
||||
///
|
||||
/// This *should* never error.
|
||||
/// Many tests ensure that arbitrary input can be parsed.
|
||||
/// This holds even if the input is unstructured or completely random.
|
||||
/// The parser will gracefully accept such strings anyway, and just categorize them suboptimally.
|
||||
pub fn parse_input<'a>(i: &'a str) -> Result<Segments<'a>, String> {
|
||||
use winnow::combinator::eof;
|
||||
Segments::parse(eof::<&str, winnow::error::EmptyError>)
|
||||
|
|
@ -405,7 +403,7 @@ mod tests {
|
|||
use insta::assert_debug_snapshot;
|
||||
use winnow::Parser;
|
||||
|
||||
use crate::format_debug_output::{
|
||||
use crate::{
|
||||
ast::{Path, Segments},
|
||||
parse_input,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::ast::*;
|
||||
use crate::format_debug_output::parse_input;
|
||||
use crate::format_debug_output::{Config, into_spans};
|
||||
use crate::parse_input;
|
||||
use crate::{Config, into_spans};
|
||||
use proptest::prelude::*;
|
||||
use proptest::proptest;
|
||||
|
||||
|
|
@ -156,13 +156,16 @@ impl Delimited<'static> {
|
|||
#[cfg(test)]
|
||||
fn arb(token: impl Strategy<Value = Token<'static>>) -> impl Strategy<Value = Self> {
|
||||
use proptest::option::*;
|
||||
(of(Atom::arb()), any::<Delimiter>(), Segments::arb(token)).prop_map(
|
||||
|(prefix, delimiter, contents)| Self {
|
||||
(
|
||||
of((Atom::arb(), Space::arb())),
|
||||
any::<Delimiter>(),
|
||||
Segments::arb(token),
|
||||
)
|
||||
.prop_map(|(prefix, delimiter, contents)| Self {
|
||||
prefix,
|
||||
delimiter,
|
||||
contents,
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
use super::ast::*;
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// Text categories, based on the parsing.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||
pub enum Kind {
|
||||
pub enum SpanKind {
|
||||
/// Parentheses e.g.
|
||||
///
|
||||
/// Stores the delimiter depth, for e.g. rainbow delimiters.
|
||||
|
|
@ -31,35 +32,27 @@ pub enum Kind {
|
|||
Text,
|
||||
}
|
||||
|
||||
/// A `Span` is a piece of categorized text, based on the parsing done by
|
||||
/// [`parse_input`](crate::parse_input).
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
pub struct Span<'a> {
|
||||
/// The segment of text.
|
||||
pub text: Cow<'a, str>,
|
||||
pub kind: Kind,
|
||||
/// Its category.
|
||||
pub kind: SpanKind,
|
||||
}
|
||||
|
||||
/// Configuration options for [`into_spans`]
|
||||
pub struct Config {
|
||||
/// Turn sequences of more than 1 space into exactly 1 space.
|
||||
pub collapse_space: bool,
|
||||
}
|
||||
|
||||
pub struct Context<'a> {
|
||||
config: Config,
|
||||
res: Vec<Span<'a>>,
|
||||
depth: usize,
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
pub fn push(&mut self, text: impl Into<Cow<'a, str>>, kind: Kind) {
|
||||
self.res.push(Span {
|
||||
text: text.into(),
|
||||
kind,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IntoSpans<'a>: private::IntoSpansImpl<'a> {}
|
||||
|
||||
/// Turn an ast node into [`Span`]s.
|
||||
pub fn into_spans<'a>(ast: impl IntoSpans<'a>, config: Config) -> Vec<Span<'a>> {
|
||||
let mut cx = Context {
|
||||
let mut cx = private::Context {
|
||||
config,
|
||||
res: Vec::new(),
|
||||
depth: 0,
|
||||
|
|
@ -71,6 +64,21 @@ pub fn into_spans<'a>(ast: impl IntoSpans<'a>, config: Config) -> Vec<Span<'a>>
|
|||
mod private {
|
||||
use super::*;
|
||||
|
||||
pub struct Context<'a> {
|
||||
pub config: Config,
|
||||
pub res: Vec<Span<'a>>,
|
||||
pub depth: usize,
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
fn push(&mut self, text: impl Into<Cow<'a, str>>, kind: SpanKind) {
|
||||
self.res.push(Span {
|
||||
text: text.into(),
|
||||
kind,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IntoSpansImpl<'a> {
|
||||
fn into_spans(self, cx: &mut Context<'a>);
|
||||
}
|
||||
|
|
@ -80,9 +88,9 @@ mod private {
|
|||
impl<'a> IntoSpansImpl<'a> for Separator {
|
||||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
match self {
|
||||
Separator::Eq => cx.push("=", Kind::Separator),
|
||||
Separator::Colon => cx.push(":", Kind::Separator),
|
||||
Separator::DoubleColon => cx.push("::", Kind::Separator),
|
||||
Separator::Eq => cx.push("=", SpanKind::Separator),
|
||||
Separator::Colon => cx.push(":", SpanKind::Separator),
|
||||
Separator::DoubleColon => cx.push("::", SpanKind::Separator),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -90,9 +98,9 @@ mod private {
|
|||
impl<'a> IntoSpansImpl<'a> for QuoteType {
|
||||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
match self {
|
||||
QuoteType::Single => cx.push("'", Kind::Separator),
|
||||
QuoteType::Double => cx.push("\"", Kind::Separator),
|
||||
QuoteType::Backtick => cx.push("`", Kind::Separator),
|
||||
QuoteType::Single => cx.push("'", SpanKind::Separator),
|
||||
QuoteType::Double => cx.push("\"", SpanKind::Separator),
|
||||
QuoteType::Backtick => cx.push("`", SpanKind::Separator),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -106,38 +114,38 @@ mod private {
|
|||
num_hashtags,
|
||||
suffix,
|
||||
} = self;
|
||||
cx.push(prefix, Kind::StringSurroundings);
|
||||
cx.push(prefix, SpanKind::StringSurroundings);
|
||||
for _ in 0..num_hashtags {
|
||||
cx.push("#", Kind::StringSurroundings)
|
||||
cx.push("#", SpanKind::StringSurroundings)
|
||||
}
|
||||
|
||||
ty.into_spans(cx);
|
||||
cx.push(contents, Kind::String);
|
||||
cx.push(contents, SpanKind::String);
|
||||
ty.into_spans(cx);
|
||||
|
||||
for _ in 0..num_hashtags {
|
||||
cx.push("#", Kind::StringSurroundings)
|
||||
cx.push("#", SpanKind::StringSurroundings)
|
||||
}
|
||||
cx.push(suffix, Kind::StringSurroundings);
|
||||
cx.push(suffix, SpanKind::StringSurroundings);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoSpansImpl<'a> for Path<'a> {
|
||||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
cx.push(self.to_string(), Kind::Path)
|
||||
cx.push(self.to_string(), SpanKind::Path)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoSpansImpl<'a> for Number<'a> {
|
||||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
cx.push(self.0, Kind::Number)
|
||||
cx.push(self.0, SpanKind::Number)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoSpansImpl<'a> for Atom<'a> {
|
||||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
match self {
|
||||
Atom::Text(text) => cx.push(text, Kind::Text),
|
||||
Atom::Text(text) => cx.push(text, SpanKind::Text),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -146,9 +154,9 @@ mod private {
|
|||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
match self.0.len() {
|
||||
0 => {}
|
||||
1 => cx.push(self.0, Kind::Space(1)),
|
||||
n if cx.config.collapse_space => cx.push(" ", Kind::Space(n)),
|
||||
n => cx.push(self.0, Kind::Space(n)),
|
||||
1 => cx.push(self.0, SpanKind::Space(1)),
|
||||
n if cx.config.collapse_space => cx.push(" ", SpanKind::Space(n)),
|
||||
n => cx.push(self.0, SpanKind::Space(n)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -156,9 +164,9 @@ mod private {
|
|||
impl<'a> IntoSpansImpl<'a> for Token<'a> {
|
||||
fn into_spans(self, cx: &mut Context<'a>) {
|
||||
match self {
|
||||
Token::True => cx.push("true", Kind::Literal),
|
||||
Token::False => cx.push("false", Kind::Literal),
|
||||
Token::None => cx.push("None", Kind::Literal),
|
||||
Token::True => cx.push("true", SpanKind::Literal),
|
||||
Token::False => cx.push("false", SpanKind::Literal),
|
||||
Token::None => cx.push("None", SpanKind::Literal),
|
||||
Token::Path(path) => path.into_spans(cx),
|
||||
Token::String(string) => string.into_spans(cx),
|
||||
Token::Number(number) => number.into_spans(cx),
|
||||
|
|
@ -216,15 +224,18 @@ mod private {
|
|||
} = self;
|
||||
|
||||
match prefix {
|
||||
Some(Atom::Text(text)) => cx.push(text, Kind::Constructor),
|
||||
Some((Atom::Text(text), space)) => {
|
||||
cx.push(text, SpanKind::Constructor);
|
||||
space.into_spans(cx);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
match delimiter {
|
||||
Delimiter::Paren => cx.push("(", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Bracket => cx.push("[", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Brace => cx.push("{", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Angle => cx.push("<", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Paren => cx.push("(", SpanKind::Delimiter(cx.depth)),
|
||||
Delimiter::Bracket => cx.push("[", SpanKind::Delimiter(cx.depth)),
|
||||
Delimiter::Brace => cx.push("{", SpanKind::Delimiter(cx.depth)),
|
||||
Delimiter::Angle => cx.push("<", SpanKind::Delimiter(cx.depth)),
|
||||
}
|
||||
|
||||
cx.depth += 1;
|
||||
|
|
@ -232,10 +243,10 @@ mod private {
|
|||
cx.depth -= 1;
|
||||
|
||||
match delimiter {
|
||||
Delimiter::Paren => cx.push(")", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Bracket => cx.push("]", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Brace => cx.push("}", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Angle => cx.push(">", Kind::Delimiter(cx.depth)),
|
||||
Delimiter::Paren => cx.push(")", SpanKind::Delimiter(cx.depth)),
|
||||
Delimiter::Bracket => cx.push("]", SpanKind::Delimiter(cx.depth)),
|
||||
Delimiter::Brace => cx.push("}", SpanKind::Delimiter(cx.depth)),
|
||||
Delimiter::Angle => cx.push(">", SpanKind::Delimiter(cx.depth)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -245,10 +256,10 @@ mod private {
|
|||
mod tests {
|
||||
use insta::assert_debug_snapshot;
|
||||
|
||||
use super::Kind;
|
||||
use crate::format_debug_output::{Config, into_spans, parse_input};
|
||||
use super::SpanKind;
|
||||
use crate::{Config, into_spans, parse_input};
|
||||
|
||||
fn spans(input: &str) -> Vec<(String, Kind)> {
|
||||
fn spans(input: &str) -> Vec<(String, SpanKind)> {
|
||||
let res = parse_input(input).unwrap();
|
||||
into_spans(
|
||||
res,
|
||||
|
|
@ -906,29 +917,13 @@ mod tests {
|
|||
Separator,
|
||||
),
|
||||
(
|
||||
"tests/ui/impl-trait/unsized_coercion.rs",
|
||||
"tests/ui/impl-trait/unsized_coercion.rs:12:15",
|
||||
Path,
|
||||
),
|
||||
(
|
||||
":",
|
||||
Separator,
|
||||
),
|
||||
(
|
||||
"12",
|
||||
Number,
|
||||
),
|
||||
(
|
||||
":",
|
||||
Separator,
|
||||
),
|
||||
(
|
||||
"15",
|
||||
Number,
|
||||
),
|
||||
(
|
||||
":",
|
||||
Separator,
|
||||
),
|
||||
(
|
||||
" ",
|
||||
Space(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue