diff --git a/src/easy_mark/easy_mark_highlighter.rs b/src/easy_mark/easy_mark_highlighter.rs deleted file mode 100644 index 0fdaeda..0000000 --- a/src/easy_mark/easy_mark_highlighter.rs +++ /dev/null @@ -1,245 +0,0 @@ -use egui::text::{CCursorRange, LayoutJob}; - -use crate::easy_mark::easy_mark_parser; - -/// Highlight easymark, memoizing previous output to save CPU. -/// -/// In practice, the highlighter is fast enough not to need any caching. -#[derive(Default)] -pub struct MemoizedHighlighter { - style: egui::Style, - code: String, - output: LayoutJob, -} - -impl MemoizedHighlighter { - pub fn highlight( - &mut self, - egui_style: &egui::Style, - code: &str, - cursor: Option, - ) -> LayoutJob { - if (&self.style, self.code.as_str()) != (egui_style, code) { - self.style = egui_style.clone(); - code.clone_into(&mut self.code); - self.output = highlight_easymark(egui_style, code, cursor); - } - self.output.clone() - } -} - -pub fn highlight_easymark( - egui_style: &egui::Style, - mut text: &str, - - // TODO: hide special characters where cursor isn't - _cursor: Option, -) -> LayoutJob { - let mut job = LayoutJob::default(); - let mut style = easy_mark_parser::Style::default(); - let mut start_of_line = true; - - const CODE_INDENT: f32 = 10.0; - - while !text.is_empty() { - if start_of_line && text.starts_with("```") { - let astyle = format_from_style( - egui_style, - &easy_mark_parser::Style { - code: true, - ..Default::default() - }, - ); - - // Render the initial backticks as spaces - text = &text[3..]; - job.append(" ", CODE_INDENT, astyle.clone()); - - match text.find("\n```") { - Some(n) => { - for line in text[..n + 1].lines() { - job.append(line, CODE_INDENT, astyle.clone()); - job.append("\n", 0.0, astyle.clone()); - } - // Render the final backticks as spaces - job.append(" ", CODE_INDENT, astyle); - text = &text[n + 4..]; - } - None => { - job.append(text, 0.0, astyle.clone()); - text = ""; - } - }; - style = Default::default(); - continue; - } - - if text.starts_with('`') { - style.code = true; - let end = text[1..] - .find(&['`', '\n'][..]) - .map_or_else(|| text.len(), |i| i + 2); - job.append(&text[..end], 0.0, format_from_style(egui_style, &style)); - text = &text[end..]; - style.code = false; - continue; - } - - let skip; - - // zero-width space - let _zws = "\u{200b}"; - - let mut apply_basic_style = - |text: &mut &str, - style: &mut easy_mark_parser::Style, - access: fn(&mut easy_mark_parser::Style) -> &mut bool| { - let skip = if *access(style) { - // Include the character that is ending this style: - job.append(&text[..1], 0.0, format_from_style(egui_style, style)); - *text = &text[1..]; - 0 - } else { - 1 - }; - *access(style) ^= true; - skip - }; - - if text.starts_with('*') { - skip = apply_basic_style(&mut text, &mut style, |style| &mut style.strong); - } else if text.starts_with('/') { - skip = apply_basic_style(&mut text, &mut style, |style| &mut style.italics); - } else if text.starts_with('_') { - skip = apply_basic_style(&mut text, &mut style, |style| &mut style.underline); - } else if text.starts_with('$') { - skip = apply_basic_style(&mut text, &mut style, |style| &mut style.small); - } else if text.starts_with('~') { - skip = apply_basic_style(&mut text, &mut style, |style| &mut style.strikethrough); - } else if text.starts_with('^') { - skip = apply_basic_style(&mut text, &mut style, |style| &mut style.raised); - } else if text.starts_with('\\') && text.len() >= 2 { - skip = 2; - } else if start_of_line && text.starts_with(' ') { - // we don't preview indentation, because it is confusing - skip = 1; - } else if start_of_line && text.starts_with("###### ") { - style.heading = true; - skip = 7; - } else if start_of_line && text.starts_with("##### ") { - style.heading = true; - skip = 6; - } else if start_of_line && text.starts_with("#### ") { - style.heading = true; - skip = 5; - } else if start_of_line && text.starts_with("### ") { - style.heading = true; - skip = 4; - } else if start_of_line && text.starts_with("## ") { - style.heading = true; - skip = 3; - } else if start_of_line && text.starts_with("# ") { - style.heading = true; - skip = 2; - } else if start_of_line && text.starts_with("> ") { - style.quoted = true; - skip = 2; - // we don't preview indentation, because it is confusing - } else if start_of_line && text.trim_start().starts_with("- ") { - job.append("• ", 0.0, format_from_style(egui_style, &style)); - text = &text[2..]; - skip = 0; - // we don't preview indentation, because it is confusing - } else { - skip = 0; - } - // Note: we don't preview underline, strikethrough and italics because it confuses things. - - // Swallow everything up to the next special character: - let line_end = text[skip..] - .find('\n') - .map_or_else(|| text.len(), |i| (skip + i + 1)); - let end = text[skip..] - .find(&['*', '`', '~', '_', '/', '$', '^', '\\', '<', '['][..]) - .map_or_else(|| text.len(), |i| (skip + i).max(1)); - - if line_end <= end { - job.append( - &text[..line_end], - 0.0, - format_from_style(egui_style, &style), - ); - text = &text[line_end..]; - start_of_line = true; - style = Default::default(); - } else { - job.append(&text[..end], 0.0, format_from_style(egui_style, &style)); - text = &text[end..]; - start_of_line = false; - } - } - - job -} - -fn format_from_style( - egui_style: &egui::Style, - emark_style: &easy_mark_parser::Style, -) -> egui::text::TextFormat { - use egui::{Align, Color32, Stroke, TextStyle}; - - let color = if emark_style.code { - egui_style.visuals.strong_text_color() * Color32::from_rgb(0x44, 0xff, 0x44) - } else if emark_style.strong || emark_style.heading { - egui_style.visuals.strong_text_color() - } else if emark_style.quoted { - egui_style.visuals.weak_text_color() - } else { - egui_style.visuals.text_color() - }; - - let text_style = if emark_style.heading { - TextStyle::Heading - } else if emark_style.code { - TextStyle::Monospace - } else if emark_style.small | emark_style.raised { - TextStyle::Small - } else { - TextStyle::Body - }; - - let background = if emark_style.code { - egui_style.visuals.code_bg_color - } else { - Color32::TRANSPARENT - }; - - let underline = if emark_style.underline { - Stroke::new(1.0, color) - } else { - Stroke::NONE - }; - - let strikethrough = if emark_style.strikethrough { - Stroke::new(1.0, color) - } else { - Stroke::NONE - }; - - let valign = if emark_style.raised { - Align::TOP - } else { - Align::BOTTOM - }; - - egui::text::TextFormat { - font_id: text_style.resolve(egui_style), - color, - background, - italics: emark_style.italics, - underline, - strikethrough, - valign, - ..Default::default() - } -} diff --git a/src/markdown/ast.rs b/src/markdown/ast.rs index 719d90f..b342c85 100644 --- a/src/markdown/ast.rs +++ b/src/markdown/ast.rs @@ -40,7 +40,7 @@ pub struct Style { pub raised: bool, } -pub enum MarkdownItem<'a> { +pub enum Item<'a> { Text { span: Span<'a>, style: Style, diff --git a/src/markdown/highlighter.rs b/src/markdown/highlighter.rs index f24816a..a471c33 100644 --- a/src/markdown/highlighter.rs +++ b/src/markdown/highlighter.rs @@ -1,9 +1,6 @@ use egui::text::{CCursorRange, LayoutJob}; -use crate::markdown::{ - span::Span, - tokenizer::{Heading, Token, TokenKind, tokenize}, -}; +use super::{Item, Style, parse}; /// Highlight markdown, caching previous output to save CPU. #[derive(Default)] @@ -13,36 +10,6 @@ pub struct MemoizedHighlighter { output: LayoutJob, } -#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -pub struct Style { - /// # heading (large text) - pub heading: Option, - - /// > quoted (slightly dimmer color or other font style) - pub quoted: bool, - - /// `code` (monospace, some other color) - pub code: bool, - - /// self.strong* (emphasized, e.g. bold) - pub strong: bool, - - /// _underline_ - pub underline: bool, - - /// ~strikethrough~ - pub strikethrough: bool, - - /// /italics/ - pub italics: bool, - - /// $small$ - pub small: bool, - - /// ^raised^ - pub raised: bool, -} - impl MemoizedHighlighter { pub fn highlight( &mut self, @@ -67,189 +34,72 @@ pub fn highlight_markdown( _cursor: Option, ) -> LayoutJob { let mut job = LayoutJob::default(); - let mut style = Style::default(); - let mut prev = TokenKind::Newline; + let code_style = Style { + code: true, + ..Default::default() + }; - let tokens: Vec<_> = tokenize(text).collect(); - let mut tokens = &tokens[..]; - - const CODE_INDENT: f32 = 10.0; - - while !tokens.is_empty() { - let token = tokens.first().unwrap(); - tokens = &tokens[1..]; - - let start_of_line = prev == TokenKind::Newline; - prev = token.kind; - - let mut basic_style: Option &mut bool> = None; - - match token.kind { - TokenKind::CodeBlock if start_of_line => { - let span = collect_until( - token, - &mut tokens, - series([TokenKind::Newline, TokenKind::CodeBlock]), - ); - - let code_style = format_from_style( - egui_style, - &Style { - code: true, - ..Default::default() - }, - ); - - job.append(&*span, CODE_INDENT, code_style.clone()); - style = Default::default(); - continue; + for item in parse(text) { + match item { + Item::Text { span, style } => { + job.append(&span, 0.0, format_from_style(egui_style, &style)); } - - TokenKind::Newline => style = Style::default(), - - TokenKind::Strong => basic_style = Some(|s| &mut s.strong), - TokenKind::Italic => basic_style = Some(|s| &mut s.italics), - TokenKind::Strikethrough => basic_style = Some(|s| &mut s.strikethrough), - - TokenKind::CodeBlock | TokenKind::Mono => { - style.code = true; - let span = collect_until( - token, - &mut tokens, - any_of([TokenKind::Mono, TokenKind::CodeBlock, TokenKind::Newline]), - ); - job.append(&*span, 0.0, format_from_style(egui_style, &style)); - style.code = false; - continue; + Item::CodeBlock { + all, + language: _, // TODO + code: _, // TODO + } => { + job.append(&all, 100.0, format_from_style(egui_style, &code_style)); } - - // TODO: different heading strengths - TokenKind::Heading(h) if start_of_line => style.heading = Some(h), - TokenKind::Quote if start_of_line => style.quoted = true, - - // TODO: indented list entries - TokenKind::ListEntry if start_of_line => { - job.append("• ", 0.0, format_from_style(egui_style, &style)); - continue; - } - - TokenKind::Text - // the following tokens are only richly rendered if encountered e.g. at start_of_line. - | TokenKind::Indentation - | TokenKind::ListEntry - | TokenKind::Heading(..) - | TokenKind::Quote => {} } - - // if we encountered a marker for Bold, Italic, or Strikethrough, toggle that style and - // render the token with the style enabled. - if let Some(basic_style) = basic_style { - let mut tmp_style = style; - *basic_style(&mut tmp_style) = true; - *basic_style(&mut style) ^= true; // toggle - job.append(&token.span, 0.0, format_from_style(egui_style, &tmp_style)); - continue; - } - - job.append(&token.span, 0.0, format_from_style(egui_style, &style)); } job } -fn series<'a, const N: usize>(of: [TokenKind; N]) -> impl FnMut(&[Token<'a>; N]) -> bool { - move |token| { - of.iter() - .zip(token) - .all(|(kind, token)| kind == &token.kind) - } -} - -fn any_of<'a, const N: usize>(these: [TokenKind; N]) -> impl FnMut(&[Token<'a>; 1]) -> bool { - move |[token]| these.contains(&token.kind) -} - -/// Collect all tokens up to and including `pattern`, and merge them into a signle span. -/// -/// `N` determines how many specific and consecutive tokens we are looking for. -/// i.e. if we were looking for a [TokenKind::Newline] followed by a [TokenKind::Quote], `N` -/// would equal `2`. -/// -/// `pattern` is a function that accepts an array of `N` tokens and returns `true` if they match, -/// i.e. if we should stop collecting. [any_of] and [series] can help to construct this function. -/// -/// The collected tokens will be split off the head of the slice referred to by `tokens`. -/// -/// # Panic -/// Panics if `tokens` does not contain only consecutive adjacent spans. -fn collect_until<'a, const N: usize>( - token: &Token<'a>, - tokens: &mut &[Token<'a>], - pattern: impl FnMut(&[Token<'a>; N]) -> bool, -) -> Span<'a> -where - for<'b> &'b [Token<'a>; N]: TryFrom<&'b [Token<'a>]>, -{ - let mut windows = tokens - .windows(N) - .map(|slice| <&[Token<'a>; N]>::try_from(slice).ok().unwrap()); - - let split_at = match windows.position(pattern) { - Some(i) => i + N, - None => tokens.len(), // consume everything - }; - - let (consume, keep) = tokens.split_at(split_at); - *tokens = keep; - - consume - .iter() - .fold(token.span.clone(), |span: Span<'_>, token| { - span.try_merge(&token.span).unwrap() - }) -} - -fn format_from_style(egui_style: &egui::Style, emark_style: &Style) -> egui::text::TextFormat { +fn format_from_style(egui_style: &egui::Style, style: &Style) -> egui::text::TextFormat { use egui::{Align, Color32, Stroke, TextStyle}; - let color = if emark_style.strong || emark_style.heading.is_some() { + let color = if style.code { + egui_style.visuals.strong_text_color() * Color32::GREEN + } else if style.strong || style.heading.is_some() { egui_style.visuals.strong_text_color() - } else if emark_style.quoted { + } else if style.quoted { egui_style.visuals.weak_text_color() } else { egui_style.visuals.text_color() }; - let text_style = if emark_style.heading.is_some() { + let text_style = if style.heading.is_some() { TextStyle::Heading - } else if emark_style.code { + } else if style.code { TextStyle::Monospace - } else if emark_style.small | emark_style.raised { + } else if style.small | style.raised { TextStyle::Small } else { TextStyle::Body }; - let background = if emark_style.code { + let background = if style.code { egui_style.visuals.code_bg_color } else { Color32::TRANSPARENT }; - let underline = if emark_style.underline { + let underline = if style.underline { Stroke::new(1.0, color) } else { Stroke::NONE }; - let strikethrough = if emark_style.strikethrough { + let strikethrough = if style.strikethrough { Stroke::new(1.0, color) } else { Stroke::NONE }; - let valign = if emark_style.raised { + let valign = if style.raised { Align::TOP } else { Align::BOTTOM @@ -259,7 +109,7 @@ fn format_from_style(egui_style: &egui::Style, emark_style: &Style) -> egui::tex font_id: text_style.resolve(egui_style), color, background, - italics: emark_style.italics, + italics: style.italics, underline, strikethrough, valign, diff --git a/src/markdown/mod.rs b/src/markdown/mod.rs index 936a025..f379c99 100644 --- a/src/markdown/mod.rs +++ b/src/markdown/mod.rs @@ -1,7 +1,11 @@ +mod ast; mod highlighter; +mod parser; mod span; mod tokenizer; +pub use ast::*; pub use highlighter::*; +pub use parser::*; pub use span::*; pub use tokenizer::*; diff --git a/src/markdown/parser.rs b/src/markdown/parser.rs new file mode 100644 index 0000000..5bf7b68 --- /dev/null +++ b/src/markdown/parser.rs @@ -0,0 +1,172 @@ +use std::iter::{self, once}; + +use crate::markdown::Style; + +use super::{Item, Span, Token, TokenKind, tokenize}; + +pub fn parse(text: &str) -> Vec> { + let tokens: Vec<_> = tokenize(text).collect(); + parse_tokens(&tokens) +} + +pub fn parse_tokens<'a>(mut tokens: &[Token<'a>]) -> Vec> { + // pretend that the first token was preceeded by a newline. + // means we don't have to handle the first token as a special case. + let mut prev = TokenKind::Newline; + + let mut style = Style::default(); + + let mono_style = Style { + code: true, + ..Default::default() + }; + + iter::from_fn(move || { + if tokens.is_empty() { + return None; + } + + let token = tokens.first().unwrap(); + tokens = &tokens[1..]; + + let start_of_line = prev == TokenKind::Newline; + prev = token.kind; + + let mut basic_style: Option &mut bool> = None; + + match token.kind { + TokenKind::CodeBlock if start_of_line => { + let language = collect_until( + None, + &mut tokens, + any_of([TokenKind::Newline]), + ); + + let code = collect_until( + None, + &mut tokens, + series([TokenKind::Newline, TokenKind::CodeBlock]), + ); + + let all = [ + &token.span, + &language, + &code, + ].into_iter().fold(Span::empty(), |a, b| a.try_merge(b).unwrap()); + + let language = language.trim_end_matches("\n"); + let code = code.trim_end_matches("\n```"); + + return Some(Item::CodeBlock { all, language, code }); + } + + TokenKind::Newline => style = Style::default(), + + TokenKind::Strong => basic_style = Some(|s| &mut s.strong), + TokenKind::Italic => basic_style = Some(|s| &mut s.italics), + TokenKind::Strikethrough => basic_style = Some(|s| &mut s.strikethrough), + + TokenKind::CodeBlock | TokenKind::Mono => { + let span = collect_until( + Some(token), + &mut tokens, + any_of([TokenKind::Mono, TokenKind::CodeBlock, TokenKind::Newline]), + ); + + return Some(Item::Text { span, style: mono_style}); + } + + // TODO: different heading strengths + TokenKind::Heading(h) if start_of_line => style.heading = Some(h), + TokenKind::Quote if start_of_line => style.quoted = true, + + // TODO: replace dashes with dots + //// TODO: indented list entries + //TokenKind::ListEntry if start_of_line => { + // job.append("• ", 0.0, format_from_style(egui_style, &style)); + // continue; + //} + + TokenKind::Text + // the following tokens are only richly rendered if encountered e.g. at start_of_line. + | TokenKind::Indentation + | TokenKind::ListEntry + | TokenKind::Heading(..) + | TokenKind::Quote => {} + } + + // if we encountered a marker for Bold, Italic, or Strikethrough, toggle that style and + // render the token with the style enabled. + if let Some(basic_style) = basic_style { + let mut tmp_style = style; + *basic_style(&mut tmp_style) = true; + *basic_style(&mut style) ^= true; // toggle + return Some(Item::Text { + span: token.span.clone(), + style: tmp_style, + }); + } + + Some(Item::Text { + span: token.span.clone(), + style, + }) + }) + .collect() +} + +fn series<'a, const N: usize>(of: [TokenKind; N]) -> impl FnMut(&[Token<'a>; N]) -> bool { + move |token| { + of.iter() + .zip(token) + .all(|(kind, token)| kind == &token.kind) + } +} + +fn any_of<'a, const N: usize>(these: [TokenKind; N]) -> impl FnMut(&[Token<'a>; 1]) -> bool { + move |[token]| these.contains(&token.kind) +} + +/// Collect all tokens up to and including `pattern`, and merge them into a signle span. +/// +/// `N` determines how many specific and consecutive tokens we are looking for. +/// i.e. if we were looking for a [TokenKind::Newline] followed by a [TokenKind::Quote], `N` +/// would equal `2`. +/// +/// `pattern` is a function that accepts an array of `N` tokens and returns `true` if they match, +/// i.e. if we should stop collecting. [any_of] and [series] can help to construct this function. +/// +/// The collected tokens will be split off the head of the slice referred to by `tokens`. +/// +/// # Panic +/// Panics if `tokens` does not contain only consecutive adjacent spans. +fn collect_until<'a, const N: usize>( + first_token: Option<&Token<'a>>, + tokens: &mut &[Token<'a>], + pattern: impl FnMut(&[Token<'a>; N]) -> bool, +) -> Span<'a> +where + // &[T; N]: TryFrom<&[T]> + for<'b> &'b [Token<'a>; N]: TryFrom<&'b [Token<'a>]>, +{ + let mut windows = tokens.windows(N).map(|slice| { + <&[Token<'a>; N]>::try_from(slice) + .ok() + .expect("`windows` promises to return slices of length N") + }); + + let split_at = match windows.position(pattern) { + Some(i) => i + N, + None => tokens.len(), // consume everything + }; + + let (consume, keep) = tokens.split_at(split_at); + *tokens = keep; + + once(first_token) + .flatten() + .chain(consume) + .fold(Span::empty(), |span: Span<'_>, token| { + span.try_merge(&token.span).unwrap() + }) +} diff --git a/src/markdown/span.rs b/src/markdown/span.rs index 3a5e108..02a7463 100644 --- a/src/markdown/span.rs +++ b/src/markdown/span.rs @@ -3,6 +3,8 @@ use std::{ ops::{Deref, Range}, }; +use eyre::{bail, eyre}; + #[derive(Clone, Eq, PartialEq)] pub struct Span<'a> { complete_str: &'a str, @@ -17,6 +19,13 @@ impl<'a> Span<'a> { } } + pub const fn empty() -> Self { + Span { + complete_str: "", + range: 0..0, + } + } + pub fn get(&self, slice: Range) -> Option { let start = self.range.start.checked_add(slice.start)?; let end = self.range.start.checked_add(slice.end)?; @@ -41,26 +50,49 @@ impl<'a> Span<'a> { Some((head, tail)) } + pub fn trim_end_matches(&self, p: &str) -> Self { + if !self.ends_with(p) { + return self.clone(); + } + + Self { + range: self.range.start..self.range.end - p.len(), + complete_str: self.complete_str, + } + } + /// Try to merge the spans. /// + /// If either spans is empty, this just returns the other one. /// This only works if spans are pointing into the same backing buffer, and are adjacent. - pub fn try_merge(&self, other: &Self) -> Option { + pub fn try_merge(&self, other: &Self) -> eyre::Result { + if self.is_empty() { + return Ok(other.clone()); + } + + if other.is_empty() { + return Ok(self.clone()); + } + if self.complete_str.as_ptr() != other.complete_str.as_ptr() { - return None; + bail!("Can't merge different strings"); } if self.range.end == other.range.start { - Some(Self { + Ok(Self { range: self.range.start..other.range.end, ..*self }) } else if self.range.start == other.range.end { - Some(Self { + Ok(Self { range: other.range.start..self.range.end, ..*self }) } else { - None + Err(eyre!("String: {:?}", self.complete_str) + .wrap_err(eyre!("Span 2: {:?}", other.deref())) + .wrap_err(eyre!("Span 1: {:?}", self.deref())) + .wrap_err("Can't merge disjoint string spans")) } } } diff --git a/src/markdown/tokenizer.rs b/src/markdown/tokenizer.rs index 7aed4cb..f2c6c73 100644 --- a/src/markdown/tokenizer.rs +++ b/src/markdown/tokenizer.rs @@ -1,16 +1,6 @@ use std::iter; -use super::span::Span; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Heading { - H6, - H5, - H4, - H3, - H2, - H1, -} +use super::{Heading, span::Span}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TokenKind {