This commit is contained in:
2025-07-07 00:08:36 +02:00
parent 138df11710
commit 9511ae8176
8 changed files with 419 additions and 10 deletions

View File

@ -1,6 +0,0 @@
/// Build script: compile the LALRPOP grammar files under `src/markdown`
/// into Rust parser modules at build time.
fn main() {
    lalrpop::Configuration::new()
        // Only grammars in the markdown module are processed.
        .set_in_dir("./src/markdown")
        .process()
        // A grammar error should fail the build loudly.
        .unwrap();
}

View File

@ -1,6 +1,12 @@
use egui::text::{CCursorRange, LayoutJob};
use crate::{
    easy_mark::easy_mark_parser,
    markdown::{
        span::Span,
        tokenizer::{Token, TokenKind, tokenize},
    },
};
/// Highlight easymark, memoizing previous output to save CPU. /// Highlight easymark, memoizing previous output to save CPU.
/// ///
@ -29,6 +35,131 @@ impl MemoizedHighlighter {
} }
pub fn highlight_easymark( pub fn highlight_easymark(
egui_style: &egui::Style,
text: &str,
// TODO: hide special characters where cursor isn't
_cursor: Option<CCursorRange>,
) -> LayoutJob {
let mut job = LayoutJob::default();
let mut style = easy_mark_parser::Style::default();
let mut prev = TokenKind::Newline;
let tokens: Vec<_> = tokenize(text).collect();
let mut tokens = &tokens[..];
const CODE_INDENT: f32 = 10.0;
while !tokens.is_empty() {
let token = tokens.first().unwrap();
tokens = &tokens[1..];
let start_of_line = prev == TokenKind::Newline;
prev = token.kind;
match token.kind {
TokenKind::CodeBlock if start_of_line => {
let astyle = format_from_style(
egui_style,
&easy_mark_parser::Style {
code: true,
..Default::default()
},
);
let span = collect_until(
token,
&mut tokens,
series([TokenKind::Newline, TokenKind::CodeBlock]),
);
job.append(&*span, CODE_INDENT, astyle.clone());
style = Default::default();
continue;
}
TokenKind::Newline => style = easy_mark_parser::Style::default(),
TokenKind::Strong => style.strong ^= true,
TokenKind::Italic => style.italics ^= true,
TokenKind::Strikethrough => style.strikethrough ^= true,
TokenKind::Heading(_h) if start_of_line => style.heading = true,
TokenKind::Quote if start_of_line => style.quoted = true,
TokenKind::CodeBlock | TokenKind::Mono => {
style.code = true;
let span = collect_until(
token,
&mut tokens,
any_of([TokenKind::Mono, TokenKind::CodeBlock, TokenKind::Newline]),
);
job.append(&*span, 0.0, format_from_style(egui_style, &style));
style.code = false;
continue;
}
TokenKind::Heading(..) | TokenKind::Quote | TokenKind::Text => {}
}
job.append(&token.span, 0.0, format_from_style(egui_style, &style));
}
job
}
/// Build a matcher that succeeds when `N` consecutive tokens have exactly
/// the kinds listed in `of`, in order.
fn series<'a, const N: usize>(of: [TokenKind; N]) -> impl FnMut(&[Token<'a>; N]) -> bool {
    move |tokens| {
        tokens
            .iter()
            .zip(of.iter())
            .all(|(token, kind)| token.kind == *kind)
    }
}
/// Build a matcher that succeeds when a single token's kind is any of `these`.
fn any_of<'a, const N: usize>(these: [TokenKind; N]) -> impl FnMut(&[Token<'a>; 1]) -> bool {
    move |[token]| these.iter().any(|kind| *kind == token.kind)
}
/// Collect all tokens up to and including `pattern`, and merge them into a single span.
///
/// `N` determines how many specific and consecutive tokens we are looking for.
/// i.e. if we were looking for a [TokenKind::Newline] followed by a [TokenKind::Quote], `N`
/// would equal `2`.
///
/// `pattern` is a function that accepts an array of `N` tokens and returns `true` if they match,
/// i.e. if we should stop collecting. [any_of] and [series] can help to construct this function.
///
/// The collected tokens will be split off the head of the slice referred to by `tokens`.
/// If `pattern` never matches, everything remaining in `tokens` is consumed.
///
/// # Panic
/// Panics if `tokens` does not contain only consecutive adjacent spans.
fn collect_until<'a, const N: usize>(
    token: &Token<'a>,
    tokens: &mut &[Token<'a>],
    pattern: impl FnMut(&[Token<'a>; N]) -> bool,
) -> Span<'a>
where
    // Lets each length-N window of the slice be viewed as a fixed-size array.
    for<'b> &'b [Token<'a>; N]: TryFrom<&'b [Token<'a>]>,
{
    let mut windows = tokens
        .windows(N)
        // Infallible: `windows(N)` always yields slices of exactly length N.
        .map(|slice| <&[Token<'a>; N]>::try_from(slice).ok().unwrap());
    let split_at = match windows.position(pattern) {
        // Include the matching window itself in the consumed prefix.
        Some(i) => i + N,
        None => tokens.len(), // consume everything
    };
    let (consume, keep) = tokens.split_at(split_at);
    *tokens = keep;
    // Grow the trigger token's span over every consumed token; `try_merge`
    // only succeeds for adjacent spans, hence the documented panic.
    consume
        .iter()
        .fold(token.span.clone(), |span: Span<'_>, token| {
            span.try_merge(&token.span).unwrap()
        })
}
pub fn highlight_easymark_old(
egui_style: &egui::Style,
mut text: &str,

View File

@ -1,4 +1,3 @@
pub mod ast; pub mod ast;
pub mod span; pub mod span;
pub mod tokenizer;
lalrpop_util::lalrpop_mod!(grammar);

View File

@ -0,0 +1,46 @@
---
source: src/markdown/tokenizer.rs
expression: examples
---
- string: "just some normal text :D"
tokens:
- "Token { span: Span(0..24, \"just some normal text :D\"), kind: Text }"
- string: normal *bold* normal
tokens:
- "Token { span: Span(0..7, \"normal \"), kind: Text }"
- "Token { span: Span(7..8, \"*\"), kind: Strong }"
- "Token { span: Span(8..12, \"bold\"), kind: Text }"
- "Token { span: Span(12..13, \"*\"), kind: Strong }"
- "Token { span: Span(13..20, \" normal\"), kind: Text }"
- string: normal * maybe bold? * normal
tokens:
- "Token { span: Span(0..7, \"normal \"), kind: Text }"
- "Token { span: Span(7..8, \"*\"), kind: Strong }"
- "Token { span: Span(8..21, \" maybe bold? \"), kind: Text }"
- "Token { span: Span(21..22, \"*\"), kind: Strong }"
- "Token { span: Span(22..29, \" normal\"), kind: Text }"
- string: "```lang\ncode code code\n```"
tokens:
- "Token { span: Span(0..3, \"```\"), kind: CodeBlock }"
- "Token { span: Span(3..7, \"lang\"), kind: Text }"
- "Token { span: Span(7..8, \"\\n\"), kind: Newline }"
- "Token { span: Span(8..22, \"code code code\"), kind: Text }"
- "Token { span: Span(22..23, \"\\n\"), kind: Newline }"
- "Token { span: Span(23..26, \"```\"), kind: CodeBlock }"
- string: "*/``/*"
tokens:
- "Token { span: Span(0..1, \"*\"), kind: Strong }"
- "Token { span: Span(1..2, \"/\"), kind: Italic }"
- "Token { span: Span(2..3, \"`\"), kind: Mono }"
- "Token { span: Span(3..4, \"`\"), kind: Mono }"
- "Token { span: Span(4..5, \"/\"), kind: Italic }"
- "Token { span: Span(5..6, \"*\"), kind: Strong }"
- string: "*/`*/*/"
tokens:
- "Token { span: Span(0..1, \"*\"), kind: Strong }"
- "Token { span: Span(1..2, \"/\"), kind: Italic }"
- "Token { span: Span(2..3, \"`\"), kind: Mono }"
- "Token { span: Span(3..4, \"*\"), kind: Strong }"
- "Token { span: Span(4..5, \"/\"), kind: Italic }"
- "Token { span: Span(5..6, \"*\"), kind: Strong }"
- "Token { span: Span(6..7, \"/\"), kind: Italic }"

View File

@ -1,4 +1,7 @@
use std::{
    fmt,
    ops::{Deref, Range},
};
#[derive(Clone, Eq, PartialEq)] #[derive(Clone, Eq, PartialEq)]
pub struct Span<'a> { pub struct Span<'a> {
@ -31,6 +34,35 @@ impl<'a> Span<'a> {
pub fn complete_str(&self) -> Self { pub fn complete_str(&self) -> Self {
Self::new(self.complete_str) Self::new(self.complete_str)
} }
/// Split the span at byte offset `i`, returning `(head, tail)`.
///
/// Returns `None` when either sub-range is rejected by [`Self::get`]
/// (e.g. `i` out of bounds).
pub fn split_at(&self, i: usize) -> Option<(Self, Self)> {
    Some((self.get(0..i)?, self.get(i..self.range.len())?))
}
/// Try to merge the spans.
///
/// This only works if spans are pointing into the same backing buffer, and are adjacent.
pub fn try_merge(&self, other: &Self) -> Option<Self> {
    // Different backing buffers can never be merged.
    if self.complete_str.as_ptr() != other.complete_str.as_ptr() {
        return None;
    }
    // Accept adjacency in either order: self-then-other or other-then-self.
    let range = if self.range.end == other.range.start {
        self.range.start..other.range.end
    } else if self.range.start == other.range.end {
        other.range.start..self.range.end
    } else {
        return None;
    };
    Some(Self { range, ..*self })
}
} }
impl Deref for Span<'_> { impl Deref for Span<'_> {
@ -40,3 +72,12 @@ impl Deref for Span<'_> {
&self.complete_str[self.range.clone()] &self.complete_str[self.range.clone()]
} }
} }
/// Renders as `Span(<range>, <text>)`, e.g. `Span(0..3, "foo")`.
impl<'a> fmt::Debug for Span<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut tuple = f.debug_tuple("Span");
        tuple.field(&self.range);
        tuple.field(&self.deref());
        tuple.finish()
    }
}

146
src/markdown/tokenizer.rs Normal file
View File

@ -0,0 +1,146 @@
use std::iter;
use super::span::Span;
/// Heading level; the tokenizer maps `######` to [`Heading::H6`] down to
/// a single `#` for [`Heading::H1`].
///
/// NOTE(review): variants are declared smallest-first (H6..H1) — confirm
/// nothing relies on discriminant/derive ordering.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Heading {
    H6,
    H5,
    H4,
    H3,
    H2,
    H1,
}
/// The lexical class of a [`Token`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
    /// Three backticks (a code fence).
    ///
    /// NOTE(review): the earlier comment said "a newline followed by three
    /// backticks", but the tokenizer matches the fence anywhere in a line;
    /// start-of-line handling is left to the consumer.
    CodeBlock,
    /// A newline that isn't a codeblock
    Newline,
    /// `*` marker.
    Strong,
    /// `/` marker.
    Italic,
    /// A single backtick (inline code).
    Mono,
    /// `~` marker.
    Strikethrough,
    /// `>` marker.
    Quote,
    /// `#` through `######` (see [`Heading`]).
    Heading(Heading),
    /// Normal text
    Text,
}
/// One lexed token: the exact input slice it covers plus its class.
#[derive(Debug)]
pub struct Token<'a> {
    /// The slice of the source this token covers.
    pub span: Span<'a>,
    /// Lexical class of the token.
    pub kind: TokenKind,
}
/// Lazily lex `s` into a stream of [`Token`]s.
///
/// Marker characters become their own tokens; any run of characters
/// between markers is emitted as a single [`TokenKind::Text`] token.
pub fn tokenize<'a>(s: &'a str) -> impl Iterator<Item = Token<'a>> {
    // Ordered marker table: longer patterns must precede their prefixes
    // ("######" before "#", "```" before "`") because `find_map` below
    // returns the first match.
    const TOKENS: &[(&'static str, TokenKind)] = &[
        ("\n", TokenKind::Newline),
        ("######", TokenKind::Heading(Heading::H6)),
        ("#####", TokenKind::Heading(Heading::H5)),
        ("####", TokenKind::Heading(Heading::H4)),
        ("###", TokenKind::Heading(Heading::H3)),
        ("##", TokenKind::Heading(Heading::H2)),
        ("#", TokenKind::Heading(Heading::H1)),
        (">", TokenKind::Quote),
        ("*", TokenKind::Strong),
        ("/", TokenKind::Italic),
        ("~", TokenKind::Strikethrough),
        ("```", TokenKind::CodeBlock),
        ("`", TokenKind::Mono),
    ];
    // `s` shrinks from the front as tokens are emitted.
    let mut s = Span::new(s);
    // Byte count of the leading run already scanned and known to be plain
    // text, pending emission as a Text token.
    let mut yield_n: usize = 0;
    iter::from_fn(move || {
        loop {
            if s.is_empty() {
                return None;
            }
            // The entire remainder is plain text: flush it as one token.
            if yield_n == s.len() {
                let (token, rest) = s.split_at(s.len()).unwrap();
                let token = Token {
                    span: token,
                    kind: TokenKind::Text,
                };
                s = rest;
                return Some(token);
            }
            // Does a marker start at the current scan position?
            let token = TOKENS.iter().find_map(|(token_str, token_kind)| {
                s[yield_n..]
                    .starts_with(token_str)
                    .then(|| (*token_kind, token_str.len()))
            });
            let Some((kind, len)) = token else {
                // No marker: advance the scan by one char (char-boundary
                // safe). The '\0' fallback is unreachable here because
                // yield_n < s.len() was checked above.
                yield_n += s[yield_n..].chars().next().unwrap_or('\0').len_utf8();
                continue;
            };
            // Pending text precedes the marker: emit the text first; the
            // marker itself is re-found and emitted on the next call.
            if yield_n > 0 {
                let (token, rest) = s.split_at(yield_n).unwrap();
                let token = Token {
                    span: token,
                    kind: TokenKind::Text,
                };
                s = rest;
                yield_n = 0;
                return Some(token);
            }
            // Emit the marker token itself.
            let (token, rest) = s.split_at(len).unwrap();
            let token = Token { span: token, kind };
            s = rest;
            return Some(token);
        }
    })
}
#[cfg(test)]
mod tests {
    use serde::Serialize;

    use super::tokenize;

    /// Snapshot-test the tokenizer against representative inputs
    /// (plain text, inline markers, fences, pathological marker runs).
    #[test]
    fn test_tokenize() {
        let examples = [
            "just some normal text :D",
            "normal *bold* normal",
            "normal * maybe bold? * normal",
            "```lang\ncode code code\n```",
            "*/``/*",
            "*/`*/*/",
        ];

        /// One tokenized example: the input string and its tokens.
        /// (Renamed from `Result` to avoid shadowing `std::result::Result`;
        /// the struct name does not appear in the serialized YAML snapshot.)
        #[derive(Serialize)]
        struct Example {
            pub string: &'static str,
            /// Debug-printed tokens
            pub tokens: Vec<String>,
        }

        let examples = examples
            .into_iter()
            .map(|string| {
                // Closure param renamed `tokens` -> `token`: it is one token.
                let tokens = tokenize(string)
                    .map(|token| format!("{token:?}"))
                    .collect::<Vec<_>>();
                Example { string, tokens }
            })
            .collect::<Vec<_>>();
        insta::assert_yaml_snapshot!(examples);
    }
}

View File

@ -0,0 +1,44 @@
---
source: src/custom_code_block.rs
assertion_line: 133
expression: list
---
[
Line(
"\n",
),
Line(
"# Hello world\n",
),
Line(
"## Subheader\n",
),
Line(
"- 1\n",
),
CodeBlock {
key: "foo",
content: " whatever\n some code\n Hi mom!",
span: "```foo\n whatever\n some code\n Hi mom!\n```",
},
Line(
" \n",
),
Line(
"\n",
),
CodeBlock {
key: "` # wrong number of ticks, but that's ok",
content: " ``` # indented ticks",
span: "```` # wrong number of ticks, but that's ok\n ``` # indented ticks\n```\n",
},
Line(
"\n",
),
Line(
"``` # no closing ticks\n",
),
Line(
" ",
),
]

View File

@ -0,0 +1,8 @@
---
source: src/painting.rs
assertion_line: 695
expression: serialized
---
```handwriting
BQAAvAA8AEIAPABCAEIAPgBAAAAAAAQAAEIAQgC8ADwAAAAAAEIAPA==
```