Merge branch 'markdown-parser'

2025-07-07 11:44:22 +02:00
parent 6e59cb86dc 7f93084e64
commit e0fd726f02
13 changed files with 638 additions and 357 deletions
--- a/src/easy_mark/easy_mark_parser.rs
+++ b/src/easy_mark/easy_mark_parser.rs
@@ -1,346 +0,0 @@
-//! A parser for `EasyMark`: a very simple markup language.
-//!
-//! WARNING: `EasyMark` is subject to change.
-//
-//! # `EasyMark` design goals:
-//! 1. easy to parse
-//! 2. easy to learn
-//! 3. similar to markdown
-
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub enum Item<'a> {
-    /// `\n`
-    // TODO(emilk): add Style here so empty heading still uses up the right amount of space.
-    Newline,
-
-    /// Text
-    Text(Style, &'a str),
-
-    /// title, url
-    Hyperlink(Style, &'a str, &'a str),
-
-    /// leading space before e.g. a [`Self::BulletPoint`].
-    Indentation(usize),
-
-    /// >
-    QuoteIndent,
-
-    /// - a point well made.
-    BulletPoint,
-
-    /// 1. numbered list. The string is the number(s).
-    NumberedPoint(&'a str),
-
-    /// ---
-    Separator,
-
-    /// language, code
-    CodeBlock(&'a str, &'a str),
-}
-
-#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
-pub struct Style {
-    /// # heading (large text)
-    pub heading: bool,
-
-    /// > quoted (slightly dimmer color or other font style)
-    pub quoted: bool,
-
-    /// `code` (monospace, some other color)
-    pub code: bool,
-
-    /// *strong* (emphasized, e.g. bold)
-    pub strong: bool,
-
-    /// _underline_
-    pub underline: bool,
-
-    /// ~strikethrough~
-    pub strikethrough: bool,
-
-    /// /italics/
-    pub italics: bool,
-
-    /// $small$
-    pub small: bool,
-
-    /// ^raised^
-    pub raised: bool,
-}
-
-/// Parser for the `EasyMark` markup language.
-pub struct Parser<'a> {
-    /// The remainder of the input text
-    s: &'a str,
-
-    /// Are we at the start of a line?
-    start_of_line: bool,
-
-    /// Current style. Reset after a newline.
-    style: Style,
-}
-
-impl<'a> Parser<'a> {
-    pub fn new(s: &'a str) -> Self {
-        Self {
-            s,
-            start_of_line: true,
-            style: Style::default(),
-        }
-    }
-
-    /// `1. `, `42. ` etc.
-    fn numbered_list(&mut self) -> Option<Item<'a>> {
-        let n_digits = self.s.chars().take_while(|c| c.is_ascii_digit()).count();
-        if n_digits > 0 && self.s.chars().skip(n_digits).take(2).eq(". ".chars()) {
-            let number = &self.s[..n_digits];
-            self.s = &self.s[(n_digits + 2)..];
-            self.start_of_line = false;
-            return Some(Item::NumberedPoint(number));
-        }
-        None
-    }
-
-    // ```{language}\n{code}```
-    fn code_block(&mut self) -> Option<Item<'a>> {
-        if let Some(language_start) = self.s.strip_prefix("```") {
-            if let Some(newline) = language_start.find('\n') {
-                let language = &language_start[..newline];
-                let code_start = &language_start[newline + 1..];
-                if let Some(end) = code_start.find("\n```") {
-                    let code = &code_start[..end].trim();
-                    self.s = &code_start[end + 4..];
-                    self.start_of_line = false;
-                    return Some(Item::CodeBlock(language, code));
-                } else {
-                    self.s = "";
-                    return Some(Item::CodeBlock(language, code_start));
-                }
-            }
-        }
-        None
-    }
-
-    // `code`
-    fn inline_code(&mut self) -> Option<Item<'a>> {
-        if let Some(rest) = self.s.strip_prefix('`') {
-            self.s = rest;
-            self.start_of_line = false;
-            self.style.code = true;
-            let rest_of_line = &self.s[..self.s.find('\n').unwrap_or(self.s.len())];
-            if let Some(end) = rest_of_line.find('`') {
-                let item = Item::Text(self.style, &self.s[..end]);
-                self.s = &self.s[end + 1..];
-                self.style.code = false;
-                return Some(item);
-            } else {
-                let end = rest_of_line.len();
-                let item = Item::Text(self.style, rest_of_line);
-                self.s = &self.s[end..];
-                self.style.code = false;
-                return Some(item);
-            }
-        }
-        None
-    }
-
-    /// `<url>` or `[link](url)`
-    fn url(&mut self) -> Option<Item<'a>> {
-        if self.s.starts_with('<') {
-            let this_line = &self.s[..self.s.find('\n').unwrap_or(self.s.len())];
-            if let Some(url_end) = this_line.find('>') {
-                let url = &self.s[1..url_end];
-                self.s = &self.s[url_end + 1..];
-                self.start_of_line = false;
-                return Some(Item::Hyperlink(self.style, url, url));
-            }
-        }
-
-        // [text](url)
-        if self.s.starts_with('[') {
-            let this_line = &self.s[..self.s.find('\n').unwrap_or(self.s.len())];
-            if let Some(bracket_end) = this_line.find(']') {
-                let text = &this_line[1..bracket_end];
-                if this_line[bracket_end + 1..].starts_with('(') {
-                    if let Some(parens_end) = this_line[bracket_end + 2..].find(')') {
-                        let parens_end = bracket_end + 2 + parens_end;
-                        let url = &self.s[bracket_end + 2..parens_end];
-                        self.s = &self.s[parens_end + 1..];
-                        self.start_of_line = false;
-                        return Some(Item::Hyperlink(self.style, text, url));
-                    }
-                }
-            }
-        }
-        None
-    }
-}
-
-impl<'a> Iterator for Parser<'a> {
-    type Item = Item<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            if self.s.is_empty() {
-                return None;
-            }
-
-            // \n
-            if self.s.starts_with('\n') {
-                self.s = &self.s[1..];
-                self.start_of_line = true;
-                self.style = Style::default();
-                return Some(Item::Newline);
-            }
-
-            // Ignore line break (continue on the same line)
-            if self.s.starts_with("\\\n") && self.s.len() >= 2 {
-                self.s = &self.s[2..];
-                self.start_of_line = false;
-                continue;
-            }
-
-            // \ escape (to show e.g. a backtick)
-            if self.s.starts_with('\\') && self.s.len() >= 2 {
-                let text = &self.s[1..2];
-                self.s = &self.s[2..];
-                self.start_of_line = false;
-                return Some(Item::Text(self.style, text));
-            }
-
-            if self.start_of_line {
-                // leading space (indentation)
-                if self.s.starts_with(' ') {
-                    let length = self.s.find(|c| c != ' ').unwrap_or(self.s.len());
-                    self.s = &self.s[length..];
-                    self.start_of_line = true; // indentation doesn't count
-                    return Some(Item::Indentation(length));
-                }
-
-                // # Heading
-                if let Some(after) = self.s.strip_prefix("# ") {
-                    self.s = after;
-                    self.start_of_line = false;
-                    self.style.heading = true;
-                    continue;
-                }
-
-                // > quote
-                if let Some(after) = self.s.strip_prefix("> ") {
-                    self.s = after;
-                    self.start_of_line = true; // quote indentation doesn't count
-                    self.style.quoted = true;
-                    return Some(Item::QuoteIndent);
-                }
-
-                // - bullet point
-                if self.s.starts_with("- ") {
-                    self.s = &self.s[2..];
-                    self.start_of_line = false;
-                    return Some(Item::BulletPoint);
-                }
-
-                // `1. `, `42. ` etc.
-                if let Some(item) = self.numbered_list() {
-                    return Some(item);
-                }
-
-                // --- separator
-                if let Some(after) = self.s.strip_prefix("---") {
-                    self.s = after.trim_start_matches('-'); // remove extra dashes
-                    self.s = self.s.strip_prefix('\n').unwrap_or(self.s); // remove trailing newline
-                    self.start_of_line = false;
-                    return Some(Item::Separator);
-                }
-
-                // ```{language}\n{code}```
-                if let Some(item) = self.code_block() {
-                    return Some(item);
-                }
-            }
-
-            // `code`
-            if let Some(item) = self.inline_code() {
-                return Some(item);
-            }
-
-            if let Some(rest) = self.s.strip_prefix('*') {
-                self.s = rest;
-                self.start_of_line = false;
-                self.style.strong = !self.style.strong;
-                continue;
-            }
-            if let Some(rest) = self.s.strip_prefix('_') {
-                self.s = rest;
-                self.start_of_line = false;
-                self.style.underline = !self.style.underline;
-                continue;
-            }
-            if let Some(rest) = self.s.strip_prefix('~') {
-                self.s = rest;
-                self.start_of_line = false;
-                self.style.strikethrough = !self.style.strikethrough;
-                continue;
-            }
-            if let Some(rest) = self.s.strip_prefix('/') {
-                self.s = rest;
-                self.start_of_line = false;
-                self.style.italics = !self.style.italics;
-                continue;
-            }
-            if let Some(rest) = self.s.strip_prefix('$') {
-                self.s = rest;
-                self.start_of_line = false;
-                self.style.small = !self.style.small;
-                continue;
-            }
-            if let Some(rest) = self.s.strip_prefix('^') {
-                self.s = rest;
-                self.start_of_line = false;
-                self.style.raised = !self.style.raised;
-                continue;
-            }
-
-            // `<url>` or `[link](url)`
-            if let Some(item) = self.url() {
-                return Some(item);
-            }
-
-            // Swallow everything up to the next special character:
-            let end = self
-                .s
-                .find(&['*', '`', '~', '_', '/', '$', '^', '\\', '<', '[', '\n'][..])
-                .map_or_else(|| self.s.len(), |special| special.max(1));
-
-            let item = Item::Text(self.style, &self.s[..end]);
-            self.s = &self.s[end..];
-            self.start_of_line = false;
-            return Some(item);
-        }
-    }
-}
-
-#[test]
-fn test_easy_mark_parser() {
-    let items: Vec<_> = Parser::new("~strikethrough `code`~").collect();
-    assert_eq!(
-        items,
-        vec![
-            Item::Text(
-                Style {
-                    strikethrough: true,
-                    ..Default::default()
-                },
-                "strikethrough "
-            ),
-            Item::Text(
-                Style {
-                    code: true,
-                    strikethrough: true,
-                    ..Default::default()
-                },
-                "code"
-            ),
-        ]
-    );
-}
--- a/src/easy_mark/mod.rs
+++ b/src/easy_mark/mod.rs
@@ -1,7 +0,0 @@
-//! Experimental markup language
-
-mod easy_mark_highlighter;
-pub mod easy_mark_parser;
-
-pub use easy_mark_highlighter::{MemoizedHighlighter, highlight_easymark};
-pub use easy_mark_parser as parser;
--- a/src/handwriting/advanced-example.md
+++ b/src/handwriting/advanced-example.md
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,10 +3,10 @@
 pub mod app;
 pub mod constants;
 pub mod custom_code_block;
-pub mod easy_mark;
 pub mod file_editor;
 pub mod folder;
 pub mod handwriting;
+pub mod markdown;
 pub mod preferences;
 pub mod rasterizer;
 pub mod text_editor;
--- a/src/markdown/ast.rs
+++ b/src/markdown/ast.rs
@@ -0,0 +1,54 @@
+use super::span::Span;
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum Heading {
+    H1,
+    H2,
+    H3,
+    H4,
+    H5,
+    H6,
+}
+
+#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
+pub struct Style {
+    /// # heading (large text)
+    pub heading: Option<Heading>,
+
+    /// > quoted (slightly dimmer color or other font style)
+    pub quoted: bool,
+
+    /// `code` (monospace, some other color)
+    pub code: bool,
+
+    /// *strong* (emphasized, e.g. bold)
+    pub strong: bool,
+
+    /// _underline_
+    pub underline: bool,
+
+    /// ~strikethrough~
+    pub strikethrough: bool,
+
+    /// /italics/
+    pub italics: bool,
+
+    /// $small$
+    pub small: bool,
+
+    /// ^raised^
+    pub raised: bool,
+}
+
+pub enum MarkdownItem<'a> {
+    Text {
+        span: Span<'a>,
+        style: Style,
+    },
+
+    CodeBlock {
+        all: Span<'a>,
+        language: Span<'a>,
+        code: Span<'a>,
+    },
+}
--- a/src/markdown/grammar.lalrpop
+++ b/src/markdown/grammar.lalrpop
@@ -0,0 +1,10 @@
+use std::str::FromStr;
+
+grammar;
+
+pub Term: i32 = {
+    <n:Num> => n,
+    "(" <t:Term> ")" => t,
+};
+
+Num: i32 = <s:r"[0-9]+"> => i32::from_str(s).unwrap();
--- a/src/markdown/highlighter.rs
+++ b/src/markdown/highlighter.rs
@@ -0,0 +1,268 @@
+use egui::text::{CCursorRange, LayoutJob};
+
+use crate::markdown::{
+    span::Span,
+    tokenizer::{Heading, Token, TokenKind, tokenize},
+};
+
+/// Highlight markdown, caching previous output to save CPU.
+#[derive(Default)]
+pub struct MemoizedHighlighter {
+    style: egui::Style,
+    code: String,
+    output: LayoutJob,
+}
+
+#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
+pub struct Style {
+    /// # heading (large text)
+    pub heading: Option<Heading>,
+
+    /// > quoted (slightly dimmer color or other font style)
+    pub quoted: bool,
+
+    /// `code` (monospace, some other color)
+    pub code: bool,
+
+    /// *strong* (emphasized, e.g. bold)
+    pub strong: bool,
+
+    /// underlined text (currently not enabled by any markdown token)
+    pub underline: bool,
+
+    /// ~strikethrough~
+    pub strikethrough: bool,
+
+    /// _italics_
+    pub italics: bool,
+
+    /// small text (currently not enabled by any markdown token)
+    pub small: bool,
+
+    /// raised text (currently not enabled by any markdown token)
+    pub raised: bool,
+}
+
+impl MemoizedHighlighter {
+    pub fn highlight(
+        &mut self,
+        egui_style: &egui::Style,
+        code: &str,
+        cursor: Option<CCursorRange>,
+    ) -> LayoutJob {
+        if (&self.style, self.code.as_str()) != (egui_style, code) {
+            self.style = egui_style.clone();
+            code.clone_into(&mut self.code);
+            self.output = highlight_markdown(egui_style, code, cursor);
+        }
+        self.output.clone()
+    }
+}
+
+pub fn highlight_markdown(
+    egui_style: &egui::Style,
+    text: &str,
+
+    // TODO: hide special characters where cursor isn't
+    _cursor: Option<CCursorRange>,
+) -> LayoutJob {
+    let mut job = LayoutJob::default();
+    let mut style = Style::default();
+
+    let mut prev = TokenKind::Newline;
+
+    let tokens: Vec<_> = tokenize(text).collect();
+    let mut tokens = &tokens[..];
+
+    const CODE_INDENT: f32 = 10.0;
+
+    while !tokens.is_empty() {
+        let token = tokens.first().unwrap();
+        tokens = &tokens[1..];
+
+        let start_of_line = prev == TokenKind::Newline;
+        prev = token.kind;
+
+        let mut basic_style: Option<fn(&mut Style) -> &mut bool> = None;
+
+        match token.kind {
+            TokenKind::CodeBlock if start_of_line => {
+                let span = collect_until(
+                    token,
+                    &mut tokens,
+                    series([TokenKind::Newline, TokenKind::CodeBlock]),
+                );
+
+                let code_style = format_from_style(
+                    egui_style,
+                    &Style {
+                        code: true,
+                        ..Default::default()
+                    },
+                );
+
+                job.append(&*span, CODE_INDENT, code_style.clone());
+                style = Default::default();
+                continue;
+            }
+
+            TokenKind::Newline => style = Style::default(),
+
+            TokenKind::Strong => basic_style = Some(|s| &mut s.strong),
+            TokenKind::Italic => basic_style = Some(|s| &mut s.italics),
+            TokenKind::Strikethrough => basic_style = Some(|s| &mut s.strikethrough),
+
+            TokenKind::CodeBlock | TokenKind::Mono => {
+                style.code = true;
+                let span = collect_until(
+                    token,
+                    &mut tokens,
+                    any_of([TokenKind::Mono, TokenKind::CodeBlock, TokenKind::Newline]),
+                );
+                job.append(&*span, 0.0, format_from_style(egui_style, &style));
+                style.code = false;
+                continue;
+            }
+
+            // TODO: different heading strengths
+            TokenKind::Heading(h) if start_of_line => style.heading = Some(h),
+            TokenKind::Quote if start_of_line => style.quoted = true,
+
+            // TODO: indented list entries
+            TokenKind::ListEntry if start_of_line => {
+                job.append("• ", 0.0, format_from_style(egui_style, &style));
+                continue;
+            }
+
+            TokenKind::Text
+            // the following tokens are only richly rendered if encountered e.g. at start_of_line.
+            | TokenKind::Indentation
+            | TokenKind::ListEntry
+            | TokenKind::Heading(..)
+            | TokenKind::Quote => {}
+        }
+
+        // if we encountered a marker for Bold, Italic, or Strikethrough, toggle that style and
+        // render the token with the style enabled.
+        if let Some(basic_style) = basic_style {
+            let mut tmp_style = style;
+            *basic_style(&mut tmp_style) = true;
+            *basic_style(&mut style) ^= true; // toggle
+            job.append(&token.span, 0.0, format_from_style(egui_style, &tmp_style));
+            continue;
+        }
+
+        job.append(&token.span, 0.0, format_from_style(egui_style, &style));
+    }
+
+    job
+}
+
+fn series<'a, const N: usize>(of: [TokenKind; N]) -> impl FnMut(&[Token<'a>; N]) -> bool {
+    move |token| {
+        of.iter()
+            .zip(token)
+            .all(|(kind, token)| kind == &token.kind)
+    }
+}
+
+fn any_of<'a, const N: usize>(these: [TokenKind; N]) -> impl FnMut(&[Token<'a>; 1]) -> bool {
+    move |[token]| these.contains(&token.kind)
+}
+
+/// Merge `token` and all following tokens up to and including `pattern` into a single span.
+///
+/// `N` determines how many specific and consecutive tokens we are looking for.
+/// i.e. if we were looking for a [TokenKind::Newline] followed by a [TokenKind::Quote], `N`
+/// would equal `2`.
+///
+/// `pattern` is a function that accepts an array of `N` tokens and returns `true` if they match,
+/// i.e. if we should stop collecting. [any_of] and [series] can help to construct this function.
+///
+/// The collected tokens (all of them if nothing matches) are split off the head of `tokens`.
+///
+/// # Panics
+/// Panics if `token` and the collected `tokens` are not adjacent spans of the same string.
+fn collect_until<'a, const N: usize>(
+    token: &Token<'a>,
+    tokens: &mut &[Token<'a>],
+    pattern: impl FnMut(&[Token<'a>; N]) -> bool,
+) -> Span<'a>
+where
+    for<'b> &'b [Token<'a>; N]: TryFrom<&'b [Token<'a>]>,
+{
+    let mut windows = tokens
+        .windows(N)
+        .map(|slice| <&[Token<'a>; N]>::try_from(slice).ok().unwrap());
+
+    let split_at = match windows.position(pattern) {
+        Some(i) => i + N,
+        None => tokens.len(), // consume everything
+    };
+
+    let (consume, keep) = tokens.split_at(split_at);
+    *tokens = keep;
+
+    consume
+        .iter()
+        .fold(token.span.clone(), |span: Span<'_>, token| {
+            span.try_merge(&token.span).unwrap()
+        })
+}
+
+fn format_from_style(egui_style: &egui::Style, emark_style: &Style) -> egui::text::TextFormat {
+    use egui::{Align, Color32, Stroke, TextStyle};
+
+    let color = if emark_style.strong || emark_style.heading.is_some() {
+        egui_style.visuals.strong_text_color()
+    } else if emark_style.quoted {
+        egui_style.visuals.weak_text_color()
+    } else {
+        egui_style.visuals.text_color()
+    };
+
+    let text_style = if emark_style.heading.is_some() {
+        TextStyle::Heading
+    } else if emark_style.code {
+        TextStyle::Monospace
+    } else if emark_style.small | emark_style.raised {
+        TextStyle::Small
+    } else {
+        TextStyle::Body
+    };
+
+    let background = if emark_style.code {
+        egui_style.visuals.code_bg_color
+    } else {
+        Color32::TRANSPARENT
+    };
+
+    let underline = if emark_style.underline {
+        Stroke::new(1.0, color)
+    } else {
+        Stroke::NONE
+    };
+
+    let strikethrough = if emark_style.strikethrough {
+        Stroke::new(1.0, color)
+    } else {
+        Stroke::NONE
+    };
+
+    let valign = if emark_style.raised {
+        Align::TOP
+    } else {
+        Align::BOTTOM
+    };
+
+    egui::text::TextFormat {
+        font_id: text_style.resolve(egui_style),
+        color,
+        background,
+        italics: emark_style.italics,
+        underline,
+        strikethrough,
+        valign,
+        ..Default::default()
+    }
+}
--- a/src/markdown/mod.rs
+++ b/src/markdown/mod.rs
@@ -0,0 +1,7 @@
+mod highlighter;
+mod span;
+mod tokenizer;
+
+pub use highlighter::*;
+pub use span::*;
+pub use tokenizer::*;
--- a/src/markdown/snapshots/inkrmarkdowntokenizerteststokenize.snap
+++ b/src/markdown/snapshots/inkrmarkdowntokenizerteststokenize.snap
@@ -0,0 +1,46 @@
+---
+source: src/markdown/tokenizer.rs
+expression: examples
+---
+- string: "just some normal text :D"
+  tokens:
+    - "Token { span: Span(0..24, \"just some normal text :D\"), kind: Text }"
+- string: normal *bold* normal
+  tokens:
+    - "Token { span: Span(0..7, \"normal \"), kind: Text }"
+    - "Token { span: Span(7..8, \"*\"), kind: Strong }"
+    - "Token { span: Span(8..12, \"bold\"), kind: Text }"
+    - "Token { span: Span(12..13, \"*\"), kind: Strong }"
+    - "Token { span: Span(13..20, \" normal\"), kind: Text }"
+- string: normal * maybe bold? * normal
+  tokens:
+    - "Token { span: Span(0..7, \"normal \"), kind: Text }"
+    - "Token { span: Span(7..8, \"*\"), kind: Strong }"
+    - "Token { span: Span(8..21, \" maybe bold? \"), kind: Text }"
+    - "Token { span: Span(21..22, \"*\"), kind: Strong }"
+    - "Token { span: Span(22..29, \" normal\"), kind: Text }"
+- string: "```lang\ncode code code\n```"
+  tokens:
+    - "Token { span: Span(0..3, \"```\"), kind: CodeBlock }"
+    - "Token { span: Span(3..7, \"lang\"), kind: Text }"
+    - "Token { span: Span(7..8, \"\\n\"), kind: Newline }"
+    - "Token { span: Span(8..22, \"code code code\"), kind: Text }"
+    - "Token { span: Span(22..23, \"\\n\"), kind: Newline }"
+    - "Token { span: Span(23..26, \"```\"), kind: CodeBlock }"
+- string: "*_``_*"
+  tokens:
+    - "Token { span: Span(0..1, \"*\"), kind: Strong }"
+    - "Token { span: Span(1..2, \"_\"), kind: Italic }"
+    - "Token { span: Span(2..3, \"`\"), kind: Mono }"
+    - "Token { span: Span(3..4, \"`\"), kind: Mono }"
+    - "Token { span: Span(4..5, \"_\"), kind: Italic }"
+    - "Token { span: Span(5..6, \"*\"), kind: Strong }"
+- string: "*_`*_*_"
+  tokens:
+    - "Token { span: Span(0..1, \"*\"), kind: Strong }"
+    - "Token { span: Span(1..2, \"_\"), kind: Italic }"
+    - "Token { span: Span(2..3, \"`\"), kind: Mono }"
+    - "Token { span: Span(3..4, \"*\"), kind: Strong }"
+    - "Token { span: Span(4..5, \"_\"), kind: Italic }"
+    - "Token { span: Span(5..6, \"*\"), kind: Strong }"
+    - "Token { span: Span(6..7, \"_\"), kind: Italic }"
--- a/src/markdown/span.rs
+++ b/src/markdown/span.rs
@@ -0,0 +1,83 @@
+use std::{
+    fmt,
+    ops::{Deref, Range},
+};
+
+#[derive(Clone, Eq, PartialEq)]
+pub struct Span<'a> {
+    complete_str: &'a str,
+    range: Range<usize>,
+}
+
+impl<'a> Span<'a> {
+    pub fn new(complete_str: &'a str) -> Self {
+        Self {
+            complete_str,
+            range: 0..complete_str.len(),
+        }
+    }
+
+    pub fn get(&self, slice: Range<usize>) -> Option<Self> {
+        let start = self.range.start.checked_add(slice.start)?;
+        let end = self.range.start.checked_add(slice.end)?;
+
+        if end > self.range.end || end < start {
+            return None;
+        }
+
+        Some(Self {
+            complete_str: self.complete_str,
+            range: Range { start, end },
+        })
+    }
+
+    pub fn complete_str(&self) -> Self {
+        Self::new(self.complete_str)
+    }
+
+    pub fn split_at(&self, i: usize) -> Option<(Self, Self)> {
+        let head = self.get(0..i)?;
+        let tail = self.get(i..self.range.len())?;
+        Some((head, tail))
+    }
+
+    /// Try to merge the spans.
+    ///
+    /// This only works if spans are pointing into the same backing buffer, and are adjacent.
+    pub fn try_merge(&self, other: &Self) -> Option<Self> {
+        if self.complete_str.as_ptr() != other.complete_str.as_ptr() {
+            return None;
+        }
+
+        if self.range.end == other.range.start {
+            Some(Self {
+                range: self.range.start..other.range.end,
+                ..*self
+            })
+        } else if self.range.start == other.range.end {
+            Some(Self {
+                range: other.range.start..self.range.end,
+                ..*self
+            })
+        } else {
+            None
+        }
+    }
+}
+
+impl Deref for Span<'_> {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        &self.complete_str[self.range.clone()]
+    }
+}
+
+impl<'a> fmt::Debug for Span<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Span")
+            .field(&self.range)
+            .field(&self.deref())
+            .finish()
+    }
+}
--- a/src/markdown/tokenizer.rs
+++ b/src/markdown/tokenizer.rs
@@ -0,0 +1,156 @@
+use std::iter;
+
+use super::span::Span;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Heading {
+    H6,
+    H5,
+    H4,
+    H3,
+    H2,
+    H1,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum TokenKind {
+    /// A line break: "\n"
+    Newline,
+
+    /// "#" to "######"
+    Heading(Heading),
+
+    /// Three consecutive backticks (a code fence)
+    CodeBlock,
+
+    Mono,
+    Strong,
+    Italic,
+    Strikethrough,
+
+    /// ">"
+    Quote,
+
+    /// Two spaces
+    Indentation,
+
+    /// "- "
+    ListEntry,
+
+    /// Normal text
+    Text,
+}
+
+const TOKENS: &[(&'static str, TokenKind)] = &[
+    ("\n", TokenKind::Newline),
+    ("######", TokenKind::Heading(Heading::H6)),
+    ("#####", TokenKind::Heading(Heading::H5)),
+    ("####", TokenKind::Heading(Heading::H4)),
+    ("###", TokenKind::Heading(Heading::H3)),
+    ("##", TokenKind::Heading(Heading::H2)),
+    ("#", TokenKind::Heading(Heading::H1)),
+    ("```", TokenKind::CodeBlock),
+    ("`", TokenKind::Mono),
+    ("*", TokenKind::Strong),
+    ("_", TokenKind::Italic),
+    ("~", TokenKind::Strikethrough),
+    (">", TokenKind::Quote),
+    ("  ", TokenKind::Indentation),
+    ("- ", TokenKind::ListEntry),
+];
+
+#[derive(Debug)]
+pub struct Token<'a> {
+    pub span: Span<'a>,
+    pub kind: TokenKind,
+}
+
+pub fn tokenize<'a>(s: &'a str) -> impl Iterator<Item = Token<'a>> {
+    let mut s = Span::new(s);
+    let mut yield_n: usize = 0;
+
+    iter::from_fn(move || {
+        loop {
+            if s.is_empty() {
+                return None;
+            }
+
+            if yield_n == s.len() {
+                let (token, rest) = s.split_at(s.len()).unwrap();
+                let token = Token {
+                    span: token,
+                    kind: TokenKind::Text,
+                };
+                s = rest;
+                return Some(token);
+            }
+
+            let token = TOKENS.iter().find_map(|(token_str, token_kind)| {
+                s[yield_n..]
+                    .starts_with(token_str)
+                    .then(|| (*token_kind, token_str.len()))
+            });
+
+            let Some((kind, len)) = token else {
+                yield_n += s[yield_n..].chars().next().unwrap_or('\0').len_utf8();
+                continue;
+            };
+
+            if yield_n > 0 {
+                let (token, rest) = s.split_at(yield_n).unwrap();
+                let token = Token {
+                    span: token,
+                    kind: TokenKind::Text,
+                };
+                s = rest;
+                yield_n = 0;
+                return Some(token);
+            }
+
+            let (token, rest) = s.split_at(len).unwrap();
+            let token = Token { span: token, kind };
+            s = rest;
+            return Some(token);
+        }
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use serde::Serialize;
+
+    use super::tokenize;
+
+    #[test]
+    fn test_tokenize() {
+        let examples = [
+            "just some normal text :D",
+            "normal *bold* normal",
+            "normal * maybe bold? * normal",
+            "```lang\ncode code code\n```",
+            "*_``_*",
+            "*_`*_*_",
+        ];
+
+        #[derive(Serialize)]
+        struct Result {
+            pub string: &'static str,
+
+            /// Debug-printed tokens
+            pub tokens: Vec<String>,
+        }
+
+        let examples = examples
+            .into_iter()
+            .map(|string| {
+                let tokens = tokenize(string)
+                    .map(|tokens| format!("{tokens:?}"))
+                    .collect::<Vec<_>>();
+
+                Result { string, tokens }
+            })
+            .collect::<Vec<_>>();
+
+        insta::assert_yaml_snapshot!(examples);
+    }
+}
--- a/src/snapshots/inkrpaintingtest__serialized
+++ b/src/snapshots/inkrpaintingtest__serialized
@@ -0,0 +1,7 @@
+---
+source: src/painting.rs
+expression: serialized
+---
+```handwriting
+BQAAvAA8AEIAPABCAEIAPgBAAAAAAAQAAEIAQgC8ADwAAAAAAEIAPA==
+```
--- a/src/text_editor.rs
+++ b/src/text_editor.rs
@@ -8,7 +8,7 @@ use egui::{
    Color32, InputState, Key, Modifiers, TextBuffer, TextEdit, Ui, Vec2, text::CCursorRange,
 };

-use crate::easy_mark::MemoizedHighlighter;
+use crate::markdown::MemoizedHighlighter;

 #[derive(Default, serde::Deserialize, serde::Serialize)]
 pub struct MdTextEdit {
@@ -50,8 +50,8 @@ impl MdTextEdit {

        let w = ui.available_width();

-        let mut layouter = |ui: &egui::Ui, easymark: &dyn TextBuffer, _wrap_width: f32| {
-            let mut layout_job = highlighter.highlight(ui.style(), easymark.as_str(), *cursor);
+        let mut layouter = |ui: &egui::Ui, markdown: &dyn TextBuffer, _wrap_width: f32| {
+            let mut layout_job = highlighter.highlight(ui.style(), markdown.as_str(), *cursor);
            layout_job.wrap.max_width = w - 10.0;
            ui.fonts(|f| f.layout_job(layout_job))
        };