From 4e9eacc7b0489f945779ab890d653d120b9292ac Mon Sep 17 00:00:00 2001
From: Joakim Hulthe
Date: Wed, 18 Jun 2025 22:56:33 +0200
Subject: [PATCH] Improve handwriting disk-format and decoding

---
 src/handwriting/disk_format.rs             | 105 ++++++++++++++
 src/{handwriting.rs => handwriting/mod.rs} | 145 ++++++++++++++-------
 src/rasterizer.rs                          |  20 +--
 3 files changed, 213 insertions(+), 57 deletions(-)
 create mode 100644 src/handwriting/disk_format.rs
 rename src/{handwriting.rs => handwriting/mod.rs} (85%)

diff --git a/src/handwriting/disk_format.rs b/src/handwriting/disk_format.rs
new file mode 100644
index 0000000..10b9ef9
--- /dev/null
+++ b/src/handwriting/disk_format.rs
@@ -0,0 +1,105 @@
+//! On-disk (serialized) representation of handwriting data; see [DiskFormat].
+
+use std::fmt::Display;
+
+use half::f16;
+use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
+
+/// A `u16` encoded in little-endian.
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, FromBytes, IntoBytes, KnownLayout, Immutable, PartialEq, Eq)]
+#[repr(C, packed)]
+pub struct u16_le([u8; 2]);
+
+/// An `f16` encoded in little-endian.
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(C, packed)]
+pub struct f16_le(u16_le);
+
+/// Top-level type describing the handwriting disk-format.
+#[derive(FromBytes, KnownLayout, Immutable)]
+#[repr(C, packed)]
+pub struct DiskFormat {
+    pub header: Header,
+
+    /// A packed array of [RawStroke]s.
+    pub strokes: [u8],
+}
+
+/// Version tag written by the encoder and the only one accepted by the decoder.
+pub const V1: u16_le = u16_le::new(1);
+
+/// Header found at the very start of the encoded data.
+#[derive(FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(C, packed)]
+pub struct Header {
+    /// Version of the disk format
+    pub version: u16_le,
+}
+
+/// Header that precedes the positions of every encoded stroke.
+#[derive(FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(C, packed)]
+pub struct RawStrokeHeader {
+    /// Number of points in the stroke.
+    pub len: u16_le,
+}
+
+/// A single encoded stroke: a [RawStrokeHeader] followed by its positions.
+#[derive(FromBytes, KnownLayout, Immutable)]
+#[repr(C, packed)]
+pub struct RawStroke {
+    pub header: RawStrokeHeader,
+    /// Flat list of coordinates `[x, y, x, y, ..]`, i.e. two entries per point.
+    pub positions: [f16_le],
+}
+
+impl RawStroke {
+    /// Size in bytes of a stroke without any points, i.e. of just its header.
+    pub const MIN_LEN: usize = size_of::<RawStrokeHeader>();
+}
+
+impl u16_le {
+    /// Store a native `u16` as little-endian bytes.
+    pub const fn new(init: u16) -> Self {
+        u16_le(init.to_le_bytes())
+    }
+}
+
+impl f16_le {
+    /// Store the bit pattern of an `f16` as little-endian bytes.
+    pub const fn new(init: f16) -> Self {
+        f16_le(u16_le::new(init.to_bits()))
+    }
+}
+
+impl Display for u16_le {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        u16::from(*self).fmt(f)
+    }
+}
+
+impl From<u16_le> for u16 {
+    fn from(value: u16_le) -> Self {
+        u16::from_le_bytes(value.0)
+    }
+}
+
+impl From<f16_le> for f16 {
+    fn from(value: f16_le) -> Self {
+        f16::from_bits(u16::from(value.0))
+    }
+}
+
+impl From<u16> for u16_le {
+    fn from(value: u16) -> Self {
+        u16_le::new(value)
+    }
+}
+
+impl From<f16> for f16_le {
+    fn from(value: f16) -> Self {
+        f16_le::new(value)
+    }
+}
diff --git a/src/handwriting.rs b/src/handwriting/mod.rs
similarity index 85%
rename from src/handwriting.rs
rename to src/handwriting/mod.rs
index 8c02732..f7dbeae 100644
--- a/src/handwriting.rs
+++ b/src/handwriting/mod.rs
@@ -6,6 +6,7 @@ use std::{
 };
 
 use base64::{Engine, prelude::BASE64_STANDARD};
+use disk_format::{DiskFormat, RawStroke, RawStrokeHeader, f16_le};
 use egui::{
     Color32, ColorImage, CornerRadius, Event, Frame, Id, Mesh, PointerButton, Pos2, Rect, Sense,
     Shape, Stroke, TextureHandle, Theme, Ui, Vec2,
@@ -16,7 +17,7 @@ use egui::{
 use eyre::{Context, bail};
 use eyre::{OptionExt, eyre};
 use half::f16;
-use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
+use zerocopy::{FromBytes, IntoBytes};
 
 use crate::{
     custom_code_block::try_from_custom_code_block,
@@ -24,6 +25,8 @@ use crate::{
 };
 use crate::{custom_code_block::write_custom_code_block, util::random_id};
 
+mod disk_format;
+
 const HANDWRITING_MIN_HEIGHT: f32 = 100.0;
 const HANDWRITING_BOTTOM_PADDING: f32 = 80.0;
 const HANDWRITING_MARGIN: f32 = 0.05;
@@ -485,6 +488,13 @@ impl Handwriting {
             tesselator.tessellate_shape(shape, mesh);
         });
 
+        // sanity-check that tesselation did not produce any NaNs.
+        // this can happen if the line contains duplicated consecutive positions
+        //for vertex in &mesh.vertices {
+        //    debug_assert!(vertex.pos.x.is_finite(), "{} must be finite", vertex.pos.x);
+        //    debug_assert!(vertex.pos.y.is_finite(), "{} must be finite", vertex.pos.y);
+        //}
+
         let texture = texture!(self, ui, &mesh_context);
         let triangles = mesh_triangles(&self.mesh);
 
@@ -586,24 +596,44 @@ impl Handwriting {
             ..Default::default()
         }
     }
+
+    /// Serialize all strokes into the versioned binary disk format.
+    ///
+    /// Coordinates are narrowed to `f16`. A stroke with more than `u16::MAX`
+    /// points cannot be represented; it is logged and left out of the output.
+    pub fn encode_as_disk_format(&self) -> Box<[u8]> {
+        let mut bytes = vec![];
+        let header = disk_format::Header {
+            version: disk_format::V1,
+        };
+
+        bytes.extend_from_slice(header.as_bytes());
+
+        for stroke in &self.strokes {
+            let Ok(len) = u16::try_from(stroke.len()) else {
+                log::error!("More than u16::MAX points in a stroke!");
+                continue;
+            };
+
+            let header = RawStrokeHeader { len: len.into() };
+            bytes.extend_from_slice(header.as_bytes());
+
+            for position in stroke {
+                for v in [position.x, position.y] {
+                    let v = f16::from_f32(v);
+                    let v = f16_le::from(v);
+                    bytes.extend_from_slice(v.as_bytes());
+                }
+            }
+        }
+
+        bytes.into_boxed_slice()
+    }
 }
 
 impl Display for Handwriting {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let mut raw = vec![];
-
-        for stroke in &self.strokes {
-            raw.push((stroke.len() as u16).to_le_bytes());
-            for position in stroke {
-                let x = half::f16::from_f32(position.x);
-                let y = half::f16::from_f32(position.y);
-                raw.push(x.to_bits().to_le_bytes());
-                raw.push(y.to_bits().to_le_bytes());
-            }
-        }
-
-        let raw = raw.as_slice().as_bytes();
-
+        let raw = self.encode_as_disk_format();
         write_custom_code_block(f, CODE_BLOCK_KEY, BASE64_STANDARD.encode(raw))
     }
 }
@@ -619,53 +649,72 @@ impl FromStr for Handwriting {
             .decode(s)
             .wrap_err("Failed to decode painting data from base64")?;
 
-        #[allow(non_camel_case_types)]
-        type u16_le = [u8; 2];
+        // HACK: first iteration of disk format did not have version header
+        //bytes.insert(0, 0);
+        //bytes.insert(0, 1);
 
-        #[allow(non_camel_case_types)]
-        type f16_le = [u8; 2];
+        let disk_format = DiskFormat::ref_from_bytes(&bytes[..]).map_err(|_| eyre!("Too short"))?;
 
-        #[derive(FromBytes, KnownLayout, Immutable)]
-        #[repr(C, packed)]
-        struct Stroke {
-            pub len: u16_le,
-            pub positions: [f16_le],
+        if disk_format.header.version != disk_format::V1 {
+            bail!(
+                "Unknown disk_format version: {}",
+                disk_format.header.version
+            );
         }
 
-        let mut bytes = &bytes[..];
+        let mut raw_strokes = &disk_format.strokes[..];
         let mut strokes = vec![];
 
-        while !bytes.is_empty() {
-            let header_len = size_of::<u16_le>();
-            if bytes.len() < header_len {
-                bail!("Invalid remaining length: {}", bytes.len());
+        while !raw_strokes.is_empty() {
+            if raw_strokes.len() < RawStroke::MIN_LEN {
+                bail!("Invalid remaining length: {}", raw_strokes.len());
             }
 
-            let stroke = Stroke::ref_from_bytes(&bytes[..header_len]).expect("length is correct");
-            let len = usize::from(u16::from_le_bytes(stroke.len));
-            let len = len * size_of::<f16_le>() * 2;
+            let stroke = RawStroke::ref_from_bytes(&raw_strokes[..RawStroke::MIN_LEN])
+                .expect("length is correct");
 
-            if bytes.len() < len {
-                bail!("Invalid remaining length: {}", bytes.len());
+            // get length as number of points
+            let len = usize::from(u16::from(stroke.header.len));
+
+            // convert to length in bytes
+            let byte_len = 2 * size_of::<f16_le>() * len;
+
+            // the header and all positions must fit, otherwise `split_at` below would panic
+            if raw_strokes.len() < RawStroke::MIN_LEN + byte_len {
+                bail!("Invalid remaining length: {}", raw_strokes.len());
             }
 
-            let (stroke, rest) = bytes.split_at(header_len + len);
-            bytes = rest;
-            let stroke = Stroke::ref_from_bytes(stroke)
-                .map_err(|e| eyre!("Failed to decode stroke bytes: {e}"))?;
+            let (stroke, rest) = raw_strokes.split_at(RawStroke::MIN_LEN + byte_len);
+            raw_strokes = rest;
 
-            let mut positions = stroke
+            let stroke = RawStroke::ref_from_bytes(stroke).expect("length is correct");
+
+            debug_assert_eq!(
+                stroke.positions.len().rem_euclid(2),
+                0,
+                "{} must be divisible by 2",
+                stroke.positions.len()
+            );
+            debug_assert_eq!(stroke.positions.len(), len * 2);
+
+            let mut last_pos = Pos2::new(f32::NEG_INFINITY, f32::INFINITY);
+
+            // positions are encoded as an array of f16s [x, y, x, y, x, y, ..]
+            let stroke: Vec<Pos2> = stroke
                 .positions
-                .iter()
-                .map(|&position| f16::from_bits(u16::from_le_bytes(position)));
+                .chunks_exact(2)
+                .map(|chunk| [chunk[0], chunk[1]])
+                .map(|pos| pos.map(f16::from)) // interpret bytes as f16
+                .map(|pos| pos.map(f32::from)) // widen to f32
+                .filter(|pos| pos.iter().all(|f| f.is_finite())) // filter out NaNs and Infs
+                .map(|[x, y]| Pos2::new(x, y))
+                .filter(|pos| {
+                    let is_duplicate = pos == &last_pos;
+                    last_pos = *pos;
+                    !is_duplicate // skip duplicates
+                })
+                .collect();
 
-            let mut stroke = vec![];
-            while let Some(x) = positions.next() {
-                let Some(y) = positions.next() else {
-                    unreachable!("len is a multiple of two");
-                };
-                stroke.push(Pos2::new(x.into(), y.into()));
-            }
             strokes.push(stroke);
         }
 
diff --git a/src/rasterizer.rs b/src/rasterizer.rs
index 4fe2bd0..cdc12bc 100644
--- a/src/rasterizer.rs
+++ b/src/rasterizer.rs
@@ -72,15 +72,11 @@ pub fn rasterize_onto<'a, Blend: BlendFn>(
 
                 // If the pixel is within the triangle, fill it in.
                 if point_in_triangle.inside {
-                    let c0 = triangle[0]
-                        .color
-                        .linear_multiply(point_in_triangle.weights[0]);
-                    let c1 = triangle[1]
-                        .color
-                        .linear_multiply(point_in_triangle.weights[1]);
-                    let c2 = triangle[2]
-                        .color
-                        .linear_multiply(point_in_triangle.weights[2]);
+                    let [c0, c1, c2] = [0, 1, 2].map(|i| {
+                        triangle[i]
+                            .color
+                            .linear_multiply(point_in_triangle.weights[i])
+                    });
 
                     let color = c0 + c1 + c2;
 
@@ -169,6 +165,12 @@ fn point_in_triangle(point: Pos2, triangle: [&Vertex; 3]) -> PointInTriangle {
 
     // Normalize the weights.
     let weights = areas.map(|area| area / triangle_area);
 
+    if cfg!(debug_assertions) {
+        if weights.into_iter().any(f32::is_nan) {
+            panic!("weights must not be NaN! {weights:?} {triangle_area:?} {areas:?} {sides:?}");
+        }
+    }
+
     PointInTriangle { inside, weights }
 }