From 0bc1be80617e26f4f3d141c5fae3fe34824e6155 Mon Sep 17 00:00:00 2001 From: ParkerTenBroeck <51721964+ParkerTenBroeck@users.noreply.github.com> Date: Wed, 14 Jan 2026 15:01:48 -0500 Subject: [PATCH] grapheme clustering and width correction --- Cargo.lock | 17 +++++++++++++++++ automata/Cargo.toml | 2 ++ automata/src/loader/log.rs | 15 +++++++++++---- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e21067..791b965 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,8 @@ version = "0.1.0" dependencies = [ "serde", "serde_with", + "unicode-display-width", + "unicode-segmentation", ] [[package]] @@ -494,12 +496,27 @@ dependencies = [ "time-core", ] +[[package]] +name = "unicode-display-width" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a43273b656140aa2bb8e65351fe87c255f0eca706b2538a9bd4a590a3490bf3" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "wasm-bindgen" version = "0.2.106" diff --git a/automata/Cargo.toml b/automata/Cargo.toml index ad078e8..8479da3 100644 --- a/automata/Cargo.toml +++ b/automata/Cargo.toml @@ -6,6 +6,8 @@ edition = "2024" [dependencies] serde = { version = "1.0", features = ["derive"], optional = true} serde_with = { version = "3.0", features = ["default"], optional = true} +unicode-display-width = "*" +unicode-segmentation = "*" [features] default = [] diff --git a/automata/src/loader/log.rs b/automata/src/loader/log.rs index 4766072..742a7fa 100644 --- a/automata/src/loader/log.rs +++ b/automata/src/loader/log.rs @@ -1,5 +1,8 @@ use std::fmt::Display; +use unicode_display_width::width; +use unicode_segmentation::UnicodeSegmentation; + use crate::loader::Span; #[cfg_attr(feature = "serde", derive(serde::Serialize))] @@ -226,13 +229,17 @@ impl<'a> Display for LogEntryDisplay<'a> { for _ in 0..padding + 3 { write!(f, " ")?; } - for char in line.chars() { + for grapheme in line.graphemes(true) { if (span.0..span.1).contains(&index) { - write!(f, "~")?; + for _ in 0..width(grapheme){ + write!(f, "~")?; + } } else { - write!(f, " ")?; + for _ in 0..width(grapheme){ + write!(f, " ")?; + } } - index += char.len_utf8(); + index += grapheme.len(); } write!(f, "{RESET}")?; index += '\n'.len_utf8();