From 9cae074ff475186f494239499533c17b695766d7 Mon Sep 17 00:00:00 2001
From: Yorick Peterse
Date: Sat, 21 Dec 2024 00:45:23 +0100
Subject: [PATCH] Wrap lines using the character width not count

Instead of counting the number of grapheme clusters per line, we now count
the character _widths_. This ensures that lines containing double width
characters are wrapped correctly.

Changelog: fixed
---
 Cargo.lock                                           | 10 ++++++++--
 compiler/Cargo.toml                                  |  2 +-
 compiler/src/format.rs                               |  6 +++---
 std/fixtures/fmt/double_width_characters/input.inko  |  1 +
 std/fixtures/fmt/double_width_characters/output.inko |  9 +++++++++
 5 files changed, 22 insertions(+), 6 deletions(-)
 create mode 100644 std/fixtures/fmt/double_width_characters/input.inko
 create mode 100644 std/fixtures/fmt/double_width_characters/output.inko

diff --git a/Cargo.lock b/Cargo.lock
index b287f4c3..328d298e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -137,7 +137,7 @@ dependencies = [
  "location",
  "similar-asserts",
  "types",
- "unicode-segmentation",
+ "unicode-width 0.2.0",
 ]
 
 [[package]]
@@ -280,7 +280,7 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
 dependencies = [
- "unicode-width",
+ "unicode-width 0.1.14",
 ]
 
 [[package]]
@@ -1007,6 +1007,12 @@ version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
 
+[[package]]
+name = "unicode-width"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
+
 [[package]]
 name = "untrusted"
 version = "0.9.0"
diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml
index 877e5ff8..b2c7c278 100644
--- a/compiler/Cargo.toml
+++ b/compiler/Cargo.toml
@@ -14,7 +14,7 @@ doctest = false
 ast = { path = "../ast" }
 types = { path = "../types" }
 location = { path = "../location" }
-unicode-segmentation = "^1.10"
+unicode-width = "^0.2"
 getopts = "^0.2"
 fnv = "^1.0"
 blake3 = "^1.5"
diff --git a/compiler/src/format.rs b/compiler/src/format.rs
index 30e122ed..dd3d6524 100644
--- a/compiler/src/format.rs
+++ b/compiler/src/format.rs
@@ -12,7 +12,7 @@ use std::collections::HashSet;
 use std::fs::{read, write};
 use std::io::{stdin, stdout, Error as IoError, Read as _, Write as _};
 use std::path::PathBuf;
-use unicode_segmentation::UnicodeSegmentation as _;
+use unicode_width::UnicodeWidthStr;
 
 /// The characters to use for indentation.
 const INDENT: char = ' ';
@@ -79,7 +79,7 @@ enum Node {
     /// The arguments are:
     ///
     /// 1. The text that may include Unicode characters
-    /// 2. The number of grapheme clusters in the string
+    /// 2. The width (in cells) of the string.
     Unicode(String, usize),
 
     /// A node to include if the code is to be wrapped across lines.
@@ -149,7 +149,7 @@ impl Node {
     }
 
     fn unicode(text: String) -> Node {
-        let width = text.graphemes(true).count();
+        let width = text.width();
         Node::Unicode(text, width)
     }
 
diff --git a/std/fixtures/fmt/double_width_characters/input.inko b/std/fixtures/fmt/double_width_characters/input.inko
new file mode 100644
index 00000000..bb03b8e1
--- /dev/null
+++ b/std/fixtures/fmt/double_width_characters/input.inko
@@ -0,0 +1 @@
+let FULL_WEEKDAYS = ['日曜日', '月曜日', '火曜日', '水曜日', '木曜日', '金曜日', '土曜日']
diff --git a/std/fixtures/fmt/double_width_characters/output.inko b/std/fixtures/fmt/double_width_characters/output.inko
new file mode 100644
index 00000000..ab142bb1
--- /dev/null
+++ b/std/fixtures/fmt/double_width_characters/output.inko
@@ -0,0 +1,9 @@
+let FULL_WEEKDAYS = [
+  '日曜日',
+  '月曜日',
+  '火曜日',
+  '水曜日',
+  '木曜日',
+  '金曜日',
+  '土曜日',
+]