Skip to content

Commit

Permalink
Unicode display width (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
nitely authored Nov 21, 2024
1 parent 8b178b8 commit 09e379d
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ assert "The quIck? (“bRown”) fox cAn’T jUMp 32.3 feet, rIGHt?".toLower ==
assert cmpCaseless("AbCσ", "aBcΣ")
assert toValidUtf8("a\xffb", replacement = "") == "ab"
assert verifyUtf8("a\xffb") == 1
assert width("이건 테스트야", cjk=true) == 13
```

[docs](https://nitely.github.io/nim-unicodeplus/)
Expand Down
40 changes: 38 additions & 2 deletions src/unicodeplus.nim
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
## This module provides common unicode operations

from std/unicode import
Rune, runes, `==`, fastRuneAt,
fastToUtf8Copy
Rune, runes, `==`, fastRuneAt, fastToUtf8Copy

import pkg/unicodedb/properties
import pkg/unicodedb/types
import pkg/unicodedb/casing
import pkg/unicodedb/widths
import pkg/segmentation

# Compile-time feature flag: true when building with Nim >= 2.0.
# It gates the optional pkg/graphemes import below as well as the
# `width` funcs declared under `when enableWidth:` later in this module.
const enableWidth = (NimMajor, NimMinor) >= (2, 0)
when enableWidth:
import pkg/graphemes

export Rune

type
Expand Down Expand Up @@ -460,3 +464,35 @@ func toValidUtf8*(s: string, replacement = "\uFFFD"): string =
oldLen = result.len
i += j+1
result.add2 toOpenArray(s, i, s.len-1)

when enableWidth:
  func width(c: Rune, cjk: bool): int =
    ## Display width, in columns, of the single rune `c` (1 or 2).
    ##
    ## `cjk` only affects runes whose width class is `uwdtAmbiguous`.
    const emojiStyleSelector = 0xFE0F
    if c.int == emojiStyleSelector:
      # The emoji-style selector is counted as 2 columns so that any
      # cluster containing it ends up 2 columns wide.
      return 2
    case c.unicodeWidth()
    of uwdtFull, uwdtWide:
      result = 2
    of uwdtAmbiguous:
      # NOTE(review): this yields 1 when `cjk` is true and 2 otherwise.
      # UAX #11 describes ambiguous characters as wide in East Asian
      # contexts and narrow elsewhere, so the mapping looks inverted.
      # The test suite appears to pin the current values; confirm the
      # intent before changing it.
      result = if cjk: 1 else: 2
    else:
      result = 1

  func width*(s: string, cjk = false): int =
    ## Return the display width of `s`, in columns.
    ##
    ## The string is split into grapheme clusters; each cluster
    ## contributes the width of its widest rune.
    ## `cjk` selects how runes of ambiguous width are counted.
    ##
    ## This is usually correct for monospaced fonts,
    ## but it may not be accurate in some cases.
    ## Requires Nim >= 2.
    result = 0
    for bounds in s.graphemeBounds:
      let clusterLen = bounds.b - bounds.a + 1
      var
        pos = 0
        prevPos = -1
        r = Rune(0)
        widest = 0
      while pos < clusterLen:
        # Guard against a decoder that fails to advance.
        doAssert pos > prevPos, "invalid utf8"
        prevPos = pos
        fastRuneAt(toOpenArray(s, bounds.a, bounds.b), pos, r)
        widest = max(widest, r.width(cjk))
      result += widest
22 changes: 17 additions & 5 deletions tests/tests.nim
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import unittest
from unicode import Rune, toUTF8
from strutils import contains
import std/unittest
from std/unicode import Rune, toUTF8
from std/strutils import contains

from unicodedb/casing import caseFold
from pkg/unicodedb/casing import caseFold

import unicodeplus
import ../src/unicodeplus

when (NimMajor, NimMinor) >= (2, 0):
type MyAssertionDefect = ref AssertionDefect
Expand Down Expand Up @@ -582,3 +582,15 @@ when true:
check toValidUtf8("\xC2abc") == "\uFFFDabc"
check toValidUtf8("\xF0\x80\x80") == "\uFFFD"
check toValidUtf8("\x80\x80") == "\uFFFD"

when (NimMajor, NimMinor) >= (2, 0):
  # `width` is only defined by unicodeplus when compiling with Nim >= 2.0.
  test "width":
    # `check` matches the other tests in this file and lets unittest
    # report every failing expectation instead of aborting on the first.
    check width("이건 테스트야", cjk=true) == 13
    check width("🐤node お名前=☜(゚ヮ゚☜)") == 22
    # Each grapheme cluster contributes the width of its widest rune.
    check width("👨‍👩‍👧‍👦") == 2
    check width("👨‍👩‍👧‍👦👨‍👩‍👧‍👦") == 4
    check width("👨‍👩‍👧‍👦🥱🧛🏻‍♂️") == 6
    # NOTE(review): 20 columns for ten underlined letters presumably
    # relies on the combining mark being counted as 2 when cjk=false;
    # confirm this is the intended result.
    check width("u̲n̲d̲e̲r̲l̲i̲n̲e̲d̲") == 20
    check width("a") == 1
    check width("ab") == 2
    check width("abc") == 3
1 change: 1 addition & 0 deletions unicodeplus.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ srcDir = "src"
requires "nim >= 1.0"
requires "unicodedb >= 0.8"
requires "segmentation >= 0.1"
requires "graphemes >= 0.12"

task test, "Test":
exec "nim c -r tests/tests"
Expand Down

0 comments on commit 09e379d

Please sign in to comment.