Skip to content

Commit

Permalink
Fix double-width characters disappearing when wrapping (#3180)
Browse files Browse the repository at this point in the history
* Update docstring for `Text.wrap`s width parameter to indicate that it's referring to the number of *single-width* characters.

Also a small addition to the gitignore file.

* Working on double width wrapping fixes

* Chop cells to fit to width

* Fix folding when theres already text on line

* Update wrapping logic to fix issues with CJK charcters disappearing when the "fold" location sat *within* a double-width character. Ensure we retain browser logic of: if there is no space on the current line, move to a new line, and if theres not enough space on the entire new line, fold the text over multiple lines at appropriate locations.

* Remove old TODO comments

* Add regression test note

* Rename function to avoid breaking change

* Update CHANGELOG

* Remove old comment that is no longer relevant

* Cover off some wrapping edge cases

* Adding docstrings to tests explaining their purpose

* Renaming a local, function scope function alias

* Update rich/_wrap.py

Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com>

* PR feedback

* Testing wrapping with trailing and leading whitespace

* Improve docstring wording

---------

Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com>
  • Loading branch information
darrenburns and rodrigogiraoserrao authored Nov 14, 2023
1 parent b32e42b commit 59b1aca
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 38 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,5 @@ venv.bak/
# airspeed velocity
benchmarks/env/
benchmarks/html/

sandbox/
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Some text goes missing during wrapping when it contains double width characters /~https://github.com/Textualize/rich/issues/3176
- Ensure font is correctly inherited in exported HTML /~https://github.com/Textualize/rich/issues/3104
- Fixed typing for `FloatPrompt`.

Expand Down
73 changes: 55 additions & 18 deletions rich/_wrap.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from __future__ import annotations

import re
from typing import Iterable, List, Tuple
from typing import Iterable

from ._loop import loop_last
from .cells import cell_len, chop_cells

re_word = re.compile(r"\s*\S+\s*")


def words(text: str) -> Iterable[Tuple[int, int, str]]:
def words(text: str) -> Iterable[tuple[int, int, str]]:
"""Yields each word from the text as a tuple
containing (start_index, end_index, word). A "word" in this context may
include the actual word and any whitespace to the right.
"""
position = 0
word_match = re_word.match(text, position)
while word_match is not None:
Expand All @@ -17,40 +23,71 @@ def words(text: str) -> Iterable[Tuple[int, int, str]]:
word_match = re_word.match(text, end)


def divide_line(text: str, width: int, fold: bool = True) -> List[int]:
divides: List[int] = []
append = divides.append
line_position = 0
def divide_line(text: str, width: int, fold: bool = True) -> list[int]:
"""Given a string of text, and a width (measured in cells), return a list
of cell offsets which the string should be split at in order for it to fit
within the given width.
Args:
text: The text to examine.
width: The available cell width.
fold: If True, words longer than `width` will be folded onto a new line.
Returns:
A list of indices to break the line at.
"""
break_positions: list[int] = [] # offsets to insert the breaks at
append = break_positions.append
cell_offset = 0
_cell_len = cell_len

for start, _end, word in words(text):
word_length = _cell_len(word.rstrip())
if line_position + word_length > width:
remaining_space = width - cell_offset
word_fits_remaining_space = remaining_space >= word_length

if word_fits_remaining_space:
# Simplest case - the word fits within the remaining width for this line.
cell_offset += _cell_len(word)
else:
# Not enough space remaining for this word on the current line.
if word_length > width:
# The word doesn't fit on any line, so we can't simply
# place it on the next line...
if fold:
chopped_words = chop_cells(word, max_size=width, position=0)
for last, line in loop_last(chopped_words):
# Fold the word across multiple lines.
folded_word = chop_cells(word, width=width)
for last, line in loop_last(folded_word):
if start:
append(start)

if last:
line_position = _cell_len(line)
cell_offset = _cell_len(line)
else:
start += len(line)
else:
# Folding isn't allowed, so crop the word.
if start:
append(start)
line_position = _cell_len(word)
elif line_position and start:
cell_offset = _cell_len(word)
elif cell_offset and start:
# The word doesn't fit within the remaining space on the current
# line, but it *can* fit on to the next (empty) line.
append(start)
line_position = _cell_len(word)
else:
line_position += _cell_len(word)
return divides
cell_offset = _cell_len(word)

return break_positions


if __name__ == "__main__": # pragma: no cover
from .console import Console

console = Console(width=10)
console.print("12345 abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ 12345")
print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10, position=2))
print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10))

console = Console(width=20)
console.rule()
console.print("TextualはPythonの高速アプリケーション開発フレームワークです")

console.rule()
console.print("アプリケーションは1670万色を使用でき")
54 changes: 34 additions & 20 deletions rich/cells.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import re
from functools import lru_cache
from typing import Callable, List
from typing import Callable

from ._cell_widths import CELL_WIDTHS

Expand Down Expand Up @@ -119,27 +121,39 @@ def set_cell_size(text: str, total: int) -> str:
start = pos


# TODO: This is inefficient
# TODO: This might not work with CWJ type characters
def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
"""Break text in to equal (cell) length strings, returning the characters in reverse
order"""
def chop_cells(
text: str,
width: int,
) -> list[str]:
"""Split text into lines such that each line fits within the available (cell) width.
Args:
text: The text to fold such that it fits in the given width.
width: The width available (number of cells).
Returns:
A list of strings such that each string in the list has cell width
less than or equal to the available width.
"""
_get_character_cell_size = get_character_cell_size
characters = [
(character, _get_character_cell_size(character)) for character in text
]
total_size = position
lines: List[List[str]] = [[]]
append = lines[-1].append

for character, size in reversed(characters):
if total_size + size > max_size:
lines.append([character])
append = lines[-1].append
total_size = size
lines: list[list[str]] = [[]]

append_new_line = lines.append
append_to_last_line = lines[-1].append

total_width = 0

for character in text:
cell_width = _get_character_cell_size(character)
char_doesnt_fit = total_width + cell_width > width

if char_doesnt_fit:
append_new_line([character])
append_to_last_line = lines[-1].append
total_width = cell_width
else:
total_size += size
append(character)
append_to_last_line(character)
total_width += cell_width

return ["".join(line) for line in lines]

Expand Down
19 changes: 19 additions & 0 deletions tests/test_cells.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from rich import cells
from rich.cells import chop_cells


def test_cell_len_long_string():
Expand Down Expand Up @@ -40,3 +41,21 @@ def test_set_cell_size_infinite():
)
== size
)


def test_chop_cells():
"""Simple example of splitting cells into lines of width 3."""
text = "abcdefghijk"
assert chop_cells(text, 3) == ["abc", "def", "ghi", "jk"]


def test_chop_cells_double_width_boundary():
"""The available width lies within a double-width character."""
text = "ありがとう"
assert chop_cells(text, 3) == ["あ", "り", "が", "と", "う"]


def test_chop_cells_mixed_width():
"""Mixed single and double-width characters."""
text = "あ1り234が5と6う78"
assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]
66 changes: 66 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,20 @@ def test_wrap_cjk_width_mid_character():
]


def test_wrap_cjk_mixed():
"""Regression test covering /~https://github.com/Textualize/rich/issues/3176 and
/~https://github.com/Textualize/textual/issues/3567 - double width characters could
result in text going missing when wrapping."""
text = Text("123ありがとうございました")
console = Console(width=20) # let's ensure the width passed to wrap() wins.

wrapped_lines = text.wrap(console, width=8)
with console.capture() as capture:
console.print(wrapped_lines)

assert capture.get() == "123あり\nがとうご\nざいまし\n\n"


def test_wrap_long():
text = Text("abracadabra", justify="left")
lines = text.wrap(Console(), 4)
Expand Down Expand Up @@ -497,6 +511,47 @@ def test_wrap_long_words_2():
]


def test_wrap_long_words_followed_by_other_words():
"""After folding a word across multiple lines, we should continue from
the next word immediately after the folded word (don't take a newline
following completion of the folded word)."""
text = Text("123 12345678 123 123")
lines = text.wrap(Console(), 6)
assert lines._lines == [
Text("123 "),
Text("123456"),
Text("78 123"),
Text("123"),
]


def test_wrap_long_word_preceeded_by_word_of_full_line_length():
"""The width of the first word is the same as the available width.
Ensures that folding works correctly when there's no space available
on the current line."""
text = Text("123456 12345678 123 123")
lines = text.wrap(Console(), 6)
assert lines._lines == [
Text("123456"),
Text("123456"),
Text("78 123"),
Text("123"),
]


def test_wrap_multiple_consecutive_spaces():
"""Adding multiple consecutive spaces at the end of a line does not impact
the location at which a break will be added during the process of wrapping."""
text = Text("123456 12345678 123 123")
lines = text.wrap(Console(), 6)
assert lines._lines == [
Text("123456"),
Text("123456"),
Text("78 123"),
Text("123"),
]


def test_wrap_long_words_justify_left():
text = Text("X 123456789", justify="left")
lines = text.wrap(Console(), 4)
Expand All @@ -508,6 +563,17 @@ def test_wrap_long_words_justify_left():
assert lines[3] == Text("9 ")


def test_wrap_leading_and_trailing_whitespace():
text = Text(" 123 456 789 ")
lines = text.wrap(Console(), 4)
assert lines._lines == [
Text(" 1"),
Text("23 "),
Text("456 "),
Text("789 "),
]


def test_no_wrap_no_crop():
text = Text("Hello World!" * 3)

Expand Down

0 comments on commit 59b1aca

Please sign in to comment.