Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add line breaking rules for Japanese text #1498

Merged
merged 1 commit into from
Apr 16, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions epaint/src/text/text_layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ fn line_break(
let mut row_start_idx = 0;
let mut non_empty_rows = 0;

for (i, glyph) in paragraph.glyphs.iter().enumerate() {
let potential_row_width = glyph.max_x() - row_start_x;
for i in 0..paragraph.glyphs.len() {
let potential_row_width = paragraph.glyphs[i].max_x() - row_start_x;

if job.wrap.max_rows > 0 && non_empty_rows >= job.wrap.max_rows {
break;
Expand Down Expand Up @@ -245,7 +245,7 @@ fn line_break(
}
}

row_break_candidates.add(i, glyph.chr);
row_break_candidates.add(i, &paragraph.glyphs[i..]);
}

if row_start_idx < paragraph.glyphs.len() {
Expand Down Expand Up @@ -716,6 +716,8 @@ struct RowBreakCandidates {
space: Option<usize>,
/// Logograms (single character representing a whole word) are good candidates for line break.
logogram: Option<usize>,
/// Kana (Japanese hiragana and katakana) are line-break candidates unless followed by a gyōtō kinsoku character.
kana: Option<usize>,
/// Breaking at a dash is a super-
/// good idea.
dash: Option<usize>,
Expand All @@ -728,16 +730,19 @@ struct RowBreakCandidates {
}

impl RowBreakCandidates {
fn add(&mut self, index: usize, chr: char) {
fn add(&mut self, index: usize, glyphs: &[Glyph]) {
let chr = glyphs[0].chr;
const NON_BREAKING_SPACE: char = '\u{A0}';
if chr.is_whitespace() && chr != NON_BREAKING_SPACE {
self.space = Some(index);
} else if is_chinese(chr) {
} else if is_cjk_ideograph(chr) {
self.logogram = Some(index);
} else if chr == '-' {
self.dash = Some(index);
} else if chr.is_ascii_punctuation() {
self.punctuation = Some(index);
} else if is_kana(chr) && (glyphs.len() == 1 || !is_gyoto_kinsoku(glyphs[1].chr)) {
self.kana = Some(index);
}
self.any = Some(index);
}
Expand All @@ -759,6 +764,7 @@ impl RowBreakCandidates {
self.any
} else {
self.space
.or(self.kana)
.or(self.logogram)
.or(self.dash)
.or(self.punctuation)
Expand All @@ -768,12 +774,25 @@ impl RowBreakCandidates {
}

#[inline]
fn is_chinese(c: char) -> bool {
fn is_cjk_ideograph(c: char) -> bool {
('\u{4E00}' <= c && c <= '\u{9FFF}')
|| ('\u{3400}' <= c && c <= '\u{4DBF}')
|| ('\u{2B740}' <= c && c <= '\u{2B81F}')
}

/// Returns `true` when `c` lies in the Unicode Hiragana block
/// (U+3040..=U+309F) or the Katakana block (U+30A0..=U+30FF).
#[inline]
fn is_kana(c: char) -> bool {
    matches!(
        c,
        '\u{3040}'..='\u{309F}' // Hiragana block
        | '\u{30A0}'..='\u{30FF}' // Katakana block
    )
}

/// Returns `true` when `c` is a gyōtō ("beginning of line") kinsoku character.
///
/// In Japanese typesetting, kinsoku shori rules forbid these characters from
/// appearing at the start of a line. The base list follows
/// <https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages#Characters_not_permitted_on_the_start_of_a_line>.
///
/// The fullwidth forms of the listed ASCII closing brackets and punctuation
/// are recognised as well, because Japanese text normally uses those code
/// points rather than their ASCII counterparts.
#[inline]
fn is_gyoto_kinsoku(c: char) -> bool {
    // Closing brackets, small kana and iteration marks, hyphens, delimiters,
    // and mid-/end-of-sentence punctuation.
    const LISTED: &str = ")]}〕〉》」』】〙〗〟'\"⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.";

    // Fullwidth counterparts, written as escapes so that they survive any
    // Unicode normalisation applied to this source text.
    let is_fullwidth_variant = matches!(
        c,
        '\u{FF01}' // fullwidth exclamation mark
        | '\u{FF09}' // fullwidth right parenthesis
        | '\u{FF0C}' // fullwidth comma
        | '\u{FF0E}' // fullwidth full stop
        | '\u{FF1A}' // fullwidth colon
        | '\u{FF1B}' // fullwidth semicolon
        | '\u{FF1F}' // fullwidth question mark
        | '\u{FF3D}' // fullwidth right square bracket
        | '\u{FF5D}' // fullwidth right curly bracket
        | '\u{FF60}' // fullwidth right white parenthesis
    );

    is_fullwidth_variant || LISTED.contains(c)
}

// ----------------------------------------------------------------------------

#[test]
Expand Down