From e37e7d231d951a48dff88e981185283ecc0b6c63 Mon Sep 17 00:00:00 2001 From: Ovilia Date: Thu, 24 Nov 2022 18:22:53 +0800 Subject: [PATCH] fix: Charsets like Cyrillic should break work the same as Latin fix apache/echarts#17941 --- src/graphic/helper/parseText.ts | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/graphic/helper/parseText.ts b/src/graphic/helper/parseText.ts index b7f1c14f2..201d8fc00 100644 --- a/src/graphic/helper/parseText.ts +++ b/src/graphic/helper/parseText.ts @@ -591,9 +591,17 @@ function pushTokens( } -function isLatin(ch: string) { +function isAlphabeticLetter(ch: string) { + // Unicode Character Ranges + // https://jrgraphix.net/research/unicode_blocks.php + // The following ranges may not cover all letter ranges but only the more + // popular ones. Developers could make pull requests when they find those + // not covered. let code = ch.charCodeAt(0); - return code >= 0x21 && code <= 0x17F; + return code >= 0x20 && code <= 0x24F // Latin + || code >= 0x370 && code <= 0x10FF // Greek, Coptic, Cyrillic, etc. + || code >= 0x1200 && code <= 0x13FF // Ethiopic and Cherokee + || code >= 0x1E00 && code <= 0x206F; // Latin and Greek extended } const breakCharMap = reduce(',&?/;] '.split(''), function (obj, ch) { @@ -604,7 +612,7 @@ const breakCharMap = reduce(',&?/;] '.split(''), function (obj, ch) { * If break by word. For latin languages. */ function isWordBreakChar(ch: string) { - if (isLatin(ch)) { + if (isAlphabeticLetter(ch)) { if (breakCharMap[ch]) { return true; }