From e37e7d231d951a48dff88e981185283ecc0b6c63 Mon Sep 17 00:00:00 2001
From: Ovilia <zwl.sjtu@gmail.com>
Date: Thu, 24 Nov 2022 18:22:53 +0800
Subject: [PATCH] fix: Charsets like Cyrillic should break words the same as
 Latin

fix apache/echarts#17941
---
 src/graphic/helper/parseText.ts | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/graphic/helper/parseText.ts b/src/graphic/helper/parseText.ts
index b7f1c14f2..201d8fc00 100644
--- a/src/graphic/helper/parseText.ts
+++ b/src/graphic/helper/parseText.ts
@@ -591,9 +591,17 @@ function pushTokens(
 }
 
 
-function isLatin(ch: string) {
+function isAlphabeticLetter(ch: string) {
+    // Unicode Character Ranges
+    // https://jrgraphix.net/research/unicode_blocks.php
+    // The following ranges do not cover every alphabetic script, only the more
+    // common ones. Please open a pull request if you find a script that is not
+    // covered.
     let code = ch.charCodeAt(0);
-    return code >= 0x21 && code <= 0x17F;
+    return code >= 0x20 && code <= 0x24F // Latin
+        || code >= 0x370 && code <= 0x10FF // Greek, Coptic, Cyrillic, etc.
+        || code >= 0x1200 && code <= 0x13FF // Ethiopic and Cherokee
+        || code >= 0x1E00 && code <= 0x206F; // Latin/Greek extended, general punctuation
 }
 
 const breakCharMap = reduce(',&?/;] '.split(''), function (obj, ch) {
@@ -604,7 +612,7 @@ const breakCharMap = reduce(',&?/;] '.split(''), function (obj, ch) {
  * If break by word. For latin languages.
  */
 function isWordBreakChar(ch: string) {
-    if (isLatin(ch)) {
+    if (isAlphabeticLetter(ch)) {
         if (breakCharMap[ch]) {
             return true;
         }