From f20f9e6341276f96c60820f6d39ade9c80d7b5d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Sun, 5 May 2024 15:29:03 +0200 Subject: [PATCH] Fix handling of upper-case sigma (#124714) --- library/alloc/src/str.rs | 4 +++- library/alloc/tests/str.rs | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index e52b855b30d89..56d7885c129bf 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -369,6 +369,8 @@ impl str { pub fn to_lowercase(&self) -> String { let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase); + let prefix_len = s.len(); + for (i, c) in rest[..].char_indices() { if c == 'Σ' { // Σ maps to σ, except at the end of a word where it maps to ς. @@ -376,7 +378,7 @@ impl str { // in `SpecialCasing.txt`, // so hard-code it rather than have a generic "condition" mechanism. // See https://github.com/rust-lang/rust/issues/26035 - map_uppercase_sigma(rest, i, &mut s) + map_uppercase_sigma(self, prefix_len + i, &mut s) } else { match conversions::to_lower(c) { [a, '\0', _] => s.push(a), diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index df8a260624a28..6eb0011627342 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1826,6 +1826,19 @@ fn to_lowercase() { assert_eq!("Α'Σ".to_lowercase(), "α'ς"); assert_eq!("Α''Σ".to_lowercase(), "α''ς"); + assert_eq!("aΣ".to_lowercase(), "aς"); + assert_eq!("a'Σ".to_lowercase(), "a'ς"); + assert_eq!("a''Σ".to_lowercase(), "a''ς"); + + assert_eq!("ÄΣ".to_lowercase(), "äς"); + assert_eq!("ä'Σ".to_lowercase(), "ä'ς"); + assert_eq!("ä''Σ".to_lowercase(), "ä''ς"); + + // input lengths around the boundary of the chunk size used by the ascii prefix optimization + assert_eq!("abcdefghijklmnoΣ".to_lowercase(), "abcdefghijklmnoς"); + assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς"); + assert_eq!("abcdefghijklmnopqΣ".to_lowercase(), 
"abcdefghijklmnopqς"); + assert_eq!("ΑΣ Α".to_lowercase(), "ας α"); assert_eq!("Α'Σ Α".to_lowercase(), "α'ς α"); assert_eq!("Α''Σ Α".to_lowercase(), "α''ς α"); @@ -1840,6 +1853,10 @@ fn to_lowercase() { assert_eq!("Α 'Σ".to_lowercase(), "α 'σ"); assert_eq!("Α ''Σ".to_lowercase(), "α ''σ"); + assert_eq!("Ä Σ".to_lowercase(), "ä σ"); + assert_eq!("Ä 'Σ".to_lowercase(), "ä 'σ"); + assert_eq!("Ä ''Σ".to_lowercase(), "ä ''σ"); + assert_eq!("Σ".to_lowercase(), "σ"); assert_eq!("'Σ".to_lowercase(), "'σ"); assert_eq!("''Σ".to_lowercase(), "''σ");