diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py b/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py index 7e88914110631..7791fb5dc4d76 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/confusables.py @@ -45,3 +45,11 @@ def f(): # And here's a comment with a greek alpha: ∗ foo # And here's a comment with an unusual punctuation mark: ᜵ }" + +# At runtime the attribute will be stored as Greek small letter mu instead of +# micro sign because of PEP 3131's NFKC normalization +class Labware: + µL = 1.5 + + +assert getattr(Labware(), "µL") == 1.5 diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index 5ebe88a11c775..505d1b9a42e7c 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -163,7 +163,7 @@ pub(crate) fn ambiguous_unicode_character( let candidate = Candidate::new( TextSize::try_from(relative_offset).unwrap() + range.start(), current_char, - representant as char, + char::from_u32(representant).unwrap(), ); if let Some(diagnostic) = candidate.into_diagnostic(context, settings) { diagnostics.push(diagnostic); @@ -178,7 +178,7 @@ pub(crate) fn ambiguous_unicode_character( word_candidates.push(Candidate::new( TextSize::try_from(relative_offset).unwrap() + range.start(), current_char, - representant as char, + char::from_u32(representant).unwrap(), )); } else { // The current word contains at least one unambiguous unicode character. 
diff --git a/crates/ruff_linter/src/rules/ruff/rules/confusables.rs b/crates/ruff_linter/src/rules/ruff/rules/confusables.rs index 74ed6ce936b6c..11963751882ed 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/confusables.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/confusables.rs @@ -2,7 +2,7 @@ /// Via: /// See: -pub(crate) fn confusable(c: u32) -> Option<u8> { +pub(crate) fn confusable(c: u32) -> Option<u32> { let result = match c { 160u32 => 32, 180u32 => 96, @@ -1586,6 +1586,9 @@ pub(crate) fn confusable(c: u32) -> Option<u8> { 130_039_u32 => 55, 130_040_u32 => 56, 130_041_u32 => 57, + 0x212B => 0x00C5, + 0x2126 => 0x03A9, + 0x00B5 => 0x03BC, _ => return None, }; Some(result) diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap index 541fc82af67a1..1a7b2d480542d 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__confusables.snap @@ -155,4 +155,10 @@ confusables.py:46:62: RUF003 Comment contains ambiguous `᜵` (PHILIPPINE SINGLE 47 | }" | +confusables.py:55:28: RUF001 String contains ambiguous `µ` (MICRO SIGN). Did you mean `μ` (GREEK SMALL LETTER MU)? + | +55 | assert getattr(Labware(), "µL") == 1.5 + | ^ RUF001 + | + diff --git a/scripts/update_ambiguous_characters.py b/scripts/update_ambiguous_characters.py index 55604a94b0a5f..c4c98fc4d6afb 100644 --- a/scripts/update_ambiguous_characters.py +++ b/scripts/update_ambiguous_characters.py @@ -51,7 +51,7 @@ def format_number(number: int) -> str: def format_confusables_rs(raw_data: dict[str, list[int]]) -> str: """Format the downloaded data into a Rust source file.""" - # The input data contains duplicate entries + # The input data contains duplicate entries. 
flattened_items: set[tuple[int, int]] = set() for _category, items in raw_data.items(): assert len(items) % 2 == 0, "Expected pairs of items" @@ -63,6 +63,18 @@ def format_confusables_rs(raw_data: dict[str, list[int]]) -> str: for left, right in sorted(flattened_items) ] + # Add some additional confusable pairs that are not included in the VS Code data, + # as they're unicode-to-unicode confusables, not unicode-to-ASCII confusables. + confusable_units = [ + # ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE + ("0x212B", "0x00C5"), + # OHM SIGN → GREEK CAPITAL LETTER OMEGA + ("0x2126", "0x03A9"), + # MICRO SIGN → GREEK SMALL LETTER MU + ("0x00B5", "0x03BC"), + ] + tuples += [f" {left} => {right},\n" for left, right in confusable_units] + print(f"{len(tuples)} confusable tuples.") return prelude + "".join(tuples) + postlude