From 5a9f4aa3ed1f6703b2d39592334240a7339c0fcd Mon Sep 17 00:00:00 2001 From: Christopher Covington Date: Sun, 14 May 2023 06:54:56 -0400 Subject: [PATCH] Autoformat unit characters --- .../test/fixtures/ruff/confusables.py | 9 +++++++++ .../ruff/rules/ambiguous_unicode_character.rs | 4 ++-- .../ruff/src/rules/ruff/rules/confusables.rs | 5 ++++- ...ruff__rules__ruff__tests__confusables.snap | 20 +++++++++++++++++++ scripts/update_ambiguous_characters.py | 10 ++++++++-- 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/crates/ruff/resources/test/fixtures/ruff/confusables.py b/crates/ruff/resources/test/fixtures/ruff/confusables.py index 3ae350887fa3f1..98e4865116156a 100644 --- a/crates/ruff/resources/test/fixtures/ruff/confusables.py +++ b/crates/ruff/resources/test/fixtures/ruff/confusables.py @@ -29,3 +29,12 @@ def f(): # consisting of a single ambiguous character, while the second character is a "word # boundary" (whitespace) that it itself ambiguous. x = "Р усский" + + +# At runtime the attribute will be stored as Greek small letter mu instead of +# micro sign because of PEP 3131's NFKC normalization +class Labware: + µL = 1.5 + + +assert getattr(Labware(), "µL") == 1.5 diff --git a/crates/ruff/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff/src/rules/ruff/rules/ambiguous_unicode_character.rs index 65d05f9095b4d3..61a0ac7cd8eea3 100644 --- a/crates/ruff/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -150,7 +150,7 @@ pub(crate) fn ambiguous_unicode_character( let candidate = Candidate::new( TextSize::try_from(relative_offset).unwrap() + range.start(), current_char, - representant as char, + char::from_u32(representant).unwrap(), ); if let Some(diagnostic) = candidate.into_diagnostic(context, settings) { diagnostics.push(diagnostic); @@ -165,7 +165,7 @@ pub(crate) fn ambiguous_unicode_character( word_candidates.push(Candidate::new( TextSize::try_from(relative_offset).unwrap() + range.start(), current_char, - representant as char, + char::from_u32(representant).unwrap(), )); } else { // The current word contains at least one unambiguous unicode character. diff --git a/crates/ruff/src/rules/ruff/rules/confusables.rs b/crates/ruff/src/rules/ruff/rules/confusables.rs index e3ff076259c5f3..57f2bb88a0de25 100644 --- a/crates/ruff/src/rules/ruff/rules/confusables.rs +++ b/crates/ruff/src/rules/ruff/rules/confusables.rs @@ -4,7 +4,7 @@ use rustc_hash::FxHashMap; /// Via: /// See: -pub(crate) static CONFUSABLES: Lazy> = Lazy::new(|| { +pub(crate) static CONFUSABLES: Lazy> = Lazy::new(|| { #[allow(clippy::unreadable_literal)] FxHashMap::from_iter([ (8232, 32), @@ -2116,5 +2116,8 @@ pub(crate) static CONFUSABLES: Lazy> = Lazy::new(|| { (1059, 89), (65283, 35), (65307, 59), + (0x212B, 0x00C5), // ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE + (0x2126, 0x03A9), // OHM SIGN → GREEK CAPITAL LETTER OMEGA + (0x00B5, 0x03BC), // MICRO SIGN → GREEK SMALL LETTER MU ]) }); diff --git a/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__confusables.snap b/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__confusables.snap index e9e180c02383ec..f6a1b8eb87f9d4 100644 --- a/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__confusables.snap +++ b/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__confusables.snap @@ -110,6 +110,9 @@ confusables.py:31:6: RUF001 [*] String contains ambiguous `Р` (CYRILLIC CAPITAL 30 30 | # boundary" (whitespace) that it itself ambiguous. 31 |-x = "Р усский" 31 |+x = "P усский" +32 32 | +33 33 | +34 34 | # At runtime the attribute will be stored as Greek small letter mu instead of confusables.py:31:7: RUF001 [*] String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)? | @@ -126,5 +129,22 @@ confusables.py:31:7: RUF001 [*] String contains ambiguous ` ` (EN QUAD). Did y 30 30 | # boundary" (whitespace) that it itself ambiguous. 31 |-x = "Р усский" 31 |+x = "Р усский" +32 32 | +33 33 | +34 34 | # At runtime the attribute will be stored as Greek small letter mu instead of + +confusables.py:40:28: RUF001 [*] String contains ambiguous `µ` (MICRO SIGN). Did you mean `μ` (GREEK SMALL LETTER MU)? + | +40 | assert getattr(Labware(), "µL") == 1.5 + | ^ RUF001 + | + = help: Replace `µ` (MICRO SIGN) with `μ` (GREEK SMALL LETTER MU) + +ℹ Suggested fix +37 37 | µL = 1.5 +38 38 | +39 39 | +40 |-assert getattr(Labware(), "µL") == 1.5 + 40 |+assert getattr(Labware(), "μL") == 1.5 diff --git a/scripts/update_ambiguous_characters.py b/scripts/update_ambiguous_characters.py index 8413c5bcfb0829..5b18315ebc4183 100644 --- a/scripts/update_ambiguous_characters.py +++ b/scripts/update_ambiguous_characters.py @@ -32,7 +32,7 @@ def format_confusables_rs(raw_data: dict) -> str: /// Via: /// See: -pub(crate) static CONFUSABLES: Lazy> = Lazy::new(|| { +pub(crate) static CONFUSABLES: Lazy> = Lazy::new(|| { #[allow(clippy::unreadable_literal)] FxHashMap::from_iter([ """.lstrip() @@ -40,7 +40,13 @@ def format_confusables_rs(raw_data: dict) -> str: for _category, items in raw_data.items(): for i in range(0, len(items), 2): tuples.append(f"({items[i]}, {items[i + 1]}),") - postlude = """])});""" + postlude = """ + (0x212B, 0x00C5), // ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE + (0x2126, 0x03A9), // OHM SIGN → GREEK CAPITAL LETTER OMEGA + (0x00B5, 0x03BC), // MICRO SIGN → GREEK SMALL LETTER MU + ]) +}); +""" print(f"{len(tuples)} confusable tuples.")