Skip to content

Commit

Permalink
Autoformat unit characters
Browse files Browse the repository at this point in the history
  • Loading branch information
covracer committed Jun 3, 2023
1 parent e82160a commit 5a9f4aa
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 5 deletions.
9 changes: 9 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/confusables.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,12 @@ def f():
# consisting of a single ambiguous character, while the second character is a "word
# boundary" (whitespace) that it itself ambiguous.
x = "Р усский"


# At runtime the attribute will be stored as Greek small letter mu instead of
# micro sign because of PEP 3131's NFKC normalization of identifiers (not strings).
class Labware:
µL = 1.5


assert getattr(Labware(), "µL") == 1.5
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ pub(crate) fn ambiguous_unicode_character(
let candidate = Candidate::new(
TextSize::try_from(relative_offset).unwrap() + range.start(),
current_char,
representant as char,
char::from_u32(representant).unwrap(),
);
if let Some(diagnostic) = candidate.into_diagnostic(context, settings) {
diagnostics.push(diagnostic);
Expand All @@ -165,7 +165,7 @@ pub(crate) fn ambiguous_unicode_character(
word_candidates.push(Candidate::new(
TextSize::try_from(relative_offset).unwrap() + range.start(),
current_char,
representant as char,
char::from_u32(representant).unwrap(),
));
} else {
// The current word contains at least one unambiguous Unicode character.
Expand Down
5 changes: 4 additions & 1 deletion crates/ruff/src/rules/ruff/rules/confusables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use rustc_hash::FxHashMap;

/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u8>> = Lazy::new(|| {
pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u32>> = Lazy::new(|| {
#[allow(clippy::unreadable_literal)]
FxHashMap::from_iter([
(8232, 32),
Expand Down Expand Up @@ -2116,5 +2116,8 @@ pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u8>> = Lazy::new(|| {
(1059, 89),
(65283, 35),
(65307, 59),
(0x212B, 0x00C5), // ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE
(0x2126, 0x03A9), // OHM SIGN → GREEK CAPITAL LETTER OMEGA
(0x00B5, 0x03BC), // MICRO SIGN → GREEK SMALL LETTER MU
])
});
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ confusables.py:31:6: RUF001 [*] String contains ambiguous `Р` (CYRILLIC CAPITAL
30 30 | # boundary" (whitespace) that it itself ambiguous.
31 |-x = "Р усский"
31 |+x = "P усский"
32 32 |
33 33 |
34 34 | # At runtime the attribute will be stored as Greek small letter mu instead of

confusables.py:31:7: RUF001 [*] String contains ambiguous ` ` (EN QUAD). Did you mean ` ` (SPACE)?
|
Expand All @@ -126,5 +129,22 @@ confusables.py:31:7: RUF001 [*] String contains ambiguous ` ` (EN QUAD). Did y
30 30 | # boundary" (whitespace) that it itself ambiguous.
31 |-x = "Р усский"
31 |+x = "Р усский"
32 32 |
33 33 |
34 34 | # At runtime the attribute will be stored as Greek small letter mu instead of

confusables.py:40:28: RUF001 [*] String contains ambiguous `µ` (MICRO SIGN). Did you mean `μ` (GREEK SMALL LETTER MU)?
|
40 | assert getattr(Labware(), "µL") == 1.5
| ^ RUF001
|
= help: Replace `µ` (MICRO SIGN) with `μ` (GREEK SMALL LETTER MU)

Suggested fix
37 37 | µL = 1.5
38 38 |
39 39 |
40 |-assert getattr(Labware(), "µL") == 1.5
40 |+assert getattr(Labware(), "μL") == 1.5


10 changes: 8 additions & 2 deletions scripts/update_ambiguous_characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,21 @@ def format_confusables_rs(raw_data: dict) -> str:
/// Via: <https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json>
/// See: <https://github.com/microsoft/vscode/blob/095ddabc52b82498ee7f718a34f9dd11d59099a8/src/vs/base/common/strings.ts#L1094>
pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u8>> = Lazy::new(|| {
pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u32>> = Lazy::new(|| {
#[allow(clippy::unreadable_literal)]
FxHashMap::from_iter([
""".lstrip()
tuples = []
for _category, items in raw_data.items():
for i in range(0, len(items), 2):
tuples.append(f"({items[i]}, {items[i + 1]}),")
postlude = """])});"""
postlude = """
(0x212B, 0x00C5), // ANGSTROM SIGN → LATIN CAPITAL LETTER A WITH RING ABOVE
(0x2126, 0x03A9), // OHM SIGN → GREEK CAPITAL LETTER OMEGA
(0x00B5, 0x03BC), // MICRO SIGN → GREEK SMALL LETTER MU
])
});
"""

print(f"{len(tuples)} confusable tuples.")

Expand Down

0 comments on commit 5a9f4aa

Please sign in to comment.