Feat/support more languages in handles (#2163)

# Goal

The goal of this PR is to:

1. support more languages in handles,
2. close some gaps in language support for existing ones,
3. disallow a '/' in handles.

Closes #2162

Co-authored-by: Wil Wade <wil.wade@amplica.io>
frequency-chain · Sep 27, 2024 · c8f184f · c8f184f
1 parent 3a59277
commit c8f184f
Show file tree

Hide file tree

Showing 5 changed files with 276 additions and 43 deletions.
diff --git a/pallets/handles/src/handles-utils/constants.rs b/pallets/handles/src/handles-utils/constants.rs
@@ -2,27 +2,168 @@
 
 use core::ops::RangeInclusive;
 
-/// Character that are allowed.
-pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 21] = [
-	0x0020..=0x007F, // Basic Latin
-	0x0080..=0x00FF, // Latin-1 Supplement
-	0x0100..=0x017F, // Latin Extended-A
-	0x0370..=0x03FF, // Greek and Coptic
-	0x0400..=0x04FF, // Cyrillic
-	0x0500..=0x052F, // Cyrillic Supplementary
-	0x0590..=0x05FF, // Hebrew
-	0x0600..=0x06FF, // Arabic
-	0x0900..=0x097F, // Devanagari
-	0x0980..=0x09FF, // Bengali
-	0x0E00..=0x0E7F, // Thai
-	0x1100..=0x11FF, // Hangul Jamo
-	0x1E00..=0x1EFF, // Latin Extended Additional
-	0x1F00..=0x1FFF, // Greek Extended
-	0x3040..=0x309F, // Hiragana
-	0x30A0..=0x30FF, // Katakana
-	0x3400..=0x4DBF, // CJK Unified Ideographs Extension A
-	0x4E00..=0x9FFF, // CJK Unified Ideographs
-	0xAC00..=0xD7AF, // Hangul Syllables
-	0xF900..=0xFAFF, // CJK Compatibility Ideographs
-	0xFB50..=0xFDFF, // Arabic Presentation Forms-A
+/// Collapses `ALLOWED_UNICODE_CHARACTER_RANGES` into the shortest equivalent list by
+/// joining every pair of neighbouring ranges where one ends exactly one value before
+/// the next begins.
+///
+/// The input must be sorted by start value; only directly adjacent ranges are joined.
+/// Each decision is printed to stdout so a regeneration run shows what was merged.
+///
+/// Returns the merged ranges in input order, including the final one.
+#[cfg(test)]
+pub fn build_allowed_char_ranges() -> Vec<RangeInclusive<u16>> {
+	let mut merged: Vec<RangeInclusive<u16>> = Vec::new();
+	// Range currently being grown. `None` until the first entry has been read, so no
+	// placeholder value can be mistaken for, or merged into, a real range.
+	let mut pending: Option<RangeInclusive<u16>> = None;
+	// assumes the list is sorted!
+	for allowed in ALLOWED_UNICODE_CHARACTER_RANGES {
+		let (allowed_start, allowed_end) = (*allowed.start(), *allowed.end());
+		pending = Some(match pending {
+			// `checked_add` keeps a range that ends at 0xFFFF from overflowing.
+			Some(current) if current.end().checked_add(1) == Some(allowed_start) => {
+				let (start, end) = (*current.start(), *current.end());
+				println!("joining {start:#X}..{end:#X} with {allowed_start:#X}..{allowed_end:#X}");
+				RangeInclusive::new(start, allowed_end)
+			},
+			Some(current) => {
+				println!("adding {:#X}..{:#X}", current.start(), current.end());
+				merged.push(current);
+				allowed
+			},
+			None => allowed,
+		});
+	}
+	// A range is only emitted once its successor turns out not to be adjacent, so the
+	// last one is still pending here and has to be emitted explicitly.
+	if let Some(current) = pending {
+		println!("adding {:#X}..{:#X}", current.start(), current.end());
+		merged.push(current);
+	}
+	merged
+}
+
+/// Inclusive `u16` ranges of the characters that are allowed.
+///
+/// This is the compact form of the per-script list kept in the comment below: ranges
+/// that touch end-to-start are merged into a single entry. It is generated using
+/// test_build_allowed_char_ranges; the trailing comment on each entry names the
+/// source ranges it covers.
+///
+/// NOTE(review): the per-script list ends with 0xFB50..=0xFDFF (Arabic Presentation
+/// Forms-A), which has no counterpart in this table — confirm that dropping it was
+/// intended.
+#[rustfmt::skip]
+pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 54] = [
+	0x0020..=0x007A, // Basic Latin
+	0x0080..=0x024F, // Latin-1 Supplement, Latin Extended-A, Latin Extended-B
+	0x02B0..=0x04FF, // Spacing Modifier Letters, Combining Diacritical Marks, Greek and Coptic, Cyrillic
+	0x0531..=0x058A, // Armenian
+	0x0591..=0x05F4, // Hebrew
+	0x0600..=0x07B1, // Arabic, Syriac, Arabic Supplement, Thaana
+	0x07C0..=0x07FA, // N'Ko
+	0x0900..=0x097F, // Devanagari
+	0x0981..=0x09FB, // Bengali
+	0x0A01..=0x0A75, // Gurmukhi
+	0x0A81..=0x0AF1, // Gujarati
+	0x0B01..=0x0B77, // Oriya
+	0x0B82..=0x0BFA, // Tamil
+	0x0C01..=0x0C7F, // Telugu
+	0x0C82..=0x0CF2, // Kannada
+	0x0D02..=0x0D7F, // Malayalam
+	0x0D82..=0x0DF4, // Sinhala
+	0x0E01..=0x0E5B, // Thai
+	0x0E81..=0x0EDD, // Lao
+	0x0F00..=0x0FDA, // Tibetan
+	0x1000..=0x10FC, // Myanmar, Georgian
+	0x1100..=0x137C, // Hangul Jamo, Ethiopic
+	0x1380..=0x1399, // Ethiopic Supplement
+	0x13A0..=0x13F4, // Cherokee
+	0x1400..=0x167F, // Unified Canadian Aboriginal Syllabics
+	0x1700..=0x1714, // Tagalog
+	0x1720..=0x1736, // Hanunoo
+	0x1740..=0x1753, // Buhid
+	0x1760..=0x1773, // Tagbanwa
+	0x1780..=0x17F9, // Khmer
+	0x1800..=0x18AA, // Mongolian
+	0x18B0..=0x18F5, // Unified Canadian Aboriginal Syllabics Extended
+	0x1900..=0x1974, // Limbu, Tai Le
+	0x1980..=0x1AAD, // New Tai Le, Khmer Symbols, Buginese, Tai Tham
+	0x1B00..=0x1B7C, // Balinese
+	0x1B80..=0x1BB9, // Sundanese
+	0x1BC0..=0x1C7F, // Batak, Lepcha, Ol Chiki
+	0x1E00..=0x1FFF, // Latin Extended Additional, Greek Extended
+	0x200C..=0x206F, // General punctuation
+	0x2C80..=0x2CFF, // Coptic
+	0x2D30..=0x2D7F, // Tifinagh
+	0x3040..=0x30FF, // Hiragana, Katakana
+	0x3400..=0x4DBF, // CJK Unified Ideographs Extension A
+	0x4E00..=0x9FFF, // CJK Unified Ideographs
+	0xA500..=0xA62B, // Vai
+	0xA880..=0xA8D9, // Saurashtra
+	0xA8E0..=0xA8FB, // Devanagari Extended
+	0xA900..=0xA95F, // Kayah Li, Rejang
+	0xA980..=0xA9DF, // Javanese
+	0xAA00..=0xAA7B, // Cham, Myanmar Extended-A
+	0xAA80..=0xAADF, // Tai Viet
+	0xABC0..=0xABF9, // Meetei Mayek
+	0xAC00..=0xD7AF, // Hangul Syllables
+	0xF900..=0xFAFF, // CJK Compatibility Ideographs
 ];
+
+// Keep this to show what languages are supported and to generate a new compact
+// list whenever the list is updated.
+// pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 75] = [
+//     0x0020..=0x007A, // BasicLatin
+//     0x0080..=0x00FF, // Latin-1 Supplement
+//     0x0100..=0x017F, // Latin Extended-A
+//     0x0180..=0x024F,   // Latin Extended-B
+//     0x02B0..=0x02FF, // Spacing Modifier Letters
+//     0x0300..=0x036F, // Combining diacritical marks
+//     0x0370..=0x03FF, // Greek and Coptic
+//     0x0400..=0x04FF, // Cyrillic
+//     0x0531..=0x058A, // Armenian
+//     0x0591..=0x05F4, // Hebrew
+//     0x0600..=0x06FF, // Arabic
+//     0x0700..=0x074F, // Syriac
+//     0x0750..=0x077F, // ArabicSupplement
+//     0x0780..=0x07B1, // Thaana
+//     0x07C0..=0x07FA, // N'Ko
+//     0x0900..=0x097F, // Devanagari
+//     0x0981..=0x09FB, // Bengali
+//     0x0A01..=0x0A75, // Gurmukhi
+//     0x0A81..=0x0AF1, // Gujarati
+//     0x0B01..=0x0B77, // Oriya
+//     0x0B82..=0x0BFA, // Tamil
+//     0x0C01..=0x0C7F, // Telugu
+//     0x0C82..=0x0CF2, // Kannada
+//     0x0D02..=0x0D7F, // Malayalam
+//     0x0D82..=0x0DF4, // Sinhala
+//     0x0E01..=0x0E5B, // Thai
+//     0x0E81..=0x0EDD, // Lao
+//     0x0F00..=0x0FDA, // Tibetan
+//     0x1000..=0x109F, // Myanmar
+//     0x10A0..=0x10FC, // Georgian
+//     0x1100..=0x11FF, // HangulJamo
+//     0x1200..=0x137C, // Ethiopic
+//     0x1380..=0x1399, // EthiopicSupplement
+//     0x13A0..=0x13F4, // Cherokee
+//     0x1400..=0x167F, // UnifiedCanadianAboriginalSyllabics
+//     0x1700..=0x1714, // Tagalog
+//     0x1720..=0x1736, // Hanunoo
+//     0x1740..=0x1753, // Buhid
+//     0x1760..=0x1773, // Tagbanwa
+//     0x1780..=0x17F9, // Khmer
+//     0x1800..=0x18AA, // Mongolian
+//     0x18B0..=0x18F5, // Unified Canadian Aboriginal Syllabics Extended
+//     0x1900..=0x194F, // Limbu
+//     0x1950..=0x1974, // Tai Le
+//     0x1980..=0x19DF, // New Tai Le
+//     0x19E0..=0x19FF, // Khmer Symbols
+//     0x1A00..=0x1A1F, // Buginese
+//     0x1A20..=0x1AAD, // Tai Tham
+//     0x1B00..=0x1B7C, // Balinese
+//     0x1B80..=0x1BB9, // Sundanese
+//     0x1BC0..=0x1BFF, // Batak
+//     0x1C00..=0x1C4F, // Lepcha
+//     0x1C50..=0x1C7F, // Ol Chiki
+//     0x1E00..=0x1EFF, // Latin Extended Additional
+//     0x1F00..=0x1FFF, // Greek Extended
+//     0x200C..=0x206F, // General punctuation, used in some languages to indicate syllables such as glottal stops
+//     0x2C80..=0x2CFF, // Coptic
+//     0x2D30..=0x2D7F, // Tifinagh
+//     0x3040..=0x309F, // Hiragana
+//     0x30A0..=0x30FF, // Katakana
+//     0x3400..=0x4DBF, // CJK Unified Ideographs Extension A
+//     0x4E00..=0x9FFF, // CJK Unified Ideographs
+//     0xA500..=0xA62B, // Vai
+//     0xA880..=0xA8D9, // Saurashtra
+//     0xA8E0..=0xA8FB, // Devanagari Extended
+//     0xA900..=0xA92F, // Kayah Li
+//     0xA930..=0xA95F, // Rejang
+//     0xA980..=0xA9DF, // Javanese
+//     0xAA00..=0xAA5F, // Cham
+//     0xAA60..=0xAA7B, // Myanmar Extended-A
+//     0xAA80..=0xAADF, // Tai Viet
+//     0xABC0..=0xABF9, // Meetei Mayek
+//     0xAC00..=0xD7AF, // Hangul Syllables
+//     0xF900..=0xFAFF, // CJK Compatibility Ideographs
+//     0xFB50..=0xFDFF, // Arabic Presentation Forms-A
+// ];
diff --git a/pallets/handles/src/handles-utils/src/tests/constants_tests.rs b/pallets/handles/src/handles-utils/src/tests/constants_tests.rs
@@ -0,0 +1,17 @@
+#[path = "../../constants.rs"]
+mod constants;
+use constants::*;
+
+/// Regeneration helper for the compact `ALLOWED_UNICODE_CHARACTER_RANGES` table.
+///
+/// To regenerate, comment out the compact table in constants.rs, uncomment the
+/// original per-language list, run this test with `--ignored --nocapture`, and paste
+/// the printed `start..=end,` lines back into the compact table.
+#[test]
+#[ignore = "use only to regenerate compacted ALLOWED_UNICODE_CHARACTER_RANGES"]
+fn test_build_allowed_char_ranges() {
+	let res = build_allowed_char_ranges();
+	// Expected number of compacted entries; update it whenever the table changes.
+	assert_eq!(res.len(), 54usize);
+	for range in res {
+		let start = range.start();
+		let end = range.end();
+		// `#06X` zero-pads to four hex digits after the `0x` prefix, matching the table
+		// layout. `#4X` counts the prefix in its width, so it never pads.
+		println!("{start:#06X}..={end:#06X},")
+	}
+}
diff --git a/pallets/handles/src/handles-utils/src/tests/mod.rs b/pallets/handles/src/handles-utils/src/tests/mod.rs
@@ -1,3 +1,4 @@
+mod constants_tests;
 mod converter_tests;
 mod suffix_tests;
 mod validator_tests;
diff --git a/pallets/handles/src/handles-utils/src/tests/validator_tests.rs b/pallets/handles/src/handles-utils/src/tests/validator_tests.rs
@@ -24,7 +24,7 @@ fn test_is_reserved_canonical_handle_negative() {
 #[test]
 fn test_contains_blocked_characters_happy_path() {
 	let handles: Vec<&str> =
-		vec!["@lbert", "coca:cola", "#freemont", "charles.darwin", "`String`", ":(){ :|:& };:"];
+		vec!["@lbert", "coca:cola", "#freemont", "charles.darwin", "`String`", ":(){ :|:& };:/"];
 	for handle in handles {
 		assert!(contains_blocked_characters(handle));
 	}
@@ -39,37 +39,111 @@ fn test_contains_blocked_characters_negative() {
 	}
 }
 
+// To validate support for a new language, add a string or sentence written in that
+// language to the list below and run the test.
+// Testing a whole sentence can reveal character ranges the language needs for rendering.
+// Unicode groups and their character ranges: https://www.unicodepedia.com/groups/
+// If you don't know why a test is failing, decode the string here to check the range:
+//     https://unicodedecode.com/
+// Translations of "I can eat glass" are from https://www.kermitproject.org/utf8.html
+// Some translations are from https://translate.glosbe.com/
+// Others are from Wikipedia.
+// Many are (supposed to be) common names or greetings, or translations of "beautiful flower".
+/// Every sample below must be accepted by `consists_of_supported_unicode_character_sets`.
+/// The trailing comment on each sample names the language or Unicode block it exercises.
+#[rustfmt::skip]
 #[test]
 fn test_consists_of_supported_unicode_character_sets_happy_path() {
 	let strings_containing_characters_in_supported_unicode_character_sets = Vec::from([
-		"John",                                                     // Basic Latin
-		"Álvaro",                                                   // Latin-1 Supplement
-		"가영",                                                     // Hangul Syllables
-		"가나다",                                                   // Hangul Syllables
-		"アキラ",                                                   // Katakana
-		"あいこ",                                                   // Hiragana
-		"李明",                                                     // CJK Unified Ideographs
-		"严勇",                                                     // CJK Unified Ideographs
-		"龍",                                                       // CJK Unified Ideographs
-		"অমিত",                                                     // Bengali
-		"आरव",                                                      // Devanagari
-		"Александр",                                                // Cyrillic
+		"John",                                                      // Basic Latin
+		"Álvaro",                                                    // Latin-1 Supplement
+		"가영",                                                       // Hangul Syllables
+		"가나다",                                                     // Hangul Syllables
+		"アキラ",                                                    // Katakana
+		"あいこ",                                                    // Hiragana
+		"私はガラスを食べられますそれは私を傷つけません",                  // Japanese
+		"李明",                                                      // CJK Unified Ideographs
+		"严勇",                                                      // CJK Unified Ideographs
+		"龍",                                                        // CJK Unified Ideographs
 		"Αλέξανδρος",                                               // Greek and Coptic
-		"Ἀναξαγόρας",                                               // Greek Extended
-		"กัญญา",                                                     // Thai
-		"ابجدهوزحطيكلمنسعفصقرشتثخذضظغءعمر",                         // Arabic
-		"דָּנִיֵּאלאבּבגּגדּדהווּוֹזחטי ִיכּךּכךלמםנןסעפּףּפףצץקרשׁשׂתּת",            // Hewbrew
 		"AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóRrSsŚśYyZzŹźŻż", // Polish
 		"ÄäÖöÜüẞß",                                                 // German
 		"AÁBCČDĎEÉĚFGHChIÍJKLMNŇOÓPQRŘSŠTŤUÚŮVWXYÝZŽaábcčdďeéěfghchiíjklmnňoópqrřsštťuúůvwxyýzž", // Czech
 		"αιαιαιᾳειειηιῃοιοιυιυιωιῳαυαυᾹυᾱυευευηυηυουουωυωυγγγγγκγκγξγξγχγχμπμπντντΖζΤΖτζ", // Greek
 		"ÅåÄäÖö",                                                   // Swedish
 		"ÅåÄäÖöŠšŽž",                                               // Finnish
 		"ÆæØøÅå",                                                   // Danish
+		"Александр",                                                // Cyrillic
+		"Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։",             // Armenian
+		"דָּנִיֵּאלאבּבגּגדּדהווּוֹזחטי ִיכּךּכךלמםנןסעפּףּפףצץקרשׁשׂתּת",            // Hebrew
+		"ابجدهوزحطيكلمنسعفصقرشتثخذضظغءعمر",                         // Arabic
+		"ܐܠܦ ܒܝܬ ܣܘܪܝܝܐ",										    // Syriac
+		"ދިވެހިބަސް",                                                   // Thaana
+		"ߒߞߏ ߞߊ߲ߜߍ", 													// N'Ko
+		"शक्नोम्यत्तुम्",                                                  // Devanagari
+		"म काँच खान सक्छू र मलाई केहि नी हुन्‍न् ।",                           // Nepali
+		"আমিকাঁচখেতেপারিতাতেআমারকোনোক্ষতিহয়না।",                           // Bengali
+		"मीकाचखाऊशकतोमलातेदुखतनाही",                                     // Marathi
+		"ನನಗೆಹಾನಿಆಗದೆ,ನಾನುಗಜನ್ನುತಿನಬಹುದು",                              // Kannada
+		"मैंकाँचखासकतीहूँऔरमुझेउससेकोईचोटनहींपहुंचती",                            // Hindi
+		"நான்கண்ணாடிசாப்பிடுவேன்,அதனால்எனக்குஒருகேடும்வராது",              // Tamil
+		"నేనుగాజుతినగలనుమరియుఅలాచేసినానాకుఏమిఇబ్బందిలేదు",                // Telugu
+		" මටවීදුරුකෑමටහැකියි.එයින්මටකිසිහානියක්සිදුනොවේ",                       // Sinhalese
+		"Ἀναξαγόρας",                                               // Greek Extended
+		" 我能吞下玻璃而不伤身体",                                     // Chinese
+		" 我能吞下玻璃而不傷身體",                                     // Chinese (Traditional)
+		"ฉันกินกระจกได้แต่มันไม่ทำให้ฉันเจ็บ",                               // Thai
+		"ຂອ້ຍກິນແກ້ວໄດ້ໂດຍທີ່ມັນບໍ່ໄດ້ເຮັດໃຫ້ຂອ້ຍເຈັບ",                              // Lao
+		" ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད།",                                           // Tibetan
+		"က္ယ္ဝန္‌တော္‌၊က္ယ္ဝန္‌မ မ္ယက္‌စားနုိင္‌သည္‌။ ၎က္ရောင္‌့ ထိခုိက္‌မ္ဟု မရ္ဟိပာ။",      // Burmese (Unicode 4.0)
+		"ကျွန်တော် ကျွန်မ မှန်စားနိုင်တယ်။ ၎င်းကြောင့် ထိခိုက်မှုမရှိပါ။",            // Burmese (Unicode 5.0)
+		"თამარი მადლობა",                                           // Georgian
+		"እናመሰግናለን አቢታ መልካም ቀን", 								// Ethiopic
+		// NOTE(review): the "Hanunoo" and "Tagbanwa" samples below look like they are
+		// written with Tagalog-block (0x1700..) and Buhid-block (0x1740..) code points
+		// respectively — decode them and confirm the labelled ranges are really exercised.
+		"ᜀᜆᜇ᜔ ᜇᜃᜓ", 											    // Hanunoo
+		"ᝊᝓᝑᝒᝇ ᝌᝃ ᝈᝅᝋ ", 											// Buhid
+		"ᝐᝓᝆᝎᝓ ᝐᝆᝓ", 											    // Tagbanwa
+		"Би шил идэй чадна, надад хортой биш",                      // Mongolian (Cyrillic)
+		"ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ",                     // Mongolian (Classic) (5)
+		" ᐊᓕᒍᖅ ᓂᕆᔭᕌᖓᒃᑯ ᓱᕋᙱᑦᑐᓐᓇᖅᑐᖓ",                           // Inuktitut
+		"ᤋᤠᤱᤛᤠ ᤕᤠᤰᤁᤢ ", 												    // Limbu
+		"ᥕᥤᥒᥱ ᥘᥦᥝᥲ", 												// Tai Le
+		"ᦉᦱᧃ ᦃᦺᦟᦹ", 												// New Tai Le
+		"ᨆᨗᨕᨚ ᨅᨔᨒᨀ", 												// Buginese
+		"ᨠᩯᩬ ᨴᩱᨶᩣ ᨧᩥᨶᩬᩁᩣ", 											// Tai Tham
+		"ᬳᬸᬜ ᬳᬶᬦ ᬳᬸᬢ᭄ᬤᬸᬳ᭄ᬯᬸᬭ᭄", 									// Balinese
+		"ᮞᮀᮛᮥᮔ᮪ ᮞᮩᮞᮤ ᮊᮔ᮪ᮓᮥ", 											// Sundanese
+		"ᯀᯩᯖ᯲ᯔ ᯂᯞᯒ ᯊᯭᯉᯮ ᯂᯪᯒᯖᯮ ᯘᯮ", 								// Batak
+		"ᰗᰱᰠ ᰛᰥᰧ ᰛᰣᰵ ᰔᰠᰯ", 											// Lepcha
+		"ᱪᱮᱫᱮ ᱨᱩᱜ ᱢᱟᱦᱟᱭ ᱚᱲᱤᱠ", 									// Ol Chiki
+		"ⲙⲁⲣⲓⲁ ⲟⲩⲁⲣⲉⲟⲩ ⲡⲉⲗⲓⲛⲟⲛ", 										// Coptic
+		"ⴰⵎⵎⵉⵙⵏⴰ ⴰⵎⵍⵓⵍ ⵉⵎⴰⵍⵉⵏ", 										// Tifinagh  http://tifinaghtools.eazypo.ca/
+		"ꕉꕜꕮ ꔔꘋ ꖸ ꔰ ꗋꘋ ꕮꕨ ꔔꘋ ꖸ ꕎ ꕉꖸꕊ ꕴꖃ ꕃꔤꘂ ꗱ, ꕉꖷ ꗪꗡ ꔻꔤ ꗏꗒꗡ ꕎ ꗪ ꕉꖸꕊ ꖏꕎ", // Vai
+		"ꢪꢶꢥꢳ ꢥ꣄ꢳꢯꢳ", 											    // Saurashtra
+		"ꤊꤢꤛꤢ꤭ ꤜꤟꤤ꤬ ꤞ꤮ꤣ ꤟꤢꤨ꤭ ꤊꤢ", 									// Kayah Li
+		" ꤰꥍꤲꥒ ꤿꥍꥎꥂ ꥆꤰ꥓ꤼꤽ ꤽꥍꤺꥏ ", 										// Rejang
+		"ꦲꦏ꧀ꦱꦫ ꦮꦾꦚ꧀ꦗꦤ ꦩꦒꦢꦁ ꦧꦸꦭꦏ꧀ꦭꦏ꧀", 					// Javanese
+		"ꨀꨇꩉ ꨌꩌ ꨤꨨꨪꩀ ꨎꨳꨯꨮꩆ ꨕꨴꨭꩅ ꨕꨴꨭꩈ ꨨꨕꨯꩌ ꨨꨣꨬ", 				// Cham
+		"ꪎꪳ ꪼꪕ ꪣꪱ꫁ꪙ ꪕꪴ", 											    // Tai Viet
+		"ꯁꯤꯗꯤ ꯑꯩꯁꯨ ꯃꯩꯇꯩ ꯃꯌꯦꯛ ꯏꯕ ꯍꯩꯔꯅꯤ ꯕꯨ", 							// Meetei Mayek https://abhisanoujam.github.io/meitei_mayek/
+		"ᏌᏃᏂ ᎣᏏᏲ ᏙᎯᏧ ᏣᎳᎩ ᎦᏬᏂᎯᏍᏗ ᏓᎾᏁᎵᏗᎲᎢ",  						// Cherokee https://language.cherokee.org/word-list/ and  https://chren.cs.unc.edu/
+		"Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da.",   	// Navajo
+		"ᜋᜄᜇᜅ᜔ᜇᜅ᜔ ᜊᜓᜎᜃ᜔ᜃᜎᜃ᜔", 									// Tagalog
+		"میں کانچکھاسکتاہوںورمجھےتکلیفنہیںہوتی",                    // Urdu
+		"شيشهخوړلېشمهغه ما نه خوږوي",                               // Pashto
+		" .من می توانم بدونِ احساس درد شيشه بخورم",                  // Farsi / Persian(3)
+		"أنا قادر على أكل الزجاج و هذا لا يؤلمني. ",                 // Arabic
+		" إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا",                          // Hausa
+		"Tôi có thể ăn thủy tinh mà không hại gì.",                 // Vietnamese (quốc ngữ)
+		" ខ្ញុំអាចញុំកញ្ចក់បាន ដោយគ្មានបញ្ហារ ",                                // Khmer
+		"Góa ē-tàng chia̍h po-lê mā bē tio̍h-siong",                 // Taiwanese
+		" 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요",                  // Korean
+		"mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi", // Lojban
+		" Ljœr ye caudran créneþ ý jor cẃran.",                    // Nórdicg
+		" Ég get etið gler án þess að meiða mig.",                 // Íslenska / Icelandic
+		" Mogę jeść szkło, i mi nie szkodzi.",                     // Polish
+		" Pot să mănânc sticlă și ea nu mă rănește.",              // Romanian
+		" Я можу їсти шкло, й воно мені не пошкодить.",            // Ukrainian
 	]);
 
 	for string in strings_containing_characters_in_supported_unicode_character_sets {
-		assert!(consists_of_supported_unicode_character_sets(string));
+        assert!(consists_of_supported_unicode_character_sets(string), "failed at {string}",);
 	}
 }
 

diff --git a/pallets/handles/src/handles-utils/src/validator.rs b/pallets/handles/src/handles-utils/src/validator.rs
@@ -27,8 +27,8 @@ fn ensure_reserved_words_canonical() {
 }
 
 /// Characters that cannot be used in the handle.
-const BLOCKED_CHARACTERS: [char; 16] =
-	['"', '#', '%', '(', ')', ',', '.', ':', ';', '<', '>', '@', '\\', '`', '{', '}'];
+///
+/// The entries are listed in ascending code-point order.
+#[rustfmt::skip]
+const BLOCKED_CHARACTERS: [char; 17] = [
+	'"', '#', '%', '(', ')', ',', '.', '/',
+	':', ';', '<', '>', '@', '\\', '`', '{', '}',
+];
 
 // We MUST have the BLOCKED_CHARACTERS constant sorted or we cannot use the faster `binary_search` function.
 // Cannot easily be sorted at compile time currently