Skip to content

Commit

Permalink
syntax: fix 'C' alias bug
Browse files Browse the repository at this point in the history
This re-generates the Unicode table for property name aliases after fixing
a bug in property name canonicalization. Namely, the 'isc' alias of the
'ISO_Comment' property was being canonicalized to 'c', which is actually
an alias of the 'Other' general category. This is a result of the
canonicalization procedure ignoring 'is' prefixes, as permitted by UTS#18.

Fixes #466
  • Loading branch information
BurntSushi committed Apr 28, 2018
1 parent f7ea409 commit d5e5da6
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 7 deletions.
6 changes: 6 additions & 0 deletions regex-syntax/src/hir/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1897,6 +1897,12 @@ mod tests {
assert_eq!(
t(r"\p{gc=Separator}"),
hir_uclass_query(ClassQuery::Binary("Z")));
assert_eq!(
t(r"\p{Other}"),
hir_uclass_query(ClassQuery::Binary("Other")));
assert_eq!(
t(r"\pC"),
hir_uclass_query(ClassQuery::Binary("Other")));

assert_eq!(
t(r"\PZ"),
Expand Down
12 changes: 11 additions & 1 deletion regex-syntax/src/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ impl<'a> ClassQuery<'a> {
/// Like ClassQuery, but its parameters have been canonicalized. This also
/// differentiates binary properties from flattened general categories and
/// scripts.
#[derive(Debug)]
#[derive(Debug, Eq, PartialEq)]
enum CanonicalClassQuery {
/// The canonical binary property name.
Binary(&'static str),
Expand Down Expand Up @@ -459,4 +459,14 @@ mod tests {

assert!(!contains_simple_case_mapping('☃', '☃'));
}

/// Regression test for issue #466: the one-letter class `C` must
/// canonicalize to the `Other` general category.
///
/// The `isc` alias of `ISO_Comment` used to be reduced to `c` in the
/// property name table, which collided with the `C` alias of `Other`.
#[test]
fn regression_466() {
    use super::{CanonicalClassQuery, ClassQuery};

    let expected = CanonicalClassQuery::GeneralCategory("Other");
    let canonical = ClassQuery::OneLetter('C').canonicalize().unwrap();
    assert_eq!(canonical, expected);
}
}
11 changes: 5 additions & 6 deletions regex-syntax/src/unicode_tables/property_names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"), ("blk", "Block"),
("block", "Block"), ("bmg", "Bidi_Mirroring_Glyph"),
("bpb", "Bidi_Paired_Bracket"), ("bpt", "Bidi_Paired_Bracket_Type"),
("c", "ISO_Comment"),
("canonicalcombiningclass", "Canonical_Combining_Class"),
("cased", "Cased"), ("casefolding", "Case_Folding"),
("caseignorable", "Case_Ignorable"), ("ccc", "Canonical_Combining_Class"),
Expand Down Expand Up @@ -71,11 +70,11 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("indicpositionalcategory", "Indic_Positional_Category"),
("indicsyllabiccategory", "Indic_Syllabic_Category"),
("inpc", "Indic_Positional_Category"), ("insc", "Indic_Syllabic_Category"),
("jamoshortname", "Jamo_Short_Name"), ("jg", "Joining_Group"),
("joinc", "Join_Control"), ("joincontrol", "Join_Control"),
("joininggroup", "Joining_Group"), ("joiningtype", "Joining_Type"),
("jsn", "Jamo_Short_Name"), ("jt", "Joining_Type"),
("kaccountingnumeric", "kAccountingNumeric"),
("isc", "ISO_Comment"), ("jamoshortname", "Jamo_Short_Name"),
("jg", "Joining_Group"), ("joinc", "Join_Control"),
("joincontrol", "Join_Control"), ("joininggroup", "Joining_Group"),
("joiningtype", "Joining_Type"), ("jsn", "Jamo_Short_Name"),
("jt", "Joining_Type"), ("kaccountingnumeric", "kAccountingNumeric"),
("kcompatibilityvariant", "kCompatibilityVariant"), ("kiicore", "kIICore"),
("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"),
("kirgjsource", "kIRG_JSource"), ("kirgkpsource", "kIRG_KPSource"),
Expand Down

0 comments on commit d5e5da6

Please sign in to comment.