From 6ecceaa0de879bd39f5760e6c1ed74e2e9654e0a Mon Sep 17 00:00:00 2001 From: Youming Lin Date: Mon, 17 Oct 2016 16:56:19 -0500 Subject: [PATCH] Spec-compliant HTML5 decode (#7) * Spec-compliant HTML5 decode * Changes for Linux * Update Package-Builder --- Package-Builder | 2 +- README.md | 52 +- Sources/HTMLEntities/Constants.swift | 724 ++++++++++++++++-- Sources/HTMLEntities/ParseError.swift | 25 + .../HTMLEntities/String+HTMLEntities.swift | 521 ++++++++----- Sources/HTMLEntities/Utilities.swift | 111 ++- .../HTMLEntitiesTests/HTMLEntitiesTest.swift | 342 +++++++-- 7 files changed, 1406 insertions(+), 371 deletions(-) create mode 100644 Sources/HTMLEntities/ParseError.swift diff --git a/Package-Builder b/Package-Builder index f15b3cc..7fae859 160000 --- a/Package-Builder +++ b/Package-Builder @@ -1 +1 @@ -Subproject commit f15b3cc4de0345999098153584ebc89cb64240b0 +Subproject commit 7fae85911b84e55df5c3fef3160859aba69bc498 diff --git a/README.md b/README.md index f39aff4..ad70f19 100644 --- a/README.md +++ b/README.md @@ -6,27 +6,43 @@ ![Apache 2](https://img.shields.io/badge/license-Apache2-blue.svg?style=flat) ## Summary -Pure Swift HTML character escape utility tool for Swift 3.0. +Pure Swift HTML encode/decode utility tool for Swift 3.0. -Currently includes support for HTML4 named character references. You can find the list of all 252 HTML4 named character references [here](https://www.w3.org/TR/html4/sgml/entities.html). +Now includes support for HTML5 named character references. You can find the list of all 2231 HTML5 named character references [here](https://www.w3.org/TR/html5/syntax.html#named-character-references). -`HTMLEntities` escapes ALL non-ASCII characters, as well as the characters `<`, `>`, `&`, `”`, `’` as these five characters are part of the HTML tag and HTML attribute syntaxes. +`HTMLEntities` can escape ALL non-ASCII characters and ASCII non-printable characters (e.g., 
NUL, ESC, DEL), as well as the characters `<`, `>`, `&`, `"`, `'` as these five characters are part of the HTML tag and HTML attribute syntaxes. -In addition, `HTMLEntities` can unescape encoded HTML text that contains decimal, hexadecimal, or HTML4 named character reference escapes. +In addition, `HTMLEntities` can unescape encoded HTML text that contains decimal, hexadecimal, or HTML5 named character references. ## Features -* Supports HTML4 named character references (`nbsp`, `cent`, etc.) +* Supports HTML5 named character references (`NegativeMediumSpace;` etc.) +* HTML5 spec-compliant; strict parse mode recognizes [parse errors](https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references) * Supports decimal and hexadecimal escapes for non-named characters * Simple to use as functions are added by way of extending the default `String` class * Minimal dependencies; implementation is completely self-contained -## Swift Version +## Version Info -HTMLEntities 1.0 runs on Swift 3.0, on both macOS and Ubuntu Linux. +HTMLEntities 2.0 runs on Swift 3.0, on both macOS and Ubuntu Linux. ## Usage +### Install via Swift Package Manager (SPM) + +```swift +import PackageDescription + +let package = Package( + name: "package-name", + dependencies: [ + .Package(url: "https://github.com/IBM-Swift/swift-html-entities.git", majorVersion: 2, minor: 0) + ] +) +``` + +### In code + ```swift import HTMLEntities @@ -34,13 +50,13 @@ import HTMLEntities let html = "" print(html.htmlEscape()) -// Prints ”<script>alert("abc")</script>" +// Prints "<script>alert("abc")</script>" // decode example let htmlencoded = "<script>alert("abc")</script>" print(htmlencoded.htmlUnescape()) -// Prints ”" +// Prints "" ``` ## Advanced Options @@ -56,18 +72,18 @@ Defaults to `false`. 
Specifies if decimal character escapes should be used inste ```swift import HTMLEntities -let text = "한, 한, é, é, 🇺🇸" +let text = "한, 한, ế, ế, 🇺🇸" print(text.htmlEscape()) -// Prints “한, 한, é, é, 🇺🇸” +// Prints "한, 한, ế, ế, 🇺🇸" print(text.htmlEscape(decimal: true)) -// Prints “한, 한, é, é, 🇺🇸” +// Prints "한, 한, ế, ế, 🇺🇸" ``` #### `useNamedReferences` -Defaults to `true`. Specifies if named character references should be used whenever possible. Set to `false` to always use numeric character escape, i.e., for compatibility with older browsers that do not recognize named character references. +Defaults to `true`. Specifies if named character references should be used whenever possible. Set to `false` to always use numeric character references, i.e., for compatibility with older browsers that do not recognize named character references. ```swift import HTMLEntities @@ -77,7 +93,7 @@ let html = "" print(html.htmlEscape()) // Prints “<script>alert("abc")</script>” -print(html.htmlEscape(userNamedReferences: false)) +print(html.htmlEscape(useNamedReferences: false)) // Prints “<script>alert("abc")</script>” ``` @@ -85,7 +101,7 @@ print(html.htmlEscape(userNamedReferences: false)) #### `strict` -Defaults to `true`. Specifies if HTML numeric character escapes MUST always end with `;`. Some browsers allow numeric character escapes (i.e., decimal and hexadecimal types) to end without `;`. Always ending character escapes with `;` is recommended; however, for compatibility reasons, `HTMLEntities` allows non-strict ending option for situations that require it. +Defaults to `false`. Specifies if HTML5 parse errors should be thrown or simply passed over. **NOTE**: `htmlUnescape()` is a throwing function if `strict` is used in call argument (no matter if it is set to `true` or `false`); `htmlUnescape()` is NOT a throwing function if no argument is provided. 
```swift import HTMLEntities @@ -95,10 +111,10 @@ let text = "한" print(text.htmlUnescape()) // Prints “한” -print(text.htmlUnescape(strict: false)) -// Prints “한” +print(try text.htmlUnescape(strict: true)) +// Throws a `ParseError.MissingSemicolon` instance ``` ## License -Apache 2.0 \ No newline at end of file +Apache 2.0 diff --git a/Sources/HTMLEntities/Constants.swift b/Sources/HTMLEntities/Constants.swift index 0339ee5..a1421b6 100644 --- a/Sources/HTMLEntities/Constants.swift +++ b/Sources/HTMLEntities/Constants.swift @@ -14,83 +14,657 @@ * limitations under the License. */ -let html4NamedCharactersEncodeMap = invert(html4NamedCharactersDecodeMap) - -/// Generated from the list of HTML4 entities here: -/// https://www.w3.org/TR/html4/sgml/entities.html -let html4NamedCharactersDecodeMap: [String: UInt32] = [ - " ":160,"¡":161,"¢":162,"£":163, - "¤":164,"¥":165,"¦":166,"§":167, - "¨":168,"©":169,"ª":170,"«":171, - "¬":172,"­":173,"®":174,"¯":175, - "°":176,"±":177,"²":178,"³":179, - "´":180,"µ":181,"¶":182,"·":183, - "¸":184,"¹":185,"º":186,"»":187, - "¼":188,"½":189,"¾":190,"¿":191, - "À":192,"Á":193,"Â":194,"Ã":195, - "Ä":196,"Å":197,"Æ":198,"Ç":199, - "È":200,"É":201,"Ê":202,"Ë":203, - "Ì":204,"Í":205,"Î":206,"Ï":207, - "Ð":208,"Ñ":209,"Ò":210,"Ó":211, - "Ô":212,"Õ":213,"Ö":214,"×":215, - "Ø":216,"Ù":217,"Ú":218,"Û":219, - "Ü":220,"Ý":221,"Þ":222,"ß":223, - "à":224,"á":225,"â":226,"ã":227, - "ä":228,"å":229,"æ":230,"ç":231, - "è":232,"é":233,"ê":234,"ë":235, - "ì":236,"í":237,"î":238,"ï":239, - "ð":240,"ñ":241,"ò":242,"ó":243, - "ô":244,"õ":245,"ö":246,"÷":247, - "ø":248,"ù":249,"ú":250,"û":251, - "ü":252,"ý":253,"þ":254,"ÿ":255, - "ƒ":402,"Α":913,"Β":914,"Γ":915, - "Δ":916,"Ε":917,"Ζ":918,"Η":919, - "Θ":920,"Ι":921,"Κ":922,"Λ":923, - "Μ":924,"Ν":925,"Ξ":926,"Ο":927, - "Π":928,"Ρ":929,"Σ":931,"Τ":932, - "Υ":933,"Φ":934,"Χ":935,"Ψ":936, - "Ω":937,"α":945,"β":946,"γ":947, - "δ":948,"ε":949,"ζ":950,"η":951, - "θ":952,"ι":953,"κ":954,"λ":955, - 
"μ":956,"ν":957,"ξ":958,"ο":959, - "π":960,"ρ":961,"ς":962,"σ":963, - "τ":964,"υ":965,"φ":966,"χ":967, - "ψ":968,"ω":969,"ϑ":977,"ϒ":978, - "ϖ":982,"•":8226,"…":8230,"′":8242, - "″":8243,"‾":8254,"⁄":8260,"℘":8472, - "ℑ":8465,"ℜ":8476,"™":8482,"ℵ":8501, - "←":8592,"↑":8593,"→":8594,"↓":8595, - "↔":8596,"↵":8629,"⇐":8656,"⇑":8657, - "⇒":8658,"⇓":8659,"⇔":8660,"∀":8704, - "∂":8706,"∃":8707,"∅":8709,"∇":8711, - "∈":8712,"∉":8713,"∋":8715,"∏":8719, - "∑":8721,"−":8722,"∗":8727,"√":8730, - "∝":8733,"∞":8734,"∠":8736,"∧":8743, - "∨":8744,"∩":8745,"∪":8746,"∫":8747, - "∴":8756,"∼":8764,"≅":8773,"≈":8776, - "≠":8800,"≡":8801,"≤":8804,"≥":8805, - "⊂":8834,"⊃":8835,"⊄":8836,"⊆":8838, - "⊇":8839,"⊕":8853,"⊗":8855,"⊥":8869, - "⋅":8901,"⌈":8968,"⌉":8969,"⌊":8970, - "⌋":8971,"⟨":9001,"⟩":9002,"◊":9674, - "♠":9824,"♣":9827,"♥":9829,"♦":9830, - """:34,"&":38,"<":60,">":62, - "Œ":338,"œ":339,"Š":352,"š":353, - "Ÿ":376,"ˆ":710,"˜":732," ":8194, - " ":8195," ":8201,"‌":8204,"‍":8205, - "‎":8206,"‏":8207,"–":8211,"—":8212, - "‘":8216,"’":8217,"‚":8218,"“":8220, - "”":8221,"„":8222,"†":8224,"‡":8225, - "‰":8240,"‹":8249,"›":8250,"€":8364 -] +// Linux toolchain requires Foundation to resolve `String` class's `hasSuffix()` function +#if os(Linux) +import Foundation +#endif -let replacementCharacterAsUInt32: UInt32 = 65533 +/// Replacement character U+FFFD +let replacementCharacterAsUInt32: UInt32 = 0xFFFD /// Generated from /// https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references -let htmlSpecialNumericDecodeMap: [UInt32: UInt32] = [ - 0:65533,128:8364,130:8218,131:402,132:8222,133:8230,134:8224, - 135:8225,136:710,137:8240,138:352,139:8249,140:338,142:381, - 145:8216,146:8217,147:8220,148:8221,149:8226,150:8211,151:8212, - 152:732,153:8482,154:353,155:8250,156:339,158:382,159:376 +let deprecatedNumericDecodeMap: [UInt32: UInt32] = [ + 0x00:0xFFFD,0x80:0x20AC,0x82:0x201A,0x83:0x0192,0x84:0x201E,0x85:0x2026,0x86:0x2020, + 
0x87:0x2021,0x88:0x02C6,0x89:0x2030,0x8A:0x0160,0x8B:0x2039,0x8C:0x0152,0x8E:0x017D, + 0x91:0x2018,0x92:0x2019,0x93:0x201C,0x94:0x201D,0x95:0x2022,0x96:0x2013,0x97:0x2014, + 0x98:0x02DC,0x99:0x2122,0x9A:0x0161,0x9B:0x203A,0x9C:0x0153,0x9E:0x017E,0x9F:0x0178 +] + +// map is faster lookup than array.indexOf +let disallowedNumericReferences: [UInt32: Bool] = [ + 0x1:true,0x2:true,0x3:true,0x4:true,0x5:true,0x6:true,0x7:true,0x8:true,0xB:true, + 0xD:true,0xE:true,0xF:true,0x10:true,0x11:true,0x12:true,0x13:true,0x14:true, + 0x15:true,0x16:true,0x17:true,0x18:true,0x19:true,0x1A:true,0x1B:true,0x1C:true, + 0x1D:true,0x1E:true,0x1F:true,0xFDD0:true,0xFDD1:true,0xFDD2:true,0xFDD3:true, + 0xFDD4:true,0xFDD5:true,0xFDD6:true,0xFDD7:true,0xFDD8:true,0xFDD9:true,0xFDDA:true, + 0xFDDB:true,0xFDDC:true,0xFDDD:true,0xFDDE:true,0xFDDF:true,0xFDE0:true,0xFDE1:true, + 0xFDE2:true,0xFDE3:true,0xFDE4:true,0xFDE5:true,0xFDE6:true,0xFDE7:true,0xFDE8:true, + 0xFDE9:true,0xFDEA:true,0xFDEB:true,0xFDEC:true,0xFDED:true,0xFDEE:true,0xFDEF:true, + 0xFFFE:true,0xFFFF:true,0x1FFFE:true,0x1FFFF:true,0x2FFFE:true,0x2FFFF:true,0x3FFFE:true, + 0x3FFFF:true,0x4FFFE:true,0x4FFFF:true,0x5FFFE:true,0x5FFFF:true,0x6FFFE:true,0x6FFFF:true, + 0x7FFFE:true,0x7FFFF:true,0x8FFFE:true,0x8FFFF:true,0x9FFFE:true,0x9FFFF:true,0xAFFFE:true, + 0xAFFFF:true,0xBFFFE:true,0xBFFFF:true,0xCFFFE:true,0xCFFFF:true,0xDFFFE:true,0xDFFFF:true, + 0xEFFFE:true,0xEFFFF:true,0xFFFFE:true,0xFFFFF:true,0x10FFFE:true,0x10FFFF:true +] + +// only encode to named character references that end with ; +// if multiple exists for a given character, i.e., 'AMP;' and 'amp;', pick the one +// that is shorter and/or all lowercase +let namedCharactersEncodeMap = namedCharactersDecodeMap.inverting() { + existing, new in + let isExistingLegacy = !existing.hasSuffix(";") + let isNewLegacy = !new.hasSuffix(";") + let existingCount = existing.characters.count + let newCount = new.characters.count + + if isExistingLegacy && !isNewLegacy { + // 
prefer non-legacy + return new + } + + if !isExistingLegacy && isNewLegacy { + // prefer non-legacy + return existing + } + + if existingCount < newCount { + // if both are same type, prefer shorter name + return existing + } + + if newCount < existingCount { + // if both are same type, prefer shorter name + return new + } + + if new == new.lowercased() { + // if both are same type and same length, prefer lowercase name + return new + } + + // new isn't better than existing + // return existing + return existing +} + +// entities that map to more than one character +// e.g., their decoded form spans more than one extended grapheme cluster +let specialNamedCharactersDecodeMap: [String: String] = [ + "fjlig;":"\u{66}\u{6A}", + "ThickSpace;":"\u{205F}\u{200A}" +] + +// named character references that may be parsed without an ending ; +let legacyNamedCharactersDecodeMap: [String: Character] = [ + "Aacute":"\u{C1}","aacute":"\u{E1}","Acirc":"\u{C2}","acirc":"\u{E2}", + "acute":"\u{B4}","AElig":"\u{C6}","aelig":"\u{E6}","Agrave":"\u{C0}", + "agrave":"\u{E0}","AMP":"\u{26}","amp":"\u{26}","Aring":"\u{C5}", + "aring":"\u{E5}","Atilde":"\u{C3}","atilde":"\u{E3}","Auml":"\u{C4}", + "auml":"\u{E4}","brvbar":"\u{A6}","Ccedil":"\u{C7}","ccedil":"\u{E7}", + "cedil":"\u{B8}","cent":"\u{A2}","COPY":"\u{A9}","copy":"\u{A9}", + "curren":"\u{A4}","deg":"\u{B0}","divide":"\u{F7}","Eacute":"\u{C9}", + "eacute":"\u{E9}","Ecirc":"\u{CA}","ecirc":"\u{EA}","Egrave":"\u{C8}", + "egrave":"\u{E8}","ETH":"\u{D0}","eth":"\u{F0}","Euml":"\u{CB}", + "euml":"\u{EB}","frac12":"\u{BD}","frac14":"\u{BC}","frac34":"\u{BE}", + "GT":"\u{3E}","gt":"\u{3E}","Iacute":"\u{CD}","iacute":"\u{ED}", + "Icirc":"\u{CE}","icirc":"\u{EE}","iexcl":"\u{A1}","Igrave":"\u{CC}", + "igrave":"\u{EC}","iquest":"\u{BF}","Iuml":"\u{CF}","iuml":"\u{EF}", + "laquo":"\u{AB}","LT":"\u{3C}","lt":"\u{3C}","macr":"\u{AF}", + "micro":"\u{B5}","middot":"\u{B7}","nbsp":"\u{A0}","not":"\u{AC}", + 
"Ntilde":"\u{D1}","ntilde":"\u{F1}","Oacute":"\u{D3}","oacute":"\u{F3}", + "Ocirc":"\u{D4}","ocirc":"\u{F4}","Ograve":"\u{D2}","ograve":"\u{F2}", + "ordf":"\u{AA}","ordm":"\u{BA}","Oslash":"\u{D8}","oslash":"\u{F8}", + "Otilde":"\u{D5}","otilde":"\u{F5}","Ouml":"\u{D6}","ouml":"\u{F6}", + "para":"\u{B6}","plusmn":"\u{B1}","pound":"\u{A3}","QUOT":"\u{22}", + "quot":"\u{22}","raquo":"\u{BB}","REG":"\u{AE}","reg":"\u{AE}", + "sect":"\u{A7}","shy":"\u{AD}","sup1":"\u{B9}","sup2":"\u{B2}", + "sup3":"\u{B3}","szlig":"\u{DF}","THORN":"\u{DE}","thorn":"\u{FE}", + "times":"\u{D7}","Uacute":"\u{DA}","uacute":"\u{FA}","Ucirc":"\u{DB}", + "ucirc":"\u{FB}","Ugrave":"\u{D9}","ugrave":"\u{F9}","uml":"\u{A8}", + "Uuml":"\u{DC}","uuml":"\u{FC}","Yacute":"\u{DD}","yacute":"\u{FD}", + "yen":"\u{A5}","yuml":"\u{FF}" +] + +// split map into two halves; otherwise, segmentation fault when compiling +let namedCharactersDecodeMap = namedCharactersDecodeMap1.updating(namedCharactersDecodeMap2) + +let namedCharactersDecodeMap1: [String: Character] = [ + "Aacute;":"\u{C1}","aacute;":"\u{E1}","Abreve;":"\u{102}","abreve;":"\u{103}", + "ac;":"\u{223E}","acd;":"\u{223F}","acE;":"\u{223E}\u{333}","Acirc;":"\u{C2}", + "acirc;":"\u{E2}","acute;":"\u{B4}","Acy;":"\u{410}","acy;":"\u{430}", + "AElig;":"\u{C6}","aelig;":"\u{E6}","af;":"\u{2061}","Afr;":"\u{1D504}", + "afr;":"\u{1D51E}","Agrave;":"\u{C0}","agrave;":"\u{E0}","alefsym;":"\u{2135}", + "aleph;":"\u{2135}","Alpha;":"\u{391}","alpha;":"\u{3B1}","Amacr;":"\u{100}", + "amacr;":"\u{101}","amalg;":"\u{2A3F}","AMP;":"\u{26}","amp;":"\u{26}", + "And;":"\u{2A53}","and;":"\u{2227}","andand;":"\u{2A55}","andd;":"\u{2A5C}", + "andslope;":"\u{2A58}","andv;":"\u{2A5A}","ang;":"\u{2220}","ange;":"\u{29A4}", + "angle;":"\u{2220}","angmsd;":"\u{2221}","angmsdaa;":"\u{29A8}","angmsdab;":"\u{29A9}", + "angmsdac;":"\u{29AA}","angmsdad;":"\u{29AB}","angmsdae;":"\u{29AC}","angmsdaf;":"\u{29AD}", + 
"angmsdag;":"\u{29AE}","angmsdah;":"\u{29AF}","angrt;":"\u{221F}","angrtvb;":"\u{22BE}", + "angrtvbd;":"\u{299D}","angsph;":"\u{2222}","angst;":"\u{C5}","angzarr;":"\u{237C}", + "Aogon;":"\u{104}","aogon;":"\u{105}","Aopf;":"\u{1D538}","aopf;":"\u{1D552}", + "ap;":"\u{2248}","apacir;":"\u{2A6F}","apE;":"\u{2A70}","ape;":"\u{224A}", + "apid;":"\u{224B}","apos;":"\u{27}","ApplyFunction;":"\u{2061}","approx;":"\u{2248}", + "approxeq;":"\u{224A}","Aring;":"\u{C5}","aring;":"\u{E5}","Ascr;":"\u{1D49C}", + "ascr;":"\u{1D4B6}","Assign;":"\u{2254}","ast;":"\u{2A}","asymp;":"\u{2248}", + "asympeq;":"\u{224D}","Atilde;":"\u{C3}","atilde;":"\u{E3}","Auml;":"\u{C4}", + "auml;":"\u{E4}","awconint;":"\u{2233}","awint;":"\u{2A11}","backcong;":"\u{224C}", + "backepsilon;":"\u{3F6}","backprime;":"\u{2035}","backsim;":"\u{223D}","backsimeq;":"\u{22CD}", + "Backslash;":"\u{2216}","Barv;":"\u{2AE7}","barvee;":"\u{22BD}","Barwed;":"\u{2306}", + "barwed;":"\u{2305}","barwedge;":"\u{2305}","bbrk;":"\u{23B5}","bbrktbrk;":"\u{23B6}", + "bcong;":"\u{224C}","Bcy;":"\u{411}","bcy;":"\u{431}","bdquo;":"\u{201E}", + "becaus;":"\u{2235}","Because;":"\u{2235}","because;":"\u{2235}","bemptyv;":"\u{29B0}", + "bepsi;":"\u{3F6}","bernou;":"\u{212C}","Bernoullis;":"\u{212C}","Beta;":"\u{392}", + "beta;":"\u{3B2}","beth;":"\u{2136}","between;":"\u{226C}","Bfr;":"\u{1D505}", + "bfr;":"\u{1D51F}","bigcap;":"\u{22C2}","bigcirc;":"\u{25EF}","bigcup;":"\u{22C3}", + "bigodot;":"\u{2A00}","bigoplus;":"\u{2A01}","bigotimes;":"\u{2A02}","bigsqcup;":"\u{2A06}", + "bigstar;":"\u{2605}","bigtriangledown;":"\u{25BD}","bigtriangleup;":"\u{25B3}","biguplus;":"\u{2A04}", + "bigvee;":"\u{22C1}","bigwedge;":"\u{22C0}","bkarow;":"\u{290D}","blacklozenge;":"\u{29EB}", + "blacksquare;":"\u{25AA}","blacktriangle;":"\u{25B4}","blacktriangledown;":"\u{25BE}","blacktriangleleft;":"\u{25C2}", + "blacktriangleright;":"\u{25B8}","blank;":"\u{2423}","blk12;":"\u{2592}","blk14;":"\u{2591}", + 
"blk34;":"\u{2593}","block;":"\u{2588}","bne;":"\u{3D}\u{20E5}","bnequiv;":"\u{2261}\u{20E5}", + "bNot;":"\u{2AED}","bnot;":"\u{2310}","Bopf;":"\u{1D539}","bopf;":"\u{1D553}", + "bot;":"\u{22A5}","bottom;":"\u{22A5}","bowtie;":"\u{22C8}","boxbox;":"\u{29C9}", + "boxDL;":"\u{2557}","boxDl;":"\u{2556}","boxdL;":"\u{2555}","boxdl;":"\u{2510}", + "boxDR;":"\u{2554}","boxDr;":"\u{2553}","boxdR;":"\u{2552}","boxdr;":"\u{250C}", + "boxH;":"\u{2550}","boxh;":"\u{2500}","boxHD;":"\u{2566}","boxHd;":"\u{2564}", + "boxhD;":"\u{2565}","boxhd;":"\u{252C}","boxHU;":"\u{2569}","boxHu;":"\u{2567}", + "boxhU;":"\u{2568}","boxhu;":"\u{2534}","boxminus;":"\u{229F}","boxplus;":"\u{229E}", + "boxtimes;":"\u{22A0}","boxUL;":"\u{255D}","boxUl;":"\u{255C}","boxuL;":"\u{255B}", + "boxul;":"\u{2518}","boxUR;":"\u{255A}","boxUr;":"\u{2559}","boxuR;":"\u{2558}", + "boxur;":"\u{2514}","boxV;":"\u{2551}","boxv;":"\u{2502}","boxVH;":"\u{256C}", + "boxVh;":"\u{256B}","boxvH;":"\u{256A}","boxvh;":"\u{253C}","boxVL;":"\u{2563}", + "boxVl;":"\u{2562}","boxvL;":"\u{2561}","boxvl;":"\u{2524}","boxVR;":"\u{2560}", + "boxVr;":"\u{255F}","boxvR;":"\u{255E}","boxvr;":"\u{251C}","bprime;":"\u{2035}", + "Breve;":"\u{2D8}","breve;":"\u{2D8}","brvbar;":"\u{A6}","Bscr;":"\u{212C}", + "bscr;":"\u{1D4B7}","bsemi;":"\u{204F}","bsim;":"\u{223D}","bsime;":"\u{22CD}", + "bsol;":"\u{5C}","bsolb;":"\u{29C5}","bsolhsub;":"\u{27C8}","bull;":"\u{2022}", + "bullet;":"\u{2022}","bump;":"\u{224E}","bumpE;":"\u{2AAE}","bumpe;":"\u{224F}", + "Bumpeq;":"\u{224E}","bumpeq;":"\u{224F}","Cacute;":"\u{106}","cacute;":"\u{107}", + "Cap;":"\u{22D2}","cap;":"\u{2229}","capand;":"\u{2A44}","capbrcup;":"\u{2A49}", + "capcap;":"\u{2A4B}","capcup;":"\u{2A47}","capdot;":"\u{2A40}","CapitalDifferentialD;":"\u{2145}", + "caps;":"\u{2229}\u{FE00}","caret;":"\u{2041}","caron;":"\u{2C7}","Cayleys;":"\u{212D}", + "ccaps;":"\u{2A4D}","Ccaron;":"\u{10C}","ccaron;":"\u{10D}","Ccedil;":"\u{C7}", + 
"ccedil;":"\u{E7}","Ccirc;":"\u{108}","ccirc;":"\u{109}","Cconint;":"\u{2230}", + "ccups;":"\u{2A4C}","ccupssm;":"\u{2A50}","Cdot;":"\u{10A}","cdot;":"\u{10B}", + "cedil;":"\u{B8}","Cedilla;":"\u{B8}","cemptyv;":"\u{29B2}","cent;":"\u{A2}", + "CenterDot;":"\u{B7}","centerdot;":"\u{B7}","Cfr;":"\u{212D}","cfr;":"\u{1D520}", + "CHcy;":"\u{427}","chcy;":"\u{447}","check;":"\u{2713}","checkmark;":"\u{2713}", + "Chi;":"\u{3A7}","chi;":"\u{3C7}","cir;":"\u{25CB}","circ;":"\u{2C6}", + "circeq;":"\u{2257}","circlearrowleft;":"\u{21BA}","circlearrowright;":"\u{21BB}","circledast;":"\u{229B}", + "circledcirc;":"\u{229A}","circleddash;":"\u{229D}","CircleDot;":"\u{2299}","circledR;":"\u{AE}", + "circledS;":"\u{24C8}","CircleMinus;":"\u{2296}","CirclePlus;":"\u{2295}","CircleTimes;":"\u{2297}", + "cirE;":"\u{29C3}","cire;":"\u{2257}","cirfnint;":"\u{2A10}","cirmid;":"\u{2AEF}", + "cirscir;":"\u{29C2}","ClockwiseContourIntegral;":"\u{2232}","CloseCurlyDoubleQuote;":"\u{201D}","CloseCurlyQuote;":"\u{2019}", + "clubs;":"\u{2663}","clubsuit;":"\u{2663}","Colon;":"\u{2237}","colon;":"\u{3A}", + "Colone;":"\u{2A74}","colone;":"\u{2254}","coloneq;":"\u{2254}","comma;":"\u{2C}", + "commat;":"\u{40}","comp;":"\u{2201}","compfn;":"\u{2218}","complement;":"\u{2201}", + "complexes;":"\u{2102}","cong;":"\u{2245}","congdot;":"\u{2A6D}","Congruent;":"\u{2261}", + "Conint;":"\u{222F}","conint;":"\u{222E}","ContourIntegral;":"\u{222E}","Copf;":"\u{2102}", + "copf;":"\u{1D554}","coprod;":"\u{2210}","Coproduct;":"\u{2210}","COPY;":"\u{A9}", + "copy;":"\u{A9}","copysr;":"\u{2117}","CounterClockwiseContourIntegral;":"\u{2233}","crarr;":"\u{21B5}", + "Cross;":"\u{2A2F}","cross;":"\u{2717}","Cscr;":"\u{1D49E}","cscr;":"\u{1D4B8}", + "csub;":"\u{2ACF}","csube;":"\u{2AD1}","csup;":"\u{2AD0}","csupe;":"\u{2AD2}", + "ctdot;":"\u{22EF}","cudarrl;":"\u{2938}","cudarrr;":"\u{2935}","cuepr;":"\u{22DE}", + "cuesc;":"\u{22DF}","cularr;":"\u{21B6}","cularrp;":"\u{293D}","Cup;":"\u{22D3}", + 
"cup;":"\u{222A}","cupbrcap;":"\u{2A48}","CupCap;":"\u{224D}","cupcap;":"\u{2A46}", + "cupcup;":"\u{2A4A}","cupdot;":"\u{228D}","cupor;":"\u{2A45}","cups;":"\u{222A}\u{FE00}", + "curarr;":"\u{21B7}","curarrm;":"\u{293C}","curlyeqprec;":"\u{22DE}","curlyeqsucc;":"\u{22DF}", + "curlyvee;":"\u{22CE}","curlywedge;":"\u{22CF}","curren;":"\u{A4}","curvearrowleft;":"\u{21B6}", + "curvearrowright;":"\u{21B7}","cuvee;":"\u{22CE}","cuwed;":"\u{22CF}","cwconint;":"\u{2232}", + "cwint;":"\u{2231}","cylcty;":"\u{232D}","Dagger;":"\u{2021}","dagger;":"\u{2020}", + "daleth;":"\u{2138}","Darr;":"\u{21A1}","dArr;":"\u{21D3}","darr;":"\u{2193}", + "dash;":"\u{2010}","Dashv;":"\u{2AE4}","dashv;":"\u{22A3}","dbkarow;":"\u{290F}", + "dblac;":"\u{2DD}","Dcaron;":"\u{10E}","dcaron;":"\u{10F}","Dcy;":"\u{414}", + "dcy;":"\u{434}","DD;":"\u{2145}","dd;":"\u{2146}","ddagger;":"\u{2021}", + "ddarr;":"\u{21CA}","DDotrahd;":"\u{2911}","ddotseq;":"\u{2A77}","deg;":"\u{B0}", + "Del;":"\u{2207}","Delta;":"\u{394}","delta;":"\u{3B4}","demptyv;":"\u{29B1}", + "dfisht;":"\u{297F}","Dfr;":"\u{1D507}","dfr;":"\u{1D521}","dHar;":"\u{2965}", + "dharl;":"\u{21C3}","dharr;":"\u{21C2}","DiacriticalAcute;":"\u{B4}","DiacriticalDot;":"\u{2D9}", + "DiacriticalDoubleAcute;":"\u{2DD}","DiacriticalGrave;":"\u{60}","DiacriticalTilde;":"\u{2DC}","diam;":"\u{22C4}", + "Diamond;":"\u{22C4}","diamond;":"\u{22C4}","diamondsuit;":"\u{2666}","diams;":"\u{2666}", + "die;":"\u{A8}","DifferentialD;":"\u{2146}","digamma;":"\u{3DD}","disin;":"\u{22F2}", + "div;":"\u{F7}","divide;":"\u{F7}","divideontimes;":"\u{22C7}","divonx;":"\u{22C7}", + "DJcy;":"\u{402}","djcy;":"\u{452}","dlcorn;":"\u{231E}","dlcrop;":"\u{230D}", + "dollar;":"\u{24}","Dopf;":"\u{1D53B}","dopf;":"\u{1D555}","Dot;":"\u{A8}", + "dot;":"\u{2D9}","DotDot;":"\u{20DC}","doteq;":"\u{2250}","doteqdot;":"\u{2251}", + "DotEqual;":"\u{2250}","dotminus;":"\u{2238}","dotplus;":"\u{2214}","dotsquare;":"\u{22A1}", + 
"doublebarwedge;":"\u{2306}","DoubleContourIntegral;":"\u{222F}","DoubleDot;":"\u{A8}","DoubleDownArrow;":"\u{21D3}", + "DoubleLeftArrow;":"\u{21D0}","DoubleLeftRightArrow;":"\u{21D4}","DoubleLeftTee;":"\u{2AE4}","DoubleLongLeftArrow;":"\u{27F8}", + "DoubleLongLeftRightArrow;":"\u{27FA}","DoubleLongRightArrow;":"\u{27F9}","DoubleRightArrow;":"\u{21D2}","DoubleRightTee;":"\u{22A8}", + "DoubleUpArrow;":"\u{21D1}","DoubleUpDownArrow;":"\u{21D5}","DoubleVerticalBar;":"\u{2225}","DownArrow;":"\u{2193}", + "Downarrow;":"\u{21D3}","downarrow;":"\u{2193}","DownArrowBar;":"\u{2913}","DownArrowUpArrow;":"\u{21F5}", + "DownBreve;":"\u{311}","downdownarrows;":"\u{21CA}","downharpoonleft;":"\u{21C3}","downharpoonright;":"\u{21C2}", + "DownLeftRightVector;":"\u{2950}","DownLeftTeeVector;":"\u{295E}","DownLeftVector;":"\u{21BD}","DownLeftVectorBar;":"\u{2956}", + "DownRightTeeVector;":"\u{295F}","DownRightVector;":"\u{21C1}","DownRightVectorBar;":"\u{2957}","DownTee;":"\u{22A4}", + "DownTeeArrow;":"\u{21A7}","drbkarow;":"\u{2910}","drcorn;":"\u{231F}","drcrop;":"\u{230C}", + "Dscr;":"\u{1D49F}","dscr;":"\u{1D4B9}","DScy;":"\u{405}","dscy;":"\u{455}", + "dsol;":"\u{29F6}","Dstrok;":"\u{110}","dstrok;":"\u{111}","dtdot;":"\u{22F1}", + "dtri;":"\u{25BF}","dtrif;":"\u{25BE}","duarr;":"\u{21F5}","duhar;":"\u{296F}", + "dwangle;":"\u{29A6}","DZcy;":"\u{40F}","dzcy;":"\u{45F}","dzigrarr;":"\u{27FF}", + "Eacute;":"\u{C9}","eacute;":"\u{E9}","easter;":"\u{2A6E}","Ecaron;":"\u{11A}", + "ecaron;":"\u{11B}","ecir;":"\u{2256}","Ecirc;":"\u{CA}","ecirc;":"\u{EA}", + "ecolon;":"\u{2255}","Ecy;":"\u{42D}","ecy;":"\u{44D}","eDDot;":"\u{2A77}", + "Edot;":"\u{116}","eDot;":"\u{2251}","edot;":"\u{117}","ee;":"\u{2147}", + "efDot;":"\u{2252}","Efr;":"\u{1D508}","efr;":"\u{1D522}","eg;":"\u{2A9A}", + "Egrave;":"\u{C8}","egrave;":"\u{E8}","egs;":"\u{2A96}","egsdot;":"\u{2A98}", + "el;":"\u{2A99}","Element;":"\u{2208}","elinters;":"\u{23E7}","ell;":"\u{2113}", + 
"els;":"\u{2A95}","elsdot;":"\u{2A97}","Emacr;":"\u{112}","emacr;":"\u{113}", + "empty;":"\u{2205}","emptyset;":"\u{2205}","EmptySmallSquare;":"\u{25FB}","emptyv;":"\u{2205}", + "EmptyVerySmallSquare;":"\u{25AB}","emsp;":"\u{2003}","emsp13;":"\u{2004}","emsp14;":"\u{2005}", + "ENG;":"\u{14A}","eng;":"\u{14B}","ensp;":"\u{2002}","Eogon;":"\u{118}", + "eogon;":"\u{119}","Eopf;":"\u{1D53C}","eopf;":"\u{1D556}","epar;":"\u{22D5}", + "eparsl;":"\u{29E3}","eplus;":"\u{2A71}","epsi;":"\u{3B5}","Epsilon;":"\u{395}", + "epsilon;":"\u{3B5}","epsiv;":"\u{3F5}","eqcirc;":"\u{2256}","eqcolon;":"\u{2255}", + "eqsim;":"\u{2242}","eqslantgtr;":"\u{2A96}","eqslantless;":"\u{2A95}","Equal;":"\u{2A75}", + "equals;":"\u{3D}","EqualTilde;":"\u{2242}","equest;":"\u{225F}","Equilibrium;":"\u{21CC}", + "equiv;":"\u{2261}","equivDD;":"\u{2A78}","eqvparsl;":"\u{29E5}","erarr;":"\u{2971}", + "erDot;":"\u{2253}","Escr;":"\u{2130}","escr;":"\u{212F}","esdot;":"\u{2250}", + "Esim;":"\u{2A73}","esim;":"\u{2242}","Eta;":"\u{397}","eta;":"\u{3B7}", + "ETH;":"\u{D0}","eth;":"\u{F0}","Euml;":"\u{CB}","euml;":"\u{EB}", + "euro;":"\u{20AC}","excl;":"\u{21}","exist;":"\u{2203}","Exists;":"\u{2203}", + "expectation;":"\u{2130}","ExponentialE;":"\u{2147}","exponentiale;":"\u{2147}","fallingdotseq;":"\u{2252}", + "Fcy;":"\u{424}","fcy;":"\u{444}","female;":"\u{2640}","ffilig;":"\u{FB03}", + "fflig;":"\u{FB00}","ffllig;":"\u{FB04}","Ffr;":"\u{1D509}","ffr;":"\u{1D523}", + "filig;":"\u{FB01}","FilledSmallSquare;":"\u{25FC}","FilledVerySmallSquare;":"\u{25AA}", + // "fjlig;":"\u{66}\u{6A}", + "flat;":"\u{266D}","fllig;":"\u{FB02}","fltns;":"\u{25B1}","fnof;":"\u{192}", + "Fopf;":"\u{1D53D}","fopf;":"\u{1D557}","ForAll;":"\u{2200}","forall;":"\u{2200}", + "fork;":"\u{22D4}","forkv;":"\u{2AD9}","Fouriertrf;":"\u{2131}","fpartint;":"\u{2A0D}", + "frac12;":"\u{BD}","frac13;":"\u{2153}","frac14;":"\u{BC}","frac15;":"\u{2155}", + "frac16;":"\u{2159}","frac18;":"\u{215B}","frac23;":"\u{2154}","frac25;":"\u{2156}", 
+ "frac34;":"\u{BE}","frac35;":"\u{2157}","frac38;":"\u{215C}","frac45;":"\u{2158}", + "frac56;":"\u{215A}","frac58;":"\u{215D}","frac78;":"\u{215E}","frasl;":"\u{2044}", + "frown;":"\u{2322}","Fscr;":"\u{2131}","fscr;":"\u{1D4BB}","gacute;":"\u{1F5}", + "Gamma;":"\u{393}","gamma;":"\u{3B3}","Gammad;":"\u{3DC}","gammad;":"\u{3DD}", + "gap;":"\u{2A86}","Gbreve;":"\u{11E}","gbreve;":"\u{11F}","Gcedil;":"\u{122}", + "Gcirc;":"\u{11C}","gcirc;":"\u{11D}","Gcy;":"\u{413}","gcy;":"\u{433}", + "Gdot;":"\u{120}","gdot;":"\u{121}","gE;":"\u{2267}","ge;":"\u{2265}", + "gEl;":"\u{2A8C}","gel;":"\u{22DB}","geq;":"\u{2265}","geqq;":"\u{2267}", + "geqslant;":"\u{2A7E}","ges;":"\u{2A7E}","gescc;":"\u{2AA9}","gesdot;":"\u{2A80}", + "gesdoto;":"\u{2A82}","gesdotol;":"\u{2A84}","gesl;":"\u{22DB}\u{FE00}","gesles;":"\u{2A94}", + "Gfr;":"\u{1D50A}","gfr;":"\u{1D524}","Gg;":"\u{22D9}","gg;":"\u{226B}", + "ggg;":"\u{22D9}","gimel;":"\u{2137}","GJcy;":"\u{403}","gjcy;":"\u{453}", + "gl;":"\u{2277}","gla;":"\u{2AA5}","glE;":"\u{2A92}","glj;":"\u{2AA4}", + "gnap;":"\u{2A8A}","gnapprox;":"\u{2A8A}","gnE;":"\u{2269}","gne;":"\u{2A88}", + "gneq;":"\u{2A88}","gneqq;":"\u{2269}","gnsim;":"\u{22E7}","Gopf;":"\u{1D53E}", + "gopf;":"\u{1D558}","grave;":"\u{60}","GreaterEqual;":"\u{2265}","GreaterEqualLess;":"\u{22DB}", + "GreaterFullEqual;":"\u{2267}","GreaterGreater;":"\u{2AA2}","GreaterLess;":"\u{2277}","GreaterSlantEqual;":"\u{2A7E}", + "GreaterTilde;":"\u{2273}","Gscr;":"\u{1D4A2}","gscr;":"\u{210A}","gsim;":"\u{2273}", + "gsime;":"\u{2A8E}","gsiml;":"\u{2A90}","GT;":"\u{3E}","Gt;":"\u{226B}", + "gt;":"\u{3E}","gtcc;":"\u{2AA7}","gtcir;":"\u{2A7A}","gtdot;":"\u{22D7}", + "gtlPar;":"\u{2995}","gtquest;":"\u{2A7C}","gtrapprox;":"\u{2A86}","gtrarr;":"\u{2978}", + "gtrdot;":"\u{22D7}","gtreqless;":"\u{22DB}","gtreqqless;":"\u{2A8C}","gtrless;":"\u{2277}", + "gtrsim;":"\u{2273}","gvertneqq;":"\u{2269}\u{FE00}","gvnE;":"\u{2269}\u{FE00}","Hacek;":"\u{2C7}", + 
"hairsp;":"\u{200A}","half;":"\u{BD}","hamilt;":"\u{210B}","HARDcy;":"\u{42A}", + "hardcy;":"\u{44A}","hArr;":"\u{21D4}","harr;":"\u{2194}","harrcir;":"\u{2948}", + "harrw;":"\u{21AD}","Hat;":"\u{5E}","hbar;":"\u{210F}","Hcirc;":"\u{124}", + "hcirc;":"\u{125}","hearts;":"\u{2665}","heartsuit;":"\u{2665}","hellip;":"\u{2026}", + "hercon;":"\u{22B9}","Hfr;":"\u{210C}","hfr;":"\u{1D525}","HilbertSpace;":"\u{210B}", + "hksearow;":"\u{2925}","hkswarow;":"\u{2926}","hoarr;":"\u{21FF}","homtht;":"\u{223B}", + "hookleftarrow;":"\u{21A9}","hookrightarrow;":"\u{21AA}","Hopf;":"\u{210D}","hopf;":"\u{1D559}", + "horbar;":"\u{2015}","HorizontalLine;":"\u{2500}","Hscr;":"\u{210B}","hscr;":"\u{1D4BD}", + "hslash;":"\u{210F}","Hstrok;":"\u{126}","hstrok;":"\u{127}","HumpDownHump;":"\u{224E}", + "HumpEqual;":"\u{224F}","hybull;":"\u{2043}","hyphen;":"\u{2010}","Iacute;":"\u{CD}", + "iacute;":"\u{ED}","ic;":"\u{2063}","Icirc;":"\u{CE}","icirc;":"\u{EE}", + "Icy;":"\u{418}","icy;":"\u{438}","Idot;":"\u{130}","IEcy;":"\u{415}", + "iecy;":"\u{435}","iexcl;":"\u{A1}","iff;":"\u{21D4}","Ifr;":"\u{2111}", + "ifr;":"\u{1D526}","Igrave;":"\u{CC}","igrave;":"\u{EC}","ii;":"\u{2148}", + "iiiint;":"\u{2A0C}","iiint;":"\u{222D}","iinfin;":"\u{29DC}","iiota;":"\u{2129}", + "IJlig;":"\u{132}","ijlig;":"\u{133}","Im;":"\u{2111}","Imacr;":"\u{12A}", + "imacr;":"\u{12B}","image;":"\u{2111}","ImaginaryI;":"\u{2148}","imagline;":"\u{2110}", + "imagpart;":"\u{2111}","imath;":"\u{131}","imof;":"\u{22B7}","imped;":"\u{1B5}", + "Implies;":"\u{21D2}","in;":"\u{2208}","incare;":"\u{2105}","infin;":"\u{221E}", + "infintie;":"\u{29DD}","inodot;":"\u{131}","Int;":"\u{222C}","int;":"\u{222B}", + "intcal;":"\u{22BA}","integers;":"\u{2124}","Integral;":"\u{222B}","intercal;":"\u{22BA}", + "Intersection;":"\u{22C2}","intlarhk;":"\u{2A17}","intprod;":"\u{2A3C}","InvisibleComma;":"\u{2063}", + "InvisibleTimes;":"\u{2062}","IOcy;":"\u{401}","iocy;":"\u{451}","Iogon;":"\u{12E}", + 
"iogon;":"\u{12F}","Iopf;":"\u{1D540}","iopf;":"\u{1D55A}","Iota;":"\u{399}", + "iota;":"\u{3B9}","iprod;":"\u{2A3C}","iquest;":"\u{BF}","Iscr;":"\u{2110}", + "iscr;":"\u{1D4BE}","isin;":"\u{2208}","isindot;":"\u{22F5}","isinE;":"\u{22F9}", + "isins;":"\u{22F4}","isinsv;":"\u{22F3}","isinv;":"\u{2208}","it;":"\u{2062}", + "Itilde;":"\u{128}","itilde;":"\u{129}","Iukcy;":"\u{406}","iukcy;":"\u{456}", + "Iuml;":"\u{CF}","iuml;":"\u{EF}","Jcirc;":"\u{134}","jcirc;":"\u{135}", + "Jcy;":"\u{419}","jcy;":"\u{439}","Jfr;":"\u{1D50D}","jfr;":"\u{1D527}", + "jmath;":"\u{237}","Jopf;":"\u{1D541}","jopf;":"\u{1D55B}","Jscr;":"\u{1D4A5}", + "jscr;":"\u{1D4BF}","Jsercy;":"\u{408}","jsercy;":"\u{458}","Jukcy;":"\u{404}", + "jukcy;":"\u{454}","Kappa;":"\u{39A}","kappa;":"\u{3BA}","kappav;":"\u{3F0}", + "Kcedil;":"\u{136}","kcedil;":"\u{137}","Kcy;":"\u{41A}","kcy;":"\u{43A}", + "Kfr;":"\u{1D50E}","kfr;":"\u{1D528}","kgreen;":"\u{138}","KHcy;":"\u{425}", + "khcy;":"\u{445}","KJcy;":"\u{40C}","kjcy;":"\u{45C}","Kopf;":"\u{1D542}", + "kopf;":"\u{1D55C}","Kscr;":"\u{1D4A6}","kscr;":"\u{1D4C0}","lAarr;":"\u{21DA}", + "Lacute;":"\u{139}","lacute;":"\u{13A}","laemptyv;":"\u{29B4}","lagran;":"\u{2112}", + "Lambda;":"\u{39B}","lambda;":"\u{3BB}","Lang;":"\u{27EA}","lang;":"\u{27E8}", + "langd;":"\u{2991}","langle;":"\u{27E8}","lap;":"\u{2A85}","Laplacetrf;":"\u{2112}", + "laquo;":"\u{AB}","Larr;":"\u{219E}","lArr;":"\u{21D0}","larr;":"\u{2190}", + "larrb;":"\u{21E4}","larrbfs;":"\u{291F}","larrfs;":"\u{291D}","larrhk;":"\u{21A9}", + "larrlp;":"\u{21AB}","larrpl;":"\u{2939}","larrsim;":"\u{2973}","larrtl;":"\u{21A2}", + "lat;":"\u{2AAB}","lAtail;":"\u{291B}","latail;":"\u{2919}","late;":"\u{2AAD}", + "lates;":"\u{2AAD}\u{FE00}","lBarr;":"\u{290E}","lbarr;":"\u{290C}","lbbrk;":"\u{2772}", + "lbrace;":"\u{7B}","lbrack;":"\u{5B}","lbrke;":"\u{298B}","lbrksld;":"\u{298F}", + "lbrkslu;":"\u{298D}","Lcaron;":"\u{13D}","lcaron;":"\u{13E}","Lcedil;":"\u{13B}", + 
"lcedil;":"\u{13C}","lceil;":"\u{2308}","lcub;":"\u{7B}","Lcy;":"\u{41B}", + "lcy;":"\u{43B}","ldca;":"\u{2936}","ldquo;":"\u{201C}","ldquor;":"\u{201E}", + "ldrdhar;":"\u{2967}","ldrushar;":"\u{294B}","ldsh;":"\u{21B2}","lE;":"\u{2266}", + "le;":"\u{2264}","LeftAngleBracket;":"\u{27E8}","LeftArrow;":"\u{2190}","Leftarrow;":"\u{21D0}", + "leftarrow;":"\u{2190}","LeftArrowBar;":"\u{21E4}","LeftArrowRightArrow;":"\u{21C6}","leftarrowtail;":"\u{21A2}", + "LeftCeiling;":"\u{2308}","LeftDoubleBracket;":"\u{27E6}","LeftDownTeeVector;":"\u{2961}","LeftDownVector;":"\u{21C3}", + "LeftDownVectorBar;":"\u{2959}","LeftFloor;":"\u{230A}","leftharpoondown;":"\u{21BD}","leftharpoonup;":"\u{21BC}", + "leftleftarrows;":"\u{21C7}","LeftRightArrow;":"\u{2194}","Leftrightarrow;":"\u{21D4}","leftrightarrow;":"\u{2194}", + "leftrightarrows;":"\u{21C6}","leftrightharpoons;":"\u{21CB}","leftrightsquigarrow;":"\u{21AD}","LeftRightVector;":"\u{294E}", + "LeftTee;":"\u{22A3}","LeftTeeArrow;":"\u{21A4}","LeftTeeVector;":"\u{295A}","leftthreetimes;":"\u{22CB}", + "LeftTriangle;":"\u{22B2}","LeftTriangleBar;":"\u{29CF}","LeftTriangleEqual;":"\u{22B4}","LeftUpDownVector;":"\u{2951}", + "LeftUpTeeVector;":"\u{2960}","LeftUpVector;":"\u{21BF}","LeftUpVectorBar;":"\u{2958}","LeftVector;":"\u{21BC}", + "LeftVectorBar;":"\u{2952}","lEg;":"\u{2A8B}","leg;":"\u{22DA}","leq;":"\u{2264}", + "leqq;":"\u{2266}","leqslant;":"\u{2A7D}","les;":"\u{2A7D}","lescc;":"\u{2AA8}", + "lesdot;":"\u{2A7F}","lesdoto;":"\u{2A81}","lesdotor;":"\u{2A83}","lesg;":"\u{22DA}\u{FE00}", + "lesges;":"\u{2A93}","lessapprox;":"\u{2A85}","lessdot;":"\u{22D6}","lesseqgtr;":"\u{22DA}", + "lesseqqgtr;":"\u{2A8B}","LessEqualGreater;":"\u{22DA}","LessFullEqual;":"\u{2266}","LessGreater;":"\u{2276}", + "lessgtr;":"\u{2276}","LessLess;":"\u{2AA1}","lesssim;":"\u{2272}","LessSlantEqual;":"\u{2A7D}", + "LessTilde;":"\u{2272}","lfisht;":"\u{297C}","lfloor;":"\u{230A}","Lfr;":"\u{1D50F}", + 
"lfr;":"\u{1D529}","lg;":"\u{2276}","lgE;":"\u{2A91}","lHar;":"\u{2962}", + "lhard;":"\u{21BD}","lharu;":"\u{21BC}","lharul;":"\u{296A}","lhblk;":"\u{2584}", + "LJcy;":"\u{409}","ljcy;":"\u{459}","Ll;":"\u{22D8}","ll;":"\u{226A}", + "llarr;":"\u{21C7}","llcorner;":"\u{231E}","Lleftarrow;":"\u{21DA}","llhard;":"\u{296B}", + "lltri;":"\u{25FA}","Lmidot;":"\u{13F}","lmidot;":"\u{140}","lmoust;":"\u{23B0}", + "lmoustache;":"\u{23B0}","lnap;":"\u{2A89}","lnapprox;":"\u{2A89}","lnE;":"\u{2268}", + "lne;":"\u{2A87}","lneq;":"\u{2A87}","lneqq;":"\u{2268}","lnsim;":"\u{22E6}", + "loang;":"\u{27EC}","loarr;":"\u{21FD}","lobrk;":"\u{27E6}","LongLeftArrow;":"\u{27F5}", + "Longleftarrow;":"\u{27F8}","longleftarrow;":"\u{27F5}","LongLeftRightArrow;":"\u{27F7}","Longleftrightarrow;":"\u{27FA}", + "longleftrightarrow;":"\u{27F7}","longmapsto;":"\u{27FC}","LongRightArrow;":"\u{27F6}","Longrightarrow;":"\u{27F9}", + "longrightarrow;":"\u{27F6}","looparrowleft;":"\u{21AB}","looparrowright;":"\u{21AC}","lopar;":"\u{2985}", + "Lopf;":"\u{1D543}","lopf;":"\u{1D55D}","loplus;":"\u{2A2D}","lotimes;":"\u{2A34}", + "lowast;":"\u{2217}","lowbar;":"\u{5F}","LowerLeftArrow;":"\u{2199}","LowerRightArrow;":"\u{2198}", + "loz;":"\u{25CA}","lozenge;":"\u{25CA}","lozf;":"\u{29EB}","lpar;":"\u{28}", + "lparlt;":"\u{2993}","lrarr;":"\u{21C6}","lrcorner;":"\u{231F}","lrhar;":"\u{21CB}", + "lrhard;":"\u{296D}","lrm;":"\u{200E}","lrtri;":"\u{22BF}","lsaquo;":"\u{2039}", + "Lscr;":"\u{2112}","lscr;":"\u{1D4C1}","Lsh;":"\u{21B0}","lsh;":"\u{21B0}", + "lsim;":"\u{2272}","lsime;":"\u{2A8D}","lsimg;":"\u{2A8F}","lsqb;":"\u{5B}", + "lsquo;":"\u{2018}","lsquor;":"\u{201A}","Lstrok;":"\u{141}","lstrok;":"\u{142}", + "LT;":"\u{3C}","Lt;":"\u{226A}","lt;":"\u{3C}","ltcc;":"\u{2AA6}", + "ltcir;":"\u{2A79}","ltdot;":"\u{22D6}","lthree;":"\u{22CB}","ltimes;":"\u{22C9}", + "ltlarr;":"\u{2976}","ltquest;":"\u{2A7B}","ltri;":"\u{25C3}","ltrie;":"\u{22B4}", + 
"ltrif;":"\u{25C2}","ltrPar;":"\u{2996}","lurdshar;":"\u{294A}","luruhar;":"\u{2966}", + "lvertneqq;":"\u{2268}\u{FE00}","lvnE;":"\u{2268}\u{FE00}","macr;":"\u{AF}","male;":"\u{2642}", + "malt;":"\u{2720}","maltese;":"\u{2720}","Map;":"\u{2905}","map;":"\u{21A6}", + "mapsto;":"\u{21A6}","mapstodown;":"\u{21A7}","mapstoleft;":"\u{21A4}","mapstoup;":"\u{21A5}", + "marker;":"\u{25AE}","mcomma;":"\u{2A29}","Mcy;":"\u{41C}","mcy;":"\u{43C}", + "mdash;":"\u{2014}","mDDot;":"\u{223A}","measuredangle;":"\u{2221}","MediumSpace;":"\u{205F}", + "Mellintrf;":"\u{2133}","Mfr;":"\u{1D510}","mfr;":"\u{1D52A}","mho;":"\u{2127}", + "micro;":"\u{B5}","mid;":"\u{2223}","midast;":"\u{2A}","midcir;":"\u{2AF0}", + "middot;":"\u{B7}","minus;":"\u{2212}","minusb;":"\u{229F}","minusd;":"\u{2238}", + "minusdu;":"\u{2A2A}","MinusPlus;":"\u{2213}","mlcp;":"\u{2ADB}","mldr;":"\u{2026}" +] + +let namedCharactersDecodeMap2: [String: Character] = [ + "mnplus;":"\u{2213}","models;":"\u{22A7}","Mopf;":"\u{1D544}","mopf;":"\u{1D55E}", + "mp;":"\u{2213}","Mscr;":"\u{2133}","mscr;":"\u{1D4C2}","mstpos;":"\u{223E}", + "Mu;":"\u{39C}","mu;":"\u{3BC}","multimap;":"\u{22B8}","mumap;":"\u{22B8}", + "nabla;":"\u{2207}","Nacute;":"\u{143}","nacute;":"\u{144}","nang;":"\u{2220}\u{20D2}", + "nap;":"\u{2249}","napE;":"\u{2A70}\u{338}","napid;":"\u{224B}\u{338}","napos;":"\u{149}", + "napprox;":"\u{2249}","natur;":"\u{266E}","natural;":"\u{266E}","naturals;":"\u{2115}", + "nbsp;":"\u{A0}","nbump;":"\u{224E}\u{338}","nbumpe;":"\u{224F}\u{338}","ncap;":"\u{2A43}", + "Ncaron;":"\u{147}","ncaron;":"\u{148}","Ncedil;":"\u{145}","ncedil;":"\u{146}", + "ncong;":"\u{2247}","ncongdot;":"\u{2A6D}\u{338}","ncup;":"\u{2A42}","Ncy;":"\u{41D}", + "ncy;":"\u{43D}","ndash;":"\u{2013}","ne;":"\u{2260}","nearhk;":"\u{2924}", + "neArr;":"\u{21D7}","nearr;":"\u{2197}","nearrow;":"\u{2197}","nedot;":"\u{2250}\u{338}", + 
"NegativeMediumSpace;":"\u{200B}","NegativeThickSpace;":"\u{200B}","NegativeThinSpace;":"\u{200B}","NegativeVeryThinSpace;":"\u{200B}", + "nequiv;":"\u{2262}","nesear;":"\u{2928}","nesim;":"\u{2242}\u{338}","NestedGreaterGreater;":"\u{226B}", + "NestedLessLess;":"\u{226A}","NewLine;":"\u{A}","nexist;":"\u{2204}","nexists;":"\u{2204}", + "Nfr;":"\u{1D511}","nfr;":"\u{1D52B}","ngE;":"\u{2267}\u{338}","nge;":"\u{2271}", + "ngeq;":"\u{2271}","ngeqq;":"\u{2267}\u{338}","ngeqslant;":"\u{2A7E}\u{338}","nges;":"\u{2A7E}\u{338}", + "nGg;":"\u{22D9}\u{338}","ngsim;":"\u{2275}","nGt;":"\u{226B}\u{20D2}","ngt;":"\u{226F}", + "ngtr;":"\u{226F}","nGtv;":"\u{226B}\u{338}","nhArr;":"\u{21CE}","nharr;":"\u{21AE}", + "nhpar;":"\u{2AF2}","ni;":"\u{220B}","nis;":"\u{22FC}","nisd;":"\u{22FA}", + "niv;":"\u{220B}","NJcy;":"\u{40A}","njcy;":"\u{45A}","nlArr;":"\u{21CD}", + "nlarr;":"\u{219A}","nldr;":"\u{2025}","nlE;":"\u{2266}\u{338}","nle;":"\u{2270}", + "nLeftarrow;":"\u{21CD}","nleftarrow;":"\u{219A}","nLeftrightarrow;":"\u{21CE}","nleftrightarrow;":"\u{21AE}", + "nleq;":"\u{2270}","nleqq;":"\u{2266}\u{338}","nleqslant;":"\u{2A7D}\u{338}","nles;":"\u{2A7D}\u{338}", + "nless;":"\u{226E}","nLl;":"\u{22D8}\u{338}","nlsim;":"\u{2274}","nLt;":"\u{226A}\u{20D2}", + "nlt;":"\u{226E}","nltri;":"\u{22EA}","nltrie;":"\u{22EC}","nLtv;":"\u{226A}\u{338}", + "nmid;":"\u{2224}","NoBreak;":"\u{2060}","NonBreakingSpace;":"\u{A0}","Nopf;":"\u{2115}", + "nopf;":"\u{1D55F}","Not;":"\u{2AEC}","not;":"\u{AC}","NotCongruent;":"\u{2262}", + "NotCupCap;":"\u{226D}","NotDoubleVerticalBar;":"\u{2226}","NotElement;":"\u{2209}","NotEqual;":"\u{2260}", + "NotEqualTilde;":"\u{2242}\u{338}","NotExists;":"\u{2204}","NotGreater;":"\u{226F}","NotGreaterEqual;":"\u{2271}", + "NotGreaterFullEqual;":"\u{2267}\u{338}","NotGreaterGreater;":"\u{226B}\u{338}","NotGreaterLess;":"\u{2279}","NotGreaterSlantEqual;":"\u{2A7E}\u{338}", + 
"NotGreaterTilde;":"\u{2275}","NotHumpDownHump;":"\u{224E}\u{338}","NotHumpEqual;":"\u{224F}\u{338}","notin;":"\u{2209}", + "notindot;":"\u{22F5}\u{338}","notinE;":"\u{22F9}\u{338}","notinva;":"\u{2209}","notinvb;":"\u{22F7}", + "notinvc;":"\u{22F6}","NotLeftTriangle;":"\u{22EA}","NotLeftTriangleBar;":"\u{29CF}\u{338}","NotLeftTriangleEqual;":"\u{22EC}", + "NotLess;":"\u{226E}","NotLessEqual;":"\u{2270}","NotLessGreater;":"\u{2278}","NotLessLess;":"\u{226A}\u{338}", + "NotLessSlantEqual;":"\u{2A7D}\u{338}","NotLessTilde;":"\u{2274}","NotNestedGreaterGreater;":"\u{2AA2}\u{338}","NotNestedLessLess;":"\u{2AA1}\u{338}", + "notni;":"\u{220C}","notniva;":"\u{220C}","notnivb;":"\u{22FE}","notnivc;":"\u{22FD}", + "NotPrecedes;":"\u{2280}","NotPrecedesEqual;":"\u{2AAF}\u{338}","NotPrecedesSlantEqual;":"\u{22E0}","NotReverseElement;":"\u{220C}", + "NotRightTriangle;":"\u{22EB}","NotRightTriangleBar;":"\u{29D0}\u{338}","NotRightTriangleEqual;":"\u{22ED}","NotSquareSubset;":"\u{228F}\u{338}", + "NotSquareSubsetEqual;":"\u{22E2}","NotSquareSuperset;":"\u{2290}\u{338}","NotSquareSupersetEqual;":"\u{22E3}","NotSubset;":"\u{2282}\u{20D2}", + "NotSubsetEqual;":"\u{2288}","NotSucceeds;":"\u{2281}","NotSucceedsEqual;":"\u{2AB0}\u{338}","NotSucceedsSlantEqual;":"\u{22E1}", + "NotSucceedsTilde;":"\u{227F}\u{338}","NotSuperset;":"\u{2283}\u{20D2}","NotSupersetEqual;":"\u{2289}","NotTilde;":"\u{2241}", + "NotTildeEqual;":"\u{2244}","NotTildeFullEqual;":"\u{2247}","NotTildeTilde;":"\u{2249}","NotVerticalBar;":"\u{2224}", + "npar;":"\u{2226}","nparallel;":"\u{2226}","nparsl;":"\u{2AFD}\u{20E5}","npart;":"\u{2202}\u{338}", + "npolint;":"\u{2A14}","npr;":"\u{2280}","nprcue;":"\u{22E0}","npre;":"\u{2AAF}\u{338}", + "nprec;":"\u{2280}","npreceq;":"\u{2AAF}\u{338}","nrArr;":"\u{21CF}","nrarr;":"\u{219B}", + "nrarrc;":"\u{2933}\u{338}","nrarrw;":"\u{219D}\u{338}","nRightarrow;":"\u{21CF}","nrightarrow;":"\u{219B}", + 
"nrtri;":"\u{22EB}","nrtrie;":"\u{22ED}","nsc;":"\u{2281}","nsccue;":"\u{22E1}", + "nsce;":"\u{2AB0}\u{338}","Nscr;":"\u{1D4A9}","nscr;":"\u{1D4C3}","nshortmid;":"\u{2224}", + "nshortparallel;":"\u{2226}","nsim;":"\u{2241}","nsime;":"\u{2244}","nsimeq;":"\u{2244}", + "nsmid;":"\u{2224}","nspar;":"\u{2226}","nsqsube;":"\u{22E2}","nsqsupe;":"\u{22E3}", + "nsub;":"\u{2284}","nsubE;":"\u{2AC5}\u{338}","nsube;":"\u{2288}","nsubset;":"\u{2282}\u{20D2}", + "nsubseteq;":"\u{2288}","nsubseteqq;":"\u{2AC5}\u{338}","nsucc;":"\u{2281}","nsucceq;":"\u{2AB0}\u{338}", + "nsup;":"\u{2285}","nsupE;":"\u{2AC6}\u{338}","nsupe;":"\u{2289}","nsupset;":"\u{2283}\u{20D2}", + "nsupseteq;":"\u{2289}","nsupseteqq;":"\u{2AC6}\u{338}","ntgl;":"\u{2279}","Ntilde;":"\u{D1}", + "ntilde;":"\u{F1}","ntlg;":"\u{2278}","ntriangleleft;":"\u{22EA}","ntrianglelefteq;":"\u{22EC}", + "ntriangleright;":"\u{22EB}","ntrianglerighteq;":"\u{22ED}","Nu;":"\u{39D}","nu;":"\u{3BD}", + "num;":"\u{23}","numero;":"\u{2116}","numsp;":"\u{2007}","nvap;":"\u{224D}\u{20D2}", + "nVDash;":"\u{22AF}","nVdash;":"\u{22AE}","nvDash;":"\u{22AD}","nvdash;":"\u{22AC}", + "nvge;":"\u{2265}\u{20D2}","nvgt;":"\u{3E}\u{20D2}","nvHarr;":"\u{2904}","nvinfin;":"\u{29DE}", + "nvlArr;":"\u{2902}","nvle;":"\u{2264}\u{20D2}","nvlt;":"\u{3C}\u{20D2}","nvltrie;":"\u{22B4}\u{20D2}", + "nvrArr;":"\u{2903}","nvrtrie;":"\u{22B5}\u{20D2}","nvsim;":"\u{223C}\u{20D2}","nwarhk;":"\u{2923}", + "nwArr;":"\u{21D6}","nwarr;":"\u{2196}","nwarrow;":"\u{2196}","nwnear;":"\u{2927}", + "Oacute;":"\u{D3}","oacute;":"\u{F3}","oast;":"\u{229B}","ocir;":"\u{229A}", + "Ocirc;":"\u{D4}","ocirc;":"\u{F4}","Ocy;":"\u{41E}","ocy;":"\u{43E}", + "odash;":"\u{229D}","Odblac;":"\u{150}","odblac;":"\u{151}","odiv;":"\u{2A38}", + "odot;":"\u{2299}","odsold;":"\u{29BC}","OElig;":"\u{152}","oelig;":"\u{153}", + "ofcir;":"\u{29BF}","Ofr;":"\u{1D512}","ofr;":"\u{1D52C}","ogon;":"\u{2DB}", + "Ograve;":"\u{D2}","ograve;":"\u{F2}","ogt;":"\u{29C1}","ohbar;":"\u{29B5}", + 
"ohm;":"\u{3A9}","oint;":"\u{222E}","olarr;":"\u{21BA}","olcir;":"\u{29BE}", + "olcross;":"\u{29BB}","oline;":"\u{203E}","olt;":"\u{29C0}","Omacr;":"\u{14C}", + "omacr;":"\u{14D}","Omega;":"\u{3A9}","omega;":"\u{3C9}","Omicron;":"\u{39F}", + "omicron;":"\u{3BF}","omid;":"\u{29B6}","ominus;":"\u{2296}","Oopf;":"\u{1D546}", + "oopf;":"\u{1D560}","opar;":"\u{29B7}","OpenCurlyDoubleQuote;":"\u{201C}","OpenCurlyQuote;":"\u{2018}", + "operp;":"\u{29B9}","oplus;":"\u{2295}","Or;":"\u{2A54}","or;":"\u{2228}", + "orarr;":"\u{21BB}","ord;":"\u{2A5D}","order;":"\u{2134}","orderof;":"\u{2134}", + "ordf;":"\u{AA}","ordm;":"\u{BA}","origof;":"\u{22B6}","oror;":"\u{2A56}", + "orslope;":"\u{2A57}","orv;":"\u{2A5B}","oS;":"\u{24C8}","Oscr;":"\u{1D4AA}", + "oscr;":"\u{2134}","Oslash;":"\u{D8}","oslash;":"\u{F8}","osol;":"\u{2298}", + "Otilde;":"\u{D5}","otilde;":"\u{F5}","Otimes;":"\u{2A37}","otimes;":"\u{2297}", + "otimesas;":"\u{2A36}","Ouml;":"\u{D6}","ouml;":"\u{F6}","ovbar;":"\u{233D}", + "OverBar;":"\u{203E}","OverBrace;":"\u{23DE}","OverBracket;":"\u{23B4}","OverParenthesis;":"\u{23DC}", + "par;":"\u{2225}","para;":"\u{B6}","parallel;":"\u{2225}","parsim;":"\u{2AF3}", + "parsl;":"\u{2AFD}","part;":"\u{2202}","PartialD;":"\u{2202}","Pcy;":"\u{41F}", + "pcy;":"\u{43F}","percnt;":"\u{25}","period;":"\u{2E}","permil;":"\u{2030}", + "perp;":"\u{22A5}","pertenk;":"\u{2031}","Pfr;":"\u{1D513}","pfr;":"\u{1D52D}", + "Phi;":"\u{3A6}","phi;":"\u{3C6}","phiv;":"\u{3D5}","phmmat;":"\u{2133}", + "phone;":"\u{260E}","Pi;":"\u{3A0}","pi;":"\u{3C0}","pitchfork;":"\u{22D4}", + "piv;":"\u{3D6}","planck;":"\u{210F}","planckh;":"\u{210E}","plankv;":"\u{210F}", + "plus;":"\u{2B}","plusacir;":"\u{2A23}","plusb;":"\u{229E}","pluscir;":"\u{2A22}", + "plusdo;":"\u{2214}","plusdu;":"\u{2A25}","pluse;":"\u{2A72}","PlusMinus;":"\u{B1}", + "plusmn;":"\u{B1}","plussim;":"\u{2A26}","plustwo;":"\u{2A27}","pm;":"\u{B1}", + 
"Poincareplane;":"\u{210C}","pointint;":"\u{2A15}","Popf;":"\u{2119}","popf;":"\u{1D561}", + "pound;":"\u{A3}","Pr;":"\u{2ABB}","pr;":"\u{227A}","prap;":"\u{2AB7}", + "prcue;":"\u{227C}","prE;":"\u{2AB3}","pre;":"\u{2AAF}","prec;":"\u{227A}", + "precapprox;":"\u{2AB7}","preccurlyeq;":"\u{227C}","Precedes;":"\u{227A}","PrecedesEqual;":"\u{2AAF}", + "PrecedesSlantEqual;":"\u{227C}","PrecedesTilde;":"\u{227E}","preceq;":"\u{2AAF}","precnapprox;":"\u{2AB9}", + "precneqq;":"\u{2AB5}","precnsim;":"\u{22E8}","precsim;":"\u{227E}","Prime;":"\u{2033}", + "prime;":"\u{2032}","primes;":"\u{2119}","prnap;":"\u{2AB9}","prnE;":"\u{2AB5}", + "prnsim;":"\u{22E8}","prod;":"\u{220F}","Product;":"\u{220F}","profalar;":"\u{232E}", + "profline;":"\u{2312}","profsurf;":"\u{2313}","prop;":"\u{221D}","Proportion;":"\u{2237}", + "Proportional;":"\u{221D}","propto;":"\u{221D}","prsim;":"\u{227E}","prurel;":"\u{22B0}", + "Pscr;":"\u{1D4AB}","pscr;":"\u{1D4C5}","Psi;":"\u{3A8}","psi;":"\u{3C8}", + "puncsp;":"\u{2008}","Qfr;":"\u{1D514}","qfr;":"\u{1D52E}","qint;":"\u{2A0C}", + "Qopf;":"\u{211A}","qopf;":"\u{1D562}","qprime;":"\u{2057}","Qscr;":"\u{1D4AC}", + "qscr;":"\u{1D4C6}","quaternions;":"\u{210D}","quatint;":"\u{2A16}","quest;":"\u{3F}", + "questeq;":"\u{225F}","QUOT;":"\u{22}","quot;":"\u{22}","rAarr;":"\u{21DB}", + "race;":"\u{223D}\u{331}","Racute;":"\u{154}","racute;":"\u{155}","radic;":"\u{221A}", + "raemptyv;":"\u{29B3}","Rang;":"\u{27EB}","rang;":"\u{27E9}","rangd;":"\u{2992}", + "range;":"\u{29A5}","rangle;":"\u{27E9}","raquo;":"\u{BB}","Rarr;":"\u{21A0}", + "rArr;":"\u{21D2}","rarr;":"\u{2192}","rarrap;":"\u{2975}","rarrb;":"\u{21E5}", + "rarrbfs;":"\u{2920}","rarrc;":"\u{2933}","rarrfs;":"\u{291E}","rarrhk;":"\u{21AA}", + "rarrlp;":"\u{21AC}","rarrpl;":"\u{2945}","rarrsim;":"\u{2974}","Rarrtl;":"\u{2916}", + "rarrtl;":"\u{21A3}","rarrw;":"\u{219D}","rAtail;":"\u{291C}","ratail;":"\u{291A}", + "ratio;":"\u{2236}","rationals;":"\u{211A}","RBarr;":"\u{2910}","rBarr;":"\u{290F}", 
+ "rbarr;":"\u{290D}","rbbrk;":"\u{2773}","rbrace;":"\u{7D}","rbrack;":"\u{5D}", + "rbrke;":"\u{298C}","rbrksld;":"\u{298E}","rbrkslu;":"\u{2990}","Rcaron;":"\u{158}", + "rcaron;":"\u{159}","Rcedil;":"\u{156}","rcedil;":"\u{157}","rceil;":"\u{2309}", + "rcub;":"\u{7D}","Rcy;":"\u{420}","rcy;":"\u{440}","rdca;":"\u{2937}", + "rdldhar;":"\u{2969}","rdquo;":"\u{201D}","rdquor;":"\u{201D}","rdsh;":"\u{21B3}", + "Re;":"\u{211C}","real;":"\u{211C}","realine;":"\u{211B}","realpart;":"\u{211C}", + "reals;":"\u{211D}","rect;":"\u{25AD}","REG;":"\u{AE}","reg;":"\u{AE}", + "ReverseElement;":"\u{220B}","ReverseEquilibrium;":"\u{21CB}","ReverseUpEquilibrium;":"\u{296F}","rfisht;":"\u{297D}", + "rfloor;":"\u{230B}","Rfr;":"\u{211C}","rfr;":"\u{1D52F}","rHar;":"\u{2964}", + "rhard;":"\u{21C1}","rharu;":"\u{21C0}","rharul;":"\u{296C}","Rho;":"\u{3A1}", + "rho;":"\u{3C1}","rhov;":"\u{3F1}","RightAngleBracket;":"\u{27E9}","RightArrow;":"\u{2192}", + "Rightarrow;":"\u{21D2}","rightarrow;":"\u{2192}","RightArrowBar;":"\u{21E5}","RightArrowLeftArrow;":"\u{21C4}", + "rightarrowtail;":"\u{21A3}","RightCeiling;":"\u{2309}","RightDoubleBracket;":"\u{27E7}","RightDownTeeVector;":"\u{295D}", + "RightDownVector;":"\u{21C2}","RightDownVectorBar;":"\u{2955}","RightFloor;":"\u{230B}","rightharpoondown;":"\u{21C1}", + "rightharpoonup;":"\u{21C0}","rightleftarrows;":"\u{21C4}","rightleftharpoons;":"\u{21CC}","rightrightarrows;":"\u{21C9}", + "rightsquigarrow;":"\u{219D}","RightTee;":"\u{22A2}","RightTeeArrow;":"\u{21A6}","RightTeeVector;":"\u{295B}", + "rightthreetimes;":"\u{22CC}","RightTriangle;":"\u{22B3}","RightTriangleBar;":"\u{29D0}","RightTriangleEqual;":"\u{22B5}", + "RightUpDownVector;":"\u{294F}","RightUpTeeVector;":"\u{295C}","RightUpVector;":"\u{21BE}","RightUpVectorBar;":"\u{2954}", + "RightVector;":"\u{21C0}","RightVectorBar;":"\u{2953}","ring;":"\u{2DA}","risingdotseq;":"\u{2253}", + "rlarr;":"\u{21C4}","rlhar;":"\u{21CC}","rlm;":"\u{200F}","rmoust;":"\u{23B1}", + 
"rmoustache;":"\u{23B1}","rnmid;":"\u{2AEE}","roang;":"\u{27ED}","roarr;":"\u{21FE}", + "robrk;":"\u{27E7}","ropar;":"\u{2986}","Ropf;":"\u{211D}","ropf;":"\u{1D563}", + "roplus;":"\u{2A2E}","rotimes;":"\u{2A35}","RoundImplies;":"\u{2970}","rpar;":"\u{29}", + "rpargt;":"\u{2994}","rppolint;":"\u{2A12}","rrarr;":"\u{21C9}","Rrightarrow;":"\u{21DB}", + "rsaquo;":"\u{203A}","Rscr;":"\u{211B}","rscr;":"\u{1D4C7}","Rsh;":"\u{21B1}", + "rsh;":"\u{21B1}","rsqb;":"\u{5D}","rsquo;":"\u{2019}","rsquor;":"\u{2019}", + "rthree;":"\u{22CC}","rtimes;":"\u{22CA}","rtri;":"\u{25B9}","rtrie;":"\u{22B5}", + "rtrif;":"\u{25B8}","rtriltri;":"\u{29CE}","RuleDelayed;":"\u{29F4}","ruluhar;":"\u{2968}", + "rx;":"\u{211E}","Sacute;":"\u{15A}","sacute;":"\u{15B}","sbquo;":"\u{201A}", + "Sc;":"\u{2ABC}","sc;":"\u{227B}","scap;":"\u{2AB8}","Scaron;":"\u{160}", + "scaron;":"\u{161}","sccue;":"\u{227D}","scE;":"\u{2AB4}","sce;":"\u{2AB0}", + "Scedil;":"\u{15E}","scedil;":"\u{15F}","Scirc;":"\u{15C}","scirc;":"\u{15D}", + "scnap;":"\u{2ABA}","scnE;":"\u{2AB6}","scnsim;":"\u{22E9}","scpolint;":"\u{2A13}", + "scsim;":"\u{227F}","Scy;":"\u{421}","scy;":"\u{441}","sdot;":"\u{22C5}", + "sdotb;":"\u{22A1}","sdote;":"\u{2A66}","searhk;":"\u{2925}","seArr;":"\u{21D8}", + "searr;":"\u{2198}","searrow;":"\u{2198}","sect;":"\u{A7}","semi;":"\u{3B}", + "seswar;":"\u{2929}","setminus;":"\u{2216}","setmn;":"\u{2216}","sext;":"\u{2736}", + "Sfr;":"\u{1D516}","sfr;":"\u{1D530}","sfrown;":"\u{2322}","sharp;":"\u{266F}", + "SHCHcy;":"\u{429}","shchcy;":"\u{449}","SHcy;":"\u{428}","shcy;":"\u{448}", + "ShortDownArrow;":"\u{2193}","ShortLeftArrow;":"\u{2190}","shortmid;":"\u{2223}","shortparallel;":"\u{2225}", + "ShortRightArrow;":"\u{2192}","ShortUpArrow;":"\u{2191}","shy;":"\u{AD}","Sigma;":"\u{3A3}", + "sigma;":"\u{3C3}","sigmaf;":"\u{3C2}","sigmav;":"\u{3C2}","sim;":"\u{223C}", + "simdot;":"\u{2A6A}","sime;":"\u{2243}","simeq;":"\u{2243}","simg;":"\u{2A9E}", + 
"simgE;":"\u{2AA0}","siml;":"\u{2A9D}","simlE;":"\u{2A9F}","simne;":"\u{2246}", + "simplus;":"\u{2A24}","simrarr;":"\u{2972}","slarr;":"\u{2190}","SmallCircle;":"\u{2218}", + "smallsetminus;":"\u{2216}","smashp;":"\u{2A33}","smeparsl;":"\u{29E4}","smid;":"\u{2223}", + "smile;":"\u{2323}","smt;":"\u{2AAA}","smte;":"\u{2AAC}","smtes;":"\u{2AAC}\u{FE00}", + "SOFTcy;":"\u{42C}","softcy;":"\u{44C}","sol;":"\u{2F}","solb;":"\u{29C4}", + "solbar;":"\u{233F}","Sopf;":"\u{1D54A}","sopf;":"\u{1D564}","spades;":"\u{2660}", + "spadesuit;":"\u{2660}","spar;":"\u{2225}","sqcap;":"\u{2293}","sqcaps;":"\u{2293}\u{FE00}", + "sqcup;":"\u{2294}","sqcups;":"\u{2294}\u{FE00}","Sqrt;":"\u{221A}","sqsub;":"\u{228F}", + "sqsube;":"\u{2291}","sqsubset;":"\u{228F}","sqsubseteq;":"\u{2291}","sqsup;":"\u{2290}", + "sqsupe;":"\u{2292}","sqsupset;":"\u{2290}","sqsupseteq;":"\u{2292}","squ;":"\u{25A1}", + "Square;":"\u{25A1}","square;":"\u{25A1}","SquareIntersection;":"\u{2293}","SquareSubset;":"\u{228F}", + "SquareSubsetEqual;":"\u{2291}","SquareSuperset;":"\u{2290}","SquareSupersetEqual;":"\u{2292}","SquareUnion;":"\u{2294}", + "squarf;":"\u{25AA}","squf;":"\u{25AA}","srarr;":"\u{2192}","Sscr;":"\u{1D4AE}", + "sscr;":"\u{1D4C8}","ssetmn;":"\u{2216}","ssmile;":"\u{2323}","sstarf;":"\u{22C6}", + "Star;":"\u{22C6}","star;":"\u{2606}","starf;":"\u{2605}","straightepsilon;":"\u{3F5}", + "straightphi;":"\u{3D5}","strns;":"\u{AF}","Sub;":"\u{22D0}","sub;":"\u{2282}", + "subdot;":"\u{2ABD}","subE;":"\u{2AC5}","sube;":"\u{2286}","subedot;":"\u{2AC3}", + "submult;":"\u{2AC1}","subnE;":"\u{2ACB}","subne;":"\u{228A}","subplus;":"\u{2ABF}", + "subrarr;":"\u{2979}","Subset;":"\u{22D0}","subset;":"\u{2282}","subseteq;":"\u{2286}", + "subseteqq;":"\u{2AC5}","SubsetEqual;":"\u{2286}","subsetneq;":"\u{228A}","subsetneqq;":"\u{2ACB}", + "subsim;":"\u{2AC7}","subsub;":"\u{2AD5}","subsup;":"\u{2AD3}","succ;":"\u{227B}", + 
"succapprox;":"\u{2AB8}","succcurlyeq;":"\u{227D}","Succeeds;":"\u{227B}","SucceedsEqual;":"\u{2AB0}", + "SucceedsSlantEqual;":"\u{227D}","SucceedsTilde;":"\u{227F}","succeq;":"\u{2AB0}","succnapprox;":"\u{2ABA}", + "succneqq;":"\u{2AB6}","succnsim;":"\u{22E9}","succsim;":"\u{227F}","SuchThat;":"\u{220B}", + "Sum;":"\u{2211}","sum;":"\u{2211}","sung;":"\u{266A}","Sup;":"\u{22D1}", + "sup;":"\u{2283}","sup1;":"\u{B9}","sup2;":"\u{B2}","sup3;":"\u{B3}", + "supdot;":"\u{2ABE}","supdsub;":"\u{2AD8}","supE;":"\u{2AC6}","supe;":"\u{2287}", + "supedot;":"\u{2AC4}","Superset;":"\u{2283}","SupersetEqual;":"\u{2287}","suphsol;":"\u{27C9}", + "suphsub;":"\u{2AD7}","suplarr;":"\u{297B}","supmult;":"\u{2AC2}","supnE;":"\u{2ACC}", + "supne;":"\u{228B}","supplus;":"\u{2AC0}","Supset;":"\u{22D1}","supset;":"\u{2283}", + "supseteq;":"\u{2287}","supseteqq;":"\u{2AC6}","supsetneq;":"\u{228B}","supsetneqq;":"\u{2ACC}", + "supsim;":"\u{2AC8}","supsub;":"\u{2AD4}","supsup;":"\u{2AD6}","swarhk;":"\u{2926}", + "swArr;":"\u{21D9}","swarr;":"\u{2199}","swarrow;":"\u{2199}","swnwar;":"\u{292A}", + "szlig;":"\u{DF}","Tab;":"\u{9}","target;":"\u{2316}","Tau;":"\u{3A4}", + "tau;":"\u{3C4}","tbrk;":"\u{23B4}","Tcaron;":"\u{164}","tcaron;":"\u{165}", + "Tcedil;":"\u{162}","tcedil;":"\u{163}","Tcy;":"\u{422}","tcy;":"\u{442}", + "tdot;":"\u{20DB}","telrec;":"\u{2315}","Tfr;":"\u{1D517}","tfr;":"\u{1D531}", + "there4;":"\u{2234}","Therefore;":"\u{2234}","therefore;":"\u{2234}","Theta;":"\u{398}", + "theta;":"\u{3B8}","thetasym;":"\u{3D1}","thetav;":"\u{3D1}","thickapprox;":"\u{2248}", + "thicksim;":"\u{223C}", + // "ThickSpace;":"\u{205F}\u{200A}", + "thinsp;":"\u{2009}","ThinSpace;":"\u{2009}","thkap;":"\u{2248}","thksim;":"\u{223C}", + "THORN;":"\u{DE}","thorn;":"\u{FE}","Tilde;":"\u{223C}","tilde;":"\u{2DC}", + "TildeEqual;":"\u{2243}","TildeFullEqual;":"\u{2245}","TildeTilde;":"\u{2248}","times;":"\u{D7}", + "timesb;":"\u{22A0}","timesbar;":"\u{2A31}","timesd;":"\u{2A30}","tint;":"\u{222D}", + 
"toea;":"\u{2928}","top;":"\u{22A4}","topbot;":"\u{2336}","topcir;":"\u{2AF1}", + "Topf;":"\u{1D54B}","topf;":"\u{1D565}","topfork;":"\u{2ADA}","tosa;":"\u{2929}", + "tprime;":"\u{2034}","TRADE;":"\u{2122}","trade;":"\u{2122}","triangle;":"\u{25B5}", + "triangledown;":"\u{25BF}","triangleleft;":"\u{25C3}","trianglelefteq;":"\u{22B4}","triangleq;":"\u{225C}", + "triangleright;":"\u{25B9}","trianglerighteq;":"\u{22B5}","tridot;":"\u{25EC}","trie;":"\u{225C}", + "triminus;":"\u{2A3A}","TripleDot;":"\u{20DB}","triplus;":"\u{2A39}","trisb;":"\u{29CD}", + "tritime;":"\u{2A3B}","trpezium;":"\u{23E2}","Tscr;":"\u{1D4AF}","tscr;":"\u{1D4C9}", + "TScy;":"\u{426}","tscy;":"\u{446}","TSHcy;":"\u{40B}","tshcy;":"\u{45B}", + "Tstrok;":"\u{166}","tstrok;":"\u{167}","twixt;":"\u{226C}","twoheadleftarrow;":"\u{219E}", + "twoheadrightarrow;":"\u{21A0}","Uacute;":"\u{DA}","uacute;":"\u{FA}","Uarr;":"\u{219F}", + "uArr;":"\u{21D1}","uarr;":"\u{2191}","Uarrocir;":"\u{2949}","Ubrcy;":"\u{40E}", + "ubrcy;":"\u{45E}","Ubreve;":"\u{16C}","ubreve;":"\u{16D}","Ucirc;":"\u{DB}", + "ucirc;":"\u{FB}","Ucy;":"\u{423}","ucy;":"\u{443}","udarr;":"\u{21C5}", + "Udblac;":"\u{170}","udblac;":"\u{171}","udhar;":"\u{296E}","ufisht;":"\u{297E}", + "Ufr;":"\u{1D518}","ufr;":"\u{1D532}","Ugrave;":"\u{D9}","ugrave;":"\u{F9}", + "uHar;":"\u{2963}","uharl;":"\u{21BF}","uharr;":"\u{21BE}","uhblk;":"\u{2580}", + "ulcorn;":"\u{231C}","ulcorner;":"\u{231C}","ulcrop;":"\u{230F}","ultri;":"\u{25F8}", + "Umacr;":"\u{16A}","umacr;":"\u{16B}","uml;":"\u{A8}","UnderBar;":"\u{5F}", + "UnderBrace;":"\u{23DF}","UnderBracket;":"\u{23B5}","UnderParenthesis;":"\u{23DD}","Union;":"\u{22C3}", + "UnionPlus;":"\u{228E}","Uogon;":"\u{172}","uogon;":"\u{173}","Uopf;":"\u{1D54C}", + "uopf;":"\u{1D566}","UpArrow;":"\u{2191}","Uparrow;":"\u{21D1}","uparrow;":"\u{2191}", + "UpArrowBar;":"\u{2912}","UpArrowDownArrow;":"\u{21C5}","UpDownArrow;":"\u{2195}","Updownarrow;":"\u{21D5}", + 
"updownarrow;":"\u{2195}","UpEquilibrium;":"\u{296E}","upharpoonleft;":"\u{21BF}","upharpoonright;":"\u{21BE}", + "uplus;":"\u{228E}","UpperLeftArrow;":"\u{2196}","UpperRightArrow;":"\u{2197}","Upsi;":"\u{3D2}", + "upsi;":"\u{3C5}","upsih;":"\u{3D2}","Upsilon;":"\u{3A5}","upsilon;":"\u{3C5}", + "UpTee;":"\u{22A5}","UpTeeArrow;":"\u{21A5}","upuparrows;":"\u{21C8}","urcorn;":"\u{231D}", + "urcorner;":"\u{231D}","urcrop;":"\u{230E}","Uring;":"\u{16E}","uring;":"\u{16F}", + "urtri;":"\u{25F9}","Uscr;":"\u{1D4B0}","uscr;":"\u{1D4CA}","utdot;":"\u{22F0}", + "Utilde;":"\u{168}","utilde;":"\u{169}","utri;":"\u{25B5}","utrif;":"\u{25B4}", + "uuarr;":"\u{21C8}","Uuml;":"\u{DC}","uuml;":"\u{FC}","uwangle;":"\u{29A7}", + "vangrt;":"\u{299C}","varepsilon;":"\u{3F5}","varkappa;":"\u{3F0}","varnothing;":"\u{2205}", + "varphi;":"\u{3D5}","varpi;":"\u{3D6}","varpropto;":"\u{221D}","vArr;":"\u{21D5}", + "varr;":"\u{2195}","varrho;":"\u{3F1}","varsigma;":"\u{3C2}","varsubsetneq;":"\u{228A}\u{FE00}", + "varsubsetneqq;":"\u{2ACB}\u{FE00}","varsupsetneq;":"\u{228B}\u{FE00}","varsupsetneqq;":"\u{2ACC}\u{FE00}","vartheta;":"\u{3D1}", + "vartriangleleft;":"\u{22B2}","vartriangleright;":"\u{22B3}","Vbar;":"\u{2AEB}","vBar;":"\u{2AE8}", + "vBarv;":"\u{2AE9}","Vcy;":"\u{412}","vcy;":"\u{432}","VDash;":"\u{22AB}", + "Vdash;":"\u{22A9}","vDash;":"\u{22A8}","vdash;":"\u{22A2}","Vdashl;":"\u{2AE6}", + "Vee;":"\u{22C1}","vee;":"\u{2228}","veebar;":"\u{22BB}","veeeq;":"\u{225A}", + "vellip;":"\u{22EE}","Verbar;":"\u{2016}","verbar;":"\u{7C}","Vert;":"\u{2016}", + "vert;":"\u{7C}","VerticalBar;":"\u{2223}","VerticalLine;":"\u{7C}","VerticalSeparator;":"\u{2758}", + "VerticalTilde;":"\u{2240}","VeryThinSpace;":"\u{200A}","Vfr;":"\u{1D519}","vfr;":"\u{1D533}", + "vltri;":"\u{22B2}","vnsub;":"\u{2282}\u{20D2}","vnsup;":"\u{2283}\u{20D2}","Vopf;":"\u{1D54D}", + "vopf;":"\u{1D567}","vprop;":"\u{221D}","vrtri;":"\u{22B3}","Vscr;":"\u{1D4B1}", + 
"vscr;":"\u{1D4CB}","vsubnE;":"\u{2ACB}\u{FE00}","vsubne;":"\u{228A}\u{FE00}","vsupnE;":"\u{2ACC}\u{FE00}", + "vsupne;":"\u{228B}\u{FE00}","Vvdash;":"\u{22AA}","vzigzag;":"\u{299A}","Wcirc;":"\u{174}", + "wcirc;":"\u{175}","wedbar;":"\u{2A5F}","Wedge;":"\u{22C0}","wedge;":"\u{2227}", + "wedgeq;":"\u{2259}","weierp;":"\u{2118}","Wfr;":"\u{1D51A}","wfr;":"\u{1D534}", + "Wopf;":"\u{1D54E}","wopf;":"\u{1D568}","wp;":"\u{2118}","wr;":"\u{2240}", + "wreath;":"\u{2240}","Wscr;":"\u{1D4B2}","wscr;":"\u{1D4CC}","xcap;":"\u{22C2}", + "xcirc;":"\u{25EF}","xcup;":"\u{22C3}","xdtri;":"\u{25BD}","Xfr;":"\u{1D51B}", + "xfr;":"\u{1D535}","xhArr;":"\u{27FA}","xharr;":"\u{27F7}","Xi;":"\u{39E}", + "xi;":"\u{3BE}","xlArr;":"\u{27F8}","xlarr;":"\u{27F5}","xmap;":"\u{27FC}", + "xnis;":"\u{22FB}","xodot;":"\u{2A00}","Xopf;":"\u{1D54F}","xopf;":"\u{1D569}", + "xoplus;":"\u{2A01}","xotime;":"\u{2A02}","xrArr;":"\u{27F9}","xrarr;":"\u{27F6}", + "Xscr;":"\u{1D4B3}","xscr;":"\u{1D4CD}","xsqcup;":"\u{2A06}","xuplus;":"\u{2A04}", + "xutri;":"\u{25B3}","xvee;":"\u{22C1}","xwedge;":"\u{22C0}","Yacute;":"\u{DD}", + "yacute;":"\u{FD}","YAcy;":"\u{42F}","yacy;":"\u{44F}","Ycirc;":"\u{176}", + "ycirc;":"\u{177}","Ycy;":"\u{42B}","ycy;":"\u{44B}","yen;":"\u{A5}", + "Yfr;":"\u{1D51C}","yfr;":"\u{1D536}","YIcy;":"\u{407}","yicy;":"\u{457}", + "Yopf;":"\u{1D550}","yopf;":"\u{1D56A}","Yscr;":"\u{1D4B4}","yscr;":"\u{1D4CE}", + "YUcy;":"\u{42E}","yucy;":"\u{44E}","Yuml;":"\u{178}","yuml;":"\u{FF}", + "Zacute;":"\u{179}","zacute;":"\u{17A}","Zcaron;":"\u{17D}","zcaron;":"\u{17E}", + "Zcy;":"\u{417}","zcy;":"\u{437}","Zdot;":"\u{17B}","zdot;":"\u{17C}", + "zeetrf;":"\u{2128}","ZeroWidthSpace;":"\u{200B}","Zeta;":"\u{396}","zeta;":"\u{3B6}", + "Zfr;":"\u{2128}","zfr;":"\u{1D537}","ZHcy;":"\u{416}","zhcy;":"\u{436}", + "zigrarr;":"\u{21DD}","Zopf;":"\u{2124}","zopf;":"\u{1D56B}","Zscr;":"\u{1D4B5}", + "zscr;":"\u{1D4CF}","zwj;":"\u{200D}","zwnj;":"\u{200C}" ] diff --git a/Sources/HTMLEntities/ParseError.swift 
b/Sources/HTMLEntities/ParseError.swift new file mode 100644 index 0000000..4bc2159 --- /dev/null +++ b/Sources/HTMLEntities/ParseError.swift @@ -0,0 +1,25 @@ +/* + * Copyright IBM Corporation 2016 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public enum ParseError: Error { + case DeprecatedNumericReference(String) + case DisallowedNumericReference(String) + case IllegalArgument(String) + case InvalidNamedReference(String) + case MalformedNumericReference(String) + case MissingSemicolon(String) + case OutsideValidUnicodeRange(String) +} diff --git a/Sources/HTMLEntities/String+HTMLEntities.swift b/Sources/HTMLEntities/String+HTMLEntities.swift index 537b140..aaeda46 100644 --- a/Sources/HTMLEntities/String+HTMLEntities.swift +++ b/Sources/HTMLEntities/String+HTMLEntities.swift @@ -26,266 +26,421 @@ public extension String { /// *Optional*. Defaults to `false`. /// - parameter useNamedReferences: Specifies if named character references /// should be used whenever possible. *Optional*. Defaults to `true`. 
- public func htmlEscape(decimal: Bool = false, useNamedReferences: Bool = true) - -> String { - let unicodes = self.unicodeScalars - + public func htmlEscape(decimal: Bool = false, + useNamedReferences: Bool = true) -> String { // result buffer var str: String = "" - // indices for substringing and iterating - var leftIndex = unicodes.startIndex - var currentIndex = leftIndex - - while (currentIndex < unicodes.endIndex) { - let nextIndex = unicodes.index(after: currentIndex) - let unicode = unicodes[currentIndex].value - - if useNamedReferences, - let entity = html4NamedCharactersEncodeMap[unicode] { - // move unbuffered characters over to the result buffer - str.append(String(unicodes[leftIndex..alert(\"abc\")"` /// - parameter strict: Specifies if escapes MUST always end with `;`. - /// *Optional*. Defaults to true. - public func htmlUnescape(strict: Bool = true) -> String { - let unicodes = self.unicodeScalars - + /// - throws: An error of type `ParseError` + public func htmlUnescape(strict: Bool) throws -> String { // result buffer - var str: String? 
= nil + var str = "" - // entity buffer - // use optional string since there are issues on Linux when checking - // again empty string, i.e., "\u{200C}" == "" is true; "\u{200C}" is - // the named character ‌ - var entity: String = "" + // entity buffers + var entityPrefix = "" + var entity = "" // current parse state var state = EntityParseState.Invalid - // indices for substringing and iterating - var leftIndex = unicodes.startIndex - var currentIndex = leftIndex - var ampersandIndex = unicodes.endIndex - - // closure for resetting parse state to its original state - let reset = { - entity = "" - state = .Invalid - ampersandIndex = unicodes.endIndex - } - - while (currentIndex < unicodes.endIndex) { - var nextIndex = unicodes.index(after: currentIndex) - let unicode = unicodes[currentIndex].value + for u in self.unicodeScalars { + let unicodeAsString = String(u) + let unicode = u.value // nondeterminstic finite automaton for parsing entity - // NOTE: While all entities begin with &, - // not all HTML5 named character references end with ;, - // nor do numeric entities, hex or dec, have to end with ; switch state { case .Invalid: if unicode.isAmpersand { - // start of a possible entity of unknown type + // start of a possible character reference state = .Unknown - ampersandIndex = currentIndex + entityPrefix = unicodeAsString + } + else { + // move unicode to result buffer + str += unicodeAsString } case .Unknown: - // parsed an & unicode - // need to determine type of entity - if unicode.isHash { - // entity can only be a number type + // previously parsed & + // need to determine type of character reference + if unicode.isAmpersand { + // parsed & again + // move previous & to result buffer + str += unicodeAsString + } + else if unicode.isHash { + // numeric character reference state = .Number + entityPrefix += unicodeAsString } else if unicode.isAlphaNumeric { - // entity can only be named character reference type + // named character reference state = .Named - 
// walk back one unicode - nextIndex = currentIndex - } - else if unicode.isAmpersand { - // parsed & again, ignore the previous one - ampersandIndex = currentIndex + // move current unicode to entity buffer + entity += unicodeAsString } else { - // false alarm, not an entity; reset state - reset() + // false alarm, not a character reference + // move back to invalid state + entityPrefix = "" + state = .Invalid + + // move the consumed & and current unicode to result buffer + str += entityPrefix + unicodeAsString } case .Number: - // parsed a # unicode + // previously parsed &# // need to determine dec or hex - if unicode.isX { - // entity can only be hexadecimal type + if unicode.isAmpersand { + // parsed & again + if strict { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "If no characters match the range, then don't consume any characters + // (and unconsume the U+0023 NUMBER SIGN character and, if appropriate, + // the X character). This is a parse error; nothing is returned." 
+ throw ParseError.MalformedNumericReference(entityPrefix + unicodeAsString) + } + + // move the consume &# to result buffer + str += entityPrefix + + // move to unknown state + state = .Unknown + entityPrefix = unicodeAsString + } + else if unicode.isX { + // hexadecimal numeric character reference state = .Hex + entityPrefix += unicodeAsString } else if unicode.isNumeral { - // entity can only be decimal type + // decimal numeric character reference state = .Dec - - // walk back one unicode - nextIndex = currentIndex - } - else if unicode.isAmpersand { - // parsed & again, ignore the previous one - state = .Unknown - ampersandIndex = currentIndex + entity += unicodeAsString } else { - // false alarm, not an entity; reset state - reset() - } - case .Dec, .Hex, .Named: - if unicode.isAmpersand { - // parsed & again, ignore the previous one - state = .Unknown - ampersandIndex = currentIndex - entity = "" + // false alarm, not a character reference + if strict { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "If no characters match the range, then don't consume any characters + // (and unconsume the U+0023 NUMBER SIGN character and, if appropriate, + // the X character). This is a parse error; nothing is returned." 
+ throw ParseError.MalformedNumericReference(entityPrefix + unicodeAsString) + } + + // move the consumed &# and current unicode to result buffer + str += entityPrefix + unicodeAsString - break + // move to invalid state + state = .Invalid + entityPrefix = "" + entity = "" + } + case .Dec, .Hex: + // previously parsed &#[0-9]+ or &#[xX][0-9A-Fa-f]* + if state == .Dec && unicode.isNumeral || state == .Hex && unicode.isHexNumeral { + // greedy matching + // consume as many valid characters as possible before unescaping + entity += unicodeAsString } + else { + // current character is not in matching range + if strict { + if entity == "" { + // no characters matching range was parsed + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "If no characters match the range, then don't consume any characters + // (and unconsume the U+0023 NUMBER SIGN character and, if appropriate, + // the X character). This is a parse error; nothing is returned." + throw ParseError.MalformedNumericReference(entityPrefix + unicodeAsString) + } - // lookahead one unicode to help decide next action - let lookahead: UInt32? = nextIndex == unicodes.endIndex - ? nil : unicodes[nextIndex].value + if !unicode.isSemicolon { + // entity did not end with ; + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "[I]f the next character is a U+003B SEMICOLON, consume that too. + // If it isn't, there is a parse error." 
+ throw ParseError.MissingSemicolon(entityPrefix + entity) + } + } - var isEndOfEntity = false + let unescaped = try decode(entity: entity, entityPrefix: entityPrefix, strict: strict) - if unicode.isValidEntityUnicode(for: state) { - // buffer current unicode - entity.append(String(unicodes[currentIndex])) + // append unescaped numeric reference to result buffer + str += unescaped - if let lookahead = lookahead { - // lookahead is not empty - isEndOfEntity = lookahead.isSemicolon - || !strict && !lookahead.isValidEntityUnicode(for: state) + if unicode.isAmpersand { + // parsed & again + // move to unknown state + state = .Unknown + entityPrefix = unicodeAsString + entity = "" } else { - // lookahead is empty - isEndOfEntity = !strict + if !unicode.isSemicolon { + // move current unicode to result buffer + str += unicodeAsString + } + + // move back to invalid state + state = .Invalid + entityPrefix = "" + entity = "" } } - else { - // strict parsing, but encountered something - // other than ; or hexadecimal numeral - reset() + case .Named: + // previously parsed &[0-9A-Za-z]+ + if unicode.isAlphaNumeric { + // keep consuming alphanumeric unicodes + // only try to decode it when we encounter a nonalphanumeric unicode + entity += unicodeAsString } - - if isEndOfEntity { - if let lookahead = lookahead, lookahead.isSemicolon { - // consume the ; by moving nextIndex by one so that - // nextIndex is pointing to the unicode after the ; - nextIndex = unicodes.index(after: nextIndex) - - if state == .Named { - entity.append(";") - } + else { + if unicode.isSemicolon { + entity += unicodeAsString } - var code: UInt32? = nil + // try to decode parsed chunk of alphanumeric unicodes + let unescaped = try decode(entity: entity, entityPrefix: entityPrefix, strict: strict) - switch state { - case .Dec, .Hex: - let radix = state == .Dec ? 
10 : 16 + str += unescaped - code = UInt32(entity, radix: radix) + if unicode.isAmpersand { + // parsed & again + // move to unknown state + state = .Unknown + entityPrefix = unicodeAsString + entity = "" - if let c = code { - if c.isReplacementCharacterEquivalent { - code = replacementCharacterAsUInt32 - } - else { - code = htmlSpecialNumericDecodeMap[c] ?? code - } - } - else { - // code is invalid anyway, let's replace it with 0xFFFD - code = replacementCharacterAsUInt32 - } - case .Named: - code = html4NamedCharactersDecodeMap["&" + entity] - default: break } + else if !unicode.isSemicolon { + // move current unicode to result buffer + str += unicodeAsString + } + + // move back to invalid state + state = .Invalid + entityPrefix = "" + entity = "" + } + } + } - if let code = code, - let unicodeScalar = UnicodeScalar(code) { - // reached end of entity - // move unbuffered unicodes over to the result buffer - str = str == nil ? "" : str + // one more round of finite automaton to catch the edge case where the original string + // ends with a character reference that isn't terminated by ; + switch state { + case .Dec, .Hex: + // parsed a partial numeric character reference + if strict { + if entity == "" { + // no characters matching range was parsed + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "If no characters match the range, then don't consume any characters + // (and unconsume the U+0023 NUMBER SIGN character and, if appropriate, + // the X character). This is a parse error; nothing is returned." + throw ParseError.MalformedNumericReference(entityPrefix) + } - str?.append(String(unicodes[leftIndex..alert(\"abc\")"` + /// Equivalent to `htmlUnescape(strict: false)`, but does not throw parse errors. + public func htmlUnescape() -> String { + // non-strict mode should never throw error + return try! 
self.htmlUnescape(strict: false) + } +} - // even if entity wasn't unescapable, reset since it is - // end of entity - reset() +private func decode(entity: String, entityPrefix: String, strict: Bool) throws -> String { + switch entityPrefix { + case "&#", "&#x", "&#X": + // numeric character reference + let radix = entityPrefix == "&#" ? 10 : 16 + + if strict && entity == "" { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "If no characters match the range, then don't consume any characters + // (and unconsume the U+0023 NUMBER SIGN character and, if appropriate, + // the X character). This is a parse error; nothing is returned." + throw ParseError.MalformedNumericReference(entityPrefix) + } + else if var code = UInt32(entity, radix: radix) { + if code.isReplacementCharacterEquivalent { + code = replacementCharacterAsUInt32 + + if strict { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "[I]f the number is in the range 0xD800 to 0xDFFF or is greater + // than 0x10FFFF, then this is a parse error." + throw ParseError.OutsideValidUnicodeRange(entityPrefix + entity) + } + } + else if let c = deprecatedNumericDecodeMap[code] { + code = c + + if strict { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "If that number is one of the numbers in the first column of the + // following table, then this is a parse error." 
+ throw ParseError.DeprecatedNumericReference(entityPrefix + entity) } } + else if strict && code.isDisallowedReference { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "[I]f the number is in the range 0x0001 to 0x0008, 0x000D to 0x001F, 0x007F + // to 0x009F, 0xFDD0 to 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, + // 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + // 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, + // 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + // 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, or 0x10FFFF, then + // this is a parse error." + throw ParseError.DisallowedNumericReference(entityPrefix + entity) + } + + return String(UnicodeScalar(code)!) + } + else { + // Assume entity is nonempty and only contains valid characters for the given type + // of numeric character reference. Given this assumption, at this point in the code + // the numeric character reference must be greater than `UInt32.max`, i.e., it is + // not representable by UInt32 (and it is, by transitivity, greater than 0x10FFFF); + // therefore, the numeric character reference should be replaced by U+FFFD + if strict { + // https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references + // "[I]f the number is in the range 0xD800 to 0xDFFF or is greater + // than 0x10FFFF, then this is a parse error." + throw ParseError.OutsideValidUnicodeRange(entityPrefix + entity) + } + + return String(UnicodeScalar(replacementCharacterAsUInt32)!) 
+ } + case "&": + // named character reference + if entity == "" { + return entityPrefix + } + + if entity.hasSuffix(";") { + // Step 1: check all other named characters first + // Assume special case is rare, always check regular case first to minimize + // search time cost amortization + if let c = namedCharactersDecodeMap[entity] { + return String(c) + } - // move currentIndex to the position of the next unicode to be consumed - currentIndex = nextIndex + // Step 2: check special named characters if entity didn't match any regular + // named character references + if let s = specialNamedCharactersDecodeMap[entity] { + return s + } } - if var str = str { - // append rest of string to result buffer - str.append(String(unicodes[leftIndex.. -> -/// Note: Does not check for uniqueness among values -func invert(_ dict: [K: V]) -> [V: K] { - var inverseDict: [V: K] = [:] +extension Dictionary { + /// Union of two dictionaries + /// Note: The in the argument will override + /// the current dictionary's if the keys match + func updating(_ dict: [Key: Value]) -> [Key: Value] { + var newDict = self + + for (key, value) in dict { + newDict[key] = value + } - for (key, value) in dict { - inverseDict[value] = key + return newDict } +} + +extension Dictionary where Value: Hashable { + /// Invert a dictionary: -> + /// Note: Does not check for uniqueness among values + func inverting(_ pick: (Key, Key) -> Key = { existingValue, newValue in + return newValue + }) -> [Value: Key] { + var inverseDict: [Value: Key] = [:] + + for (key, value) in self { + if let existing = inverseDict[value] { + inverseDict[value] = pick(existing, key) + } + else { + inverseDict[value] = key + } + } - return inverseDict + return inverseDict + } } extension UInt32 { var isAlphaNumeric: Bool { - // ASCII values of [0-9], [A-Z0, [and [a-z] - return self.isNumeral || 65...90 ~= self || 97...122 ~= self + // unicode values of [0-9], [A-Z], and [a-z] + return self.isNumeral || 0x41...0x5A ~= self || 
0x61...0x7A ~= self } var isAmpersand: Bool { - // ASCII value of & - return self == 38 + // unicode value of & + return self == 0x26 } var isASCII: Bool { - // Less than 2^7 - return self < 128 + // Less than 0x80 + return self < 0x80 } /// https://www.w3.org/International/questions/qa-escapes#use var isAttributeSyntax: Bool { - // ASCII values of [", '] - return self == 34 || self == 39 + // unicode values of [", '] + return self == 0x22 || self == 0x27 + } + + var isDisallowedReference: Bool { + // unicode values of [0x1-0x8], [0xD-0x1F], [0x7F-0x9F], [0xFDD0-0xFDEF], 0xB, 0xFFFE, + // 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + // 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, + // 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, + // 0xFFFFF, 0x10FFFE, and 0x10FFFF + // return disallowedNumericReferences[self] ?? false + + return 0x1...0x8 ~= self || 0xD...0x1F ~= self || 0xFDD0...0xFDEF ~= self || self == 0xB + || self == 0xFFFE || self == 0xFFFF || self == 0x1FFFE || self == 0x1FFFF + || self == 0x2FFFE || self == 0x2FFFF || self == 0x3FFFE || self == 0x3FFFF + || self == 0x4FFFE || self == 0x4FFFF || self == 0x5FFFE || self == 0x5FFFF + || self == 0x6FFFE || self == 0x6FFFF || self == 0x7FFFE || self == 0x7FFFF + || self == 0x8FFFE || self == 0x8FFFF || self == 0x9FFFE || self == 0x9FFFF + || self == 0xAFFFE || self == 0xAFFFF || self == 0xBFFFE || self == 0xBFFFF + || self == 0xCFFFE || self == 0xCFFFF || self == 0xDFFFE || self == 0xDFFFF + || self == 0xEFFFE || self == 0xEFFFF || self == 0xFFFFE || self == 0xFFFFF + || self == 0x10FFFE || self == 0x10FFFF } var isHash: Bool { - // ASCII value of # - return self == 35 + // unicode value of # + return self == 0x23 } var isHexNumeral: Bool { - // ASCII values of [0-9], [A-F], and [a-f] - return isNumeral || 65...70 ~= self || 97...102 ~= self + // unicode values of [0-9], [A-F], 
and [a-f] + return isNumeral || 0x41...0x46 ~= self || 0x61...0x66 ~= self + } + + var isNonprinting: Bool { + // unicode values of [NUL-US] and DEL non-printing characters + return 0x0...0x1F ~= self || self == 0x7F } var isNumeral: Bool { - // ASCII values of [0-9] - return 48...57 ~= self + // unicode values of [0-9] + return 0x30...0x39 ~= self } /// https://www.w3.org/TR/html5/syntax.html#tokenizing-character-references var isReplacementCharacterEquivalent: Bool { - // UTF32 values of [0xD800-0xDFFF], (0x10FFFF-∞] - return 55296...57343 ~= self || 1114111 < self + // UInt32 values of [0xD800-0xDFFF], (0x10FFFF-∞] + return 0xD800...0xDFFF ~= self || 0x10FFFF < self + } + + var isSafeASCII: Bool { + return self.isASCII && !self.isNonprinting && !self.isAttributeSyntax && !self.isTagSyntax } var isSemicolon: Bool { - // ASCII value of ; - return self == 59 + // unicode value of ; + return self == 0x3B } /// https://www.w3.org/International/questions/qa-escapes#use var isTagSyntax: Bool { - // ASCII values of [&, < , >] - return self == 38 || self == 60 || self == 62 + // unicode values of [&, < , >] + return self.isAmpersand || self == 0x3C || self == 0x3E } var isX: Bool { - // ASCII values of X and x - return self == 88 || self == 120 + // unicode values of X and x + return self == 0x58 || self == 0x78 } func isValidEntityUnicode(for state: EntityParseState) -> Bool { diff --git a/Tests/HTMLEntitiesTests/HTMLEntitiesTest.swift b/Tests/HTMLEntitiesTests/HTMLEntitiesTest.swift index 82136b3..d4ab9d8 100644 --- a/Tests/HTMLEntitiesTests/HTMLEntitiesTest.swift +++ b/Tests/HTMLEntitiesTests/HTMLEntitiesTest.swift @@ -14,9 +14,6 @@ * limitations under the License. 
*/ -/// Generated from the list of HTML4 entities here: -/// https://www.w3.org/TR/html4/sgml/entities.html - import XCTest @testable import HTMLEntities @@ -27,121 +24,336 @@ let str1Unescaped = "" let str1Escaped = "<script>alert("abc")</script>" /// Extended grapheme clusters with combined unicode scalars -let str2Unescaped = "한, 한, é, é, 🇺🇸" -let str2Escaped = "한, 한, é, é, 🇺🇸" +let str2Unescaped = "한, 한, ế, ế, 🇺🇸" +let str2Escaped = "한, 한, ế, ế, 🇺🇸" /// Text with non-ASCII characters let str3Unescaped = "Jako efektivnější se nám jeví pořádání tzv. Road Show prostřednictvím našich autorizovaných dealerů v Čechách a na Moravě, které proběhnou v průběhu září a října." -let str3Escaped = "Jako efektivnější se nám jeví pořádání tzv. Road Show prostřednictvím našich autorizovaných dealerů v Čechách a na Moravě, které proběhnou v průběhu září a října." +let str3Escaped = "Jako efektivnější se nám jeví pořádání tzv. Road Show prostřednictvím našich autorizovaných dealerů v Čechách a na Moravě, které proběhnou v průběhu září a října." class HTMLEntitiesTests: XCTestCase { func testNamedCharacterReferences() { - XCTAssertEqual(html4NamedCharactersDecodeMap.count, html4NamedCharactersEncodeMap.count) +#if os(Linux) + XCTAssertEqual(namedCharactersEncodeMap.count, 1367) +#else + XCTAssertEqual(namedCharactersEncodeMap.count, 1509) +#endif - for (reference, unicode) in html4NamedCharactersDecodeMap { - let unescaped = String(UnicodeScalar(unicode)!) 
- let escaped = reference + XCTAssertEqual(specialNamedCharactersDecodeMap.count, 2) + XCTAssertEqual(legacyNamedCharactersDecodeMap.count, 106) + XCTAssertEqual(namedCharactersDecodeMap.count, 2123) + + // make sure regular named character references can be escaped/unescaped + for (character, reference) in namedCharactersEncodeMap { + let unescaped = String(character) + let escaped = "&" + reference + + XCTAssertEqual(try escaped.htmlUnescape(strict: true), unescaped) + } + + // make sure legacy named character references can be unescaped in nonstrict mode, + // and that the correct ParseError is thrown in strict mode + for (reference, character) in legacyNamedCharactersDecodeMap { + let unescaped = String(character) + let escaped = "&" + reference - XCTAssertEqual(unescaped.htmlEscape(), escaped) XCTAssertEqual(escaped.htmlUnescape(), unescaped) + + do { + _ = try escaped.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.MissingSemicolon { + XCTAssert(true) + } + catch { + XCTAssert(false) + } + } + + // make sure the two special named character references can be unescaped + for (reference, string) in specialNamedCharactersDecodeMap { + let unescaped = string + let escaped = "&" + reference + + XCTAssertEqual(try escaped.htmlUnescape(strict: true), unescaped) } } - func testSpecialNumericCharacters() { - for (left, right) in htmlSpecialNumericDecodeMap { - let decEscaped = "&#" + String(left) + ";" - let hexEscaped = "&#x" + String(left, radix: 16) + ";" + func testNumericCharacterReferences() { + XCTAssertEqual(deprecatedNumericDecodeMap.count, 28) + XCTAssertEqual(disallowedNumericReferences.count, 94) + + // make sure the deprecated numeric references can be correctly unescaped in + // nonstrict mode, and that the correct ParseError is thrown in strict mode + for (left, right) in deprecatedNumericDecodeMap { let unescaped = String(UnicodeScalar(right)!) 
+ var decEscaped = "&#" + String(left) + var hexEscaped = "&#x" + String(left, radix: 16) XCTAssertEqual(decEscaped.htmlUnescape(), unescaped) XCTAssertEqual(hexEscaped.htmlUnescape(), unescaped) + + decEscaped += ";" + hexEscaped += ";" + + do { + _ = try decEscaped.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.DeprecatedNumericReference { + XCTAssert(true) + } + catch { + XCTAssert(false) + } + + do { + _ = try hexEscaped.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.DeprecatedNumericReference { + XCTAssert(true) + } + catch { + XCTAssert(false) + } + } + + // make sure invalid ranges of unicode characters can be correctly unescaped + // into U+FFFD in nonstrict mode, and that the correct ParseError is thrown + // in strict mode + func testReplacementCharacter(_ code: UInt64) { + var decEscaped = "&#" + String(code) + var hexEscaped = "&#x" + String(code, radix: 16) + + XCTAssertEqual(decEscaped.htmlUnescape(), replacementCharacterAsString) + XCTAssertEqual(hexEscaped.htmlUnescape(), replacementCharacterAsString) + + decEscaped += ";" + hexEscaped += ";" + + do { + _ = try decEscaped.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.OutsideValidUnicodeRange { + XCTAssert(true) + } + catch { + XCTAssert(false) + } + + do { + _ = try hexEscaped.htmlUnescape(strict: true) + } + catch ParseError.OutsideValidUnicodeRange { + XCTAssert(true) + } + catch { + XCTAssert(false) + } } - XCTAssertEqual("�".htmlUnescape(), replacementCharacterAsString) - XCTAssertEqual("�".htmlUnescape(), replacementCharacterAsString) - XCTAssertEqual("�".htmlUnescape(), replacementCharacterAsString) - XCTAssertEqual("�".htmlUnescape(), replacementCharacterAsString) - XCTAssertEqual("�".htmlUnescape(), replacementCharacterAsString) + testReplacementCharacter(0xD800) + testReplacementCharacter(0xDDDD) + testReplacementCharacter(0xDFFF) + testReplacementCharacter(0x10FFFF + 1) + testReplacementCharacter(0xDDDDDD) + 
testReplacementCharacter(UInt64(UInt32.max)) + testReplacementCharacter(0xDDDDDDDDDD) + + // make sure the disallowed numeric references can be correctly unescaped in + // nonstrict mode, and that the correct ParseError is thrown in strict mode + for unicode in disallowedNumericReferences.keys { + let unescaped = String(UnicodeScalar(unicode)!) + var decEscaped = "&#" + String(unicode) + var hexEscaped = "&#x" + String(unicode, radix: 16) + + XCTAssertEqual(decEscaped.htmlUnescape(), unescaped) + XCTAssertEqual(hexEscaped.htmlUnescape(), unescaped) + + decEscaped += ";" + hexEscaped += ";" + + do { + _ = try decEscaped.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.DisallowedNumericReference { + XCTAssert(true) + } + catch { + XCTAssert(false) + } + + do { + _ = try hexEscaped.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.DisallowedNumericReference { + XCTAssert(true) + } + catch { + XCTAssert(false) + } + } } func testEncode() { XCTAssertEqual(str1Unescaped.htmlEscape(), str1Escaped) XCTAssertEqual(str2Unescaped.htmlEscape(), str2Escaped) - XCTAssertEqual(str3Unescaped.htmlEscape(useNamedReferences: false), str3Escaped) + XCTAssertEqual(str3Unescaped.htmlEscape(), str3Escaped) } func testDecode() { - XCTAssertEqual(str1Escaped.htmlUnescape(), str1Unescaped) - XCTAssertEqual(str2Escaped.htmlUnescape(), str2Unescaped) - XCTAssertEqual(str3Escaped.htmlUnescape(), str3Unescaped) + XCTAssertEqual(try str1Escaped.htmlUnescape(strict: true), str1Unescaped) + XCTAssertEqual(try str2Escaped.htmlUnescape(strict: true), str2Unescaped) + XCTAssertEqual(try str3Escaped.htmlUnescape(strict: true), str3Unescaped) } func testInvertibility() { - XCTAssertEqual(str1Unescaped.htmlEscape().htmlUnescape(), str1Unescaped) - XCTAssertEqual(str1Unescaped.htmlEscape(useNamedReferences: false).htmlUnescape(), str1Unescaped) - XCTAssertEqual(str1Unescaped.htmlEscape(decimal: true, useNamedReferences: false).htmlUnescape(), str1Unescaped) + 
XCTAssertEqual(try str1Unescaped.htmlEscape().htmlUnescape(strict: true), str1Unescaped) + XCTAssertEqual(try str1Unescaped.htmlEscape(useNamedReferences: false).htmlUnescape(strict: true), str1Unescaped) + XCTAssertEqual(try str1Unescaped.htmlEscape(decimal: true, useNamedReferences: false).htmlUnescape(strict: true), str1Unescaped) - XCTAssertEqual(str2Unescaped.htmlEscape().htmlUnescape(), str2Unescaped) - XCTAssertEqual(str2Unescaped.htmlEscape(useNamedReferences: false).htmlUnescape(), str2Unescaped) - XCTAssertEqual(str2Unescaped.htmlEscape(decimal: true, useNamedReferences: false).htmlUnescape(), str2Unescaped) + XCTAssertEqual(try str2Unescaped.htmlEscape().htmlUnescape(strict: true), str2Unescaped) + XCTAssertEqual(try str2Unescaped.htmlEscape(useNamedReferences: false).htmlUnescape(strict: true), str2Unescaped) + XCTAssertEqual(try str2Unescaped.htmlEscape(decimal: true, useNamedReferences: false).htmlUnescape(strict: true), str2Unescaped) - XCTAssertEqual(str2Unescaped.htmlEscape().htmlUnescape(), str2Unescaped) - XCTAssertEqual(str3Unescaped.htmlEscape(useNamedReferences: false).htmlUnescape(), str3Unescaped) - XCTAssertEqual(str3Unescaped.htmlEscape(decimal: true, useNamedReferences: false).htmlUnescape(), str3Unescaped) + XCTAssertEqual(try str3Unescaped.htmlEscape().htmlUnescape(strict: true), str3Unescaped) + XCTAssertEqual(try str3Unescaped.htmlEscape(useNamedReferences: false).htmlUnescape(strict: true), str3Unescaped) + XCTAssertEqual(try str3Unescaped.htmlEscape(decimal: true, useNamedReferences: false).htmlUnescape(strict: true), str3Unescaped) } func testEdgeCases() { - let emptyString = "" - XCTAssertEqual(emptyString.htmlEscape(), emptyString) - XCTAssertEqual(emptyString.htmlUnescape(), emptyString) + XCTAssertEqual("".htmlEscape(), "") + XCTAssertEqual(try "".htmlUnescape(strict: true), "") - let noSemicolonEnding = "한" - XCTAssertEqual(noSemicolonEnding.htmlUnescape(), noSemicolonEnding) - 
XCTAssertEqual(noSemicolonEnding.htmlUnescape(strict: false), "한") + let simpleString = "abcdefghijklmnopqrstuvwxyz1234567890" + XCTAssertEqual(simpleString.htmlEscape(), simpleString) + XCTAssertEqual(try simpleString.htmlUnescape(strict: true), simpleString) - let mixedEnding = "한" - XCTAssertEqual(mixedEnding.htmlUnescape(), "한") - XCTAssertEqual(mixedEnding.htmlUnescape(strict: false), "한") + XCTAssertEqual("한".htmlUnescape(), "한") - let undefinedNameReference = "&undefined;" - XCTAssertEqual(undefinedNameReference.htmlUnescape(), undefinedNameReference) + do { + _ = try "한".htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.MissingSemicolon { + XCTAssert(true) + } + catch { + XCTAssert(false) + } - let missingsemicolon = "some text here " some more text here" - XCTAssertEqual(missingsemicolon.htmlUnescape(), missingsemicolon) + let badEntity = "&some &text; here <script> some more; text here;" + XCTAssertEqual(badEntity.htmlUnescape(), "&some &text; here " + + XCTAssertEqual(html.htmlEscape(), "<script>alert("abc")</script>") + + // decode example + let htmlencoded = "<script>alert("abc")</script>" + + XCTAssertEqual(htmlencoded.htmlUnescape(), "") + + var text = "한, 한, ế, ế, 🇺🇸" + + XCTAssertEqual(text.htmlEscape(), "한, 한, ế, ế, 🇺🇸") + + XCTAssertEqual(text.htmlEscape(decimal: true), "한, 한, ế, ế, 🇺🇸") + + html = "" + + XCTAssertEqual(html.htmlEscape(), "<script>alert("abc")</script>") + + XCTAssertEqual(html.htmlEscape(useNamedReferences: false), "<script>alert("abc")</script>") + + text = "한" + + XCTAssertEqual(text.htmlUnescape(), "한") + + do { + _ = try text.htmlUnescape(strict: true) + XCTAssert(false) + } + catch ParseError.MissingSemicolon { + XCTAssert(true) + } + catch { + XCTAssert(false) + } } static var allTests : [(String, (HTMLEntitiesTests) -> () throws -> Void)] { return [ ("testNamedCharacterReferences", testNamedCharacterReferences), - ("testSpecialNumericCharacters", testSpecialNumericCharacters), + 
("testNumericCharacterReferences", testNumericCharacterReferences), ("testEncode", testEncode), ("testDecode", testDecode), ("testInvertibility", testInvertibility), - ("testEdgeCases", testEdgeCases) + ("testEdgeCases", testEdgeCases), + ("testREADMEExamples", testREADMEExamples) ] } }