diff --git a/dist/index.cjs b/dist/index.cjs index 8946fbc..bd6a596 100644 --- a/dist/index.cjs +++ b/dist/index.cjs @@ -8,6 +8,25 @@ function $parcel$export(e, n, v, s) { $parcel$defineInteropFlag(module.exports); $parcel$export(module.exports, "default", () => $882b6d93070905b3$export$2e2bcd8739ae039); +let $3aefcdddac337967$export$f435f793048e7a0f; +(function(TokenType) { + TokenType["Akshara"] = "akshara"; + TokenType["Symbol"] = "symbol"; + TokenType["Whitespace"] = "whitespace"; + TokenType["Invalid"] = "invalid"; + TokenType["Unrecognised"] = "unrecognised"; +})($3aefcdddac337967$export$f435f793048e7a0f || ($3aefcdddac337967$export$f435f793048e7a0f = {})); +class $3aefcdddac337967$export$50792b0e93539fde { + constructor(type, value, pos, attributes){ + this.type = type; + this.value = value; + this.from = pos; + this.to = pos + (value.length - 1); + this.attributes = attributes; + } +} + + const $25e20db0144ee577$export$7963e2ffafd9d670 = { अ: "vowels", आ: "vowels", @@ -194,43 +213,25 @@ class $823e42faaa542640$export$3ae2e3e9a9c21123 { } -let $3aefcdddac337967$export$f435f793048e7a0f; -(function(TokenType) { - TokenType["Akshara"] = "akshara"; - TokenType["Symbol"] = "symbol"; - TokenType["Whitespace"] = "whitespace"; - TokenType["Invalid"] = "invalid"; - TokenType["Unrecognised"] = "unrecognised"; -})($3aefcdddac337967$export$f435f793048e7a0f || ($3aefcdddac337967$export$f435f793048e7a0f = {})); -class $3aefcdddac337967$export$50792b0e93539fde { - constructor(type, value, pos, attributes){ - this.type = type; - this.value = value; - this.from = pos; - this.to = pos + (value.length - 1); - this.attributes = attributes; - } -} - -let $882b6d93070905b3$var$State; +let $da2f48c805d435e9$var$State; (function(State) { State[State["Initial"] = 0] = "Initial"; State[State["Vowel"] = 1] = "Vowel"; State[State["Consonant"] = 2] = "Consonant"; State[State["ConjunctConsonant"] = 3] = "ConjunctConsonant"; -})($882b6d93070905b3$var$State || ($882b6d93070905b3$var$State = {})); -const $882b6d93070905b3$var$tokenize = (input)=>{ +})($da2f48c805d435e9$var$State || ($da2f48c805d435e9$var$State = {})); +const $da2f48c805d435e9$export$660b2ee2d4fb4eff = (input)=>{ const tokens = []; let pos = 0; let acc = ""; let varnasLength = 0; - let state = $882b6d93070905b3$var$State.Initial; + let state = $da2f48c805d435e9$var$State.Initial; const resetVariables = ()=>{ pos = 0; acc = ""; varnasLength = 0; - state = $882b6d93070905b3$var$State.Initial; + state = $da2f48c805d435e9$var$State.Initial; }; const createToken = (tokenType, attributes)=>{ tokens.push(new (0, $3aefcdddac337967$export$50792b0e93539fde)(tokenType, acc, pos, attributes)); @@ -241,7 +242,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ const nextChar = new (0, $823e42faaa542640$export$3ae2e3e9a9c21123)(input[i + 1]); acc += char.value; switch(state){ - case $882b6d93070905b3$var$State.Initial: + case $da2f48c805d435e9$var$State.Initial: pos = i; if (char.isSymbol()) { createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Symbol); @@ -257,7 +258,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ } if (char.isVowel()) { if (nextChar.isVowelAttachment()) { - state = $882b6d93070905b3$var$State.Vowel; + state = $da2f48c805d435e9$var$State.Vowel; break; } createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara, { @@ -268,7 +269,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ if (char.isConsonant()) { if (nextChar.isNukta() || nextChar.isConsonantAttachment()) { varnasLength += 1; - state = $882b6d93070905b3$var$State.Consonant; + state = $da2f48c805d435e9$var$State.Consonant; break; } createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara, { @@ -278,7 +279,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ } createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Invalid); break; - case $882b6d93070905b3$var$State.Vowel: + case $da2f48c805d435e9$var$State.Vowel: if (char.isAccent()) { createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara, { varnasLength: 1 @@ -293,7 +294,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ break; } break; - case $882b6d93070905b3$var$State.Consonant: + case $da2f48c805d435e9$var$State.Consonant: if (char.isNukta()) { if (nextChar.isConsonantAttachment()) break; createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara, { @@ -304,7 +305,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ if (char.isVirama()) { if (nextChar.isJoiner()) break; if (nextChar.isConsonant()) { - state = $882b6d93070905b3$var$State.ConjunctConsonant; + state = $da2f48c805d435e9$var$State.ConjunctConsonant; break; } createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara, { @@ -315,7 +316,7 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ if (char.isJoiner()) { if (nextChar.isJoiner()) break; if (nextChar.isConsonant()) { - state = $882b6d93070905b3$var$State.ConjunctConsonant; + state = $da2f48c805d435e9$var$State.ConjunctConsonant; break; } createToken((0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara, { @@ -339,10 +340,10 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ }); break; } - case $882b6d93070905b3$var$State.ConjunctConsonant: + case $da2f48c805d435e9$var$State.ConjunctConsonant: if (nextChar.isNukta() || nextChar.isConsonantAttachment()) { varnasLength += 1; - state = $882b6d93070905b3$var$State.Consonant; + state = $da2f48c805d435e9$var$State.Consonant; break; } varnasLength += 2; @@ -356,9 +357,36 @@ const $882b6d93070905b3$var$tokenize = (input)=>{ } return tokens; }; -var $882b6d93070905b3$export$2e2bcd8739ae039 = { - tokenize: $882b6d93070905b3$var$tokenize + + +const $882b6d93070905b3$var$calcTotalVarnasLength = (tokens)=>tokens.reduce((total, akshara)=>akshara.attributes ? total + akshara.attributes.varnasLength : total, 0); +const $882b6d93070905b3$var$filterTokens = (tokens, tokenType)=>tokens.filter((token)=>token.type === tokenType); +const $882b6d93070905b3$var$analyse = (input)=>{ + const tokens = (0, $da2f48c805d435e9$export$660b2ee2d4fb4eff)(input); + const aksharas = $882b6d93070905b3$var$filterTokens(tokens, (0, $3aefcdddac337967$export$f435f793048e7a0f).Akshara); + // const varnas = []; + const chars = input.split(""); + const symbols = $882b6d93070905b3$var$filterTokens(tokens, (0, $3aefcdddac337967$export$f435f793048e7a0f).Symbol); + const invalidChars = $882b6d93070905b3$var$filterTokens(tokens, (0, $3aefcdddac337967$export$f435f793048e7a0f).Invalid); + const whitespaces = $882b6d93070905b3$var$filterTokens(tokens, (0, $3aefcdddac337967$export$f435f793048e7a0f).Whitespace); + const unrecognisedChars = $882b6d93070905b3$var$filterTokens(tokens, (0, $3aefcdddac337967$export$f435f793048e7a0f).Unrecognised); + const varnasLength = $882b6d93070905b3$var$calcTotalVarnasLength(aksharas); + return { + all: tokens, + aksharas: aksharas, + // varnas: varnas, + chars: chars, + symbols: symbols, + invalid: invalidChars, + whitespaces: whitespaces, + unrecognised: unrecognisedChars, + varnasLength: varnasLength + }; +}; +const $882b6d93070905b3$var$Aksharas = { + analyse: $882b6d93070905b3$var$analyse }; +var $882b6d93070905b3$export$2e2bcd8739ae039 = $882b6d93070905b3$var$Aksharas; //# sourceMappingURL=index.cjs.map diff --git a/dist/index.cjs.map b/dist/index.cjs.map index cb5db41..a82902e 100644 --- a/dist/index.cjs.map +++ b/dist/index.cjs.map @@ -1 +1 @@ -{"mappings":";;;;;;;;;;AEAO,MAAM,yCAAe,GAAwB;IAClD,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,OAAO;IACZ,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,CAAC,EAAE,SAAS;IACZ,CAAC,EAAE,SAAS;IACZ,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;CACf,AAAC;;AD9HF;IAEA,8BAeC;UAfI,QAAQ;IAAR,QAAQ,CACX,OAAK,IAAG,QAAQ;IADb,QAAQ,CAEX,QAAM,IAAG,SAAS;IAFf,QAAQ,CAGX,WAAS,IAAG,aAAa;IAHtB,QAAQ,CAIX,UAAQ,IAAG,YAAY;IAJpB,QAAQ,CAKX,QAAM,IAAG,SAAS;IALf,QAAQ,CAMX,QAAM,IAAG,QAAQ;IANd,QAAQ,CAOX,OAAK,IAAG,OAAO;IAPZ,QAAQ,CAQX,MAAI,IAAG,MAAM;IARV,QAAQ,CASX,KAAG,IAAG,KAAK;IATR,QAAQ,CAUX,WAAS,IAAG,YAAY;IAVrB,QAAQ,CAWX,gBAAc,IAAG,kBAAkB;IAXhC,QAAQ,CAYX,YAAU,IAAG,YAAY;IAZtB,QAAQ,CAaX,cAAY,IAAG,cAAc;IAb1B,QAAQ,CAcX,YAAU,IAAG,cAAc;GAdxB,8BAAQ,KAAR,8BAAQ;AAiBb,MAAM,4BAAM,GAAG;IAAC,GAAG;IAAE,GAAG;IAAE,GAAG;CAAC,AAAC;AAExB,MAAM,yCAAI;IAIf,YAAY,KAAa,CAAE;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;KACrC;IAED,WAAW,GAAG,CAAC,IAAY,GAAe;QACxC,IAAI,IAAI,KAAK,SAAS,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEnD,IAAI,KAAK,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEhD,OAAO,CAAA,GAAA,yCAAe,CAAA,CAAC,IAAI,CAAC,IAAI,8BAAQ,CAAC,YAAY,CAAC;KACvD,CAAC;IAEF,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,WAAW,GAAG,IACZ;YAAC,8BAAQ,CAAC,SAAS;YAAE,8BAAQ,CAAC,cAAc;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEpE,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,WAAW,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,SAAS,CAAC;IAErD,UAAU,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,QAAQ,CAAC;IAEnD,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,OAAO,GAAG,IAAM,4BAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE5C,QAAQ,GAAG,IAAM;YAAC,8BAAQ,CAAC,IAAI;YAAE,8BAAQ,CAAC,GAAG;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnE,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,cAAc,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,YAAY,CAAC;IAE3D,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,iBAAiB,GAAG,IAAM,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAE/D,qBAAqB,GAAG,IACtB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,UAAU,EAAE,IACjB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,WAAW,EAAE,IAClB,IAAI,CAAC,OAAO,EAAE,CAAC;IAEjB,qBAAqB,GAAG,IACtB,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;CAI1D;;AD/ED;AGAA,IAAO,yCAMN;UANW,SAAS;IAAT,SAAS,CACnB,SAAO,IAAG,SAAS;IADT,SAAS,CAEnB,QAAM,IAAG,QAAQ;IAFP,SAAS,CAGnB,YAAU,IAAG,YAAY;IAHf,SAAS,CAInB,SAAO,IAAG,SAAS;IAJT,SAAS,CAKnB,cAAY,IAAG,cAAc;GALnB,yCAAS,KAAT,yCAAS;AAUd,MAAM,yCAAK;IAOhB,YACE,IAAe,EACf,KAAa,EACb,GAAW,EACX,UAA4B,CAC5B;QACA,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,EAAE,GAAG,GAAG,GAAI,CAAA,KAAK,CAAC,MAAM,GAAG,CAAC,CAAA,AAAC,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;KAC9B;CAIF;;;IH7BD,2BAKC;UALI,KAAK;IAAL,KAAK,CAAL,KAAK,CACR,SAAO,IAAP,CAAO,IAAP,SAAO;IADJ,KAAK,CAAL,KAAK,CAER,OAAK,IAAL,CAAK,IAAL,OAAK;IAFF,KAAK,CAAL,KAAK,CAGR,WAAS,IAAT,CAAS,IAAT,WAAS;IAHN,KAAK,CAAL,KAAK,CAIR,mBAAiB,IAAjB,CAAiB,IAAjB,mBAAiB;GAJd,2BAAK,KAAL,2BAAK;AAOV,MAAM,8BAAQ,GAAG,CAAC,KAAa,GAAc;IAC3C,MAAM,MAAM,GAAY,EAAE,AAAC;IAE3B,IAAI,GAAG,GAAG,CAAC,AAAC;IACZ,IAAI,GAAG,GAAG,EAAE,AAAC;IACb,IAAI,YAAY,GAAW,CAAC,AAAC;IAC7B,IAAI,KAAK,GAAG,2BAAK,CAAC,OAAO,AAAC;IAE1B,MAAM,cAAc,GAAG,IAAM;QAC3B,GAAG,GAAG,CAAC,CAAC;QACR,GAAG,GAAG,EAAE,CAAC;QACT,YAAY,GAAG,CAAC,CAAC;QACjB,KAAK,GAAG,2BAAK,CAAC,OAAO,CAAC;KACvB,AAAC;IAEF,MAAM,WAAW,GAAG,CAAC,SAAoB,EAAE,UAA4B,GAAK;QAC1E,MAAM,CAAC,IAAI,CAAC,IAAI,CAAA,GAAA,yCAAK,CAAA,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;QACxD,cAAc,EAAE,CAAC;KAClB,AAAC;IAEF,IAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAE;QAC/C,MAAM,IAAI,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,AAAC;QAEhC,MAAM,QAAQ,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,AAAC;QAExC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC;QAElB,OAAQ,KAAK;YACX,KAAK,2BAAK,CAAC,OAAO;gBAChB,GAAG,GAAG,CAAC,CAAC;gBAER,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,MAAM,CAAC,CAAC;oBAC9B,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,YAAY,EAAE,EAAE;oBACvB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,UAAU,CAAC,CAAC;oBAClC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE;oBACzB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,YAAY,CAAC,CAAC;oBACpC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,iBAAiB,EAAE,EAAE;wBAChC,KAAK,GAAG,2BAAK,CAAC,KAAK,CAAC;wBACpB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;wBAC1D,YAAY,IAAI,CAAC,CAAC;wBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;wBACxB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM;YAGR,KAAK,2BAAK,CAAC,KAAK;gBACd,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,UAAU,EAAE,EAAE;oBACrB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,MAAM;YAGR,KAAK,2BAAK,CAAC,SAAS;gBAClB,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,qBAAqB,EAAE,EAAE;oBAChC,YAAY,IAAI,CAAC,CAAC;oBAElB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,YAAY,IAAI,CAAC,CAAC;oBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;YAGH,KAAK,2BAAK,CAAC,iBAAiB;gBAC1B,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;oBAC1D,YAAY,IAAI,CAAC,CAAC;oBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;oBACxB,MAAM;iBACP;gBAED,YAAY,IAAI,CAAC,CAAC;gBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;kCAAE,YAAY;iBAAE,CAAC,CAAC;gBACjD,MAAM;YAGR;gBACE,MAAM;SAET;KACF;IAED,OAAO,MAAM,CAAC;CACf,AAAC;IAEF,wCAA4B,GAAb;cAAE,8BAAQ;CAAE","sources":["src/index.ts","src/char.ts","src/deva-char-types.ts","src/token.ts"],"sourcesContent":["import { Char } from \"./char\";\nimport { Token, TokenAttributes, TokenType } from \"./token\";\n\nenum State {\n Initial,\n Vowel,\n Consonant,\n ConjunctConsonant,\n}\n\nconst tokenize = (input: string): Token[] => {\n const tokens: Token[] = [];\n\n let pos = 0;\n let acc = \"\";\n let varnasLength: number = 0;\n let state = State.Initial;\n\n const resetVariables = () => {\n pos = 0;\n acc = \"\";\n varnasLength = 0;\n state = State.Initial;\n };\n\n const createToken = (tokenType: TokenType, attributes?: TokenAttributes) => {\n tokens.push(new Token(tokenType, acc, pos, attributes));\n resetVariables();\n };\n\n for (let i = 0, l = input.length; i < l; i += 1) {\n const char = new Char(input[i]);\n\n const nextChar = new Char(input[i + 1]);\n\n acc += char.value;\n\n switch (state) {\n case State.Initial: {\n pos = i;\n\n if (char.isSymbol()) {\n createToken(TokenType.Symbol);\n break;\n }\n\n if (char.isWhitespace()) {\n createToken(TokenType.Whitespace);\n break;\n }\n\n if (char.isUnrecognised()) {\n createToken(TokenType.Unrecognised);\n break;\n }\n\n if (char.isVowel()) {\n if (nextChar.isVowelAttachment()) {\n state = State.Vowel;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isConsonant()) {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n createToken(TokenType.Invalid);\n break;\n }\n\n case State.Vowel: {\n if (char.isAccent()) {\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isYogavaha()) {\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n break;\n }\n\n case State.Consonant: {\n if (char.isNukta()) {\n if (nextChar.isConsonantAttachment()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n if (char.isVirama()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isJoiner()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMarkAttachment()) {\n varnasLength += 1;\n\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMark()) {\n if (nextChar.isVowelMarkAttachment()) {\n break;\n }\n\n varnasLength += 1;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n }\n\n case State.ConjunctConsonant: {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n varnasLength += 2;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n default: {\n break;\n }\n }\n }\n\n return tokens;\n};\n\nexport default { tokenize };\n","import { DEVA_CHAR_TYPES } from \"./deva-char-types\";\n\nenum CharType {\n Vowel = \"vowels\",\n Symbol = \"symbols\",\n VowelMark = \"vowel_marks\",\n Yogavaha = \"yogavaahas\",\n Accent = \"accents\",\n Virama = \"virama\",\n Nukta = \"nukta\",\n ZWNJ = \"zwnj\",\n ZWJ = \"zwj\",\n Consonant = \"consonants\",\n ExtraConsonant = \"extra_consonants\",\n Whitespace = \"whitespace\",\n Unrecognised = \"unrecognised\",\n EndOfInput = \"end_of_input\",\n}\n\nconst MATRAS = [\"१\", \"२\", \"३\"];\n\nexport class Char {\n value: string;\n type: CharType;\n\n constructor(value: string) {\n this.value = value;\n this.type = this.getCharType(value);\n }\n\n getCharType = (char: string): CharType => {\n if (char === undefined) return CharType.EndOfInput;\n\n if (/\\s/.test(char)) return CharType.Whitespace;\n\n return DEVA_CHAR_TYPES[char] ?? CharType.Unrecognised;\n };\n\n isVowel = () => this.type === CharType.Vowel;\n\n isConsonant = () =>\n [CharType.Consonant, CharType.ExtraConsonant].includes(this.type);\n\n isSymbol = () => this.type === CharType.Symbol;\n\n isVowelMark = () => this.type === CharType.VowelMark;\n\n isYogavaha = () => this.type === CharType.Yogavaha;\n\n isAccent = () => this.type === CharType.Accent;\n\n isVirama = () => this.type === CharType.Virama;\n\n isNukta = () => this.type === CharType.Nukta;\n\n isMatra = () => MATRAS.includes(this.value);\n\n isJoiner = () => [CharType.ZWNJ, CharType.ZWJ].includes(this.type);\n\n isWhitespace = () => this.type === CharType.Whitespace;\n\n isUnrecognised = () => this.type === CharType.Unrecognised;\n\n isEndOfInput = () => this.type === CharType.EndOfInput;\n\n isVowelAttachment = () => this.isYogavaha() || this.isAccent();\n\n isConsonantAttachment = () =>\n this.isVirama() ||\n this.isYogavaha() ||\n this.isAccent() ||\n this.isVowelMark() ||\n this.isMatra();\n\n isVowelMarkAttachment = () =>\n this.isYogavaha() || this.isAccent() || this.isMatra();\n\n // Only for debugging\n // toString = () => this.value;\n}\n","export const DEVA_CHAR_TYPES: Record = {\n अ: \"vowels\",\n आ: \"vowels\",\n इ: \"vowels\",\n ई: \"vowels\",\n उ: \"vowels\",\n ऊ: \"vowels\",\n ऋ: \"vowels\",\n ॠ: \"vowels\",\n ऌ: \"vowels\",\n ॡ: \"vowels\",\n ऎ: \"vowels\",\n ए: \"vowels\",\n ऐ: \"vowels\",\n ऒ: \"vowels\",\n ओ: \"vowels\",\n औ: \"vowels\",\n ऍ: \"vowels\",\n ऑ: \"vowels\",\n क: \"consonants\",\n ख: \"consonants\",\n ग: \"consonants\",\n घ: \"consonants\",\n ङ: \"consonants\",\n च: \"consonants\",\n छ: \"consonants\",\n ज: \"consonants\",\n झ: \"consonants\",\n ञ: \"consonants\",\n ट: \"consonants\",\n ठ: \"consonants\",\n ड: \"consonants\",\n ढ: \"consonants\",\n ण: \"consonants\",\n त: \"consonants\",\n थ: \"consonants\",\n द: \"consonants\",\n ध: \"consonants\",\n न: \"consonants\",\n प: \"consonants\",\n फ: \"consonants\",\n ब: \"consonants\",\n भ: \"consonants\",\n म: \"consonants\",\n य: \"consonants\",\n र: \"consonants\",\n ल: \"consonants\",\n व: \"consonants\",\n श: \"consonants\",\n ष: \"consonants\",\n स: \"consonants\",\n ह: \"consonants\",\n ळ: \"consonants\",\n क्ष: \"consonants\",\n ज्ञ: \"consonants\",\n क़: \"extra_consonants\",\n ख़: \"extra_consonants\",\n ग़: \"extra_consonants\",\n ज़: \"extra_consonants\",\n ड़: \"extra_consonants\",\n फ़: \"extra_consonants\",\n य़: \"extra_consonants\",\n ऱ: \"extra_consonants\",\n ऴ: \"extra_consonants\",\n ऩ: \"extra_consonants\",\n \"ा\": \"vowel_marks\",\n \"ि\": \"vowel_marks\",\n \"ी\": \"vowel_marks\",\n \"ु\": \"vowel_marks\",\n \"ू\": \"vowel_marks\",\n \"ृ\": \"vowel_marks\",\n \"ॄ\": \"vowel_marks\",\n \"ॢ\": \"vowel_marks\",\n \"ॣ\": \"vowel_marks\",\n \"ॆ\": \"vowel_marks\",\n \"े\": \"vowel_marks\",\n \"ै\": \"vowel_marks\",\n \"ॊ\": \"vowel_marks\",\n \"ो\": \"vowel_marks\",\n \"ौ\": \"vowel_marks\",\n \"ॅ\": \"vowel_marks\",\n \"ॉ\": \"vowel_marks\",\n \"ं\": \"yogavaahas\",\n \"ः\": \"yogavaahas\",\n \"ँ\": \"yogavaahas\",\n ᳵ: \"yogavaahas\",\n ᳶ: \"yogavaahas\",\n ꣳ: \"yogavaahas\",\n \"्\": \"virama\",\n \"़\": \"nukta\",\n \"‍\": \"zwj\",\n \"‌\": \"zwnj\",\n \"॑\": \"accents\",\n \"॒\": \"accents\",\n \"᳡\": \"accents\",\n \"꣡\": \"accents\",\n \"꣢\": \"accents\",\n \"꣣\": \"accents\",\n \"꣤\": \"accents\",\n \"꣥\": \"accents\",\n \"꣦\": \"accents\",\n \"꣧\": \"accents\",\n \"꣨\": \"accents\",\n \"꣩\": \"accents\",\n \"꣪\": \"accents\",\n \"꣫\": \"accents\",\n \"꣬\": \"accents\",\n \"꣭\": \"accents\",\n \"꣮\": \"accents\",\n \"꣯\": \"accents\",\n \"꣰\": \"accents\",\n \"꣱\": \"accents\",\n \"०\": \"symbols\",\n \"१\": \"symbols\",\n \"२\": \"symbols\",\n \"३\": \"symbols\",\n \"४\": \"symbols\",\n \"५\": \"symbols\",\n \"६\": \"symbols\",\n \"७\": \"symbols\",\n \"८\": \"symbols\",\n \"९\": \"symbols\",\n ॐ: \"symbols\",\n ऽ: \"symbols\",\n \"।\": \"symbols\",\n \"॥\": \"symbols\",\n};\n","export enum TokenType {\n Akshara = \"akshara\",\n Symbol = \"symbol\",\n Whitespace = \"whitespace\",\n Invalid = \"invalid\",\n Unrecognised = \"unrecognised\",\n}\n\nexport type TokenAttributes = Record;\n\nexport class Token {\n readonly type: TokenType;\n readonly value: string;\n readonly from: number;\n readonly to: number;\n readonly attributes?: TokenAttributes;\n\n constructor(\n type: TokenType,\n value: string,\n pos: number,\n attributes?: TokenAttributes\n ) {\n this.type = type;\n this.value = value;\n this.from = pos;\n this.to = pos + (value.length - 1);\n this.attributes = attributes;\n }\n\n // Only for debugging\n // toString = () => this.value;\n}\n"],"names":[],"version":3,"file":"index.cjs.map"} \ No newline at end of file +{"mappings":";;;;;;;;;;ACAA,IAAO,yCAMN;UANW,SAAS;IAAT,SAAS,CACnB,SAAO,IAAG,SAAS;IADT,SAAS,CAEnB,QAAM,IAAG,QAAQ;IAFP,SAAS,CAGnB,YAAU,IAAG,YAAY;IAHf,SAAS,CAInB,SAAO,IAAG,SAAS;IAJT,SAAS,CAKnB,cAAY,IAAG,cAAc;GALnB,yCAAS,KAAT,yCAAS;AAUd,MAAM,yCAAK;IAOhB,YACE,IAAe,EACf,KAAa,EACb,GAAW,EACX,UAA4B,CAC5B;QACA,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,EAAE,GAAG,GAAG,GAAI,CAAA,KAAK,CAAC,MAAM,GAAG,CAAC,CAAA,AAAC,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;KAC9B;CAIF;;ADhCD;AIAO,MAAM,yCAAe,GAAwB;IAClD,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,OAAO;IACZ,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,CAAC,EAAE,SAAS;IACZ,CAAC,EAAE,SAAS;IACZ,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;CACf,AAAC;;AD9HF;IAEA,8BAeC;UAfI,QAAQ;IAAR,QAAQ,CACX,OAAK,IAAG,QAAQ;IADb,QAAQ,CAEX,QAAM,IAAG,SAAS;IAFf,QAAQ,CAGX,WAAS,IAAG,aAAa;IAHtB,QAAQ,CAIX,UAAQ,IAAG,YAAY;IAJpB,QAAQ,CAKX,QAAM,IAAG,SAAS;IALf,QAAQ,CAMX,QAAM,IAAG,QAAQ;IANd,QAAQ,CAOX,OAAK,IAAG,OAAO;IAPZ,QAAQ,CAQX,MAAI,IAAG,MAAM;IARV,QAAQ,CASX,KAAG,IAAG,KAAK;IATR,QAAQ,CAUX,WAAS,IAAG,YAAY;IAVrB,QAAQ,CAWX,gBAAc,IAAG,kBAAkB;IAXhC,QAAQ,CAYX,YAAU,IAAG,YAAY;IAZtB,QAAQ,CAaX,cAAY,IAAG,cAAc;IAb1B,QAAQ,CAcX,YAAU,IAAG,cAAc;GAdxB,8BAAQ,KAAR,8BAAQ;AAiBb,MAAM,4BAAM,GAAG;IAAC,GAAG;IAAE,GAAG;IAAE,GAAG;CAAC,AAAC;AAExB,MAAM,yCAAI;IAIf,YAAY,KAAa,CAAE;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;KACrC;IAED,WAAW,GAAG,CAAC,IAAY,GAAe;QACxC,IAAI,IAAI,KAAK,SAAS,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEnD,IAAI,KAAK,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEhD,OAAO,CAAA,GAAA,yCAAe,CAAA,CAAC,IAAI,CAAC,IAAI,8BAAQ,CAAC,YAAY,CAAC;KACvD,CAAC;IAEF,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,WAAW,GAAG,IACZ;YAAC,8BAAQ,CAAC,SAAS;YAAE,8BAAQ,CAAC,cAAc;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEpE,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,WAAW,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,SAAS,CAAC;IAErD,UAAU,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,QAAQ,CAAC;IAEnD,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,OAAO,GAAG,IAAM,4BAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE5C,QAAQ,GAAG,IAAM;YAAC,8BAAQ,CAAC,IAAI;YAAE,8BAAQ,CAAC,GAAG;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnE,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,cAAc,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,YAAY,CAAC;IAE3D,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,iBAAiB,GAAG,IAAM,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAE/D,qBAAqB,GAAG,IACtB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,UAAU,EAAE,IACjB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,WAAW,EAAE,IAClB,IAAI,CAAC,OAAO,EAAE,CAAC;IAEjB,qBAAqB,GAAG,IACtB,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;CAI1D;;AD/ED;;IAGA,2BAKC;UALI,KAAK;IAAL,KAAK,CAAL,KAAK,CACR,SAAO,IAAP,CAAO,IAAP,SAAO;IADJ,KAAK,CAAL,KAAK,CAER,OAAK,IAAL,CAAK,IAAL,OAAK;IAFF,KAAK,CAAL,KAAK,CAGR,WAAS,IAAT,CAAS,IAAT,WAAS;IAHN,KAAK,CAAL,KAAK,CAIR,mBAAiB,IAAjB,CAAiB,IAAjB,mBAAiB;GAJd,2BAAK,KAAL,2BAAK;AAOH,MAAM,yCAAQ,GAAG,CAAC,KAAa,GAAc;IAClD,MAAM,MAAM,GAAY,EAAE,AAAC;IAE3B,IAAI,GAAG,GAAG,CAAC,AAAC;IACZ,IAAI,GAAG,GAAG,EAAE,AAAC;IACb,IAAI,YAAY,GAAW,CAAC,AAAC;IAC7B,IAAI,KAAK,GAAG,2BAAK,CAAC,OAAO,AAAC;IAE1B,MAAM,cAAc,GAAG,IAAM;QAC3B,GAAG,GAAG,CAAC,CAAC;QACR,GAAG,GAAG,EAAE,CAAC;QACT,YAAY,GAAG,CAAC,CAAC;QACjB,KAAK,GAAG,2BAAK,CAAC,OAAO,CAAC;KACvB,AAAC;IAEF,MAAM,WAAW,GAAG,CAAC,SAAoB,EAAE,UAA4B,GAAK;QAC1E,MAAM,CAAC,IAAI,CAAC,IAAI,CAAA,GAAA,yCAAK,CAAA,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;QACxD,cAAc,EAAE,CAAC;KAClB,AAAC;IAEF,IAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAE;QAC/C,MAAM,IAAI,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,AAAC;QAEhC,MAAM,QAAQ,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,AAAC;QAExC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC;QAElB,OAAQ,KAAK;YACX,KAAK,2BAAK,CAAC,OAAO;gBAChB,GAAG,GAAG,CAAC,CAAC;gBAER,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,MAAM,CAAC,CAAC;oBAC9B,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,YAAY,EAAE,EAAE;oBACvB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,UAAU,CAAC,CAAC;oBAClC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE;oBACzB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,YAAY,CAAC,CAAC;oBACpC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,iBAAiB,EAAE,EAAE;wBAChC,KAAK,GAAG,2BAAK,CAAC,KAAK,CAAC;wBACpB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;wBAC1D,YAAY,IAAI,CAAC,CAAC;wBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;wBACxB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM;YAGR,KAAK,2BAAK,CAAC,KAAK;gBACd,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,UAAU,EAAE,EAAE;oBACrB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,MAAM;YAGR,KAAK,2BAAK,CAAC,SAAS;gBAClB,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,qBAAqB,EAAE,EAAE;oBAChC,YAAY,IAAI,CAAC,CAAC;oBAElB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,YAAY,IAAI,CAAC,CAAC;oBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;YAGH,KAAK,2BAAK,CAAC,iBAAiB;gBAC1B,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;oBAC1D,YAAY,IAAI,CAAC,CAAC;oBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;oBACxB,MAAM;iBACP;gBAED,YAAY,IAAI,CAAC,CAAC;gBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;kCAAE,YAAY;iBAAE,CAAC,CAAC;gBACjD,MAAM;YAGR;gBACE,MAAM;SAET;KACF;IAED,OAAO,MAAM,CAAC;CACf,AAAC;;;AFnKF,MAAM,2CAAqB,GAAG,CAAC,MAAe,GAC5C,MAAM,CAAC,MAAM,CACX,CAAC,KAAK,EAAE,OAAO,GACb,OAAO,CAAC,UAAU,GAAG,KAAK,GAAG,OAAO,CAAC,UAAU,CAAC,YAAY,GAAG,KAAK,EACtE,CAAC,CACF,AAAC;AAEJ,MAAM,kCAAY,GAAG,CAAC,MAAe,EAAE,SAAoB,GACzD,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,GAAK,KAAK,CAAC,IAAI,KAAK,SAAS,CAAC,AAAC;AAErD,MAAM,6BAAO,GAAG,CAAC,KAAa,GAAc;IAC1C,MAAM,MAAM,GAAG,CAAA,GAAA,yCAAQ,CAAA,CAAC,KAAK,CAAC,AAAC;IAE/B,MAAM,QAAQ,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,AAAC;IAEzD,uBAAuB;IAEvB,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,AAAC;IAE9B,MAAM,OAAO,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,MAAM,CAAC,AAAC;IAEvD,MAAM,YAAY,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,AAAC;IAE7D,MAAM,WAAW,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,UAAU,CAAC,AAAC;IAE/D,MAAM,iBAAiB,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,YAAY,CAAC,AAAC;IAEvE,MAAM,YAAY,GAAG,2CAAqB,CAAC,QAAQ,CAAC,AAAC;IAErD,OAAO;QACL,GAAG,EAAE,MAAM;QACX,QAAQ,EAAE,QAAQ;QAClB,kBAAkB;QAClB,KAAK,EAAE,KAAK;QACZ,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,YAAY;QACrB,WAAW,EAAE,WAAW;QACxB,YAAY,EAAE,iBAAiB;QAC/B,YAAY,EAAE,YAAY;KAC3B,CAAC;CACH,AAAC;AAEF,MAAM,8BAAQ,GAAG;aACf,6BAAO;CACR,AAAC;IAEF,wCAAwB,GAAT,8BAAQ","sources":["src/index.ts","src/token.ts","src/tokenize.ts","src/char.ts","src/deva-char-types.ts"],"sourcesContent":["import { Token, TokenType } from \"./token\";\nimport { tokenize } from \"./tokenize\";\n\ntype Results = {\n all: Token[];\n aksharas: Token[];\n // varnas: Token[] /* Yet to be implemented */;\n symbols: Token[];\n whitespaces: Token[];\n chars: string[];\n invalid: Token[];\n unrecognised: Token[];\n varnasLength: number /* Will be deprecated */;\n};\n\nconst calcTotalVarnasLength = (tokens: Token[]) =>\n tokens.reduce(\n (total, akshara) =>\n akshara.attributes ? total + akshara.attributes.varnasLength : total,\n 0\n );\n\nconst filterTokens = (tokens: Token[], tokenType: TokenType) =>\n tokens.filter((token) => token.type === tokenType);\n\nconst analyse = (input: string): Results => {\n const tokens = tokenize(input);\n\n const aksharas = filterTokens(tokens, TokenType.Akshara);\n\n // const varnas = [];\n\n const chars = input.split(\"\");\n\n const symbols = filterTokens(tokens, TokenType.Symbol);\n\n const invalidChars = filterTokens(tokens, TokenType.Invalid);\n\n const whitespaces = filterTokens(tokens, TokenType.Whitespace);\n\n const unrecognisedChars = filterTokens(tokens, TokenType.Unrecognised);\n\n const varnasLength = calcTotalVarnasLength(aksharas);\n\n return {\n all: tokens,\n aksharas: aksharas,\n // varnas: varnas,\n chars: chars,\n symbols: symbols,\n invalid: invalidChars,\n whitespaces: whitespaces,\n unrecognised: unrecognisedChars,\n varnasLength: varnasLength,\n };\n};\n\nconst Aksharas = {\n analyse,\n};\n\nexport default Aksharas;\n","export enum TokenType {\n Akshara = \"akshara\",\n Symbol = \"symbol\",\n Whitespace = \"whitespace\",\n Invalid = \"invalid\",\n Unrecognised = \"unrecognised\",\n}\n\nexport type TokenAttributes = Record;\n\nexport class Token {\n readonly type: TokenType;\n readonly value: string;\n readonly from: number;\n readonly to: number;\n readonly attributes?: TokenAttributes;\n\n constructor(\n type: TokenType,\n value: string,\n pos: number,\n attributes?: TokenAttributes\n ) {\n this.type = type;\n this.value = value;\n this.from = pos;\n this.to = pos + (value.length - 1);\n this.attributes = attributes;\n }\n\n // Only for debugging\n // toString = () => this.value;\n}\n","import { Char } from \"./char\";\nimport { Token, TokenAttributes, TokenType } from \"./token\";\n\nenum State {\n Initial,\n Vowel,\n Consonant,\n ConjunctConsonant,\n}\n\nexport const tokenize = (input: string): Token[] => {\n const tokens: Token[] = [];\n\n let pos = 0;\n let acc = \"\";\n let varnasLength: number = 0;\n let state = State.Initial;\n\n const resetVariables = () => {\n pos = 0;\n acc = \"\";\n varnasLength = 0;\n state = State.Initial;\n };\n\n const createToken = (tokenType: TokenType, attributes?: TokenAttributes) => {\n tokens.push(new Token(tokenType, acc, pos, attributes));\n resetVariables();\n };\n\n for (let i = 0, l = input.length; i < l; i += 1) {\n const char = new Char(input[i]);\n\n const nextChar = new Char(input[i + 1]);\n\n acc += char.value;\n\n switch (state) {\n case State.Initial: {\n pos = i;\n\n if (char.isSymbol()) {\n createToken(TokenType.Symbol);\n break;\n }\n\n if (char.isWhitespace()) {\n createToken(TokenType.Whitespace);\n break;\n }\n\n if (char.isUnrecognised()) {\n createToken(TokenType.Unrecognised);\n break;\n }\n\n if (char.isVowel()) {\n if (nextChar.isVowelAttachment()) {\n state = State.Vowel;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isConsonant()) {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n createToken(TokenType.Invalid);\n break;\n }\n\n case State.Vowel: {\n if (char.isAccent()) {\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isYogavaha()) {\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n break;\n }\n\n case State.Consonant: {\n if (char.isNukta()) {\n if (nextChar.isConsonantAttachment()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n if (char.isVirama()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isJoiner()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMarkAttachment()) {\n varnasLength += 1;\n\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMark()) {\n if (nextChar.isVowelMarkAttachment()) {\n break;\n }\n\n varnasLength += 1;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n }\n\n case State.ConjunctConsonant: {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n varnasLength += 2;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n default: {\n break;\n }\n }\n }\n\n return tokens;\n};\n","import { DEVA_CHAR_TYPES } from \"./deva-char-types\";\n\nenum CharType {\n Vowel = \"vowels\",\n Symbol = \"symbols\",\n VowelMark = \"vowel_marks\",\n Yogavaha = \"yogavaahas\",\n Accent = \"accents\",\n Virama = \"virama\",\n Nukta = \"nukta\",\n ZWNJ = \"zwnj\",\n ZWJ = \"zwj\",\n Consonant = \"consonants\",\n ExtraConsonant = \"extra_consonants\",\n Whitespace = \"whitespace\",\n Unrecognised = \"unrecognised\",\n EndOfInput = \"end_of_input\",\n}\n\nconst MATRAS = [\"१\", \"२\", \"३\"];\n\nexport class Char {\n value: string;\n type: CharType;\n\n constructor(value: string) {\n this.value = value;\n this.type = this.getCharType(value);\n }\n\n getCharType = (char: string): CharType => {\n if (char === undefined) return CharType.EndOfInput;\n\n if (/\\s/.test(char)) return CharType.Whitespace;\n\n return DEVA_CHAR_TYPES[char] ?? CharType.Unrecognised;\n };\n\n isVowel = () => this.type === CharType.Vowel;\n\n isConsonant = () =>\n [CharType.Consonant, CharType.ExtraConsonant].includes(this.type);\n\n isSymbol = () => this.type === CharType.Symbol;\n\n isVowelMark = () => this.type === CharType.VowelMark;\n\n isYogavaha = () => this.type === CharType.Yogavaha;\n\n isAccent = () => this.type === CharType.Accent;\n\n isVirama = () => this.type === CharType.Virama;\n\n isNukta = () => this.type === CharType.Nukta;\n\n isMatra = () => MATRAS.includes(this.value);\n\n isJoiner = () => [CharType.ZWNJ, CharType.ZWJ].includes(this.type);\n\n isWhitespace = () => this.type === CharType.Whitespace;\n\n isUnrecognised = () => this.type === CharType.Unrecognised;\n\n isEndOfInput = () => this.type === CharType.EndOfInput;\n\n isVowelAttachment = () => this.isYogavaha() || this.isAccent();\n\n isConsonantAttachment = () =>\n this.isVirama() ||\n this.isYogavaha() ||\n this.isAccent() ||\n this.isVowelMark() ||\n this.isMatra();\n\n isVowelMarkAttachment = () =>\n this.isYogavaha() || this.isAccent() || this.isMatra();\n\n // Only for debugging\n // toString = () => this.value;\n}\n","export const DEVA_CHAR_TYPES: Record = {\n अ: \"vowels\",\n आ: \"vowels\",\n इ: \"vowels\",\n ई: \"vowels\",\n उ: \"vowels\",\n ऊ: \"vowels\",\n ऋ: \"vowels\",\n ॠ: \"vowels\",\n ऌ: \"vowels\",\n ॡ: \"vowels\",\n ऎ: \"vowels\",\n ए: \"vowels\",\n ऐ: \"vowels\",\n ऒ: \"vowels\",\n ओ: \"vowels\",\n औ: \"vowels\",\n ऍ: \"vowels\",\n ऑ: \"vowels\",\n क: \"consonants\",\n ख: \"consonants\",\n ग: \"consonants\",\n घ: \"consonants\",\n ङ: \"consonants\",\n च: \"consonants\",\n छ: \"consonants\",\n ज: \"consonants\",\n झ: \"consonants\",\n ञ: \"consonants\",\n ट: \"consonants\",\n ठ: \"consonants\",\n ड: \"consonants\",\n ढ: \"consonants\",\n ण: \"consonants\",\n त: \"consonants\",\n थ: \"consonants\",\n द: \"consonants\",\n ध: \"consonants\",\n न: \"consonants\",\n प: \"consonants\",\n फ: \"consonants\",\n ब: \"consonants\",\n भ: \"consonants\",\n म: \"consonants\",\n य: \"consonants\",\n र: \"consonants\",\n ल: \"consonants\",\n व: \"consonants\",\n श: \"consonants\",\n ष: \"consonants\",\n स: \"consonants\",\n ह: \"consonants\",\n ळ: \"consonants\",\n क्ष: \"consonants\",\n ज्ञ: \"consonants\",\n क़: \"extra_consonants\",\n ख़: \"extra_consonants\",\n ग़: \"extra_consonants\",\n ज़: \"extra_consonants\",\n ड़: \"extra_consonants\",\n फ़: \"extra_consonants\",\n य़: \"extra_consonants\",\n ऱ: \"extra_consonants\",\n ऴ: \"extra_consonants\",\n ऩ: \"extra_consonants\",\n \"ा\": \"vowel_marks\",\n \"ि\": \"vowel_marks\",\n \"ी\": \"vowel_marks\",\n \"ु\": \"vowel_marks\",\n \"ू\": \"vowel_marks\",\n \"ृ\": \"vowel_marks\",\n \"ॄ\": \"vowel_marks\",\n \"ॢ\": \"vowel_marks\",\n \"ॣ\": \"vowel_marks\",\n \"ॆ\": \"vowel_marks\",\n \"े\": \"vowel_marks\",\n \"ै\": \"vowel_marks\",\n \"ॊ\": \"vowel_marks\",\n \"ो\": \"vowel_marks\",\n \"ौ\": \"vowel_marks\",\n \"ॅ\": \"vowel_marks\",\n \"ॉ\": \"vowel_marks\",\n \"ं\": \"yogavaahas\",\n \"ः\": \"yogavaahas\",\n \"ँ\": \"yogavaahas\",\n ᳵ: \"yogavaahas\",\n ᳶ: \"yogavaahas\",\n ꣳ: \"yogavaahas\",\n \"्\": \"virama\",\n \"़\": \"nukta\",\n \"‍\": \"zwj\",\n \"‌\": \"zwnj\",\n \"॑\": \"accents\",\n \"॒\": \"accents\",\n \"᳡\": \"accents\",\n \"꣡\": \"accents\",\n \"꣢\": \"accents\",\n \"꣣\": \"accents\",\n \"꣤\": \"accents\",\n \"꣥\": \"accents\",\n \"꣦\": \"accents\",\n \"꣧\": \"accents\",\n \"꣨\": \"accents\",\n \"꣩\": \"accents\",\n \"꣪\": \"accents\",\n \"꣫\": \"accents\",\n \"꣬\": \"accents\",\n \"꣭\": \"accents\",\n \"꣮\": \"accents\",\n \"꣯\": \"accents\",\n \"꣰\": \"accents\",\n \"꣱\": \"accents\",\n \"०\": \"symbols\",\n \"१\": \"symbols\",\n \"२\": \"symbols\",\n \"३\": \"symbols\",\n \"४\": \"symbols\",\n \"५\": \"symbols\",\n \"६\": \"symbols\",\n \"७\": \"symbols\",\n \"८\": \"symbols\",\n \"९\": \"symbols\",\n ॐ: \"symbols\",\n ऽ: \"symbols\",\n \"।\": \"symbols\",\n \"॥\": \"symbols\",\n};\n"],"names":[],"version":3,"file":"index.cjs.map"} \ No newline at end of file diff --git a/dist/index.d.ts b/dist/index.d.ts index 3acb670..327dcc1 100644 --- a/dist/index.d.ts +++ b/dist/index.d.ts @@ -14,9 +14,19 @@ declare class Token { readonly attributes?: TokenAttributes; constructor(type: TokenType, value: string, pos: number, attributes?: TokenAttributes); } -declare const _default: { - tokenize: (input: string) => Token[]; +type Results = { + all: Token[]; + aksharas: Token[]; + symbols: Token[]; + whitespaces: Token[]; + chars: string[]; + invalid: Token[]; + unrecognised: Token[]; + varnasLength: number; }; -export default _default; +declare const Aksharas: { + analyse: (input: string) => Results; +}; +export default Aksharas; //# sourceMappingURL=index.d.ts.map diff --git a/dist/index.d.ts.map b/dist/index.d.ts.map index 71ecd0e..49e7405 100644 --- a/dist/index.d.ts.map +++ b/dist/index.d.ts.map @@ -1 +1 @@ -{"mappings":"AEAA;IACE,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;IACzB,OAAO,YAAY;IACnB,YAAY,iBAAiB;CAC9B;AAED,uBAA8B,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAElD;IACE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,UAAU,CAAC,EAAE,eAAe,CAAC;gBAGpC,IAAI,EAAE,SAAS,EACf,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,MAAM,EACX,UAAU,CAAC,EAAE,eAAe;CAW/B;;;;ACoJD,wBAA4B","sources":["src/src/deva-char-types.ts","src/src/char.ts","src/src/token.ts","src/src/index.ts","src/index.ts"],"sourcesContent":[null,null,null,null,"import { Char } from \"./char\";\nimport { Token, TokenAttributes, TokenType } from \"./token\";\n\nenum State {\n Initial,\n Vowel,\n Consonant,\n ConjunctConsonant,\n}\n\nconst tokenize = (input: string): Token[] => {\n const tokens: Token[] = [];\n\n let pos = 0;\n let acc = \"\";\n let varnasLength: number = 0;\n let state = State.Initial;\n\n const resetVariables = () => {\n pos = 0;\n acc = \"\";\n varnasLength = 0;\n state = State.Initial;\n };\n\n const createToken = (tokenType: TokenType, attributes?: TokenAttributes) => {\n tokens.push(new Token(tokenType, acc, pos, attributes));\n resetVariables();\n };\n\n for (let i = 0, l = input.length; i < l; i += 1) {\n const char = new Char(input[i]);\n\n const nextChar = new Char(input[i + 1]);\n\n acc += char.value;\n\n switch (state) {\n case State.Initial: {\n pos = i;\n\n if (char.isSymbol()) {\n createToken(TokenType.Symbol);\n break;\n }\n\n if (char.isWhitespace()) {\n createToken(TokenType.Whitespace);\n break;\n }\n\n if (char.isUnrecognised()) {\n createToken(TokenType.Unrecognised);\n break;\n }\n\n if (char.isVowel()) {\n if (nextChar.isVowelAttachment()) {\n state = State.Vowel;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isConsonant()) {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n createToken(TokenType.Invalid);\n break;\n }\n\n case State.Vowel: {\n if (char.isAccent()) {\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isYogavaha()) {\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n break;\n }\n\n case State.Consonant: {\n if (char.isNukta()) {\n if (nextChar.isConsonantAttachment()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n if (char.isVirama()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isJoiner()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMarkAttachment()) {\n varnasLength += 1;\n\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMark()) {\n if (nextChar.isVowelMarkAttachment()) {\n break;\n }\n\n varnasLength += 1;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n }\n\n case State.ConjunctConsonant: {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n varnasLength += 2;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n default: {\n break;\n }\n }\n }\n\n return tokens;\n};\n\nexport default { tokenize };\n"],"names":[],"version":3,"file":"index.d.ts.map"} \ No newline at end of file +{"mappings":"AAAA;IACE,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;IACzB,OAAO,YAAY;IACnB,YAAY,iBAAiB;CAC9B;AAED,uBAA8B,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAElD;IACE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,UAAU,CAAC,EAAE,eAAe,CAAC;gBAGpC,IAAI,EAAE,SAAS,EACf,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,MAAM,EACX,UAAU,CAAC,EAAE,eAAe;CAW/B;AI7BD,eAAe;IACb,GAAG,EAAE,KAAK,EAAE,CAAC;IACb,QAAQ,EAAE,KAAK,EAAE,CAAC;IAElB,OAAO,EAAE,KAAK,EAAE,CAAC;IACjB,WAAW,EAAE,KAAK,EAAE,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,EAAE,KAAK,EAAE,CAAC;IACjB,YAAY,EAAE,KAAK,EAAE,CAAC;IACtB,YAAY,EAAE,MAAM,CAA0B;CAC/C,CAAC;AA4CF,QAAA,MAAM,QAAQ;qBAhCU,MAAM,KAAG,OAAO;CAkCvC,CAAC;AAEF,eAAe,QAAQ,CAAC","sources":["src/src/token.ts","src/src/deva-char-types.ts","src/src/char.ts","src/src/tokenize.ts","src/src/index.ts","src/index.ts"],"sourcesContent":[null,null,null,null,null,"import { Token, TokenType } from \"./token\";\nimport { tokenize } from \"./tokenize\";\n\ntype Results = {\n all: Token[];\n aksharas: Token[];\n // varnas: Token[] /* Yet to be implemented */;\n symbols: Token[];\n whitespaces: Token[];\n chars: string[];\n invalid: Token[];\n unrecognised: Token[];\n varnasLength: number /* Will be deprecated */;\n};\n\nconst calcTotalVarnasLength = (tokens: Token[]) =>\n tokens.reduce(\n (total, akshara) =>\n akshara.attributes ? total + akshara.attributes.varnasLength : total,\n 0\n );\n\nconst filterTokens = (tokens: Token[], tokenType: TokenType) =>\n tokens.filter((token) => token.type === tokenType);\n\nconst analyse = (input: string): Results => {\n const tokens = tokenize(input);\n\n const aksharas = filterTokens(tokens, TokenType.Akshara);\n\n // const varnas = [];\n\n const chars = input.split(\"\");\n\n const symbols = filterTokens(tokens, TokenType.Symbol);\n\n const invalidChars = filterTokens(tokens, TokenType.Invalid);\n\n const whitespaces = filterTokens(tokens, TokenType.Whitespace);\n\n const unrecognisedChars = filterTokens(tokens, TokenType.Unrecognised);\n\n const varnasLength = calcTotalVarnasLength(aksharas);\n\n return {\n all: tokens,\n aksharas: aksharas,\n // varnas: varnas,\n chars: chars,\n symbols: symbols,\n invalid: invalidChars,\n whitespaces: whitespaces,\n unrecognised: unrecognisedChars,\n varnasLength: varnasLength,\n };\n};\n\nconst Aksharas = {\n analyse,\n};\n\nexport default Aksharas;\n"],"names":[],"version":3,"file":"index.d.ts.map"} \ No newline at end of file diff --git a/dist/index.mjs b/dist/index.mjs index 3ce852f..edaa45e 100644 --- a/dist/index.mjs +++ b/dist/index.mjs @@ -1,3 +1,22 @@ +let $66d137fe0087513e$export$f435f793048e7a0f; +(function(TokenType) { + TokenType["Akshara"] = "akshara"; + TokenType["Symbol"] = "symbol"; + TokenType["Whitespace"] = "whitespace"; + TokenType["Invalid"] = "invalid"; + TokenType["Unrecognised"] = "unrecognised"; +})($66d137fe0087513e$export$f435f793048e7a0f || ($66d137fe0087513e$export$f435f793048e7a0f = {})); +class $66d137fe0087513e$export$50792b0e93539fde { + constructor(type, value, pos, attributes){ + this.type = type; + this.value = value; + this.from = pos; + this.to = pos + (value.length - 1); + this.attributes = attributes; + } +} + + const $dbcc9feb5155574f$export$7963e2ffafd9d670 = { अ: "vowels", आ: "vowels", @@ -184,43 +203,25 @@ class $21fe2fa54792efd1$export$3ae2e3e9a9c21123 { } -let $66d137fe0087513e$export$f435f793048e7a0f; -(function(TokenType) { - TokenType["Akshara"] = "akshara"; - TokenType["Symbol"] = "symbol"; - TokenType["Whitespace"] = "whitespace"; - TokenType["Invalid"] = "invalid"; - TokenType["Unrecognised"] = "unrecognised"; -})($66d137fe0087513e$export$f435f793048e7a0f || ($66d137fe0087513e$export$f435f793048e7a0f = {})); -class $66d137fe0087513e$export$50792b0e93539fde { - constructor(type, value, pos, attributes){ - this.type = type; - this.value = value; - this.from = pos; - this.to = pos + (value.length - 1); - this.attributes = attributes; - } -} - -let $149c1bd638913645$var$State; +let $b4bfbd25bd11531b$var$State; (function(State) { State[State["Initial"] = 0] = "Initial"; State[State["Vowel"] = 1] = "Vowel"; State[State["Consonant"] = 2] = "Consonant"; State[State["ConjunctConsonant"] = 3] = "ConjunctConsonant"; -})($149c1bd638913645$var$State || ($149c1bd638913645$var$State = {})); -const $149c1bd638913645$var$tokenize = (input)=>{ +})($b4bfbd25bd11531b$var$State || ($b4bfbd25bd11531b$var$State = {})); +const $b4bfbd25bd11531b$export$660b2ee2d4fb4eff = (input)=>{ const tokens = []; let pos = 0; let acc = ""; let varnasLength = 0; - let state = $149c1bd638913645$var$State.Initial; + let state = $b4bfbd25bd11531b$var$State.Initial; const resetVariables = ()=>{ pos = 0; acc = ""; varnasLength = 0; - state = $149c1bd638913645$var$State.Initial; + state = $b4bfbd25bd11531b$var$State.Initial; }; const createToken = (tokenType, attributes)=>{ tokens.push(new (0, $66d137fe0087513e$export$50792b0e93539fde)(tokenType, acc, pos, attributes)); @@ -231,7 +232,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ const nextChar = new (0, $21fe2fa54792efd1$export$3ae2e3e9a9c21123)(input[i + 1]); acc += char.value; switch(state){ - case $149c1bd638913645$var$State.Initial: + case $b4bfbd25bd11531b$var$State.Initial: pos = i; if (char.isSymbol()) { createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Symbol); @@ -247,7 +248,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ } if (char.isVowel()) { if (nextChar.isVowelAttachment()) { - state = $149c1bd638913645$var$State.Vowel; + state = $b4bfbd25bd11531b$var$State.Vowel; break; } createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara, { @@ -258,7 +259,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ if (char.isConsonant()) { if (nextChar.isNukta() || nextChar.isConsonantAttachment()) { varnasLength += 1; - state = $149c1bd638913645$var$State.Consonant; + state = $b4bfbd25bd11531b$var$State.Consonant; break; } createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara, { @@ -268,7 +269,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ } createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Invalid); break; - case $149c1bd638913645$var$State.Vowel: + case $b4bfbd25bd11531b$var$State.Vowel: if (char.isAccent()) { createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara, { varnasLength: 1 @@ -283,7 +284,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ break; } break; - case $149c1bd638913645$var$State.Consonant: + case $b4bfbd25bd11531b$var$State.Consonant: if (char.isNukta()) { if (nextChar.isConsonantAttachment()) break; createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara, { @@ -294,7 +295,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ if (char.isVirama()) { if (nextChar.isJoiner()) break; if (nextChar.isConsonant()) { - state = $149c1bd638913645$var$State.ConjunctConsonant; + state = $b4bfbd25bd11531b$var$State.ConjunctConsonant; break; } createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara, { @@ -305,7 +306,7 @@ const $149c1bd638913645$var$tokenize = (input)=>{ if (char.isJoiner()) { if (nextChar.isJoiner()) break; if (nextChar.isConsonant()) { - state = $149c1bd638913645$var$State.ConjunctConsonant; + state = $b4bfbd25bd11531b$var$State.ConjunctConsonant; break; } createToken((0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara, { @@ -329,10 +330,10 @@ const $149c1bd638913645$var$tokenize = (input)=>{ }); break; } - case $149c1bd638913645$var$State.ConjunctConsonant: + case $b4bfbd25bd11531b$var$State.ConjunctConsonant: if (nextChar.isNukta() || nextChar.isConsonantAttachment()) { varnasLength += 1; - state = $149c1bd638913645$var$State.Consonant; + state = $b4bfbd25bd11531b$var$State.Consonant; break; } varnasLength += 2; @@ -346,9 +347,36 @@ const $149c1bd638913645$var$tokenize = (input)=>{ } return tokens; }; -var $149c1bd638913645$export$2e2bcd8739ae039 = { - tokenize: $149c1bd638913645$var$tokenize + + +const $149c1bd638913645$var$calcTotalVarnasLength = (tokens)=>tokens.reduce((total, akshara)=>akshara.attributes ? total + akshara.attributes.varnasLength : total, 0); +const $149c1bd638913645$var$filterTokens = (tokens, tokenType)=>tokens.filter((token)=>token.type === tokenType); +const $149c1bd638913645$var$analyse = (input)=>{ + const tokens = (0, $b4bfbd25bd11531b$export$660b2ee2d4fb4eff)(input); + const aksharas = $149c1bd638913645$var$filterTokens(tokens, (0, $66d137fe0087513e$export$f435f793048e7a0f).Akshara); + // const varnas = []; + const chars = input.split(""); + const symbols = $149c1bd638913645$var$filterTokens(tokens, (0, $66d137fe0087513e$export$f435f793048e7a0f).Symbol); + const invalidChars = $149c1bd638913645$var$filterTokens(tokens, (0, $66d137fe0087513e$export$f435f793048e7a0f).Invalid); + const whitespaces = $149c1bd638913645$var$filterTokens(tokens, (0, $66d137fe0087513e$export$f435f793048e7a0f).Whitespace); + const unrecognisedChars = $149c1bd638913645$var$filterTokens(tokens, (0, $66d137fe0087513e$export$f435f793048e7a0f).Unrecognised); + const varnasLength = $149c1bd638913645$var$calcTotalVarnasLength(aksharas); + return { + all: tokens, + aksharas: aksharas, + // varnas: varnas, + chars: chars, + symbols: symbols, + invalid: invalidChars, + whitespaces: whitespaces, + unrecognised: unrecognisedChars, + varnasLength: varnasLength + }; +}; +const $149c1bd638913645$var$Aksharas = { + analyse: $149c1bd638913645$var$analyse }; +var $149c1bd638913645$export$2e2bcd8739ae039 = $149c1bd638913645$var$Aksharas; export {$149c1bd638913645$export$2e2bcd8739ae039 as default}; diff --git a/dist/index.mjs.map b/dist/index.mjs.map index 252d99d..9b67bd1 100644 --- a/dist/index.mjs.map +++ b/dist/index.mjs.map @@ -1 +1 @@ -{"mappings":"AEAO,MAAM,yCAAe,GAAwB;IAClD,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,OAAO;IACZ,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,CAAC,EAAE,SAAS;IACZ,CAAC,EAAE,SAAS;IACZ,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;CACf,AAAC;;AD9HF;IAEA,8BAeC;UAfI,QAAQ;IAAR,QAAQ,CACX,OAAK,IAAG,QAAQ;IADb,QAAQ,CAEX,QAAM,IAAG,SAAS;IAFf,QAAQ,CAGX,WAAS,IAAG,aAAa;IAHtB,QAAQ,CAIX,UAAQ,IAAG,YAAY;IAJpB,QAAQ,CAKX,QAAM,IAAG,SAAS;IALf,QAAQ,CAMX,QAAM,IAAG,QAAQ;IANd,QAAQ,CAOX,OAAK,IAAG,OAAO;IAPZ,QAAQ,CAQX,MAAI,IAAG,MAAM;IARV,QAAQ,CASX,KAAG,IAAG,KAAK;IATR,QAAQ,CAUX,WAAS,IAAG,YAAY;IAVrB,QAAQ,CAWX,gBAAc,IAAG,kBAAkB;IAXhC,QAAQ,CAYX,YAAU,IAAG,YAAY;IAZtB,QAAQ,CAaX,cAAY,IAAG,cAAc;IAb1B,QAAQ,CAcX,YAAU,IAAG,cAAc;GAdxB,8BAAQ,KAAR,8BAAQ;AAiBb,MAAM,4BAAM,GAAG;IAAC,GAAG;IAAE,GAAG;IAAE,GAAG;CAAC,AAAC;AAExB,MAAM,yCAAI;IAIf,YAAY,KAAa,CAAE;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;KACrC;IAED,WAAW,GAAG,CAAC,IAAY,GAAe;QACxC,IAAI,IAAI,KAAK,SAAS,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEnD,IAAI,KAAK,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEhD,OAAO,CAAA,GAAA,yCAAe,CAAA,CAAC,IAAI,CAAC,IAAI,8BAAQ,CAAC,YAAY,CAAC;KACvD,CAAC;IAEF,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,WAAW,GAAG,IACZ;YAAC,8BAAQ,CAAC,SAAS;YAAE,8BAAQ,CAAC,cAAc;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEpE,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,WAAW,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,SAAS,CAAC;IAErD,UAAU,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,QAAQ,CAAC;IAEnD,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,OAAO,GAAG,IAAM,4BAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE5C,QAAQ,GAAG,IAAM;YAAC,8BAAQ,CAAC,IAAI;YAAE,8BAAQ,CAAC,GAAG;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnE,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,cAAc,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,YAAY,CAAC;IAE3D,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,iBAAiB,GAAG,IAAM,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAE/D,qBAAqB,GAAG,IACtB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,UAAU,EAAE,IACjB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,WAAW,EAAE,IAClB,IAAI,CAAC,OAAO,EAAE,CAAC;IAEjB,qBAAqB,GAAG,IACtB,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;CAI1D;;AD/ED;AGAA,IAAO,yCAMN;UANW,SAAS;IAAT,SAAS,CACnB,SAAO,IAAG,SAAS;IADT,SAAS,CAEnB,QAAM,IAAG,QAAQ;IAFP,SAAS,CAGnB,YAAU,IAAG,YAAY;IAHf,SAAS,CAInB,SAAO,IAAG,SAAS;IAJT,SAAS,CAKnB,cAAY,IAAG,cAAc;GALnB,yCAAS,KAAT,yCAAS;AAUd,MAAM,yCAAK;IAOhB,YACE,IAAe,EACf,KAAa,EACb,GAAW,EACX,UAA4B,CAC5B;QACA,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,EAAE,GAAG,GAAG,GAAI,CAAA,KAAK,CAAC,MAAM,GAAG,CAAC,CAAA,AAAC,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;KAC9B;CAIF;;;IH7BD,2BAKC;UALI,KAAK;IAAL,KAAK,CAAL,KAAK,CACR,SAAO,IAAP,CAAO,IAAP,SAAO;IADJ,KAAK,CAAL,KAAK,CAER,OAAK,IAAL,CAAK,IAAL,OAAK;IAFF,KAAK,CAAL,KAAK,CAGR,WAAS,IAAT,CAAS,IAAT,WAAS;IAHN,KAAK,CAAL,KAAK,CAIR,mBAAiB,IAAjB,CAAiB,IAAjB,mBAAiB;GAJd,2BAAK,KAAL,2BAAK;AAOV,MAAM,8BAAQ,GAAG,CAAC,KAAa,GAAc;IAC3C,MAAM,MAAM,GAAY,EAAE,AAAC;IAE3B,IAAI,GAAG,GAAG,CAAC,AAAC;IACZ,IAAI,GAAG,GAAG,EAAE,AAAC;IACb,IAAI,YAAY,GAAW,CAAC,AAAC;IAC7B,IAAI,KAAK,GAAG,2BAAK,CAAC,OAAO,AAAC;IAE1B,MAAM,cAAc,GAAG,IAAM;QAC3B,GAAG,GAAG,CAAC,CAAC;QACR,GAAG,GAAG,EAAE,CAAC;QACT,YAAY,GAAG,CAAC,CAAC;QACjB,KAAK,GAAG,2BAAK,CAAC,OAAO,CAAC;KACvB,AAAC;IAEF,MAAM,WAAW,GAAG,CAAC,SAAoB,EAAE,UAA4B,GAAK;QAC1E,MAAM,CAAC,IAAI,CAAC,IAAI,CAAA,GAAA,yCAAK,CAAA,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;QACxD,cAAc,EAAE,CAAC;KAClB,AAAC;IAEF,IAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAE;QAC/C,MAAM,IAAI,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,AAAC;QAEhC,MAAM,QAAQ,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,AAAC;QAExC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC;QAElB,OAAQ,KAAK;YACX,KAAK,2BAAK,CAAC,OAAO;gBAChB,GAAG,GAAG,CAAC,CAAC;gBAER,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,MAAM,CAAC,CAAC;oBAC9B,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,YAAY,EAAE,EAAE;oBACvB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,UAAU,CAAC,CAAC;oBAClC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE;oBACzB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,YAAY,CAAC,CAAC;oBACpC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,iBAAiB,EAAE,EAAE;wBAChC,KAAK,GAAG,2BAAK,CAAC,KAAK,CAAC;wBACpB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;wBAC1D,YAAY,IAAI,CAAC,CAAC;wBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;wBACxB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM;YAGR,KAAK,2BAAK,CAAC,KAAK;gBACd,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,UAAU,EAAE,EAAE;oBACrB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,MAAM;YAGR,KAAK,2BAAK,CAAC,SAAS;gBAClB,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,qBAAqB,EAAE,EAAE;oBAChC,YAAY,IAAI,CAAC,CAAC;oBAElB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,YAAY,IAAI,CAAC,CAAC;oBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;YAGH,KAAK,2BAAK,CAAC,iBAAiB;gBAC1B,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;oBAC1D,YAAY,IAAI,CAAC,CAAC;oBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;oBACxB,MAAM;iBACP;gBAED,YAAY,IAAI,CAAC,CAAC;gBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;kCAAE,YAAY;iBAAE,CAAC,CAAC;gBACjD,MAAM;YAGR;gBACE,MAAM;SAET;KACF;IAED,OAAO,MAAM,CAAC;CACf,AAAC;IAEF,wCAA4B,GAAb;cAAE,8BAAQ;CAAE","sources":["src/index.ts","src/char.ts","src/deva-char-types.ts","src/token.ts"],"sourcesContent":["import { Char } from \"./char\";\nimport { Token, TokenAttributes, TokenType } from \"./token\";\n\nenum State {\n Initial,\n Vowel,\n Consonant,\n ConjunctConsonant,\n}\n\nconst tokenize = (input: string): Token[] => {\n const tokens: Token[] = [];\n\n let pos = 0;\n let acc = \"\";\n let varnasLength: number = 0;\n let state = State.Initial;\n\n const resetVariables = () => {\n pos = 0;\n acc = \"\";\n varnasLength = 0;\n state = State.Initial;\n };\n\n const createToken = (tokenType: TokenType, attributes?: TokenAttributes) => {\n tokens.push(new Token(tokenType, acc, pos, attributes));\n resetVariables();\n };\n\n for (let i = 0, l = input.length; i < l; i += 1) {\n const char = new Char(input[i]);\n\n const nextChar = new Char(input[i + 1]);\n\n acc += char.value;\n\n switch (state) {\n case State.Initial: {\n pos = i;\n\n if (char.isSymbol()) {\n createToken(TokenType.Symbol);\n break;\n }\n\n if (char.isWhitespace()) {\n createToken(TokenType.Whitespace);\n break;\n }\n\n if (char.isUnrecognised()) {\n createToken(TokenType.Unrecognised);\n break;\n }\n\n if (char.isVowel()) {\n if (nextChar.isVowelAttachment()) {\n state = State.Vowel;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isConsonant()) {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n createToken(TokenType.Invalid);\n break;\n }\n\n case State.Vowel: {\n if (char.isAccent()) {\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isYogavaha()) {\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n break;\n }\n\n case State.Consonant: {\n if (char.isNukta()) {\n if (nextChar.isConsonantAttachment()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n if (char.isVirama()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isJoiner()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMarkAttachment()) {\n varnasLength += 1;\n\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMark()) {\n if (nextChar.isVowelMarkAttachment()) {\n break;\n }\n\n varnasLength += 1;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n }\n\n case State.ConjunctConsonant: {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n varnasLength += 2;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n default: {\n break;\n }\n }\n }\n\n return tokens;\n};\n\nexport default { tokenize };\n","import { DEVA_CHAR_TYPES } from \"./deva-char-types\";\n\nenum CharType {\n Vowel = \"vowels\",\n Symbol = \"symbols\",\n VowelMark = \"vowel_marks\",\n Yogavaha = \"yogavaahas\",\n Accent = \"accents\",\n Virama = \"virama\",\n Nukta = \"nukta\",\n ZWNJ = \"zwnj\",\n ZWJ = \"zwj\",\n Consonant = \"consonants\",\n ExtraConsonant = \"extra_consonants\",\n Whitespace = \"whitespace\",\n Unrecognised = \"unrecognised\",\n EndOfInput = \"end_of_input\",\n}\n\nconst MATRAS = [\"१\", \"२\", \"३\"];\n\nexport class Char {\n value: string;\n type: CharType;\n\n constructor(value: string) {\n this.value = value;\n this.type = this.getCharType(value);\n }\n\n getCharType = (char: string): CharType => {\n if (char === undefined) return CharType.EndOfInput;\n\n if (/\\s/.test(char)) return CharType.Whitespace;\n\n return DEVA_CHAR_TYPES[char] ?? CharType.Unrecognised;\n };\n\n isVowel = () => this.type === CharType.Vowel;\n\n isConsonant = () =>\n [CharType.Consonant, CharType.ExtraConsonant].includes(this.type);\n\n isSymbol = () => this.type === CharType.Symbol;\n\n isVowelMark = () => this.type === CharType.VowelMark;\n\n isYogavaha = () => this.type === CharType.Yogavaha;\n\n isAccent = () => this.type === CharType.Accent;\n\n isVirama = () => this.type === CharType.Virama;\n\n isNukta = () => this.type === CharType.Nukta;\n\n isMatra = () => MATRAS.includes(this.value);\n\n isJoiner = () => [CharType.ZWNJ, CharType.ZWJ].includes(this.type);\n\n isWhitespace = () => this.type === CharType.Whitespace;\n\n isUnrecognised = () => this.type === CharType.Unrecognised;\n\n isEndOfInput = () => this.type === CharType.EndOfInput;\n\n isVowelAttachment = () => this.isYogavaha() || this.isAccent();\n\n isConsonantAttachment = () =>\n this.isVirama() ||\n this.isYogavaha() ||\n this.isAccent() ||\n this.isVowelMark() ||\n this.isMatra();\n\n isVowelMarkAttachment = () =>\n this.isYogavaha() || this.isAccent() || this.isMatra();\n\n // Only for debugging\n // toString = () => this.value;\n}\n","export const DEVA_CHAR_TYPES: Record = {\n अ: \"vowels\",\n आ: \"vowels\",\n इ: \"vowels\",\n ई: \"vowels\",\n उ: \"vowels\",\n ऊ: \"vowels\",\n ऋ: \"vowels\",\n ॠ: \"vowels\",\n ऌ: \"vowels\",\n ॡ: \"vowels\",\n ऎ: \"vowels\",\n ए: \"vowels\",\n ऐ: \"vowels\",\n ऒ: \"vowels\",\n ओ: \"vowels\",\n औ: \"vowels\",\n ऍ: \"vowels\",\n ऑ: \"vowels\",\n क: \"consonants\",\n ख: \"consonants\",\n ग: \"consonants\",\n घ: \"consonants\",\n ङ: \"consonants\",\n च: \"consonants\",\n छ: \"consonants\",\n ज: \"consonants\",\n झ: \"consonants\",\n ञ: \"consonants\",\n ट: \"consonants\",\n ठ: \"consonants\",\n ड: \"consonants\",\n ढ: \"consonants\",\n ण: \"consonants\",\n त: \"consonants\",\n थ: \"consonants\",\n द: \"consonants\",\n ध: \"consonants\",\n न: \"consonants\",\n प: \"consonants\",\n फ: \"consonants\",\n ब: \"consonants\",\n भ: \"consonants\",\n म: \"consonants\",\n य: \"consonants\",\n र: \"consonants\",\n ल: \"consonants\",\n व: \"consonants\",\n श: \"consonants\",\n ष: \"consonants\",\n स: \"consonants\",\n ह: \"consonants\",\n ळ: \"consonants\",\n क्ष: \"consonants\",\n ज्ञ: \"consonants\",\n क़: \"extra_consonants\",\n ख़: \"extra_consonants\",\n ग़: \"extra_consonants\",\n ज़: \"extra_consonants\",\n ड़: \"extra_consonants\",\n फ़: \"extra_consonants\",\n य़: \"extra_consonants\",\n ऱ: \"extra_consonants\",\n ऴ: \"extra_consonants\",\n ऩ: \"extra_consonants\",\n \"ा\": \"vowel_marks\",\n \"ि\": \"vowel_marks\",\n \"ी\": \"vowel_marks\",\n \"ु\": \"vowel_marks\",\n \"ू\": \"vowel_marks\",\n \"ृ\": \"vowel_marks\",\n \"ॄ\": \"vowel_marks\",\n \"ॢ\": \"vowel_marks\",\n \"ॣ\": \"vowel_marks\",\n \"ॆ\": \"vowel_marks\",\n \"े\": \"vowel_marks\",\n \"ै\": \"vowel_marks\",\n \"ॊ\": \"vowel_marks\",\n \"ो\": \"vowel_marks\",\n \"ौ\": \"vowel_marks\",\n \"ॅ\": \"vowel_marks\",\n \"ॉ\": \"vowel_marks\",\n \"ं\": \"yogavaahas\",\n \"ः\": \"yogavaahas\",\n \"ँ\": \"yogavaahas\",\n ᳵ: \"yogavaahas\",\n ᳶ: \"yogavaahas\",\n ꣳ: \"yogavaahas\",\n \"्\": \"virama\",\n \"़\": \"nukta\",\n \"‍\": \"zwj\",\n \"‌\": \"zwnj\",\n \"॑\": \"accents\",\n \"॒\": \"accents\",\n \"᳡\": \"accents\",\n \"꣡\": \"accents\",\n \"꣢\": \"accents\",\n \"꣣\": \"accents\",\n \"꣤\": \"accents\",\n \"꣥\": \"accents\",\n \"꣦\": \"accents\",\n \"꣧\": \"accents\",\n \"꣨\": \"accents\",\n \"꣩\": \"accents\",\n \"꣪\": \"accents\",\n \"꣫\": \"accents\",\n \"꣬\": \"accents\",\n \"꣭\": \"accents\",\n \"꣮\": \"accents\",\n \"꣯\": \"accents\",\n \"꣰\": \"accents\",\n \"꣱\": \"accents\",\n \"०\": \"symbols\",\n \"१\": \"symbols\",\n \"२\": \"symbols\",\n \"३\": \"symbols\",\n \"४\": \"symbols\",\n \"५\": \"symbols\",\n \"६\": \"symbols\",\n \"७\": \"symbols\",\n \"८\": \"symbols\",\n \"९\": \"symbols\",\n ॐ: \"symbols\",\n ऽ: \"symbols\",\n \"।\": \"symbols\",\n \"॥\": \"symbols\",\n};\n","export enum TokenType {\n Akshara = \"akshara\",\n Symbol = \"symbol\",\n Whitespace = \"whitespace\",\n Invalid = \"invalid\",\n Unrecognised = \"unrecognised\",\n}\n\nexport type TokenAttributes = Record;\n\nexport class Token {\n readonly type: TokenType;\n readonly value: string;\n readonly from: number;\n readonly to: number;\n readonly attributes?: TokenAttributes;\n\n constructor(\n type: TokenType,\n value: string,\n pos: number,\n attributes?: TokenAttributes\n ) {\n this.type = type;\n this.value = value;\n this.from = pos;\n this.to = pos + (value.length - 1);\n this.attributes = attributes;\n }\n\n // Only for debugging\n // toString = () => this.value;\n}\n"],"names":[],"version":3,"file":"index.mjs.map"} \ No newline at end of file +{"mappings":"ACAA,IAAO,yCAMN;UANW,SAAS;IAAT,SAAS,CACnB,SAAO,IAAG,SAAS;IADT,SAAS,CAEnB,QAAM,IAAG,QAAQ;IAFP,SAAS,CAGnB,YAAU,IAAG,YAAY;IAHf,SAAS,CAInB,SAAO,IAAG,SAAS;IAJT,SAAS,CAKnB,cAAY,IAAG,cAAc;GALnB,yCAAS,KAAT,yCAAS;AAUd,MAAM,yCAAK;IAOhB,YACE,IAAe,EACf,KAAa,EACb,GAAW,EACX,UAA4B,CAC5B;QACA,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,EAAE,GAAG,GAAG,GAAI,CAAA,KAAK,CAAC,MAAM,GAAG,CAAC,CAAA,AAAC,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;KAC9B;CAIF;;ADhCD;AIAO,MAAM,yCAAe,GAAwB;IAClD,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,CAAC,EAAE,kBAAkB;IACrB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,aAAa;IAClB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,GAAG,EAAE,YAAY;IACjB,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,OAAO;IACZ,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;IACd,CAAC,EAAE,SAAS;IACZ,CAAC,EAAE,SAAS;IACZ,GAAG,EAAE,SAAS;IACd,GAAG,EAAE,SAAS;CACf,AAAC;;AD9HF;IAEA,8BAeC;UAfI,QAAQ;IAAR,QAAQ,CACX,OAAK,IAAG,QAAQ;IADb,QAAQ,CAEX,QAAM,IAAG,SAAS;IAFf,QAAQ,CAGX,WAAS,IAAG,aAAa;IAHtB,QAAQ,CAIX,UAAQ,IAAG,YAAY;IAJpB,QAAQ,CAKX,QAAM,IAAG,SAAS;IALf,QAAQ,CAMX,QAAM,IAAG,QAAQ;IANd,QAAQ,CAOX,OAAK,IAAG,OAAO;IAPZ,QAAQ,CAQX,MAAI,IAAG,MAAM;IARV,QAAQ,CASX,KAAG,IAAG,KAAK;IATR,QAAQ,CAUX,WAAS,IAAG,YAAY;IAVrB,QAAQ,CAWX,gBAAc,IAAG,kBAAkB;IAXhC,QAAQ,CAYX,YAAU,IAAG,YAAY;IAZtB,QAAQ,CAaX,cAAY,IAAG,cAAc;IAb1B,QAAQ,CAcX,YAAU,IAAG,cAAc;GAdxB,8BAAQ,KAAR,8BAAQ;AAiBb,MAAM,4BAAM,GAAG;IAAC,GAAG;IAAE,GAAG;IAAE,GAAG;CAAC,AAAC;AAExB,MAAM,yCAAI;IAIf,YAAY,KAAa,CAAE;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;KACrC;IAED,WAAW,GAAG,CAAC,IAAY,GAAe;QACxC,IAAI,IAAI,KAAK,SAAS,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEnD,IAAI,KAAK,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,8BAAQ,CAAC,UAAU,CAAC;QAEhD,OAAO,CAAA,GAAA,yCAAe,CAAA,CAAC,IAAI,CAAC,IAAI,8BAAQ,CAAC,YAAY,CAAC;KACvD,CAAC;IAEF,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,WAAW,GAAG,IACZ;YAAC,8BAAQ,CAAC,SAAS;YAAE,8BAAQ,CAAC,cAAc;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEpE,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,WAAW,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,SAAS,CAAC;IAErD,UAAU,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,QAAQ,CAAC;IAEnD,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,QAAQ,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,MAAM,CAAC;IAE/C,OAAO,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,KAAK,CAAC;IAE7C,OAAO,GAAG,IAAM,4BAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE5C,QAAQ,GAAG,IAAM;YAAC,8BAAQ,CAAC,IAAI;YAAE,8BAAQ,CAAC,GAAG;SAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnE,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,cAAc,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,YAAY,CAAC;IAE3D,YAAY,GAAG,IAAM,IAAI,CAAC,IAAI,KAAK,8BAAQ,CAAC,UAAU,CAAC;IAEvD,iBAAiB,GAAG,IAAM,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAE/D,qBAAqB,GAAG,IACtB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,UAAU,EAAE,IACjB,IAAI,CAAC,QAAQ,EAAE,IACf,IAAI,CAAC,WAAW,EAAE,IAClB,IAAI,CAAC,OAAO,EAAE,CAAC;IAEjB,qBAAqB,GAAG,IACtB,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;CAI1D;;AD/ED;;IAGA,2BAKC;UALI,KAAK;IAAL,KAAK,CAAL,KAAK,CACR,SAAO,IAAP,CAAO,IAAP,SAAO;IADJ,KAAK,CAAL,KAAK,CAER,OAAK,IAAL,CAAK,IAAL,OAAK;IAFF,KAAK,CAAL,KAAK,CAGR,WAAS,IAAT,CAAS,IAAT,WAAS;IAHN,KAAK,CAAL,KAAK,CAIR,mBAAiB,IAAjB,CAAiB,IAAjB,mBAAiB;GAJd,2BAAK,KAAL,2BAAK;AAOH,MAAM,yCAAQ,GAAG,CAAC,KAAa,GAAc;IAClD,MAAM,MAAM,GAAY,EAAE,AAAC;IAE3B,IAAI,GAAG,GAAG,CAAC,AAAC;IACZ,IAAI,GAAG,GAAG,EAAE,AAAC;IACb,IAAI,YAAY,GAAW,CAAC,AAAC;IAC7B,IAAI,KAAK,GAAG,2BAAK,CAAC,OAAO,AAAC;IAE1B,MAAM,cAAc,GAAG,IAAM;QAC3B,GAAG,GAAG,CAAC,CAAC;QACR,GAAG,GAAG,EAAE,CAAC;QACT,YAAY,GAAG,CAAC,CAAC;QACjB,KAAK,GAAG,2BAAK,CAAC,OAAO,CAAC;KACvB,AAAC;IAEF,MAAM,WAAW,GAAG,CAAC,SAAoB,EAAE,UAA4B,GAAK;QAC1E,MAAM,CAAC,IAAI,CAAC,IAAI,CAAA,GAAA,yCAAK,CAAA,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;QACxD,cAAc,EAAE,CAAC;KAClB,AAAC;IAEF,IAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAE;QAC/C,MAAM,IAAI,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,AAAC;QAEhC,MAAM,QAAQ,GAAG,IAAI,CAAA,GAAA,yCAAI,CAAA,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,AAAC;QAExC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC;QAElB,OAAQ,KAAK;YACX,KAAK,2BAAK,CAAC,OAAO;gBAChB,GAAG,GAAG,CAAC,CAAC;gBAER,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,MAAM,CAAC,CAAC;oBAC9B,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,YAAY,EAAE,EAAE;oBACvB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,UAAU,CAAC,CAAC;oBAClC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE;oBACzB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,YAAY,CAAC,CAAC;oBACpC,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,iBAAiB,EAAE,EAAE;wBAChC,KAAK,GAAG,2BAAK,CAAC,KAAK,CAAC;wBACpB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;wBAC1D,YAAY,IAAI,CAAC,CAAC;wBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;wBACxB,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM;YAGR,KAAK,2BAAK,CAAC,KAAK;gBACd,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,UAAU,EAAE,EAAE;oBACrB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,MAAM;YAGR,KAAK,2BAAK,CAAC,SAAS;gBAClB,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;oBAClB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;wBAAE,YAAY,EAAE,CAAC;qBAAE,CAAC,CAAC;oBACpD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,QAAQ,EAAE,EAAE;oBACnB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE;wBAC1B,KAAK,GAAG,2BAAK,CAAC,iBAAiB,CAAC;wBAChC,MAAM;qBACP;oBAED,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,qBAAqB,EAAE,EAAE;oBAChC,YAAY,IAAI,CAAC,CAAC;oBAElB,IAAI,QAAQ,CAAC,QAAQ,EAAE,EACrB,MAAM;oBAGR,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;gBAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE;oBACtB,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAClC,MAAM;oBAGR,YAAY,IAAI,CAAC,CAAC;oBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;sCAAE,YAAY;qBAAE,CAAC,CAAC;oBACjD,MAAM;iBACP;YAGH,KAAK,2BAAK,CAAC,iBAAiB;gBAC1B,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,qBAAqB,EAAE,EAAE;oBAC1D,YAAY,IAAI,CAAC,CAAC;oBAClB,KAAK,GAAG,2BAAK,CAAC,SAAS,CAAC;oBACxB,MAAM;iBACP;gBAED,YAAY,IAAI,CAAC,CAAC;gBAClB,WAAW,CAAC,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,EAAE;kCAAE,YAAY;iBAAE,CAAC,CAAC;gBACjD,MAAM;YAGR;gBACE,MAAM;SAET;KACF;IAED,OAAO,MAAM,CAAC;CACf,AAAC;;;AFnKF,MAAM,2CAAqB,GAAG,CAAC,MAAe,GAC5C,MAAM,CAAC,MAAM,CACX,CAAC,KAAK,EAAE,OAAO,GACb,OAAO,CAAC,UAAU,GAAG,KAAK,GAAG,OAAO,CAAC,UAAU,CAAC,YAAY,GAAG,KAAK,EACtE,CAAC,CACF,AAAC;AAEJ,MAAM,kCAAY,GAAG,CAAC,MAAe,EAAE,SAAoB,GACzD,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,GAAK,KAAK,CAAC,IAAI,KAAK,SAAS,CAAC,AAAC;AAErD,MAAM,6BAAO,GAAG,CAAC,KAAa,GAAc;IAC1C,MAAM,MAAM,GAAG,CAAA,GAAA,yCAAQ,CAAA,CAAC,KAAK,CAAC,AAAC;IAE/B,MAAM,QAAQ,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,AAAC;IAEzD,uBAAuB;IAEvB,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,AAAC;IAE9B,MAAM,OAAO,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,MAAM,CAAC,AAAC;IAEvD,MAAM,YAAY,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,OAAO,CAAC,AAAC;IAE7D,MAAM,WAAW,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,UAAU,CAAC,AAAC;IAE/D,MAAM,iBAAiB,GAAG,kCAAY,CAAC,MAAM,EAAE,CAAA,GAAA,yCAAS,CAAA,CAAC,YAAY,CAAC,AAAC;IAEvE,MAAM,YAAY,GAAG,2CAAqB,CAAC,QAAQ,CAAC,AAAC;IAErD,OAAO;QACL,GAAG,EAAE,MAAM;QACX,QAAQ,EAAE,QAAQ;QAClB,kBAAkB;QAClB,KAAK,EAAE,KAAK;QACZ,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,YAAY;QACrB,WAAW,EAAE,WAAW;QACxB,YAAY,EAAE,iBAAiB;QAC/B,YAAY,EAAE,YAAY;KAC3B,CAAC;CACH,AAAC;AAEF,MAAM,8BAAQ,GAAG;aACf,6BAAO;CACR,AAAC;IAEF,wCAAwB,GAAT,8BAAQ","sources":["src/index.ts","src/token.ts","src/tokenize.ts","src/char.ts","src/deva-char-types.ts"],"sourcesContent":["import { Token, TokenType } from \"./token\";\nimport { tokenize } from \"./tokenize\";\n\ntype Results = {\n all: Token[];\n aksharas: Token[];\n // varnas: Token[] /* Yet to be implemented */;\n symbols: Token[];\n whitespaces: Token[];\n chars: string[];\n invalid: Token[];\n unrecognised: Token[];\n varnasLength: number /* Will be deprecated */;\n};\n\nconst calcTotalVarnasLength = (tokens: Token[]) =>\n tokens.reduce(\n (total, akshara) =>\n akshara.attributes ? total + akshara.attributes.varnasLength : total,\n 0\n );\n\nconst filterTokens = (tokens: Token[], tokenType: TokenType) =>\n tokens.filter((token) => token.type === tokenType);\n\nconst analyse = (input: string): Results => {\n const tokens = tokenize(input);\n\n const aksharas = filterTokens(tokens, TokenType.Akshara);\n\n // const varnas = [];\n\n const chars = input.split(\"\");\n\n const symbols = filterTokens(tokens, TokenType.Symbol);\n\n const invalidChars = filterTokens(tokens, TokenType.Invalid);\n\n const whitespaces = filterTokens(tokens, TokenType.Whitespace);\n\n const unrecognisedChars = filterTokens(tokens, TokenType.Unrecognised);\n\n const varnasLength = calcTotalVarnasLength(aksharas);\n\n return {\n all: tokens,\n aksharas: aksharas,\n // varnas: varnas,\n chars: chars,\n symbols: symbols,\n invalid: invalidChars,\n whitespaces: whitespaces,\n unrecognised: unrecognisedChars,\n varnasLength: varnasLength,\n };\n};\n\nconst Aksharas = {\n analyse,\n};\n\nexport default Aksharas;\n","export enum TokenType {\n Akshara = \"akshara\",\n Symbol = \"symbol\",\n Whitespace = \"whitespace\",\n Invalid = \"invalid\",\n Unrecognised = \"unrecognised\",\n}\n\nexport type TokenAttributes = Record;\n\nexport class Token {\n readonly type: TokenType;\n readonly value: string;\n readonly from: number;\n readonly to: number;\n readonly attributes?: TokenAttributes;\n\n constructor(\n type: TokenType,\n value: string,\n pos: number,\n attributes?: TokenAttributes\n ) {\n this.type = type;\n this.value = value;\n this.from = pos;\n this.to = pos + (value.length - 1);\n this.attributes = attributes;\n }\n\n // Only for debugging\n // toString = () => this.value;\n}\n","import { Char } from \"./char\";\nimport { Token, TokenAttributes, TokenType } from \"./token\";\n\nenum State {\n Initial,\n Vowel,\n Consonant,\n ConjunctConsonant,\n}\n\nexport const tokenize = (input: string): Token[] => {\n const tokens: Token[] = [];\n\n let pos = 0;\n let acc = \"\";\n let varnasLength: number = 0;\n let state = State.Initial;\n\n const resetVariables = () => {\n pos = 0;\n acc = \"\";\n varnasLength = 0;\n state = State.Initial;\n };\n\n const createToken = (tokenType: TokenType, attributes?: TokenAttributes) => {\n tokens.push(new Token(tokenType, acc, pos, attributes));\n resetVariables();\n };\n\n for (let i = 0, l = input.length; i < l; i += 1) {\n const char = new Char(input[i]);\n\n const nextChar = new Char(input[i + 1]);\n\n acc += char.value;\n\n switch (state) {\n case State.Initial: {\n pos = i;\n\n if (char.isSymbol()) {\n createToken(TokenType.Symbol);\n break;\n }\n\n if (char.isWhitespace()) {\n createToken(TokenType.Whitespace);\n break;\n }\n\n if (char.isUnrecognised()) {\n createToken(TokenType.Unrecognised);\n break;\n }\n\n if (char.isVowel()) {\n if (nextChar.isVowelAttachment()) {\n state = State.Vowel;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isConsonant()) {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n createToken(TokenType.Invalid);\n break;\n }\n\n case State.Vowel: {\n if (char.isAccent()) {\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n if (char.isYogavaha()) {\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 1 });\n break;\n }\n\n break;\n }\n\n case State.Consonant: {\n if (char.isNukta()) {\n if (nextChar.isConsonantAttachment()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength: 2 });\n break;\n }\n\n if (char.isVirama()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isJoiner()) {\n if (nextChar.isJoiner()) {\n break;\n }\n\n if (nextChar.isConsonant()) {\n state = State.ConjunctConsonant;\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMarkAttachment()) {\n varnasLength += 1;\n\n if (nextChar.isAccent()) {\n break;\n }\n\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n if (char.isVowelMark()) {\n if (nextChar.isVowelMarkAttachment()) {\n break;\n }\n\n varnasLength += 1;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n }\n\n case State.ConjunctConsonant: {\n if (nextChar.isNukta() || nextChar.isConsonantAttachment()) {\n varnasLength += 1;\n state = State.Consonant;\n break;\n }\n\n varnasLength += 2;\n createToken(TokenType.Akshara, { varnasLength });\n break;\n }\n\n default: {\n break;\n }\n }\n }\n\n return tokens;\n};\n","import { DEVA_CHAR_TYPES } from \"./deva-char-types\";\n\nenum CharType {\n Vowel = \"vowels\",\n Symbol = \"symbols\",\n VowelMark = \"vowel_marks\",\n Yogavaha = \"yogavaahas\",\n Accent = \"accents\",\n Virama = \"virama\",\n Nukta = \"nukta\",\n ZWNJ = \"zwnj\",\n ZWJ = \"zwj\",\n Consonant = \"consonants\",\n ExtraConsonant = \"extra_consonants\",\n Whitespace = \"whitespace\",\n Unrecognised = \"unrecognised\",\n EndOfInput = \"end_of_input\",\n}\n\nconst MATRAS = [\"१\", \"२\", \"३\"];\n\nexport class Char {\n value: string;\n type: CharType;\n\n constructor(value: string) {\n this.value = value;\n this.type = this.getCharType(value);\n }\n\n getCharType = (char: string): CharType => {\n if (char === undefined) return CharType.EndOfInput;\n\n if (/\\s/.test(char)) return CharType.Whitespace;\n\n return DEVA_CHAR_TYPES[char] ?? CharType.Unrecognised;\n };\n\n isVowel = () => this.type === CharType.Vowel;\n\n isConsonant = () =>\n [CharType.Consonant, CharType.ExtraConsonant].includes(this.type);\n\n isSymbol = () => this.type === CharType.Symbol;\n\n isVowelMark = () => this.type === CharType.VowelMark;\n\n isYogavaha = () => this.type === CharType.Yogavaha;\n\n isAccent = () => this.type === CharType.Accent;\n\n isVirama = () => this.type === CharType.Virama;\n\n isNukta = () => this.type === CharType.Nukta;\n\n isMatra = () => MATRAS.includes(this.value);\n\n isJoiner = () => [CharType.ZWNJ, CharType.ZWJ].includes(this.type);\n\n isWhitespace = () => this.type === CharType.Whitespace;\n\n isUnrecognised = () => this.type === CharType.Unrecognised;\n\n isEndOfInput = () => this.type === CharType.EndOfInput;\n\n isVowelAttachment = () => this.isYogavaha() || this.isAccent();\n\n isConsonantAttachment = () =>\n this.isVirama() ||\n this.isYogavaha() ||\n this.isAccent() ||\n this.isVowelMark() ||\n this.isMatra();\n\n isVowelMarkAttachment = () =>\n this.isYogavaha() || this.isAccent() || this.isMatra();\n\n // Only for debugging\n // toString = () => this.value;\n}\n","export const DEVA_CHAR_TYPES: Record = {\n अ: \"vowels\",\n आ: \"vowels\",\n इ: \"vowels\",\n ई: \"vowels\",\n उ: \"vowels\",\n ऊ: \"vowels\",\n ऋ: \"vowels\",\n ॠ: \"vowels\",\n ऌ: \"vowels\",\n ॡ: \"vowels\",\n ऎ: \"vowels\",\n ए: \"vowels\",\n ऐ: \"vowels\",\n ऒ: \"vowels\",\n ओ: \"vowels\",\n औ: \"vowels\",\n ऍ: \"vowels\",\n ऑ: \"vowels\",\n क: \"consonants\",\n ख: \"consonants\",\n ग: \"consonants\",\n घ: \"consonants\",\n ङ: \"consonants\",\n च: \"consonants\",\n छ: \"consonants\",\n ज: \"consonants\",\n झ: \"consonants\",\n ञ: \"consonants\",\n ट: \"consonants\",\n ठ: \"consonants\",\n ड: \"consonants\",\n ढ: \"consonants\",\n ण: \"consonants\",\n त: \"consonants\",\n थ: \"consonants\",\n द: \"consonants\",\n ध: \"consonants\",\n न: \"consonants\",\n प: \"consonants\",\n फ: \"consonants\",\n ब: \"consonants\",\n भ: \"consonants\",\n म: \"consonants\",\n य: \"consonants\",\n र: \"consonants\",\n ल: \"consonants\",\n व: \"consonants\",\n श: \"consonants\",\n ष: \"consonants\",\n स: \"consonants\",\n ह: \"consonants\",\n ळ: \"consonants\",\n क्ष: \"consonants\",\n ज्ञ: \"consonants\",\n क़: \"extra_consonants\",\n ख़: \"extra_consonants\",\n ग़: \"extra_consonants\",\n ज़: \"extra_consonants\",\n ड़: \"extra_consonants\",\n फ़: \"extra_consonants\",\n य़: \"extra_consonants\",\n ऱ: \"extra_consonants\",\n ऴ: \"extra_consonants\",\n ऩ: \"extra_consonants\",\n \"ा\": \"vowel_marks\",\n \"ि\": \"vowel_marks\",\n \"ी\": \"vowel_marks\",\n \"ु\": \"vowel_marks\",\n \"ू\": \"vowel_marks\",\n \"ृ\": \"vowel_marks\",\n \"ॄ\": \"vowel_marks\",\n \"ॢ\": \"vowel_marks\",\n \"ॣ\": \"vowel_marks\",\n \"ॆ\": \"vowel_marks\",\n \"े\": \"vowel_marks\",\n \"ै\": \"vowel_marks\",\n \"ॊ\": \"vowel_marks\",\n \"ो\": \"vowel_marks\",\n \"ौ\": \"vowel_marks\",\n \"ॅ\": \"vowel_marks\",\n \"ॉ\": \"vowel_marks\",\n \"ं\": \"yogavaahas\",\n \"ः\": \"yogavaahas\",\n \"ँ\": \"yogavaahas\",\n ᳵ: \"yogavaahas\",\n ᳶ: \"yogavaahas\",\n ꣳ: \"yogavaahas\",\n \"्\": \"virama\",\n \"़\": \"nukta\",\n \"‍\": \"zwj\",\n \"‌\": \"zwnj\",\n \"॑\": \"accents\",\n \"॒\": \"accents\",\n \"᳡\": \"accents\",\n \"꣡\": \"accents\",\n \"꣢\": \"accents\",\n \"꣣\": \"accents\",\n \"꣤\": \"accents\",\n \"꣥\": \"accents\",\n \"꣦\": \"accents\",\n \"꣧\": \"accents\",\n \"꣨\": \"accents\",\n \"꣩\": \"accents\",\n \"꣪\": \"accents\",\n \"꣫\": \"accents\",\n \"꣬\": \"accents\",\n \"꣭\": \"accents\",\n \"꣮\": \"accents\",\n \"꣯\": \"accents\",\n \"꣰\": \"accents\",\n \"꣱\": \"accents\",\n \"०\": \"symbols\",\n \"१\": \"symbols\",\n \"२\": \"symbols\",\n \"३\": \"symbols\",\n \"४\": \"symbols\",\n \"५\": \"symbols\",\n \"६\": \"symbols\",\n \"७\": \"symbols\",\n \"८\": \"symbols\",\n \"९\": \"symbols\",\n ॐ: \"symbols\",\n ऽ: \"symbols\",\n \"।\": \"symbols\",\n \"॥\": \"symbols\",\n};\n"],"names":[],"version":3,"file":"index.mjs.map"} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 131a096..a3816b4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@vipran/aksharas", - "version": "0.1.1", + "version": "0.2.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@vipran/aksharas", - "version": "0.1.1", + "version": "0.2.0", "license": "MIT", "devDependencies": { "@parcel/packager-ts": "^2.7.0", diff --git a/package.json b/package.json index 275ce07..234c9fe 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@vipran/aksharas", - "version": "0.1.1", + "version": "0.2.0", "description": "Aksharas is an utility for splitting an Devanagari string into akṣaras and varṇas.", "source": "src/index.ts", "main": "dist/index.cjs",